/**
 * Convert half-precision floats held in 16-bit integers to float32.
 *
 * Fast path: when util_cpu_caps reports F16C, HAVE_LLVM is at least 0x0301
 * and src has 4 or 8 elements, a single vcvtph2ps intrinsic is emitted
 * [llvm.x86.vcvtph2ps / _mm_cvtph_ps].  A 4-element source is first padded
 * to 8 elements before the 128-bit form of the intrinsic is called.
 *
 * Fallback: src is zero-extended to 32-bit integers and handed to
 * lp_build_smallfloat_to_float() with the arguments 10, 5, 0, true
 * (presumably the half-float mantissa and exponent widths).
 *
 * @param gallivm  gallivm state whose builder receives the instructions
 * @param src      value to convert; when its LLVM type is not a vector it
 *                 is treated as a single element
 */
LLVMValueRef
lp_build_half_to_float(struct gallivm_state *gallivm,
                       LLVMValueRef src)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMTypeRef in_type = LLVMTypeOf(src);
   unsigned count = 1;
   struct lp_type f32_type;
   struct lp_type i32_type;
   LLVMTypeRef i32_vec_type;
   LLVMValueRef widened;

   if (LLVMGetTypeKind(in_type) == LLVMVectorTypeKind)
      count = LLVMGetVectorSize(in_type);

   f32_type = lp_type_float_vec(32, 32 * count);
   i32_type = lp_type_int_vec(32, 32 * count);
   i32_vec_type = lp_build_vec_type(gallivm, i32_type);

   if (util_cpu_caps.has_f16c && HAVE_LLVM >= 0x0301) {
      const char *name = NULL;

      switch (count) {
      case 4:
         /* The 128-bit intrinsic is fed an 8-element input vector. */
         src = lp_build_pad_vector(gallivm, src, 8);
         name = "llvm.x86.vcvtph2ps.128";
         break;
      case 8:
         name = "llvm.x86.vcvtph2ps.256";
         break;
      default:
         /* Other lengths use the generic path below. */
         break;
      }

      if (name) {
         return lp_build_intrinsic_unary(builder, name,
                                         lp_build_vec_type(gallivm, f32_type),
                                         src);
      }
   }

   /* Convert int16 vector to int32 vector by zero ext (might generate bad code) */
   widened = LLVMBuildZExt(builder, src, i32_vec_type, "");
   return lp_build_smallfloat_to_float(gallivm, f32_type, widened, 10, 5, 0, true);
}
/**
 * Interleave the elements of two vectors.
 *
 * Matches the PUNPCKLxx and PUNPCKHxx SSE instructions
 * (but not for 256bit AVX vectors).
 *
 * @param gallivm  gallivm state whose builder receives the instructions
 * @param type     type of a and b
 * @param a        first source vector
 * @param b        second source vector
 * @param lo_hi    passed to the unpack-shuffle helper; in the AVX special
 *                 case it selects which half of each source is taken
 */
LLVMValueRef
lp_build_interleave2(struct gallivm_state *gallivm,
                     struct lp_type type,
                     LLVMValueRef a,
                     LLVMValueRef b,
                     unsigned lo_hi)
{
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_type part_type;
   LLVMTypeRef quad_vec_type;
   LLVMValueRef halves[2];
   LLVMValueRef joined;

   if (!(type.length == 2 && type.width == 128 && util_cpu_caps.has_avx)) {
      /* Common case: a plain unpack shuffle. */
      LLVMValueRef unpack = lp_build_const_unpack_shuffle(gallivm, type.length, lo_hi);
      return LLVMBuildShuffleVector(builder, a, b, unpack, "");
   }

   /*
    * XXX: Workaround for an llvm code generation deficiency.  Although this
    * case needs vinsertf128/vextractf128 instructions (a natural match for
    * 2x128bit vectors), the "normal" unpack shuffle generates code ranging
    * from atrocious (llvm 3.1) to terrible (llvm 3.2, 3.3).  So different
    * shuffles are used instead; the exact shuffles don't seem to matter as
    * long as 128bit wide elements are avoided (works with 8x32 or 4x64).
    */
   part_type = type;
   part_type.length = 4;
   part_type.width = 64;
   quad_vec_type = lp_build_vec_type(gallivm, part_type);

   /* View both sources as 4x64 and pull the requested half out of each. */
   a = LLVMBuildBitCast(builder, a, quad_vec_type, "");
   b = LLVMBuildBitCast(builder, b, quad_vec_type, "");
   halves[0] = lp_build_extract_range(gallivm, a, lo_hi * 2, 2);
   halves[1] = lp_build_extract_range(gallivm, b, lo_hi * 2, 2);

   /* Glue the two 2x64 halves together and return to the caller's type. */
   part_type.length = 2;
   joined = lp_build_concat(gallivm, halves, part_type, 2);

   return LLVMBuildBitCast(builder, joined, lp_build_vec_type(gallivm, type), "");
}
/**
 * Combined extract and broadcast (mere shuffle in most cases).
 *
 * Picks element 'index' of 'vector' and replicates it into a value of
 * dst_type.  Source and destination must agree on floating-ness and
 * element width; only their lengths may differ.
 *
 * @param gallivm   gallivm state whose builder receives the instructions
 * @param src_type  type of vector
 * @param dst_type  type of the result
 * @param vector    source value (a scalar when src_type.length == 1)
 * @param index     i32 element index
 */
LLVMValueRef
lp_build_extract_broadcast(struct gallivm_state *gallivm,
                           struct lp_type src_type,
                           struct lp_type dst_type,
                           LLVMValueRef vector,
                           LLVMValueRef index)
{
   LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
   LLVMValueRef lanes;
   LLVMValueRef undef;

   assert(src_type.floating == dst_type.floating);
   assert(src_type.width == dst_type.width);

   assert(lp_check_value(src_type, vector));
   assert(LLVMTypeOf(index) == i32t);

   if (src_type.length == 1) {
      /* Trivial scalar -> scalar. */
      if (dst_type.length == 1)
         return vector;

      /* Broadcast scalar -> vector. */
      return lp_build_broadcast(gallivm,
                                lp_build_vec_type(gallivm, dst_type),
                                vector);
   }

   /* Trivial extract scalar from vector. */
   if (dst_type.length <= 1)
      return LLVMBuildExtractElement(gallivm->builder, vector, index, "");

   /*
    * Vector -> vector: a shuffle whose mask repeats 'index' in every lane.
    * The result can be of different length than the source.
    */
   lanes = lp_build_broadcast(gallivm,
                              LLVMVectorType(i32t, dst_type.length),
                              index);
   undef = LLVMGetUndef(lp_build_vec_type(gallivm, src_type));

   return LLVMBuildShuffleVector(gallivm->builder, vector, undef, lanes, "");
}
/**
 * Fetch one texel of an array-like format and convert it to dst_type.
 *
 * The source vector type is derived from channel[0] of the format
 * (type, normalization and size) with nr_channels elements.  The whole
 * vector is loaded at once, 64-bit floats are truncated to 32 bits, the
 * vector is padded up to dst_type.length, converted with lp_build_conv()
 * and finally swizzled according to the format.
 *
 * \param gallivm      gallivm state whose builder receives the instructions
 * \param format_desc  describes format of the image we're fetching from
 * \param dst_type     output type
 * \param base_ptr     address of the pixel block (or the texel if uncompressed)
 * \param offset       ptr offset
 *
 * \return the value produced by lp_build_format_swizzle_aos()
 */
LLVMValueRef
lp_build_fetch_rgba_aos_array(struct gallivm_state *gallivm,
                              const struct util_format_description *format_desc,
                              struct lp_type dst_type,
                              LLVMValueRef base_ptr,
                              LLVMValueRef offset)
{
   struct lp_build_context bld;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMTypeRef src_vec_type;
   LLVMValueRef ptr, res;
   struct lp_type src_type;

   /* Describe the in-memory vector using the first channel of the format. */
   memset(&src_type, 0, sizeof src_type);
   src_type.floating = format_desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT;
   src_type.fixed    = format_desc->channel[0].type == UTIL_FORMAT_TYPE_FIXED;
   src_type.sign     = format_desc->channel[0].type != UTIL_FORMAT_TYPE_UNSIGNED;
   src_type.norm     = format_desc->channel[0].normalized;
   src_type.width    = format_desc->channel[0].size;
   src_type.length   = format_desc->nr_channels;

   assert(src_type.length <= dst_type.length);

   src_vec_type = lp_build_vec_type(gallivm, src_type);

   /* Read whole vector from memory, unaligned (element alignment only) */
   ptr = LLVMBuildGEP(builder, base_ptr, &offset, 1, "");
   ptr = LLVMBuildPointerCast(builder, ptr, LLVMPointerType(src_vec_type, 0), "");
   res = LLVMBuildLoad(builder, ptr, "");
   lp_set_load_alignment(res, src_type.width / 8);

   /* Truncate doubles to float */
   if (src_type.floating && src_type.width == 64) {
      src_type.width = 32;
      src_vec_type = lp_build_vec_type(gallivm, src_type);

      res = LLVMBuildFPTrunc(builder, res, src_vec_type, "");
   }

   /* Expand to correct length */
   if (src_type.length < dst_type.length) {
      res = lp_build_pad_vector(gallivm, res, src_type, dst_type.length);
      src_type.length = dst_type.length;
   }

   /* Convert to correct format */
   lp_build_conv(gallivm, src_type, dst_type, &res, 1, &res, 1);

   /* Swizzle it */
   lp_build_context_init(&bld, gallivm, dst_type);
   return lp_build_format_swizzle_aos(format_desc, &bld, res);
}
static LLVMValueRef add_conv_test(struct gallivm_state *gallivm, struct lp_type src_type, unsigned num_srcs, struct lp_type dst_type, unsigned num_dsts) { LLVMModuleRef module = gallivm->module; LLVMContextRef context = gallivm->context; LLVMBuilderRef builder = gallivm->builder; LLVMTypeRef args[2]; LLVMValueRef func; LLVMValueRef src_ptr; LLVMValueRef dst_ptr; LLVMBasicBlockRef block; LLVMValueRef src[LP_MAX_VECTOR_LENGTH]; LLVMValueRef dst[LP_MAX_VECTOR_LENGTH]; unsigned i; args[0] = LLVMPointerType(lp_build_vec_type(gallivm, src_type), 0); args[1] = LLVMPointerType(lp_build_vec_type(gallivm, dst_type), 0); func = LLVMAddFunction(module, "test", LLVMFunctionType(LLVMVoidTypeInContext(context), args, 2, 0)); LLVMSetFunctionCallConv(func, LLVMCCallConv); src_ptr = LLVMGetParam(func, 0); dst_ptr = LLVMGetParam(func, 1); block = LLVMAppendBasicBlockInContext(context, func, "entry"); LLVMPositionBuilderAtEnd(builder, block); for(i = 0; i < num_srcs; ++i) { LLVMValueRef index = LLVMConstInt(LLVMInt32TypeInContext(context), i, 0); LLVMValueRef ptr = LLVMBuildGEP(builder, src_ptr, &index, 1, ""); src[i] = LLVMBuildLoad(builder, ptr, ""); } lp_build_conv(gallivm, src_type, dst_type, src, num_srcs, dst, num_dsts); for(i = 0; i < num_dsts; ++i) { LLVMValueRef index = LLVMConstInt(LLVMInt32TypeInContext(context), i, 0); LLVMValueRef ptr = LLVMBuildGEP(builder, dst_ptr, &index, 1, ""); LLVMBuildStore(builder, dst[i], ptr); } LLVMBuildRetVoid(builder);; gallivm_verify_function(gallivm, func); return func; }
/**
 * Round a 128-bit float vector with the SSE4.1 round intrinsics.
 *
 * Emits llvm.x86.sse41.round.ps for 32-bit elements and
 * llvm.x86.sse41.round.pd for 64-bit elements, passing 'mode' as the i32
 * immediate operand.  Any other element width asserts and yields
 * bld->undef.
 */
static INLINE LLVMValueRef
lp_build_round_sse41(struct lp_build_context *bld,
                     LLVMValueRef a,
                     enum lp_build_round_sse41_mode mode)
{
   const struct lp_type type = bld->type;
   LLVMTypeRef vec_type = lp_build_vec_type(type);
   const char *name;
   LLVMValueRef mode_imm;

   assert(type.floating);
   assert(type.width*type.length == 128);
   assert(lp_check_value(type, a));
   assert(util_cpu_caps.has_sse4_1);

   if (type.width == 32) {
      name = "llvm.x86.sse41.round.ps";
   }
   else if (type.width == 64) {
      name = "llvm.x86.sse41.round.pd";
   }
   else {
      assert(0);
      return bld->undef;
   }

   mode_imm = LLVMConstInt(LLVMInt32Type(), mode, 0);

   return lp_build_intrinsic_binary(bld->builder, name, vec_type, a, mode_imm);
}
/**
 * Set the sign of float vector 'a' according to 'sign'.
 * If sign==0, return abs(a).
 * If sign==1, return -abs(a);
 * Other values for sign produce undefined results.
 *
 * The float is reinterpreted as an integer, its top bit is cleared, and
 * 'sign' shifted into the top bit position is OR-ed in.
 */
LLVMValueRef
lp_build_set_sign(struct lp_build_context *bld,
                  LLVMValueRef a, LLVMValueRef sign)
{
   const struct lp_type type = bld->type;
   const unsigned top_bit = type.width - 1;
   LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
   LLVMTypeRef vec_type = lp_build_vec_type(type);
   LLVMValueRef shift = lp_build_int_const_scalar(type, top_bit);
   LLVMValueRef magnitude_mask = lp_build_int_const_scalar(type, ~(1ULL << top_bit));
   LLVMValueRef bits;
   LLVMValueRef sign_bits;

   assert(type.floating);

   /* Integer view of a with the sign bit masked off */
   bits = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
   bits = LLVMBuildAnd(bld->builder, bits, magnitude_mask, "");

   /* Move the requested sign into the top bit and merge it in */
   sign_bits = LLVMBuildShl(bld->builder, sign, shift, "");
   bits = LLVMBuildOr(bld->builder, bits, sign_bits, "");

   /* Back to the float type */
   return LLVMBuildBitCast(bld->builder, bits, vec_type, "");
}
/**
 * Generate abs(a).
 *
 * Unsigned types are returned untouched.  Floats have their sign bit
 * masked off.  128-bit signed integer vectors use the SSSE3 pabs
 * intrinsics when available; everything else falls back to max(a, -a).
 */
LLVMValueRef
lp_build_abs(struct lp_build_context *bld,
             LLVMValueRef a)
{
   const struct lp_type type = bld->type;
   LLVMTypeRef vec_type = lp_build_vec_type(type);
   LLVMValueRef negated;

   if (!type.sign)
      return a;

   if (type.floating) {
      /* Mask out the sign bit */
      LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
      const unsigned long long keep_bits = ~(1ULL << (type.width - 1));
      LLVMValueRef mask = lp_build_int_const_scalar(type, keep_bits);
      LLVMValueRef bits;

      bits = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
      bits = LLVMBuildAnd(bld->builder, bits, mask, "");
      return LLVMBuildBitCast(bld->builder, bits, vec_type, "");
   }

   if (type.width*type.length == 128 && util_cpu_caps.has_ssse3) {
      const char *pabs = NULL;

      if (type.width == 8)
         pabs = "llvm.x86.ssse3.pabs.b.128";
      else if (type.width == 16)
         pabs = "llvm.x86.ssse3.pabs.w.128";
      else if (type.width == 32)
         pabs = "llvm.x86.ssse3.pabs.d.128";

      /* Other widths have no pabs variant here; use the generic path. */
      if (pabs)
         return lp_build_intrinsic_unary(bld->builder, pabs, vec_type, a);
   }

   negated = LLVMBuildNeg(bld->builder, a, "");
   return lp_build_max(bld, a, negated);
}
/**
 * Generate color blending and color output.
 *
 * Loads the constant blend color and the current destination color for
 * each of the four channels, runs lp_build_blend_soa(), and for every
 * channel enabled in the render target's colormask stores
 * select(mask, blended, destination) back to the color buffer.
 *
 * \param blend        blend state
 * \param rt           the render target index (to index blend, colormask state)
 * \param builder      LLVM builder receiving the instructions
 * \param type         the pixel color type
 * \param context_ptr  pointer to the runtime JIT context
 * \param mask         execution mask (active fragment/pixel mask)
 * \param src          colors from the fragment shader
 * \param dst_ptr      the destination color buffer pointer
 */
static void
generate_blend(const struct pipe_blend_state *blend,
               unsigned rt,
               LLVMBuilderRef builder,
               struct lp_type type,
               LLVMValueRef context_ptr,
               LLVMValueRef mask,
               LLVMValueRef *src,
               LLVMValueRef dst_ptr)
{
   struct lp_build_context bld;
   struct lp_build_flow_context *flow;
   struct lp_build_mask_context mask_ctx;
   LLVMTypeRef vec_type;
   LLVMValueRef blend_color_ptr;
   LLVMValueRef con[4];
   LLVMValueRef dst[4];
   LLVMValueRef res[4];
   unsigned chan;

   lp_build_context_init(&bld, builder, type);

   flow = lp_build_flow_create(builder);

   /* we'll use this mask context to skip blending if all pixels are dead */
   lp_build_mask_begin(&mask_ctx, flow, type, mask);

   vec_type = lp_build_vec_type(type);

   /* View the JIT context's blend color as an array of vectors. */
   blend_color_ptr = lp_jit_context_blend_color(builder, context_ptr);
   blend_color_ptr = LLVMBuildBitCast(builder, blend_color_ptr,
                                      LLVMPointerType(vec_type, 0), "");

   /* load constant blend color and colors from the dest color buffer */
   for (chan = 0; chan < 4; ++chan) {
      LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), chan, 0);
      LLVMValueRef elem_ptr;

      elem_ptr = LLVMBuildGEP(builder, blend_color_ptr, &index, 1, "");
      con[chan] = LLVMBuildLoad(builder, elem_ptr, "");

      elem_ptr = LLVMBuildGEP(builder, dst_ptr, &index, 1, "");
      dst[chan] = LLVMBuildLoad(builder, elem_ptr, "");

      lp_build_name(con[chan], "con.%c", "rgba"[chan]);
      lp_build_name(dst[chan], "dst.%c", "rgba"[chan]);
   }

   /* do blend */
   lp_build_blend_soa(builder, blend, type, rt, src, dst, con, res);

   /* store results to color buffer */
   for (chan = 0; chan < 4; ++chan) {
      LLVMValueRef index;
      LLVMValueRef out_ptr;

      /* Channels disabled in the colormask are left untouched. */
      if (!(blend->rt[rt].colormask & (1 << chan)))
         continue;

      index = LLVMConstInt(LLVMInt32Type(), chan, 0);
      lp_build_name(res[chan], "res.%c", "rgba"[chan]);

      /* Keep the old destination value where the execution mask is off. */
      res[chan] = lp_build_select(&bld, mask, res[chan], dst[chan]);

      out_ptr = LLVMBuildGEP(builder, dst_ptr, &index, 1, "");
      LLVMBuildStore(builder, res[chan], out_ptr);
   }

   lp_build_mask_end(&mask_ctx);
   lp_build_flow_destroy(flow);
}
/**
 * Expands src vector from src_type.length to dst_length elements.
 *
 * The first src_type.length elements of the result are the elements of
 * src.  The remaining elements are taken from an undef vector and thus
 * have no defined value.  A scalar source (length 1) is broadcast to all
 * dst_length elements instead.
 *
 * @param gallivm     gallivm state whose builder receives the instructions
 * @param src         value to expand
 * @param src_type    type of src
 * @param dst_length  requested element count; must be at least
 *                    src_type.length and at most LP_MAX_VECTOR_LENGTH
 *
 * @return src itself when no padding is needed, otherwise the expanded
 *         vector
 */
LLVMValueRef
lp_build_pad_vector(struct gallivm_state *gallivm,
                    LLVMValueRef src,
                    struct lp_type src_type,
                    unsigned dst_length)
{
   LLVMValueRef undef = LLVMGetUndef(lp_build_vec_type(gallivm, src_type));
   LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
   unsigned i;

   assert(dst_length <= Elements(elems));
   /*
    * Equal lengths are a legal no-op, handled right below, so only a
    * destination shorter than the source is rejected here.
    */
   assert(dst_length >= src_type.length);

   if (src_type.length == dst_length)
      return src;

   /* If its a single scalar type, no need to reinvent the wheel */
   if (src_type.length == 1) {
      return lp_build_broadcast(gallivm,
                                LLVMVectorType(lp_build_elem_type(gallivm, src_type),
                                               dst_length),
                                src);
   }

   /* All elements from src vector */
   for (i = 0; i < src_type.length; ++i)
      elems[i] = lp_build_const_int32(gallivm, i);

   /*
    * Undef fill remaining space: shuffle index src_type.length addresses
    * the first element of the second (undef) operand.
    */
   for (i = src_type.length; i < dst_length; ++i)
      elems[i] = lp_build_const_int32(gallivm, src_type.length);

   /* Combine the two vectors */
   return LLVMBuildShuffleVector(gallivm->builder, src, undef,
                                 LLVMConstVector(elems, dst_length), "");
}
/**
 * Converts int16 half-float to float32.
 *
 * Note this can be performed in 1 instruction if vcvtph2ps exists
 * [llvm.x86.vcvtph2ps / _mm_cvtph_ps]; this routine uses integer and
 * float arithmetic only:
 *  - zero-extend to 32 bits and split off the sign (bit 15),
 *  - shift exponent+mantissa left by 13 and multiply, as a float, by the
 *    constant whose bit pattern is (254 - 15) << 23 to adjust the exponent,
 *  - where exponent+mantissa exceeded 0x7bff (Inf/NaN), OR in the all-ones
 *    float32 exponent,
 *  - OR the sign back in, shifted up by 16.
 *
 * @param gallivm   gallivm state whose builder receives the instructions
 * @param src_type  <vector> type of int16
 * @param src       value to convert
 *
 * ref http://fgiesen.wordpress.com/2012/03/28/half-to-float-done-quic/
 */
LLVMValueRef
lp_build_half_to_float(struct gallivm_state *gallivm,
                       struct lp_type src_type,
                       LLVMValueRef src)
{
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_type f32_type = lp_type_float_vec(32, 32 * src_type.length);
   struct lp_type i32_type = lp_type_int_vec(32, 32 * src_type.length);
   LLVMTypeRef i32_vec = lp_build_vec_type(gallivm, i32_type);
   LLVMTypeRef f32_vec = lp_build_vec_type(gallivm, f32_type);

   /* Constants */
   LLVMValueRef c_mant_shift = lp_build_const_int_vec(gallivm, i32_type, 13);
   LLVMValueRef c_sign_shift = lp_build_const_int_vec(gallivm, i32_type, 16);
   LLVMValueRef c_nosign     = lp_build_const_int_vec(gallivm, i32_type, 0x7fff);
   LLVMValueRef c_max_finite = lp_build_const_int_vec(gallivm, i32_type, 0x7bff);
   LLVMValueRef c_f32_exp    = lp_build_const_int_vec(gallivm, i32_type, 0xff << 23);
   LLVMValueRef c_magic_bits = lp_build_const_int_vec(gallivm, i32_type, (254 - 15) << 23);
   LLVMValueRef magic = LLVMBuildBitCast(builder, c_magic_bits, f32_vec, "");

   LLVMValueRef half_bits;
   LLVMValueRef exp_mant;
   LLVMValueRef tmp;
   LLVMValueRef rebased;
   LLVMValueRef is_infnan;
   LLVMValueRef infnan_exp;
   LLVMValueRef sign;
   LLVMValueRef upper;
   LLVMValueRef result;

   /* Convert int16 vector to int32 vector by zero ext */
   half_bits = LLVMBuildZExt(builder, src, i32_vec, "");

   /* Exponent / mantissa bits, moved to their float32 position */
   exp_mant = LLVMBuildAnd(builder, c_nosign, half_bits, "");
   tmp = LLVMBuildShl(builder, exp_mant, c_mant_shift, "");
   tmp = LLVMBuildBitCast(builder, tmp, f32_vec, "");

   /* Exponent adjust */
   tmp = LLVMBuildFMul(builder, tmp, magic, "");
   rebased = LLVMBuildBitCast(builder, tmp, i32_vec, "");

   /* Make sure Inf/NaN survive */
   is_infnan = lp_build_compare(gallivm, i32_type, PIPE_FUNC_GREATER,
                                exp_mant, c_max_finite);
   infnan_exp = LLVMBuildAnd(builder, is_infnan, c_f32_exp, "");

   /* Sign bit: whatever is left of h once exponent/mantissa are removed */
   sign = LLVMBuildXor(builder, half_bits, exp_mant, "");
   sign = LLVMBuildShl(builder, sign, c_sign_shift, "");

   /* Combine result */
   upper = LLVMBuildOr(builder, sign, infnan_exp, "");
   result = LLVMBuildOr(builder, rebased, upper, "");

   /* Cast from int32 vector to float32 vector */
   return LLVMBuildBitCast(builder, result, f32_vec, "");
}
/**
 * Generate the depth /stencil test code.
 *
 * Does nothing when neither the depth test nor either stencil test is
 * enabled in the variant key.  Otherwise the fragment Z in 'src' is
 * converted to the depth type chosen for key->zsbuf_format, dst_ptr is
 * cast to a pointer to that type, and lp_build_depth_stencil_test() is
 * invoked.
 */
static void
generate_depth_stencil(LLVMBuilderRef builder,
                       const struct lp_fragment_shader_variant_key *key,
                       struct lp_type src_type,
                       struct lp_build_mask_context *mask,
                       LLVMValueRef stencil_refs[2],
                       LLVMValueRef src,
                       LLVMValueRef dst_ptr,
                       LLVMValueRef facing,
                       LLVMValueRef counter)
{
   const struct util_format_description *zs_desc;
   struct lp_type zs_type;
   LLVMTypeRef zs_ptr_type;

   if (!(key->depth.enabled ||
         key->stencil[0].enabled ||
         key->stencil[1].enabled))
      return;

   zs_desc = util_format_description(key->zsbuf_format);
   assert(zs_desc);

   /*
    * Depths are expected to be between 0 and 1, even if they are stored in
    * floats. Setting these bits here will ensure that the lp_build_conv() call
    * below won't try to unnecessarily clamp the incoming values.
    */
   if (!src_type.floating) {
      assert(!src_type.sign);
      assert(src_type.norm);
   }
   else {
      src_type.sign = FALSE;
      src_type.norm = TRUE;
   }

   /* Pick the depth type. */
   zs_type = lp_depth_type(zs_desc, src_type.width*src_type.length);

   /* FIXME: Cope with a depth test type with a different bit width. */
   assert(zs_type.width == src_type.width);
   assert(zs_type.length == src_type.length);

   /* Convert fragment Z from float to integer */
   lp_build_conv(builder, src_type, zs_type, &src, 1, &src, 1);

   zs_ptr_type = LLVMPointerType(lp_build_vec_type(zs_type), 0);
   dst_ptr = LLVMBuildBitCast(builder, dst_ptr, zs_ptr_type, "");

   lp_build_depth_stencil_test(builder,
                               &key->depth,
                               key->stencil,
                               zs_type,
                               zs_desc,
                               mask,
                               stencil_refs,
                               src,
                               dst_ptr,
                               facing,
                               counter);
}
/**
 * Approximate 2^x and/or its building blocks for 32-bit float vectors.
 *
 * x is clamped to [-126.99999, 129.0] and split into an integer part
 * (ipart = int(x - 0.5)) and the remaining fraction (fpart = x - ipart).
 * Each output pointer may be NULL, in which case the corresponding value
 * is not produced; if all three are NULL nothing is emitted.
 *
 * @param bld              build context (type must be float, width 32)
 * @param x                exponent
 * @param p_exp2_int_part  receives 2^ipart, built by placing ipart + 127 in
 *                         the float exponent field
 * @param p_frac_part      receives fpart
 * @param p_exp2           receives 2^ipart times the
 *                         lp_build_exp2_polynomial evaluation of fpart
 */
void
lp_build_exp2_approx(struct lp_build_context *bld,
                     LLVMValueRef x,
                     LLVMValueRef *p_exp2_int_part,
                     LLVMValueRef *p_frac_part,
                     LLVMValueRef *p_exp2)
{
   const struct lp_type type = bld->type;
   LLVMTypeRef vec_type = lp_build_vec_type(type);
   LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
   LLVMValueRef int_part;
   LLVMValueRef frac_part;
   LLVMValueRef pow2_int = NULL;
   LLVMValueRef pow2_frac;
   LLVMValueRef result = NULL;

   /* Nothing requested, nothing to generate. */
   if (!p_exp2_int_part && !p_frac_part && !p_exp2)
      return;

   /* TODO: optimize the constant case */
   if (LLVMIsConstant(x))
      debug_printf("%s: inefficient/imprecise constant arithmetic\n",
                   __FUNCTION__);

   assert(type.floating && type.width == 32);

   /* Clamp the exponent to the supported range. */
   x = lp_build_min(bld, x, lp_build_const_scalar(type,  129.0));
   x = lp_build_max(bld, x, lp_build_const_scalar(type, -126.99999));

   /* ipart = int(x - 0.5) */
   int_part = LLVMBuildSub(bld->builder, x, lp_build_const_scalar(type, 0.5f), "");
   int_part = LLVMBuildFPToSI(bld->builder, int_part, int_vec_type, "");

   /* fpart = x - ipart */
   frac_part = LLVMBuildSIToFP(bld->builder, int_part, vec_type, "");
   frac_part = LLVMBuildSub(bld->builder, x, frac_part, "");

   if (p_exp2_int_part || p_exp2) {
      /* expipart = (float) (1 << ipart), by writing the exponent field */
      pow2_int = LLVMBuildAdd(bld->builder, int_part,
                              lp_build_int_const_scalar(type, 127), "");
      pow2_int = LLVMBuildShl(bld->builder, pow2_int,
                              lp_build_int_const_scalar(type, 23), "");
      pow2_int = LLVMBuildBitCast(bld->builder, pow2_int, vec_type, "");
   }

   if (p_exp2) {
      pow2_frac = lp_build_polynomial(bld, frac_part, lp_build_exp2_polynomial,
                                      Elements(lp_build_exp2_polynomial));

      result = LLVMBuildMul(bld->builder, pow2_int, pow2_frac, "");
   }

   if (p_exp2_int_part)
      *p_exp2_int_part = pow2_int;

   if (p_frac_part)
      *p_frac_part = frac_part;

   if (p_exp2)
      *p_exp2 = result;
}
static LLVMValueRef add_blend_test(struct gallivm_state *gallivm, const struct pipe_blend_state *blend, struct lp_type type) { LLVMModuleRef module = gallivm->module; LLVMContextRef context = gallivm->context; LLVMTypeRef vec_type; LLVMTypeRef args[5]; LLVMValueRef func; LLVMValueRef src_ptr; LLVMValueRef src1_ptr; LLVMValueRef dst_ptr; LLVMValueRef const_ptr; LLVMValueRef res_ptr; LLVMBasicBlockRef block; LLVMBuilderRef builder; const enum pipe_format format = PIPE_FORMAT_R8G8B8A8_UNORM; const unsigned rt = 0; const unsigned char swizzle[4] = { 0, 1, 2, 3 }; LLVMValueRef src; LLVMValueRef src1; LLVMValueRef dst; LLVMValueRef con; LLVMValueRef res; vec_type = lp_build_vec_type(gallivm, type); args[4] = args[3] = args[2] = args[1] = args[0] = LLVMPointerType(vec_type, 0); func = LLVMAddFunction(module, "test", LLVMFunctionType(LLVMVoidTypeInContext(context), args, 5, 0)); LLVMSetFunctionCallConv(func, LLVMCCallConv); src_ptr = LLVMGetParam(func, 0); src1_ptr = LLVMGetParam(func, 1); dst_ptr = LLVMGetParam(func, 2); const_ptr = LLVMGetParam(func, 3); res_ptr = LLVMGetParam(func, 4); block = LLVMAppendBasicBlockInContext(context, func, "entry"); builder = gallivm->builder; LLVMPositionBuilderAtEnd(builder, block); src = LLVMBuildLoad(builder, src_ptr, "src"); src1 = LLVMBuildLoad(builder, src1_ptr, "src1"); dst = LLVMBuildLoad(builder, dst_ptr, "dst"); con = LLVMBuildLoad(builder, const_ptr, "const"); res = lp_build_blend_aos(gallivm, blend, format, type, rt, src, NULL, src1, NULL, dst, NULL, con, NULL, swizzle, 4); lp_build_name(res, "res"); LLVMBuildStore(builder, res, res_ptr); LLVMBuildRetVoid(builder);; gallivm_verify_function(gallivm, func); return func; }
void lp_emit_declaration_aos( struct lp_build_tgsi_aos_context *bld, const struct tgsi_full_declaration *decl) { struct gallivm_state *gallivm = bld->bld_base.base.gallivm; LLVMTypeRef vec_type = lp_build_vec_type(bld->bld_base.base.gallivm, bld->bld_base.base.type); unsigned first = decl->Range.First; unsigned last = decl->Range.Last; unsigned idx; for (idx = first; idx <= last; ++idx) { switch (decl->Declaration.File) { case TGSI_FILE_TEMPORARY: assert(idx < LP_MAX_INLINED_TEMPS); if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) { LLVMValueRef array_size = lp_build_const_int32(gallivm, last + 1); bld->temps_array = lp_build_array_alloca(bld->bld_base.base.gallivm, vec_type, array_size, ""); } else { bld->temps[idx] = lp_build_alloca(gallivm, vec_type, ""); } break; case TGSI_FILE_OUTPUT: bld->outputs[idx] = lp_build_alloca(gallivm, vec_type, ""); break; case TGSI_FILE_ADDRESS: assert(idx < LP_MAX_TGSI_ADDRS); bld->addr[idx] = lp_build_alloca(gallivm, vec_type, ""); break; case TGSI_FILE_PREDICATE: assert(idx < LP_MAX_TGSI_PREDS); bld->preds[idx] = lp_build_alloca(gallivm, vec_type, ""); break; case TGSI_FILE_SAMPLER_VIEW: /* * The target stored here MUST match whatever there actually * is in the set sampler views (what about return type?). */ assert(last < PIPE_MAX_SHADER_SAMPLER_VIEWS); for (idx = first; idx <= last; ++idx) { bld->sv[idx] = decl->SamplerView; } break; default: /* don't need to declare other vars */ break; } } }
/**
 * Generate color blending and color output.
 *
 * Loads the constant blend color and the current destination color for
 * each of the four channels, runs lp_build_blend_soa(), and for every
 * channel enabled in blend->colormask stores
 * select(mask, blended, destination) back to the color buffer.
 *
 * \param blend        blend state
 * \param builder      LLVM builder receiving the instructions
 * \param type         the pixel color type
 * \param context_ptr  pointer to the runtime JIT context
 * \param mask         execution mask
 * \param src          source colors to blend
 * \param dst_ptr      the destination color buffer pointer
 */
static void
generate_blend(const struct pipe_blend_state *blend,
               LLVMBuilderRef builder,
               struct lp_type type,
               LLVMValueRef context_ptr,
               LLVMValueRef mask,
               LLVMValueRef *src,
               LLVMValueRef dst_ptr)
{
   struct lp_build_context bld;
   struct lp_build_flow_context *flow;
   struct lp_build_mask_context mask_ctx;
   LLVMTypeRef vec_type;
   LLVMValueRef const_ptr;
   LLVMValueRef con[4];
   LLVMValueRef dst[4];
   LLVMValueRef res[4];
   unsigned chan;

   lp_build_context_init(&bld, builder, type);

   flow = lp_build_flow_create(builder);
   lp_build_mask_begin(&mask_ctx, flow, type, mask);

   vec_type = lp_build_vec_type(type);

   /* View the JIT context's blend color as an array of vectors. */
   const_ptr = lp_jit_context_blend_color(builder, context_ptr);
   const_ptr = LLVMBuildBitCast(builder, const_ptr,
                                LLVMPointerType(vec_type, 0), "");

   /* Load constant blend color and colors from the dest color buffer. */
   for(chan = 0; chan < 4; ++chan) {
      LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), chan, 0);
      con[chan] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, const_ptr, &index, 1, ""), "");

      dst[chan] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dst_ptr, &index, 1, ""), "");

      lp_build_name(con[chan], "con.%c", "rgba"[chan]);
      lp_build_name(dst[chan], "dst.%c", "rgba"[chan]);
   }

   lp_build_blend_soa(builder, blend, type, src, dst, con, res);

   /* Store results; channels disabled in the colormask are left untouched. */
   for(chan = 0; chan < 4; ++chan) {
      if(blend->colormask & (1 << chan)) {
         LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), chan, 0);
         lp_build_name(res[chan], "res.%c", "rgba"[chan]);
         /* Keep the old destination value where the execution mask is off. */
         res[chan] = lp_build_select(&bld, mask, res[chan], dst[chan]);
         LLVMBuildStore(builder, res[chan], LLVMBuildGEP(builder, dst_ptr, &index, 1, ""));
      }
   }

   lp_build_mask_end(&mask_ctx);
   lp_build_flow_destroy(flow);
}
/**
 * Transpose from AOS <-> SOA
 *
 * Two rounds of half-interleaves: the first pairs up src[0]/src[1] and
 * src[2]/src[3] at the original element width, the second combines those
 * results viewed at twice the element width (and half the length).
 *
 * @param gallivm         gallivm state whose builder receives the instructions
 * @param single_type_lp  type of pixels
 * @param src             the 4 * n pixel input
 * @param dst             the 4 * n pixel output
 */
void
lp_build_transpose_aos(struct gallivm_state *gallivm,
                       struct lp_type single_type_lp,
                       const LLVMValueRef src[4],
                       LLVMValueRef dst[4])
{
   static const char *const tmp_names[4] = { "t0", "t1", "t2", "t3" };
   static const char *const dst_names[4] = { "dst0", "dst1", "dst2", "dst3" };
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_type wide_type_lp = single_type_lp;
   LLVMTypeRef wide_vec_type;
   LLVMTypeRef narrow_vec_type;
   LLVMValueRef t[4];
   unsigned i;

   wide_type_lp.length >>= 1;
   wide_type_lp.width  <<= 1;

   wide_vec_type   = lp_build_vec_type(gallivm, wide_type_lp);
   narrow_vec_type = lp_build_vec_type(gallivm, single_type_lp);

   /*
    * Interleave x, y, z, w -> xy and zw.
    * t0/t1 take the low halves of (src0,src1)/(src2,src3), t2/t3 the high.
    */
   for (i = 0; i < 4; ++i) {
      const unsigned pair = (i & 1) * 2;
      const unsigned lo_hi = i >> 1;

      t[i] = lp_build_interleave2_half(gallivm, single_type_lp,
                                       src[pair], src[pair + 1], lo_hi);
   }

   /* Cast to double width type for second interleave */
   for (i = 0; i < 4; ++i)
      t[i] = LLVMBuildBitCast(builder, t[i], wide_vec_type, tmp_names[i]);

   /*
    * Interleave xy, zw -> xyzw.
    * dst0/dst1 come from (t0,t1), dst2/dst3 from (t2,t3).
    */
   for (i = 0; i < 4; ++i) {
      const unsigned pair = i & ~1u;
      const unsigned lo_hi = i & 1;

      dst[i] = lp_build_interleave2_half(gallivm, wide_type_lp,
                                         t[pair], t[pair + 1], lo_hi);
   }

   /* Cast back to original single width type */
   for (i = 0; i < 4; ++i)
      dst[i] = LLVMBuildBitCast(builder, dst[i], narrow_vec_type, dst_names[i]);
}
/**
 * Generate 1/sqrt(a).
 *
 * 4 x 32-bit float vectors on SSE-capable CPUs use the
 * llvm.x86.sse.rsqrt.ps intrinsic; every other type is computed as
 * rcp(sqrt(a)).
 */
LLVMValueRef
lp_build_rsqrt(struct lp_build_context *bld,
               LLVMValueRef a)
{
   const struct lp_type type = bld->type;
   LLVMValueRef root;

   assert(type.floating);

   if (!util_cpu_caps.has_sse || type.width != 32 || type.length != 4) {
      /* Generic path */
      root = lp_build_sqrt(bld, a);
      return lp_build_rcp(bld, root);
   }

   return lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rsqrt.ps",
                                   lp_build_vec_type(type), a);
}
/**
 * Build a zero constant of the given type.
 *
 * Vectors (length != 1) get the null value of their vector type.  Scalars
 * get an integer zero of type.width bits, or a floating point zero:
 * double for 64-bit floats, float otherwise.
 *
 * @param type  type of the requested constant
 */
LLVMValueRef
lp_build_zero(struct lp_type type)
{
   if (type.length == 1) {
      if (type.floating) {
         /*
          * Honour the element width: a 64-bit float type needs a double
          * constant, not a float one.
          */
         LLVMTypeRef real_type = type.width == 64 ? LLVMDoubleType()
                                                  : LLVMFloatType();
         return LLVMConstReal(real_type, 0.0);
      }
      else
         return LLVMConstInt(LLVMIntType(type.width), 0, 0);
   }
   else {
      LLVMTypeRef vec_type = lp_build_vec_type(type);
      return LLVMConstNull(vec_type);
   }
}
/**
 * Build a zero constant of the given type.
 *
 * Vectors (length != 1) get the null value of their vector type.  Scalars
 * get an integer zero of type.width bits, or a floating point zero:
 * a double for 64-bit floats, otherwise whatever lp_build_const_float()
 * produces.
 *
 * @param gallivm  gallivm state supplying the LLVM context
 * @param type     type of the requested constant
 */
LLVMValueRef
lp_build_zero(struct gallivm_state *gallivm, struct lp_type type)
{
   if (type.length == 1) {
      if (type.floating) {
         /*
          * Honour the element width: a 64-bit float type needs a double
          * constant rather than the one lp_build_const_float() builds.
          */
         if (type.width == 64)
            return LLVMConstReal(LLVMDoubleTypeInContext(gallivm->context), 0.0);
         return lp_build_const_float(gallivm, 0.0);
      }
      else
         return LLVMConstInt(LLVMIntTypeInContext(gallivm->context, type.width), 0, 0);
   }
   else {
      LLVMTypeRef vec_type = lp_build_vec_type(gallivm, type);
      return LLVMConstNull(vec_type);
   }
}
/**
 * Twiddle from quad format to row format
 *
 *   src0      src1
 * ######### #########     #################
 * # 0 | 1 # # 4 | 5 #     # 0 | 1 | 4 | 5 # src0
 * #---+---# #---+---#  -> #################
 * # 2 | 3 # # 6 | 7 #     # 2 | 3 | 6 | 7 # src1
 * ######### #########     #################
 *
 * Each consecutive pair of sources is viewed as two-element integer
 * vectors (each element covering half the vector), interleaved low and
 * high, and cast back to lp_dst_type.
 *
 * @param gallivm      gallivm state whose builder receives the instructions
 * @param lp_dst_type  type of the source and destination vectors
 * @param src          input vectors
 * @param src_count    number of input vectors; must be even
 * @param dst          receives src_count output vectors
 */
void
lp_bld_quad_twiddle(struct gallivm_state *gallivm,
                    struct lp_type lp_dst_type,
                    const LLVMValueRef* src,
                    unsigned src_count,
                    LLVMValueRef* dst)
{
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_type half_type = lp_dst_type;
   LLVMTypeRef half_vec_type;
   LLVMTypeRef out_vec_type;
   unsigned i;

   assert((src_count % 2) == 0);

   /* Create a type with only 2 elements */
   half_type.width = (lp_dst_type.width * lp_dst_type.length) / 2;
   half_type.length = 2;
   half_type.floating = 0;

   half_vec_type = lp_build_vec_type(gallivm, half_type);
   out_vec_type = lp_build_vec_type(gallivm, lp_dst_type);

   for (i = 0; i < src_count; i += 2) {
      const LLVMValueRef *in = src + i;
      LLVMValueRef *out = dst + i;
      LLVMValueRef top, bottom;

      top = LLVMBuildBitCast(builder, in[0], half_vec_type, "");
      bottom = LLVMBuildBitCast(builder, in[1], half_vec_type, "");

      out[0] = lp_build_interleave2(gallivm, half_type, top, bottom, 0);
      out[1] = lp_build_interleave2(gallivm, half_type, top, bottom, 1);

      out[0] = LLVMBuildBitCast(builder, out[0], out_vec_type, "");
      out[1] = LLVMBuildBitCast(builder, out[1], out_vec_type, "");
   }
}
static void emit_declaration( struct lp_build_tgsi_aos_context *bld, const struct tgsi_full_declaration *decl) { LLVMTypeRef vec_type = lp_build_vec_type(bld->base.type); unsigned first = decl->Range.First; unsigned last = decl->Range.Last; unsigned idx; for (idx = first; idx <= last; ++idx) { switch (decl->Declaration.File) { case TGSI_FILE_TEMPORARY: assert(idx < LP_MAX_TGSI_TEMPS); if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) { LLVMValueRef array_size = LLVMConstInt(LLVMInt32Type(), last + 1, 0); bld->temps_array = lp_build_array_alloca(bld->base.builder, vec_type, array_size, ""); } else { bld->temps[idx] = lp_build_alloca(bld->base.builder, vec_type, ""); } break; case TGSI_FILE_OUTPUT: bld->outputs[idx] = lp_build_alloca(bld->base.builder, vec_type, ""); break; case TGSI_FILE_ADDRESS: assert(idx < LP_MAX_TGSI_ADDRS); bld->addr[idx] = lp_build_alloca(bld->base.builder, vec_type, ""); break; case TGSI_FILE_PREDICATE: assert(idx < LP_MAX_TGSI_PREDS); bld->preds[idx] = lp_build_alloca(bld->base.builder, vec_type, ""); break; default: /* don't need to declare other vars */ break; } } }
/**
 * Build sin(a) through the generic LLVM sine intrinsic.
 *
 * The intrinsic name is assembled from the context type as
 * "llvm.sin.v<length>f<width>".
 *
 * @param bld  build context; its type must be floating point
 * @param a    argument of the sine
 * @return     the value holding sin(a)
 */
LLVMValueRef
lp_build_sin(struct lp_build_context *bld,
             LLVMValueRef a)
{
   const struct lp_type type = bld->type;
   LLVMTypeRef ret_type = lp_build_vec_type(type);
   char name[32];

   /* TODO: optimize the constant case */

   assert(type.floating);

   util_snprintf(name, sizeof name, "llvm.sin.v%uf%u", type.length, type.width);

   return lp_build_intrinsic_unary(bld->builder, name, ret_type, a);
}
/**
 * Multiply a value by a small compile-time integer factor.
 *
 * Trivial factors are folded (0, 1, -1, and 2 for floats, which becomes
 * a + a). Integer types with a power-of-two factor use a left shift.
 * Everything else is a regular multiply by a constant.
 *
 * @param bld  build context
 * @param a    value to scale
 * @param b    immediate factor
 * @return     the value holding a * b
 */
LLVMValueRef
lp_build_mul_imm(struct lp_build_context *bld,
                 LLVMValueRef a,
                 int b)
{
   switch (b) {
   case 0:
      return bld->zero;
   case 1:
      return a;
   case -1:
      return LLVMBuildNeg(bld->builder, a, "");
   case 2:
      if (bld->type.floating)
         return lp_build_add(bld, a, a);
      break;
   default:
      break;
   }

   /*
    * Power-of-two factors: integers are shifted. For floats the exponent
    * could be bumped by adding to the bit pattern directly, but that is not
    * always faster, introduces a small error for multiplication by zero and
    * gives wrong results for Inf and NaN, so floats take the generic
    * multiply below.
    */
   if (util_is_pot(b) && !bld->type.floating) {
      unsigned shift = ffs(b) - 1;
      LLVMValueRef count = lp_build_const_scalar(bld->type, shift);

      return LLVMBuildShl(bld->builder, a, count, "");
   }

   return lp_build_mul(bld, a, lp_build_const_scalar(bld->type, (double)b));
}
/**
 * Inverse of lp_build_clamped_float_to_unsigned_norm.
 * Ex: src = { i32, i32, i32, i32 } with values in range [0, 2^src_width-1]
 * return {float, float, float, float} with values in range [0, 1].
 *
 * The integer bits are OR-ed into the mantissa of a power-of-two bias
 * constant, the bias is subtracted again as a float, and the result is
 * rescaled. Sources wider than the mantissa are first shifted right so
 * that they fit.
 *
 * @param builder    LLVM builder used to emit the instructions
 * @param src_width  number of significant bits in each source element
 * @param dst_type   floating point destination type
 * @param src        integer source vector
 * @return           the converted floating point vector
 */
LLVMValueRef
lp_build_unsigned_norm_to_float(LLVMBuilderRef builder,
                                unsigned src_width,
                                struct lp_type dst_type,
                                LLVMValueRef src)
{
   LLVMTypeRef float_vec_type = lp_build_vec_type(dst_type);
   LLVMTypeRef int_vec_type = lp_build_int_vec_type(dst_type);
   LLVMValueRef bias_vec;
   LLVMValueRef bias_bits;
   LLVMValueRef res = src;
   unsigned mantissa;
   unsigned bits;
   unsigned long long range;
   unsigned long long max_value;
   double scale;
   double bias;

   assert(dst_type.floating);

   mantissa = lp_mantissa(dst_type);

   /* Number of source bits that actually fit into the mantissa. */
   bits = MIN2(mantissa, src_width);

   range = (unsigned long long)1 << bits;
   max_value = range - 1;
   scale = (double)range / max_value;
   bias = (double)((unsigned long long)1 << (mantissa - bits));

   if (src_width > mantissa) {
      /* Discard the low bits that do not fit. */
      int shift = src_width - mantissa;

      res = LLVMBuildLShr(builder, res, lp_build_const_int_vec(dst_type, shift), "");
   }

   bias_vec = lp_build_const_vec(dst_type, bias);
   bias_bits = LLVMBuildBitCast(builder, bias_vec, int_vec_type, "");

   res = LLVMBuildOr(builder, res, bias_bits, "");
   res = LLVMBuildBitCast(builder, res, float_vec_type, "");
   res = LLVMBuildFSub(builder, res, bias_vec, "");

   return LLVMBuildFMul(builder, res, lp_build_const_vec(dst_type, scale), "");
}
/**
 * Generate a + b
 *
 * Shortcuts: adding the context's zero returns the other operand, and any
 * undef operand yields undef.
 *
 * For normalized types the addition saturates:
 * - unsigned types short-circuit to one when either operand is one (this
 *   does not hold for signed types, e.g. -0.5 + 1, so they take the
 *   regular path);
 * - 128-bit integer vectors of 8 or 16-bit elements use the SSE2
 *   saturating add intrinsics when available;
 * - floating point and fixed point results are clamped to a ceiling of one
 *   after the add.
 *
 * @param bld  build context describing the operand type
 * @param a    first operand
 * @param b    second operand
 * @return     the value holding the sum
 */
LLVMValueRef
lp_build_add(struct lp_build_context *bld,
             LLVMValueRef a,
             LLVMValueRef b)
{
   const struct lp_type type = bld->type;
   LLVMValueRef res;

   if(a == bld->zero)
      return b;
   if(b == bld->zero)
      return a;
   if(a == bld->undef || b == bld->undef)
      return bld->undef;

   if(bld->type.norm) {
      const char *intrinsic = NULL;

      /* x + 1 saturates to 1 only when x cannot be negative. */
      if(!type.sign && (a == bld->one || b == bld->one))
         return bld->one;

      if(util_cpu_caps.has_sse2 &&
         type.width * type.length == 128 &&
         !type.floating && !type.fixed) {
         if(type.width == 8)
            intrinsic = type.sign ? "llvm.x86.sse2.padds.b" : "llvm.x86.sse2.paddus.b";
         if(type.width == 16)
            intrinsic = type.sign ? "llvm.x86.sse2.padds.w" : "llvm.x86.sse2.paddus.w";
      }

      if(intrinsic)
         return lp_build_intrinsic_binary(bld->builder, intrinsic, lp_build_vec_type(bld->type), a, b);
   }

   if(LLVMIsConstant(a) && LLVMIsConstant(b))
      res = LLVMConstAdd(a, b);
   else
      res = LLVMBuildAdd(bld->builder, a, b, "");

   /* clamp to ceiling of 1.0 */
   if(bld->type.norm && (bld->type.floating || bld->type.fixed))
      res = lp_build_min_simple(bld, res, bld->one);

   /* XXX clamp to floor of -1 or 0??? */

   return res;
}
/**
 * Convert a vector of signed integers to a vector of floats.
 *
 * @param bld  build context; its type is the floating point result type
 * @param a    integer vector to convert
 * @return     the converted value, of the context's vector type
 */
LLVMValueRef
lp_build_int_to_float(struct lp_build_context *bld,
                      LLVMValueRef a)
{
   const struct lp_type type = bld->type;

   assert(type.floating);

   return LLVMBuildSIToFP(bld->builder, a, lp_build_vec_type(type), "");
}
/**
 * Convert float[] to int[] with floor().
 *
 * With SSE4.1 the value is rounded down with lp_build_round_sse41() and
 * then converted. Otherwise negative inputs get an offset of -0.99999(9)
 * added before the float-to-int conversion; non-negative inputs are
 * converted unchanged.
 *
 * @param bld  build context; its type must be floating point
 * @param a    float vector to convert
 * @return     integer vector of the matching integer vector type
 */
LLVMValueRef
lp_build_ifloor(struct lp_build_context *bld,
                LLVMValueRef a)
{
   const struct lp_type type = bld->type;
   LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
   LLVMValueRef res;

   assert(type.floating);
   assert(lp_check_value(type, a));

   if (util_cpu_caps.has_sse4_1) {
      res = lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_FLOOR);
   }
   else {
      LLVMTypeRef vec_type = lp_build_vec_type(type);
      const unsigned mantissa = lp_mantissa(type);
      const unsigned long long pot = (unsigned long long)1 << mantissa;
      LLVMValueRef sign_bit =
         lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1));
      LLVMValueRef sign_shift = lp_build_int_const_scalar(type, type.width - 1);
      LLVMValueRef neg_mask;
      LLVMValueRef adjust;

      /* neg_mask = a < 0 ? ~0 : 0 (isolate the sign bit, then smear it) */
      neg_mask = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
      neg_mask = LLVMBuildAnd(bld->builder, neg_mask, sign_bit, "");
      neg_mask = LLVMBuildAShr(bld->builder, neg_mask, sign_shift, "");
      lp_build_name(neg_mask, "floor.sign");

      /* adjust = -0.99999(9)f, as raw bits */
      adjust = lp_build_const_scalar(type, -(double)(pot - 1) / pot);
      adjust = LLVMConstBitCast(adjust, int_vec_type);

      /* adjust = a < 0 ? -0.99999(9)f : 0.0f */
      adjust = LLVMBuildAnd(bld->builder, adjust, neg_mask, "");
      adjust = LLVMBuildBitCast(bld->builder, adjust, vec_type, "");
      lp_build_name(adjust, "floor.offset");

      res = LLVMBuildAdd(bld->builder, a, adjust, "");
      lp_build_name(res, "floor.res");
   }

   res = LLVMBuildFPToSI(bld->builder, res, int_vec_type, "");
   lp_build_name(res, "floor");

   return res;
}
/**
 * Build floor(a), keeping the result in floating point.
 *
 * With SSE4.1 this is a single lp_build_round_sse41() call; otherwise the
 * value goes through lp_build_ifloor() and is converted back to float.
 *
 * @param bld  build context; its type must be floating point
 * @param a    value to round down
 * @return     the rounded value, of the context's vector type
 */
LLVMValueRef
lp_build_floor(struct lp_build_context *bld,
               LLVMValueRef a)
{
   const struct lp_type type = bld->type;
   LLVMTypeRef vec_type;
   LLVMValueRef as_int;

   assert(type.floating);

   if (util_cpu_caps.has_sse4_1)
      return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_FLOOR);

   /* Fallback: round trip through the integer floor. */
   vec_type = lp_build_vec_type(type);
   as_int = lp_build_ifloor(bld, a);

   return LLVMBuildSIToFP(bld->builder, as_int, vec_type, "");
}
/**
 * Generate a - b
 *
 * Shortcuts: subtracting the context's zero returns a, any undef operand
 * yields undef, and a - a is zero.
 *
 * For normalized types the subtraction saturates:
 * - unsigned types short-circuit to zero when b is one (a signed value
 *   minus one can be negative, so signed types take the regular path);
 * - 128-bit integer vectors of 8 or 16-bit elements use the SSE2
 *   saturating subtract intrinsics when available;
 * - floating point and fixed point results are clamped to a floor of zero
 *   after the subtract.
 *
 * @param bld  build context describing the operand type
 * @param a    minuend
 * @param b    subtrahend
 * @return     the value holding the difference
 */
LLVMValueRef
lp_build_sub(struct lp_build_context *bld,
             LLVMValueRef a,
             LLVMValueRef b)
{
   const struct lp_type type = bld->type;
   LLVMValueRef res;

   if(b == bld->zero)
      return a;
   if(a == bld->undef || b == bld->undef)
      return bld->undef;
   if(a == b)
      return bld->zero;

   if(bld->type.norm) {
      const char *intrinsic = NULL;

      /* x - 1 saturates to 0 only when x cannot be negative. */
      if(!type.sign && b == bld->one)
         return bld->zero;

      if(util_cpu_caps.has_sse2 &&
         type.width * type.length == 128 &&
         !type.floating && !type.fixed) {
         if(type.width == 8)
            intrinsic = type.sign ? "llvm.x86.sse2.psubs.b" : "llvm.x86.sse2.psubus.b";
         if(type.width == 16)
            intrinsic = type.sign ? "llvm.x86.sse2.psubs.w" : "llvm.x86.sse2.psubus.w";
      }

      if(intrinsic)
         return lp_build_intrinsic_binary(bld->builder, intrinsic, lp_build_vec_type(bld->type), a, b);
   }

   if(LLVMIsConstant(a) && LLVMIsConstant(b))
      res = LLVMConstSub(a, b);
   else
      res = LLVMBuildSub(bld->builder, a, b, "");

   /* clamp to floor of 0.0 */
   if(bld->type.norm && (bld->type.floating || bld->type.fixed))
      res = lp_build_max_simple(bld, res, bld->zero);

   return res;
}