/**
 * Widen half-float values, stored as 16-bit integers, to 32-bit floats.
 *
 * With F16C available this is a single vcvtph2ps instruction
 * [llvm.x86.vcvtph2ps / _mm_cvtph_ps]; otherwise the generic small-float
 * expansion is used.
 *
 * @param gallivm  code generation state (supplies the IR builder)
 * @param src      value to convert; either a scalar or a vector
 * @return the converted value, with as many 32-bit float lanes as src
 *         has elements
 */
LLVMValueRef
lp_build_half_to_float(struct gallivm_state *gallivm,
                       LLVMValueRef src)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMTypeRef half_type = LLVMTypeOf(src);
   unsigned lanes = 1;
   struct lp_type f32_type;
   struct lp_type i32_type;
   LLVMValueRef widened;

   /* A non-vector source is handled as a single lane. */
   if (LLVMGetTypeKind(half_type) == LLVMVectorTypeKind) {
      lanes = LLVMGetVectorSize(half_type);
   }

   f32_type = lp_type_float_vec(32, 32 * lanes);
   i32_type = lp_type_int_vec(32, 32 * lanes);

   /* Hardware path: only taken for 4 or 8 lanes. */
   if (util_cpu_caps.has_f16c && HAVE_LLVM >= 0x0301 &&
       (lanes == 4 || lanes == 8)) {
      const char *intrinsic;

      if (lanes == 8) {
         intrinsic = "llvm.x86.vcvtph2ps.256";
      }
      else {
         /* The 4-lane source is padded out to 8 elements first. */
         src = lp_build_pad_vector(gallivm, src, 8);
         intrinsic = "llvm.x86.vcvtph2ps.128";
      }

      return lp_build_intrinsic_unary(builder, intrinsic,
                                      lp_build_vec_type(gallivm, f32_type),
                                      src);
   }

   /*
    * Generic path: zero-extend the 16-bit lanes to 32 bits (this might
    * generate bad code) and expand them as small floats. The 10 and 5 are
    * presumably the half-float mantissa and exponent widths.
    */
   widened = LLVMBuildZExt(builder, src,
                           lp_build_vec_type(gallivm, i32_type), "");

   return lp_build_smallfloat_to_float(gallivm, f32_type, widened,
                                       10, 5, 0, true);
}
/**
 * @brief lp_build_fetch_rgba_aos_array
 *
 * Loads all channels of one array-format texel as a single vector,
 * converts it to dst_type and applies the format's swizzle.
 *
 * The in-memory vector type is derived from channel 0 of the format
 * description only (type, normalization and size) together with
 * nr_channels.
 *
 * \param gallivm      code generation state (supplies the IR builder)
 * \param format_desc  describes format of the image we're fetching from
 * \param dst_type     output type; must have at least nr_channels elements
 * \param base_ptr     address of the pixel block (or the texel if uncompressed)
 * \param offset       ptr offset
 * \return the swizzled texel as returned by lp_build_format_swizzle_aos()
 */
LLVMValueRef
lp_build_fetch_rgba_aos_array(struct gallivm_state *gallivm,
                              const struct util_format_description *format_desc,
                              struct lp_type dst_type,
                              LLVMValueRef base_ptr,
                              LLVMValueRef offset)
{
   struct lp_build_context bld;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMTypeRef src_vec_type;
   LLVMValueRef ptr, res;
   struct lp_type src_type;

   /* Describe the memory layout using channel 0 as representative. */
   memset(&src_type, 0, sizeof src_type);
   src_type.floating = format_desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT;
   src_type.fixed    = format_desc->channel[0].type == UTIL_FORMAT_TYPE_FIXED;
   src_type.sign     = format_desc->channel[0].type != UTIL_FORMAT_TYPE_UNSIGNED;
   src_type.norm     = format_desc->channel[0].normalized;
   src_type.width    = format_desc->channel[0].size;
   src_type.length   = format_desc->nr_channels;

   assert(src_type.length <= dst_type.length);

   src_vec_type = lp_build_vec_type(gallivm, src_type);

   /*
    * Read whole vector from memory, unaligned: the load is only given the
    * alignment of a single channel (src_type.width / 8).
    */
   ptr = LLVMBuildGEP(builder, base_ptr, &offset, 1, "");
   ptr = LLVMBuildPointerCast(builder, ptr,
                              LLVMPointerType(src_vec_type, 0), "");
   res = LLVMBuildLoad(builder, ptr, "");
   lp_set_load_alignment(res, src_type.width / 8);

   /* Truncate doubles to float */
   if (src_type.floating && src_type.width == 64) {
      src_type.width = 32;
      src_vec_type = lp_build_vec_type(gallivm, src_type);

      res = LLVMBuildFPTrunc(builder, res, src_vec_type, "");
   }

   /* Expand to correct length */
   if (src_type.length < dst_type.length) {
      /*
       * NOTE(review): this call passes src_type as an extra argument, while
       * the other lp_build_pad_vector() call sites in this file pass only
       * (gallivm, value, length) -- confirm which prototype is in scope.
       */
      res = lp_build_pad_vector(gallivm, res, src_type, dst_type.length);
      src_type.length = dst_type.length;
   }

   /* Convert to correct format (in place: res is both source and result) */
   lp_build_conv(gallivm, src_type, dst_type, &res, 1, &res, 1);

   /* Swizzle it */
   lp_build_context_init(&bld, gallivm, dst_type);
   return lp_build_format_swizzle_aos(format_desc, &bld, res);
}
/**
 * @brief lp_build_fetch_rgba_aos_array
 *
 * Loads one array-format texel as a vector, converts it towards dst_type
 * and applies the format's swizzle. Pure-integer formats are converted as
 * integers and only bitcast to dst_type at the very end when dst_type is
 * a floating type.
 *
 * \param gallivm      code generation state (supplies the IR builder)
 * \param format_desc  describes format of the image we're fetching from
 * \param dst_type     output type
 * \param base_ptr     address of the pixel block (or the texel if uncompressed)
 * \param offset       ptr offset
 * \return the fetched, converted and swizzled texel
 */
LLVMValueRef
lp_build_fetch_rgba_aos_array(struct gallivm_state *gallivm,
                              const struct util_format_description *format_desc,
                              struct lp_type dst_type,
                              LLVMValueRef base_ptr,
                              LLVMValueRef offset)
{
   const boolean is_pure_int = format_desc->channel[0].pure_integer;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_type conv_type = dst_type;
   struct lp_build_context swizzle_bld;
   struct lp_type mem_type;
   LLVMTypeRef mem_vec_type;
   LLVMValueRef texel_ptr;
   LLVMValueRef texel;

   lp_type_from_format_desc(&mem_type, format_desc);
   assert(mem_type.length <= dst_type.length);

   mem_vec_type = lp_build_vec_type(gallivm, mem_type);

   /*
    * Fetch the whole texel with one vector load. The load is only given an
    * alignment of mem_type.width / 8 rather than that of the full vector.
    */
   texel_ptr = LLVMBuildGEP(builder, base_ptr, &offset, 1, "");
   texel_ptr = LLVMBuildPointerCast(builder, texel_ptr,
                                    LLVMPointerType(mem_vec_type, 0), "");
   texel = LLVMBuildLoad(builder, texel_ptr, "");
   LLVMSetAlignment(texel, mem_type.width / 8);

   /* 64-bit floats are narrowed to 32-bit floats before anything else. */
   if (mem_type.floating && mem_type.width == 64) {
      mem_type.width = 32;
      mem_vec_type = lp_build_vec_type(gallivm, mem_type);
      texel = LLVMBuildFPTrunc(builder, texel, mem_vec_type, "");
   }

   /* Grow the vector when the destination has more elements. */
   if (mem_type.length < dst_type.length) {
      texel = lp_build_pad_vector(gallivm, texel, dst_type.length);
      mem_type.length = dst_type.length;
   }

   /*
    * Some callers expect (fake) floats, others real ints: for pure integer
    * formats do the conversion and swizzle on an integer type that keeps
    * the signedness of the source.
    */
   if (is_pure_int) {
      conv_type.floating = 0;
      conv_type.sign = mem_type.sign;
   }

   /* Convert in place, then reorder the channels. */
   lp_build_conv(gallivm, mem_type, conv_type, &texel, 1, &texel, 1);

   lp_build_context_init(&swizzle_bld, gallivm, conv_type);
   texel = lp_build_format_swizzle_aos(format_desc, &swizzle_bld, texel);

   /* Hand pure integers back as a float-typed bit pattern when requested. */
   if (is_pure_int && dst_type.floating) {
      return LLVMBuildBitCast(builder, texel,
                              lp_build_vec_type(gallivm, dst_type), "");
   }

   return texel;
}
/**
 * Gather one element from scatter positions in memory.
 *
 * Close to the plain element gather, except that the individual elements
 * may be vectors themselves and fetches may be of float type. Depending
 * on the destination, the loaded value is widened either by padding the
 * vector or by zero extension.
 *
 * @param length          passed through to lp_build_gather_elem_ptr()
 * @param src_width       size in bits of the value loaded from memory
 * @param src_type        LLVM type of the value loaded from memory
 * @param dst_type        type of the returned value
 * @param aligned         whether the caller guarantees an aligned fetch
 * @param base_ptr        base address, must be an i8 pointer
 * @param offsets         offsets passed to lp_build_gather_elem_ptr()
 * @param i               index of the element to gather
 * @param vector_justify  on big-endian, shift the zero-extended value up
 *                        by the number of padding bits
 *
 * @sa lp_build_gather()
 */
static LLVMValueRef
lp_build_gather_elem_vec(struct gallivm_state *gallivm,
                         unsigned length,
                         unsigned src_width,
                         LLVMTypeRef src_type,
                         struct lp_type dst_type,
                         boolean aligned,
                         LLVMValueRef base_ptr,
                         LLVMValueRef offsets,
                         unsigned i,
                         boolean vector_justify)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMTypeRef typed_ptr_type = LLVMPointerType(src_type, 0);
   const unsigned dst_bits = dst_type.width * dst_type.length;
   LLVMTypeRef scalar_type;
   LLVMValueRef elem_ptr;
   LLVMValueRef value;

   assert(LLVMTypeOf(base_ptr) ==
          LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0));

   elem_ptr = lp_build_gather_elem_ptr(gallivm, length, base_ptr, offsets, i);
   elem_ptr = LLVMBuildBitCast(builder, elem_ptr, typed_ptr_type, "");
   value = LLVMBuildLoad(builder, elem_ptr, "");

   /*
    * XXX
    * Some archs would probably prefer never to see alignments below
    * 4 bytes (when the fetch size is a power of two >= 32); x86 does not
    * care. Full alignment should be guaranteeable for every kind of
    * texture fetch (except ARB_texture_buffer_range, oops), but not for
    * vertex fetch (PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY and
    * friends exist but are not quite what is wanted here).
    * For ARB_texture_buffer_range, PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT
    * looks like a good fit, yet neither that cap bit nor OpenGL enforce
    * what we want (d3d10 requires the offset to be aligned to the element
    * size, whereas GL counts bytes regardless of element size, leaving
    * only a minimum alignment restriction of 16, which makes little sense
    * unless the type is 4x32bit). Because offsets get translated to
    * first_elem in sampler views, gallium apparently could not do anything
    * other than 16 anyway...
    *
    * An aligned fetch of power-of-two size keeps the default alignment of
    * the load. Everything else is overridden here:
    *  - unaligned fetches get byte alignment;
    *  - for aligned fetches of non-power-of-two size full alignment is
    *    impossible, so assume the caller meant that the individual
    *    elements are aligned (e.g. a 3x32bit format). Without this the
    *    generated code may crash, since llvm really does assume 128bit
    *    alignment for a 96bit fetch (sensible enough, the upper 32 bits
    *    can be anything). Perhaps the caller should be able to set this
    *    explicitly, but this covers all the 3-channel formats.
    */
   if (!aligned || !util_is_power_of_two(src_width)) {
      unsigned alignment = 1;

      if (aligned &&
          src_width % 24 == 0 &&
          util_is_power_of_two(src_width / 24)) {
         alignment = src_width / 24;
      }
      LLVMSetAlignment(value, alignment);
   }

   assert(src_width <= dst_bits);

   /* Nothing to widen. */
   if (src_width >= dst_bits) {
      return value;
   }

   if (dst_type.length > 1) {
      /*
       * vector_justify is hopefully a non-issue here since only
       * src_width >= 32 is dealt with?
       */
      return lp_build_pad_vector(gallivm, value, dst_type.length);
   }

   /* Scalar destination; the zero extension is only valid for int types. */
   scalar_type = lp_build_vec_type(gallivm, dst_type);
   value = LLVMBuildZExt(builder, value, scalar_type, "");

#ifdef PIPE_ARCH_BIG_ENDIAN
   if (vector_justify) {
      value = LLVMBuildShl(builder, value,
                           LLVMConstInt(scalar_type,
                                        dst_type.width - src_width, 0), "");
   }
   if (src_width == 48) {
      /*
       * Load of a 3x16 bit vector.
       * On big-endian hardware the load sequence goes as follows, with
       * 16-bit fields written as X, Y, Z and 0, and the three fields
       * stored in memory in the order X, Y, Z:
       *
       *    Load 32-bit word:       0.0.X.Y
       *    Load 16-bit halfword:   0.0.0.Z
       *    Rotate left:            0.X.Y.0
       *    Bitwise OR:             0.X.Y.Z
       *
       * The result needs the fields ordered 0.Z.Y.X, same as on
       * little-endian, so permute the 16-bit fields inside the 64-bit
       * register accordingly:
       */
      LLVMValueRef lane_order[4] = {
         lp_build_const_int32(gallivm, 2),
         lp_build_const_int32(gallivm, 1),
         lp_build_const_int32(gallivm, 0),
         lp_build_const_int32(gallivm, 3),
      };
      LLVMTypeRef u16x4_type =
         lp_build_vec_type(gallivm, lp_type_uint_vec(16, 4*16));

      value = LLVMBuildBitCast(builder, value, u16x4_type, "");
      value = LLVMBuildShuffleVector(builder, value, value,
                                     LLVMConstVector(lane_order, 4), "");
      value = LLVMBuildBitCast(builder, value, scalar_type, "");
   }
#endif

   return value;
}