/**
 * Normalized 8bit multiplication.
 *
 * - alpha plus one
 *
 *     makes the following approximation to the division (Sree)
 *
 *       a*b/255 ~= (a*(b + 1)) >> 8
 *
 *     which is the fastest method that satisfies the following OpenGL
 *     criteria:
 *
 *       0*0 = 0 and 255*255 = 255
 *
 * - geometric series
 *
 *     takes the geometric series approximation to the division
 *
 *       t/255 = (t >> 8) + (t >> 16) + (t >> 24) ..
 *
 *     in this case just the first two terms to fit in 16bit arithmetic
 *
 *       t/255 ~= (t + (t >> 8)) >> 8
 *
 *     note that by itself it does not satisfy the OpenGL criteria, as
 *     255*255 = 254, so the special case b = 255 must be accounted for,
 *     or rounding must be used.
 *
 * - geometric series plus rounding
 *
 *     when using a geometric series division instead of truncating the result
 *     use rounding in the approximation (Jim Blinn)
 *
 *       t/255 ~= (t + (t >> 8) + 0x80) >> 8
 *
 *     achieving exact results.
 *
 * @sa Alvy Ray Smith, Image Compositing Fundamentals, Tech Memo 4, Aug 15, 1995,
 *     ftp://ftp.alvyray.com/Acrobat/4_Comp.pdf
 * @sa Michael Herf, The "double blend trick", May 2000,
 *     http://www.stereopsis.com/doubleblend.html
 */
static LLVMValueRef
lp_build_mul_u8n(LLVMBuilderRef builder,
                 struct lp_type i16_type,
                 LLVMValueRef a, LLVMValueRef b)
{
   LLVMValueRef c8;
   LLVMValueRef ab;

   c8 = lp_build_int_const_scalar(i16_type, 8);

#if 0
   /* a*b/255 ~= (a*(b + 1)) >> 8 */
   b = LLVMBuildAdd(builder, b, lp_build_int_const_scalar(i16_type, 1), "");
   ab = LLVMBuildMul(builder, a, b, "");
#else
   /* ab/255 ~= (ab + (ab >> 8) + 0x80) >> 8 */
   ab = LLVMBuildMul(builder, a, b, "");
   ab = LLVMBuildAdd(builder, ab, LLVMBuildLShr(builder, ab, c8, ""), "");
   ab = LLVMBuildAdd(builder, ab, lp_build_int_const_scalar(i16_type, 0x80), "");
#endif

   ab = LLVMBuildLShr(builder, ab, c8, "");

   return ab;
}
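/*
 * A scalar sanity check of the three approximations above (an illustrative
 * standalone program, not part of the original source): it compares each
 * formula against correctly rounded division over all 8-bit products; the
 * Blinn variant is expected to match exactly.
 */
#include <stdio.h>

int main(void)
{
   unsigned errs_alpha1 = 0, errs_geom = 0, errs_blinn = 0;
   unsigned a, b;

   for (a = 0; a < 256; a++) {
      for (b = 0; b < 256; b++) {
         unsigned t = a * b;
         unsigned exact  = (t + 127) / 255;             /* round(t/255) */
         unsigned alpha1 = (a * (b + 1)) >> 8;          /* Sree */
         unsigned geom   = (t + (t >> 8)) >> 8;         /* truncated series */
         unsigned blinn  = (t + (t >> 8) + 0x80) >> 8;  /* series + rounding */
         errs_alpha1 += (alpha1 != exact);
         errs_geom   += (geom != exact);
         errs_blinn  += (blinn != exact);
      }
   }

   /* blinn should report 0 mismatches; the other two only satisfy the
    * weaker OpenGL endpoint criteria. */
   printf("alpha1: %u, geom: %u, blinn: %u\n",
          errs_alpha1, errs_geom, errs_blinn);
   return 0;
}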
/**
 * Special case for converting clamped IEEE-754 floats to unsigned norms.
 *
 * The mathematical voodoo below may seem excessive but it is actually
 * paramount we do it this way for several reasons. First, there is no single
 * precision FP to unsigned integer conversion Intel SSE instruction. Second,
 * even if there were, since the FP's mantissa takes only a fraction of
 * register bits, the typical scale-and-cast approach would require double
 * precision for accurate results, and therefore half the throughput.
 *
 * Although the result values can be scaled to an arbitrary bit width specified
 * by dst_width, the actual result type will have the same width as the input.
 */
LLVMValueRef
lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder,
                                        struct lp_type src_type,
                                        unsigned dst_width,
                                        LLVMValueRef src)
{
   LLVMTypeRef int_vec_type = lp_build_int_vec_type(src_type);
   LLVMValueRef res;
   unsigned mantissa;
   unsigned n;
   unsigned long long ubound;
   unsigned long long mask;
   double scale;
   double bias;

   assert(src_type.floating);

   mantissa = lp_mantissa(src_type);

   /* We cannot carry more bits than the mantissa */
   n = MIN2(mantissa, dst_width);

   /* These magic coefficients make the desired result appear in the
    * least significant bits of the mantissa.
    */
   ubound = ((unsigned long long)1 << n);
   mask = ubound - 1;
   scale = (double)mask/ubound;
   bias = (double)((unsigned long long)1 << (mantissa - n));

   res = LLVMBuildMul(builder, src, lp_build_const_scalar(src_type, scale), "");
   res = LLVMBuildAdd(builder, res, lp_build_const_scalar(src_type, bias), "");
   res = LLVMBuildBitCast(builder, res, int_vec_type, "");

   if(dst_width > n) {
      int shift = dst_width - n;
      res = LLVMBuildShl(builder, res, lp_build_int_const_scalar(src_type, shift), "");

      /* TODO: Fill in the empty lower bits for additional precision? */
#if 0
      {
         LLVMValueRef msb;
         msb = LLVMBuildLShr(builder, res, lp_build_int_const_scalar(src_type, dst_width - 1), "");
         msb = LLVMBuildShl(builder, msb, lp_build_int_const_scalar(src_type, shift), "");
         msb = LLVMBuildSub(builder, msb, lp_build_int_const_scalar(src_type, 1), "");
         res = LLVMBuildOr(builder, res, msb, "");
      }
#elif 0
      while(shift > 0) {
         res = LLVMBuildOr(builder, res, LLVMBuildLShr(builder, res, lp_build_int_const_scalar(src_type, n), ""), "");
         shift -= n;
         n *= 2;
      }
#endif
   }
   else
      res = LLVMBuildAnd(builder, res, lp_build_int_const_scalar(src_type, mask), "");

   return res;
}
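/*
 * A scalar model of the magic-bias trick above, for dst_width = 8 and the
 * 23-bit float mantissa (illustrative only; the helper name and the
 * memcpy-based bitcast are not from the original source). Multiplying
 * x in [0,1] by 255/256 and adding 2^15 places round(x*255) in the low
 * 8 mantissa bits.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint32_t clamped_float_to_unorm8(float x)
{
   const float scale = 255.0f/256.0f;   /* mask/ubound for n = 8 */
   const float bias = 32768.0f;         /* 2^(23 - 8) */
   float f = x*scale + bias;
   uint32_t bits;
   memcpy(&bits, &f, sizeof bits);      /* the bitcast */
   return bits & 0xff;                  /* the low mantissa bits */
}

int main(void)
{
   printf("%u %u %u\n",
          clamped_float_to_unorm8(0.0f),   /* expect 0 */
          clamped_float_to_unorm8(0.5f),   /* expect ~128 */
          clamped_float_to_unorm8(1.0f));  /* expect 255 */
   return 0;
}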
/**
 * Extract Y, U, V channels from packed YUYV.
 * @param packed  is a <n x i32> vector with the packed YUYV blocks
 * @param i  is a <n x i32> vector with the x pixel coordinate (0 or 1)
 */
static void
yuyv_to_yuv_soa(struct gallivm_state *gallivm,
                unsigned n,
                LLVMValueRef packed,
                LLVMValueRef i,
                LLVMValueRef *y, LLVMValueRef *u, LLVMValueRef *v)
{
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_type type;
   LLVMValueRef mask;

   memset(&type, 0, sizeof type);
   type.width = 32;
   type.length = n;

   assert(lp_check_value(type, packed));
   assert(lp_check_value(type, i));

   /*
    * y = (yuyv >> 16*i) & 0xff
    * u = (yuyv >> 8   ) & 0xff
    * v = (yuyv >> 24  ) & 0xff
    */

#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
   /*
    * Avoid shift with per-element count.
    * No support on x86, gets translated to roughly 5 instructions
    * per element. Didn't measure performance but cuts shader size
    * by quite a bit (less difference if cpu has no sse4.1 support).
    */
   if (util_cpu_caps.has_sse2 && n == 4) {
      LLVMValueRef sel, tmp;
      struct lp_build_context bld32;

      lp_build_context_init(&bld32, gallivm, type);

      tmp = LLVMBuildLShr(builder, packed,
                          lp_build_const_int_vec(gallivm, type, 16), "");
      sel = lp_build_compare(gallivm, type, PIPE_FUNC_EQUAL, i,
                             lp_build_const_int_vec(gallivm, type, 0));
      *y = lp_build_select(&bld32, sel, packed, tmp);
   } else
#endif
   {
      LLVMValueRef shift;
      shift = LLVMBuildMul(builder, i,
                           lp_build_const_int_vec(gallivm, type, 16), "");
      *y = LLVMBuildLShr(builder, packed, shift, "");
   }

   *u = LLVMBuildLShr(builder, packed,
                      lp_build_const_int_vec(gallivm, type, 8), "");
   *v = LLVMBuildLShr(builder, packed,
                      lp_build_const_int_vec(gallivm, type, 24), "");

   mask = lp_build_const_int_vec(gallivm, type, 0xff);

   *y = LLVMBuildAnd(builder, *y, mask, "y");
   *u = LLVMBuildAnd(builder, *u, mask, "u");
   *v = LLVMBuildAnd(builder, *v, mask, "v");
}
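/*
 * For reference, the scalar arithmetic the vector code above implements,
 * written out for one packed YUYV dword (hypothetical helper, not part of
 * the original source).
 */
#include <stdint.h>

static void yuyv_to_yuv(uint32_t yuyv, unsigned i,
                        uint8_t *y, uint8_t *u, uint8_t *v)
{
   *y = (yuyv >> (16 * i)) & 0xff;   /* Y0 at bits 0..7, Y1 at bits 16..23 */
   *u = (yuyv >> 8) & 0xff;
   *v = (yuyv >> 24) & 0xff;
}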
/**
 * Compute the offset of a pixel.
 *
 * x, y, y_stride are vectors
 */
LLVMValueRef
lp_build_sample_offset(struct lp_build_context *bld,
                       const struct util_format_description *format_desc,
                       LLVMValueRef x,
                       LLVMValueRef y,
                       LLVMValueRef y_stride,
                       LLVMValueRef data_ptr)
{
   LLVMValueRef x_stride;
   LLVMValueRef offset;

   x_stride = lp_build_const_scalar(bld->type, format_desc->block.bits/8);

   if(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
      LLVMValueRef x_lo, x_hi;
      LLVMValueRef y_lo, y_hi;
      LLVMValueRef x_stride_lo, x_stride_hi;
      LLVMValueRef y_stride_lo, y_stride_hi;
      LLVMValueRef x_offset_lo, x_offset_hi;
      LLVMValueRef y_offset_lo, y_offset_hi;
      LLVMValueRef offset_lo, offset_hi;

      x_lo = LLVMBuildAnd(bld->builder, x, bld->one, "");
      y_lo = LLVMBuildAnd(bld->builder, y, bld->one, "");

      x_hi = LLVMBuildLShr(bld->builder, x, bld->one, "");
      y_hi = LLVMBuildLShr(bld->builder, y, bld->one, "");

      x_stride_lo = x_stride;
      y_stride_lo = lp_build_const_scalar(bld->type, 2*format_desc->block.bits/8);

      x_stride_hi = lp_build_const_scalar(bld->type, 4*format_desc->block.bits/8);
      y_stride_hi = LLVMBuildShl(bld->builder, y_stride, bld->one, "");

      x_offset_lo = lp_build_mul(bld, x_lo, x_stride_lo);
      y_offset_lo = lp_build_mul(bld, y_lo, y_stride_lo);
      offset_lo = lp_build_add(bld, x_offset_lo, y_offset_lo);

      x_offset_hi = lp_build_mul(bld, x_hi, x_stride_hi);
      y_offset_hi = lp_build_mul(bld, y_hi, y_stride_hi);
      offset_hi = lp_build_add(bld, x_offset_hi, y_offset_hi);

      offset = lp_build_add(bld, offset_hi, offset_lo);
   }
   else {
      LLVMValueRef x_offset;
      LLVMValueRef y_offset;

      x_offset = lp_build_mul(bld, x, x_stride);
      y_offset = lp_build_mul(bld, y, y_stride);

      offset = lp_build_add(bld, x_offset, y_offset);
   }

   return offset;
}
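/*
 * In scalar terms, the ZS branch above interleaves pixels in 2x2 blocks
 * (with bpb = block.bits/8). A sketch of the offset it computes, reading
 * the code directly; not authoritative layout documentation:
 */
static unsigned zs_pixel_offset(unsigned x, unsigned y,
                                unsigned y_stride, unsigned bpb)
{
   unsigned lo = (x & 1) * bpb + (y & 1) * 2 * bpb;            /* within the 2x2 block */
   unsigned hi = (x >> 1) * 4 * bpb + (y >> 1) * 2 * y_stride; /* which 2x2 block */
   return hi + lo;
}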
LLVMValueRef gen_name(struct node *ast)
{
    LLVMValueRef func, ptr, val;
    LLVMTypeRef type;

    ptr = lvalue(ast);
    type = LLVMTypeOf(ptr);

    if (LLVMGetTypeKind(type) == LLVMLabelTypeKind) {
        func = LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder));
        return LLVMBuildPtrToInt(builder,
                LLVMBlockAddress(func, (LLVMBasicBlockRef)ptr),
                TYPE_INT, "");
    }

    type = LLVMGetElementType(LLVMTypeOf(ptr));

    switch (LLVMGetTypeKind(type)) {
    case LLVMIntegerTypeKind:
        val = LLVMBuildLoad(builder, ptr, ast->val);
        if (LLVMIsAGlobalValue(ptr))
            val = LLVMBuildLShr(builder, val, CONST(WORDPOW), "");
        return val;
    default:
        generror("unexpected type '%s'", LLVMPrintTypeToString(type));
        return NULL;
    }
}
LLVMValueRef build_t_from_tag(struct llvm_ctx *ctx, LLVMValueRef mr0)
{
    LLVMValueRef t = LLVMBuildAnd(ctx->builder,
        LLVMBuildLShr(ctx->builder, mr0, CONST_WORD(6), "tag.t.raw"),
        CONST_WORD(0x3f), "tag.t");
    return LLVMBuildTruncOrBitCast(ctx->builder, t, ctx->i32t, "tag.t.int");
}
/**
 * Convert a vector of rgba8 values into 32bit wide SoA vectors.
 *
 * \param dst_type  The desired return type. For pure integer formats
 *                  this should be a 32bit wide int or uint vector type,
 *                  otherwise a float vector type.
 *
 * \param packed    The packed rgba8 values.
 *
 * \param rgba      The 4 SoA return vectors.
 */
void
lp_build_rgba8_to_fi32_soa(struct gallivm_state *gallivm,
                           struct lp_type dst_type,
                           LLVMValueRef packed,
                           LLVMValueRef *rgba)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef mask = lp_build_const_int_vec(gallivm, dst_type, 0xff);
   unsigned chan;

   /* XXX technically shouldn't use that for uint dst_type */
   packed = LLVMBuildBitCast(builder, packed,
                             lp_build_int_vec_type(gallivm, dst_type), "");

   /* Decode the input vector components */
   for (chan = 0; chan < 4; ++chan) {
      unsigned start = chan*8;
      unsigned stop = start + 8;
      LLVMValueRef input;

      input = packed;

      if (start)
         input = LLVMBuildLShr(builder, input,
                               lp_build_const_int_vec(gallivm, dst_type, start), "");

      if (stop < 32)
         input = LLVMBuildAnd(builder, input, mask, "");

      if (dst_type.floating)
         input = lp_build_unsigned_norm_to_float(gallivm, 8, dst_type, input);

      rgba[chan] = input;
   }
}
LLVMValueRef gen_right(struct node *ast)
{
    return LLVMBuildLShr(builder, codegen(ast->one), codegen(ast->two), "");
}
static void emit_ushr(const struct lp_build_tgsi_action *action,
                      struct lp_build_tgsi_context *bld_base,
                      struct lp_build_emit_data *emit_data)
{
    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
    emit_data->output[emit_data->chan] = LLVMBuildLShr(builder,
            emit_data->args[0], emit_data->args[1], "");
}
static LLVMValueRef lvalue_to_rvalue(LLVMValueRef lvalue)
{
    /*
     * TODO: Make sure all addresses are word-aligned
     * (autos, vectors, strings, etc.)
     */
    lvalue = LLVMBuildPtrToInt(builder, lvalue, TYPE_INT, "");
    return LLVMBuildLShr(builder, lvalue, CONST(WORDPOW), "");
}
static LLVMValueRef gen_digestof_value(compile_t* c, LLVMValueRef value)
{
  LLVMTypeRef type = LLVMTypeOf(value);

  switch(LLVMGetTypeKind(type))
  {
    case LLVMFloatTypeKind:
      value = LLVMBuildBitCast(c->builder, value, c->i32, "");
      return LLVMBuildZExt(c->builder, value, c->i64, "");

    case LLVMDoubleTypeKind:
      return LLVMBuildBitCast(c->builder, value, c->i64, "");

    case LLVMIntegerTypeKind:
    {
      uint32_t width = LLVMGetIntTypeWidth(type);

      if(width < 64)
      {
        value = LLVMBuildZExt(c->builder, value, c->i64, "");
      } else if(width == 128) {
        LLVMValueRef shift = LLVMConstInt(c->i128, 64, false);
        LLVMValueRef high = LLVMBuildLShr(c->builder, value, shift, "");
        high = LLVMBuildTrunc(c->builder, high, c->i64, "");
        value = LLVMBuildTrunc(c->builder, value, c->i64, "");
        value = LLVMBuildXor(c->builder, value, high, "");
      }

      return value;
    }

    case LLVMStructTypeKind:
    {
      uint32_t count = LLVMCountStructElementTypes(type);
      LLVMValueRef result = LLVMConstInt(c->i64, 0, false);

      for(uint32_t i = 0; i < count; i++)
      {
        LLVMValueRef elem = LLVMBuildExtractValue(c->builder, value, i, "");
        elem = gen_digestof_value(c, elem);
        result = LLVMBuildXor(c->builder, result, elem, "");
      }

      return result;
    }

    case LLVMPointerTypeKind:
      return LLVMBuildPtrToInt(c->builder, value, c->i64, "");

    default: {}
  }

  assert(0);
  return NULL;
}
/*
 * gen_shift
 *
 * Shifts are a little tricky, since LLVM has explicit left-shift and
 * right-shift instructions, which take non-negative shift values. BLISS,
 * on the other hand, has a single shift operator and generates right shifts
 * when the RHS is negative. If the RHS is a constant, we can do the
 * translation here; otherwise, we have to build a conditional to check at
 * runtime.
 */
static LLVMValueRef
gen_shift (gencodectx_t gctx, expr_node_t *lhs, expr_node_t *rhs,
           LLVMTypeRef neededtype)
{
    LLVMBuilderRef builder = gctx->curfn->builder;
    LLVMTypeRef inttype = gctx->fullwordtype;
    LLVMValueRef lval, rval, result, test;

    lval = (lhs == 0 ? 0 : llvmgen_expression(gctx, lhs, inttype));
    if (expr_type(rhs) == EXPTYPE_PRIM_LIT) {
        long count = expr_litval(rhs);
        if (count < 0) {
            rval = LLVMConstInt(inttype, -count, 0);
            result = LLVMBuildLShr(builder, lval, rval, llvmgen_temp(gctx));
        } else {
            rval = LLVMConstInt(inttype, count, 0);
            result = LLVMBuildShl(builder, lval, rval, llvmgen_temp(gctx));
        }
    } else {
        LLVMBasicBlockRef exitblock = llvmgen_exitblock_create(gctx, 0);
        LLVMBasicBlockRef lshiftblk, rshiftblk;
        llvm_btrack_t *bt = llvmgen_btrack_create(gctx, exitblock);

        lshiftblk = LLVMInsertBasicBlockInContext(gctx->llvmctx, exitblock,
                                                  llvmgen_label(gctx));
        rshiftblk = LLVMInsertBasicBlockInContext(gctx->llvmctx, exitblock,
                                                  llvmgen_label(gctx));

        rval = llvmgen_expression(gctx, rhs, inttype);
        test = LLVMBuildICmp(builder, LLVMIntSLT, rval,
                             LLVMConstNull(inttype), llvmgen_temp(gctx));
        LLVMBuildCondBr(builder, test, rshiftblk, lshiftblk);

        LLVMPositionBuilderAtEnd(builder, lshiftblk);
        result = LLVMBuildShl(builder, lval, rval, llvmgen_temp(gctx));
        llvmgen_btrack_update(gctx, bt, result);

        LLVMPositionBuilderAtEnd(builder, rshiftblk);
        rval = LLVMBuildNeg(builder, rval, llvmgen_temp(gctx));
        result = LLVMBuildLShr(builder, lval, rval, llvmgen_temp(gctx));
        llvmgen_btrack_update(gctx, bt, result);

        result = llvmgen_btrack_finalize(gctx, bt, inttype);
    }

    return llvmgen_adjustval(gctx, result, neededtype, 0);

} /* gen_shift */
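/*
 * A scalar C model of the BLISS shift semantics implemented above
 * (illustrative only): one operator, where a negative count means a
 * logical right shift.
 */
static unsigned long bliss_shift(unsigned long value, long count)
{
    if (count < 0)
        return value >> -count;   /* logical right shift by |count| */
    return value << count;
}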
/**
 * Extract Y, U, V channels from packed UYVY.
 * @param packed  is a <n x i32> vector with the packed UYVY blocks
 * @param i  is a <n x i32> vector with the x pixel coordinate (0 or 1)
 */
static void
uyvy_to_yuv_soa(LLVMBuilderRef builder,
                unsigned n,
                LLVMValueRef packed,
                LLVMValueRef i,
                LLVMValueRef *y, LLVMValueRef *u, LLVMValueRef *v)
{
   struct lp_type type;
   LLVMValueRef shift, mask;

   memset(&type, 0, sizeof type);
   type.width = 32;
   type.length = n;

   assert(lp_check_value(type, packed));
   assert(lp_check_value(type, i));

   /*
    * y = (uyvy >> (16*i + 8)) & 0xff
    * u = (uyvy              ) & 0xff
    * v = (uyvy >> 16        ) & 0xff
    */
   shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(type, 16), "");
   shift = LLVMBuildAdd(builder, shift, lp_build_const_int_vec(type, 8), "");
   *y = LLVMBuildLShr(builder, packed, shift, "");
   *u = packed;
   *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(type, 16), "");

   mask = lp_build_const_int_vec(type, 0xff);

   *y = LLVMBuildAnd(builder, *y, mask, "y");
   *u = LLVMBuildAnd(builder, *u, mask, "u");
   *v = LLVMBuildAnd(builder, *v, mask, "v");
}
static LLVMValueRef gen_digestof_int64(compile_t* c, LLVMValueRef value)
{
  pony_assert(LLVMTypeOf(value) == c->i64);

  if(target_is_ilp32(c->opt->triple))
  {
    LLVMValueRef shift = LLVMConstInt(c->i64, 32, false);
    LLVMValueRef high = LLVMBuildLShr(c->builder, value, shift, "");
    high = LLVMBuildTrunc(c->builder, high, c->i32, "");
    value = LLVMBuildTrunc(c->builder, value, c->i32, "");
    value = LLVMBuildXor(c->builder, value, high, "");
  }

  return value;
}
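/*
 * The ILP32 fold above, as plain C (hypothetical standalone form): the
 * 64-bit digest is reduced to pointer width by xoring the two halves.
 */
#include <stdint.h>

static uint32_t fold64(uint64_t value)
{
  return (uint32_t)value ^ (uint32_t)(value >> 32);
}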
/**
 * Inverse of lp_build_clamped_float_to_unsigned_norm above.
 * Ex: src = { i32, i32, i32, i32 } with values in range [0, 2^src_width-1]
 * return {float, float, float, float} with values in range [0, 1].
 */
LLVMValueRef
lp_build_unsigned_norm_to_float(LLVMBuilderRef builder,
                                unsigned src_width,
                                struct lp_type dst_type,
                                LLVMValueRef src)
{
   LLVMTypeRef vec_type = lp_build_vec_type(dst_type);
   LLVMTypeRef int_vec_type = lp_build_int_vec_type(dst_type);
   LLVMValueRef bias_;
   LLVMValueRef res;
   unsigned mantissa;
   unsigned n;
   unsigned long long ubound;
   unsigned long long mask;
   double scale;
   double bias;

   assert(dst_type.floating);

   mantissa = lp_mantissa(dst_type);

   n = MIN2(mantissa, src_width);

   ubound = ((unsigned long long)1 << n);
   mask = ubound - 1;
   scale = (double)ubound/mask;
   bias = (double)((unsigned long long)1 << (mantissa - n));

   res = src;

   if(src_width > mantissa) {
      int shift = src_width - mantissa;
      res = LLVMBuildLShr(builder, res,
                          lp_build_const_int_vec(dst_type, shift), "");
   }

   bias_ = lp_build_const_vec(dst_type, bias);

   res = LLVMBuildOr(builder,
                     res,
                     LLVMBuildBitCast(builder, bias_, int_vec_type, ""), "");

   res = LLVMBuildBitCast(builder, res, vec_type, "");

   res = LLVMBuildFSub(builder, res, bias_, "");
   res = LLVMBuildFMul(builder, res, lp_build_const_vec(dst_type, scale), "");

   return res;
}
/**
 * Compute the partial offset of a pixel block along an arbitrary axis.
 *
 * @param coord   coordinate in pixels
 * @param stride  number of bytes between rows of successive pixel blocks
 * @param block_length  number of pixels in a pixel block along the coordinate
 *                      axis
 * @param out_offset    resulting relative offset of the pixel block in bytes
 * @param out_subcoord  resulting sub-block pixel coordinate
 */
void
lp_build_sample_partial_offset(struct lp_build_context *bld,
                               unsigned block_length,
                               LLVMValueRef coord,
                               LLVMValueRef stride,
                               LLVMValueRef *out_offset,
                               LLVMValueRef *out_subcoord)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef offset;
   LLVMValueRef subcoord;

   if (block_length == 1) {
      subcoord = bld->zero;
   }
   else {
      /*
       * Pixel blocks have power of two dimensions. LLVM should convert the
       * rem/div to bit arithmetic.
       * TODO: Verify this.
       * It does indeed BUT it does transform it to scalar (and back) when doing so
       * (using roughly extract, shift/and, mov, unpack) (llvm 2.7).
       * The generated code looks seriously unfunny and is quite expensive.
       */
#if 0
      LLVMValueRef block_width = lp_build_const_int_vec(bld->type, block_length);
      subcoord = LLVMBuildURem(builder, coord, block_width, "");
      coord    = LLVMBuildUDiv(builder, coord, block_width, "");
#else
      unsigned logbase2 = util_logbase2(block_length);
      LLVMValueRef block_shift = lp_build_const_int_vec(bld->gallivm, bld->type, logbase2);
      LLVMValueRef block_mask = lp_build_const_int_vec(bld->gallivm, bld->type, block_length - 1);
      subcoord = LLVMBuildAnd(builder, coord, block_mask, "");
      coord    = LLVMBuildLShr(builder, coord, block_shift, "");
#endif
   }

   offset = lp_build_mul(bld, coord, stride);

   assert(out_offset);
   assert(out_subcoord);

   *out_offset = offset;
   *out_subcoord = subcoord;
}
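/*
 * The shift/mask form used above, in scalar terms (illustrative): for a
 * power-of-two block_length, division and remainder reduce to a shift and
 * a mask.
 */
#include <assert.h>

static void split_coord(unsigned coord, unsigned block_length,
                        unsigned *block, unsigned *subcoord)
{
   assert(block_length && (block_length & (block_length - 1)) == 0);
   *subcoord = coord & (block_length - 1);   /* coord % block_length */
   *block = coord / block_length;            /* compiles to a right shift */
}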
/**
 * Shift right.
 */
LLVMValueRef
lp_build_shr(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   const struct lp_type type = bld->type;
   LLVMValueRef res;

   assert(!type.floating);
   assert(lp_check_value(type, a));
   assert(lp_check_value(type, b));

   if (type.sign) {
      res = LLVMBuildAShr(builder, a, b, "");
   }
   else {
      res = LLVMBuildLShr(builder, a, b, "");
   }

   return res;
}
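/*
 * Why the sign check matters (an illustrative standalone program): an
 * arithmetic shift replicates the sign bit while a logical shift inserts
 * zeros, so the two disagree whenever the MSB is set.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
   int32_t s = -16;
   uint32_t u = (uint32_t)s;
   /* Right shift of a negative signed int is arithmetic on mainstream
    * compilers (strictly, implementation-defined in C). */
   printf("%d %u\n", s >> 2, u >> 2);   /* -4 vs 1073741820 */
   return 0;
}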
/**
 * Codegen equivalent for u_minify().
 * Return max(1, base_size >> level);
 */
LLVMValueRef
lp_build_minify(struct lp_build_context *bld,
                LLVMValueRef base_size,
                LLVMValueRef level)
{
   LLVMBuilderRef builder = bld->gallivm->builder;

   assert(lp_check_value(bld->type, base_size));
   assert(lp_check_value(bld->type, level));

   if (level == bld->zero) {
      /* if we're using mipmap level zero, no minification is needed */
      return base_size;
   }
   else {
      LLVMValueRef size =
         LLVMBuildLShr(builder, base_size, level, "minify");
      assert(bld->type.sign);
      size = lp_build_max(bld, size, bld->one);
      return size;
   }
}
static void emit_up2h(const struct lp_build_tgsi_action *action,
                      struct lp_build_tgsi_context *bld_base,
                      struct lp_build_emit_data *emit_data)
{
    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
    LLVMContextRef context = bld_base->base.gallivm->context;
    struct lp_build_context *uint_bld = &bld_base->uint_bld;
    LLVMTypeRef fp16, i16;
    LLVMValueRef const16, input, val;
    unsigned i;

    fp16 = LLVMHalfTypeInContext(context);
    i16 = LLVMInt16TypeInContext(context);
    const16 = lp_build_const_int32(uint_bld->gallivm, 16);
    input = emit_data->args[0];

    for (i = 0; i < 2; i++) {
        val = i == 1 ? LLVMBuildLShr(builder, input, const16, "") : input;
        val = LLVMBuildTrunc(builder, val, i16, "");
        val = LLVMBuildBitCast(builder, val, fp16, "");
        emit_data->output[i] =
            LLVMBuildFPExt(builder, val, bld_base->base.elem_type, "");
    }
}
LLVMValueRef gen_shr(compile_t* c, ast_t* left, ast_t* right)
{
  ast_t* type = ast_type(left);
  bool sign = is_signed(c->opt, type);

  LLVMValueRef l_value = gen_expr(c, left);
  LLVMValueRef r_value = gen_expr(c, right);

  if((l_value == NULL) || (r_value == NULL))
    return NULL;

  if(LLVMIsConstant(l_value) && LLVMIsConstant(r_value))
  {
    if(sign)
      return LLVMConstAShr(l_value, r_value);

    return LLVMConstLShr(l_value, r_value);
  }

  if(sign)
    return LLVMBuildAShr(c->builder, l_value, r_value, "");

  return LLVMBuildLShr(c->builder, l_value, r_value, "");
}
/**
 * Unpack a single pixel into its RGBA components.
 *
 * @param desc    the pixel format for the packed pixel value
 * @param packed  integer pixel in a format such as PIPE_FORMAT_B8G8R8A8_UNORM
 *
 * @return RGBA in a float[4] or ubyte[4] or ushort[4] vector.
 */
static INLINE LLVMValueRef
lp_build_unpack_arith_rgba_aos(struct gallivm_state *gallivm,
                               const struct util_format_description *desc,
                               LLVMValueRef packed)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef shifted, casted, scaled, masked;
   LLVMValueRef shifts[4];
   LLVMValueRef masks[4];
   LLVMValueRef scales[4];
   boolean normalized;
   boolean needs_uitofp;
   unsigned shift;
   unsigned i;

   /* TODO: Support more formats */
   assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
   assert(desc->block.width == 1);
   assert(desc->block.height == 1);
   assert(desc->block.bits <= 32);

   /* Do the intermediate integer computations with 32bit integers since it
    * matches floating point size */
   assert (LLVMTypeOf(packed) == LLVMInt32TypeInContext(gallivm->context));

   /* Broadcast the packed value to all four channels
    * before: packed = BGRA
    * after: packed = {BGRA, BGRA, BGRA, BGRA}
    */
   packed = LLVMBuildInsertElement(builder,
                                   LLVMGetUndef(LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4)),
                                   packed,
                                   LLVMConstNull(LLVMInt32TypeInContext(gallivm->context)),
                                   "");
   packed = LLVMBuildShuffleVector(builder,
                                   packed,
                                   LLVMGetUndef(LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4)),
                                   LLVMConstNull(LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4)),
                                   "");

   /* Initialize vector constants */
   normalized = FALSE;
   needs_uitofp = FALSE;
   shift = 0;

   /* Loop over 4 color components */
   for (i = 0; i < 4; ++i) {
      unsigned bits = desc->channel[i].size;

      if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) {
         shifts[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
         masks[i] = LLVMConstNull(LLVMInt32TypeInContext(gallivm->context));
         scales[i] = LLVMConstNull(LLVMFloatTypeInContext(gallivm->context));
      }
      else {
         unsigned long long mask = (1ULL << bits) - 1;

         assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED);

         if (bits == 32) {
            needs_uitofp = TRUE;
         }

         shifts[i] = lp_build_const_int32(gallivm, shift);
         masks[i] = lp_build_const_int32(gallivm, mask);

         if (desc->channel[i].normalized) {
            scales[i] = lp_build_const_float(gallivm, 1.0 / mask);
            normalized = TRUE;
         }
         else
            scales[i] = lp_build_const_float(gallivm, 1.0);
      }

      shift += bits;
   }

   /* Ex: convert packed = {BGRA, BGRA, BGRA, BGRA}
    * into masked = {B, G, R, A}
    */
   shifted = LLVMBuildLShr(builder, packed, LLVMConstVector(shifts, 4), "");
   masked = LLVMBuildAnd(builder, shifted, LLVMConstVector(masks, 4), "");

   if (!needs_uitofp) {
      /* UIToFP can't be expressed in SSE2 */
      casted = LLVMBuildSIToFP(builder, masked,
                               LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4), "");
   } else {
      casted = LLVMBuildUIToFP(builder, masked,
                               LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4), "");
   }

   /* At this point 'casted' may be a vector of floats such as
    * {255.0, 255.0, 255.0, 255.0}. Next, if the pixel values are normalized
    * we'll scale this to {1.0, 1.0, 1.0, 1.0}.
    */
   if (normalized)
      scaled = LLVMBuildFMul(builder, casted, LLVMConstVector(scales, 4), "");
   else
      scaled = casted;

   return scaled;
}
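/*
 * The shift/mask/scale sequence above, specialized to one 8888 UNORM pixel
 * in scalar C (illustrative helper, not from the original source). Channel
 * i occupies bits [8*i, 8*i + 8), is masked out, and is scaled by 1/mask.
 */
#include <stdint.h>

static void unpack_rgba8_unorm(uint32_t packed, float chans[4])
{
   unsigned i;
   for (i = 0; i < 4; i++)
      chans[i] = (float)((packed >> (8 * i)) & 0xff) * (1.0f / 255.0f);
}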
/**
 * Generic type conversion.
 *
 * TODO: Take a precision argument, or even better, add a new precision member
 * to the lp_type union.
 */
void
lp_build_conv(struct gallivm_state *gallivm,
              struct lp_type src_type,
              struct lp_type dst_type,
              const LLVMValueRef *src, unsigned num_srcs,
              LLVMValueRef *dst, unsigned num_dsts)
{
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_type tmp_type;
   LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH];
   unsigned num_tmps;
   unsigned i;

   /* We must not lose or gain channels. Only precision */
   assert(src_type.length * num_srcs == dst_type.length * num_dsts);

   assert(src_type.length <= LP_MAX_VECTOR_LENGTH);
   assert(dst_type.length <= LP_MAX_VECTOR_LENGTH);
   assert(num_srcs <= LP_MAX_VECTOR_LENGTH);
   assert(num_dsts <= LP_MAX_VECTOR_LENGTH);

   tmp_type = src_type;
   for(i = 0; i < num_srcs; ++i) {
      assert(lp_check_value(src_type, src[i]));
      tmp[i] = src[i];
   }
   num_tmps = num_srcs;

   /* Special case 4x4f --> 1x16ub */
   if (src_type.floating == 1 &&
       src_type.fixed    == 0 &&
       src_type.sign     == 1 &&
       src_type.norm     == 0 &&
       src_type.width    == 32 &&
       src_type.length   == 4 &&

       dst_type.floating == 0 &&
       dst_type.fixed    == 0 &&
       dst_type.sign     == 0 &&
       dst_type.norm     == 1 &&
       dst_type.width    == 8 &&
       dst_type.length   == 16 &&

       4 * num_dsts      == num_srcs &&

       util_cpu_caps.has_sse2)
   {
      struct lp_build_context bld;
      struct lp_type int16_type = dst_type;
      struct lp_type int32_type = dst_type;
      LLVMValueRef const_255f;
      unsigned i, j;

      lp_build_context_init(&bld, gallivm, src_type);

      int16_type.width *= 2;
      int16_type.length /= 2;
      int16_type.sign = 1;

      int32_type.width *= 4;
      int32_type.length /= 4;
      int32_type.sign = 1;

      const_255f = lp_build_const_vec(gallivm, src_type, 255.0f);

      for (i = 0; i < num_dsts; ++i, src += 4) {
         LLVMValueRef lo, hi;

         for (j = 0; j < 4; ++j) {
            tmp[j] = LLVMBuildFMul(builder, src[j], const_255f, "");
            tmp[j] = lp_build_iround(&bld, tmp[j]);
         }

         /* relying on clamping behavior of sse2 intrinsics here */
         lo = lp_build_pack2(gallivm, int32_type, int16_type, tmp[0], tmp[1]);
         hi = lp_build_pack2(gallivm, int32_type, int16_type, tmp[2], tmp[3]);
         dst[i] = lp_build_pack2(gallivm, int16_type, dst_type, lo, hi);
      }

      return;
   }

   /* Special case 2x8f --> 1x16ub */
   else if (src_type.floating == 1 &&
            src_type.fixed    == 0 &&
            src_type.sign     == 1 &&
            src_type.norm     == 0 &&
            src_type.width    == 32 &&
            src_type.length   == 8 &&

            dst_type.floating == 0 &&
            dst_type.fixed    == 0 &&
            dst_type.sign     == 0 &&
            dst_type.norm     == 1 &&
            dst_type.width    == 8 &&
            dst_type.length   == 16 &&

            2 * num_dsts      == num_srcs &&

            util_cpu_caps.has_avx)
   {
      struct lp_build_context bld;
      struct lp_type int16_type = dst_type;
      struct lp_type int32_type = dst_type;
      LLVMValueRef const_255f;
      unsigned i;

      lp_build_context_init(&bld, gallivm, src_type);

      int16_type.width *= 2;
      int16_type.length /= 2;
      int16_type.sign = 1;

      int32_type.width *= 4;
      int32_type.length /= 4;
      int32_type.sign = 1;

      const_255f = lp_build_const_vec(gallivm, src_type, 255.0f);

      for (i = 0; i < num_dsts; ++i, src += 2) {
         LLVMValueRef lo, hi, a, b;

         a = LLVMBuildFMul(builder, src[0], const_255f, "");
         b = LLVMBuildFMul(builder, src[1], const_255f, "");

         a = lp_build_iround(&bld, a);
         b = lp_build_iround(&bld, b);

         tmp[0] = lp_build_extract_range(gallivm, a, 0, 4);
         tmp[1] = lp_build_extract_range(gallivm, a, 4, 4);
         tmp[2] = lp_build_extract_range(gallivm, b, 0, 4);
         tmp[3] = lp_build_extract_range(gallivm, b, 4, 4);

         /* relying on clamping behavior of sse2 intrinsics here */
         lo = lp_build_pack2(gallivm, int32_type, int16_type, tmp[0], tmp[1]);
         hi = lp_build_pack2(gallivm, int32_type, int16_type, tmp[2], tmp[3]);
         dst[i] = lp_build_pack2(gallivm, int16_type, dst_type, lo, hi);
      }
      return;
   }

   /* Pre convert half-floats to floats */
   else if (src_type.floating && src_type.width == 16)
   {
      for(i = 0; i < num_tmps; ++i)
         tmp[i] = lp_build_half_to_float(gallivm, src_type, tmp[i]);

      tmp_type.width = 32;
   }

   /*
    * Clamp if necessary
    */

   if(memcmp(&src_type, &dst_type, sizeof src_type) != 0) {
      struct lp_build_context bld;
      double src_min = lp_const_min(src_type);
      double dst_min = lp_const_min(dst_type);
      double src_max = lp_const_max(src_type);
      double dst_max = lp_const_max(dst_type);
      LLVMValueRef thres;

      lp_build_context_init(&bld, gallivm, tmp_type);

      if(src_min < dst_min) {
         if(dst_min == 0.0)
            thres = bld.zero;
         else
            thres = lp_build_const_vec(gallivm, src_type, dst_min);
         for(i = 0; i < num_tmps; ++i)
            tmp[i] = lp_build_max(&bld, tmp[i], thres);
      }

      if(src_max > dst_max) {
         if(dst_max == 1.0)
            thres = bld.one;
         else
            thres = lp_build_const_vec(gallivm, src_type, dst_max);
         for(i = 0; i < num_tmps; ++i)
            tmp[i] = lp_build_min(&bld, tmp[i], thres);
      }
   }

   /*
    * Scale to the narrowest range
    */

   if(dst_type.floating) {
      /* Nothing to do */
   }
   else if(tmp_type.floating) {
      if(!dst_type.fixed && !dst_type.sign && dst_type.norm) {
         for(i = 0; i < num_tmps; ++i) {
            tmp[i] = lp_build_clamped_float_to_unsigned_norm(gallivm,
                                                             tmp_type,
                                                             dst_type.width,
                                                             tmp[i]);
         }
         tmp_type.floating = FALSE;
      }
      else {
         double dst_scale = lp_const_scale(dst_type);
         LLVMTypeRef tmp_vec_type;

         if (dst_scale != 1.0) {
            LLVMValueRef scale = lp_build_const_vec(gallivm, tmp_type, dst_scale);
            for(i = 0; i < num_tmps; ++i)
               tmp[i] = LLVMBuildFMul(builder, tmp[i], scale, "");
         }

         /* Use an equally sized integer for intermediate computations */
         tmp_type.floating = FALSE;
         tmp_vec_type = lp_build_vec_type(gallivm, tmp_type);
         for(i = 0; i < num_tmps; ++i) {
#if 0
            if(dst_type.sign)
               tmp[i] = LLVMBuildFPToSI(builder, tmp[i], tmp_vec_type, "");
            else
               tmp[i] = LLVMBuildFPToUI(builder, tmp[i], tmp_vec_type, "");
#else
            /* FIXME: there is no SSE counterpart for LLVMBuildFPToUI */
            tmp[i] = LLVMBuildFPToSI(builder, tmp[i], tmp_vec_type, "");
#endif
         }
      }
   }
   else {
      unsigned src_shift = lp_const_shift(src_type);
      unsigned dst_shift = lp_const_shift(dst_type);
      unsigned src_offset = lp_const_offset(src_type);
      unsigned dst_offset = lp_const_offset(dst_type);

      /* Compensate for different offsets */
      if (dst_offset > src_offset && src_type.width > dst_type.width) {
         for (i = 0; i < num_tmps; ++i) {
            LLVMValueRef shifted;
            LLVMValueRef shift = lp_build_const_int_vec(gallivm, tmp_type, src_shift - 1);
            if(src_type.sign)
               shifted = LLVMBuildAShr(builder, tmp[i], shift, "");
            else
               shifted = LLVMBuildLShr(builder, tmp[i], shift, "");

            tmp[i] = LLVMBuildSub(builder, tmp[i], shifted, "");
         }
      }

      if(src_shift > dst_shift) {
         LLVMValueRef shift = lp_build_const_int_vec(gallivm, tmp_type,
                                                     src_shift - dst_shift);
         for(i = 0; i < num_tmps; ++i)
            if(src_type.sign)
               tmp[i] = LLVMBuildAShr(builder, tmp[i], shift, "");
            else
               tmp[i] = LLVMBuildLShr(builder, tmp[i], shift, "");
      }
   }

   /*
    * Truncate or expand bit width
    *
    * No data conversion should happen here, although the sign bits are
    * crucial to avoid bad clamping.
    */

   {
      struct lp_type new_type;

      new_type = tmp_type;
      new_type.sign   = dst_type.sign;
      new_type.width  = dst_type.width;
      new_type.length = dst_type.length;

      lp_build_resize(gallivm, tmp_type, new_type, tmp, num_srcs, tmp, num_dsts);

      tmp_type = new_type;
      num_tmps = num_dsts;
   }

   /*
    * Scale to the widest range
    */

   if(src_type.floating) {
      /* Nothing to do */
   }
   else if(!src_type.floating && dst_type.floating) {
      if(!src_type.fixed && !src_type.sign && src_type.norm) {
         for(i = 0; i < num_tmps; ++i) {
            tmp[i] = lp_build_unsigned_norm_to_float(gallivm,
                                                     src_type.width,
                                                     dst_type,
                                                     tmp[i]);
         }
         tmp_type.floating = TRUE;
      }
      else {
         double src_scale = lp_const_scale(src_type);
         LLVMTypeRef tmp_vec_type;

         /* Use an equally sized integer for intermediate computations */
         tmp_type.floating = TRUE;
         tmp_type.sign = TRUE;
         tmp_vec_type = lp_build_vec_type(gallivm, tmp_type);
         for(i = 0; i < num_tmps; ++i) {
#if 0
            if(dst_type.sign)
               tmp[i] = LLVMBuildSIToFP(builder, tmp[i], tmp_vec_type, "");
            else
               tmp[i] = LLVMBuildUIToFP(builder, tmp[i], tmp_vec_type, "");
#else
            /* FIXME: there is no SSE counterpart for LLVMBuildUIToFP */
            tmp[i] = LLVMBuildSIToFP(builder, tmp[i], tmp_vec_type, "");
#endif
         }

         if (src_scale != 1.0) {
            LLVMValueRef scale = lp_build_const_vec(gallivm, tmp_type, 1.0/src_scale);
            for(i = 0; i < num_tmps; ++i)
               tmp[i] = LLVMBuildFMul(builder, tmp[i], scale, "");
         }
      }
   }
   else {
      unsigned src_shift = lp_const_shift(src_type);
      unsigned dst_shift = lp_const_shift(dst_type);
      unsigned src_offset = lp_const_offset(src_type);
      unsigned dst_offset = lp_const_offset(dst_type);

      if (src_shift < dst_shift) {
         LLVMValueRef pre_shift[LP_MAX_VECTOR_LENGTH];
         LLVMValueRef shift = lp_build_const_int_vec(gallivm, tmp_type,
                                                     dst_shift - src_shift);

         for (i = 0; i < num_tmps; ++i) {
            pre_shift[i] = tmp[i];
            tmp[i] = LLVMBuildShl(builder, tmp[i], shift, "");
         }

         /* Compensate for different offsets */
         if (dst_offset > src_offset) {
            for (i = 0; i < num_tmps; ++i) {
               tmp[i] = LLVMBuildSub(builder, tmp[i], pre_shift[i], "");
            }
         }
      }
   }

   for(i = 0; i < num_dsts; ++i) {
      dst[i] = tmp[i];
      assert(lp_check_value(dst_type, dst[i]));
   }
}
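/*
 * Scalar reference for the scale-and-round step used by the float -> unorm8
 * fast paths above (illustrative; assumes x is already clamped to [0, 1],
 * as the surrounding conversion guarantees).
 */
#include <math.h>
#include <stdint.h>

static uint8_t float_to_unorm8(float x)
{
   return (uint8_t)lrintf(x * 255.0f);   /* round-to-nearest, like iround */
}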
/**
 * Unpack several pixels in SoA.
 *
 * It takes a vector of packed pixels:
 *
 *   packed = {P0, P1, P2, P3, ..., Pn}
 *
 * And will produce four vectors:
 *
 *   red    = {R0, R1, R2, R3, ..., Rn}
 *   green  = {G0, G1, G2, G3, ..., Gn}
 *   blue   = {B0, B1, B2, B3, ..., Bn}
 *   alpha  = {A0, A1, A2, A3, ..., An}
 *
 * It requires that a packed pixel fits into an element of the output
 * channels. The common case is when converting pixels with a depth of 32 bits
 * or less into floats.
 *
 * \param format_desc  the format of the 'packed' incoming pixel vector
 * \param type         the desired type for rgba_out (type.length = n, above)
 * \param packed       the incoming vector of packed pixels
 * \param rgba_out     returns the SoA R,G,B,A vectors
 */
void
lp_build_unpack_rgba_soa(struct gallivm_state *gallivm,
                         const struct util_format_description *format_desc,
                         struct lp_type type,
                         LLVMValueRef packed,
                         LLVMValueRef rgba_out[4])
{
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context bld;
   LLVMValueRef inputs[4];
   unsigned chan;

   assert(format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
   assert(format_desc->block.width == 1);
   assert(format_desc->block.height == 1);
   assert(format_desc->block.bits <= type.width);
   /* FIXME: Support more output types */
   assert(type.width == 32);

   lp_build_context_init(&bld, gallivm, type);

   /* Decode the input vector components */
   for (chan = 0; chan < format_desc->nr_channels; ++chan) {
      const unsigned width = format_desc->channel[chan].size;
      const unsigned start = format_desc->channel[chan].shift;
      const unsigned stop = start + width;
      LLVMValueRef input;

      input = packed;

      switch(format_desc->channel[chan].type) {
      case UTIL_FORMAT_TYPE_VOID:
         input = lp_build_undef(gallivm, type);
         break;

      case UTIL_FORMAT_TYPE_UNSIGNED:
         /*
          * Align the LSB
          */
         if (start) {
            input = LLVMBuildLShr(builder, input,
                                  lp_build_const_int_vec(gallivm, type, start), "");
         }

         /*
          * Zero the MSBs
          */
         if (stop < format_desc->block.bits) {
            unsigned mask = ((unsigned long long)1 << width) - 1;
            input = LLVMBuildAnd(builder, input,
                                 lp_build_const_int_vec(gallivm, type, mask), "");
         }

         /*
          * Type conversion
          */
         if (type.floating) {
            if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
               assert(width == 8);
               if (format_desc->swizzle[3] == chan) {
                  input = lp_build_unsigned_norm_to_float(gallivm, width, type, input);
               }
               else {
                  struct lp_type conv_type = lp_uint_type(type);
                  input = lp_build_srgb_to_linear(gallivm, conv_type, input);
               }
            }
            else {
               if(format_desc->channel[chan].normalized)
                  input = lp_build_unsigned_norm_to_float(gallivm, width, type, input);
               else
                  input = LLVMBuildSIToFP(builder, input,
                                          lp_build_vec_type(gallivm, type), "");
            }
         }
         else if (format_desc->channel[chan].pure_integer) {
            /* Nothing to do */
         }
         else {
            /* FIXME */
            assert(0);
         }
         break;

      case UTIL_FORMAT_TYPE_SIGNED:
         /*
          * Align the sign bit first.
          */
         if (stop < type.width) {
            unsigned bits = type.width - stop;
            LLVMValueRef bits_val = lp_build_const_int_vec(gallivm, type, bits);
            input = LLVMBuildShl(builder, input, bits_val, "");
         }

         /*
          * Align the LSB (with an arithmetic shift to preserve the sign)
          */
         if (format_desc->channel[chan].size < type.width) {
            unsigned bits = type.width - format_desc->channel[chan].size;
            LLVMValueRef bits_val = lp_build_const_int_vec(gallivm, type, bits);
            input = LLVMBuildAShr(builder, input, bits_val, "");
         }

         /*
          * Type conversion
          */
         if (type.floating) {
            input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(gallivm, type), "");
            if (format_desc->channel[chan].normalized) {
               double scale = 1.0 / ((1 << (format_desc->channel[chan].size - 1)) - 1);
               LLVMValueRef scale_val = lp_build_const_vec(gallivm, type, scale);
               input = LLVMBuildFMul(builder, input, scale_val, "");
               /*
                * The formula above will produce values below -1.0 for the
                * most negative input value, but everything seems happy with
                * that, hence this is disabled for now.
                */
               if (0)
                  input = lp_build_max(&bld, input,
                                       lp_build_const_vec(gallivm, type, -1.0f));
            }
         }
         else if (format_desc->channel[chan].pure_integer) {
            /* Nothing to do */
         }
         else {
            /* FIXME */
            assert(0);
         }
         break;

      case UTIL_FORMAT_TYPE_FLOAT:
         if (type.floating) {
            assert(start == 0);
            assert(stop == 32);
            assert(type.width == 32);
            input = LLVMBuildBitCast(builder, input, lp_build_vec_type(gallivm, type), "");
         }
         else {
            /* FIXME */
            assert(0);
            input = lp_build_undef(gallivm, type);
         }
         break;

      case UTIL_FORMAT_TYPE_FIXED:
         if (type.floating) {
            double scale = 1.0 / ((1 << (format_desc->channel[chan].size/2)) - 1);
            LLVMValueRef scale_val = lp_build_const_vec(gallivm, type, scale);
            input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(gallivm, type), "");
            input = LLVMBuildFMul(builder, input, scale_val, "");
         }
         else {
            /* FIXME */
            assert(0);
            input = lp_build_undef(gallivm, type);
         }
         break;

      default:
         assert(0);
         input = lp_build_undef(gallivm, type);
         break;
      }

      inputs[chan] = input;
   }

   lp_build_format_swizzle_soa(format_desc, &bld, inputs, rgba_out);
}
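/*
 * The signed path above first aligns the field's sign bit with the register
 * MSB, then shifts back arithmetically. Scalar sketch for a w-bit field at
 * bit position p of a 32-bit word (illustrative; relies on arithmetic right
 * shift of signed values, as the AShr above provides by definition):
 */
#include <stdint.h>

static int32_t extract_signed_field(uint32_t word, unsigned p, unsigned w)
{
   int32_t v = (int32_t)(word << (32 - p - w));  /* sign bit into the MSB */
   return v >> (32 - w);                         /* arithmetic shift back */
}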
/**
 * Special case for converting clamped IEEE-754 floats to unsigned norms.
 *
 * The mathematical voodoo below may seem excessive but it is actually
 * paramount we do it this way for several reasons. First, there is no single
 * precision FP to unsigned integer conversion Intel SSE instruction. Second,
 * even if there were, since the FP's mantissa takes only a fraction of
 * register bits, the typical scale-and-cast approach would require double
 * precision for accurate results, and therefore half the throughput.
 *
 * Although the result values can be scaled to an arbitrary bit width specified
 * by dst_width, the actual result type will have the same width as the input.
 *
 * Ex: src = { float, float, float, float }
 * return { i32, i32, i32, i32 } where each value is in [0, 2^dst_width-1].
 */
LLVMValueRef
lp_build_clamped_float_to_unsigned_norm(struct gallivm_state *gallivm,
                                        struct lp_type src_type,
                                        unsigned dst_width,
                                        LLVMValueRef src)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMTypeRef int_vec_type = lp_build_int_vec_type(gallivm, src_type);
   LLVMValueRef res;
   unsigned mantissa;

   assert(src_type.floating);
   assert(dst_width <= src_type.width);
   src_type.sign = FALSE;

   mantissa = lp_mantissa(src_type);

   if (dst_width <= mantissa) {
      /*
       * Apply magic coefficients that will make the desired result appear
       * in the lowest significant bits of the mantissa, with correct
       * rounding.
       *
       * This only works if the destination width fits in the mantissa.
       */

      unsigned long long ubound;
      unsigned long long mask;
      double scale;
      double bias;

      ubound = (1ULL << dst_width);
      mask = ubound - 1;
      scale = (double)mask/ubound;
      bias = (double)(1ULL << (mantissa - dst_width));

      res = LLVMBuildFMul(builder, src,
                          lp_build_const_vec(gallivm, src_type, scale), "");
      res = LLVMBuildFAdd(builder, res,
                          lp_build_const_vec(gallivm, src_type, bias), "");
      res = LLVMBuildBitCast(builder, res, int_vec_type, "");
      res = LLVMBuildAnd(builder, res,
                         lp_build_const_int_vec(gallivm, src_type, mask), "");
   }
   else if (dst_width == (mantissa + 1)) {
      /*
       * The destination width matches exactly what can be represented in
       * floating point (i.e., mantissa + 1 bits). So do a straight
       * multiplication followed by casting. No further rounding is necessary.
       */

      double scale;

      scale = (double)((1ULL << dst_width) - 1);

      res = LLVMBuildFMul(builder, src,
                          lp_build_const_vec(gallivm, src_type, scale), "");
      res = LLVMBuildFPToSI(builder, res, int_vec_type, "");
   }
   else {
      /*
       * The destination exceeds what can be represented in the floating
       * point. So multiply by the largest power of two we can get away
       * with, and then subtract the most significant bit to rescale to
       * normalized values.
       *
       * The largest power of two factor we can get away with is
       * (1 << (src_type.width - 1)), because we need to use signed values.
       * In theory it should be (1 << (src_type.width - 2)), but IEEE 754
       * rules state INT_MIN should be returned in FPToSI, which is the
       * correct result for values near 1.0!
       *
       * This means we get (src_type.width - 1) correct bits for values near
       * 0.0, and (mantissa + 1) correct bits for values near 1.0. Equally
       * or more important, we also get exact results for 0.0 and 1.0.
       */

      unsigned n = MIN2(src_type.width - 1, dst_width);

      double scale = (double)(1ULL << n);
      unsigned lshift = dst_width - n;
      unsigned rshift = n;
      LLVMValueRef lshifted;
      LLVMValueRef rshifted;

      res = LLVMBuildFMul(builder, src,
                          lp_build_const_vec(gallivm, src_type, scale), "");
      res = LLVMBuildFPToSI(builder, res, int_vec_type, "");

      /*
       * Align the most significant bit to its final place.
       *
       * This will cause 1.0 to overflow to 0, but the later adjustment will
       * get it right.
       */
      if (lshift) {
         lshifted = LLVMBuildShl(builder, res,
                                 lp_build_const_int_vec(gallivm, src_type,
                                                        lshift), "");
      } else {
         lshifted = res;
      }

      /*
       * Align the most significant bit to the right.
       */
      rshifted = LLVMBuildLShr(builder, res,
                               lp_build_const_int_vec(gallivm, src_type, rshift),
                               "");

      /*
       * Subtract the right-shifted MSB from the left-shifted value, thereby
       * rescaling from (1 << dst_width) to ((1 << dst_width) - 1).
       */
      res = LLVMBuildSub(builder, lshifted, rshifted, "");
   }

   return res;
}
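/*
 * The final subtraction above rescales without a multiply: if v ~= x << n
 * for x in [0, 1], then (v << (dst_width - n)) - (v >> n) ~= x * ((1 <<
 * dst_width) - 1). A scalar sketch of the common n == dst_width case
 * (illustrative):
 */
#include <stdint.h>

static uint32_t rescale_pow2_to_mask(uint32_t v, unsigned w)
{
   /* v approximates x * 2^w; the result approximates x * (2^w - 1). */
   return v - (v >> w);
}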
/**
 * Inverse of lp_build_clamped_float_to_unsigned_norm above.
 * Ex: src = { i32, i32, i32, i32 } with values in range [0, 2^src_width-1]
 * return {float, float, float, float} with values in range [0, 1].
 */
LLVMValueRef
lp_build_unsigned_norm_to_float(struct gallivm_state *gallivm,
                                unsigned src_width,
                                struct lp_type dst_type,
                                LLVMValueRef src)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMTypeRef vec_type = lp_build_vec_type(gallivm, dst_type);
   LLVMTypeRef int_vec_type = lp_build_int_vec_type(gallivm, dst_type);
   LLVMValueRef bias_;
   LLVMValueRef res;
   unsigned mantissa;
   unsigned n;
   unsigned long long ubound;
   unsigned long long mask;
   double scale;
   double bias;

   assert(dst_type.floating);

   mantissa = lp_mantissa(dst_type);

   if (src_width <= (mantissa + 1)) {
      /*
       * The source width fits what can be represented in floating
       * point (i.e., mantissa + 1 bits). So do a straight multiplication
       * followed by casting. No further rounding is necessary.
       */

      scale = 1.0/(double)((1ULL << src_width) - 1);
      res = LLVMBuildSIToFP(builder, src, vec_type, "");
      res = LLVMBuildFMul(builder, res,
                          lp_build_const_vec(gallivm, dst_type, scale), "");
      return res;
   }
   else {
      /*
       * The source width exceeds what can be represented in floating
       * point. So truncate the incoming values.
       */

      n = MIN2(mantissa, src_width);

      ubound = ((unsigned long long)1 << n);
      mask = ubound - 1;
      scale = (double)ubound/mask;
      bias = (double)((unsigned long long)1 << (mantissa - n));

      res = src;

      if (src_width > mantissa) {
         int shift = src_width - mantissa;
         res = LLVMBuildLShr(builder, res,
                             lp_build_const_int_vec(gallivm, dst_type, shift), "");
      }

      bias_ = lp_build_const_vec(gallivm, dst_type, bias);

      res = LLVMBuildOr(builder,
                        res,
                        LLVMBuildBitCast(builder, bias_, int_vec_type, ""), "");

      res = LLVMBuildBitCast(builder, res, vec_type, "");

      res = LLVMBuildFSub(builder, res, bias_, "");
      res = LLVMBuildFMul(builder, res,
                          lp_build_const_vec(gallivm, dst_type, scale), "");
   }

   return res;
}
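/*
 * A scalar model of the OR-with-bias trick above, for n = 8 and the 23-bit
 * float mantissa (illustrative; memcpy stands in for the bitcasts). OR-ing
 * an 8-bit integer i into the mantissa of 2^15 yields the float
 * 2^15 + i * 2^-8, so subtracting the bias and rescaling gives i / 255.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static float unorm8_to_float(uint8_t i)
{
   const float bias = 32768.0f;            /* 2^(23 - 8) */
   uint32_t bits;
   float f;

   memcpy(&bits, &bias, sizeof bits);
   bits |= i;                              /* low mantissa bits */
   memcpy(&f, &bits, sizeof f);
   f -= bias;                              /* now i * 2^-8 */
   return f * (256.0f / 255.0f);           /* the ubound/mask rescale */
}

int main(void)
{
   printf("%f %f\n", unorm8_to_float(0), unorm8_to_float(255)); /* 0 and 1 */
   return 0;
}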
static LLVMValueRef gen_digestof_value(compile_t* c, ast_t* type,
  LLVMValueRef value)
{
  LLVMTypeRef impl_type = LLVMTypeOf(value);

  switch(LLVMGetTypeKind(impl_type))
  {
    case LLVMFloatTypeKind:
      value = LLVMBuildBitCast(c->builder, value, c->i32, "");
      return LLVMBuildZExt(c->builder, value, c->intptr, "");

    case LLVMDoubleTypeKind:
      value = LLVMBuildBitCast(c->builder, value, c->i64, "");
      return gen_digestof_int64(c, value);

    case LLVMIntegerTypeKind:
    {
      uint32_t width = LLVMGetIntTypeWidth(impl_type);

      if(width < 64)
      {
        return LLVMBuildZExt(c->builder, value, c->intptr, "");
      } else if(width == 64) {
        return gen_digestof_int64(c, value);
      } else if(width == 128) {
        LLVMValueRef shift = LLVMConstInt(c->i128, 64, false);
        LLVMValueRef high = LLVMBuildLShr(c->builder, value, shift, "");
        high = LLVMBuildTrunc(c->builder, high, c->i64, "");
        value = LLVMBuildTrunc(c->builder, value, c->i64, "");
        high = gen_digestof_int64(c, high);
        value = gen_digestof_int64(c, value);
        return LLVMBuildXor(c->builder, value, high, "");
      }
      break;
    }

    case LLVMStructTypeKind:
    {
      uint32_t count = LLVMCountStructElementTypes(impl_type);
      LLVMValueRef result = LLVMConstInt(c->intptr, 0, false);
      ast_t* child = ast_child(type);

      for(uint32_t i = 0; i < count; i++)
      {
        LLVMValueRef elem = LLVMBuildExtractValue(c->builder, value, i, "");
        elem = gen_digestof_value(c, child, elem);
        result = LLVMBuildXor(c->builder, result, elem, "");
        child = ast_sibling(child);
      }

      pony_assert(child == NULL);

      return result;
    }

    case LLVMPointerTypeKind:
      if(!is_known(type))
      {
        reach_type_t* t = reach_type(c->reach, type);
        int sub_kind = subtype_kind(t);

        if((sub_kind & SUBTYPE_KIND_BOXED) != 0)
          return gen_digestof_box(c, t, value, sub_kind);
      }

      return LLVMBuildPtrToInt(c->builder, value, c->intptr, "");

    default: {}
  }

  pony_assert(0);
  return NULL;
}
/*
 * gen_fetch
 *
 * Generates a load operation for a fetch expression.
 */
static LLVMValueRef
gen_fetch (gencodectx_t gctx, expr_node_t *rhs, LLVMTypeRef neededtype)
{
    LLVMBuilderRef builder = gctx->curfn->builder;
    llvm_accinfo_t accinfo;
    LLVMValueRef addr, val;
    LLVMTypeRef type;
    int shifts_required = 0;
    int signext;

    // For field references with non-zero bit position, or with
    // non-CTCE size, we'll have to do bit shifting to extract
    // the field.
    addr = llvmgen_addr_expression(gctx, rhs, &accinfo);
    if (accinfo.posval != 0 || accinfo.sizeval != 0) {
        type = gctx->fullwordtype;
        if ((accinfo.flags & LLVMGEN_M_ACC_CONSTSIZ)) {
            accinfo.sizeval = LLVMConstInt(gctx->fullwordtype, accinfo.size, 0);
        }
        shifts_required = 1;
    } else if ((accinfo.flags & LLVMGEN_M_ACC_CONSTSIZ)) {
        if (accinfo.size == 0) {
            // XXX signal invalid size
            type = gctx->int1type;
        } else {
            type = LLVMIntTypeInContext(gctx->llvmctx, accinfo.size);
        }
    } else {
        type = gctx->fullwordtype;
    }

    signext = ((accinfo.flags & LLVMGEN_M_SEG_SIGNEXT) != 0);

    // If we're fetching from a register, there's no load instruction
    // required - EXCEPT if this was a scalar BIND, where the BIND
    if ((accinfo.segclass == LLVM_REG &&
         (accinfo.flags & LLVMGEN_M_SEG_DEREFED) == 0) &&
        (accinfo.flags & LLVMGEN_M_SEG_BINDPTR) == 0) {
        val = llvmgen_adjustval(gctx, addr, type, signext);
    } else {
        addr = llvmgen_adjustval(gctx, addr, LLVMPointerType(type, 0), 0);
        val = LLVMBuildLoad(builder, addr, llvmgen_temp(gctx));
        if ((accinfo.flags & LLVMGEN_M_SEG_VOLATILE) != 0)
            LLVMSetVolatile(val, 1);
    }

    if (shifts_required) {
        val = llvmgen_adjustval(gctx, val, gctx->fullwordtype, signext);
        if (signext) {
            val = LLVMBuildAShr(builder, val, accinfo.posval, llvmgen_temp(gctx));
        } else {
            val = LLVMBuildLShr(builder, val, accinfo.posval, llvmgen_temp(gctx));
        }
        if ((accinfo.flags & LLVMGEN_M_ACC_CONSTSIZ) != 0) {
            LLVMTypeRef trunctype = LLVMIntTypeInContext(gctx->llvmctx, accinfo.size);
            val = llvmgen_adjustval(gctx, val, trunctype, signext);
        } else {
            LLVMValueRef neg1 = LLVMConstAllOnes(gctx->fullwordtype);
            LLVMValueRef mask;
            // Build the field mask ~(~0 << size) == (1 << size) - 1 by
            // shifting all-ones left by the size, then complementing.
            mask = LLVMBuildShl(builder, neg1, accinfo.sizeval, llvmgen_temp(gctx));
            mask = LLVMBuildNot(builder, mask, llvmgen_temp(gctx));
            val = LLVMBuildAnd(builder, val, mask, llvmgen_temp(gctx));
            if (signext) {
                val = LLVMBuildSExt(builder, val, gctx->fullwordtype,
                                    llvmgen_temp(gctx));
            }
        }
    }

    return llvmgen_adjustval(gctx, val, neededtype, signext);

} /* gen_fetch */
static void llvm_emit_tex(
    const struct lp_build_tgsi_action * action,
    struct lp_build_tgsi_context * bld_base,
    struct lp_build_emit_data * emit_data)
{
    struct gallivm_state * gallivm = bld_base->base.gallivm;
    LLVMValueRef args[7];
    unsigned c, sampler_src;
    struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);

    if (emit_data->inst->Texture.Texture == TGSI_TEXTURE_BUFFER) {
        switch (emit_data->inst->Instruction.Opcode) {
        case TGSI_OPCODE_TXQ: {
            struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
            ctx->uses_tex_buffers = true;
            bool isEgPlus = (ctx->chip_class >= EVERGREEN);
            LLVMValueRef offset = lp_build_const_int32(bld_base->base.gallivm,
                isEgPlus ? 0 : 1);
            LLVMValueRef cvecval = llvm_load_const_buffer(bld_base, offset,
                LLVM_R600_BUFFER_INFO_CONST_BUFFER);
            if (!isEgPlus) {
                LLVMValueRef maskval[4] = {
                    lp_build_const_int32(gallivm, 1),
                    lp_build_const_int32(gallivm, 2),
                    lp_build_const_int32(gallivm, 3),
                    lp_build_const_int32(gallivm, 0),
                };
                LLVMValueRef mask = LLVMConstVector(maskval, 4);
                cvecval = LLVMBuildShuffleVector(gallivm->builder, cvecval,
                    cvecval, mask, "");
            }
            emit_data->output[0] = cvecval;
            return;
        }
        case TGSI_OPCODE_TXF: {
            args[0] = LLVMBuildExtractElement(gallivm->builder,
                emit_data->args[0], lp_build_const_int32(gallivm, 0), "");
            args[1] = lp_build_const_int32(gallivm, R600_MAX_CONST_BUFFERS);
            emit_data->output[0] = build_intrinsic(gallivm->builder,
                "llvm.R600.load.texbuf",
                emit_data->dst_type, args, 2, LLVMReadNoneAttribute);
            if (ctx->chip_class >= EVERGREEN)
                return;
            ctx->uses_tex_buffers = true;
            LLVMDumpValue(emit_data->output[0]);
            emit_data->output[0] = LLVMBuildBitCast(gallivm->builder,
                emit_data->output[0],
                LLVMVectorType(bld_base->base.int_elem_type, 4), "");
            LLVMValueRef Mask = llvm_load_const_buffer(bld_base,
                lp_build_const_int32(gallivm, 0),
                LLVM_R600_BUFFER_INFO_CONST_BUFFER);
            Mask = LLVMBuildBitCast(gallivm->builder, Mask,
                LLVMVectorType(bld_base->base.int_elem_type, 4), "");
            emit_data->output[0] = lp_build_emit_llvm_binary(bld_base,
                TGSI_OPCODE_AND, emit_data->output[0], Mask);
            LLVMValueRef WComponent = LLVMBuildExtractElement(gallivm->builder,
                emit_data->output[0], lp_build_const_int32(gallivm, 3), "");
            Mask = llvm_load_const_buffer(bld_base,
                lp_build_const_int32(gallivm, 1),
                LLVM_R600_BUFFER_INFO_CONST_BUFFER);
            Mask = LLVMBuildExtractElement(gallivm->builder, Mask,
                lp_build_const_int32(gallivm, 0), "");
            Mask = LLVMBuildBitCast(gallivm->builder, Mask,
                bld_base->base.int_elem_type, "");
            WComponent = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_OR,
                WComponent, Mask);
            emit_data->output[0] = LLVMBuildInsertElement(gallivm->builder,
                emit_data->output[0], WComponent,
                lp_build_const_int32(gallivm, 3), "");
            emit_data->output[0] = LLVMBuildBitCast(gallivm->builder,
                emit_data->output[0],
                LLVMVectorType(bld_base->base.elem_type, 4), "");
        }
            return;
        default:
            break;
        }
    }

    if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_TEX ||
        emit_data->inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
        LLVMValueRef Vector[4] = {
            LLVMBuildExtractElement(gallivm->builder, emit_data->args[0],
                lp_build_const_int32(gallivm, 0), ""),
            LLVMBuildExtractElement(gallivm->builder, emit_data->args[0],
                lp_build_const_int32(gallivm, 1), ""),
            LLVMBuildExtractElement(gallivm->builder, emit_data->args[0],
                lp_build_const_int32(gallivm, 2), ""),
            LLVMBuildExtractElement(gallivm->builder, emit_data->args[0],
                lp_build_const_int32(gallivm, 3), ""),
        };
        switch (emit_data->inst->Texture.Texture) {
        case TGSI_TEXTURE_2D:
        case TGSI_TEXTURE_RECT:
            Vector[2] = Vector[3] = LLVMGetUndef(bld_base->base.elem_type);
            break;
        case TGSI_TEXTURE_1D:
            Vector[1] = Vector[2] = Vector[3] =
                LLVMGetUndef(bld_base->base.elem_type);
            break;
        default:
            break;
        }
        args[0] = lp_build_gather_values(gallivm, Vector, 4);
    } else {
        args[0] = emit_data->args[0];
    }

    assert(emit_data->arg_count + 2 <= Elements(args));

    for (c = 1; c < emit_data->arg_count; ++c)
        args[c] = emit_data->args[c];

    if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_TXF) {
        args[1] = LLVMBuildShl(gallivm->builder, args[1],
            lp_build_const_int32(gallivm, 1), "");
        args[2] = LLVMBuildShl(gallivm->builder, args[2],
            lp_build_const_int32(gallivm, 1), "");
        args[3] = LLVMBuildShl(gallivm->builder, args[3],
            lp_build_const_int32(gallivm, 1), "");
    }

    sampler_src = emit_data->inst->Instruction.NumSrcRegs-1;

    args[c++] = lp_build_const_int32(gallivm,
        emit_data->inst->Src[sampler_src].Register.Index + R600_MAX_CONST_BUFFERS);
    args[c++] = lp_build_const_int32(gallivm,
        emit_data->inst->Src[sampler_src].Register.Index);
    args[c++] = lp_build_const_int32(gallivm, emit_data->inst->Texture.Texture);

    if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_TXF &&
        (emit_data->inst->Texture.Texture == TGSI_TEXTURE_2D_MSAA ||
        emit_data->inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY_MSAA)) {

        switch (emit_data->inst->Texture.Texture) {
        case TGSI_TEXTURE_2D_MSAA:
            args[6] = lp_build_const_int32(gallivm, TGSI_TEXTURE_2D);
            break;
        case TGSI_TEXTURE_2D_ARRAY_MSAA:
            args[6] = lp_build_const_int32(gallivm, TGSI_TEXTURE_2D_ARRAY);
            break;
        default:
            break;
        }

        if (ctx->has_compressed_msaa_texturing) {
            LLVMValueRef ldptr_args[10] = {
                args[0], // Coord
                args[1], // Offset X
                args[2], // Offset Y
                args[3], // Offset Z
                args[4],
                args[5],
                lp_build_const_int32(gallivm, 1),
                lp_build_const_int32(gallivm, 1),
                lp_build_const_int32(gallivm, 1),
                lp_build_const_int32(gallivm, 1)
            };
            LLVMValueRef ptr = build_intrinsic(gallivm->builder,
                "llvm.R600.ldptr",
                emit_data->dst_type, ldptr_args, 10, LLVMReadNoneAttribute);
            LLVMValueRef Tmp = LLVMBuildExtractElement(gallivm->builder,
                args[0], lp_build_const_int32(gallivm, 3), "");
            Tmp = LLVMBuildMul(gallivm->builder, Tmp,
                lp_build_const_int32(gallivm, 4), "");
            LLVMValueRef ResX = LLVMBuildExtractElement(gallivm->builder, ptr,
                lp_build_const_int32(gallivm, 0), "");
            ResX = LLVMBuildBitCast(gallivm->builder, ResX,
                bld_base->base.int_elem_type, "");
            Tmp = LLVMBuildLShr(gallivm->builder, ResX, Tmp, "");
            Tmp = LLVMBuildAnd(gallivm->builder, Tmp,
                lp_build_const_int32(gallivm, 0xF), "");
            args[0] = LLVMBuildInsertElement(gallivm->builder, args[0], Tmp,
                lp_build_const_int32(gallivm, 3), "");
            args[c++] = lp_build_const_int32(gallivm,
                emit_data->inst->Texture.Texture);
        }
    }

    emit_data->output[0] = build_intrinsic(gallivm->builder,
        action->intr_name,
        emit_data->dst_type, args, c, LLVMReadNoneAttribute);

    if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_TXQ &&
        ((emit_data->inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY ||
        emit_data->inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY)))
        if (emit_data->inst->Dst[0].Register.WriteMask & 4) {
            LLVMValueRef offset = lp_build_const_int32(bld_base->base.gallivm, 0);
            LLVMValueRef ZLayer = LLVMBuildExtractElement(gallivm->builder,
                llvm_load_const_buffer(bld_base, offset, CONSTANT_TXQ_BUFFER),
                lp_build_const_int32(gallivm, 0), "");

            emit_data->output[0] = LLVMBuildInsertElement(gallivm->builder,
                emit_data->output[0], ZLayer,
                lp_build_const_int32(gallivm, 2), "");
            struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
            ctx->has_txq_cube_array_z_comp = true;
        }
}
LLVMValueRef build_label_from_tag(struct llvm_ctx *ctx, LLVMValueRef tag)
{
    return LLVMBuildLShr(ctx->builder, tag, CONST_WORD(16), "tag.label");
}
static LLVMValueRef get_stritem_len_fn(struct llvm_ctx *ctx)
{
    if(ctx->stritem_len_fn != NULL) return ctx->stritem_len_fn;

    /* returns (i32 len, i32 new_tpos)
     * params (word *utcbptr, i32 tpos)
     *
     * when the returned "new_tpos" > tmax + 1, the result is invalid. the
     * function should also not be called when tpos > tmax + 1.
     */
    LLVMTypeRef ret_types[2] = { ctx->i32t, ctx->i32t },
        parm_types[2] = { LLVMPointerType(ctx->wordt, 0), ctx->i32t },
        ret_type = LLVMStructTypeInContext(ctx->ctx, ret_types, 2, 0),
        fn_type = LLVMFunctionType(ret_type, parm_types, 2, 0);
    LLVMValueRef fn = LLVMAddFunction(ctx->module, "__muidl_get_stritem_len",
        fn_type);
    LLVMSetVisibility(fn, LLVMHiddenVisibility);
    LLVMSetLinkage(fn, LLVMInternalLinkage);
    LLVMValueRef fn_args[2];
    LLVMGetParams(fn, fn_args);
    LLVMAddAttribute(fn_args[0], LLVMNoCaptureAttribute);
    for(int i=0; i<2; i++) {
        LLVMAddAttribute(fn_args[i], LLVMInRegAttribute);
    }
    ctx->stritem_len_fn = fn;

    LLVMBuilderRef old_builder = ctx->builder;
    ctx->builder = LLVMCreateBuilderInContext(ctx->ctx);
    LLVMBasicBlockRef entry_bb = LLVMAppendBasicBlockInContext(ctx->ctx, fn,
            "EntryBlock"),
        loop_bb = LLVMAppendBasicBlockInContext(ctx->ctx, fn, "loop"),
        valid_bb = LLVMAppendBasicBlockInContext(ctx->ctx, fn, "valid"),
        exit_bb = LLVMAppendBasicBlockInContext(ctx->ctx, fn, "exit");

    LLVMPositionBuilderAtEnd(ctx->builder, entry_bb);
    LLVMValueRef old_utcb = ctx->utcb, old_tpos = ctx->tpos;
    ctx->utcb = fn_args[0];
    ctx->tpos = fn_args[1];
    LLVMBuildBr(ctx->builder, loop_bb);

    LLVMPositionBuilderAtEnd(ctx->builder, exit_bb);
    LLVMValueRef exit_len_phi = LLVMBuildPhi(ctx->builder, ctx->i32t,
            "exit.len.phi"),
        exit_tpos_phi = LLVMBuildPhi(ctx->builder, ctx->i32t,
            "exit.tpos.phi");
    LLVMValueRef rvals[2] = { exit_len_phi, exit_tpos_phi };
    LLVMBuildAggregateRet(ctx->builder, rvals, 2);

    LLVMPositionBuilderAtEnd(ctx->builder, loop_bb);
    LLVMValueRef len_phi = LLVMBuildPhi(ctx->builder, ctx->i32t, "len.phi"),
        tpos_phi = LLVMBuildPhi(ctx->builder, ctx->i32t, "tpos.phi");
    LLVMAddIncoming(len_phi, &ctx->zero, &entry_bb, 1);
    LLVMAddIncoming(tpos_phi, &ctx->tpos, &entry_bb, 1);
    ctx->tpos = tpos_phi;
    /* test: if *tpos doesn't look like a string item, conk out. */
    LLVMValueRef infoword = build_utcb_load(ctx, ctx->tpos, "si.info");
    LLVMValueRef is_cond = LLVMBuildICmp(ctx->builder, LLVMIntEQ,
        ctx->zero, LLVMBuildAnd(ctx->builder, infoword,
            CONST_WORD(1 << 4), "infoword.si.mask"),
        "infoword.si.cond");
    /* anything + 100 is sure to be > tmax + 1. */
    LLVMValueRef fucked_tpos = LLVMBuildAdd(ctx->builder, tpos_phi,
        CONST_INT(100), "f****d.tpos");
    branch_set_phi(ctx, exit_len_phi, len_phi);
    branch_set_phi(ctx, exit_tpos_phi, fucked_tpos);
    LLVMBuildCondBr(ctx->builder, is_cond, valid_bb, exit_bb);

    LLVMPositionBuilderAtEnd(ctx->builder, valid_bb);
    LLVMValueRef string_length = LLVMBuildTruncOrBitCast(ctx->builder,
            LLVMBuildLShr(ctx->builder, infoword, CONST_INT(10),
                "si.info.len"),
            ctx->i32t, "si.info.len.int"),
        string_j = LLVMBuildTruncOrBitCast(ctx->builder,
            LLVMBuildAnd(ctx->builder, CONST_WORD(0x1f),
                LLVMBuildLShr(ctx->builder, infoword, CONST_WORD(4),
                    "si.info.j.shift"),
                "si.info.j.masked"),
            ctx->i32t, "si.info.j"),
        string_c = LLVMBuildTruncOrBitCast(ctx->builder,
            LLVMBuildAnd(ctx->builder, CONST_WORD(1 << 9), infoword,
                "si.info.c.masked"),
            ctx->i32t, "si.info.c.masked.int"),
        c_cond = LLVMBuildICmp(ctx->builder, LLVMIntNE, string_c,
            CONST_WORD(0), "si.info.c.cond"),
        new_len = LLVMBuildAdd(ctx->builder, len_phi,
            LLVMBuildMul(ctx->builder, string_length,
                LLVMBuildAdd(ctx->builder, string_j, CONST_INT(1),
                    "j.plus.one"),
                "len.incr"),
            "len.new"),
        new_tpos = LLVMBuildAdd(ctx->builder, ctx->tpos,
            LLVMBuildSelect(ctx->builder, c_cond,
                LLVMBuildAdd(ctx->builder, CONST_INT(2), string_j,
                    "cont.tpos.bump"),
                CONST_INT(2), "tpos.bump"),
            "tpos.new");
    LLVMAddIncoming(len_phi, &new_len, &valid_bb, 1);
    LLVMAddIncoming(tpos_phi, &new_tpos, &valid_bb, 1);
    LLVMAddIncoming(exit_len_phi, &new_len, &valid_bb, 1);
    LLVMAddIncoming(exit_tpos_phi, &new_tpos, &valid_bb, 1);
    LLVMBuildCondBr(ctx->builder, c_cond, loop_bb, exit_bb);

    LLVMDisposeBuilder(ctx->builder);
    ctx->builder = old_builder;
    ctx->utcb = old_utcb;
    ctx->tpos = old_tpos;

    return ctx->stritem_len_fn;
}