static LLVMValueRef CreateFibFunction(LLVMModuleRef M, LLVMContextRef Context) {
  LLVMBuilderRef B = LLVMCreateBuilderInContext(Context);

  // Create the fib function and insert it into module M. This function takes
  // one i32 parameter and returns an i32.
  LLVMTypeRef ParamTypes[] = {LLVMInt32TypeInContext(Context)};
  LLVMTypeRef ReturnType = LLVMInt32TypeInContext(Context);
  LLVMTypeRef FunctionTy = LLVMFunctionType(ReturnType, ParamTypes, 1, 0);
  LLVMValueRef FibF = LLVMAddFunction(M, "fib", FunctionTy);

  // Add the entry basic block to the function.
  LLVMBasicBlockRef BB = LLVMAppendBasicBlockInContext(Context, FibF, "EntryBlock");

  // Get pointers to the constants.
  LLVMValueRef One = LLVMConstInt(LLVMInt32TypeInContext(Context), 1, 0);
  LLVMValueRef Two = LLVMConstInt(LLVMInt32TypeInContext(Context), 2, 0);

  // Get a pointer to the integer argument of the fib function...
  LLVMValueRef ArgX = LLVMGetFirstParam(FibF); // Get the arg.
  LLVMSetValueName(ArgX, "AnArg");             // Give it a nice symbolic name for fun.

  // Create the block that returns the base case.
  LLVMBasicBlockRef RetBB = LLVMAppendBasicBlockInContext(Context, FibF, "return");

  // Create the block for the recursive case.
  LLVMBasicBlockRef RecurseBB = LLVMAppendBasicBlockInContext(Context, FibF, "recurse");

  // Create the "if (arg <= 2) goto return" branch.
  LLVMPositionBuilderAtEnd(B, BB);
  LLVMValueRef CondInst = LLVMBuildICmp(B, LLVMIntSLE, ArgX, Two, "cond");
  LLVMBuildCondBr(B, CondInst, RetBB, RecurseBB);

  // Create: ret i32 1
  LLVMPositionBuilderAtEnd(B, RetBB);
  LLVMBuildRet(B, One);

  // Create fib(x-1)
  LLVMPositionBuilderAtEnd(B, RecurseBB);
  LLVMValueRef Sub = LLVMBuildSub(B, ArgX, One, "arg");
  LLVMValueRef CallFibX1 = LLVMBuildCall(B, FibF, &Sub, 1, "fibx1");
  LLVMSetTailCall(CallFibX1, 1);

  // Create fib(x-2)
  LLVMPositionBuilderAtEnd(B, RecurseBB);
  Sub = LLVMBuildSub(B, ArgX, Two, "arg");
  LLVMValueRef CallFibX2 = LLVMBuildCall(B, FibF, &Sub, 1, "fibx2");
  LLVMSetTailCall(CallFibX2, 1);

  // fib(x-1) + fib(x-2)
  LLVMPositionBuilderAtEnd(B, RecurseBB);
  LLVMValueRef Sum = LLVMBuildAdd(B, CallFibX1, CallFibX2, "addresult");

  // Create the return instruction and add it to the basic block.
  LLVMPositionBuilderAtEnd(B, RecurseBB);
  LLVMBuildRet(B, Sum);

  // Free the builder; the generated IR stays with the module.
  LLVMDisposeBuilder(B);

  return FibF;
}
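A minimal usage sketch for the function above (not part of the original snippet): it assumes an MCJIT-enabled LLVM build and uses only standard llvm-c entry points; the module name and the choice of fib(10) are made up for illustration.

/* Hedged sketch: JIT-compile and run the "fib" function built above. */
#include <stdio.h>
#include <llvm-c/Core.h>
#include <llvm-c/ExecutionEngine.h>
#include <llvm-c/Target.h>

int main(void) {
  LLVMContextRef Context = LLVMContextCreate();
  LLVMModuleRef M = LLVMModuleCreateWithNameInContext("fib_module", Context);
  LLVMValueRef FibF = CreateFibFunction(M, Context);

  LLVMLinkInMCJIT();
  LLVMInitializeNativeTarget();
  LLVMInitializeNativeAsmPrinter();

  LLVMExecutionEngineRef EE;
  char *error = NULL;
  if (LLVMCreateExecutionEngineForModule(&EE, M, &error)) {
    fprintf(stderr, "failed to create execution engine: %s\n", error);
    LLVMDisposeMessage(error);
    return 1;
  }

  // fib(10) == 55 with the base cases fib(1) == fib(2) == 1.
  LLVMGenericValueRef Arg =
      LLVMCreateGenericValueOfInt(LLVMInt32TypeInContext(Context), 10, 0);
  LLVMGenericValueRef Result = LLVMRunFunction(EE, FibF, 1, &Arg);
  printf("fib(10) = %llu\n", LLVMGenericValueToInt(Result, 0));

  LLVMDisposeExecutionEngine(EE); // also frees the module it owns
  LLVMContextDispose(Context);
  return 0;
}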
void lp_build_exp2_approx(struct lp_build_context *bld, LLVMValueRef x, LLVMValueRef *p_exp2_int_part, LLVMValueRef *p_frac_part, LLVMValueRef *p_exp2) { const struct lp_type type = bld->type; LLVMTypeRef vec_type = lp_build_vec_type(type); LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); LLVMValueRef ipart = NULL; LLVMValueRef fpart = NULL; LLVMValueRef expipart = NULL; LLVMValueRef expfpart = NULL; LLVMValueRef res = NULL; if(p_exp2_int_part || p_frac_part || p_exp2) { /* TODO: optimize the constant case */ if(LLVMIsConstant(x)) debug_printf("%s: inefficient/imprecise constant arithmetic\n", __FUNCTION__); assert(type.floating && type.width == 32); x = lp_build_min(bld, x, lp_build_const_scalar(type, 129.0)); x = lp_build_max(bld, x, lp_build_const_scalar(type, -126.99999)); /* ipart = int(x - 0.5) */ ipart = LLVMBuildSub(bld->builder, x, lp_build_const_scalar(type, 0.5f), ""); ipart = LLVMBuildFPToSI(bld->builder, ipart, int_vec_type, ""); /* fpart = x - ipart */ fpart = LLVMBuildSIToFP(bld->builder, ipart, vec_type, ""); fpart = LLVMBuildSub(bld->builder, x, fpart, ""); } if(p_exp2_int_part || p_exp2) { /* expipart = (float) (1 << ipart) */ expipart = LLVMBuildAdd(bld->builder, ipart, lp_build_int_const_scalar(type, 127), ""); expipart = LLVMBuildShl(bld->builder, expipart, lp_build_int_const_scalar(type, 23), ""); expipart = LLVMBuildBitCast(bld->builder, expipart, vec_type, ""); } if(p_exp2) { expfpart = lp_build_polynomial(bld, fpart, lp_build_exp2_polynomial, Elements(lp_build_exp2_polynomial)); res = LLVMBuildMul(bld->builder, expipart, expfpart, ""); } if(p_exp2_int_part) *p_exp2_int_part = expipart; if(p_frac_part) *p_frac_part = fpart; if(p_exp2) *p_exp2 = res; }
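A scalar sketch of the same decomposition, for reference only: 2^x is split into 2^ipart, built by writing the biased exponent field directly, times 2^fpart from a small polynomial. exp2_approx_scalar and its Taylor-style coefficients are illustrative stand-ins for the vectorized lp_build_exp2_polynomial path above, not part of gallivm.

/* Hedged scalar reference for the 2^x approximation above. */
static float exp2_approx_scalar(float x)
{
   /* same clamping range as the vector code */
   if (x > 129.0f)
      x = 129.0f;
   if (x < -126.99999f)
      x = -126.99999f;

   int ipart = (int)(x - 0.5f);       /* ipart = int(x - 0.5) */
   float fpart = x - (float)ipart;    /* fractional remainder */

   /* expipart = 2^ipart, by placing (ipart + 127) in the exponent field */
   union { int i; float f; } u;
   u.i = (ipart + 127) << 23;

   /* expfpart ~= 2^fpart via a truncated Taylor series (illustrative only) */
   const float ln2 = 0.6931472f;
   float t = fpart * ln2;
   float expfpart = 1.0f + t * (1.0f + t * (0.5f + t / 6.0f));

   return u.f * expfpart;
}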
/* * Helper for building packed ddx/ddy vector for one coord (scalar per quad * values). The vector will look like this (8-wide): * ds1dx ds1dy dt1dx dt1dy ds2dx ds2dy dt2dx dt2dy * This only needs 2 (v)shufps. */ LLVMValueRef lp_build_packed_ddx_ddy_twocoord(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) { struct gallivm_state *gallivm = bld->gallivm; LLVMBuilderRef builder = gallivm->builder; LLVMValueRef shuffles1[LP_MAX_VECTOR_LENGTH/4]; LLVMValueRef shuffles2[LP_MAX_VECTOR_LENGTH/4]; LLVMValueRef vec1, vec2; unsigned length, num_quads, i; /* XXX: do hsub version */ length = bld->type.length; num_quads = length / 4; for (i = 0; i < num_quads; i++) { unsigned s1 = 4 * i; unsigned s2 = 4 * i + length; shuffles1[4*i + 0] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_LEFT + s1); shuffles1[4*i + 1] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_LEFT + s1); shuffles1[4*i + 2] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_LEFT + s2); shuffles1[4*i + 3] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_LEFT + s2); shuffles2[4*i + 0] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_RIGHT + s1); shuffles2[4*i + 1] = lp_build_const_int32(gallivm, LP_BLD_QUAD_BOTTOM_LEFT + s1); shuffles2[4*i + 2] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_RIGHT + s2); shuffles2[4*i + 3] = lp_build_const_int32(gallivm, LP_BLD_QUAD_BOTTOM_LEFT + s2); } vec1 = LLVMBuildShuffleVector(builder, a, b, LLVMConstVector(shuffles1, length), ""); vec2 = LLVMBuildShuffleVector(builder, a, b, LLVMConstVector(shuffles2, length), ""); if (bld->type.floating) return LLVMBuildFSub(builder, vec2, vec1, "ddxddyddxddy"); else return LLVMBuildSub(builder, vec2, vec1, "ddxddyddxddy"); }
/**
 * Special case for converting clamped IEEE-754 floats to unsigned norms.
 *
 * The mathematical voodoo below may seem excessive but it is actually
 * paramount we do it this way for several reasons. First, there is no single
 * precision FP to unsigned integer conversion Intel SSE instruction. Second,
 * even if there were, since the FP's mantissa takes only a fraction of the
 * register bits, the typical scale-and-cast approach would require double
 * precision for accurate results, and therefore half the throughput.
 *
 * Although the result values can be scaled to an arbitrary bit width specified
 * by dst_width, the actual result type will have the same width as the input.
 */
LLVMValueRef
lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder,
                                        struct lp_type src_type,
                                        unsigned dst_width,
                                        LLVMValueRef src)
{
   LLVMTypeRef int_vec_type = lp_build_int_vec_type(src_type);
   LLVMValueRef res;
   unsigned mantissa;
   unsigned n;
   unsigned long long ubound;
   unsigned long long mask;
   double scale;
   double bias;

   assert(src_type.floating);

   mantissa = lp_mantissa(src_type);

   /* We cannot carry more bits than the mantissa */
   n = MIN2(mantissa, dst_width);

   /* These magic coefficients make the desired result appear in the lowest
    * significant bits of the mantissa.
    */
   ubound = ((unsigned long long)1 << n);
   mask = ubound - 1;
   scale = (double)mask/ubound;
   bias = (double)((unsigned long long)1 << (mantissa - n));

   res = LLVMBuildMul(builder, src, lp_build_const_scalar(src_type, scale), "");
   res = LLVMBuildAdd(builder, res, lp_build_const_scalar(src_type, bias), "");
   res = LLVMBuildBitCast(builder, res, int_vec_type, "");

   if(dst_width > n) {
      int shift = dst_width - n;
      res = LLVMBuildShl(builder, res, lp_build_int_const_scalar(src_type, shift), "");

      /* TODO: Fill in the empty lower bits for additional precision? */
#if 0
      {
         LLVMValueRef msb;
         msb = LLVMBuildLShr(builder, res, lp_build_int_const_scalar(src_type, dst_width - 1), "");
         msb = LLVMBuildShl(builder, msb, lp_build_int_const_scalar(src_type, shift), "");
         msb = LLVMBuildSub(builder, msb, lp_build_int_const_scalar(src_type, 1), "");
         res = LLVMBuildOr(builder, res, msb, "");
      }
#elif 0
      while(shift > 0) {
         res = LLVMBuildOr(builder, res, LLVMBuildLShr(builder, res, lp_build_int_const_scalar(src_type, n), ""), "");
         shift -= n;
         n *= 2;
      }
#endif
   }
   else
      res = LLVMBuildAnd(builder, res, lp_build_int_const_scalar(src_type, mask), "");

   return res;
}
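A scalar sketch of the magic-bias trick for the common dst_width <= mantissa case; float_to_unorm8 is a hypothetical helper shown only to illustrate why the multiply/add/bitcast/mask sequence above yields round(x * 255).

/* Hedged scalar illustration: dst_width = 8, 23-bit single-precision mantissa. */
static unsigned float_to_unorm8(float x)   /* x assumed pre-clamped to [0, 1] */
{
   const unsigned n = 8;                              /* dst_width */
   const unsigned mantissa = 23;
   const unsigned long long ubound = 1ULL << n;       /* 256 */
   const unsigned long long mask = ubound - 1;        /* 255 */
   const float scale = (float)((double)mask / ubound);   /* 255/256 */
   const float bias = (float)(1ULL << (mantissa - n));   /* 2^15 */

   union { float f; unsigned u; } v;
   v.f = x * scale + bias;     /* low mantissa bits now hold round(x * 255) */
   return v.u & (unsigned)mask;
}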
/* Find the last bit set. */ static void emit_umsb(const struct lp_build_tgsi_action *action, struct lp_build_tgsi_context *bld_base, struct lp_build_emit_data *emit_data) { struct gallivm_state *gallivm = bld_base->base.gallivm; LLVMBuilderRef builder = gallivm->builder; LLVMValueRef args[2] = { emit_data->args[0], /* Don't generate code for handling zero: */ LLVMConstInt(LLVMInt1TypeInContext(gallivm->context), 1, 0) }; LLVMValueRef msb = lp_build_intrinsic(builder, "llvm.ctlz.i32", emit_data->dst_type, args, ARRAY_SIZE(args), LLVMReadNoneAttribute); /* The HW returns the last bit index from MSB, but TGSI wants * the index from LSB. Invert it by doing "31 - msb". */ msb = LLVMBuildSub(builder, lp_build_const_int32(gallivm, 31), msb, ""); /* Check for zero: */ emit_data->output[emit_data->chan] = LLVMBuildSelect(builder, LLVMBuildICmp(builder, LLVMIntEQ, args[0], bld_base->uint_bld.zero, ""), lp_build_const_int32(gallivm, -1), msb, ""); }
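For reference, a scalar sketch of the same UMSB semantics, using the GCC/Clang __builtin_clz builtin in place of llvm.ctlz.i32; umsb_ref is a hypothetical name, not a TGSI helper.

/* Hedged scalar reference for the "31 - ctlz" lowering above. */
static int umsb_ref(unsigned x)
{
   if (x == 0)
      return -1;                    /* UMSB returns -1 when no bit is set */
   return 31 - __builtin_clz(x);    /* index of the most significant set bit */
}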
/* Find the last bit opposite of the sign bit. */ static void emit_imsb(const struct lp_build_tgsi_action *action, struct lp_build_tgsi_context *bld_base, struct lp_build_emit_data *emit_data) { struct gallivm_state *gallivm = bld_base->base.gallivm; LLVMBuilderRef builder = gallivm->builder; LLVMValueRef arg = emit_data->args[0]; LLVMValueRef msb = lp_build_intrinsic(builder, "llvm.AMDGPU.flbit.i32", emit_data->dst_type, &arg, 1, LLVMReadNoneAttribute); /* The HW returns the last bit index from MSB, but TGSI wants * the index from LSB. Invert it by doing "31 - msb". */ msb = LLVMBuildSub(builder, lp_build_const_int32(gallivm, 31), msb, ""); /* If arg == 0 || arg == -1 (0xffffffff), return -1. */ LLVMValueRef all_ones = lp_build_const_int32(gallivm, -1); LLVMValueRef cond = LLVMBuildOr(builder, LLVMBuildICmp(builder, LLVMIntEQ, arg, bld_base->uint_bld.zero, ""), LLVMBuildICmp(builder, LLVMIntEQ, arg, all_ones, ""), ""); emit_data->output[emit_data->chan] = LLVMBuildSelect(builder, cond, all_ones, msb, ""); }
/* * Helper for building packed ddx/ddy vector for one coord (scalar per quad * values). The vector will look like this (8-wide): * dr1dx _____ -dr1dy _____ dr2dx _____ -dr2dy _____ * This only requires one shuffle instead of two for more straightforward packing. */ LLVMValueRef lp_build_packed_ddx_ddy_onecoord(struct lp_build_context *bld, LLVMValueRef a) { struct gallivm_state *gallivm = bld->gallivm; LLVMBuilderRef builder = gallivm->builder; LLVMValueRef vec1, vec2; /* use aos swizzle helper */ static const unsigned char swizzle1[] = { /* no-op swizzle */ LP_BLD_QUAD_TOP_LEFT, LP_BLD_SWIZZLE_DONTCARE, LP_BLD_QUAD_BOTTOM_LEFT, LP_BLD_SWIZZLE_DONTCARE }; static const unsigned char swizzle2[] = { LP_BLD_QUAD_TOP_RIGHT, LP_BLD_SWIZZLE_DONTCARE, LP_BLD_QUAD_TOP_LEFT, LP_BLD_SWIZZLE_DONTCARE }; vec1 = lp_build_swizzle_aos(bld, a, swizzle1); vec2 = lp_build_swizzle_aos(bld, a, swizzle2); if (bld->type.floating) return LLVMBuildFSub(builder, vec2, vec1, "ddxddy"); else return LLVMBuildSub(builder, vec2, vec1, "ddxddy"); }
LLVMValueRef gen_sub(struct node *ast) { return LLVMBuildSub(builder, codegen(ast->one), codegen(ast->two), ""); }
LLVMValueRef ac_build_imsb(struct ac_llvm_context *ctx, LLVMValueRef arg, LLVMTypeRef dst_type) { const char *intr_name = (HAVE_LLVM < 0x0400) ? "llvm.AMDGPU.flbit.i32" : "llvm.amdgcn.sffbh.i32"; LLVMValueRef msb = ac_build_intrinsic(ctx, intr_name, dst_type, &arg, 1, AC_FUNC_ATTR_READNONE); /* The HW returns the last bit index from MSB, but NIR/TGSI wants * the index from LSB. Invert it by doing "31 - msb". */ msb = LLVMBuildSub(ctx->builder, LLVMConstInt(ctx->i32, 31, false), msb, ""); LLVMValueRef all_ones = LLVMConstInt(ctx->i32, -1, true); LLVMValueRef cond = LLVMBuildOr(ctx->builder, LLVMBuildICmp(ctx->builder, LLVMIntEQ, arg, LLVMConstInt(ctx->i32, 0, 0), ""), LLVMBuildICmp(ctx->builder, LLVMIntEQ, arg, all_ones, ""), ""); return LLVMBuildSelect(ctx->builder, cond, all_ones, msb, ""); }
struct LLVMOpaqueValue *bllvm_compile_rirbop(const struct rir_expression *expr, struct llvm_traversal_ctx *ctx) { LLVMValueRef ret; LLVMValueRef left = bllvm_value_from_rir_value_or_die(expr->binaryop.a, ctx); LLVMValueRef right = bllvm_value_from_rir_value_or_die(expr->binaryop.b, ctx); switch(expr->type) { case RIR_EXPRESSION_ADD: ret = LLVMBuildAdd(ctx->builder, left, right, ""); break; case RIR_EXPRESSION_SUB: ret = LLVMBuildSub(ctx->builder, left, right, ""); break; case RIR_EXPRESSION_MUL: ret = LLVMBuildMul(ctx->builder, left, right, ""); break; case RIR_EXPRESSION_DIV: ret = LLVMBuildUDiv(ctx->builder, left, right, ""); break; default: RF_CRITICAL_FAIL("Should never get anything other than binaryop here"); break; } return ret; }
/* * To be able to handle multiple quads at once in texture sampling and * do lod calculations per quad, it is necessary to get the per-quad * derivatives into the lp_build_rho function. * For 8-wide vectors the packed derivative values for 3 coords would * look like this, this scales to a arbitrary (multiple of 4) vector size: * ds1dx ds1dy dt1dx dt1dy ds2dx ds2dy dt2dx dt2dy * dr1dx dr1dy _____ _____ dr2dx dr2dy _____ _____ * The second vector will be unused for 1d and 2d textures. */ LLVMValueRef lp_build_packed_ddx_ddy_onecoord(struct lp_build_context *bld, LLVMValueRef a) { struct gallivm_state *gallivm = bld->gallivm; LLVMBuilderRef builder = gallivm->builder; LLVMValueRef vec1, vec2; /* same packing as _twocoord, but can use aos swizzle helper */ /* * XXX could make swizzle1 a noop swizzle by using right top/bottom * pair for ddy */ static const unsigned char swizzle1[] = { LP_BLD_QUAD_TOP_LEFT, LP_BLD_QUAD_TOP_LEFT, LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE }; static const unsigned char swizzle2[] = { LP_BLD_QUAD_TOP_RIGHT, LP_BLD_QUAD_BOTTOM_LEFT, LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE }; vec1 = lp_build_swizzle_aos(bld, a, swizzle1); vec2 = lp_build_swizzle_aos(bld, a, swizzle2); if (bld->type.floating) return LLVMBuildFSub(builder, vec2, vec1, "ddxddy"); else return LLVMBuildSub(builder, vec2, vec1, "ddxddy"); }
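A scalar picture of what the swizzled subtraction computes per quad, assuming the conventional lp_bld_quad.h ordering (top-left = 0, top-right = 1, bottom-left = 2, bottom-right = 3); quad_ddx_ddy_ref is illustrative only.

/* Hedged sketch: per-quad ddx/ddy from one scalar value per pixel. */
static void quad_ddx_ddy_ref(const float a[4], float *ddx, float *ddy)
{
   /* a[0] = top-left, a[1] = top-right, a[2] = bottom-left, a[3] = bottom-right */
   *ddx = a[1] - a[0];   /* TOP_RIGHT   - TOP_LEFT */
   *ddy = a[2] - a[0];   /* BOTTOM_LEFT - TOP_LEFT */
}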
LLVMValueRef gen_predec(struct node *ast) { LLVMValueRef result; result = LLVMBuildSub(builder, codegen(ast->one), CONST(1), ""); check_store(result, lvalue(ast->one)); return result; }
LLVMValueRef gen_postdec(struct node *ast) { LLVMValueRef orig, result; orig = codegen(ast->one); result = LLVMBuildSub(builder, orig, CONST(1), ""); check_store(result, lvalue(ast->one)); return orig; }
/** * Inverse of lp_build_clamped_float_to_unsigned_norm above. */ LLVMValueRef lp_build_unsigned_norm_to_float(LLVMBuilderRef builder, unsigned src_width, struct lp_type dst_type, LLVMValueRef src) { LLVMTypeRef vec_type = lp_build_vec_type(dst_type); LLVMTypeRef int_vec_type = lp_build_int_vec_type(dst_type); LLVMValueRef bias_; LLVMValueRef res; unsigned mantissa; unsigned n; unsigned long long ubound; unsigned long long mask; double scale; double bias; mantissa = lp_mantissa(dst_type); n = MIN2(mantissa, src_width); ubound = ((unsigned long long)1 << n); mask = ubound - 1; scale = (double)ubound/mask; bias = (double)((unsigned long long)1 << (mantissa - n)); res = src; if(src_width > mantissa) { int shift = src_width - mantissa; res = LLVMBuildLShr(builder, res, lp_build_int_const_scalar(dst_type, shift), ""); } bias_ = lp_build_const_scalar(dst_type, bias); res = LLVMBuildOr(builder, res, LLVMBuildBitCast(builder, bias_, int_vec_type, ""), ""); res = LLVMBuildBitCast(builder, res, vec_type, ""); res = LLVMBuildSub(builder, res, bias_, ""); res = LLVMBuildMul(builder, res, lp_build_const_scalar(dst_type, scale), ""); return res; }
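The inverse trick in scalar form (a sketch assuming src_width = 8 and a 23-bit mantissa); unorm8_to_float is a hypothetical helper for illustration, mirroring the OR/bitcast/subtract/scale sequence above.

/* Hedged scalar illustration of unsigned-norm to float. */
static float unorm8_to_float(unsigned u8)   /* u8 in [0, 255] */
{
   const unsigned n = 8;                                   /* src_width */
   const unsigned mantissa = 23;
   const double ubound = (double)(1ULL << n);              /* 256 */
   const double mask = ubound - 1;                         /* 255 */
   const float scale = (float)(ubound / mask);             /* 256/255 */
   const float bias = (float)(1ULL << (mantissa - n));     /* 2^15 */

   union { float f; unsigned u; } v;
   v.f = bias;
   v.u |= (u8 & 0xffu);         /* place the unorm bits in the low mantissa */
   return (v.f - bias) * scale; /* exactly 1.0 for u8 == 255 */
}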
static LLVMValueRef translateIntBinOp(NodeKind Op, LLVMValueRef ValueE1, LLVMValueRef ValueE2) { switch (Op) { case OrOp: return LLVMBuildOr (Builder, ValueE1, ValueE2, ""); case AndOp: return LLVMBuildAnd(Builder, ValueE1, ValueE2, ""); case SumOp: return LLVMBuildAdd(Builder, ValueE1, ValueE2, ""); case SubOp: return LLVMBuildSub(Builder, ValueE1, ValueE2, ""); case MultOp: return LLVMBuildMul(Builder, ValueE1, ValueE2, ""); case DivOp: return LLVMBuildSDiv(Builder, ValueE1, ValueE2, ""); case LtOp: return LLVMBuildICmp(Builder, LLVMIntSLT, ValueE1, ValueE2, ""); case LeOp: return LLVMBuildICmp(Builder, LLVMIntSLE, ValueE1, ValueE2, ""); case GtOp: return LLVMBuildICmp(Builder, LLVMIntSGT, ValueE1, ValueE2, ""); case GeOp: return LLVMBuildICmp(Builder, LLVMIntSGE, ValueE1, ValueE2, ""); case EqOp: return LLVMBuildICmp(Builder, LLVMIntEQ, ValueE1, ValueE2, ""); case DiffOp: return LLVMBuildICmp(Builder, LLVMIntNE, ValueE1, ValueE2, ""); default: return NULL; } }
static inline LLVMValueRef LLVM_visit(ASTNode *node, LLVMBuilderRef builder) {
  switch(node->type) {
    case AST_BINARY_OP: {
      ASTBinaryOp *binary_op = (ASTBinaryOp*)node;
      LLVMValueRef a = LLVM_visit(binary_op->lhs, builder);
      LLVMValueRef b = LLVM_visit(binary_op->rhs, builder);
      switch(binary_op->op) {
        case '+': return LLVMBuildAdd(builder, a, b, "a + b");
        case '-': return LLVMBuildSub(builder, a, b, "a - b");
        case '*': return LLVMBuildMul(builder, a, b, "a * b");
        case '/': return LLVMBuildSDiv(builder, a, b, "a / b");
      }
      /* Unknown operator: do not fall through into the AST_INT case. */
      return NULL;
    }
    case AST_INT: {
      return LLVMConstInt(LLVMInt32Type(), ((ASTInt*)node)->value, 0);
    }
  }
  return NULL; /* unhandled node type */
}
/** * Generate a - b */ LLVMValueRef lp_build_sub(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) { const struct lp_type type = bld->type; LLVMValueRef res; if(b == bld->zero) return a; if(a == bld->undef || b == bld->undef) return bld->undef; if(a == b) return bld->zero; if(bld->type.norm) { const char *intrinsic = NULL; if(b == bld->one) return bld->zero; if(util_cpu_caps.has_sse2 && type.width * type.length == 128 && !type.floating && !type.fixed) { if(type.width == 8) intrinsic = type.sign ? "llvm.x86.sse2.psubs.b" : "llvm.x86.sse2.psubus.b"; if(type.width == 16) intrinsic = type.sign ? "llvm.x86.sse2.psubs.w" : "llvm.x86.sse2.psubus.w"; } if(intrinsic) return lp_build_intrinsic_binary(bld->builder, intrinsic, lp_build_vec_type(bld->type), a, b); } if(LLVMIsConstant(a) && LLVMIsConstant(b)) res = LLVMConstSub(a, b); else res = LLVMBuildSub(bld->builder, a, b, ""); if(bld->type.norm && (bld->type.floating || bld->type.fixed)) res = lp_build_max_simple(bld, res, bld->zero); return res; }
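For context, a scalar sketch of the saturating semantics that the psubus/psubs fast path above relies on; sub_u8_sat and sub_s8_sat are illustrative names, not Mesa helpers, and show one 8-bit lane of the packed instructions.

/* Hedged sketch: saturating subtraction, as provided per lane by SSE2. */
static unsigned char sub_u8_sat(unsigned char a, unsigned char b)
{
   return (a > b) ? (unsigned char)(a - b) : 0;   /* psubus.b: clamp at 0 */
}

static signed char sub_s8_sat(signed char a, signed char b)
{
   int r = (int)a - (int)b;                       /* psubs.b: clamp to [-128, 127] */
   if (r < -128) r = -128;
   if (r > 127)  r = 127;
   return (signed char)r;
}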
static void emit_bfi(const struct lp_build_tgsi_action *action,
                     struct lp_build_tgsi_context *bld_base,
                     struct lp_build_emit_data *emit_data)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef bfi_args[3];
   LLVMValueRef bfi_sm5;
   LLVMValueRef cond;

   /* Calculate the bitmask: (((1 << src3) - 1) << src2) */
   bfi_args[0] = LLVMBuildShl(builder,
                              LLVMBuildSub(builder,
                                           LLVMBuildShl(builder,
                                                        bld_base->int_bld.one,
                                                        emit_data->args[3], ""),
                                           bld_base->int_bld.one, ""),
                              emit_data->args[2], "");

   bfi_args[1] = LLVMBuildShl(builder, emit_data->args[1],
                              emit_data->args[2], "");

   bfi_args[2] = emit_data->args[0];

   /* Calculate:
    *   (arg0 & arg1) | (~arg0 & arg2) = arg2 ^ (arg0 & (arg1 ^ arg2))
    * Use the right-hand side, which the LLVM backend can convert to V_BFI.
    */
   bfi_sm5 =
      LLVMBuildXor(builder, bfi_args[2],
                   LLVMBuildAnd(builder, bfi_args[0],
                                LLVMBuildXor(builder, bfi_args[1], bfi_args[2],
                                             ""), ""), "");

   /* Since shifts of >= 32 bits are undefined in LLVM IR, the backend
    * uses the convenient V_BFI lowering for the above, which follows SM5
    * and disagrees with GLSL semantics when bits (src3) is 32.
    */
   cond = LLVMBuildICmp(builder, LLVMIntUGE, emit_data->args[3],
                        lp_build_const_int32(gallivm, 32), "");
   emit_data->output[emit_data->chan] =
      LLVMBuildSelect(builder, cond, emit_data->args[1], bfi_sm5, "");
}
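A scalar sketch of the bitfield-insert identity used above; bfi_ref and its parameter names are hypothetical, and the bits >= 32 case is deliberately left to the select in the real code.

/* Hedged sketch: base ^ (mask & (ins ^ base)) == (mask & ins) | (~mask & base). */
static unsigned bfi_ref(unsigned base, unsigned ins, unsigned offset, unsigned bits)
{
   unsigned mask = ((1u << bits) - 1u) << offset;   /* assumes bits + offset < 32 */
   unsigned shifted_ins = ins << offset;
   /* the XOR/AND form is what the backend can map to V_BFI */
   return base ^ (mask & (shifted_ins ^ base));
}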
/** * Generate 1 - a, or ~a depending on bld->type. */ LLVMValueRef lp_build_comp(struct lp_build_context *bld, LLVMValueRef a) { const struct lp_type type = bld->type; if(a == bld->one) return bld->zero; if(a == bld->zero) return bld->one; if(type.norm && !type.floating && !type.fixed && !type.sign) { if(LLVMIsConstant(a)) return LLVMConstNot(a); else return LLVMBuildNot(bld->builder, a, ""); } if(LLVMIsConstant(a)) return LLVMConstSub(bld->one, a); else return LLVMBuildSub(bld->builder, bld->one, a, ""); }
LLVMValueRef ac_build_umsb(struct ac_llvm_context *ctx, LLVMValueRef arg, LLVMTypeRef dst_type) { LLVMValueRef args[2] = { arg, LLVMConstInt(ctx->i1, 1, 0), }; LLVMValueRef msb = ac_build_intrinsic(ctx, "llvm.ctlz.i32", dst_type, args, ARRAY_SIZE(args), AC_FUNC_ATTR_READNONE); /* The HW returns the last bit index from MSB, but TGSI/NIR wants * the index from LSB. Invert it by doing "31 - msb". */ msb = LLVMBuildSub(ctx->builder, LLVMConstInt(ctx->i32, 31, false), msb, ""); /* check for zero */ return LLVMBuildSelect(ctx->builder, LLVMBuildICmp(ctx->builder, LLVMIntEQ, arg, LLVMConstInt(ctx->i32, 0, 0), ""), LLVMConstInt(ctx->i32, -1, true), msb, ""); }
static INLINE void yuv_to_rgb_soa(struct gallivm_state *gallivm, unsigned n, LLVMValueRef y, LLVMValueRef u, LLVMValueRef v, LLVMValueRef *r, LLVMValueRef *g, LLVMValueRef *b) { LLVMBuilderRef builder = gallivm->builder; struct lp_type type; struct lp_build_context bld; LLVMValueRef c0; LLVMValueRef c8; LLVMValueRef c16; LLVMValueRef c128; LLVMValueRef c255; LLVMValueRef cy; LLVMValueRef cug; LLVMValueRef cub; LLVMValueRef cvr; LLVMValueRef cvg; memset(&type, 0, sizeof type); type.sign = TRUE; type.width = 32; type.length = n; lp_build_context_init(&bld, gallivm, type); assert(lp_check_value(type, y)); assert(lp_check_value(type, u)); assert(lp_check_value(type, v)); /* * Constants */ c0 = lp_build_const_int_vec(gallivm, type, 0); c8 = lp_build_const_int_vec(gallivm, type, 8); c16 = lp_build_const_int_vec(gallivm, type, 16); c128 = lp_build_const_int_vec(gallivm, type, 128); c255 = lp_build_const_int_vec(gallivm, type, 255); cy = lp_build_const_int_vec(gallivm, type, 298); cug = lp_build_const_int_vec(gallivm, type, -100); cub = lp_build_const_int_vec(gallivm, type, 516); cvr = lp_build_const_int_vec(gallivm, type, 409); cvg = lp_build_const_int_vec(gallivm, type, -208); /* * y -= 16; * u -= 128; * v -= 128; */ y = LLVMBuildSub(builder, y, c16, ""); u = LLVMBuildSub(builder, u, c128, ""); v = LLVMBuildSub(builder, v, c128, ""); /* * r = 298 * _y + 409 * _v + 128; * g = 298 * _y - 100 * _u - 208 * _v + 128; * b = 298 * _y + 516 * _u + 128; */ y = LLVMBuildMul(builder, y, cy, ""); y = LLVMBuildAdd(builder, y, c128, ""); *r = LLVMBuildMul(builder, v, cvr, ""); *g = LLVMBuildAdd(builder, LLVMBuildMul(builder, u, cug, ""), LLVMBuildMul(builder, v, cvg, ""), ""); *b = LLVMBuildMul(builder, u, cub, ""); *r = LLVMBuildAdd(builder, *r, y, ""); *g = LLVMBuildAdd(builder, *g, y, ""); *b = LLVMBuildAdd(builder, *b, y, ""); /* * r >>= 8; * g >>= 8; * b >>= 8; */ *r = LLVMBuildAShr(builder, *r, c8, "r"); *g = LLVMBuildAShr(builder, *g, c8, "g"); *b = LLVMBuildAShr(builder, *b, c8, "b"); /* * Clamp */ *r = lp_build_clamp(&bld, *r, c0, c255); *g = lp_build_clamp(&bld, *g, c0, c255); *b = lp_build_clamp(&bld, *b, c0, c255); }
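The same fixed-point conversion written out as plain scalar C for reference (a sketch; clamp_int is a local stand-in for lp_build_clamp, and right-shifting a negative int is assumed to behave arithmetically, as on common compilers).

/* Hedged scalar reference for the BT.601-style YUV -> RGB math above. */
static int clamp_int(int x, int lo, int hi)
{
   return x < lo ? lo : (x > hi ? hi : x);
}

static void yuv_to_rgb_ref(int y, int u, int v, int *r, int *g, int *b)
{
   int _y = y - 16;
   int _u = u - 128;
   int _v = v - 128;

   *r = (298 * _y + 409 * _v + 128) >> 8;
   *g = (298 * _y - 100 * _u - 208 * _v + 128) >> 8;
   *b = (298 * _y + 516 * _u + 128) >> 8;

   *r = clamp_int(*r, 0, 255);
   *g = clamp_int(*g, 0, 255);
   *b = clamp_int(*b, 0, 255);
}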
/**
 * Generic type conversion.
 *
 * TODO: Take a precision argument, or even better, add a new precision member
 * to the lp_type union.
 */
void
lp_build_conv(struct gallivm_state *gallivm,
              struct lp_type src_type,
              struct lp_type dst_type,
              const LLVMValueRef *src, unsigned num_srcs,
              LLVMValueRef *dst, unsigned num_dsts)
{
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_type tmp_type;
   LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH];
   unsigned num_tmps;
   unsigned i;

   /* We must not lose or gain channels. Only precision may change. */
   assert(src_type.length * num_srcs == dst_type.length * num_dsts);

   assert(src_type.length <= LP_MAX_VECTOR_LENGTH);
   assert(dst_type.length <= LP_MAX_VECTOR_LENGTH);
   assert(num_srcs <= LP_MAX_VECTOR_LENGTH);
   assert(num_dsts <= LP_MAX_VECTOR_LENGTH);

   tmp_type = src_type;
   for(i = 0; i < num_srcs; ++i) {
      assert(lp_check_value(src_type, src[i]));
      tmp[i] = src[i];
   }
   num_tmps = num_srcs;

   /* Special case 4x4f --> 1x16ub */
   if (src_type.floating == 1 &&
       src_type.fixed    == 0 &&
       src_type.sign     == 1 &&
       src_type.norm     == 0 &&
       src_type.width    == 32 &&
       src_type.length   == 4 &&

       dst_type.floating == 0 &&
       dst_type.fixed    == 0 &&
       dst_type.sign     == 0 &&
       dst_type.norm     == 1 &&
       dst_type.width    == 8 &&
       dst_type.length   == 16 &&

       4 * num_dsts      == num_srcs &&

       util_cpu_caps.has_sse2)
   {
      struct lp_build_context bld;
      struct lp_type int16_type = dst_type;
      struct lp_type int32_type = dst_type;
      LLVMValueRef const_255f;
      unsigned i, j;

      lp_build_context_init(&bld, gallivm, src_type);

      int16_type.width *= 2;
      int16_type.length /= 2;
      int16_type.sign = 1;

      int32_type.width *= 4;
      int32_type.length /= 4;
      int32_type.sign = 1;

      const_255f = lp_build_const_vec(gallivm, src_type, 255.0f);

      for (i = 0; i < num_dsts; ++i, src += 4) {
         LLVMValueRef lo, hi;

         for (j = 0; j < 4; ++j) {
            tmp[j] = LLVMBuildFMul(builder, src[j], const_255f, "");
            tmp[j] = lp_build_iround(&bld, tmp[j]);
         }

         /* relying on clamping behavior of sse2 intrinsics here */
         lo = lp_build_pack2(gallivm, int32_type, int16_type, tmp[0], tmp[1]);
         hi = lp_build_pack2(gallivm, int32_type, int16_type, tmp[2], tmp[3]);
         dst[i] = lp_build_pack2(gallivm, int16_type, dst_type, lo, hi);
      }

      return;
   }

   /* Special case 2x8f --> 1x16ub */
   else if (src_type.floating == 1 &&
            src_type.fixed    == 0 &&
            src_type.sign     == 1 &&
            src_type.norm     == 0 &&
            src_type.width    == 32 &&
            src_type.length   == 8 &&

            dst_type.floating == 0 &&
            dst_type.fixed    == 0 &&
            dst_type.sign     == 0 &&
            dst_type.norm     == 1 &&
            dst_type.width    == 8 &&
            dst_type.length   == 16 &&

            2 * num_dsts      == num_srcs &&

            util_cpu_caps.has_avx)
   {
      struct lp_build_context bld;
      struct lp_type int16_type = dst_type;
      struct lp_type int32_type = dst_type;
      LLVMValueRef const_255f;
      unsigned i;

      lp_build_context_init(&bld, gallivm, src_type);

      int16_type.width *= 2;
      int16_type.length /= 2;
      int16_type.sign = 1;

      int32_type.width *= 4;
      int32_type.length /= 4;
      int32_type.sign = 1;

      const_255f = lp_build_const_vec(gallivm, src_type, 255.0f);

      for (i = 0; i < num_dsts; ++i, src += 2) {
         LLVMValueRef lo, hi, a, b;

         a = LLVMBuildFMul(builder, src[0], const_255f, "");
         b = LLVMBuildFMul(builder, src[1], const_255f, "");

         a = lp_build_iround(&bld, a);
         b = lp_build_iround(&bld, b);

         tmp[0] = lp_build_extract_range(gallivm, a, 0, 4);
         tmp[1] = lp_build_extract_range(gallivm, a, 4, 4);
         tmp[2] = lp_build_extract_range(gallivm, b, 0, 4);
         tmp[3] = lp_build_extract_range(gallivm, b, 4, 4);

         /* relying on clamping behavior of sse2 intrinsics here */
         lo = lp_build_pack2(gallivm, int32_type, int16_type, tmp[0], tmp[1]);
         hi = lp_build_pack2(gallivm, int32_type, int16_type, tmp[2], tmp[3]);
         dst[i] = lp_build_pack2(gallivm, int16_type, dst_type, lo, hi);
      }

      return;
   }

   /* Pre convert half-floats to floats */
   else if (src_type.floating && src_type.width == 16) {
      for(i = 0; i < num_tmps; ++i)
         tmp[i] = lp_build_half_to_float(gallivm, src_type, tmp[i]);

      tmp_type.width = 32;
   }

   /*
    * Clamp if necessary
    */

   if(memcmp(&src_type, &dst_type, sizeof src_type) != 0) {
      struct lp_build_context bld;
      double src_min = lp_const_min(src_type);
      double dst_min = lp_const_min(dst_type);
      double src_max = lp_const_max(src_type);
      double dst_max = lp_const_max(dst_type);
      LLVMValueRef thres;

      lp_build_context_init(&bld, gallivm, tmp_type);

      if(src_min < dst_min) {
         if(dst_min == 0.0)
            thres = bld.zero;
         else
            thres = lp_build_const_vec(gallivm, src_type, dst_min);
         for(i = 0; i < num_tmps; ++i)
            tmp[i] = lp_build_max(&bld, tmp[i], thres);
      }

      if(src_max > dst_max) {
         if(dst_max == 1.0)
            thres = bld.one;
         else
            thres = lp_build_const_vec(gallivm, src_type, dst_max);
         for(i = 0; i < num_tmps; ++i)
            tmp[i] = lp_build_min(&bld, tmp[i], thres);
      }
   }

   /*
    * Scale to the narrowest range
    */

   if(dst_type.floating) {
      /* Nothing to do */
   }
   else if(tmp_type.floating) {
      if(!dst_type.fixed && !dst_type.sign && dst_type.norm) {
         for(i = 0; i < num_tmps; ++i) {
            tmp[i] = lp_build_clamped_float_to_unsigned_norm(gallivm,
                                                             tmp_type,
                                                             dst_type.width,
                                                             tmp[i]);
         }
         tmp_type.floating = FALSE;
      }
      else {
         double dst_scale = lp_const_scale(dst_type);
         LLVMTypeRef tmp_vec_type;

         if (dst_scale != 1.0) {
            LLVMValueRef scale = lp_build_const_vec(gallivm, tmp_type, dst_scale);
            for(i = 0; i < num_tmps; ++i)
               tmp[i] = LLVMBuildFMul(builder, tmp[i], scale, "");
         }

         /* Use an equally sized integer for intermediate computations */
         tmp_type.floating = FALSE;
         tmp_vec_type = lp_build_vec_type(gallivm, tmp_type);
         for(i = 0; i < num_tmps; ++i) {
#if 0
            if(dst_type.sign)
               tmp[i] = LLVMBuildFPToSI(builder, tmp[i], tmp_vec_type, "");
            else
               tmp[i] = LLVMBuildFPToUI(builder, tmp[i], tmp_vec_type, "");
#else
            /* FIXME: there is no SSE counterpart for LLVMBuildFPToUI */
            tmp[i] = LLVMBuildFPToSI(builder, tmp[i], tmp_vec_type, "");
#endif
         }
      }
   }
   else {
      unsigned src_shift = lp_const_shift(src_type);
      unsigned dst_shift = lp_const_shift(dst_type);
      unsigned src_offset = lp_const_offset(src_type);
      unsigned dst_offset = lp_const_offset(dst_type);

      /* Compensate for different offsets */
      if (dst_offset > src_offset && src_type.width > dst_type.width) {
         for (i = 0; i < num_tmps; ++i) {
            LLVMValueRef shifted;
            LLVMValueRef shift = lp_build_const_int_vec(gallivm, tmp_type, src_shift - 1);
            if(src_type.sign)
               shifted = LLVMBuildAShr(builder, tmp[i], shift, "");
            else
               shifted = LLVMBuildLShr(builder, tmp[i], shift, "");

            tmp[i] = LLVMBuildSub(builder, tmp[i], shifted, "");
         }
      }

      if(src_shift > dst_shift) {
         LLVMValueRef shift = lp_build_const_int_vec(gallivm, tmp_type,
                                                     src_shift - dst_shift);
         for(i = 0; i < num_tmps; ++i)
            if(src_type.sign)
               tmp[i] = LLVMBuildAShr(builder, tmp[i], shift, "");
            else
               tmp[i] = LLVMBuildLShr(builder, tmp[i], shift, "");
      }
   }

   /*
    * Truncate or expand bit width
    *
    * No data conversion should happen here, although the sign bits are
    * crucial to avoid bad clamping.
    */

   {
      struct lp_type new_type;

      new_type = tmp_type;
      new_type.sign   = dst_type.sign;
      new_type.width  = dst_type.width;
      new_type.length = dst_type.length;

      lp_build_resize(gallivm, tmp_type, new_type, tmp, num_srcs, tmp, num_dsts);

      tmp_type = new_type;
      num_tmps = num_dsts;
   }

   /*
    * Scale to the widest range
    */

   if(src_type.floating) {
      /* Nothing to do */
   }
   else if(!src_type.floating && dst_type.floating) {
      if(!src_type.fixed && !src_type.sign && src_type.norm) {
         for(i = 0; i < num_tmps; ++i) {
            tmp[i] = lp_build_unsigned_norm_to_float(gallivm,
                                                     src_type.width,
                                                     dst_type,
                                                     tmp[i]);
         }
         tmp_type.floating = TRUE;
      }
      else {
         double src_scale = lp_const_scale(src_type);
         LLVMTypeRef tmp_vec_type;

         /* Use an equally sized floating point type for intermediate computations */
         tmp_type.floating = TRUE;
         tmp_type.sign = TRUE;
         tmp_vec_type = lp_build_vec_type(gallivm, tmp_type);
         for(i = 0; i < num_tmps; ++i) {
#if 0
            if(dst_type.sign)
               tmp[i] = LLVMBuildSIToFP(builder, tmp[i], tmp_vec_type, "");
            else
               tmp[i] = LLVMBuildUIToFP(builder, tmp[i], tmp_vec_type, "");
#else
            /* FIXME: there is no SSE counterpart for LLVMBuildUIToFP */
            tmp[i] = LLVMBuildSIToFP(builder, tmp[i], tmp_vec_type, "");
#endif
         }

         if (src_scale != 1.0) {
            LLVMValueRef scale = lp_build_const_vec(gallivm, tmp_type, 1.0/src_scale);
            for(i = 0; i < num_tmps; ++i)
               tmp[i] = LLVMBuildFMul(builder, tmp[i], scale, "");
         }
      }
   }
   else {
      unsigned src_shift = lp_const_shift(src_type);
      unsigned dst_shift = lp_const_shift(dst_type);
      unsigned src_offset = lp_const_offset(src_type);
      unsigned dst_offset = lp_const_offset(dst_type);

      if (src_shift < dst_shift) {
         LLVMValueRef pre_shift[LP_MAX_VECTOR_LENGTH];
         LLVMValueRef shift = lp_build_const_int_vec(gallivm, tmp_type,
                                                     dst_shift - src_shift);

         for (i = 0; i < num_tmps; ++i) {
            pre_shift[i] = tmp[i];
            tmp[i] = LLVMBuildShl(builder, tmp[i], shift, "");
         }

         /* Compensate for different offsets */
         if (dst_offset > src_offset) {
            for (i = 0; i < num_tmps; ++i) {
               tmp[i] = LLVMBuildSub(builder, tmp[i], pre_shift[i], "");
            }
         }
      }
   }

   for(i = 0; i < num_dsts; ++i) {
      dst[i] = tmp[i];
      assert(lp_check_value(dst_type, dst[i]));
   }
}
/**
 * Special case for converting clamped IEEE-754 floats to unsigned norms.
 *
 * The mathematical voodoo below may seem excessive but it is actually
 * paramount we do it this way for several reasons. First, there is no single
 * precision FP to unsigned integer conversion Intel SSE instruction. Second,
 * even if there were, since the FP's mantissa takes only a fraction of the
 * register bits, the typical scale-and-cast approach would require double
 * precision for accurate results, and therefore half the throughput.
 *
 * Although the result values can be scaled to an arbitrary bit width specified
 * by dst_width, the actual result type will have the same width as the input.
 *
 * Ex: src = { float, float, float, float }
 * return { i32, i32, i32, i32 } where each value is in [0, 2^dst_width-1].
 */
LLVMValueRef
lp_build_clamped_float_to_unsigned_norm(struct gallivm_state *gallivm,
                                        struct lp_type src_type,
                                        unsigned dst_width,
                                        LLVMValueRef src)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMTypeRef int_vec_type = lp_build_int_vec_type(gallivm, src_type);
   LLVMValueRef res;
   unsigned mantissa;

   assert(src_type.floating);
   assert(dst_width <= src_type.width);
   src_type.sign = FALSE;

   mantissa = lp_mantissa(src_type);

   if (dst_width <= mantissa) {
      /*
       * Apply magic coefficients that will make the desired result appear
       * in the lowest significant bits of the mantissa, with correct rounding.
       *
       * This only works if the destination width fits in the mantissa.
       */

      unsigned long long ubound;
      unsigned long long mask;
      double scale;
      double bias;

      ubound = (1ULL << dst_width);
      mask = ubound - 1;
      scale = (double)mask/ubound;
      bias = (double)(1ULL << (mantissa - dst_width));

      res = LLVMBuildFMul(builder, src,
                          lp_build_const_vec(gallivm, src_type, scale), "");
      res = LLVMBuildFAdd(builder, res,
                          lp_build_const_vec(gallivm, src_type, bias), "");
      res = LLVMBuildBitCast(builder, res, int_vec_type, "");
      res = LLVMBuildAnd(builder, res,
                         lp_build_const_int_vec(gallivm, src_type, mask), "");
   }
   else if (dst_width == (mantissa + 1)) {
      /*
       * The destination width matches exactly what can be represented in
       * floating point (i.e., mantissa + 1 bits). So do a straight
       * multiplication followed by casting. No further rounding is necessary.
       */

      double scale;

      scale = (double)((1ULL << dst_width) - 1);

      res = LLVMBuildFMul(builder, src,
                          lp_build_const_vec(gallivm, src_type, scale), "");
      res = LLVMBuildFPToSI(builder, res, int_vec_type, "");
   }
   else {
      /*
       * The destination exceeds what can be represented in floating point.
       * So multiply by the largest power of two we can get away with, and
       * then subtract the most significant bit to rescale to normalized
       * values.
       *
       * The largest power of two factor we can get away with is
       * (1 << (src_type.width - 1)), because we need to use a signed
       * conversion. In theory it should be (1 << (src_type.width - 2)), but
       * IEEE 754 rules state that INT_MIN should be returned by FPToSI, which
       * is the correct result for values near 1.0!
       *
       * This means we get (src_type.width - 1) correct bits for values near
       * 0.0, and (mantissa + 1) correct bits for values near 1.0. Equally or
       * more important, we also get exact results for 0.0 and 1.0.
       */

      unsigned n = MIN2(src_type.width - 1, dst_width);

      double scale = (double)(1ULL << n);
      unsigned lshift = dst_width - n;
      unsigned rshift = n;
      LLVMValueRef lshifted;
      LLVMValueRef rshifted;

      res = LLVMBuildFMul(builder, src,
                          lp_build_const_vec(gallivm, src_type, scale), "");
      res = LLVMBuildFPToSI(builder, res, int_vec_type, "");

      /*
       * Align the most significant bit to its final place.
       *
       * This will cause 1.0 to overflow to 0, but the later adjustment will
       * get it right.
       */
      if (lshift) {
         lshifted = LLVMBuildShl(builder, res,
                                 lp_build_const_int_vec(gallivm, src_type,
                                                        lshift), "");
      } else {
         lshifted = res;
      }

      /*
       * Align the most significant bit to the right.
       */
      rshifted = LLVMBuildLShr(builder, res,
                               lp_build_const_int_vec(gallivm, src_type, rshift),
                               "");

      /*
       * Subtract the MSB (shifted down to the LSB), thereby re-scaling from
       * (1 << dst_width) to ((1 << dst_width) - 1).
       */
      res = LLVMBuildSub(builder, lshifted, rshifted, "");
   }

   return res;
}
/*
 * gen_operator_expression
 *
 * Code generation for operator expressions. Most of them have straightforward
 * translations into LLVM instructions and are handled directly here.
 */
static LLVMValueRef
gen_operator_expression (gencodectx_t gctx, expr_node_t *exp, LLVMTypeRef neededtype)
{
    expr_node_t *lhs = expr_op_lhs(exp);
    expr_node_t *rhs = expr_op_rhs(exp);
    optype_t op = expr_op_type(exp);
    LLVMBuilderRef builder = gctx->curfn->builder;
    LLVMTypeRef inttype;
    LLVMValueRef lval, rval, result;

    if (op == OPER_FETCH) {
        return gen_fetch(gctx, rhs, neededtype);
    }

    if (op == OPER_ASSIGN) {
        LLVMValueRef val = llvmgen_assignment(gctx, lhs, rhs);
        return llvmgen_adjustval(gctx, val, neededtype, 0);
    }

    if (op == OPER_SHIFT) {
        return gen_shift(gctx, lhs, rhs, neededtype);
    }

    inttype = LLVMIntTypeInContext(gctx->llvmctx, machine_scalar_bits(gctx->mach));

    lval = (lhs == 0 ? 0 : llvmgen_expression(gctx, lhs, inttype));
    rval = llvmgen_expression(gctx, rhs, inttype);

    switch (op) {
    case OPER_UNARY_PLUS:
        result = rval;
        break;
    case OPER_UNARY_MINUS:
        result = LLVMBuildNeg(builder, rval, llvmgen_temp(gctx));
        break;
    case OPER_ADD:
        result = LLVMBuildAdd(builder, lval, rval, llvmgen_temp(gctx));
        break;
    case OPER_SUBTRACT:
        result = LLVMBuildSub(builder, lval, rval, llvmgen_temp(gctx));
        break;
    case OPER_MULT:
        result = LLVMBuildMul(builder, lval, rval, llvmgen_temp(gctx));
        break;
    case OPER_DIV:
        result = LLVMBuildUDiv(builder, lval, rval, llvmgen_temp(gctx));
        break;
    case OPER_MODULO:
        result = LLVMBuildURem(builder, lval, rval, llvmgen_temp(gctx));
        break;
    case OPER_AND:
        result = LLVMBuildAnd(builder, lval, rval, llvmgen_temp(gctx));
        break;
    case OPER_OR:
        result = LLVMBuildOr(builder, lval, rval, llvmgen_temp(gctx));
        break;
    case OPER_NOT:
        result = LLVMBuildNot(builder, rval, llvmgen_temp(gctx));
        break;
    case OPER_XOR:
        result = LLVMBuildXor(builder, lval, rval, llvmgen_temp(gctx));
        break;
    case OPER_EQV:
        result = LLVMBuildXor(builder, lval, rval, llvmgen_temp(gctx));
        result = LLVMBuildNot(builder, result, llvmgen_temp(gctx));
        break;
    default:
        if (op >= OPER_CMP_EQL && op <= OPER_CMP_GEQA) {
            result = LLVMBuildICmp(builder,
                                   llvmgen_predfromop(op, machine_addr_signed(gctx->mach)),
                                   lval, rval, llvmgen_temp(gctx));
        } else {
            // Everything should be covered
            expr_signal(gctx->ectx, STC__INTCMPERR, "gen_operator_expression");
            result = LLVMConstNull(inttype);
        }
        break;
    }

    return llvmgen_adjustval(gctx, result, neededtype, 0);

} /* gen_operator_expression */
/** * See http://www.devmaster.net/forums/showthread.php?p=43580 */ void lp_build_log2_approx(struct lp_build_context *bld, LLVMValueRef x, LLVMValueRef *p_exp, LLVMValueRef *p_floor_log2, LLVMValueRef *p_log2) { const struct lp_type type = bld->type; LLVMTypeRef vec_type = lp_build_vec_type(type); LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); LLVMValueRef expmask = lp_build_int_const_scalar(type, 0x7f800000); LLVMValueRef mantmask = lp_build_int_const_scalar(type, 0x007fffff); LLVMValueRef one = LLVMConstBitCast(bld->one, int_vec_type); LLVMValueRef i = NULL; LLVMValueRef exp = NULL; LLVMValueRef mant = NULL; LLVMValueRef logexp = NULL; LLVMValueRef logmant = NULL; LLVMValueRef res = NULL; if(p_exp || p_floor_log2 || p_log2) { /* TODO: optimize the constant case */ if(LLVMIsConstant(x)) debug_printf("%s: inefficient/imprecise constant arithmetic\n", __FUNCTION__); assert(type.floating && type.width == 32); i = LLVMBuildBitCast(bld->builder, x, int_vec_type, ""); /* exp = (float) exponent(x) */ exp = LLVMBuildAnd(bld->builder, i, expmask, ""); } if(p_floor_log2 || p_log2) { logexp = LLVMBuildLShr(bld->builder, exp, lp_build_int_const_scalar(type, 23), ""); logexp = LLVMBuildSub(bld->builder, logexp, lp_build_int_const_scalar(type, 127), ""); logexp = LLVMBuildSIToFP(bld->builder, logexp, vec_type, ""); } if(p_log2) { /* mant = (float) mantissa(x) */ mant = LLVMBuildAnd(bld->builder, i, mantmask, ""); mant = LLVMBuildOr(bld->builder, mant, one, ""); mant = LLVMBuildBitCast(bld->builder, mant, vec_type, ""); logmant = lp_build_polynomial(bld, mant, lp_build_log2_polynomial, Elements(lp_build_log2_polynomial)); /* This effectively increases the polynomial degree by one, but ensures that log2(1) == 0*/ logmant = LLVMBuildMul(bld->builder, logmant, LLVMBuildSub(bld->builder, mant, bld->one, ""), ""); res = LLVMBuildAdd(bld->builder, logmant, logexp, ""); } if(p_exp) *p_exp = exp; if(p_floor_log2) *p_floor_log2 = logexp; if(p_log2) *p_log2 = res; }
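A scalar sketch of the same exponent/mantissa split, for illustration only: the biased exponent field gives floor(log2(x)) and a polynomial in the (mant - 1) factor approximates the fractional part. The crude linear blend here stands in for lp_build_log2_polynomial; accuracy is not the point, the structure is.

/* Hedged scalar reference for the log2 approximation above. */
static float log2_approx_scalar(float x)   /* x assumed finite and > 0 */
{
   union { float f; unsigned u; } v;
   v.f = x;

   /* exponent field -> floor(log2(x)) */
   int exponent = (int)((v.u & 0x7f800000u) >> 23) - 127;

   /* mant in [1, 2): keep mantissa bits, force the exponent field to 0 */
   v.u = (v.u & 0x007fffffu) | 0x3f800000u;
   float mant = v.f;

   /* log2(mant) ~= (mant - 1) * p(mant); the factor guarantees log2(1) == 0 */
   float p = 1.4426950f - 0.4426950f * (mant - 1.0f);   /* illustrative coefficients */
   float logmant = (mant - 1.0f) * p;

   return (float)exponent + logmant;
}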