/**
 * Generate code for a binary arithmetic operator.
 *
 * The left operand is generated before the right one; NULL is returned if
 * either of them fails to generate. When both operands are LLVM constants the
 * operation is folded with const_f or const_i, otherwise an instruction is
 * emitted with build_f or build_i. The floating point / integer choice is
 * made by looking at the left operand only.
 */
static LLVMValueRef make_binop(compile_t* c, ast_t* left, ast_t* right,
  const_binop const_f, const_binop const_i,
  build_binop build_f, build_binop build_i)
{
  LLVMValueRef lhs = gen_expr(c, left);
  LLVMValueRef rhs = gen_expr(c, right);

  if(!lhs || !rhs)
    return NULL;

  if(!LLVMIsConstant(lhs) || !LLVMIsConstant(rhs))
  {
    // At least one operand is only known at run time: emit an instruction.
    if(!is_fp(lhs))
      return build_i(c->builder, lhs, rhs, "");

    LLVMValueRef fp_result = build_f(c->builder, lhs, rhs, "");

    // Unless IEEE maths was requested, mark the instruction as unsafe algebra.
    if(!c->opt->ieee_math)
      LLVMSetUnsafeAlgebra(fp_result);

    return fp_result;
  }

  // Both operands are constants: fold.
  return is_fp(lhs) ? const_f(lhs, rhs) : const_i(lhs, rhs);
}
/**
 * Generate left ^ right.
 *
 * Returns NULL if either operand fails to generate. Two constants are folded
 * with LLVMConstXor. When one operand is reported as always true or always
 * false, the xor is reduced to a negation of, or a copy of, the other operand.
 */
LLVMValueRef gen_xor(compile_t* c, ast_t* left, ast_t* right)
{
  LLVMValueRef lhs = gen_expr(c, left);
  LLVMValueRef rhs = gen_expr(c, right);

  if(!lhs || !rhs)
    return NULL;

  if(LLVMIsConstant(lhs) && LLVMIsConstant(rhs))
    return LLVMConstXor(lhs, rhs);

  // true ^ x is !x, false ^ x is x; the left operand is examined first.
  if(is_always_true(c, lhs))
  {
    return LLVMBuildNot(c->builder, rhs, "");
  } else if(is_always_false(c, lhs)) {
    return rhs;
  } else if(is_always_true(c, rhs)) {
    return LLVMBuildNot(c->builder, lhs, "");
  } else if(is_always_false(c, rhs)) {
    return lhs;
  }

  return LLVMBuildXor(c->builder, lhs, rhs, "");
}
/**
 * Generate a / b.
 *
 * The special values of the build context are handled first, in this order:
 * a zero numerator gives zero, a numerator of one gives the reciprocal of b,
 * a zero denominator gives undef, a denominator of one gives a, and any undef
 * operand gives undef. Two constants are folded. With SSE available and a
 * 4 x 32-bit type the division is emitted as a * rcp(b); otherwise a plain
 * floating point division is emitted.
 */
LLVMValueRef
lp_build_div(struct lp_build_context *bld,
             LLVMValueRef a,
             LLVMValueRef b)
{
   const struct lp_type type = bld->type;
   LLVMValueRef recip;

   if (a == bld->zero)
      return bld->zero;
   else if (a == bld->one)
      return lp_build_rcp(bld, b);
   else if (b == bld->zero)
      return bld->undef;
   else if (b == bld->one)
      return a;
   else if (a == bld->undef || b == bld->undef)
      return bld->undef;

   if (LLVMIsConstant(a) && LLVMIsConstant(b))
      return LLVMConstFDiv(a, b);

   if (!util_cpu_caps.has_sse || type.width != 32 || type.length != 4)
      return LLVMBuildFDiv(bld->builder, a, b, "");

   /* Multiply by the reciprocal instead of dividing. */
   recip = lp_build_rcp(bld, b);
   return lp_build_mul(bld, a, recip);
}
/**
 * Generate a comparison between two already generated values.
 *
 * Returns NULL if either value is NULL. The predicate is cmp_f when the left
 * value is floating point, otherwise cmp_si for signed and cmp_ui for unsigned
 * operands. Two constants are folded into a constant comparison; anything
 * else emits a comparison instruction.
 */
static LLVMValueRef make_cmp_value(compile_t* c, bool sign,
  LLVMValueRef l_value, LLVMValueRef r_value, LLVMRealPredicate cmp_f,
  LLVMIntPredicate cmp_si, LLVMIntPredicate cmp_ui)
{
  if(!l_value || !r_value)
    return NULL;

  // Only used when the operands are not floating point.
  LLVMIntPredicate cmp_i = sign ? cmp_si : cmp_ui;

  if(LLVMIsConstant(l_value) && LLVMIsConstant(r_value))
  {
    return is_fp(l_value)
      ? LLVMConstFCmp(cmp_f, l_value, r_value)
      : LLVMConstICmp(cmp_i, l_value, r_value);
  }

  return is_fp(l_value)
    ? LLVMBuildFCmp(c->builder, cmp_f, l_value, r_value, "")
    : LLVMBuildICmp(c->builder, cmp_i, l_value, r_value, "");
}
/**
 * Generate pow(x, y), computed as exp2(log2(x) * y).
 *
 * No constant folding is done; a debug message is printed when both
 * arguments are constants.
 */
LLVMValueRef
lp_build_pow(struct lp_build_context *bld,
             LLVMValueRef x,
             LLVMValueRef y)
{
   LLVMValueRef log2_x;
   LLVMValueRef exponent;

   /* TODO: optimize the constant case */
   if (LLVMIsConstant(x) && LLVMIsConstant(y)) {
      debug_printf("%s: inefficient/imprecise constant arithmetic\n",
                   __FUNCTION__);
   }

   log2_x = lp_build_log2(bld, x);
   exponent = lp_build_mul(bld, log2_x, y);

   return lp_build_exp2(bld, exponent);
}
/**
 * Generate left << right.
 *
 * Returns NULL if either operand fails to generate. Two constant operands are
 * folded with LLVMConstShl; otherwise a shift instruction is emitted.
 */
LLVMValueRef gen_shl(compile_t* c, ast_t* left, ast_t* right)
{
  LLVMValueRef value = gen_expr(c, left);
  LLVMValueRef amount = gen_expr(c, right);

  if(!value || !amount)
    return NULL;

  if(!LLVMIsConstant(value) || !LLVMIsConstant(amount))
    return LLVMBuildShl(c->builder, value, amount, "");

  return LLVMConstShl(value, amount);
}
/**
 * Generate left | right.
 *
 * Returns NULL if either operand fails to generate. Two constant operands are
 * folded with LLVMConstOr. If either operand is reported as always true, the
 * result is the constant 1 of type c->i1. Otherwise an or instruction is
 * emitted.
 */
LLVMValueRef gen_or(compile_t* c, ast_t* left, ast_t* right)
{
  LLVMValueRef lhs = gen_expr(c, left);
  LLVMValueRef rhs = gen_expr(c, right);

  if(!lhs || !rhs)
    return NULL;

  if(LLVMIsConstant(lhs) && LLVMIsConstant(rhs))
  {
    return LLVMConstOr(lhs, rhs);
  }

  if(is_always_true(c, lhs) || is_always_true(c, rhs))
  {
    // One side is known to be true, so the whole expression is.
    return LLVMConstInt(c->i1, 1, false);
  }

  return LLVMBuildOr(c->builder, lhs, rhs, "");
}
/**
 * Generate a polynomial in x.
 *
 * Evaluates coeffs[0] + x * coeffs[1] + x^2 * coeffs[2] + ... by walking the
 * coefficients from the last one down to the first and accumulating
 * res = coeff + x * res. With no coefficients the result is bld->undef.
 */
static LLVMValueRef
lp_build_polynomial(struct lp_build_context *bld,
                    LLVMValueRef x,
                    const double *coeffs,
                    unsigned num_coeffs)
{
   const struct lp_type type = bld->type;
   LLVMValueRef accum = NULL;
   unsigned remaining = num_coeffs;

   /* TODO: optimize the constant case */
   if (LLVMIsConstant(x)) {
      debug_printf("%s: inefficient/imprecise constant arithmetic\n",
                   __FUNCTION__);
   }

   while (remaining > 0) {
      LLVMValueRef coeff;

      --remaining;
      coeff = lp_build_const_scalar(type, coeffs[remaining]);

      /* The highest coefficient seeds the accumulator. */
      accum = accum ? lp_build_add(bld, coeff, lp_build_mul(bld, x, accum))
                    : coeff;
   }

   return accum ? accum : bld->undef;
}
/**
 * Generate a + b.
 *
 * Adding the context's zero returns the other operand and any undef operand
 * gives undef. For normalized types, adding the context's one gives one, and
 * 128-bit integer vectors of 8 or 16 bit elements use the SSE2 saturating add
 * intrinsics when SSE2 is available. Otherwise the sum is folded (two
 * constants) or emitted as an add, and for normalized floating or fixed point
 * types it is then clamped to at most one.
 */
LLVMValueRef
lp_build_add(struct lp_build_context *bld,
             LLVMValueRef a,
             LLVMValueRef b)
{
   const struct lp_type type = bld->type;
   LLVMValueRef res;

   if (a == bld->zero)
      return b;
   if (b == bld->zero)
      return a;
   if (a == bld->undef || b == bld->undef)
      return bld->undef;

   if (type.norm) {
      const char *intrinsic = NULL;

      if (a == bld->one || b == bld->one)
         return bld->one;

      if (util_cpu_caps.has_sse2 &&
          type.width * type.length == 128 &&
          !type.floating && !type.fixed) {
         switch (type.width) {
         case 8:
            intrinsic = type.sign ? "llvm.x86.sse2.padds.b"
                                  : "llvm.x86.sse2.paddus.b";
            break;
         case 16:
            intrinsic = type.sign ? "llvm.x86.sse2.padds.w"
                                  : "llvm.x86.sse2.paddus.w";
            break;
         default:
            break;
         }
      }

      if (intrinsic) {
         return lp_build_intrinsic_binary(bld->builder, intrinsic,
                                          lp_build_vec_type(bld->type), a, b);
      }
   }

   if (LLVMIsConstant(a) && LLVMIsConstant(b)) {
      res = LLVMConstAdd(a, b);
   } else {
      res = LLVMBuildAdd(bld->builder, a, b, "");
   }

   /* clamp to ceiling of 1.0 */
   if (type.norm && (type.floating || type.fixed)) {
      res = lp_build_min_simple(bld, res, bld->one);
   }

   /* XXX clamp to floor of -1 or 0??? */

   return res;
}
/**
 * Generate an approximation of 2^x and/or its building blocks.
 *
 * Each output pointer is optional; only the values needed for the requested
 * outputs are generated. x is first clamped to [-126.99999, 129] and split
 * into an integer part int(x - 0.5) and the remaining fractional part.
 *
 * @param p_exp2_int_part  receives 2^ipart, built by placing ipart + 127 in
 *                         the exponent bits of a float
 * @param p_frac_part      receives x - ipart
 * @param p_exp2           receives 2^ipart times the exp2 polynomial
 *                         evaluated at the fractional part
 */
void
lp_build_exp2_approx(struct lp_build_context *bld,
                     LLVMValueRef x,
                     LLVMValueRef *p_exp2_int_part,
                     LLVMValueRef *p_frac_part,
                     LLVMValueRef *p_exp2)
{
   const struct lp_type type = bld->type;
   LLVMTypeRef vec_type = lp_build_vec_type(type);
   LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
   LLVMValueRef int_part = NULL;
   LLVMValueRef frac_part = NULL;
   LLVMValueRef exp2_int = NULL;
   LLVMValueRef exp2_frac = NULL;
   LLVMValueRef exp2_val = NULL;

   if (p_exp2_int_part || p_frac_part || p_exp2) {
      LLVMValueRef biased;
      LLVMValueRef int_as_float;

      /* TODO: optimize the constant case */
      if (LLVMIsConstant(x)) {
         debug_printf("%s: inefficient/imprecise constant arithmetic\n",
                      __FUNCTION__);
      }

      assert(type.floating && type.width == 32);

      /* Clamp the argument from above, then from below. */
      x = lp_build_min(bld, x, lp_build_const_scalar(type, 129.0));
      x = lp_build_max(bld, x, lp_build_const_scalar(type, -126.99999));

      /* ipart = int(x - 0.5) */
      biased = LLVMBuildSub(bld->builder, x,
                            lp_build_const_scalar(type, 0.5f), "");
      int_part = LLVMBuildFPToSI(bld->builder, biased, int_vec_type, "");

      /* fpart = x - ipart */
      int_as_float = LLVMBuildSIToFP(bld->builder, int_part, vec_type, "");
      frac_part = LLVMBuildSub(bld->builder, x, int_as_float, "");
   }

   if (p_exp2_int_part || p_exp2) {
      LLVMValueRef exponent_bits;

      /* expipart = (float) (1 << ipart) */
      exponent_bits = LLVMBuildAdd(bld->builder, int_part,
                                   lp_build_int_const_scalar(type, 127), "");
      exponent_bits = LLVMBuildShl(bld->builder, exponent_bits,
                                   lp_build_int_const_scalar(type, 23), "");
      exp2_int = LLVMBuildBitCast(bld->builder, exponent_bits, vec_type, "");
   }

   if (p_exp2) {
      exp2_frac = lp_build_polynomial(bld, frac_part,
                                      lp_build_exp2_polynomial,
                                      Elements(lp_build_exp2_polynomial));

      exp2_val = LLVMBuildMul(bld->builder, exp2_int, exp2_frac, "");
   }

   if (p_exp2_int_part) {
      *p_exp2_int_part = exp2_int;
   }
   if (p_frac_part) {
      *p_frac_part = frac_part;
   }
   if (p_exp2) {
      *p_exp2 = exp2_val;
   }
}
/*
 * Test: generating code for a number literal node.
 *
 * Builds a number node for 10, runs kal_codegen on it with NULL passed for the
 * two remaining arguments, and checks that the produced value has the LLVM
 * double type and is a constant. Returns 0 when the end of the test is
 * reached.
 */
int test_kal_codegen_number() {
    kal_ast_node *node = kal_ast_number_create(10);
    LLVMValueRef value = kal_codegen(node, NULL, NULL);
    LLVMTypeRef type = LLVMTypeOf(value);
    /* The literal must come out as a double... */
    mu_assert(LLVMGetTypeKind(type) == LLVMDoubleTypeKind, "");
    /* ...and as a constant rather than an emitted instruction. */
    mu_assert(LLVMIsConstant(value), "");
    /* NOTE(review): if mu_assert returns on failure, node is not freed on that path - confirm. */
    kal_ast_node_free(node);
    return 0;
}
/**
 * Generate a - b.
 *
 * Subtracting the context's zero returns a, any undef operand gives undef and
 * a - a gives zero. For normalized types, subtracting the context's one gives
 * zero, and 128-bit integer vectors of 8 or 16 bit elements use the SSE2
 * saturating subtract intrinsics when SSE2 is available. Otherwise the
 * difference is folded (two constants) or emitted as a sub, and for
 * normalized floating or fixed point types it is then clamped to at least
 * zero.
 */
LLVMValueRef
lp_build_sub(struct lp_build_context *bld,
             LLVMValueRef a,
             LLVMValueRef b)
{
   const struct lp_type type = bld->type;
   LLVMValueRef res;

   if (b == bld->zero)
      return a;
   if (a == bld->undef || b == bld->undef)
      return bld->undef;
   if (a == b)
      return bld->zero;

   if (type.norm) {
      const char *intrinsic = NULL;

      if (b == bld->one)
         return bld->zero;

      if (util_cpu_caps.has_sse2 &&
          type.width * type.length == 128 &&
          !type.floating && !type.fixed) {
         switch (type.width) {
         case 8:
            intrinsic = type.sign ? "llvm.x86.sse2.psubs.b"
                                  : "llvm.x86.sse2.psubus.b";
            break;
         case 16:
            intrinsic = type.sign ? "llvm.x86.sse2.psubs.w"
                                  : "llvm.x86.sse2.psubus.w";
            break;
         default:
            break;
         }
      }

      if (intrinsic) {
         return lp_build_intrinsic_binary(bld->builder, intrinsic,
                                          lp_build_vec_type(bld->type), a, b);
      }
   }

   if (LLVMIsConstant(a) && LLVMIsConstant(b)) {
      res = LLVMConstSub(a, b);
   } else {
      res = LLVMBuildSub(bld->builder, a, b, "");
   }

   /* clamp to floor of 0 */
   if (type.norm && (type.floating || type.fixed)) {
      res = lp_build_max_simple(bld, res, bld->zero);
   }

   return res;
}
/**
 * Build num / den as a floating point division.
 *
 * When the builder returns a non-constant value, the context's
 * fpmath_md_2p5_ulp metadata is attached to it under fpmath_md_kind.
 */
LLVMValueRef ac_build_fdiv(struct ac_llvm_context *ctx,
			   LLVMValueRef num,
			   LLVMValueRef den)
{
	LLVMValueRef quotient = LLVMBuildFDiv(ctx->builder, num, den, "");

	/* A folded constant is left without metadata. */
	if (LLVMIsConstant(quotient))
		return quotient;

	LLVMSetMetadata(quotient, ctx->fpmath_md_kind, ctx->fpmath_md_2p5_ulp);
	return quotient;
}
/**
 * Generate the complement of a: ~a for unsigned normalized integer types,
 * 1 - a for every other type.
 *
 * The context's one and zero map directly onto each other. Constants are
 * folded; other values get an instruction emitted.
 */
LLVMValueRef
lp_build_comp(struct lp_build_context *bld,
              LLVMValueRef a)
{
   const struct lp_type type = bld->type;
   const int bitwise = type.norm && !type.floating && !type.fixed && !type.sign;

   if (a == bld->one)
      return bld->zero;
   if (a == bld->zero)
      return bld->one;

   if (bitwise) {
      return LLVMIsConstant(a) ? LLVMConstNot(a)
                               : LLVMBuildNot(bld->builder, a, "");
   }

   return LLVMIsConstant(a) ? LLVMConstSub(bld->one, a)
                            : LLVMBuildSub(bld->builder, bld->one, a, "");
}
/*
 * Test: generating code for a binary expression node.
 *
 * Creates a module named "kal" and a builder, builds a KAL_BINOP_PLUS node
 * over the number nodes 20 and 30, runs kal_codegen on it, and checks that the
 * produced value has the LLVM double type and is a constant. Returns 0 when
 * the end of the test is reached.
 */
int test_kal_codegen_binary_expr() {
    LLVMModuleRef module = LLVMModuleCreateWithName("kal");
    LLVMBuilderRef builder = LLVMCreateBuilder();
    kal_ast_node *lhs = kal_ast_number_create(20);
    kal_ast_node *rhs = kal_ast_number_create(30);
    kal_ast_node *node = kal_ast_binary_expr_create(KAL_BINOP_PLUS, lhs, rhs);
    LLVMValueRef value = kal_codegen(node, module, builder);
    /* The sum of two number literals must be a double... */
    mu_assert(LLVMGetTypeKind(LLVMTypeOf(value)) == LLVMDoubleTypeKind, "");
    /* ...and a constant. */
    mu_assert(LLVMIsConstant(value), "");
    /* NOTE(review): if mu_assert returns on failure, the cleanup below is skipped on that path - confirm. */
    LLVMDisposeBuilder(builder);
    LLVMDisposeModule(module);
    /* Only the parent node is freed here; presumably it owns lhs and rhs - verify against kal_ast_node_free. */
    kal_ast_node_free(node);
    return 0;
}
/**
 * Generate left >> right.
 *
 * The signedness of the left operand's type selects an arithmetic shift
 * (signed) or a logical shift (unsigned). Returns NULL if either operand fails
 * to generate. Two constant operands are folded; otherwise a shift
 * instruction is emitted.
 */
LLVMValueRef gen_shr(compile_t* c, ast_t* left, ast_t* right)
{
  // Determined from the AST before the operands are generated.
  bool arithmetic = is_signed(c->opt, ast_type(left));

  LLVMValueRef value = gen_expr(c, left);
  LLVMValueRef amount = gen_expr(c, right);

  if(!value || !amount)
    return NULL;

  if(!LLVMIsConstant(value) || !LLVMIsConstant(amount))
  {
    return arithmetic
      ? LLVMBuildAShr(c->builder, value, amount, "")
      : LLVMBuildLShr(c->builder, value, amount, "");
  }

  return arithmetic
    ? LLVMConstAShr(value, amount)
    : LLVMConstLShr(value, amount);
}
static void emit_fdiv(const struct lp_build_tgsi_action *action, struct lp_build_tgsi_context *bld_base, struct lp_build_emit_data *emit_data) { struct si_shader_context *ctx = si_shader_context(bld_base); emit_data->output[emit_data->chan] = LLVMBuildFDiv(bld_base->base.gallivm->builder, emit_data->args[0], emit_data->args[1], ""); /* Use v_rcp_f32 instead of precise division. */ if (HAVE_LLVM >= 0x0309 && !LLVMIsConstant(emit_data->output[emit_data->chan])) LLVMSetMetadata(emit_data->output[emit_data->chan], ctx->fpmath_md_kind, ctx->fpmath_md_2p5_ulp); }
/**
 * Generate 1 / a.
 *
 * The context's zero gives undef, while its one and undef values are returned
 * unchanged. The type is asserted to be floating point. A constant is folded.
 * With SSE available and a 4 x 32-bit type the llvm.x86.sse.rcp.ps intrinsic
 * is used; otherwise a division of one by a is emitted.
 */
LLVMValueRef
lp_build_rcp(struct lp_build_context *bld,
             LLVMValueRef a)
{
   const struct lp_type type = bld->type;

   if (a == bld->zero)
      return bld->undef;

   /* 1/1 is one and 1/undef is undef, i.e. the argument itself. */
   if (a == bld->one || a == bld->undef)
      return a;

   assert(type.floating);

   if (LLVMIsConstant(a))
      return LLVMConstFDiv(bld->one, a);

   if (!util_cpu_caps.has_sse || type.width != 32 || type.length != 4)
      return LLVMBuildFDiv(bld->builder, bld->one, a, "");

   /* FIXME: improve precision */
   return lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rcp.ps",
                                   lp_build_vec_type(type), a);
}
struct cl2llvm_val_t *llvm_type_cast(struct cl2llvm_val_t * original_val, struct cl2llvmTypeWrap *totype_w_sign) { struct cl2llvm_val_t *llvm_val = cl2llvm_val_create(); int i; struct cl2llvmTypeWrap *elem_type; struct cl2llvm_val_t *cast_original_val; LLVMValueRef index; LLVMValueRef vector_addr; LLVMValueRef vector; LLVMValueRef const_elems[16]; LLVMTypeRef fromtype = cl2llvmTypeWrapGetLlvmType(original_val->type); LLVMTypeRef totype = cl2llvmTypeWrapGetLlvmType(totype_w_sign); int fromsign = cl2llvmTypeWrapGetSign(original_val->type); int tosign = cl2llvmTypeWrapGetSign(totype_w_sign); /*By default the return value is the same as the original_val*/ llvm_val->val = original_val->val; cl2llvmTypeWrapSetLlvmType(llvm_val->type, cl2llvmTypeWrapGetLlvmType(original_val->type)); cl2llvmTypeWrapSetSign(llvm_val->type, cl2llvmTypeWrapGetSign(original_val->type)); snprintf(temp_var_name, sizeof temp_var_name, "tmp_%d", temp_var_count++); /* Check that fromtype is not a vector, unless both types are identical. 
*/ if (LLVMGetTypeKind(fromtype) == LLVMVectorTypeKind) { if ((LLVMGetVectorSize(fromtype) != LLVMGetVectorSize(totype) || LLVMGetElementType(fromtype) != LLVMGetElementType(totype)) || fromsign != tosign) { if (LLVMGetTypeKind(totype) == LLVMVectorTypeKind) cl2llvm_yyerror("Casts between vector types are forbidden"); cl2llvm_yyerror("A vector may not be cast to any other type."); } } /* If totype is a vector, create a vector whose components are equal to original_val */ if (LLVMGetTypeKind(totype) == LLVMVectorTypeKind && LLVMGetTypeKind(fromtype) != LLVMVectorTypeKind) { /*Go to entry block and declare vector*/ LLVMPositionBuilder(cl2llvm_builder, cl2llvm_current_function->entry_block, cl2llvm_current_function->branch_instr); snprintf(temp_var_name, sizeof temp_var_name, "tmp_%d", temp_var_count++); vector_addr = LLVMBuildAlloca(cl2llvm_builder, totype, temp_var_name); LLVMPositionBuilderAtEnd(cl2llvm_builder, current_basic_block); /* Load vector */ snprintf(temp_var_name, sizeof temp_var_name, "tmp_%d", temp_var_count++); vector = LLVMBuildLoad(cl2llvm_builder, vector_addr, temp_var_name); /* Create object to represent element type of totype */ elem_type = cl2llvmTypeWrapCreate(LLVMGetElementType(totype), tosign); /* If original_val is constant create a constant vector */ if (LLVMIsConstant(original_val->val)) { cast_original_val = llvm_type_cast(original_val, elem_type); for (i = 0; i < LLVMGetVectorSize(totype); i++) const_elems[i] = cast_original_val->val; vector = LLVMConstVector(const_elems, LLVMGetVectorSize(totype)); llvm_val->val = vector; cl2llvm_val_free(cast_original_val); } /* If original value is not constant insert elements */ else { for (i = 0; i < LLVMGetVectorSize(totype); i++) { index = LLVMConstInt(LLVMInt32Type(), i, 0); cast_original_val = llvm_type_cast(original_val, elem_type); snprintf(temp_var_name, sizeof temp_var_name, "tmp_%d", temp_var_count++); vector = LLVMBuildInsertElement(cl2llvm_builder, vector, cast_original_val->val, index, 
temp_var_name); cl2llvm_val_free(cast_original_val); } } cl2llvmTypeWrapFree(elem_type); llvm_val->val = vector; } if (fromtype == LLVMInt64Type()) { if (totype == LLVMDoubleType()) { if (fromsign) { llvm_val->val = LLVMBuildSIToFP(cl2llvm_builder, original_val->val, LLVMDoubleType(), temp_var_name); } else { llvm_val->val = LLVMBuildUIToFP(cl2llvm_builder, original_val->val, LLVMDoubleType(), temp_var_name); } cl2llvmTypeWrapSetSign(llvm_val->type, 1); } else if (totype == LLVMFloatType()) { if (fromsign) { llvm_val->val = LLVMBuildSIToFP(cl2llvm_builder, original_val->val, LLVMFloatType(), temp_var_name); } else { llvm_val->val = LLVMBuildUIToFP(cl2llvm_builder, original_val->val, LLVMFloatType(), temp_var_name); } cl2llvmTypeWrapSetSign(llvm_val->type, 1); } else if (totype == LLVMHalfType()) { if (fromsign) { llvm_val->val = LLVMBuildSIToFP(cl2llvm_builder, original_val->val, LLVMHalfType(), temp_var_name); } else { llvm_val->val = LLVMBuildUIToFP(cl2llvm_builder, original_val->val, LLVMHalfType(), temp_var_name); } cl2llvmTypeWrapSetSign(llvm_val->type, 1); } else if (totype == LLVMInt64Type()) { if (tosign) cl2llvmTypeWrapSetSign(llvm_val->type, 1); else cl2llvmTypeWrapSetSign(llvm_val->type, 0); temp_var_count--; } else if (totype == LLVMInt32Type()) { llvm_val->val = LLVMBuildTrunc(cl2llvm_builder, original_val->val, LLVMInt32Type(), temp_var_name); if(tosign) cl2llvmTypeWrapSetSign(llvm_val->type, 1); else cl2llvmTypeWrapSetSign(llvm_val->type, 0); } else if (totype == LLVMInt16Type()) { llvm_val->val = LLVMBuildTrunc(cl2llvm_builder, original_val->val, LLVMInt16Type(), temp_var_name); if(tosign) cl2llvmTypeWrapSetSign(llvm_val->type, 1); else cl2llvmTypeWrapSetSign(llvm_val->type, 0); } else if (totype == LLVMInt8Type()) { llvm_val->val = LLVMBuildTrunc(cl2llvm_builder, original_val->val, LLVMInt8Type(), temp_var_name); if(tosign) cl2llvmTypeWrapSetSign(llvm_val->type, 1); else cl2llvmTypeWrapSetSign(llvm_val->type, 0); } else if (totype == 
LLVMInt1Type()) { llvm_val->val = LLVMBuildTrunc(cl2llvm_builder, original_val->val, LLVMInt1Type(), temp_var_name); if(tosign) cl2llvmTypeWrapSetSign(llvm_val->type, 1); else cl2llvmTypeWrapSetSign(llvm_val->type, 0); } } else if (fromtype == LLVMInt32Type()) { if (totype == LLVMDoubleType()) { if (fromsign) { llvm_val->val = LLVMBuildSIToFP(cl2llvm_builder, original_val->val, LLVMDoubleType(), temp_var_name); } else { llvm_val->val = LLVMBuildUIToFP(cl2llvm_builder, original_val->val, LLVMDoubleType(), temp_var_name); } cl2llvmTypeWrapSetSign(llvm_val->type, 1); } else if (totype == LLVMFloatType()) { if (fromsign) { llvm_val->val = LLVMBuildSIToFP(cl2llvm_builder, original_val->val, LLVMFloatType(), temp_var_name); } else { llvm_val->val = LLVMBuildUIToFP(cl2llvm_builder, original_val->val, LLVMFloatType(), temp_var_name); } cl2llvmTypeWrapSetSign(llvm_val->type, 1); } else if (totype == LLVMHalfType()) { if (fromsign) { llvm_val->val = LLVMBuildSIToFP(cl2llvm_builder, original_val->val, LLVMHalfType(), temp_var_name); } else { llvm_val->val = LLVMBuildUIToFP(cl2llvm_builder, original_val->val, LLVMHalfType(), temp_var_name); } cl2llvmTypeWrapSetSign(llvm_val->type, 1); } else if (totype == LLVMInt64Type()) { if (fromsign) { llvm_val->val = LLVMBuildSExt(cl2llvm_builder, original_val->val, LLVMInt64Type(), temp_var_name); } else { llvm_val->val = LLVMBuildZExt(cl2llvm_builder, original_val->val, LLVMInt64Type(), temp_var_name); } if (tosign) cl2llvmTypeWrapSetSign(llvm_val->type, 1); else cl2llvmTypeWrapSetSign(llvm_val->type, 0); } else if (totype == LLVMInt32Type()) { if(tosign) cl2llvmTypeWrapSetSign(llvm_val->type, 1); else cl2llvmTypeWrapSetSign(llvm_val->type, 0); temp_var_count--; } else if (totype == LLVMInt16Type()) { llvm_val->val = LLVMBuildTrunc(cl2llvm_builder, original_val->val, LLVMInt16Type(), temp_var_name); if(tosign) cl2llvmTypeWrapSetSign(llvm_val->type, 1); else cl2llvmTypeWrapSetSign(llvm_val->type, 0); } else if (totype == LLVMInt8Type()) 
{ llvm_val->val = LLVMBuildTrunc(cl2llvm_builder, original_val->val, LLVMInt8Type(), temp_var_name); if(tosign) cl2llvmTypeWrapSetSign(llvm_val->type, 1); else cl2llvmTypeWrapSetSign(llvm_val->type, 0); } else if (totype == LLVMInt1Type()) { llvm_val->val = LLVMBuildTrunc(cl2llvm_builder, original_val->val, LLVMInt1Type(), temp_var_name); if(tosign) cl2llvmTypeWrapSetSign(llvm_val->type, 1); else cl2llvmTypeWrapSetSign(llvm_val->type, 0); } } else if (fromtype == LLVMInt16Type()) { if (totype == LLVMDoubleType()) { if (fromsign) { llvm_val->val = LLVMBuildSIToFP(cl2llvm_builder, original_val->val, LLVMDoubleType(), temp_var_name); } else { llvm_val->val = LLVMBuildUIToFP(cl2llvm_builder, original_val->val, LLVMDoubleType(), temp_var_name); } cl2llvmTypeWrapSetSign(llvm_val->type, 1); } else if (totype == LLVMFloatType()) { if (fromsign) { llvm_val->val = LLVMBuildSIToFP(cl2llvm_builder, original_val->val, LLVMFloatType(), temp_var_name); } else { llvm_val->val = LLVMBuildUIToFP(cl2llvm_builder, original_val->val, LLVMFloatType(), temp_var_name); } cl2llvmTypeWrapSetSign(llvm_val->type, 1); } else if (totype == LLVMHalfType()) { if (fromsign) { llvm_val->val = LLVMBuildSIToFP(cl2llvm_builder, original_val->val, LLVMHalfType(), temp_var_name); } else { llvm_val->val = LLVMBuildUIToFP(cl2llvm_builder, original_val->val, LLVMHalfType(), temp_var_name); } cl2llvmTypeWrapSetSign(llvm_val->type, 1); } else if (totype == LLVMInt64Type()) { if (fromsign) { llvm_val->val = LLVMBuildSExt(cl2llvm_builder, original_val->val, LLVMInt64Type(), temp_var_name); } else { llvm_val->val = LLVMBuildZExt(cl2llvm_builder, original_val->val, LLVMInt64Type(), temp_var_name); } if (tosign) cl2llvmTypeWrapSetSign(llvm_val->type, 1); else cl2llvmTypeWrapSetSign(llvm_val->type, 0); } else if (totype == LLVMInt32Type()) { if (fromsign) { llvm_val->val = LLVMBuildSExt(cl2llvm_builder, original_val->val, LLVMInt32Type(), temp_var_name); } else { llvm_val->val = LLVMBuildZExt(cl2llvm_builder, 
original_val->val, LLVMInt32Type(), temp_var_name); } if(tosign) cl2llvmTypeWrapSetSign(llvm_val->type, 1); else cl2llvmTypeWrapSetSign(llvm_val->type, 0); } else if (totype == LLVMInt16Type()) { if(tosign) cl2llvmTypeWrapSetSign(llvm_val->type, 1); else cl2llvmTypeWrapSetSign(llvm_val->type, 0); temp_var_count--; } else if (totype == LLVMInt8Type()) { llvm_val->val = LLVMBuildTrunc(cl2llvm_builder, original_val->val, LLVMInt8Type(), temp_var_name); if(tosign) cl2llvmTypeWrapSetSign(llvm_val->type, 1); else cl2llvmTypeWrapSetSign(llvm_val->type, 0); } else if (totype == LLVMInt1Type()) { llvm_val->val = LLVMBuildTrunc(cl2llvm_builder, original_val->val, LLVMInt1Type(), temp_var_name); if(tosign) cl2llvmTypeWrapSetSign(llvm_val->type, 1); else cl2llvmTypeWrapSetSign(llvm_val->type, 0); } } else if (fromtype == LLVMInt8Type()) { if (totype == LLVMDoubleType()) { if (fromsign) { llvm_val->val = LLVMBuildSIToFP(cl2llvm_builder, original_val->val, LLVMDoubleType(), temp_var_name); } else { llvm_val->val = LLVMBuildUIToFP(cl2llvm_builder, original_val->val, LLVMDoubleType(), temp_var_name); } cl2llvmTypeWrapSetSign(llvm_val->type, 1); } else if (totype == LLVMFloatType()) { if (fromsign) { llvm_val->val = LLVMBuildSIToFP(cl2llvm_builder, original_val->val, LLVMFloatType(), temp_var_name); } else { llvm_val->val = LLVMBuildUIToFP(cl2llvm_builder, original_val->val, LLVMFloatType(), temp_var_name); } cl2llvmTypeWrapSetSign(llvm_val->type, 1); } else if (totype == LLVMHalfType()) { if (fromsign) { llvm_val->val = LLVMBuildSIToFP(cl2llvm_builder, original_val->val, LLVMHalfType(), temp_var_name); } else { llvm_val->val = LLVMBuildUIToFP(cl2llvm_builder, original_val->val, LLVMHalfType(), temp_var_name); } cl2llvmTypeWrapSetSign(llvm_val->type, 1); } else if (totype == LLVMInt64Type()) { if (fromsign) { llvm_val->val = LLVMBuildSExt(cl2llvm_builder, original_val->val, LLVMInt64Type(), temp_var_name); } else { llvm_val->val = LLVMBuildZExt(cl2llvm_builder, original_val->val, 
LLVMInt64Type(), temp_var_name); } if (tosign) cl2llvmTypeWrapSetSign(llvm_val->type, 1); else cl2llvmTypeWrapSetSign(llvm_val->type, 0); } else if (totype == LLVMInt32Type()) { if (fromsign) { llvm_val->val = LLVMBuildSExt(cl2llvm_builder, original_val->val, LLVMInt32Type(), temp_var_name); } else { llvm_val->val = LLVMBuildZExt(cl2llvm_builder, original_val->val, LLVMInt32Type(), temp_var_name); } if(tosign) cl2llvmTypeWrapSetSign(llvm_val->type, 1); else cl2llvmTypeWrapSetSign(llvm_val->type, 0); } else if (totype == LLVMInt16Type()) { if (fromsign) { llvm_val->val = LLVMBuildSExt(cl2llvm_builder, original_val->val, LLVMInt16Type(), temp_var_name); } else { llvm_val->val = LLVMBuildZExt(cl2llvm_builder, original_val->val, LLVMInt16Type(), temp_var_name); } if(tosign) cl2llvmTypeWrapSetSign(llvm_val->type, 1); else cl2llvmTypeWrapSetSign(llvm_val->type, 0); } else if (totype == LLVMInt8Type()) { if(tosign) cl2llvmTypeWrapSetSign(llvm_val->type, 1); else cl2llvmTypeWrapSetSign(llvm_val->type, 0); temp_var_count--; } else if (totype == LLVMInt1Type()) { llvm_val->val = LLVMBuildTrunc(cl2llvm_builder, original_val->val, LLVMInt1Type(), temp_var_name); if(tosign) cl2llvmTypeWrapSetSign(llvm_val->type, 1); else cl2llvmTypeWrapSetSign(llvm_val->type, 0); } } else if (fromtype == LLVMInt1Type()) { if (totype == LLVMDoubleType()) { if (fromsign) { llvm_val->val = LLVMBuildSIToFP(cl2llvm_builder, original_val->val, LLVMDoubleType(), temp_var_name); } else { llvm_val->val = LLVMBuildUIToFP(cl2llvm_builder, original_val->val, LLVMDoubleType(), temp_var_name); } cl2llvmTypeWrapSetSign(llvm_val->type, 1); } else if (totype == LLVMFloatType()) { if (fromsign) { llvm_val->val = LLVMBuildSIToFP(cl2llvm_builder, original_val->val, LLVMFloatType(), temp_var_name); } else { llvm_val->val = LLVMBuildUIToFP(cl2llvm_builder, original_val->val, LLVMFloatType(), temp_var_name); } cl2llvmTypeWrapSetSign(llvm_val->type, 1); } else if (totype == LLVMHalfType()) { if (fromsign) { 
llvm_val->val = LLVMBuildSIToFP(cl2llvm_builder, original_val->val, LLVMHalfType(), temp_var_name); } else { llvm_val->val = LLVMBuildUIToFP(cl2llvm_builder, original_val->val, LLVMHalfType(), temp_var_name); } cl2llvmTypeWrapSetSign(llvm_val->type, 1); } else if (totype == LLVMInt64Type()) { if (fromsign) { llvm_val->val = LLVMBuildSExt(cl2llvm_builder, original_val->val, LLVMInt64Type(), temp_var_name); } else { llvm_val->val = LLVMBuildZExt(cl2llvm_builder, original_val->val, LLVMInt64Type(), temp_var_name); } if (tosign) cl2llvmTypeWrapSetSign(llvm_val->type, 1); else cl2llvmTypeWrapSetSign(llvm_val->type, 0); } else if (totype == LLVMInt32Type()) { if (fromsign) { llvm_val->val = LLVMBuildSExt(cl2llvm_builder, original_val->val, LLVMInt32Type(), temp_var_name); } else { llvm_val->val = LLVMBuildZExt(cl2llvm_builder, original_val->val, LLVMInt32Type(), temp_var_name); } if(tosign) cl2llvmTypeWrapSetSign(llvm_val->type, 1); else cl2llvmTypeWrapSetSign(llvm_val->type, 0); } else if (totype == LLVMInt16Type()) { if (fromsign) { llvm_val->val = LLVMBuildSExt(cl2llvm_builder, original_val->val, LLVMInt16Type(), temp_var_name); } else { llvm_val->val = LLVMBuildZExt(cl2llvm_builder, original_val->val, LLVMInt16Type(), temp_var_name); } if(tosign) cl2llvmTypeWrapSetSign(llvm_val->type, 1); else cl2llvmTypeWrapSetSign(llvm_val->type, 0); } else if (totype == LLVMInt8Type()) { if (fromsign) { llvm_val->val = LLVMBuildSExt(cl2llvm_builder, original_val->val, LLVMInt8Type(), temp_var_name); } else { llvm_val->val = LLVMBuildZExt(cl2llvm_builder, original_val->val, LLVMInt8Type(), temp_var_name); } if(tosign) cl2llvmTypeWrapSetSign(llvm_val->type, 1); else cl2llvmTypeWrapSetSign(llvm_val->type, 0); } else if (totype == LLVMInt1Type()) { if(tosign) cl2llvmTypeWrapSetSign(llvm_val->type, 1); else cl2llvmTypeWrapSetSign(llvm_val->type, 0); temp_var_count--; } } /*We now know that from type must be a floating point.*/ /*Floating point to signed integer conversions*/ else if 
(tosign && LLVMGetTypeKind(totype) == 8) { if (totype == LLVMInt64Type()) { llvm_val->val = LLVMBuildFPToSI(cl2llvm_builder, original_val->val, LLVMInt64Type(), temp_var_name); } else if (totype == LLVMInt32Type()) { llvm_val->val = LLVMBuildFPToSI(cl2llvm_builder, original_val->val, LLVMInt32Type(), temp_var_name); } else if (totype == LLVMInt16Type()) { llvm_val->val = LLVMBuildFPToSI(cl2llvm_builder, original_val->val, LLVMInt16Type(), temp_var_name); } else if (totype == LLVMInt8Type()) { llvm_val->val = LLVMBuildFPToSI(cl2llvm_builder, original_val->val, LLVMInt8Type(), temp_var_name); } else if (totype == LLVMInt1Type()) { llvm_val->val = LLVMBuildFPToSI(cl2llvm_builder, original_val->val, LLVMInt1Type(), temp_var_name); } cl2llvmTypeWrapSetSign(llvm_val->type, 1); } /*Floating point to unsigned integer conversions*/ else if (!tosign) { if (totype == LLVMInt64Type()) { llvm_val->val = LLVMBuildFPToUI(cl2llvm_builder, original_val->val, LLVMInt64Type(), temp_var_name); } else if (totype == LLVMInt32Type()) { llvm_val->val = LLVMBuildFPToUI(cl2llvm_builder, original_val->val, LLVMInt32Type(), temp_var_name); } else if (totype == LLVMInt16Type()) { llvm_val->val = LLVMBuildFPToUI(cl2llvm_builder, original_val->val, LLVMInt16Type(), temp_var_name); } else if (totype == LLVMInt8Type()) { llvm_val->val = LLVMBuildFPToUI(cl2llvm_builder, original_val->val, LLVMInt8Type(), temp_var_name); } else if (totype == LLVMInt1Type()) { llvm_val->val = LLVMBuildFPToUI(cl2llvm_builder, original_val->val, LLVMInt1Type(), temp_var_name); } cl2llvmTypeWrapSetSign(llvm_val->type, 0); } else if (totype == LLVMDoubleType()) { llvm_val->val = LLVMBuildFPExt(cl2llvm_builder, original_val->val, LLVMDoubleType(), temp_var_name); cl2llvmTypeWrapSetSign(llvm_val->type, 1); } else if (totype == LLVMFloatType()) { if (fromtype == LLVMDoubleType()) { llvm_val->val = LLVMBuildFPTrunc(cl2llvm_builder, original_val->val, LLVMFloatType(), temp_var_name); } else if (fromtype == LLVMHalfType()) { 
llvm_val->val = LLVMBuildFPExt(cl2llvm_builder, original_val->val, LLVMFloatType(), temp_var_name); } cl2llvmTypeWrapSetSign(llvm_val->type, 1); } else if (totype == LLVMHalfType()) { llvm_val->val = LLVMBuildFPTrunc(cl2llvm_builder, original_val->val, LLVMHalfType(), temp_var_name); cl2llvmTypeWrapSetSign(llvm_val->type, 1); } cl2llvmTypeWrapSetLlvmType(llvm_val->type, totype); cl2llvmTypeWrapSetSign(llvm_val->type, tosign); return llvm_val; }
/**
 * Generate an approximation of log2(x) and/or its building blocks.
 *
 * See http://www.devmaster.net/forums/showthread.php?p=43580
 *
 * Each output pointer is optional; only the values needed for the requested
 * outputs are generated. The type is asserted to be 32-bit floating point.
 *
 * @param p_exp         receives the bits of x masked with 0x7f800000, still
 *                      as an integer vector
 * @param p_floor_log2  receives (masked bits >> 23) - 127 converted to float
 * @param p_log2        receives the polynomial approximation of log2 of the
 *                      mantissa plus the value stored in p_floor_log2
 */
void
lp_build_log2_approx(struct lp_build_context *bld,
                     LLVMValueRef x,
                     LLVMValueRef *p_exp,
                     LLVMValueRef *p_floor_log2,
                     LLVMValueRef *p_log2)
{
   const struct lp_type type = bld->type;
   LLVMTypeRef vec_type = lp_build_vec_type(type);
   LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);

   LLVMValueRef exp_bits_mask = lp_build_int_const_scalar(type, 0x7f800000);
   LLVMValueRef mant_bits_mask = lp_build_int_const_scalar(type, 0x007fffff);
   LLVMValueRef one_bits = LLVMConstBitCast(bld->one, int_vec_type);

   LLVMValueRef x_bits = NULL;
   LLVMValueRef exp_bits = NULL;
   LLVMValueRef floor_log2 = NULL;
   LLVMValueRef log2_val = NULL;

   if (p_exp || p_floor_log2 || p_log2) {
      /* TODO: optimize the constant case */
      if (LLVMIsConstant(x)) {
         debug_printf("%s: inefficient/imprecise constant arithmetic\n",
                      __FUNCTION__);
      }

      assert(type.floating && type.width == 32);

      x_bits = LLVMBuildBitCast(bld->builder, x, int_vec_type, "");

      /* isolate the exponent bits of x */
      exp_bits = LLVMBuildAnd(bld->builder, x_bits, exp_bits_mask, "");
   }

   if (p_floor_log2 || p_log2) {
      LLVMValueRef biased;
      LLVMValueRef unbiased;

      /* shift the exponent down and remove the bias */
      biased = LLVMBuildLShr(bld->builder, exp_bits,
                             lp_build_int_const_scalar(type, 23), "");
      unbiased = LLVMBuildSub(bld->builder, biased,
                              lp_build_int_const_scalar(type, 127), "");
      floor_log2 = LLVMBuildSIToFP(bld->builder, unbiased, vec_type, "");
   }

   if (p_log2) {
      LLVMValueRef mantissa;
      LLVMValueRef poly;
      LLVMValueRef mantissa_minus_one;

      /* mant = (float) mantissa(x) */
      mantissa = LLVMBuildAnd(bld->builder, x_bits, mant_bits_mask, "");
      mantissa = LLVMBuildOr(bld->builder, mantissa, one_bits, "");
      mantissa = LLVMBuildBitCast(bld->builder, mantissa, vec_type, "");

      poly = lp_build_polynomial(bld, mantissa, lp_build_log2_polynomial,
                                 Elements(lp_build_log2_polynomial));

      /* This effectively increases the polynomial degree by one, but
       * ensures that log2(1) == 0 */
      mantissa_minus_one = LLVMBuildSub(bld->builder, mantissa, bld->one, "");
      poly = LLVMBuildMul(bld->builder, poly, mantissa_minus_one, "");

      log2_val = LLVMBuildAdd(bld->builder, poly, floor_log2, "");
   }

   if (p_exp) {
      *p_exp = exp_bits;
   }
   if (p_floor_log2) {
      *p_floor_log2 = floor_log2;
   }
   if (p_log2) {
      *p_log2 = log2_val;
   }
}
LLVMValueRef make_divmod(compile_t* c, ast_t* left, ast_t* right, const_binop const_f, const_binop const_ui, const_binop const_si, build_binop build_f, build_binop build_ui, build_binop build_si) { ast_t* type = ast_type(left); bool sign = is_signed(c->opt, type); LLVMValueRef l_value = gen_expr(c, left); LLVMValueRef r_value = gen_expr(c, right); if((l_value == NULL) || (r_value == NULL)) return NULL; if(!is_fp(r_value) && LLVMIsConstant(r_value) && (LLVMConstIntGetSExtValue(r_value) == 0) ) { ast_error(right, "constant divide or mod by zero"); return NULL; } if(LLVMIsConstant(l_value) && LLVMIsConstant(r_value)) { if(is_fp(l_value)) return const_f(l_value, r_value); if(sign) return const_si(l_value, r_value); return const_ui(l_value, r_value); } if(is_fp(l_value)) return build_f(c->builder, l_value, r_value, ""); // Setup additional blocks. LLVMBasicBlockRef insert = LLVMGetInsertBlock(c->builder); LLVMBasicBlockRef then_block = codegen_block(c, "div_then"); LLVMBasicBlockRef post_block = codegen_block(c, "div_post"); // Check for div by zero. LLVMTypeRef r_type = LLVMTypeOf(r_value); LLVMValueRef zero = LLVMConstInt(r_type, 0, false); LLVMValueRef cmp = LLVMBuildICmp(c->builder, LLVMIntNE, r_value, zero, ""); LLVMBuildCondBr(c->builder, cmp, then_block, post_block); // Divisor is not zero. LLVMPositionBuilderAtEnd(c->builder, then_block); LLVMValueRef result; if(sign) result = build_si(c->builder, l_value, r_value, ""); else result = build_ui(c->builder, l_value, r_value, ""); LLVMBuildBr(c->builder, post_block); // Phi node. LLVMPositionBuilderAtEnd(c->builder, post_block); LLVMValueRef phi = LLVMBuildPhi(c->builder, r_type, ""); LLVMAddIncoming(phi, &zero, &insert, 1); LLVMAddIncoming(phi, &result, &then_block, 1); return phi; }
/**
 * Generate a * b
 *
 * Operands that are the context's zero, one or undef values are resolved
 * without emitting any instruction. Unsigned-normalized 8-bit types are
 * multiplied in a wider 16-bit type and packed back. For fixed-point
 * types the raw product is shifted right by half the element width
 * (arithmetic shift when the type is signed, logical otherwise).
 */
LLVMValueRef
lp_build_mul(struct lp_build_context *bld,
             LLVMValueRef a,
             LLVMValueRef b)
{
   const struct lp_type type = bld->type;
   LLVMValueRef shift;
   LLVMValueRef product;
   int both_const;

   /* Trivial operands */
   if(a == bld->zero || b == bld->zero)
      return bld->zero;
   if(a == bld->one)
      return b;
   if(b == bld->one)
      return a;
   if(a == bld->undef || b == bld->undef)
      return bld->undef;

   /* Normalized integers */
   if(!(type.floating || type.fixed) && type.norm) {
      if(type.width == 8) {
         struct lp_type wide_type = lp_wider_type(type);
         LLVMValueRef a_lo, a_hi;
         LLVMValueRef b_lo, b_hi;
         LLVMValueRef prod_lo, prod_hi;

         lp_build_unpack2(bld->builder, type, wide_type, a, &a_lo, &a_hi);
         lp_build_unpack2(bld->builder, type, wide_type, b, &b_lo, &b_hi);

         /* PMULLW, PSRLW, PADDW */
         prod_lo = lp_build_mul_u8n(bld->builder, wide_type, a_lo, b_lo);
         prod_hi = lp_build_mul_u8n(bld->builder, wide_type, a_hi, b_hi);

         return lp_build_pack2(bld->builder, wide_type, type,
                               prod_lo, prod_hi);
      }

      /* FIXME */
      assert(0);
   }

   /* Fixed-point products need renormalizing by half the element width */
   shift = type.fixed ? lp_build_int_const_scalar(type, type.width/2) : NULL;

   both_const = LLVMIsConstant(a) && LLVMIsConstant(b);

   if(both_const)
      product = LLVMConstMul(a, b);
   else
      product = LLVMBuildMul(bld->builder, a, b, "");

   if(!shift)
      return product;

   if(both_const)
      return type.sign ? LLVMConstAShr(product, shift)
                       : LLVMConstLShr(product, shift);

   return type.sign ? LLVMBuildAShr(bld->builder, product, shift, "")
                    : LLVMBuildLShr(bld->builder, product, shift, "");
}
/**
 * OR a widened vector with a copy of itself shifted by whole channels.
 *
 * A positive shift is a shift left and a negative shift is a logical shift
 * right, in both cases by |shift| times the width of one channel of
 * bld->type. A zero shift trips the assert and leaves the value unchanged.
 */
static LLVMValueRef
swizzle_scalar_shift_or(struct lp_build_context *bld,
                        struct lp_type wide_type,
                        LLVMValueRef a,
                        int shift)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   const struct lp_type type = bld->type;
   LLVMValueRef moved = NULL;

   if (shift > 0) {
      moved = LLVMBuildShl(builder, a,
                           lp_build_const_int_vec(bld->gallivm, wide_type,
                                                  shift * type.width), "");
   } else if (shift < 0) {
      moved = LLVMBuildLShr(builder, a,
                            lp_build_const_int_vec(bld->gallivm, wide_type,
                                                   -shift * type.width), "");
   }

   assert(moved);
   if (!moved)
      return a;

   return LLVMBuildOr(builder, a, moved, "");
}


/**
 * Swizzle one channel into other channels.
 *
 * Replicates channel number `channel` of every group of `num_channels`
 * consecutive elements of `a` across that whole group. The context's
 * undef/zero/one values, and any value when num_channels is 1, are
 * returned unchanged.
 */
LLVMValueRef
lp_build_swizzle_scalar_aos(struct lp_build_context *bld,
                            LLVMValueRef a,
                            unsigned channel,
                            unsigned num_channels)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   const struct lp_type type = bld->type;
   const unsigned n = type.length;

   if (a == bld->undef || a == bld->zero || a == bld->one ||
       num_channels == 1)
      return a;

   assert(num_channels == 2 || num_channels == 4);

   /* XXX: SSE3 has PSHUFB which should be better than bitmasks, but forcing
    * using shuffles here actually causes worst results. More investigation is
    * needed. */
   if (LLVMIsConstant(a) || type.width >= 16) {
      /*
       * Shuffle.
       */
      LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context);
      LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
      unsigned base, lane;

      for (base = 0; base < n; base += num_channels) {
         /* Every lane of this group reads the same source element */
         LLVMValueRef src_index = LLVMConstInt(elem_type, base + channel, 0);

         for (lane = 0; lane < num_channels; ++lane)
            shuffles[base + lane] = src_index;
      }

      return LLVMBuildShuffleVector(builder, a, bld->undef,
                                    LLVMConstVector(shuffles, n), "");
   }

   if (num_channels == 2) {
      /*
       * Bit mask and shifts
       *
       *   XY XY .... XY  <= input
       *   0Y 0Y .... 0Y
       *   YY YY .... YY
       *   YY YY .... YY  <= output
       */
      struct lp_type type2 = type;
      int shift;

      a = LLVMBuildAnd(builder, a,
                       lp_build_const_mask_aos(bld->gallivm, type,
                                               1 << channel, num_channels),
                       "");

      /* Each element of the widened type covers one pair of channels */
      type2.floating = FALSE;
      type2.width *= 2;
      type2.length /= 2;

      a = LLVMBuildBitCast(builder, a,
                           lp_build_vec_type(bld->gallivm, type2), "");

      /*
       * Vector element 0 is always channel X.
       *
       *                        76 54 32 10 (array numbering)
       * Little endian reg in:  YX YX YX YX
       * Little endian reg out: YY YY YY YY if shift right (shift == -1)
       *                        XX XX XX XX if shift left (shift == 1)
       *
       *                        01 23 45 67 (array numbering)
       * Big endian reg in:     XY XY XY XY
       * Big endian reg out:    YY YY YY YY if shift left (shift == 1)
       *                        XX XX XX XX if shift right (shift == -1)
       *
       */
#ifdef PIPE_ARCH_LITTLE_ENDIAN
      shift = channel == 0 ? 1 : -1;
#else
      shift = channel == 0 ? -1 : 1;
#endif

      a = swizzle_scalar_shift_or(bld, type2, a, shift);

      return LLVMBuildBitCast(builder, a,
                              lp_build_vec_type(bld->gallivm, type), "");
   }
   else {
      /*
       * Bit mask and recursive shifts
       *
       * Little-endian registers:
       *
       *   7654 3210
       *   WZYX WZYX .... WZYX  <= input
       *   00Y0 00Y0 .... 00Y0  <= mask
       *   00YY 00YY .... 00YY  <= shift right 1 (shift amount -1)
       *   YYYY YYYY .... YYYY  <= shift left 2 (shift amount 2)
       *
       * Big-endian registers:
       *
       *   0123 4567
       *   XYZW XYZW .... XYZW  <= input
       *   0Y00 0Y00 .... 0Y00  <= mask
       *   YY00 YY00 .... YY00  <= shift left 1 (shift amount 1)
       *   YYYY YYYY .... YYYY  <= shift right 2 (shift amount -2)
       *
       * shifts[] gives little-endian shift amounts; we need to negate for
       * big-endian.
       */
      static const int shifts[4][2] = {
         { 1,  2},
         {-1,  2},
         { 1, -2},
         {-1, -2}
      };
      struct lp_type type4 = type;
      unsigned step;

      a = LLVMBuildAnd(builder, a,
                       lp_build_const_mask_aos(bld->gallivm, type,
                                               1 << channel, 4), "");

      /*
       * Build a type where each element is an integer that cover the four
       * channels.
       */
      type4.floating = FALSE;
      type4.width *= 4;
      type4.length /= 4;

      a = LLVMBuildBitCast(builder, a,
                           lp_build_vec_type(bld->gallivm, type4), "");

      for (step = 0; step < 2; ++step) {
         int shift = shifts[channel][step];

         /* See endianness diagram above */
#ifdef PIPE_ARCH_BIG_ENDIAN
         shift = -shift;
#endif

         a = swizzle_scalar_shift_or(bld, type4, a, shift);
      }

      return LLVMBuildBitCast(builder, a,
                              lp_build_vec_type(bld->gallivm, type), "");
   }
}
/**
 * Swizzle the channels of an AoS vector.
 *
 * swizzles[i] selects what ends up in channel i of every group of four
 * elements: one of the source channels (PIPE_SWIZZLE_X..W), a constant
 * (PIPE_SWIZZLE_0 / PIPE_SWIZZLE_1), or LP_BLD_SWIZZLE_DONTCARE.
 *
 * - The identity swizzle returns `a` unchanged.
 * - When all four selectors are equal, the work is delegated to
 *   lp_build_swizzle_scalar_aos() or a context constant is returned.
 * - Constants and types with elements of 16 bits or more use a vector
 *   shuffle; narrower types use bit masks and shifts on a widened type.
 */
LLVMValueRef
lp_build_swizzle_aos(struct lp_build_context *bld,
                     LLVMValueRef a,
                     const unsigned char swizzles[4])
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   const struct lp_type type = bld->type;
   const unsigned n = type.length;
   unsigned i, j;

   /* Identity swizzle: nothing to do. */
   if (swizzles[0] == PIPE_SWIZZLE_X &&
       swizzles[1] == PIPE_SWIZZLE_Y &&
       swizzles[2] == PIPE_SWIZZLE_Z &&
       swizzles[3] == PIPE_SWIZZLE_W) {
      return a;
   }

   /* All four channels take the same selector. */
   if (swizzles[0] == swizzles[1] &&
       swizzles[1] == swizzles[2] &&
       swizzles[2] == swizzles[3]) {
      switch (swizzles[0]) {
      case PIPE_SWIZZLE_X:
      case PIPE_SWIZZLE_Y:
      case PIPE_SWIZZLE_Z:
      case PIPE_SWIZZLE_W:
         return lp_build_swizzle_scalar_aos(bld, a, swizzles[0], 4);
      case PIPE_SWIZZLE_0:
         return bld->zero;
      case PIPE_SWIZZLE_1:
         return bld->one;
      case LP_BLD_SWIZZLE_DONTCARE:
         return bld->undef;
      default:
         assert(0);
         return bld->undef;
      }
   }

   if (LLVMIsConstant(a) ||
       type.width >= 16) {
      /*
       * Shuffle.
       *
       * The second shuffle operand (aux) carries the constants: its element
       * 0 is 0.0 and its element 1 is 1.0 when those are needed, everything
       * else is undef. Shuffle indices >= type.length select from aux.
       */
      LLVMValueRef undef = LLVMGetUndef(lp_build_elem_type(bld->gallivm, type));
      LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
      LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
      LLVMValueRef aux[LP_MAX_VECTOR_LENGTH];

      memset(aux, 0, sizeof aux);

      for(j = 0; j < n; j += 4) {
         for(i = 0; i < 4; ++i) {
            unsigned shuffle;
            switch (swizzles[i]) {
            default:
               assert(0);
               /* fall through */
            case PIPE_SWIZZLE_X:
            case PIPE_SWIZZLE_Y:
            case PIPE_SWIZZLE_Z:
            case PIPE_SWIZZLE_W:
               shuffle = j + swizzles[i];
               shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0);
               break;
            case PIPE_SWIZZLE_0:
               shuffle = type.length + 0;
               shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0);
               if (!aux[0]) {
                  aux[0] = lp_build_const_elem(bld->gallivm, type, 0.0);
               }
               break;
            case PIPE_SWIZZLE_1:
               shuffle = type.length + 1;
               shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0);
               if (!aux[1]) {
                  aux[1] = lp_build_const_elem(bld->gallivm, type, 1.0);
               }
               break;
            case LP_BLD_SWIZZLE_DONTCARE:
               shuffles[j + i] = LLVMGetUndef(i32t);
               break;
            }
         }
      }

      /* Fill the aux slots that no selector asked for. */
      for (i = 0; i < n; ++i) {
         if (!aux[i]) {
            aux[i] = undef;
         }
      }

      return LLVMBuildShuffleVector(builder, a,
                                    LLVMConstVector(aux, n),
                                    LLVMConstVector(shuffles, n), "");
   } else {
      /*
       * Bit mask and shifts.
       *
       * For example, this will convert BGRA to RGBA by doing
       *
       * Little endian:
       *   rgba = (bgra & 0x00ff0000) >> 16
       *        | (bgra & 0xff00ff00)
       *        | (bgra & 0x000000ff) << 16
       *
       * Big endian:
       *   rgba = (bgra & 0x0000ff00) << 16
       *        | (bgra & 0x00ff00ff)
       *        | (bgra & 0xff000000) >> 16
       *
       * This is necessary not only for faster cause, but because X86 backend
       * will refuse shuffles of <4 x i8> vectors
       */
      LLVMValueRef res;
      struct lp_type type4;
      unsigned cond = 0;
      unsigned chan;
      int shift;

      /*
       * Start with a mixture of 1 and 0.
       */
      for (chan = 0; chan < 4; ++chan) {
         if (swizzles[chan] == PIPE_SWIZZLE_1) {
            cond |= 1 << chan;
         }
      }
      res = lp_build_select_aos(bld, cond, bld->one, bld->zero, 4);

      /*
       * Build a type where each element is an integer that cover the four
       * channels.
       */
      type4 = type;
      type4.floating = FALSE;
      type4.width *= 4;
      type4.length /= 4;

      a = LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type4), "");
      res = LLVMBuildBitCast(builder, res, lp_build_vec_type(bld->gallivm, type4), "");

      /*
       * Mask and shift the channels, trying to group as many channels in the
       * same shift as possible.  The shift amount is positive for shifts left
       * and negative for shifts right.
       */
      for (shift = -3; shift <= 3; ++shift) {
         uint64_t mask = 0;

         assert(type4.width <= sizeof(mask)*8);

         /*
          * Vector element numbers follow the XYZW order, so 0 is always X,
          * etc.  After widening 4 times we have:
          *
          *                                3210
          * Little-endian register layout: WZYX
          *
          *                                0123
          * Big-endian register layout:    XYZW
          *
          * For little-endian, higher-numbered channels are obtained by a
          * shift right (negative shift amount) and lower-numbered channels by
          * a shift left (positive shift amount).  The opposite is true for
          * big-endian.
          */
         for (chan = 0; chan < 4; ++chan) {
            if (swizzles[chan] < 4) {
               /* We need to move channel swizzles[chan] into channel chan.
                * The distance is computed in signed arithmetic so that it
                * compares directly against the signed shift amount. */
#ifdef PIPE_ARCH_LITTLE_ENDIAN
               if ((int)swizzles[chan] - (int)chan == -shift) {
                  mask |= ((1ULL << type.width) - 1) << (swizzles[chan] * type.width);
               }
#else
               if ((int)swizzles[chan] - (int)chan == shift) {
                  mask |= ((1ULL << type.width) - 1) << (type4.width - type.width) >> (swizzles[chan] * type.width);
               }
#endif
            }
         }

         if (mask) {
            LLVMValueRef masked;
            LLVMValueRef shifted;
            if (0)
               debug_printf("shift = %i, mask = %" PRIx64 "\n", shift, mask);

            masked = LLVMBuildAnd(builder, a,
                                  lp_build_const_int_vec(bld->gallivm, type4, mask), "");
            if (shift > 0) {
               shifted = LLVMBuildShl(builder, masked,
                                      lp_build_const_int_vec(bld->gallivm, type4, shift*type.width), "");
            } else if (shift < 0) {
               shifted = LLVMBuildLShr(builder, masked,
                                       lp_build_const_int_vec(bld->gallivm, type4, -shift*type.width), "");
            } else {
               shifted = masked;
            }

            res = LLVMBuildOr(builder, res, shifted, "");
         }
      }

      return LLVMBuildBitCast(builder, res,
                              lp_build_vec_type(bld->gallivm, type), "");
   }
}
/**
 * Return mask ? a : b;
 *
 * mask is a bitwise mask, composed of 0 or ~0 for each element. Any other value
 * will yield unpredictable results.
 *
 * Strategy, in order: identical operands are returned directly; scalar
 * types use a plain select on the truncated mask; 128-bit vectors with
 * SSE4.1 and 256-bit vectors (element width >= 32) with AVX use a blend
 * intrinsic when none of the operands is constant; everything else goes
 * through lp_build_select_bitwise().
 */
LLVMValueRef
lp_build_select(struct lp_build_context *bld,
                LLVMValueRef mask,
                LLVMValueRef a,
                LLVMValueRef b)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMContextRef lc = bld->gallivm->context;
   struct lp_type type = bld->type;
   const unsigned total_bits = type.width * type.length;
   const char *intrinsic;
   LLVMTypeRef arg_type;
   LLVMValueRef args[3];
   LLVMValueRef res;
   int use_blend;

   assert(lp_check_value(type, a));
   assert(lp_check_value(type, b));

   if (a == b)
      return a;

   /* Scalar: a single i1 condition is enough */
   if (type.length == 1) {
      mask = LLVMBuildTrunc(builder, mask, LLVMInt1TypeInContext(lc), "");
      return LLVMBuildSelect(builder, mask, a, b, "");
   }

   if (0) {
      /* Generate a vector select.
       *
       * XXX: Using vector selects would avoid emitting intrinsics, but they aren't
       * properly supported yet.
       *
       * LLVM 3.0 includes experimental support provided the -promote-elements
       * options is passed to LLVM's command line (e.g., via
       * llvm::cl::ParseCommandLineOptions), but resulting code quality is much
       * worse, probably because some optimization passes don't know how to
       * handle vector selects.
       *
       * See also:
       * - http://lists.cs.uiuc.edu/pipermail/llvmdev/2011-October/043659.html
       */

      /* Convert the mask to a vector of booleans.
       * XXX: There are two ways to do this. Decide what's best.
       */
      if (1) {
         LLVMTypeRef bool_vec_type = LLVMVectorType(LLVMInt1TypeInContext(lc), type.length);
         mask = LLVMBuildTrunc(builder, mask, bool_vec_type, "");
      } else {
         mask = LLVMBuildICmp(builder, LLVMIntNE, mask, LLVMConstNull(bld->int_vec_type), "");
      }
      return LLVMBuildSelect(builder, mask, a, b, "");
   }

   /* Blend intrinsics need the right CPU/vector size and no constant operand */
   use_blend =
      ((util_cpu_caps.has_sse4_1 && total_bits == 128) ||
       (util_cpu_caps.has_avx && total_bits == 256 && type.width >= 32)) &&
      !LLVMIsConstant(a) &&
      !LLVMIsConstant(b) &&
      !LLVMIsConstant(mask);

   if (!use_blend)
      return lp_build_select_bitwise(bld, mask, a, b);

   /*
    *  There's only float blend in AVX but can just cast i32/i64
    *  to float.
    */
   if (total_bits == 256) {
      if (type.width == 64) {
         intrinsic = "llvm.x86.avx.blendv.pd.256";
         arg_type = LLVMVectorType(LLVMDoubleTypeInContext(lc), 4);
      }
      else {
         intrinsic = "llvm.x86.avx.blendv.ps.256";
         arg_type = LLVMVectorType(LLVMFloatTypeInContext(lc), 8);
      }
   }
   else if (type.floating && type.width == 64) {
      intrinsic = "llvm.x86.sse41.blendvpd";
      arg_type = LLVMVectorType(LLVMDoubleTypeInContext(lc), 2);
   }
   else if (type.floating && type.width == 32) {
      intrinsic = "llvm.x86.sse41.blendvps";
      arg_type = LLVMVectorType(LLVMFloatTypeInContext(lc), 4);
   }
   else {
      intrinsic = "llvm.x86.sse41.pblendvb";
      arg_type = LLVMVectorType(LLVMInt8TypeInContext(lc), 16);
   }

   /* Bring all three operands to the intrinsic's argument type */
   if (arg_type != bld->int_vec_type) {
      mask = LLVMBuildBitCast(builder, mask, arg_type, "");
   }

   if (arg_type != bld->vec_type) {
      a = LLVMBuildBitCast(builder, a, arg_type, "");
      b = LLVMBuildBitCast(builder, b, arg_type, "");
   }

   /* Operands are passed as (b, a, mask) */
   args[0] = b;
   args[1] = a;
   args[2] = mask;

   res = lp_build_intrinsic(builder, intrinsic,
                            arg_type, args, Elements(args));

   /* Cast the result back to the context's vector type if needed */
   if (arg_type != bld->vec_type) {
      res = LLVMBuildBitCast(builder, res, bld->vec_type, "");
   }

   return res;
}