static LLVMValueRef gen_digestof_value(compile_t* c, LLVMValueRef value)
{
  LLVMTypeRef type = LLVMTypeOf(value);

  switch(LLVMGetTypeKind(type))
  {
    case LLVMFloatTypeKind:
      value = LLVMBuildBitCast(c->builder, value, c->i32, "");
      return LLVMBuildZExt(c->builder, value, c->i64, "");

    case LLVMDoubleTypeKind:
      return LLVMBuildBitCast(c->builder, value, c->i64, "");

    case LLVMIntegerTypeKind:
    {
      uint32_t width = LLVMGetIntTypeWidth(type);

      if(width < 64)
      {
        value = LLVMBuildZExt(c->builder, value, c->i64, "");
      } else if(width == 128) {
        LLVMValueRef shift = LLVMConstInt(c->i128, 64, false);
        LLVMValueRef high = LLVMBuildLShr(c->builder, value, shift, "");
        high = LLVMBuildTrunc(c->builder, high, c->i64, "");
        value = LLVMBuildTrunc(c->builder, value, c->i64, "");
        value = LLVMBuildXor(c->builder, value, high, "");
      }

      return value;
    }

    case LLVMStructTypeKind:
    {
      uint32_t count = LLVMCountStructElementTypes(type);
      LLVMValueRef result = LLVMConstInt(c->i64, 0, false);

      for(uint32_t i = 0; i < count; i++)
      {
        LLVMValueRef elem = LLVMBuildExtractValue(c->builder, value, i, "");
        elem = gen_digestof_value(c, elem);
        result = LLVMBuildXor(c->builder, result, elem, "");
      }

      return result;
    }

    case LLVMPointerTypeKind:
      return LLVMBuildPtrToInt(c->builder, value, c->i64, "");

    default: {}
  }

  assert(0);
  return NULL;
}
static void emit_pk2h(const struct lp_build_tgsi_action *action,
                      struct lp_build_tgsi_context *bld_base,
                      struct lp_build_emit_data *emit_data)
{
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   LLVMContextRef context = bld_base->base.gallivm->context;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   LLVMTypeRef fp16, i16;
   LLVMValueRef const16, comp[2];
   unsigned i;

   fp16 = LLVMHalfTypeInContext(context);
   i16 = LLVMInt16TypeInContext(context);
   const16 = lp_build_const_int32(uint_bld->gallivm, 16);

   for (i = 0; i < 2; i++) {
      comp[i] = LLVMBuildFPTrunc(builder, emit_data->args[i], fp16, "");
      comp[i] = LLVMBuildBitCast(builder, comp[i], i16, "");
      comp[i] = LLVMBuildZExt(builder, comp[i], uint_bld->elem_type, "");
   }

   comp[1] = LLVMBuildShl(builder, comp[1], const16, "");
   comp[0] = LLVMBuildOr(builder, comp[0], comp[1], "");

   emit_data->output[emit_data->chan] = comp[0];
}
/**
 * Converts int16 half-float to float32
 * Note this can be performed in 1 instruction if vcvtph2ps exists (f16c/cvt16)
 * [llvm.x86.vcvtph2ps / _mm_cvtph_ps]
 *
 * @param src   value to convert
 */
LLVMValueRef
lp_build_half_to_float(struct gallivm_state *gallivm,
                       LLVMValueRef src)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMTypeRef src_type = LLVMTypeOf(src);
   unsigned src_length = LLVMGetTypeKind(src_type) == LLVMVectorTypeKind ?
                            LLVMGetVectorSize(src_type) : 1;

   struct lp_type f32_type = lp_type_float_vec(32, 32 * src_length);
   struct lp_type i32_type = lp_type_int_vec(32, 32 * src_length);
   LLVMTypeRef int_vec_type = lp_build_vec_type(gallivm, i32_type);
   LLVMValueRef h;

   if (util_cpu_caps.has_f16c && HAVE_LLVM >= 0x0301 &&
       (src_length == 4 || src_length == 8)) {
      const char *intrinsic = NULL;
      if (src_length == 4) {
         src = lp_build_pad_vector(gallivm, src, 8);
         intrinsic = "llvm.x86.vcvtph2ps.128";
      }
      else {
         intrinsic = "llvm.x86.vcvtph2ps.256";
      }
      return lp_build_intrinsic_unary(builder, intrinsic,
                                      lp_build_vec_type(gallivm, f32_type), src);
   }

   /* Convert int16 vector to int32 vector by zero ext (might generate bad code) */
   h = LLVMBuildZExt(builder, src, int_vec_type, "");
   return lp_build_smallfloat_to_float(gallivm, f32_type, h, 10, 5, 0, true);
}
/**
 * Gather one element from scatter positions in memory.
 *
 * @sa lp_build_gather()
 */
LLVMValueRef
lp_build_gather_elem(struct gallivm_state *gallivm,
                     unsigned length,
                     unsigned src_width,
                     unsigned dst_width,
                     LLVMValueRef base_ptr,
                     LLVMValueRef offsets,
                     unsigned i)
{
   LLVMTypeRef src_type = LLVMIntTypeInContext(gallivm->context, src_width);
   LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0);
   LLVMTypeRef dst_elem_type = LLVMIntTypeInContext(gallivm->context, dst_width);
   LLVMValueRef ptr;
   LLVMValueRef res;

   assert(LLVMTypeOf(base_ptr) ==
          LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0));

   ptr = lp_build_gather_elem_ptr(gallivm, length, base_ptr, offsets, i);
   ptr = LLVMBuildBitCast(gallivm->builder, ptr, src_ptr_type, "");
   res = LLVMBuildLoad(gallivm->builder, ptr, "");

   assert(src_width <= dst_width);
   if (src_width > dst_width)
      res = LLVMBuildTrunc(gallivm->builder, res, dst_elem_type, "");
   if (src_width < dst_width)
      res = LLVMBuildZExt(gallivm->builder, res, dst_elem_type, "");

   return res;
}
static LLVMValueRef
translateBinOp(SymbolTable *TyTable, SymbolTable *ValTable, ASTNode *Node) {
  ASTNode *NodeE1 = (ASTNode*) ptrVectorGet(&(Node->Child), 0),
          *NodeE2 = (ASTNode*) ptrVectorGet(&(Node->Child), 1);
  LLVMValueRef ValueE1Ptr = translateExpr(TyTable, ValTable, NodeE1),
               ValueE2Ptr = translateExpr(TyTable, ValTable, NodeE2);
  LLVMValueRef ValueE1 = LLVMBuildLoad(Builder, ValueE1Ptr, "binop.ld.e1."),
               ValueE2 = LLVMBuildLoad(Builder, ValueE2Ptr, "binop.ld.e2.");

  LLVMValueRef ResultVal = NULL;
  switch (LLVMGetTypeKind(LLVMTypeOf(ValueE1))) {
    case LLVMIntegerTypeKind:
      ResultVal = translateIntBinOp(Node->Kind, ValueE1, ValueE2);
      break;
    case LLVMFloatTypeKind:
      ResultVal = translateFloatBinOp(Node->Kind, ValueE1, ValueE2);
      break;
    case LLVMStructTypeKind:
      ResultVal = translateStructBinOp(Node->Kind, ValueE1Ptr, ValueE2Ptr);
      break;
    case LLVMPointerTypeKind:
      ResultVal = translateStringBinOp(Node->Kind, ValueE1, ValueE2);
      break;
    default:
      return NULL;
  }

  switch (LLVMGetTypeKind(LLVMTypeOf(ResultVal))) {
    case LLVMIntegerTypeKind:
      ResultVal = LLVMBuildZExt(Builder, ResultVal, LLVMInt32Type(), "");
      break;
    default:
      break;
  }

  return wrapValue(ResultVal);
}
LLVMValueRef gen_not(compile_t* c, ast_t* ast)
{
  LLVMValueRef value = gen_expr(c, ast);

  if(value == NULL)
    return NULL;

  ast_t* type = ast_type(ast);

  if(is_bool(type))
  {
    if(LLVMIsAConstantInt(value))
    {
      if(is_always_true(value))
        return LLVMConstInt(c->ibool, 0, false);

      return LLVMConstInt(c->ibool, 1, false);
    }

    LLVMValueRef test = LLVMBuildICmp(c->builder, LLVMIntEQ, value,
      LLVMConstInt(c->ibool, 0, false), "");
    return LLVMBuildZExt(c->builder, test, c->ibool, "");
  }

  if(LLVMIsAConstantInt(value))
    return LLVMConstNot(value);

  return LLVMBuildNot(c->builder, value, "");
}
LLVMValueRef gendesc_fieldptr(compile_t* c, LLVMValueRef ptr,
  LLVMValueRef field_info)
{
  LLVMValueRef offset = LLVMBuildExtractValue(c->builder, field_info, 0, "");
  offset = LLVMBuildZExt(c->builder, offset, c->intptr, "");

  return LLVMBuildInBoundsGEP(c->builder, ptr, &offset, 1, "");
}
/**
 * Gather one element from scatter positions in memory.
 *
 * @sa lp_build_gather()
 */
LLVMValueRef
lp_build_gather_elem(struct gallivm_state *gallivm,
                     unsigned length,
                     unsigned src_width,
                     unsigned dst_width,
                     boolean aligned,
                     LLVMValueRef base_ptr,
                     LLVMValueRef offsets,
                     unsigned i,
                     boolean vector_justify)
{
   LLVMTypeRef src_type = LLVMIntTypeInContext(gallivm->context, src_width);
   LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0);
   LLVMTypeRef dst_elem_type = LLVMIntTypeInContext(gallivm->context, dst_width);
   LLVMValueRef ptr;
   LLVMValueRef res;

   assert(LLVMTypeOf(base_ptr) ==
          LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0));

   ptr = lp_build_gather_elem_ptr(gallivm, length, base_ptr, offsets, i);
   ptr = LLVMBuildBitCast(gallivm->builder, ptr, src_ptr_type, "");
   res = LLVMBuildLoad(gallivm->builder, ptr, "");

   /* XXX
    * On some archs we probably really want to avoid having to deal
    * with alignments lower than 4 bytes (if fetch size is a power of
    * two >= 32). On x86 it doesn't matter, however.
    * We should be able to guarantee full alignment for any kind of texture
    * fetch (except ARB_texture_buffer_range, oops), but not vertex fetch
    * (there's PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY and friends
    * but I don't think that's quite what we wanted).
    * For ARB_texture_buffer_range, PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT
    * looks like a good fit, but it seems this cap bit (and OpenGL) aren't
    * enforcing what we want (which is what d3d10 does, the offset needs to
    * be aligned to element size, but GL has bytes regardless of element
    * size which would only leave us with minimum alignment restriction of 16
    * which doesn't make much sense if the type isn't 4x32bit). Due to
    * translation of offsets to first_elem in sampler_views it actually seems
    * gallium could not do anything else except 16 no matter what...
    */
   if (!aligned) {
      LLVMSetAlignment(res, 1);
   }

   assert(src_width <= dst_width);
   if (src_width > dst_width) {
      res = LLVMBuildTrunc(gallivm->builder, res, dst_elem_type, "");
   } else if (src_width < dst_width) {
      res = LLVMBuildZExt(gallivm->builder, res, dst_elem_type, "");
      if (vector_justify) {
#ifdef PIPE_ARCH_BIG_ENDIAN
         res = LLVMBuildShl(gallivm->builder, res,
                            LLVMConstInt(dst_elem_type, dst_width - src_width, 0),
                            "");
#endif
      }
   }

   return res;
}
static void number_conversion(compile_t* c, num_conv_t* from, num_conv_t* to,
  bool native128)
{
  if(!native128 &&
    ((from->is_float && (to->size > 64)) ||
    (to->is_float && (from->size > 64))))
  {
    return;
  }

  reach_type_t* t = reach_type_name(c->reach, from->type_name);

  if(t == NULL)
    return;

  FIND_METHOD(to->fun_name);
  start_function(c, m, to->type, &from->type, 1);

  LLVMValueRef arg = LLVMGetParam(m->func, 0);
  LLVMValueRef result;

  if(from->is_float)
  {
    if(to->is_float)
    {
      if(from->size < to->size)
        result = LLVMBuildFPExt(c->builder, arg, to->type, "");
      else if(from->size > to->size)
        result = LLVMBuildFPTrunc(c->builder, arg, to->type, "");
      else
        result = arg;
    } else if(to->is_signed) {
      result = LLVMBuildFPToSI(c->builder, arg, to->type, "");
    } else {
      result = LLVMBuildFPToUI(c->builder, arg, to->type, "");
    }
  } else if(to->is_float) {
    if(from->is_signed)
      result = LLVMBuildSIToFP(c->builder, arg, to->type, "");
    else
      result = LLVMBuildUIToFP(c->builder, arg, to->type, "");
  } else if(from->size > to->size) {
    result = LLVMBuildTrunc(c->builder, arg, to->type, "");
  } else if(from->size < to->size) {
    if(from->is_signed)
      result = LLVMBuildSExt(c->builder, arg, to->type, "");
    else
      result = LLVMBuildZExt(c->builder, arg, to->type, "");
  } else {
    result = arg;
  }

  LLVMBuildRet(c->builder, result);
  codegen_finishfun(c);

  BOX_FUNCTION();
}
LLVMValueRef gendesc_ptr_to_fields(compile_t* c, LLVMValueRef object,
  LLVMValueRef desc)
{
  // Skip the descriptor.
  LLVMValueRef offset = desc_field(c, desc, DESC_FIELD_OFFSET);
  offset = LLVMBuildZExt(c->builder, offset, c->intptr, "");

  LLVMValueRef base = LLVMBuildBitCast(c->builder, object, c->void_ptr, "");
  return LLVMBuildInBoundsGEP(c->builder, base, &offset, 1, "");
}
LLVMValueRef gen_eq_rvalue(compile_t* c, ast_t* left, LLVMValueRef r_value)
{
  ast_t* type = ast_type(left);
  bool sign = is_signed(type);
  LLVMValueRef l_value = gen_expr(c, left);

  LLVMValueRef test = make_cmp_value(c, sign, l_value, r_value,
    LLVMRealOEQ, LLVMIntEQ, LLVMIntEQ);

  return LLVMBuildZExt(c->builder, test, c->ibool, "");
}
LLVMValueRef gen_not(struct node *ast)
{
    LLVMValueRef truth;

    truth = LLVMBuildICmp(builder, LLVMIntEQ,
        codegen(ast->one), CONST(0), "");

    return LLVMBuildZExt(builder, truth, TYPE_INT, "");
}
LLVMValueRef gen_ge(struct node *ast)
{
    LLVMValueRef truth;

    truth = LLVMBuildICmp(builder, LLVMIntSGE,
        codegen(ast->one), codegen(ast->two), "");

    return LLVMBuildZExt(builder, truth, TYPE_INT, "");
}
LLVMValueRef gendesc_ptr_to_fields(compile_t* c, LLVMValueRef object,
  LLVMValueRef desc)
{
  // Skip the descriptor.
  LLVMValueRef offset = desc_field(c, desc, DESC_FIELD_OFFSET);
  offset = LLVMBuildZExt(c->builder, offset, c->intptr, "");

  LLVMValueRef base = LLVMBuildPtrToInt(c->builder, object, c->intptr, "");
  LLVMValueRef result = LLVMBuildAdd(c->builder, base, offset, "");

  // Return as a c->intptr.
  return result;
}
static LLVMValueRef make_cmp(compile_t* c, ast_t* left, ast_t* right,
  LLVMRealPredicate cmp_f, LLVMIntPredicate cmp_si, LLVMIntPredicate cmp_ui)
{
  ast_t* type = ast_type(left);
  bool sign = is_signed(type);

  LLVMValueRef l_value = gen_expr(c, left);
  LLVMValueRef r_value = gen_expr(c, right);

  LLVMValueRef test = make_cmp_value(c, sign, l_value, r_value,
    cmp_f, cmp_si, cmp_ui);

  return LLVMBuildZExt(c->builder, test, c->ibool, "");
}
static void maybe_is_none(compile_t* c, reach_type_t* t)
{
  // Returns true if the receiver is null.
  FIND_METHOD("is_none");
  start_function(c, m, c->ibool, &t->use_type, 1);

  LLVMValueRef receiver = LLVMGetParam(m->func, 0);
  LLVMValueRef test = LLVMBuildIsNull(c->builder, receiver, "");
  LLVMValueRef value = LLVMBuildZExt(c->builder, test, c->ibool, "");

  LLVMBuildRet(c->builder, value);
  codegen_finishfun(c);

  BOX_FUNCTION();
}
/**
 * Converts int16 half-float to float32
 * Note this can be performed in 1 instruction if vcvtph2ps exists (f16c)
 * [llvm.x86.vcvtph2ps / _mm_cvtph_ps]
 *
 * @param src_type      <vector> type of int16
 * @param src           value to convert
 *
 * ref http://fgiesen.wordpress.com/2012/03/28/half-to-float-done-quic/
 */
LLVMValueRef
lp_build_half_to_float(struct gallivm_state *gallivm,
                       struct lp_type src_type,
                       LLVMValueRef src)
{
   struct lp_type f32_type = lp_type_float_vec(32, 32 * src_type.length);
   struct lp_type i32_type = lp_type_int_vec(32, 32 * src_type.length);

   LLVMBuilderRef builder = gallivm->builder;
   LLVMTypeRef int_vec_type = lp_build_vec_type(gallivm, i32_type);
   LLVMTypeRef float_vec_type = lp_build_vec_type(gallivm, f32_type);

   /* Constants */
   LLVMValueRef i32_13          = lp_build_const_int_vec(gallivm, i32_type, 13);
   LLVMValueRef i32_16          = lp_build_const_int_vec(gallivm, i32_type, 16);
   LLVMValueRef i32_mask_nosign = lp_build_const_int_vec(gallivm, i32_type, 0x7fff);
   LLVMValueRef i32_was_infnan  = lp_build_const_int_vec(gallivm, i32_type, 0x7bff);
   LLVMValueRef i32_exp_infnan  = lp_build_const_int_vec(gallivm, i32_type, 0xff << 23);
   LLVMValueRef f32_magic       = LLVMBuildBitCast(builder,
                                                   lp_build_const_int_vec(gallivm, i32_type,
                                                                          (254 - 15) << 23),
                                                   float_vec_type, "");

   /* Convert int16 vector to int32 vector by zero ext */
   LLVMValueRef h = LLVMBuildZExt(builder, src, int_vec_type, "");

   /* Exponent / mantissa bits */
   LLVMValueRef expmant = LLVMBuildAnd(builder, i32_mask_nosign, h, "");
   LLVMValueRef shifted = LLVMBuildBitCast(builder,
                                           LLVMBuildShl(builder, expmant, i32_13, ""),
                                           float_vec_type, "");

   /* Exponent adjust */
   LLVMValueRef scaled = LLVMBuildBitCast(builder,
                                          LLVMBuildFMul(builder, shifted, f32_magic, ""),
                                          int_vec_type, "");

   /* Make sure Inf/NaN survive */
   LLVMValueRef b_wasinfnan = lp_build_compare(gallivm, i32_type, PIPE_FUNC_GREATER,
                                               expmant, i32_was_infnan);
   LLVMValueRef infnanexp   = LLVMBuildAnd(builder, b_wasinfnan, i32_exp_infnan, "");

   /* Sign bit */
   LLVMValueRef justsign = LLVMBuildXor(builder, h, expmant, "");
   LLVMValueRef sign = LLVMBuildShl(builder, justsign, i32_16, "");

   /* Combine result */
   LLVMValueRef sign_inf = LLVMBuildOr(builder, sign, infnanexp, "");
   LLVMValueRef final = LLVMBuildOr(builder, scaled, sign_inf, "");

   /* Cast from int32 vector to float32 vector */
   return LLVMBuildBitCast(builder, final, float_vec_type, "");
}
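/*
 * A minimal scalar sketch of the same bit trick as the vector code above,
 * assuming IEEE binary16/binary32. The constants line up with the IR:
 * shift exponent/mantissa up by 13, rescale by 2^(127-15), patch the
 * exponent for Inf/NaN, reattach the sign in bit 31. The helper name is
 * illustrative, not part of the original source.
 */
#include <stdint.h>

static float
half_to_float_scalar(uint16_t h)
{
   union { uint32_t u; float f; } out, magic;
   uint32_t expmant = (uint32_t)(h & 0x7fff) << 13;  /* exp/mantissa up by 13 */

   magic.u = (254 - 15) << 23;   /* 2^112: rebias half exponent to float */
   out.u = expmant;
   out.f *= magic.f;             /* exponent adjust (handles denormals too) */

   if ((h & 0x7fff) > 0x7bff)    /* source was Inf/NaN: force exponent 255 */
      out.u |= 0xffu << 23;

   out.u |= (uint32_t)(h & 0x8000) << 16;  /* reattach sign in bit 31 */
   return out.f;
}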
/**
 * lp_build_assert.
 *
 * Build an assertion in LLVM IR by building a function call to the
 * lp_assert() function above.
 *
 * \param condition should be an 'i1' or 'i32' value
 * \param msg  a string to print if the assertion fails.
 */
LLVMValueRef
lp_build_assert(LLVMBuilderRef builder, LLVMValueRef condition,
                const char *msg)
{
   LLVMModuleRef module;
   LLVMTypeRef arg_types[2];
   LLVMValueRef msg_string, assert_func, params[2], r;

   module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(
               LLVMGetInsertBlock(builder)));

   msg_string = lp_build_const_string_variable(module, msg, strlen(msg) + 1);

   arg_types[0] = LLVMInt32Type();
   arg_types[1] = LLVMPointerType(LLVMInt8Type(), 0);

   /* lookup the lp_assert function */
   assert_func = LLVMGetNamedFunction(module, "lp_assert");

   /* Create the assertion function if not found */
   if (!assert_func) {
      LLVMTypeRef func_type =
         LLVMFunctionType(LLVMVoidType(), arg_types, 2, 0);

      assert_func = LLVMAddFunction(module, "lp_assert", func_type);
      LLVMSetFunctionCallConv(assert_func, LLVMCCallConv);
      LLVMSetLinkage(assert_func, LLVMExternalLinkage);
      LLVMAddGlobalMapping(lp_build_engine, assert_func,
                           func_to_pointer((func_pointer)lp_assert));
   }
   assert(assert_func);

   /* build function call param list */
   params[0] = LLVMBuildZExt(builder, condition, arg_types[0], "");
   params[1] = LLVMBuildBitCast(builder, msg_string, arg_types[1], "");

   /* check arg types */
   assert(LLVMTypeOf(params[0]) == arg_types[0]);
   assert(LLVMTypeOf(params[1]) == arg_types[1]);

   r = LLVMBuildCall(builder, assert_func, params, 2, "");

   return r;
}
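/*
 * A minimal usage sketch (hypothetical, not from the original sources):
 * given a builder and an i32 'index' from surrounding generated code, emit
 * a runtime bounds check that prints a message when the condition fails.
 */
static void
emit_index_check(LLVMBuilderRef builder, LLVMValueRef index)
{
   LLVMValueRef in_range = LLVMBuildICmp(builder, LLVMIntULT, index,
                                         LLVMConstInt(LLVMInt32Type(), 16, 0),
                                         "in_range");
   lp_build_assert(builder, in_range, "index out of range");
}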
LLVMValueRef gen_is(compile_t* c, ast_t* ast)
{
  AST_GET_CHILDREN(ast, left, right);
  ast_t* left_type = ast_type(left);
  ast_t* right_type = ast_type(right);

  LLVMValueRef l_value = gen_expr(c, left);
  LLVMValueRef r_value = gen_expr(c, right);

  if((l_value == NULL) || (r_value == NULL))
    return NULL;

  codegen_debugloc(c, ast);
  LLVMValueRef result = gen_is_value(c, left_type, right_type,
    l_value, r_value);
  LLVMValueRef value = LLVMBuildZExt(c->builder, result, c->ibool, "");
  codegen_debugloc(c, NULL);

  return value;
}
/**
 * Gather elements from scatter positions in memory into a single vector.
 *
 * @param src_width src element width
 * @param dst_width result element width (source will be expanded to fit)
 * @param length length of the offsets
 * @param base_ptr base pointer, should be a i8 pointer type.
 * @param offsets vector with offsets
 */
LLVMValueRef
lp_build_gather(LLVMBuilderRef builder,
                unsigned length,
                unsigned src_width,
                unsigned dst_width,
                LLVMValueRef base_ptr,
                LLVMValueRef offsets)
{
   LLVMTypeRef src_type = LLVMIntType(src_width);
   LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0);
   LLVMTypeRef dst_elem_type = LLVMIntType(dst_width);
   LLVMTypeRef dst_vec_type = LLVMVectorType(dst_elem_type, length);
   LLVMValueRef res;
   unsigned i;

   res = LLVMGetUndef(dst_vec_type);
   for (i = 0; i < length; ++i) {
      LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
      LLVMValueRef elem_offset;
      LLVMValueRef elem_ptr;
      LLVMValueRef elem;

      elem_offset = LLVMBuildExtractElement(builder, offsets, index, "");
      elem_ptr = LLVMBuildGEP(builder, base_ptr, &elem_offset, 1, "");
      elem_ptr = LLVMBuildBitCast(builder, elem_ptr, src_ptr_type, "");
      elem = LLVMBuildLoad(builder, elem_ptr, "");

      assert(src_width <= dst_width);
      if (src_width > dst_width)
         elem = LLVMBuildTrunc(builder, elem, dst_elem_type, "");
      if (src_width < dst_width)
         elem = LLVMBuildZExt(builder, elem, dst_elem_type, "");

      res = LLVMBuildInsertElement(builder, res, elem, index, "");
   }

   return res;
}
/*
 * Do a cached lookup.
 *
 * Returns (vectors of) 4x8 rgba aos value
 */
LLVMValueRef
lp_build_fetch_cached_texels(struct gallivm_state *gallivm,
                             const struct util_format_description *format_desc,
                             unsigned n,
                             LLVMValueRef base_ptr,
                             LLVMValueRef offset,
                             LLVMValueRef i,
                             LLVMValueRef j,
                             LLVMValueRef cache)
{
   LLVMBuilderRef builder = gallivm->builder;
   unsigned count, low_bit, log2size;
   LLVMValueRef color, offset_stored, addr, ptr_addrtrunc, tmp;
   LLVMValueRef ij_index, hash_index, hash_mask, block_index;
   LLVMTypeRef i8t = LLVMInt8TypeInContext(gallivm->context);
   LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
   LLVMTypeRef i64t = LLVMInt64TypeInContext(gallivm->context);
   struct lp_type type;
   struct lp_build_context bld32;

   memset(&type, 0, sizeof type);
   type.width = 32;
   type.length = n;

   assert(format_desc->block.width == 4);
   assert(format_desc->block.height == 4);

   lp_build_context_init(&bld32, gallivm, type);

   /*
    * compute hash - we use direct mapped cache, the hash function could
    * be better but it needs to be simple
    * per-element:
    *    compare offset with offset stored at tag (hash)
    *    if not equal decode/store block, update tag
    *    extract color from cache
    *    assemble result vector
    */

   /* TODO: not ideal with 32bit pointers... */

   low_bit = util_logbase2(format_desc->block.bits / 8);
   log2size = util_logbase2(LP_BUILD_FORMAT_CACHE_SIZE);
   addr = LLVMBuildPtrToInt(builder, base_ptr, i64t, "");
   ptr_addrtrunc = LLVMBuildPtrToInt(builder, base_ptr, i32t, "");
   ptr_addrtrunc = lp_build_broadcast_scalar(&bld32, ptr_addrtrunc);
   /* For the hash function, first mask off the unused lowest bits. Then just
      do some xor with address bits - only use lower 32bits */
   ptr_addrtrunc = LLVMBuildAdd(builder, offset, ptr_addrtrunc, "");
   ptr_addrtrunc = LLVMBuildLShr(builder, ptr_addrtrunc,
                                 lp_build_const_int_vec(gallivm, type, low_bit), "");
   /* This only really makes sense for size 64,128,256 */
   hash_index = ptr_addrtrunc;
   ptr_addrtrunc = LLVMBuildLShr(builder, ptr_addrtrunc,
                                 lp_build_const_int_vec(gallivm, type, 2*log2size), "");
   hash_index = LLVMBuildXor(builder, ptr_addrtrunc, hash_index, "");
   tmp = LLVMBuildLShr(builder, hash_index,
                       lp_build_const_int_vec(gallivm, type, log2size), "");
   hash_index = LLVMBuildXor(builder, hash_index, tmp, "");

   hash_mask = lp_build_const_int_vec(gallivm, type, LP_BUILD_FORMAT_CACHE_SIZE - 1);
   hash_index = LLVMBuildAnd(builder, hash_index, hash_mask, "");
   ij_index = LLVMBuildShl(builder, i, lp_build_const_int_vec(gallivm, type, 2), "");
   ij_index = LLVMBuildAdd(builder, ij_index, j, "");
   block_index = LLVMBuildShl(builder, hash_index,
                              lp_build_const_int_vec(gallivm, type, 4), "");
   block_index = LLVMBuildAdd(builder, ij_index, block_index, "");

   if (n > 1) {
      color = LLVMGetUndef(LLVMVectorType(i32t, n));
      for (count = 0; count < n; count++) {
         LLVMValueRef index, cond, colorx;
         LLVMValueRef block_indexx, hash_indexx, addrx, offsetx, ptr_addrx;
         struct lp_build_if_state if_ctx;

         index = lp_build_const_int32(gallivm, count);
         offsetx = LLVMBuildExtractElement(builder, offset, index, "");
         addrx = LLVMBuildZExt(builder, offsetx, i64t, "");
         addrx = LLVMBuildAdd(builder, addrx, addr, "");
         block_indexx = LLVMBuildExtractElement(builder, block_index, index, "");
         hash_indexx = LLVMBuildLShr(builder, block_indexx,
                                     lp_build_const_int32(gallivm, 4), "");
         offset_stored = lookup_tag_data(gallivm, cache, hash_indexx);
         cond = LLVMBuildICmp(builder, LLVMIntNE, offset_stored, addrx, "");

         lp_build_if(&if_ctx, gallivm, cond);
         {
            ptr_addrx = LLVMBuildIntToPtr(builder, addrx,
                                          LLVMPointerType(i8t, 0), "");
            update_cached_block(gallivm, format_desc, ptr_addrx, hash_indexx, cache);
#if LP_BUILD_FORMAT_CACHE_DEBUG
            update_cache_access(gallivm, cache, 1,
                                LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_MISS);
#endif
         }
         lp_build_endif(&if_ctx);

         colorx = lookup_cached_pixel(gallivm, cache, block_indexx);

         color = LLVMBuildInsertElement(builder, color, colorx,
                                        lp_build_const_int32(gallivm, count), "");
      }
   }
   else {
      LLVMValueRef cond;
      struct lp_build_if_state if_ctx;

      tmp = LLVMBuildZExt(builder, offset, i64t, "");
      addr = LLVMBuildAdd(builder, tmp, addr, "");
      offset_stored = lookup_tag_data(gallivm, cache, hash_index);
      cond = LLVMBuildICmp(builder, LLVMIntNE, offset_stored, addr, "");

      lp_build_if(&if_ctx, gallivm, cond);
      {
         tmp = LLVMBuildIntToPtr(builder, addr, LLVMPointerType(i8t, 0), "");
         update_cached_block(gallivm, format_desc, tmp, hash_index, cache);
#if LP_BUILD_FORMAT_CACHE_DEBUG
         update_cache_access(gallivm, cache, 1,
                             LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_MISS);
#endif
      }
      lp_build_endif(&if_ctx);

      color = lookup_cached_pixel(gallivm, cache, block_index);
   }
#if LP_BUILD_FORMAT_CACHE_DEBUG
   update_cache_access(gallivm, cache, n,
                       LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_TOTAL);
#endif
   return LLVMBuildBitCast(builder, color, LLVMVectorType(i8t, n * 4), "");
}
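/*
 * A minimal scalar sketch of the direct-mapped tag hash the IR above builds,
 * assuming LP_BUILD_FORMAT_CACHE_SIZE is a power of two (so log2size bits
 * index the cache) and low_bit strips the always-zero low offset bits.
 * The helper name is illustrative, not part of the original source.
 */
static unsigned
cache_hash_index(unsigned addr, unsigned low_bit, unsigned log2size)
{
   unsigned h = addr >> low_bit;       /* drop unused low bits */
   h ^= h >> (2 * log2size);           /* fold in higher address bits */
   h ^= h >> log2size;
   return h & ((1u << log2size) - 1);  /* direct-mapped: mask to cache size */
}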
static void gen_main(compile_t* c, gentype_t* main_g, gentype_t* env_g)
{
  LLVMTypeRef params[3];
  params[0] = c->i32;
  params[1] = LLVMPointerType(LLVMPointerType(c->i8, 0), 0);
  params[2] = LLVMPointerType(LLVMPointerType(c->i8, 0), 0);

  LLVMTypeRef ftype = LLVMFunctionType(c->i32, params, 3, false);
  LLVMValueRef func = LLVMAddFunction(c->module, "main", ftype);

  codegen_startfun(c, func, false);

  LLVMValueRef args[3];
  args[0] = LLVMGetParam(func, 0);
  LLVMSetValueName(args[0], "argc");

  args[1] = LLVMGetParam(func, 1);
  LLVMSetValueName(args[1], "argv");

  args[2] = LLVMGetParam(func, 2);
  LLVMSetValueName(args[2], "envp");

  // Initialise the pony runtime with argc and argv, getting a new argc.
  args[0] = gencall_runtime(c, "pony_init", args, 2, "argc");

  // Create the main actor and become it.
  LLVMValueRef main_actor = create_main(c, main_g);

  // Create an Env on the main actor's heap.
  const char* env_name = "Env";
  const char* env_create = genname_fun(env_name, "_create", NULL);

  LLVMValueRef env_args[4];
  env_args[0] = gencall_alloc(c, env_g);
  env_args[1] = LLVMBuildZExt(c->builder, args[0], c->i64, "");
  env_args[2] = args[1];
  env_args[3] = args[2];

  LLVMValueRef env = gencall_runtime(c, env_create, env_args, 4, "env");
  LLVMSetInstructionCallConv(env, GEN_CALLCONV);

  // Run primitive initialisers using the main actor's heap.
  primitive_call(c, stringtab("_init"), env);

  // Create a type for the message.
  LLVMTypeRef f_params[4];
  f_params[0] = c->i32;
  f_params[1] = c->i32;
  f_params[2] = c->void_ptr;
  f_params[3] = LLVMTypeOf(env);

  LLVMTypeRef msg_type = LLVMStructTypeInContext(c->context, f_params, 4,
    false);
  LLVMTypeRef msg_type_ptr = LLVMPointerType(msg_type, 0);

  // Allocate the message, setting its size and ID.
  uint32_t index = genfun_vtable_index(c, main_g, stringtab("create"), NULL);

  size_t msg_size = LLVMABISizeOfType(c->target_data, msg_type);
  args[0] = LLVMConstInt(c->i32, pool_index(msg_size), false);
  args[1] = LLVMConstInt(c->i32, index, false);
  LLVMValueRef msg = gencall_runtime(c, "pony_alloc_msg", args, 2, "");
  LLVMValueRef msg_ptr = LLVMBuildBitCast(c->builder, msg, msg_type_ptr, "");

  // Set the message contents.
  LLVMValueRef env_ptr = LLVMBuildStructGEP(c->builder, msg_ptr, 3, "");
  LLVMBuildStore(c->builder, env, env_ptr);

  // Trace the message.
  gencall_runtime(c, "pony_gc_send", NULL, 0, "");

  const char* env_trace = genname_trace(env_name);
  args[0] = LLVMBuildBitCast(c->builder, env, c->object_ptr, "");
  args[1] = LLVMGetNamedFunction(c->module, env_trace);
  gencall_runtime(c, "pony_traceobject", args, 2, "");
  gencall_runtime(c, "pony_send_done", NULL, 0, "");

  // Send the message.
  args[0] = main_actor;
  args[1] = msg;
  gencall_runtime(c, "pony_sendv", args, 2, "");

  // Start the runtime.
  LLVMValueRef zero = LLVMConstInt(c->i32, 0, false);
  LLVMValueRef rc = gencall_runtime(c, "pony_start", &zero, 1, "");

  // Run primitive finalisers. We create a new main actor as a context to run
  // the finalisers in, but we do not initialise or schedule it.
  LLVMValueRef final_actor = create_main(c, main_g);
  primitive_call(c, stringtab("_final"), NULL);
  args[0] = final_actor;
  gencall_runtime(c, "pony_destroy", args, 1, "");

  // Return the runtime exit code.
  LLVMBuildRet(c->builder, rc);
  codegen_finishfun(c);

  // External linkage for main().
  LLVMSetLinkage(func, LLVMExternalLinkage);
}
/**
 * Load depth/stencil values.
 * The stored values are linear, swizzle them.
 *
 * \param type  the data type of the fragment depth/stencil values
 * \param format_desc  description of the depth/stencil surface
 * \param loop_counter  the current loop iteration
 * \param depth_ptr  pointer to the depth/stencil values of this 4x4 block
 * \param depth_stride  stride of the depth/stencil buffer
 * \param z_fb  contains z values loaded from fb (may include padding)
 * \param s_fb  contains s values loaded from fb (may include padding)
 */
void
lp_build_depth_stencil_load_swizzled(struct gallivm_state *gallivm,
                                     struct lp_type z_src_type,
                                     const struct util_format_description *format_desc,
                                     LLVMValueRef depth_ptr,
                                     LLVMValueRef depth_stride,
                                     LLVMValueRef *z_fb,
                                     LLVMValueRef *s_fb,
                                     LLVMValueRef loop_counter)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH / 4];
   LLVMValueRef zs_dst1, zs_dst2;
   LLVMValueRef zs_dst_ptr;
   LLVMValueRef depth_offset1, depth_offset2;
   LLVMTypeRef load_ptr_type;
   unsigned depth_bytes = format_desc->block.bits / 8;
   struct lp_type zs_type = lp_depth_type(format_desc, z_src_type.length);
   struct lp_type zs_load_type = zs_type;

   zs_load_type.length = zs_load_type.length / 2;
   load_ptr_type = LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0);

   if (z_src_type.length == 4) {
      unsigned i;
      LLVMValueRef looplsb = LLVMBuildAnd(builder, loop_counter,
                                          lp_build_const_int32(gallivm, 1), "");
      LLVMValueRef loopmsb = LLVMBuildAnd(builder, loop_counter,
                                          lp_build_const_int32(gallivm, 2), "");
      LLVMValueRef offset2 = LLVMBuildMul(builder, loopmsb,
                                          depth_stride, "");
      depth_offset1 = LLVMBuildMul(builder, looplsb,
                                   lp_build_const_int32(gallivm, depth_bytes * 2), "");
      depth_offset1 = LLVMBuildAdd(builder, depth_offset1, offset2, "");

      /* just concatenate the loaded 2x2 values into 4-wide vector */
      for (i = 0; i < 4; i++) {
         shuffles[i] = lp_build_const_int32(gallivm, i);
      }
   }
   else {
      unsigned i;
      LLVMValueRef loopx2 = LLVMBuildShl(builder, loop_counter,
                                         lp_build_const_int32(gallivm, 1), "");
      assert(z_src_type.length == 8);
      depth_offset1 = LLVMBuildMul(builder, loopx2, depth_stride, "");
      /*
       * We load 2x4 values, and need to swizzle them (order
       * 0,1,4,5,2,3,6,7) - not so hot with avx unfortunately.
       */
      for (i = 0; i < 8; i++) {
         shuffles[i] = lp_build_const_int32(gallivm,
                                            (i&1) + (i&2) * 2 + (i&4) / 2);
      }
   }

   depth_offset2 = LLVMBuildAdd(builder, depth_offset1, depth_stride, "");

   /* Load current z/stencil values from z/stencil buffer */
   zs_dst_ptr = LLVMBuildGEP(builder, depth_ptr, &depth_offset1, 1, "");
   zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr, load_ptr_type, "");
   zs_dst1 = LLVMBuildLoad(builder, zs_dst_ptr, "");
   zs_dst_ptr = LLVMBuildGEP(builder, depth_ptr, &depth_offset2, 1, "");
   zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr, load_ptr_type, "");
   zs_dst2 = LLVMBuildLoad(builder, zs_dst_ptr, "");

   *z_fb = LLVMBuildShuffleVector(builder, zs_dst1, zs_dst2,
                                  LLVMConstVector(shuffles, zs_type.length), "");
   *s_fb = *z_fb;

   if (format_desc->block.bits < z_src_type.width) {
      /* Extend destination ZS values (e.g., when reading from Z16_UNORM) */
      *z_fb = LLVMBuildZExt(builder, *z_fb,
                            lp_build_int_vec_type(gallivm, z_src_type), "");
   }

   else if (format_desc->block.bits > 32) {
      /* rely on llvm to handle too wide vector we have here nicely */
      unsigned i;
      struct lp_type typex2 = zs_type;
      struct lp_type s_type = zs_type;
      LLVMValueRef shuffles1[LP_MAX_VECTOR_LENGTH / 4];
      LLVMValueRef shuffles2[LP_MAX_VECTOR_LENGTH / 4];
      LLVMValueRef tmp;

      typex2.width = typex2.width / 2;
      typex2.length = typex2.length * 2;
      s_type.width = s_type.width / 2;
      s_type.floating = 0;

      tmp = LLVMBuildBitCast(builder, *z_fb,
                             lp_build_vec_type(gallivm, typex2), "");

      for (i = 0; i < zs_type.length; i++) {
         shuffles1[i] = lp_build_const_int32(gallivm, i * 2);
         shuffles2[i] = lp_build_const_int32(gallivm, i * 2 + 1);
      }
      *z_fb = LLVMBuildShuffleVector(builder, tmp, tmp,
                                     LLVMConstVector(shuffles1, zs_type.length), "");
      *s_fb = LLVMBuildShuffleVector(builder, tmp, tmp,
                                     LLVMConstVector(shuffles2, zs_type.length), "");
      *s_fb = LLVMBuildBitCast(builder, *s_fb,
                               lp_build_vec_type(gallivm, s_type), "");
   }

   lp_build_name(*z_fb, "z_dst");
   lp_build_name(*s_fb, "s_dst");
}
/*
 * Create a function that deforms a tuple of type desc up to natts columns.
 */
LLVMValueRef
slot_compile_deform(LLVMJitContext *context, TupleDesc desc, int natts)
{
    char       *funcname;

    LLVMModuleRef mod;
    LLVMBuilderRef b;

    LLVMTypeRef deform_sig;
    LLVMValueRef v_deform_fn;

    LLVMBasicBlockRef b_entry;
    LLVMBasicBlockRef b_adjust_unavail_cols;
    LLVMBasicBlockRef b_find_start;

    LLVMBasicBlockRef b_out;
    LLVMBasicBlockRef b_dead;
    LLVMBasicBlockRef *attcheckattnoblocks;
    LLVMBasicBlockRef *attstartblocks;
    LLVMBasicBlockRef *attisnullblocks;
    LLVMBasicBlockRef *attcheckalignblocks;
    LLVMBasicBlockRef *attalignblocks;
    LLVMBasicBlockRef *attstoreblocks;

    LLVMValueRef v_offp;

    LLVMValueRef v_tupdata_base;
    LLVMValueRef v_tts_values;
    LLVMValueRef v_tts_nulls;
    LLVMValueRef v_slotoffp;
    LLVMValueRef v_slowp;
    LLVMValueRef v_nvalidp;
    LLVMValueRef v_nvalid;
    LLVMValueRef v_maxatt;

    LLVMValueRef v_slot;

    LLVMValueRef v_tupleheaderp;
    LLVMValueRef v_tuplep;
    LLVMValueRef v_infomask1;
    LLVMValueRef v_infomask2;
    LLVMValueRef v_bits;

    LLVMValueRef v_hoff;

    LLVMValueRef v_hasnulls;

    /* last column (0 indexed) guaranteed to exist */
    int         guaranteed_column_number = -1;

    /* current known alignment */
    int         known_alignment = 0;

    /* if true, known_alignment describes definite offset of column */
    bool        attguaranteedalign = true;

    int         attnum;

    mod = llvm_mutable_module(context);

    funcname = llvm_expand_funcname(context, "deform");

    /*
     * Check which columns have to exist, so we don't have to check the
     * row's natts unnecessarily.
     */
    for (attnum = 0; attnum < desc->natts; attnum++)
    {
        Form_pg_attribute att = TupleDescAttr(desc, attnum);

        /*
         * If the column is possibly missing, we can't rely on its (or
         * subsequent) NOT NULL constraints to indicate minimum attributes in
         * the tuple, so stop here.
         */
        if (att->atthasmissing)
            break;

        /*
         * Column is NOT NULL and there've been no preceding missing columns,
         * it's guaranteed that all columns up to here exist at least in the
         * NULL bitmap.
         */
        if (att->attnotnull)
            guaranteed_column_number = attnum;
    }

    /* Create the signature and function */
    {
        LLVMTypeRef param_types[1];

        param_types[0] = l_ptr(StructTupleTableSlot);

        deform_sig = LLVMFunctionType(LLVMVoidType(), param_types,
                                      lengthof(param_types), 0);
    }
    v_deform_fn = LLVMAddFunction(mod, funcname, deform_sig);
    LLVMSetLinkage(v_deform_fn, LLVMInternalLinkage);
    LLVMSetParamAlignment(LLVMGetParam(v_deform_fn, 0), MAXIMUM_ALIGNOF);
    llvm_copy_attributes(AttributeTemplate, v_deform_fn);

    b_entry = LLVMAppendBasicBlock(v_deform_fn, "entry");
    b_adjust_unavail_cols = LLVMAppendBasicBlock(v_deform_fn,
                                                 "adjust_unavail_cols");
    b_find_start = LLVMAppendBasicBlock(v_deform_fn, "find_startblock");
    b_out = LLVMAppendBasicBlock(v_deform_fn, "outblock");
    b_dead = LLVMAppendBasicBlock(v_deform_fn, "deadblock");

    b = LLVMCreateBuilder();

    attcheckattnoblocks = palloc(sizeof(LLVMBasicBlockRef) * natts);
    attstartblocks = palloc(sizeof(LLVMBasicBlockRef) * natts);
    attisnullblocks = palloc(sizeof(LLVMBasicBlockRef) * natts);
    attcheckalignblocks = palloc(sizeof(LLVMBasicBlockRef) * natts);
    attalignblocks = palloc(sizeof(LLVMBasicBlockRef) * natts);
    attstoreblocks = palloc(sizeof(LLVMBasicBlockRef) * natts);

    known_alignment = 0;

    LLVMPositionBuilderAtEnd(b, b_entry);

    /* perform allocas first, llvm only converts those to registers */
    v_offp = LLVMBuildAlloca(b, TypeSizeT, "v_offp");

    v_slot = LLVMGetParam(v_deform_fn, 0);

    v_tts_values = l_load_struct_gep(b, v_slot,
                                     FIELDNO_TUPLETABLESLOT_VALUES,
                                     "tts_values");
    v_tts_nulls = l_load_struct_gep(b, v_slot,
                                    FIELDNO_TUPLETABLESLOT_ISNULL,
                                    "tts_ISNULL");
    v_slotoffp = LLVMBuildStructGEP(b, v_slot, FIELDNO_TUPLETABLESLOT_OFF, "");
    v_slowp = LLVMBuildStructGEP(b, v_slot, FIELDNO_TUPLETABLESLOT_SLOW, "");
    v_nvalidp = LLVMBuildStructGEP(b, v_slot, FIELDNO_TUPLETABLESLOT_NVALID, "");

    v_tupleheaderp = l_load_struct_gep(b, v_slot,
                                       FIELDNO_TUPLETABLESLOT_TUPLE,
                                       "tupleheader");
    v_tuplep = l_load_struct_gep(b, v_tupleheaderp,
                                 FIELDNO_HEAPTUPLEDATA_DATA,
                                 "tuple");
    v_bits = LLVMBuildBitCast(b,
                              LLVMBuildStructGEP(b, v_tuplep,
                                                 FIELDNO_HEAPTUPLEHEADERDATA_BITS,
                                                 ""),
                              l_ptr(LLVMInt8Type()),
                              "t_bits");
    v_infomask1 = l_load_struct_gep(b, v_tuplep,
                                    FIELDNO_HEAPTUPLEHEADERDATA_INFOMASK,
                                    "infomask1");
    v_infomask2 = l_load_struct_gep(b, v_tuplep,
                                    FIELDNO_HEAPTUPLEHEADERDATA_INFOMASK2,
                                    "infomask2");

    /* t_infomask & HEAP_HASNULL */
    v_hasnulls = LLVMBuildICmp(b, LLVMIntNE,
                               LLVMBuildAnd(b,
                                            l_int16_const(HEAP_HASNULL),
                                            v_infomask1, ""),
                               l_int16_const(0),
                               "hasnulls");

    /* t_infomask2 & HEAP_NATTS_MASK */
    v_maxatt = LLVMBuildAnd(b,
                            l_int16_const(HEAP_NATTS_MASK),
                            v_infomask2,
                            "maxatt");

    v_hoff = l_load_struct_gep(b, v_tuplep,
                               FIELDNO_HEAPTUPLEHEADERDATA_HOFF,
                               "t_hoff");

    v_tupdata_base = LLVMBuildGEP(b,
                                  LLVMBuildBitCast(b, v_tuplep,
                                                   l_ptr(LLVMInt8Type()), ""),
                                  &v_hoff, 1,
                                  "v_tupdata_base");

    /*
     * Load tuple start offset from slot. Will be reset below in case there's
     * no existing deformed columns in slot.
     */
    {
        LLVMValueRef v_off_start;

        v_off_start = LLVMBuildLoad(b, v_slotoffp, "v_slot_off");
        v_off_start = LLVMBuildZExt(b, v_off_start, TypeSizeT, "");
        LLVMBuildStore(b, v_off_start, v_offp);
    }

    /* build the basic block for each attribute, need them as jump target */
    for (attnum = 0; attnum < natts; attnum++)
    {
        attcheckattnoblocks[attnum] =
            l_bb_append_v(v_deform_fn, "block.attr.%d.attcheckattno", attnum);
        attstartblocks[attnum] =
            l_bb_append_v(v_deform_fn, "block.attr.%d.start", attnum);
        attisnullblocks[attnum] =
            l_bb_append_v(v_deform_fn, "block.attr.%d.attisnull", attnum);
        attcheckalignblocks[attnum] =
            l_bb_append_v(v_deform_fn, "block.attr.%d.attcheckalign", attnum);
        attalignblocks[attnum] =
            l_bb_append_v(v_deform_fn, "block.attr.%d.align", attnum);
        attstoreblocks[attnum] =
            l_bb_append_v(v_deform_fn, "block.attr.%d.store", attnum);
    }

    /*
     * Check whether it's guaranteed that all the desired attributes are
     * available in the tuple. If so, we can start deforming. If not, need to
     * make sure to fetch the missing columns.
     */
    if ((natts - 1) <= guaranteed_column_number)
    {
        /* just skip through unnecessary blocks */
        LLVMBuildBr(b, b_adjust_unavail_cols);
        LLVMPositionBuilderAtEnd(b, b_adjust_unavail_cols);
        LLVMBuildBr(b, b_find_start);
    }
    else
    {
        LLVMValueRef v_params[3];

        /* branch if not all columns available */
        LLVMBuildCondBr(b,
                        LLVMBuildICmp(b, LLVMIntULT,
                                      v_maxatt,
                                      l_int16_const(natts),
                                      ""),
                        b_adjust_unavail_cols,
                        b_find_start);

        /* if not, memset tts_isnull of relevant cols to true */
        LLVMPositionBuilderAtEnd(b, b_adjust_unavail_cols);

        v_params[0] = v_slot;
        v_params[1] = LLVMBuildZExt(b, v_maxatt, LLVMInt32Type(), "");
        v_params[2] = l_int32_const(natts);
        LLVMBuildCall(b, llvm_get_decl(mod, FuncSlotGetmissingattrs),
                      v_params, lengthof(v_params), "");
        LLVMBuildBr(b, b_find_start);
    }

    LLVMPositionBuilderAtEnd(b, b_find_start);

    v_nvalid = LLVMBuildLoad(b, v_nvalidp, "");

    /*
     * Build switch to go from nvalid to the right startblock.  Callers
     * currently don't have the knowledge, but it'd be good for performance
     * to avoid this check when it's known that the slot is empty (e.g. in
     * scan nodes).
     */
    if (true)
    {
        LLVMValueRef v_switch = LLVMBuildSwitch(b, v_nvalid,
                                                b_dead, natts);

        for (attnum = 0; attnum < natts; attnum++)
        {
            LLVMValueRef v_attno = l_int32_const(attnum);

            LLVMAddCase(v_switch, v_attno, attcheckattnoblocks[attnum]);
        }
    }
    else
    {
        /* jump from entry block to first block */
        LLVMBuildBr(b, attcheckattnoblocks[0]);
    }

    LLVMPositionBuilderAtEnd(b, b_dead);
    LLVMBuildUnreachable(b);

    /*
     * Iterate over each attribute that needs to be deformed, build code to
     * deform it.
     */
    for (attnum = 0; attnum < natts; attnum++)
    {
        Form_pg_attribute att = TupleDescAttr(desc, attnum);
        LLVMValueRef v_incby;
        int         alignto;
        LLVMValueRef l_attno = l_int16_const(attnum);
        LLVMValueRef v_attdatap;
        LLVMValueRef v_resultp;

        /* build block checking whether we did all the necessary attributes */
        LLVMPositionBuilderAtEnd(b, attcheckattnoblocks[attnum]);

        /*
         * If this is the first attribute, slot->tts_nvalid was 0. Therefore
         * also reset the offset to 0; it may be set from a previous
         * execution.
         */
        if (attnum == 0)
        {
            LLVMBuildStore(b, l_sizet_const(0), v_offp);
        }

        /*
         * Build check whether column is available (i.e. whether the tuple
         * has that many columns stored). We can avoid the branch if we know
         * there's a subsequent NOT NULL column.
         */
        if (attnum <= guaranteed_column_number)
        {
            LLVMBuildBr(b, attstartblocks[attnum]);
        }
        else
        {
            LLVMValueRef v_islast;

            v_islast = LLVMBuildICmp(b, LLVMIntUGE,
                                     l_attno,
                                     v_maxatt,
                                     "heap_natts");
            LLVMBuildCondBr(b, v_islast, b_out, attstartblocks[attnum]);
        }
        LLVMPositionBuilderAtEnd(b, attstartblocks[attnum]);

        /*
         * Check for nulls if necessary. No need to take missing attributes
         * into account, because in case they're present the heaptuple's
         * natts would have indicated that a slot_getmissingattrs() is
         * needed.
         */
        if (!att->attnotnull)
        {
            LLVMBasicBlockRef b_ifnotnull;
            LLVMBasicBlockRef b_ifnull;
            LLVMBasicBlockRef b_next;
            LLVMValueRef v_attisnull;
            LLVMValueRef v_nullbyteno;
            LLVMValueRef v_nullbytemask;
            LLVMValueRef v_nullbyte;
            LLVMValueRef v_nullbit;

            b_ifnotnull = attcheckalignblocks[attnum];
            b_ifnull = attisnullblocks[attnum];

            if (attnum + 1 == natts)
                b_next = b_out;
            else
                b_next = attcheckattnoblocks[attnum + 1];

            v_nullbyteno = l_int32_const(attnum >> 3);
            v_nullbytemask = l_int8_const(1 << ((attnum) & 0x07));
            v_nullbyte = l_load_gep1(b, v_bits, v_nullbyteno, "attnullbyte");

            v_nullbit = LLVMBuildICmp(b,
                                      LLVMIntEQ,
                                      LLVMBuildAnd(b, v_nullbyte,
                                                   v_nullbytemask, ""),
                                      l_int8_const(0),
                                      "attisnull");

            v_attisnull = LLVMBuildAnd(b, v_hasnulls, v_nullbit, "");

            LLVMBuildCondBr(b, v_attisnull, b_ifnull, b_ifnotnull);

            LLVMPositionBuilderAtEnd(b, b_ifnull);

            /* store null-byte */
            LLVMBuildStore(b,
                           l_int8_const(1),
                           LLVMBuildGEP(b, v_tts_nulls, &l_attno, 1, ""));
            /* store zero datum */
            LLVMBuildStore(b,
                           l_sizet_const(0),
                           LLVMBuildGEP(b, v_tts_values, &l_attno, 1, ""));

            LLVMBuildBr(b, b_next);
            attguaranteedalign = false;
        }
        else
        {
/**
 * Gather elements from scatter positions in memory into a single vector.
 * Use for fetching texels from a texture.
 * For SSE, typical values are length=4, src_width=32, dst_width=32.
 *
 * When src_width < dst_width, the return value can be justified in
 * one of two ways:
 * "integer justification" is used when the caller treats the destination
 * as a packed integer bitmask, as described by the channels' "shift" and
 * "width" fields;
 * "vector justification" is used when the caller casts the destination
 * to a vector and needs channel X to be in vector element 0.
 *
 * @param length length of the offsets
 * @param src_width src element width in bits
 * @param dst_type result element type (src will be expanded to fit,
 *        but truncation is not allowed)
 *        (this may be a vector, must be pot sized)
 * @param aligned whether the data is guaranteed to be aligned (to src_width)
 * @param base_ptr base pointer, needs to be a i8 pointer type.
 * @param offsets vector with offsets
 * @param vector_justify select vector rather than integer justification
 */
LLVMValueRef
lp_build_gather(struct gallivm_state *gallivm,
                unsigned length,
                unsigned src_width,
                struct lp_type dst_type,
                boolean aligned,
                LLVMValueRef base_ptr,
                LLVMValueRef offsets,
                boolean vector_justify)
{
   LLVMValueRef res;
   boolean need_expansion = src_width < dst_type.width * dst_type.length;
   boolean vec_fetch;
   struct lp_type fetch_type, fetch_dst_type;
   LLVMTypeRef src_type;

   assert(src_width <= dst_type.width * dst_type.length);

   /*
    * This is quite a mess...
    * Figure out if the fetch should be done as:
    * a) scalar or vector
    * b) float or int
    *
    * As an example, for a 96bit fetch expanded into 4x32bit, it is better
    * to use (3x32bit) vector type (then pad the vector). Otherwise, the
    * zext will cause extra instructions.
    * However, the same isn't true for 3x16bit (the codegen for that is
    * completely worthless on x86 simd, and for 3x8bit it is way worse
    * still, don't try that... (To get really good code out of llvm for
    * these cases, the only way is to decompose the fetches manually
    * into 1x32bit/1x16bit, or 1x16/1x8bit respectively, although the latter
    * case requires sse41, otherwise simple scalar zext is way better.
    * But probably not important enough, so don't bother.)
    * Also, we try to honor the floating bit of destination (but isn't
    * possible if caller asks for instance for 2x32bit dst_type with
    * 48bit fetch - the idea would be to use 3x16bit fetch, pad and
    * cast to 2x32f type, so the fetch is always int and on top of that
    * we avoid the vec pad and use scalar zext due the above mentioned
    * issue).
    * Note this is optimized for x86 sse2 and up backend. Could be tweaked
    * for other archs if necessary...
    */
   if (((src_width % 32) == 0) && ((src_width % dst_type.width) == 0) &&
       (dst_type.length > 1)) {
      /* use vector fetch (if dst_type is vector) */
      vec_fetch = TRUE;
      if (dst_type.floating) {
         fetch_type = lp_type_float_vec(dst_type.width, src_width);
      } else {
         fetch_type = lp_type_int_vec(dst_type.width, src_width);
      }
      /* intentionally not using lp_build_vec_type here */
      src_type = LLVMVectorType(lp_build_elem_type(gallivm, fetch_type),
                                fetch_type.length);
      fetch_dst_type = fetch_type;
      fetch_dst_type.length = dst_type.length;
   } else {
      /* use scalar fetch */
      vec_fetch = FALSE;
      if (dst_type.floating && ((src_width == 32) || (src_width == 64))) {
         fetch_type = lp_type_float(src_width);
      } else {
         fetch_type = lp_type_int(src_width);
      }
      src_type = lp_build_vec_type(gallivm, fetch_type);
      fetch_dst_type = fetch_type;
      fetch_dst_type.width = dst_type.width * dst_type.length;
   }

   if (length == 1) {
      /* Scalar */
      res = lp_build_gather_elem_vec(gallivm, length,
                                     src_width, src_type, fetch_dst_type,
                                     aligned, base_ptr, offsets, 0,
                                     vector_justify);
      return LLVMBuildBitCast(gallivm->builder, res,
                              lp_build_vec_type(gallivm, dst_type), "");
      /*
       * Excluding expansion from these paths because if you need it for
       * 32bit/64bit fetches you're doing it wrong (this is gather, not
       * conversion) and it would be awkward for floats.
       */
   } else if (util_cpu_caps.has_avx2 && !need_expansion &&
              src_width == 32 && (length == 4 || length == 8)) {
      return lp_build_gather_avx2(gallivm, length, src_width, dst_type,
                                  base_ptr, offsets);
   /*
    * This looks bad on paper wrt throughput/latency on Haswell.
    * Even on Broadwell it doesn't look stellar.
    * Albeit no measurements were done (but tested to work).
    * Should definitely enable on Skylake.
    * (In general, should be more of a win if the fetch is 256bit wide -
    * this is true for the 32bit case above too.)
    */
   } else if (0 && util_cpu_caps.has_avx2 && !need_expansion &&
              src_width == 64 && (length == 2 || length == 4)) {
      return lp_build_gather_avx2(gallivm, length, src_width, dst_type,
                                  base_ptr, offsets);
   } else {
      /* Vector */
      LLVMValueRef elems[LP_MAX_VECTOR_WIDTH / 8];
      unsigned i;
      boolean vec_zext = FALSE;
      struct lp_type res_type, gather_res_type;
      LLVMTypeRef res_t, gather_res_t;

      res_type = fetch_dst_type;
      res_type.length *= length;
      gather_res_type = res_type;

      if (src_width == 16 && dst_type.width == 32 && dst_type.length == 1) {
         /*
          * Note that llvm is never able to optimize zext/insert combos
          * directly (i.e. zero the simd reg, then place the elements into
          * the appropriate place directly). (I think this has to do with
          * scalar/vector transition.) And scalar 16->32bit zext simd loads
          * aren't possible (instead loading to scalar reg first).
          * No idea about other archs...
          * We could do this manually, but instead we just use a vector
          * zext, which is simple enough (and, in fact, llvm might optimize
          * this away).
          * (We're not trying that with other bit widths as that might not be
          * easier, in particular with 8 bit values at least with only sse2.)
          */
         assert(vec_fetch == FALSE);
         gather_res_type.width /= 2;
         fetch_dst_type = fetch_type;
         src_type = lp_build_vec_type(gallivm, fetch_type);
         vec_zext = TRUE;
      }
      res_t = lp_build_vec_type(gallivm, res_type);
      gather_res_t = lp_build_vec_type(gallivm, gather_res_type);
      res = LLVMGetUndef(gather_res_t);
      for (i = 0; i < length; ++i) {
         LLVMValueRef index = lp_build_const_int32(gallivm, i);
         elems[i] = lp_build_gather_elem_vec(gallivm, length,
                                             src_width, src_type, fetch_dst_type,
                                             aligned, base_ptr, offsets, i,
                                             vector_justify);
         if (!vec_fetch) {
            res = LLVMBuildInsertElement(gallivm->builder, res, elems[i], index, "");
         }
      }
      if (vec_zext) {
         res = LLVMBuildZExt(gallivm->builder, res, res_t, "");
         if (vector_justify) {
#ifdef PIPE_ARCH_BIG_ENDIAN
            unsigned sv = dst_type.width - src_width;
            res = LLVMBuildShl(gallivm->builder, res,
                               lp_build_const_int_vec(gallivm, res_type, sv), "");
#endif
         }
      }
      if (vec_fetch) {
         /*
          * Do bitcast now otherwise llvm might get some funny ideas wrt
          * float/int types...
          */
         for (i = 0; i < length; i++) {
            elems[i] = LLVMBuildBitCast(gallivm->builder, elems[i],
                                        lp_build_vec_type(gallivm, dst_type), "");
         }
         res = lp_build_concat(gallivm, elems, dst_type, length);
      } else {
         struct lp_type really_final_type = dst_type;
         assert(res_type.length * res_type.width ==
                dst_type.length * dst_type.width * length);
         really_final_type.length *= length;
         res = LLVMBuildBitCast(gallivm->builder, res,
                                lp_build_vec_type(gallivm, really_final_type), "");
      }
   }

   return res;
}
static LLVMValueRef box_is_box(compile_t* c, ast_t* left_type,
  LLVMValueRef l_value, LLVMValueRef r_value, int possible_boxes)
{
  pony_assert(LLVMGetTypeKind(LLVMTypeOf(l_value)) == LLVMPointerTypeKind);
  pony_assert(LLVMGetTypeKind(LLVMTypeOf(r_value)) == LLVMPointerTypeKind);

  LLVMBasicBlockRef this_block = LLVMGetInsertBlock(c->builder);
  LLVMBasicBlockRef checkbox_block = codegen_block(c, "is_checkbox");
  LLVMBasicBlockRef box_block = codegen_block(c, "is_box");
  LLVMBasicBlockRef num_block = NULL;

  if((possible_boxes & BOXED_SUBTYPES_NUMERIC) != 0)
    num_block = codegen_block(c, "is_num");

  LLVMBasicBlockRef tuple_block = NULL;

  if((possible_boxes & BOXED_SUBTYPES_TUPLE) != 0)
    tuple_block = codegen_block(c, "is_tuple");

  LLVMBasicBlockRef post_block = codegen_block(c, "is_post");

  LLVMValueRef eq_addr = LLVMBuildICmp(c->builder, LLVMIntEQ, l_value,
    r_value, "");
  LLVMBuildCondBr(c->builder, eq_addr, post_block, checkbox_block);

  // Check whether we have two boxed objects of the same type.
  LLVMPositionBuilderAtEnd(c->builder, checkbox_block);
  LLVMValueRef l_desc = gendesc_fetch(c, l_value);
  LLVMValueRef r_desc = gendesc_fetch(c, r_value);
  LLVMValueRef same_type = LLVMBuildICmp(c->builder, LLVMIntEQ, l_desc,
    r_desc, "");
  LLVMValueRef l_typeid = NULL;

  if((possible_boxes & BOXED_SUBTYPES_UNBOXED) != 0)
  {
    l_typeid = gendesc_typeid(c, l_value);
    LLVMValueRef boxed_mask = LLVMConstInt(c->i32, 1, false);
    LLVMValueRef left_boxed = LLVMBuildAnd(c->builder, l_typeid, boxed_mask,
      "");
    LLVMValueRef zero = LLVMConstInt(c->i32, 0, false);
    left_boxed = LLVMBuildICmp(c->builder, LLVMIntEQ, left_boxed, zero, "");
    LLVMValueRef both_boxed = LLVMBuildAnd(c->builder, same_type, left_boxed,
      "");
    LLVMBuildCondBr(c->builder, both_boxed, box_block, post_block);
  } else {
    LLVMBuildCondBr(c->builder, same_type, box_block, post_block);
  }

  // Check whether it's a numeric primitive or a tuple.
  LLVMPositionBuilderAtEnd(c->builder, box_block);

  if((possible_boxes & BOXED_SUBTYPES_BOXED) == BOXED_SUBTYPES_BOXED)
  {
    if(l_typeid == NULL)
      l_typeid = gendesc_typeid(c, l_value);

    LLVMValueRef num_mask = LLVMConstInt(c->i32, 2, false);
    LLVMValueRef boxed_num = LLVMBuildAnd(c->builder, l_typeid, num_mask, "");
    LLVMValueRef zero = LLVMConstInt(c->i32, 0, false);
    boxed_num = LLVMBuildICmp(c->builder, LLVMIntEQ, boxed_num, zero, "");
    LLVMBuildCondBr(c->builder, boxed_num, num_block, tuple_block);
  } else if((possible_boxes & BOXED_SUBTYPES_NUMERIC) != 0) {
    LLVMBuildBr(c->builder, num_block);
  } else {
    pony_assert((possible_boxes & BOXED_SUBTYPES_TUPLE) != 0);
    LLVMBuildBr(c->builder, tuple_block);
  }

  LLVMValueRef args[3];
  LLVMValueRef is_num = NULL;

  if(num_block != NULL)
  {
    // Get the machine word size and memcmp without unboxing.
    LLVMPositionBuilderAtEnd(c->builder, num_block);

    if(l_typeid == NULL)
      l_typeid = gendesc_typeid(c, l_value);

    LLVMValueRef num_sizes = LLVMBuildBitCast(c->builder, c->numeric_sizes,
      c->void_ptr, "");
    args[0] = LLVMBuildZExt(c->builder, l_typeid, c->intptr, "");
    LLVMValueRef size = LLVMBuildInBoundsGEP(c->builder, num_sizes, args, 1,
      "");
    size = LLVMBuildBitCast(c->builder, size, LLVMPointerType(c->i32, 0), "");
    size = LLVMBuildLoad(c->builder, size, "");
    LLVMSetAlignment(size, 4);

    LLVMValueRef one = LLVMConstInt(c->i32, 1, false);
    args[0] = LLVMBuildInBoundsGEP(c->builder, l_value, &one, 1, "");
    args[0] = LLVMBuildBitCast(c->builder, args[0], c->void_ptr, "");
    args[1] = LLVMBuildInBoundsGEP(c->builder, r_value, &one, 1, "");
    args[1] = LLVMBuildBitCast(c->builder, args[1], c->void_ptr, "");
    args[2] = LLVMBuildZExt(c->builder, size, c->intptr, "");

    is_num = gencall_runtime(c, "memcmp", args, 3, "");
    is_num = LLVMBuildICmp(c->builder, LLVMIntEQ, is_num,
      LLVMConstInt(c->i32, 0, false), "");
    LLVMBuildBr(c->builder, post_block);
  }

  LLVMValueRef is_tuple = NULL;

  if(tuple_block != NULL)
  {
    // Call the type-specific __is function, which will unbox the tuples.
    LLVMPositionBuilderAtEnd(c->builder, tuple_block);

    reach_type_t* r_left = reach_type(c->reach, left_type);
    reach_method_t* is_fn = reach_method(r_left, TK_BOX, stringtab("__is"),
      NULL);
    pony_assert(is_fn != NULL);

    LLVMValueRef func = gendesc_vtable(c, l_value, is_fn->vtable_index);

    LLVMTypeRef params[2];
    params[0] = c->object_ptr;
    params[1] = c->object_ptr;
    LLVMTypeRef type = LLVMFunctionType(c->i1, params, 2, false);
    func = LLVMBuildBitCast(c->builder, func, LLVMPointerType(type, 0), "");
    args[0] = l_value;
    args[1] = r_value;
    is_tuple = codegen_call(c, func, args, 2);
    LLVMBuildBr(c->builder, post_block);
  }

  LLVMPositionBuilderAtEnd(c->builder, post_block);
  LLVMValueRef phi = LLVMBuildPhi(c->builder, c->i1, "");
  LLVMValueRef one = LLVMConstInt(c->i1, 1, false);
  LLVMValueRef zero = LLVMConstInt(c->i1, 0, false);
  LLVMAddIncoming(phi, &one, &this_block, 1);

  if(is_num != NULL)
    LLVMAddIncoming(phi, &is_num, &num_block, 1);

  if(is_tuple != NULL)
    LLVMAddIncoming(phi, &is_tuple, &tuple_block, 1);

  LLVMAddIncoming(phi, &zero, &checkbox_block, 1);
  return phi;
}
static LLVMValueRef gen_digestof_value(compile_t* c, ast_t* type,
  LLVMValueRef value)
{
  LLVMTypeRef impl_type = LLVMTypeOf(value);

  switch(LLVMGetTypeKind(impl_type))
  {
    case LLVMFloatTypeKind:
      value = LLVMBuildBitCast(c->builder, value, c->i32, "");
      return LLVMBuildZExt(c->builder, value, c->intptr, "");

    case LLVMDoubleTypeKind:
      value = LLVMBuildBitCast(c->builder, value, c->i64, "");
      return gen_digestof_int64(c, value);

    case LLVMIntegerTypeKind:
    {
      uint32_t width = LLVMGetIntTypeWidth(impl_type);

      if(width < 64)
      {
        return LLVMBuildZExt(c->builder, value, c->intptr, "");
      } else if(width == 64) {
        return gen_digestof_int64(c, value);
      } else if(width == 128) {
        LLVMValueRef shift = LLVMConstInt(c->i128, 64, false);
        LLVMValueRef high = LLVMBuildLShr(c->builder, value, shift, "");
        high = LLVMBuildTrunc(c->builder, high, c->i64, "");
        value = LLVMBuildTrunc(c->builder, value, c->i64, "");
        high = gen_digestof_int64(c, high);
        value = gen_digestof_int64(c, value);
        return LLVMBuildXor(c->builder, value, high, "");
      }
      break;
    }

    case LLVMStructTypeKind:
    {
      uint32_t count = LLVMCountStructElementTypes(impl_type);
      LLVMValueRef result = LLVMConstInt(c->intptr, 0, false);
      ast_t* child = ast_child(type);

      for(uint32_t i = 0; i < count; i++)
      {
        LLVMValueRef elem = LLVMBuildExtractValue(c->builder, value, i, "");
        elem = gen_digestof_value(c, child, elem);
        result = LLVMBuildXor(c->builder, result, elem, "");
        child = ast_sibling(child);
      }

      pony_assert(child == NULL);

      return result;
    }

    case LLVMPointerTypeKind:
      if(!is_known(type))
      {
        reach_type_t* t = reach_type(c->reach, type);
        int sub_kind = subtype_kind(t);

        if((sub_kind & SUBTYPE_KIND_BOXED) != 0)
          return gen_digestof_box(c, t, value, sub_kind);
      }

      return LLVMBuildPtrToInt(c->builder, value, c->intptr, "");

    default: {}
  }

  pony_assert(0);
  return NULL;
}
/** * Gather one element from scatter positions in memory. * Nearly the same as above, however the individual elements * may be vectors themselves, and fetches may be float type. * Can also do pad vector instead of ZExt. * * @sa lp_build_gather() */ static LLVMValueRef lp_build_gather_elem_vec(struct gallivm_state *gallivm, unsigned length, unsigned src_width, LLVMTypeRef src_type, struct lp_type dst_type, boolean aligned, LLVMValueRef base_ptr, LLVMValueRef offsets, unsigned i, boolean vector_justify) { LLVMValueRef ptr, res; LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0); assert(LLVMTypeOf(base_ptr) == LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0)); ptr = lp_build_gather_elem_ptr(gallivm, length, base_ptr, offsets, i); ptr = LLVMBuildBitCast(gallivm->builder, ptr, src_ptr_type, ""); res = LLVMBuildLoad(gallivm->builder, ptr, ""); /* XXX * On some archs we probably really want to avoid having to deal * with alignments lower than 4 bytes (if fetch size is a power of * two >= 32). On x86 it doesn't matter, however. * We should be able to guarantee full alignment for any kind of texture * fetch (except ARB_texture_buffer_range, oops), but not vertex fetch * (there's PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY and friends * but I don't think that's quite what we wanted). * For ARB_texture_buffer_range, PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT * looks like a good fit, but it seems this cap bit (and OpenGL) aren't * enforcing what we want (which is what d3d10 does, the offset needs to * be aligned to element size, but GL has bytes regardless of element * size which would only leave us with minimum alignment restriction of 16 * which doesn't make much sense if the type isn't 4x32bit). Due to * translation of offsets to first_elem in sampler_views it actually seems * gallium could not do anything else except 16 no matter what... */ if (!aligned) { LLVMSetAlignment(res, 1); } else if (!util_is_power_of_two(src_width)) { /* * Full alignment is impossible, assume the caller really meant * the individual elements were aligned (e.g. 3x32bit format). * And yes the generated code may otherwise crash, llvm will * really assume 128bit alignment with a 96bit fetch (I suppose * that makes sense as it can just assume the upper 32bit to be * whatever). * Maybe the caller should be able to explicitly set this, but * this should cover all the 3-channel formats. */ if (((src_width / 24) * 24 == src_width) && util_is_power_of_two(src_width / 24)) { LLVMSetAlignment(res, src_width / 24); } else { LLVMSetAlignment(res, 1); } } assert(src_width <= dst_type.width * dst_type.length); if (src_width < dst_type.width * dst_type.length) { if (dst_type.length > 1) { res = lp_build_pad_vector(gallivm, res, dst_type.length); /* * vector_justify hopefully a non-issue since we only deal * with src_width >= 32 here? */ } else { LLVMTypeRef dst_elem_type = lp_build_vec_type(gallivm, dst_type); /* * Only valid if src_ptr_type is int type... */ res = LLVMBuildZExt(gallivm->builder, res, dst_elem_type, ""); #ifdef PIPE_ARCH_BIG_ENDIAN if (vector_justify) { res = LLVMBuildShl(gallivm->builder, res, LLVMConstInt(dst_elem_type, dst_type.width - src_width, 0), ""); } if (src_width == 48) { /* Load 3x16 bit vector. * The sequence of loads on big-endian hardware proceeds as follows. * 16-bit fields are denoted by X, Y, Z, and 0. In memory, the sequence * of three fields appears in the order X, Y, Z. 
* * Load 32-bit word: 0.0.X.Y * Load 16-bit halfword: 0.0.0.Z * Rotate left: 0.X.Y.0 * Bitwise OR: 0.X.Y.Z * * The order in which we need the fields in the result is 0.Z.Y.X, * the same as on little-endian; permute 16-bit fields accordingly * within 64-bit register: */ LLVMValueRef shuffles[4] = { lp_build_const_int32(gallivm, 2), lp_build_const_int32(gallivm, 1), lp_build_const_int32(gallivm, 0), lp_build_const_int32(gallivm, 3), }; res = LLVMBuildBitCast(gallivm->builder, res, lp_build_vec_type(gallivm, lp_type_uint_vec(16, 4*16)), ""); res = LLVMBuildShuffleVector(gallivm->builder, res, res, LLVMConstVector(shuffles, 4), ""); res = LLVMBuildBitCast(gallivm->builder, res, dst_elem_type, ""); } #endif } } return res; }
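The doc comment above notes that padding can replace zero-extension when the destination is a vector. lp_build_pad_vector itself is not shown in this excerpt; the sketch below is an assumed implementation of that idea (helper name and the 16-lane cap are illustrative), widening a vector with a shufflevector against undef so the extra lanes stay undefined rather than zeroed.

#include <assert.h>
#include <llvm-c/Core.h>

/* Illustrative only: widen a vector to dst_length lanes by shuffling
 * with undef; the padding lanes are undefined, which is cheaper than
 * zero-extension when the consumer never reads them. */
static LLVMValueRef pad_vector_sketch(LLVMBuilderRef builder,
                                      LLVMValueRef vec, unsigned dst_length)
{
   LLVMTypeRef vec_type = LLVMTypeOf(vec);
   unsigned src_length = LLVMGetVectorSize(vec_type);
   LLVMValueRef shuffles[16]; /* illustrative cap on lane count */
   unsigned i;

   assert(src_length <= dst_length && dst_length <= 16);
   if (src_length == dst_length)
      return vec;

   /* Mask lanes below src_length pick real elements; the padding
    * lanes get an undef mask index, so their contents are undefined. */
   for (i = 0; i < dst_length; i++)
      shuffles[i] = i < src_length ? LLVMConstInt(LLVMInt32Type(), i, 0)
                                   : LLVMGetUndef(LLVMInt32Type());

   return LLVMBuildShuffleVector(builder, vec, LLVMGetUndef(vec_type),
                                 LLVMConstVector(shuffles, dst_length), "");
}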
/** * Truncate or expand the bitwidth. * * NOTE: Getting the right sign flags is crucial here, as we employ some * intrinsics that do saturation. */ void lp_build_resize(struct gallivm_state *gallivm, struct lp_type src_type, struct lp_type dst_type, const LLVMValueRef *src, unsigned num_srcs, LLVMValueRef *dst, unsigned num_dsts) { LLVMBuilderRef builder = gallivm->builder; LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH]; unsigned i; /* * We don't support float <-> int conversion here. That must be done * before/after calling this function. */ assert(src_type.floating == dst_type.floating); /* * We don't support double <-> float conversion yet, although it could be * added with little effort. */ assert((!src_type.floating && !dst_type.floating) || src_type.width == dst_type.width); /* We must not lose or gain channels. Only precision */ assert(src_type.length * num_srcs == dst_type.length * num_dsts); /* We don't support M:N conversion, only 1:N, M:1, or 1:1 */ assert(num_srcs == 1 || num_dsts == 1); assert(src_type.length <= LP_MAX_VECTOR_LENGTH); assert(dst_type.length <= LP_MAX_VECTOR_LENGTH); assert(num_srcs <= LP_MAX_VECTOR_LENGTH); assert(num_dsts <= LP_MAX_VECTOR_LENGTH); if (src_type.width > dst_type.width) { /* * Truncate bit width. */ assert(num_dsts == 1); if (src_type.width * src_type.length == dst_type.width * dst_type.length) { /* * Register width remains constant -- use vector packing intrinsics */ tmp[0] = lp_build_pack(gallivm, src_type, dst_type, TRUE, src, num_srcs); } else { if (src_type.width / dst_type.width > num_srcs) { /* * First change src vectors size (with shuffle) so they have the * same size as the destination vector, then pack normally. * Note: cannot use cast/extract because llvm generates atrocious code. */ unsigned size_ratio = (src_type.width * src_type.length) / (dst_type.length * dst_type.width); unsigned new_length = src_type.length / size_ratio; for (i = 0; i < size_ratio * num_srcs; i++) { unsigned start_index = (i % size_ratio) * new_length; tmp[i] = lp_build_extract_range(gallivm, src[i / size_ratio], start_index, new_length); } num_srcs *= size_ratio; src_type.length = new_length; tmp[0] = lp_build_pack(gallivm, src_type, dst_type, TRUE, tmp, num_srcs); } else { /* * Truncate bit width but expand vector size - first pack * then expand simply because this should be more AVX-friendly * for the cases we probably hit. */ unsigned size_ratio = (dst_type.width * dst_type.length) / (src_type.length * src_type.width); unsigned num_pack_srcs = num_srcs / size_ratio; dst_type.length = dst_type.length / size_ratio; for (i = 0; i < size_ratio; i++) { tmp[i] = lp_build_pack(gallivm, src_type, dst_type, TRUE, &src[i*num_pack_srcs], num_pack_srcs); } tmp[0] = lp_build_concat(gallivm, tmp, dst_type, size_ratio); } } } else if (src_type.width < dst_type.width) { /* * Expand bit width. */ assert(num_srcs == 1); if (src_type.width * src_type.length == dst_type.width * dst_type.length) { /* * Register width remains constant -- use vector unpack intrinsics */ lp_build_unpack(gallivm, src_type, dst_type, src[0], tmp, num_dsts); } else { /* * Do it element-wise. 
*/ assert(src_type.length * num_srcs == dst_type.length * num_dsts); for (i = 0; i < num_dsts; i++) { tmp[i] = lp_build_undef(gallivm, dst_type); } for (i = 0; i < src_type.length; ++i) { unsigned j = i / dst_type.length; LLVMValueRef srcindex = lp_build_const_int32(gallivm, i); LLVMValueRef dstindex = lp_build_const_int32(gallivm, i % dst_type.length); LLVMValueRef val = LLVMBuildExtractElement(builder, src[0], srcindex, ""); if (src_type.sign && dst_type.sign) { val = LLVMBuildSExt(builder, val, lp_build_elem_type(gallivm, dst_type), ""); } else { val = LLVMBuildZExt(builder, val, lp_build_elem_type(gallivm, dst_type), ""); } tmp[j] = LLVMBuildInsertElement(builder, tmp[j], val, dstindex, ""); } } } else { /* * No-op */ assert(num_srcs == 1); assert(num_dsts == 1); tmp[0] = src[0]; } for(i = 0; i < num_dsts; ++i) dst[i] = tmp[i]; }
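For the element-wise branch above, the essential rule is compact: truncate when narrowing, and when widening use sign-extension only if both source and destination are signed, otherwise zero-extension. A minimal standalone sketch of that rule for a single scalar over the LLVM-C API (the helper name is illustrative):

#include <llvm-c/Core.h>

/* Sketch: resize one scalar integer with the same sign policy as the
 * element-wise loop above. val must have integer type. */
static LLVMValueRef resize_int_sketch(LLVMBuilderRef builder, LLVMValueRef val,
                                      LLVMTypeRef dst_type,
                                      int src_sign, int dst_sign)
{
   unsigned src_width = LLVMGetIntTypeWidth(LLVMTypeOf(val));
   unsigned dst_width = LLVMGetIntTypeWidth(dst_type);

   if (src_width > dst_width)
      return LLVMBuildTrunc(builder, val, dst_type, "");
   if (src_width < dst_width)
      return (src_sign && dst_sign) ? LLVMBuildSExt(builder, val, dst_type, "")
                                    : LLVMBuildZExt(builder, val, dst_type, "");
   return val; /* same width: no-op */
}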
struct cl2llvm_val_t *llvm_type_cast(struct cl2llvm_val_t * original_val, struct cl2llvmTypeWrap *totype_w_sign) { struct cl2llvm_val_t *llvm_val = cl2llvm_val_create(); int i; struct cl2llvmTypeWrap *elem_type; struct cl2llvm_val_t *cast_original_val; LLVMValueRef index; LLVMValueRef vector_addr; LLVMValueRef vector; LLVMValueRef const_elems[16]; LLVMTypeRef fromtype = cl2llvmTypeWrapGetLlvmType(original_val->type); LLVMTypeRef totype = cl2llvmTypeWrapGetLlvmType(totype_w_sign); int fromsign = cl2llvmTypeWrapGetSign(original_val->type); int tosign = cl2llvmTypeWrapGetSign(totype_w_sign); /*By default the return value is the same as the original_val*/ llvm_val->val = original_val->val; cl2llvmTypeWrapSetLlvmType(llvm_val->type, cl2llvmTypeWrapGetLlvmType(original_val->type)); cl2llvmTypeWrapSetSign(llvm_val->type, cl2llvmTypeWrapGetSign(original_val->type)); snprintf(temp_var_name, sizeof temp_var_name, "tmp_%d", temp_var_count++); /* Check that fromtype is not a vector, unless both types are identical. */ if (LLVMGetTypeKind(fromtype) == LLVMVectorTypeKind) { if ((LLVMGetVectorSize(fromtype) != LLVMGetVectorSize(totype) || LLVMGetElementType(fromtype) != LLVMGetElementType(totype)) || fromsign != tosign) { if (LLVMGetTypeKind(totype) == LLVMVectorTypeKind) cl2llvm_yyerror("Casts between vector types are forbidden"); cl2llvm_yyerror("A vector may not be cast to any other type."); } } /* If totype is a vector, create a vector whose components are equal to original_val */ if (LLVMGetTypeKind(totype) == LLVMVectorTypeKind && LLVMGetTypeKind(fromtype) != LLVMVectorTypeKind) { /*Go to entry block and declare vector*/ LLVMPositionBuilder(cl2llvm_builder, cl2llvm_current_function->entry_block, cl2llvm_current_function->branch_instr); snprintf(temp_var_name, sizeof temp_var_name, "tmp_%d", temp_var_count++); vector_addr = LLVMBuildAlloca(cl2llvm_builder, totype, temp_var_name); LLVMPositionBuilderAtEnd(cl2llvm_builder, current_basic_block); /* Load vector */ snprintf(temp_var_name, sizeof temp_var_name, "tmp_%d", temp_var_count++); vector = LLVMBuildLoad(cl2llvm_builder, vector_addr, temp_var_name); /* Create object to represent element type of totype */ elem_type = cl2llvmTypeWrapCreate(LLVMGetElementType(totype), tosign); /* If original_val is constant create a constant vector */ if (LLVMIsConstant(original_val->val)) { cast_original_val = llvm_type_cast(original_val, elem_type); for (i = 0; i < LLVMGetVectorSize(totype); i++) const_elems[i] = cast_original_val->val; vector = LLVMConstVector(const_elems, LLVMGetVectorSize(totype)); llvm_val->val = vector; cl2llvm_val_free(cast_original_val); } /* If original value is not constant insert elements */ else { for (i = 0; i < LLVMGetVectorSize(totype); i++) { index = LLVMConstInt(LLVMInt32Type(), i, 0); cast_original_val = llvm_type_cast(original_val, elem_type); snprintf(temp_var_name, sizeof temp_var_name, "tmp_%d", temp_var_count++); vector = LLVMBuildInsertElement(cl2llvm_builder, vector, cast_original_val->val, index, temp_var_name); cl2llvm_val_free(cast_original_val); } } cl2llvmTypeWrapFree(elem_type); llvm_val->val = vector; } if (fromtype == LLVMInt64Type()) { if (totype == LLVMDoubleType()) { if (fromsign) { llvm_val->val = LLVMBuildSIToFP(cl2llvm_builder, original_val->val, LLVMDoubleType(), temp_var_name); } else { llvm_val->val = LLVMBuildUIToFP(cl2llvm_builder, original_val->val, LLVMDoubleType(), temp_var_name); } cl2llvmTypeWrapSetSign(llvm_val->type, 1); } else if (totype == LLVMFloatType()) { if (fromsign) { llvm_val->val = 
LLVMBuildSIToFP(cl2llvm_builder, original_val->val, LLVMFloatType(), temp_var_name); } else { llvm_val->val = LLVMBuildUIToFP(cl2llvm_builder, original_val->val, LLVMFloatType(), temp_var_name); } cl2llvmTypeWrapSetSign(llvm_val->type, 1); } else if (totype == LLVMHalfType()) { if (fromsign) { llvm_val->val = LLVMBuildSIToFP(cl2llvm_builder, original_val->val, LLVMHalfType(), temp_var_name); } else { llvm_val->val = LLVMBuildUIToFP(cl2llvm_builder, original_val->val, LLVMHalfType(), temp_var_name); } cl2llvmTypeWrapSetSign(llvm_val->type, 1); } else if (totype == LLVMInt64Type()) { if (tosign) cl2llvmTypeWrapSetSign(llvm_val->type, 1); else cl2llvmTypeWrapSetSign(llvm_val->type, 0); temp_var_count--; } else if (totype == LLVMInt32Type()) { llvm_val->val = LLVMBuildTrunc(cl2llvm_builder, original_val->val, LLVMInt32Type(), temp_var_name); if(tosign) cl2llvmTypeWrapSetSign(llvm_val->type, 1); else cl2llvmTypeWrapSetSign(llvm_val->type, 0); } else if (totype == LLVMInt16Type()) { llvm_val->val = LLVMBuildTrunc(cl2llvm_builder, original_val->val, LLVMInt16Type(), temp_var_name); if(tosign) cl2llvmTypeWrapSetSign(llvm_val->type, 1); else cl2llvmTypeWrapSetSign(llvm_val->type, 0); } else if (totype == LLVMInt8Type()) { llvm_val->val = LLVMBuildTrunc(cl2llvm_builder, original_val->val, LLVMInt8Type(), temp_var_name); if(tosign) cl2llvmTypeWrapSetSign(llvm_val->type, 1); else cl2llvmTypeWrapSetSign(llvm_val->type, 0); } else if (totype == LLVMInt1Type()) { llvm_val->val = LLVMBuildTrunc(cl2llvm_builder, original_val->val, LLVMInt1Type(), temp_var_name); if(tosign) cl2llvmTypeWrapSetSign(llvm_val->type, 1); else cl2llvmTypeWrapSetSign(llvm_val->type, 0); } } else if (fromtype == LLVMInt32Type()) { if (totype == LLVMDoubleType()) { if (fromsign) { llvm_val->val = LLVMBuildSIToFP(cl2llvm_builder, original_val->val, LLVMDoubleType(), temp_var_name); } else { llvm_val->val = LLVMBuildUIToFP(cl2llvm_builder, original_val->val, LLVMDoubleType(), temp_var_name); } cl2llvmTypeWrapSetSign(llvm_val->type, 1); } else if (totype == LLVMFloatType()) { if (fromsign) { llvm_val->val = LLVMBuildSIToFP(cl2llvm_builder, original_val->val, LLVMFloatType(), temp_var_name); } else { llvm_val->val = LLVMBuildUIToFP(cl2llvm_builder, original_val->val, LLVMFloatType(), temp_var_name); } cl2llvmTypeWrapSetSign(llvm_val->type, 1); } else if (totype == LLVMHalfType()) { if (fromsign) { llvm_val->val = LLVMBuildSIToFP(cl2llvm_builder, original_val->val, LLVMHalfType(), temp_var_name); } else { llvm_val->val = LLVMBuildUIToFP(cl2llvm_builder, original_val->val, LLVMHalfType(), temp_var_name); } cl2llvmTypeWrapSetSign(llvm_val->type, 1); } else if (totype == LLVMInt64Type()) { if (fromsign) { llvm_val->val = LLVMBuildSExt(cl2llvm_builder, original_val->val, LLVMInt64Type(), temp_var_name); } else { llvm_val->val = LLVMBuildZExt(cl2llvm_builder, original_val->val, LLVMInt64Type(), temp_var_name); } if (tosign) cl2llvmTypeWrapSetSign(llvm_val->type, 1); else cl2llvmTypeWrapSetSign(llvm_val->type, 0); } else if (totype == LLVMInt32Type()) { if(tosign) cl2llvmTypeWrapSetSign(llvm_val->type, 1); else cl2llvmTypeWrapSetSign(llvm_val->type, 0); temp_var_count--; } else if (totype == LLVMInt16Type()) { llvm_val->val = LLVMBuildTrunc(cl2llvm_builder, original_val->val, LLVMInt16Type(), temp_var_name); if(tosign) cl2llvmTypeWrapSetSign(llvm_val->type, 1); else cl2llvmTypeWrapSetSign(llvm_val->type, 0); } else if (totype == LLVMInt8Type()) { llvm_val->val = LLVMBuildTrunc(cl2llvm_builder, original_val->val, LLVMInt8Type(), temp_var_name); 
if(tosign) cl2llvmTypeWrapSetSign(llvm_val->type, 1); else cl2llvmTypeWrapSetSign(llvm_val->type, 0); } else if (totype == LLVMInt1Type()) { llvm_val->val = LLVMBuildTrunc(cl2llvm_builder, original_val->val, LLVMInt1Type(), temp_var_name); if(tosign) cl2llvmTypeWrapSetSign(llvm_val->type, 1); else cl2llvmTypeWrapSetSign(llvm_val->type, 0); } } else if (fromtype == LLVMInt16Type()) { if (totype == LLVMDoubleType()) { if (fromsign) { llvm_val->val = LLVMBuildSIToFP(cl2llvm_builder, original_val->val, LLVMDoubleType(), temp_var_name); } else { llvm_val->val = LLVMBuildUIToFP(cl2llvm_builder, original_val->val, LLVMDoubleType(), temp_var_name); } cl2llvmTypeWrapSetSign(llvm_val->type, 1); } else if (totype == LLVMFloatType()) { if (fromsign) { llvm_val->val = LLVMBuildSIToFP(cl2llvm_builder, original_val->val, LLVMFloatType(), temp_var_name); } else { llvm_val->val = LLVMBuildUIToFP(cl2llvm_builder, original_val->val, LLVMFloatType(), temp_var_name); } cl2llvmTypeWrapSetSign(llvm_val->type, 1); } else if (totype == LLVMHalfType()) { if (fromsign) { llvm_val->val = LLVMBuildSIToFP(cl2llvm_builder, original_val->val, LLVMHalfType(), temp_var_name); } else { llvm_val->val = LLVMBuildUIToFP(cl2llvm_builder, original_val->val, LLVMHalfType(), temp_var_name); } cl2llvmTypeWrapSetSign(llvm_val->type, 1); } else if (totype == LLVMInt64Type()) { if (fromsign) { llvm_val->val = LLVMBuildSExt(cl2llvm_builder, original_val->val, LLVMInt64Type(), temp_var_name); } else { llvm_val->val = LLVMBuildZExt(cl2llvm_builder, original_val->val, LLVMInt64Type(), temp_var_name); } if (tosign) cl2llvmTypeWrapSetSign(llvm_val->type, 1); else cl2llvmTypeWrapSetSign(llvm_val->type, 0); } else if (totype == LLVMInt32Type()) { if (fromsign) { llvm_val->val = LLVMBuildSExt(cl2llvm_builder, original_val->val, LLVMInt32Type(), temp_var_name); } else { llvm_val->val = LLVMBuildZExt(cl2llvm_builder, original_val->val, LLVMInt32Type(), temp_var_name); } if(tosign) cl2llvmTypeWrapSetSign(llvm_val->type, 1); else cl2llvmTypeWrapSetSign(llvm_val->type, 0); } else if (totype == LLVMInt16Type()) { if(tosign) cl2llvmTypeWrapSetSign(llvm_val->type, 1); else cl2llvmTypeWrapSetSign(llvm_val->type, 0); temp_var_count--; } else if (totype == LLVMInt8Type()) { llvm_val->val = LLVMBuildTrunc(cl2llvm_builder, original_val->val, LLVMInt8Type(), temp_var_name); if(tosign) cl2llvmTypeWrapSetSign(llvm_val->type, 1); else cl2llvmTypeWrapSetSign(llvm_val->type, 0); } else if (totype == LLVMInt1Type()) { llvm_val->val = LLVMBuildTrunc(cl2llvm_builder, original_val->val, LLVMInt1Type(), temp_var_name); if(tosign) cl2llvmTypeWrapSetSign(llvm_val->type, 1); else cl2llvmTypeWrapSetSign(llvm_val->type, 0); } } else if (fromtype == LLVMInt8Type()) { if (totype == LLVMDoubleType()) { if (fromsign) { llvm_val->val = LLVMBuildSIToFP(cl2llvm_builder, original_val->val, LLVMDoubleType(), temp_var_name); } else { llvm_val->val = LLVMBuildUIToFP(cl2llvm_builder, original_val->val, LLVMDoubleType(), temp_var_name); } cl2llvmTypeWrapSetSign(llvm_val->type, 1); } else if (totype == LLVMFloatType()) { if (fromsign) { llvm_val->val = LLVMBuildSIToFP(cl2llvm_builder, original_val->val, LLVMFloatType(), temp_var_name); } else { llvm_val->val = LLVMBuildUIToFP(cl2llvm_builder, original_val->val, LLVMFloatType(), temp_var_name); } cl2llvmTypeWrapSetSign(llvm_val->type, 1); } else if (totype == LLVMHalfType()) { if (fromsign) { llvm_val->val = LLVMBuildSIToFP(cl2llvm_builder, original_val->val, LLVMHalfType(), temp_var_name); } else { llvm_val->val = 
LLVMBuildUIToFP(cl2llvm_builder, original_val->val, LLVMHalfType(), temp_var_name); } cl2llvmTypeWrapSetSign(llvm_val->type, 1); } else if (totype == LLVMInt64Type()) { if (fromsign) { llvm_val->val = LLVMBuildSExt(cl2llvm_builder, original_val->val, LLVMInt64Type(), temp_var_name); } else { llvm_val->val = LLVMBuildZExt(cl2llvm_builder, original_val->val, LLVMInt64Type(), temp_var_name); } if (tosign) cl2llvmTypeWrapSetSign(llvm_val->type, 1); else cl2llvmTypeWrapSetSign(llvm_val->type, 0); } else if (totype == LLVMInt32Type()) { if (fromsign) { llvm_val->val = LLVMBuildSExt(cl2llvm_builder, original_val->val, LLVMInt32Type(), temp_var_name); } else { llvm_val->val = LLVMBuildZExt(cl2llvm_builder, original_val->val, LLVMInt32Type(), temp_var_name); } if(tosign) cl2llvmTypeWrapSetSign(llvm_val->type, 1); else cl2llvmTypeWrapSetSign(llvm_val->type, 0); } else if (totype == LLVMInt16Type()) { if (fromsign) { llvm_val->val = LLVMBuildSExt(cl2llvm_builder, original_val->val, LLVMInt16Type(), temp_var_name); } else { llvm_val->val = LLVMBuildZExt(cl2llvm_builder, original_val->val, LLVMInt16Type(), temp_var_name); } if(tosign) cl2llvmTypeWrapSetSign(llvm_val->type, 1); else cl2llvmTypeWrapSetSign(llvm_val->type, 0); } else if (totype == LLVMInt8Type()) { if(tosign) cl2llvmTypeWrapSetSign(llvm_val->type, 1); else cl2llvmTypeWrapSetSign(llvm_val->type, 0); temp_var_count--; } else if (totype == LLVMInt1Type()) { llvm_val->val = LLVMBuildTrunc(cl2llvm_builder, original_val->val, LLVMInt1Type(), temp_var_name); if(tosign) cl2llvmTypeWrapSetSign(llvm_val->type, 1); else cl2llvmTypeWrapSetSign(llvm_val->type, 0); } } else if (fromtype == LLVMInt1Type()) { if (totype == LLVMDoubleType()) { if (fromsign) { llvm_val->val = LLVMBuildSIToFP(cl2llvm_builder, original_val->val, LLVMDoubleType(), temp_var_name); } else { llvm_val->val = LLVMBuildUIToFP(cl2llvm_builder, original_val->val, LLVMDoubleType(), temp_var_name); } cl2llvmTypeWrapSetSign(llvm_val->type, 1); } else if (totype == LLVMFloatType()) { if (fromsign) { llvm_val->val = LLVMBuildSIToFP(cl2llvm_builder, original_val->val, LLVMFloatType(), temp_var_name); } else { llvm_val->val = LLVMBuildUIToFP(cl2llvm_builder, original_val->val, LLVMFloatType(), temp_var_name); } cl2llvmTypeWrapSetSign(llvm_val->type, 1); } else if (totype == LLVMHalfType()) { if (fromsign) { llvm_val->val = LLVMBuildSIToFP(cl2llvm_builder, original_val->val, LLVMHalfType(), temp_var_name); } else { llvm_val->val = LLVMBuildUIToFP(cl2llvm_builder, original_val->val, LLVMHalfType(), temp_var_name); } cl2llvmTypeWrapSetSign(llvm_val->type, 1); } else if (totype == LLVMInt64Type()) { if (fromsign) { llvm_val->val = LLVMBuildSExt(cl2llvm_builder, original_val->val, LLVMInt64Type(), temp_var_name); } else { llvm_val->val = LLVMBuildZExt(cl2llvm_builder, original_val->val, LLVMInt64Type(), temp_var_name); } if (tosign) cl2llvmTypeWrapSetSign(llvm_val->type, 1); else cl2llvmTypeWrapSetSign(llvm_val->type, 0); } else if (totype == LLVMInt32Type()) { if (fromsign) { llvm_val->val = LLVMBuildSExt(cl2llvm_builder, original_val->val, LLVMInt32Type(), temp_var_name); } else { llvm_val->val = LLVMBuildZExt(cl2llvm_builder, original_val->val, LLVMInt32Type(), temp_var_name); } if(tosign) cl2llvmTypeWrapSetSign(llvm_val->type, 1); else cl2llvmTypeWrapSetSign(llvm_val->type, 0); } else if (totype == LLVMInt16Type()) { if (fromsign) { llvm_val->val = LLVMBuildSExt(cl2llvm_builder, original_val->val, LLVMInt16Type(), temp_var_name); } else { llvm_val->val = LLVMBuildZExt(cl2llvm_builder, 
original_val->val, LLVMInt16Type(), temp_var_name); } if(tosign) cl2llvmTypeWrapSetSign(llvm_val->type, 1); else cl2llvmTypeWrapSetSign(llvm_val->type, 0); } else if (totype == LLVMInt8Type()) { if (fromsign) { llvm_val->val = LLVMBuildSExt(cl2llvm_builder, original_val->val, LLVMInt8Type(), temp_var_name); } else { llvm_val->val = LLVMBuildZExt(cl2llvm_builder, original_val->val, LLVMInt8Type(), temp_var_name); } if(tosign) cl2llvmTypeWrapSetSign(llvm_val->type, 1); else cl2llvmTypeWrapSetSign(llvm_val->type, 0); } else if (totype == LLVMInt1Type()) { if(tosign) cl2llvmTypeWrapSetSign(llvm_val->type, 1); else cl2llvmTypeWrapSetSign(llvm_val->type, 0); temp_var_count--; } } /*We now know that from type must be a floating point.*/ /*Floating point to signed integer conversions*/ else if (tosign && LLVMGetTypeKind(totype) == 8) { if (totype == LLVMInt64Type()) { llvm_val->val = LLVMBuildFPToSI(cl2llvm_builder, original_val->val, LLVMInt64Type(), temp_var_name); } else if (totype == LLVMInt32Type()) { llvm_val->val = LLVMBuildFPToSI(cl2llvm_builder, original_val->val, LLVMInt32Type(), temp_var_name); } else if (totype == LLVMInt16Type()) { llvm_val->val = LLVMBuildFPToSI(cl2llvm_builder, original_val->val, LLVMInt16Type(), temp_var_name); } else if (totype == LLVMInt8Type()) { llvm_val->val = LLVMBuildFPToSI(cl2llvm_builder, original_val->val, LLVMInt8Type(), temp_var_name); } else if (totype == LLVMInt1Type()) { llvm_val->val = LLVMBuildFPToSI(cl2llvm_builder, original_val->val, LLVMInt1Type(), temp_var_name); } cl2llvmTypeWrapSetSign(llvm_val->type, 1); } /*Floating point to unsigned integer conversions*/ else if (!tosign) { if (totype == LLVMInt64Type()) { llvm_val->val = LLVMBuildFPToUI(cl2llvm_builder, original_val->val, LLVMInt64Type(), temp_var_name); } else if (totype == LLVMInt32Type()) { llvm_val->val = LLVMBuildFPToUI(cl2llvm_builder, original_val->val, LLVMInt32Type(), temp_var_name); } else if (totype == LLVMInt16Type()) { llvm_val->val = LLVMBuildFPToUI(cl2llvm_builder, original_val->val, LLVMInt16Type(), temp_var_name); } else if (totype == LLVMInt8Type()) { llvm_val->val = LLVMBuildFPToUI(cl2llvm_builder, original_val->val, LLVMInt8Type(), temp_var_name); } else if (totype == LLVMInt1Type()) { llvm_val->val = LLVMBuildFPToUI(cl2llvm_builder, original_val->val, LLVMInt1Type(), temp_var_name); } cl2llvmTypeWrapSetSign(llvm_val->type, 0); } else if (totype == LLVMDoubleType()) { llvm_val->val = LLVMBuildFPExt(cl2llvm_builder, original_val->val, LLVMDoubleType(), temp_var_name); cl2llvmTypeWrapSetSign(llvm_val->type, 1); } else if (totype == LLVMFloatType()) { if (fromtype == LLVMDoubleType()) { llvm_val->val = LLVMBuildFPTrunc(cl2llvm_builder, original_val->val, LLVMFloatType(), temp_var_name); } else if (fromtype == LLVMHalfType()) { llvm_val->val = LLVMBuildFPExt(cl2llvm_builder, original_val->val, LLVMFloatType(), temp_var_name); } cl2llvmTypeWrapSetSign(llvm_val->type, 1); } else if (totype == LLVMHalfType()) { llvm_val->val = LLVMBuildFPTrunc(cl2llvm_builder, original_val->val, LLVMHalfType(), temp_var_name); cl2llvmTypeWrapSetSign(llvm_val->type, 1); } cl2llvmTypeWrapSetLlvmType(llvm_val->type, totype); cl2llvmTypeWrapSetSign(llvm_val->type, tosign); return llvm_val; }