/**
 * Gather a single scalar element from scattered positions in memory.
 *
 * Loads element @i (an integer of @src_width bits) from @base_ptr at the
 * byte offset taken from the @offsets vector, then widens or narrows it
 * to @dst_width bits.
 *
 * @param aligned         if false, the load is marked byte-aligned.
 * @param vector_justify  on big-endian targets, shift the value into the
 *                        most-significant bits after zero-extension.
 *
 * @sa lp_build_gather()
 */
LLVMValueRef
lp_build_gather_elem(struct gallivm_state *gallivm,
                     unsigned length,
                     unsigned src_width,
                     unsigned dst_width,
                     boolean aligned,
                     LLVMValueRef base_ptr,
                     LLVMValueRef offsets,
                     unsigned i,
                     boolean vector_justify)
{
   LLVMTypeRef elem_type = LLVMIntTypeInContext(gallivm->context, src_width);
   LLVMTypeRef elem_ptr_type = LLVMPointerType(elem_type, 0);
   LLVMTypeRef dst_elem_type = LLVMIntTypeInContext(gallivm->context, dst_width);
   LLVMValueRef addr;
   LLVMValueRef value;

   /* base_ptr must be a raw i8* so the gathered offsets are byte offsets. */
   assert(LLVMTypeOf(base_ptr) ==
          LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0));

   addr = lp_build_gather_elem_ptr(gallivm, length, base_ptr, offsets, i);
   addr = LLVMBuildBitCast(gallivm->builder, addr, elem_ptr_type, "");
   value = LLVMBuildLoad(gallivm->builder, addr, "");

   /* XXX
    * Ideally we would guarantee at least 4-byte alignment whenever the
    * fetch size is a power of two >= 32 (on x86 it makes no difference,
    * on some other archs it would).  Texture fetches could promise full
    * alignment, but vertex fetches cannot, and neither
    * PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY nor
    * PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT quite expresses the d3d10
    * rule (offset aligned to element size); GL counts bytes regardless of
    * element size, and offset-to-first_elem translation in sampler views
    * effectively pins gallium to 16 anyway.  So when the caller says
    * "unaligned" we conservatively drop to byte alignment.
    */
   if (!aligned) {
      LLVMSetAlignment(value, 1);
   }

   assert(src_width <= dst_width);
   if (src_width > dst_width) {
      /* Defensive narrowing path (unreachable when asserts are enabled). */
      value = LLVMBuildTrunc(gallivm->builder, value, dst_elem_type, "");
   } else if (src_width < dst_width) {
      value = LLVMBuildZExt(gallivm->builder, value, dst_elem_type, "");
      if (vector_justify) {
#ifdef PIPE_ARCH_BIG_ENDIAN
         /* Left-justify the value within the wider destination word. */
         value = LLVMBuildShl(gallivm->builder, value,
                              LLVMConstInt(dst_elem_type,
                                           dst_width - src_width, 0), "");
#endif
      }
   }

   return value;
}
/**
 * @brief lp_build_fetch_rgba_aos_array
 *
 * Fetch an RGBA pixel as a whole vector load and convert it to @dst_type,
 * applying the format's channel swizzle.
 *
 * \param format_desc  describes format of the image we're fetching from
 * \param dst_type     output type
 * \param base_ptr     address of the pixel block (or the texel if uncompressed)
 * \param offset       ptr offset
 */
LLVMValueRef
lp_build_fetch_rgba_aos_array(struct gallivm_state *gallivm,
                              const struct util_format_description *format_desc,
                              struct lp_type dst_type,
                              LLVMValueRef base_ptr,
                              LLVMValueRef offset)
{
   struct lp_build_context swizzle_bld;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMTypeRef fetch_vec_type;
   LLVMValueRef elem_ptr;
   LLVMValueRef result = NULL;
   struct lp_type fetch_type;
   boolean pure_integer = format_desc->channel[0].pure_integer;
   struct lp_type conv_type;

   lp_type_from_format_desc(&fetch_type, format_desc);

   assert(fetch_type.length <= dst_type.length);

   fetch_vec_type = lp_build_vec_type(gallivm, fetch_type);

   /* Load the whole source vector in one go; only element alignment is
    * assumed, not full vector alignment. */
   elem_ptr = LLVMBuildGEP(builder, base_ptr, &offset, 1, "");
   elem_ptr = LLVMBuildPointerCast(builder, elem_ptr,
                                   LLVMPointerType(fetch_vec_type, 0), "");
   result = LLVMBuildLoad(builder, elem_ptr, "");
   LLVMSetAlignment(result, fetch_type.width / 8);

   /* Demote double channels to float before converting further. */
   if (fetch_type.floating && fetch_type.width == 64) {
      fetch_type.width = 32;
      fetch_vec_type = lp_build_vec_type(gallivm, fetch_type);
      result = LLVMBuildFPTrunc(builder, result, fetch_vec_type, "");
   }

   /* Pad short source vectors up to the destination length. */
   if (fetch_type.length < dst_type.length) {
      result = lp_build_pad_vector(gallivm, result, dst_type.length);
      fetch_type.length = dst_type.length;
   }

   conv_type = dst_type;
   if (pure_integer) {
      /* Some callers expect (fake) floats, others real ints: convert as
       * integers and only bitcast to float at the very end. */
      conv_type.floating = 0;
      conv_type.sign = fetch_type.sign;
   }

   /* Convert to the requested format. */
   lp_build_conv(gallivm, fetch_type, conv_type, &result, 1, &result, 1);

   /* Apply the format's channel swizzle. */
   lp_build_context_init(&swizzle_bld, gallivm, conv_type);
   result = lp_build_format_swizzle_aos(format_desc, &swizzle_bld, result);

   /* Bitcast to floats (for pure integers) when requested. */
   if (pure_integer && dst_type.floating) {
      result = LLVMBuildBitCast(builder, result,
                                lp_build_vec_type(gallivm, dst_type), "");
   }

   return result;
}
/**
 * Gather one element from scatter positions in memory.
 * Nearly the same as above, however the individual elements
 * may be vectors themselves, and fetches may be float type.
 * Can also do pad vector instead of ZExt.
 *
 * @param src_type  LLVM type of the element actually loaded from memory
 *                  (may itself be a vector type).
 * @param dst_type  gallivm type describing the per-element result.
 * @param aligned   if false, the load is marked byte-aligned.
 *
 * @sa lp_build_gather()
 */
static LLVMValueRef
lp_build_gather_elem_vec(struct gallivm_state *gallivm,
                         unsigned length,
                         unsigned src_width,
                         LLVMTypeRef src_type,
                         struct lp_type dst_type,
                         boolean aligned,
                         LLVMValueRef base_ptr,
                         LLVMValueRef offsets,
                         unsigned i,
                         boolean vector_justify)
{
   LLVMValueRef ptr, res;
   LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0);
   /* base_ptr must be a raw i8* so offsets are interpreted as bytes. */
   assert(LLVMTypeOf(base_ptr) == LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0));

   ptr = lp_build_gather_elem_ptr(gallivm, length, base_ptr, offsets, i);
   ptr = LLVMBuildBitCast(gallivm->builder, ptr, src_ptr_type, "");
   res = LLVMBuildLoad(gallivm->builder, ptr, "");

   /* XXX
    * On some archs we probably really want to avoid having to deal
    * with alignments lower than 4 bytes (if fetch size is a power of
    * two >= 32). On x86 it doesn't matter, however.
    * We should be able to guarantee full alignment for any kind of texture
    * fetch (except ARB_texture_buffer_range, oops), but not vertex fetch
    * (there's PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY and friends
    * but I don't think that's quite what we wanted).
    * For ARB_texture_buffer_range, PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT
    * looks like a good fit, but it seems this cap bit (and OpenGL) aren't
    * enforcing what we want (which is what d3d10 does, the offset needs to
    * be aligned to element size, but GL has bytes regardless of element
    * size which would only leave us with minimum alignment restriction of 16
    * which doesn't make much sense if the type isn't 4x32bit). Due to
    * translation of offsets to first_elem in sampler_views it actually seems
    * gallium could not do anything else except 16 no matter what...
    */
   if (!aligned) {
      LLVMSetAlignment(res, 1);
   } else if (!util_is_power_of_two(src_width)) {
      /*
       * Full alignment is impossible, assume the caller really meant
       * the individual elements were aligned (e.g. 3x32bit format).
       * And yes the generated code may otherwise crash, llvm will
       * really assume 128bit alignment with a 96bit fetch (I suppose
       * that makes sense as it can just assume the upper 32bit to be
       * whatever).
       * Maybe the caller should be able to explicitly set this, but
       * this should cover all the 3-channel formats.
       */
      /* For a 3-channel format, src_width / 24 is the per-channel byte
       * size (e.g. 96 bits -> 4-byte elements, 48 bits -> 2-byte). */
      if (((src_width / 24) * 24 == src_width) &&
           util_is_power_of_two(src_width / 24)) {
         LLVMSetAlignment(res, src_width / 24);
      } else {
         LLVMSetAlignment(res, 1);
      }
   }

   assert(src_width <= dst_type.width * dst_type.length);
   if (src_width < dst_type.width * dst_type.length) {
      if (dst_type.length > 1) {
         /* Destination is a vector: pad with undef channels rather than
          * zero-extending an integer. */
         res = lp_build_pad_vector(gallivm, res, dst_type.length);
         /*
          * vector_justify hopefully a non-issue since we only deal
          * with src_width >= 32 here?
          */
      } else {
         LLVMTypeRef dst_elem_type = lp_build_vec_type(gallivm, dst_type);

         /*
          * Only valid if src_ptr_type is int type...
          */
         res = LLVMBuildZExt(gallivm->builder, res, dst_elem_type, "");

#ifdef PIPE_ARCH_BIG_ENDIAN
         if (vector_justify) {
            /* Shift the value into the most-significant bits of the
             * wider destination word. */
            res = LLVMBuildShl(gallivm->builder, res,
                               LLVMConstInt(dst_elem_type,
                                            dst_type.width - src_width, 0), "");
         }
         if (src_width == 48) {
            /* Load 3x16 bit vector.
             * The sequence of loads on big-endian hardware proceeds as follows.
             * 16-bit fields are denoted by X, Y, Z, and 0. In memory, the sequence
             * of three fields appears in the order X, Y, Z.
             *
             * Load 32-bit word: 0.0.X.Y
             * Load 16-bit halfword: 0.0.0.Z
             * Rotate left: 0.X.Y.0
             * Bitwise OR: 0.X.Y.Z
             *
             * The order in which we need the fields in the result is 0.Z.Y.X,
             * the same as on little-endian; permute 16-bit fields accordingly
             * within 64-bit register:
             */
            LLVMValueRef shuffles[4] = {
               lp_build_const_int32(gallivm, 2),
               lp_build_const_int32(gallivm, 1),
               lp_build_const_int32(gallivm, 0),
               lp_build_const_int32(gallivm, 3),
            };
            /* View the 64-bit value as 4x16, permute, and cast back. */
            res = LLVMBuildBitCast(gallivm->builder, res,
                                   lp_build_vec_type(gallivm, lp_type_uint_vec(16, 4*16)), "");
            res = LLVMBuildShuffleVector(gallivm->builder, res, res, LLVMConstVector(shuffles, 4), "");
            res = LLVMBuildBitCast(gallivm->builder, res, dst_elem_type, "");
         }
#endif
      }
   }
   return res;
}
// Emit IR computing identity ("is") for two possibly-boxed values.
//
// Builds a small CFG: pointer equality short-circuits to true; otherwise two
// boxes compare equal only if they share a type descriptor AND their
// contents match — via memcmp for boxed numerics, or the type's __is method
// for boxed tuples.  @possible_boxes narrows which branches are emitted.
// Returns an i1 phi collecting the result from every predecessor.
static LLVMValueRef box_is_box(compile_t* c, ast_t* left_type,
  LLVMValueRef l_value, LLVMValueRef r_value, int possible_boxes)
{
  pony_assert(LLVMGetTypeKind(LLVMTypeOf(l_value)) == LLVMPointerTypeKind);
  pony_assert(LLVMGetTypeKind(LLVMTypeOf(r_value)) == LLVMPointerTypeKind);

  // Remember the entry block: it feeds `true` into the final phi when the
  // two pointers are identical.
  LLVMBasicBlockRef this_block = LLVMGetInsertBlock(c->builder);
  LLVMBasicBlockRef checkbox_block = codegen_block(c, "is_checkbox");
  LLVMBasicBlockRef box_block = codegen_block(c, "is_box");
  LLVMBasicBlockRef num_block = NULL;
  if((possible_boxes & BOXED_SUBTYPES_NUMERIC) != 0)
    num_block = codegen_block(c, "is_num");
  LLVMBasicBlockRef tuple_block = NULL;
  if((possible_boxes & BOXED_SUBTYPES_TUPLE) != 0)
    tuple_block = codegen_block(c, "is_tuple");
  LLVMBasicBlockRef post_block = codegen_block(c, "is_post");

  // Same address => identical; skip all content comparison.
  LLVMValueRef eq_addr = LLVMBuildICmp(c->builder, LLVMIntEQ, l_value, r_value, "");
  LLVMBuildCondBr(c->builder, eq_addr, post_block, checkbox_block);

  // Check whether we have two boxed objects of the same type.
  LLVMPositionBuilderAtEnd(c->builder, checkbox_block);
  LLVMValueRef l_desc = gendesc_fetch(c, l_value);
  LLVMValueRef r_desc = gendesc_fetch(c, r_value);
  LLVMValueRef same_type = LLVMBuildICmp(c->builder, LLVMIntEQ, l_desc, r_desc, "");
  LLVMValueRef l_typeid = NULL;
  if((possible_boxes & BOXED_SUBTYPES_UNBOXED) != 0)
  {
    // NOTE(review): bit 0 of the typeid appears to distinguish unboxed
    // types (comparison passes only when the bit is clear) — confirm
    // against gendesc_typeid's encoding.
    l_typeid = gendesc_typeid(c, l_value);
    LLVMValueRef boxed_mask = LLVMConstInt(c->i32, 1, false);
    LLVMValueRef left_boxed = LLVMBuildAnd(c->builder, l_typeid, boxed_mask, "");
    LLVMValueRef zero = LLVMConstInt(c->i32, 0, false);
    left_boxed = LLVMBuildICmp(c->builder, LLVMIntEQ, left_boxed, zero, "");
    LLVMValueRef both_boxed = LLVMBuildAnd(c->builder, same_type, left_boxed, "");
    LLVMBuildCondBr(c->builder, both_boxed, box_block, post_block);
  } else {
    LLVMBuildCondBr(c->builder, same_type, box_block, post_block);
  }

  // Check whether it's a numeric primitive or a tuple.
  LLVMPositionBuilderAtEnd(c->builder, box_block);
  if((possible_boxes & BOXED_SUBTYPES_BOXED) == BOXED_SUBTYPES_BOXED)
  {
    // Both numeric and tuple boxes are possible: dispatch on typeid bit 1.
    // NOTE(review): bit clear seems to mean "numeric" — verify encoding.
    if(l_typeid == NULL)
      l_typeid = gendesc_typeid(c, l_value);
    LLVMValueRef num_mask = LLVMConstInt(c->i32, 2, false);
    LLVMValueRef boxed_num = LLVMBuildAnd(c->builder, l_typeid, num_mask, "");
    LLVMValueRef zero = LLVMConstInt(c->i32, 0, false);
    boxed_num = LLVMBuildICmp(c->builder, LLVMIntEQ, boxed_num, zero, "");
    LLVMBuildCondBr(c->builder, boxed_num, num_block, tuple_block);
  } else if((possible_boxes & BOXED_SUBTYPES_NUMERIC) != 0) {
    // Only numeric boxes are possible.
    LLVMBuildBr(c->builder, num_block);
  } else {
    // Only tuple boxes are possible.
    pony_assert((possible_boxes & BOXED_SUBTYPES_TUPLE) != 0);
    LLVMBuildBr(c->builder, tuple_block);
  }

  LLVMValueRef args[3];
  LLVMValueRef is_num = NULL;
  if(num_block != NULL)
  {
    // Get the machine word size and memcmp without unboxing.
    LLVMPositionBuilderAtEnd(c->builder, num_block);
    if(l_typeid == NULL)
      l_typeid = gendesc_typeid(c, l_value);
    // Index the per-typeid size table (c->numeric_sizes) to find how many
    // bytes of payload to compare.
    LLVMValueRef num_sizes = LLVMBuildBitCast(c->builder, c->numeric_sizes, c->void_ptr, "");
    args[0] = LLVMBuildZExt(c->builder, l_typeid, c->intptr, "");
    LLVMValueRef size = LLVMBuildInBoundsGEP(c->builder, num_sizes, args, 1, "");
    size = LLVMBuildBitCast(c->builder, size, LLVMPointerType(c->i32, 0), "");
    size = LLVMBuildLoad(c->builder, size, "");
    LLVMSetAlignment(size, 4);
    // GEP past the object header (index 1) to reach the boxed payload.
    LLVMValueRef one = LLVMConstInt(c->i32, 1, false);
    args[0] = LLVMBuildInBoundsGEP(c->builder, l_value, &one, 1, "");
    args[0] = LLVMBuildBitCast(c->builder, args[0], c->void_ptr, "");
    args[1] = LLVMBuildInBoundsGEP(c->builder, r_value, &one, 1, "");
    args[1] = LLVMBuildBitCast(c->builder, args[1], c->void_ptr, "");
    args[2] = LLVMBuildZExt(c->builder, size, c->intptr, "");
    // Equal payload bytes => the boxed numerics are identical.
    is_num = gencall_runtime(c, "memcmp", args, 3, "");
    is_num = LLVMBuildICmp(c->builder, LLVMIntEQ, is_num,
      LLVMConstInt(c->i32, 0, false), "");
    LLVMBuildBr(c->builder, post_block);
  }

  LLVMValueRef is_tuple = NULL;
  if(tuple_block != NULL)
  {
    // Call the type-specific __is function, which will unbox the tuples.
    LLVMPositionBuilderAtEnd(c->builder, tuple_block);
    reach_type_t* r_left = reach_type(c->reach, left_type);
    reach_method_t* is_fn = reach_method(r_left, TK_BOX, stringtab("__is"), NULL);
    pony_assert(is_fn != NULL);
    // Fetch __is through the vtable and cast it to (object, object) -> i1.
    LLVMValueRef func = gendesc_vtable(c, l_value, is_fn->vtable_index);
    LLVMTypeRef params[2];
    params[0] = c->object_ptr;
    params[1] = c->object_ptr;
    LLVMTypeRef type = LLVMFunctionType(c->i1, params, 2, false);
    func = LLVMBuildBitCast(c->builder, func, LLVMPointerType(type, 0), "");
    args[0] = l_value;
    args[1] = r_value;
    is_tuple = codegen_call(c, func, args, 2);
    LLVMBuildBr(c->builder, post_block);
  }

  // Merge: true from pointer equality, the comparison result from the
  // numeric/tuple paths, false from the type-descriptor mismatch path.
  LLVMPositionBuilderAtEnd(c->builder, post_block);
  LLVMValueRef phi = LLVMBuildPhi(c->builder, c->i1, "");
  LLVMValueRef one = LLVMConstInt(c->i1, 1, false);
  LLVMValueRef zero = LLVMConstInt(c->i1, 0, false);
  LLVMAddIncoming(phi, &one, &this_block, 1);
  if(is_num != NULL)
    LLVMAddIncoming(phi, &is_num, &num_block, 1);
  if(is_tuple != NULL)
    LLVMAddIncoming(phi, &is_tuple, &tuple_block, 1);
  LLVMAddIncoming(phi, &zero, &checkbox_block, 1);
  return phi;
}