/** * Gather one element from scatter positions in memory. * Nearly the same as above, however the individual elements * may be vectors themselves, and fetches may be float type. * Can also do pad vector instead of ZExt. * * @sa lp_build_gather() */ static LLVMValueRef lp_build_gather_elem_vec(struct gallivm_state *gallivm, unsigned length, unsigned src_width, LLVMTypeRef src_type, struct lp_type dst_type, boolean aligned, LLVMValueRef base_ptr, LLVMValueRef offsets, unsigned i, boolean vector_justify) { LLVMValueRef ptr, res; LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0); assert(LLVMTypeOf(base_ptr) == LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0)); ptr = lp_build_gather_elem_ptr(gallivm, length, base_ptr, offsets, i); ptr = LLVMBuildBitCast(gallivm->builder, ptr, src_ptr_type, ""); res = LLVMBuildLoad(gallivm->builder, ptr, ""); /* XXX * On some archs we probably really want to avoid having to deal * with alignments lower than 4 bytes (if fetch size is a power of * two >= 32). On x86 it doesn't matter, however. * We should be able to guarantee full alignment for any kind of texture * fetch (except ARB_texture_buffer_range, oops), but not vertex fetch * (there's PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY and friends * but I don't think that's quite what we wanted). * For ARB_texture_buffer_range, PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT * looks like a good fit, but it seems this cap bit (and OpenGL) aren't * enforcing what we want (which is what d3d10 does, the offset needs to * be aligned to element size, but GL has bytes regardless of element * size which would only leave us with minimum alignment restriction of 16 * which doesn't make much sense if the type isn't 4x32bit). Due to * translation of offsets to first_elem in sampler_views it actually seems * gallium could not do anything else except 16 no matter what... */ if (!aligned) { LLVMSetAlignment(res, 1); } else if (!util_is_power_of_two(src_width)) { /* * Full alignment is impossible, assume the caller really meant * the individual elements were aligned (e.g. 3x32bit format). * And yes the generated code may otherwise crash, llvm will * really assume 128bit alignment with a 96bit fetch (I suppose * that makes sense as it can just assume the upper 32bit to be * whatever). * Maybe the caller should be able to explicitly set this, but * this should cover all the 3-channel formats. */ if (((src_width / 24) * 24 == src_width) && util_is_power_of_two(src_width / 24)) { LLVMSetAlignment(res, src_width / 24); } else { LLVMSetAlignment(res, 1); } } assert(src_width <= dst_type.width * dst_type.length); if (src_width < dst_type.width * dst_type.length) { if (dst_type.length > 1) { res = lp_build_pad_vector(gallivm, res, dst_type.length); /* * vector_justify hopefully a non-issue since we only deal * with src_width >= 32 here? */ } else { LLVMTypeRef dst_elem_type = lp_build_vec_type(gallivm, dst_type); /* * Only valid if src_ptr_type is int type... */ res = LLVMBuildZExt(gallivm->builder, res, dst_elem_type, ""); #ifdef PIPE_ARCH_BIG_ENDIAN if (vector_justify) { res = LLVMBuildShl(gallivm->builder, res, LLVMConstInt(dst_elem_type, dst_type.width - src_width, 0), ""); } if (src_width == 48) { /* Load 3x16 bit vector. * The sequence of loads on big-endian hardware proceeds as follows. * 16-bit fields are denoted by X, Y, Z, and 0. In memory, the sequence * of three fields appears in the order X, Y, Z. 
* * Load 32-bit word: 0.0.X.Y * Load 16-bit halfword: 0.0.0.Z * Rotate left: 0.X.Y.0 * Bitwise OR: 0.X.Y.Z * * The order in which we need the fields in the result is 0.Z.Y.X, * the same as on little-endian; permute 16-bit fields accordingly * within 64-bit register: */ LLVMValueRef shuffles[4] = { lp_build_const_int32(gallivm, 2), lp_build_const_int32(gallivm, 1), lp_build_const_int32(gallivm, 0), lp_build_const_int32(gallivm, 3), }; res = LLVMBuildBitCast(gallivm->builder, res, lp_build_vec_type(gallivm, lp_type_uint_vec(16, 4*16)), ""); res = LLVMBuildShuffleVector(gallivm->builder, res, res, LLVMConstVector(shuffles, 4), ""); res = LLVMBuildBitCast(gallivm->builder, res, dst_elem_type, ""); } #endif } } return res; }
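/*
 * Illustrative sketch, not part of the original code: the alignment decision
 * made in lp_build_gather_elem_vec() above, written as plain scalar logic
 * (the helper name is hypothetical). For non-power-of-two fetch widths the
 * "src_width / 24" expression recovers the element size in bytes of the
 * 3-channel formats: 24 -> 1, 48 -> 2, 96 -> 4.
 */
static unsigned
example_gather_load_alignment(unsigned src_width, boolean aligned)
{
   if (!aligned)
      return 1;                  /* caller told us nothing is guaranteed */
   if (util_is_power_of_two(src_width))
      return src_width / 8;      /* llvm's default assumption: natural alignment */
   if ((src_width / 24) * 24 == src_width &&
       util_is_power_of_two(src_width / 24))
      return src_width / 24;     /* element alignment for 3-channel formats */
   return 1;
}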
/**
 * Swizzle one channel into other channels.
 */
LLVMValueRef
lp_build_swizzle_scalar_aos(struct lp_build_context *bld,
                            LLVMValueRef a,
                            unsigned channel,
                            unsigned num_channels)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   const struct lp_type type = bld->type;
   const unsigned n = type.length;
   unsigned i, j;

   if(a == bld->undef || a == bld->zero || a == bld->one || num_channels == 1)
      return a;

   assert(num_channels == 2 || num_channels == 4);

   /* XXX: SSSE3 has PSHUFB which should be better than bitmasks, but forcing
    * the use of shuffles here actually causes worse results. More
    * investigation is needed.
    */
   if (LLVMIsConstant(a) || type.width >= 16) {
      /*
       * Shuffle.
       */
      LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context);
      LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];

      for(j = 0; j < n; j += num_channels)
         for(i = 0; i < num_channels; ++i)
            shuffles[j + i] = LLVMConstInt(elem_type, j + channel, 0);

      return LLVMBuildShuffleVector(builder, a, bld->undef,
                                    LLVMConstVector(shuffles, n), "");
   }
   else if (num_channels == 2) {
      /*
       * Bit mask and shifts
       *
       * XY XY .... XY  <= input
       * 0Y 0Y .... 0Y
       * YY YY .... YY
       * YY YY .... YY  <= output
       */
      struct lp_type type2;
      LLVMValueRef tmp = NULL;
      int shift;

      a = LLVMBuildAnd(builder, a,
                       lp_build_const_mask_aos(bld->gallivm,
                                               type, 1 << channel,
                                               num_channels), "");

      type2 = type;
      type2.floating = FALSE;
      type2.width *= 2;
      type2.length /= 2;

      a = LLVMBuildBitCast(builder, a,
                           lp_build_vec_type(bld->gallivm, type2), "");

      /*
       * Vector element 0 is always channel X.
       *
       *                        76 54 32 10 (array numbering)
       * Little endian reg in:  YX YX YX YX
       * Little endian reg out: YY YY YY YY if shift right (shift == -1)
       *                        XX XX XX XX if shift left (shift == 1)
       *
       *                        01 23 45 67 (array numbering)
       * Big endian reg in:     XY XY XY XY
       * Big endian reg out:    YY YY YY YY if shift left (shift == 1)
       *                        XX XX XX XX if shift right (shift == -1)
       *
       */
#ifdef PIPE_ARCH_LITTLE_ENDIAN
      shift = channel == 0 ? 1 : -1;
#else
      shift = channel == 0 ? -1 : 1;
#endif

      if (shift > 0) {
         tmp = LLVMBuildShl(builder, a,
                            lp_build_const_int_vec(bld->gallivm, type2,
                                                   shift * type.width), "");
      } else if (shift < 0) {
         tmp = LLVMBuildLShr(builder, a,
                             lp_build_const_int_vec(bld->gallivm, type2,
                                                    -shift * type.width), "");
      }

      assert(tmp);
      if (tmp) {
         a = LLVMBuildOr(builder, a, tmp, "");
      }

      return LLVMBuildBitCast(builder, a,
                              lp_build_vec_type(bld->gallivm, type), "");
   }
   else {
      /*
       * Bit mask and recursive shifts
       *
       * Little-endian registers:
       *
       *   7654 3210
       *   WZYX WZYX .... WZYX  <= input
       *   00Y0 00Y0 .... 00Y0  <= mask
       *   00YY 00YY .... 00YY  <= shift right 1 (shift amount -1)
       *   YYYY YYYY .... YYYY  <= shift left 2 (shift amount 2)
       *
       * Big-endian registers:
       *
       *   0123 4567
       *   XYZW XYZW .... XYZW  <= input
       *   0Y00 0Y00 .... 0Y00  <= mask
       *   YY00 YY00 .... YY00  <= shift left 1 (shift amount 1)
       *   YYYY YYYY .... YYYY  <= shift right 2 (shift amount -2)
       *
       * shifts[] gives little-endian shift amounts; we need to negate for
       * big-endian.
       */
      struct lp_type type4;
      const int shifts[4][2] = {
         { 1,  2},
         {-1,  2},
         { 1, -2},
         {-1, -2}
      };
      unsigned i;

      a = LLVMBuildAnd(builder, a,
                       lp_build_const_mask_aos(bld->gallivm,
                                               type, 1 << channel, 4), "");

      /*
       * Build a type where each element is an integer that covers the four
       * channels.
       */
      type4 = type;
      type4.floating = FALSE;
      type4.width *= 4;
      type4.length /= 4;

      a = LLVMBuildBitCast(builder, a,
                           lp_build_vec_type(bld->gallivm, type4), "");

      for(i = 0; i < 2; ++i) {
         LLVMValueRef tmp = NULL;
         int shift = shifts[channel][i];

         /* See endianness diagram above */
#ifdef PIPE_ARCH_BIG_ENDIAN
         shift = -shift;
#endif

         if(shift > 0)
            tmp = LLVMBuildShl(builder, a,
                               lp_build_const_int_vec(bld->gallivm, type4,
                                                      shift*type.width), "");
         if(shift < 0)
            tmp = LLVMBuildLShr(builder, a,
                                lp_build_const_int_vec(bld->gallivm, type4,
                                                       -shift*type.width), "");

         assert(tmp);
         if(tmp)
            a = LLVMBuildOr(builder, a, tmp, "");
      }

      return LLVMBuildBitCast(builder, a,
                              lp_build_vec_type(bld->gallivm, type), "");
   }
}
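/*
 * Illustrative sketch, not part of the original code: the mask-and-shift
 * broadcast above applied to a single 32-bit pixel holding four 8-bit
 * channels in little-endian order (X in the lowest byte). This is the
 * shifts[1] = {-1, 2} case, i.e. broadcasting channel Y; the helper name
 * is hypothetical and uint32_t is assumed from <stdint.h>.
 */
static uint32_t
example_broadcast_channel_y_8888(uint32_t pixel)
{
   uint32_t v = pixel & 0x0000ff00u;   /* 0x0000YY00: mask out channel Y    */
   v |= v >> 8;                        /* 0x0000YYYY: shift right 1 channel */
   v |= v << 16;                       /* 0xYYYYYYYY: shift left 2 channels */
   return v;
}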
/** * Unpack a single pixel into its XYZW components. * * @param desc the pixel format for the packed pixel value * @param packed integer pixel in a format such as PIPE_FORMAT_B8G8R8A8_UNORM * * @return XYZW in a float[4] or ubyte[4] or ushort[4] vector. */ static INLINE LLVMValueRef lp_build_unpack_arith_rgba_aos(struct gallivm_state *gallivm, const struct util_format_description *desc, LLVMValueRef packed) { LLVMBuilderRef builder = gallivm->builder; LLVMValueRef shifted, casted, scaled, masked; LLVMValueRef shifts[4]; LLVMValueRef masks[4]; LLVMValueRef scales[4]; boolean normalized; boolean needs_uitofp; unsigned i; /* TODO: Support more formats */ assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN); assert(desc->block.width == 1); assert(desc->block.height == 1); assert(desc->block.bits <= 32); /* Do the intermediate integer computations with 32bit integers since it * matches floating point size */ assert (LLVMTypeOf(packed) == LLVMInt32TypeInContext(gallivm->context)); /* Broadcast the packed value to all four channels * before: packed = BGRA * after: packed = {BGRA, BGRA, BGRA, BGRA} */ packed = LLVMBuildInsertElement(builder, LLVMGetUndef(LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4)), packed, LLVMConstNull(LLVMInt32TypeInContext(gallivm->context)), ""); packed = LLVMBuildShuffleVector(builder, packed, LLVMGetUndef(LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4)), LLVMConstNull(LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4)), ""); /* Initialize vector constants */ normalized = FALSE; needs_uitofp = FALSE; /* Loop over 4 color components */ for (i = 0; i < 4; ++i) { unsigned bits = desc->channel[i].size; unsigned shift = desc->channel[i].shift; if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) { shifts[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context)); masks[i] = LLVMConstNull(LLVMInt32TypeInContext(gallivm->context)); scales[i] = LLVMConstNull(LLVMFloatTypeInContext(gallivm->context)); } else { unsigned long long mask = (1ULL << bits) - 1; assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED); if (bits == 32) { needs_uitofp = TRUE; } shifts[i] = lp_build_const_int32(gallivm, shift); masks[i] = lp_build_const_int32(gallivm, mask); if (desc->channel[i].normalized) { scales[i] = lp_build_const_float(gallivm, 1.0 / mask); normalized = TRUE; } else scales[i] = lp_build_const_float(gallivm, 1.0); } } /* Ex: convert packed = {XYZW, XYZW, XYZW, XYZW} * into masked = {X, Y, Z, W} */ shifted = LLVMBuildLShr(builder, packed, LLVMConstVector(shifts, 4), ""); masked = LLVMBuildAnd(builder, shifted, LLVMConstVector(masks, 4), ""); if (!needs_uitofp) { /* UIToFP can't be expressed in SSE2 */ casted = LLVMBuildSIToFP(builder, masked, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4), ""); } else { casted = LLVMBuildUIToFP(builder, masked, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4), ""); } /* At this point 'casted' may be a vector of floats such as * {255.0, 255.0, 255.0, 255.0}. Next, if the pixel values are normalized * we'll scale this to {1.0, 1.0, 1.0, 1.0}. */ if (normalized) scaled = LLVMBuildFMul(builder, casted, LLVMConstVector(scales, 4), ""); else scaled = casted; return scaled; }
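/*
 * Illustrative sketch, not part of the original code: the scalar equivalent
 * of the per-channel arithmetic emitted above for one unsigned normalized
 * channel of a packed pixel such as PIPE_FORMAT_B8G8R8A8_UNORM (helper name
 * hypothetical, uint32_t assumed from <stdint.h>).
 */
static float
example_unpack_unorm_chan(uint32_t packed, unsigned shift, unsigned bits)
{
   uint32_t mask = (1u << bits) - 1;
   uint32_t bitval = (packed >> shift) & mask;   /* shifted, then masked   */
   return (float)bitval * (1.0f / (float)mask);  /* scaled into [0.0, 1.0] */
}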
/** * Pack a single pixel. * * @param rgba 4 float vector with the unpacked components. * * XXX: This is mostly for reference and testing -- operating a single pixel at * a time is rarely if ever needed. */ LLVMValueRef lp_build_pack_rgba_aos(struct gallivm_state *gallivm, const struct util_format_description *desc, LLVMValueRef rgba) { LLVMBuilderRef builder = gallivm->builder; LLVMTypeRef type; LLVMValueRef packed = NULL; LLVMValueRef swizzles[4]; LLVMValueRef shifted, casted, scaled, unswizzled; LLVMValueRef shifts[4]; LLVMValueRef scales[4]; boolean normalized; unsigned i, j; assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN); assert(desc->block.width == 1); assert(desc->block.height == 1); type = LLVMIntTypeInContext(gallivm->context, desc->block.bits); /* Unswizzle the color components into the source vector. */ for (i = 0; i < 4; ++i) { for (j = 0; j < 4; ++j) { if (desc->swizzle[j] == i) break; } if (j < 4) swizzles[i] = lp_build_const_int32(gallivm, j); else swizzles[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context)); } unswizzled = LLVMBuildShuffleVector(builder, rgba, LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4)), LLVMConstVector(swizzles, 4), ""); normalized = FALSE; for (i = 0; i < 4; ++i) { unsigned bits = desc->channel[i].size; unsigned shift = desc->channel[i].shift; if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) { shifts[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context)); scales[i] = LLVMGetUndef(LLVMFloatTypeInContext(gallivm->context)); } else { unsigned mask = (1 << bits) - 1; assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED); assert(bits < 32); shifts[i] = lp_build_const_int32(gallivm, shift); if (desc->channel[i].normalized) { scales[i] = lp_build_const_float(gallivm, mask); normalized = TRUE; } else scales[i] = lp_build_const_float(gallivm, 1.0); } } if (normalized) scaled = LLVMBuildFMul(builder, unswizzled, LLVMConstVector(scales, 4), ""); else scaled = unswizzled; casted = LLVMBuildFPToSI(builder, scaled, LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4), ""); shifted = LLVMBuildShl(builder, casted, LLVMConstVector(shifts, 4), ""); /* Bitwise or all components */ for (i = 0; i < 4; ++i) { if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) { LLVMValueRef component = LLVMBuildExtractElement(builder, shifted, lp_build_const_int32(gallivm, i), ""); if (packed) packed = LLVMBuildOr(builder, packed, component, ""); else packed = component; } } if (!packed) packed = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context)); if (desc->block.bits < 32) packed = LLVMBuildTrunc(builder, packed, type, ""); return packed; }
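/*
 * Illustrative sketch, not part of the original code: the scalar equivalent
 * of packing one unsigned normalized channel as done above -- scale by the
 * channel mask, convert to integer, then shift into position. As in the code
 * above, no clamping is performed and the caller ORs the channels together
 * (helper name hypothetical, uint32_t assumed from <stdint.h>).
 */
static uint32_t
example_pack_unorm_chan(float value, unsigned shift, unsigned bits)
{
   uint32_t mask = (1u << bits) - 1;
   uint32_t bitval = (uint32_t)(value * (float)mask);  /* scale and truncate */
   return bitval << shift;                             /* move into place    */
}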
LLVMValueRef lp_build_swizzle_aos(struct lp_build_context *bld, LLVMValueRef a, const unsigned char swizzles[4]) { LLVMBuilderRef builder = bld->gallivm->builder; const struct lp_type type = bld->type; const unsigned n = type.length; unsigned i, j; if (swizzles[0] == PIPE_SWIZZLE_RED && swizzles[1] == PIPE_SWIZZLE_GREEN && swizzles[2] == PIPE_SWIZZLE_BLUE && swizzles[3] == PIPE_SWIZZLE_ALPHA) { return a; } if (swizzles[0] == swizzles[1] && swizzles[1] == swizzles[2] && swizzles[2] == swizzles[3]) { switch (swizzles[0]) { case PIPE_SWIZZLE_RED: case PIPE_SWIZZLE_GREEN: case PIPE_SWIZZLE_BLUE: case PIPE_SWIZZLE_ALPHA: return lp_build_swizzle_scalar_aos(bld, a, swizzles[0]); case PIPE_SWIZZLE_ZERO: return bld->zero; case PIPE_SWIZZLE_ONE: return bld->one; case LP_BLD_SWIZZLE_DONTCARE: return bld->undef; default: assert(0); return bld->undef; } } if (type.width >= 16) { /* * Shuffle. */ LLVMValueRef undef = LLVMGetUndef(lp_build_elem_type(bld->gallivm, type)); LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context); LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; LLVMValueRef aux[LP_MAX_VECTOR_LENGTH]; memset(aux, 0, sizeof aux); for(j = 0; j < n; j += 4) { for(i = 0; i < 4; ++i) { unsigned shuffle; switch (swizzles[i]) { default: assert(0); /* fall through */ case PIPE_SWIZZLE_RED: case PIPE_SWIZZLE_GREEN: case PIPE_SWIZZLE_BLUE: case PIPE_SWIZZLE_ALPHA: shuffle = j + swizzles[i]; shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0); break; case PIPE_SWIZZLE_ZERO: shuffle = type.length + 0; shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0); if (!aux[0]) { aux[0] = lp_build_const_elem(bld->gallivm, type, 0.0); } break; case PIPE_SWIZZLE_ONE: shuffle = type.length + 1; shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0); if (!aux[1]) { aux[1] = lp_build_const_elem(bld->gallivm, type, 1.0); } break; case LP_BLD_SWIZZLE_DONTCARE: shuffles[j + i] = LLVMGetUndef(i32t); break; } } } for (i = 0; i < n; ++i) { if (!aux[i]) { aux[i] = undef; } } return LLVMBuildShuffleVector(builder, a, LLVMConstVector(aux, n), LLVMConstVector(shuffles, n), ""); } else { /* * Bit mask and shifts. * * For example, this will convert BGRA to RGBA by doing * * rgba = (bgra & 0x00ff0000) >> 16 * | (bgra & 0xff00ff00) * | (bgra & 0x000000ff) << 16 * * This is necessary not only for faster cause, but because X86 backend * will refuse shuffles of <4 x i8> vectors */ LLVMValueRef res; struct lp_type type4; unsigned cond = 0; unsigned chan; int shift; /* * Start with a mixture of 1 and 0. */ for (chan = 0; chan < 4; ++chan) { if (swizzles[chan] == PIPE_SWIZZLE_ONE) { cond |= 1 << chan; } } res = lp_build_select_aos(bld, cond, bld->one, bld->zero); /* * Build a type where each element is an integer that cover the four * channels. 
*/ type4 = type; type4.floating = FALSE; type4.width *= 4; type4.length /= 4; a = LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type4), ""); res = LLVMBuildBitCast(builder, res, lp_build_vec_type(bld->gallivm, type4), ""); /* * Mask and shift the channels, trying to group as many channels in the * same shift as possible */ for (shift = -3; shift <= 3; ++shift) { unsigned long long mask = 0; assert(type4.width <= sizeof(mask)*8); for (chan = 0; chan < 4; ++chan) { /* FIXME: big endian */ if (swizzles[chan] < 4 && chan - swizzles[chan] == shift) { mask |= ((1ULL << type.width) - 1) << (swizzles[chan] * type.width); } } if (mask) { LLVMValueRef masked; LLVMValueRef shifted; if (0) debug_printf("shift = %i, mask = 0x%08llx\n", shift, mask); masked = LLVMBuildAnd(builder, a, lp_build_const_int_vec(bld->gallivm, type4, mask), ""); if (shift > 0) { shifted = LLVMBuildShl(builder, masked, lp_build_const_int_vec(bld->gallivm, type4, shift*type.width), ""); } else if (shift < 0) { shifted = LLVMBuildLShr(builder, masked, lp_build_const_int_vec(bld->gallivm, type4, -shift*type.width), ""); } else { shifted = masked; } res = LLVMBuildOr(builder, res, shifted, ""); } } return LLVMBuildBitCast(builder, res, lp_build_vec_type(bld->gallivm, type), ""); } }
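/*
 * Illustrative sketch, not part of the original code: the little-endian
 * BGRA -> RGBA conversion from the comment above, done on a single 32-bit
 * pixel (B in the lowest byte, A in the highest; helper name hypothetical,
 * uint32_t assumed from <stdint.h>).
 */
static uint32_t
example_bgra_to_rgba_8888(uint32_t bgra)
{
   return ((bgra & 0x00ff0000u) >> 16) |   /* move R down two channels */
          ( bgra & 0xff00ff00u)        |   /* keep G and A in place    */
          ((bgra & 0x000000ffu) << 16);    /* move B up two channels   */
}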
static void llvm_emit_tex( const struct lp_build_tgsi_action * action, struct lp_build_tgsi_context * bld_base, struct lp_build_emit_data * emit_data) { struct gallivm_state * gallivm = bld_base->base.gallivm; LLVMValueRef args[7]; unsigned c, sampler_src; struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base); if (emit_data->inst->Texture.Texture == TGSI_TEXTURE_BUFFER) { switch (emit_data->inst->Instruction.Opcode) { case TGSI_OPCODE_TXQ: { struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base); ctx->uses_tex_buffers = true; bool isEgPlus = (ctx->chip_class >= EVERGREEN); LLVMValueRef offset = lp_build_const_int32(bld_base->base.gallivm, isEgPlus ? 0 : 1); LLVMValueRef cvecval = llvm_load_const_buffer(bld_base, offset, LLVM_R600_BUFFER_INFO_CONST_BUFFER); if (!isEgPlus) { LLVMValueRef maskval[4] = { lp_build_const_int32(gallivm, 1), lp_build_const_int32(gallivm, 2), lp_build_const_int32(gallivm, 3), lp_build_const_int32(gallivm, 0), }; LLVMValueRef mask = LLVMConstVector(maskval, 4); cvecval = LLVMBuildShuffleVector(gallivm->builder, cvecval, cvecval, mask, ""); } emit_data->output[0] = cvecval; return; } case TGSI_OPCODE_TXF: { args[0] = LLVMBuildExtractElement(gallivm->builder, emit_data->args[0], lp_build_const_int32(gallivm, 0), ""); args[1] = lp_build_const_int32(gallivm, R600_MAX_CONST_BUFFERS); emit_data->output[0] = lp_build_intrinsic(gallivm->builder, "llvm.R600.load.texbuf", emit_data->dst_type, args, 2, LLVMReadNoneAttribute); if (ctx->chip_class >= EVERGREEN) return; ctx->uses_tex_buffers = true; LLVMDumpValue(emit_data->output[0]); emit_data->output[0] = LLVMBuildBitCast(gallivm->builder, emit_data->output[0], LLVMVectorType(bld_base->base.int_elem_type, 4), ""); LLVMValueRef Mask = llvm_load_const_buffer(bld_base, lp_build_const_int32(gallivm, 0), LLVM_R600_BUFFER_INFO_CONST_BUFFER); Mask = LLVMBuildBitCast(gallivm->builder, Mask, LLVMVectorType(bld_base->base.int_elem_type, 4), ""); emit_data->output[0] = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_AND, emit_data->output[0], Mask); LLVMValueRef WComponent = LLVMBuildExtractElement(gallivm->builder, emit_data->output[0], lp_build_const_int32(gallivm, 3), ""); Mask = llvm_load_const_buffer(bld_base, lp_build_const_int32(gallivm, 1), LLVM_R600_BUFFER_INFO_CONST_BUFFER); Mask = LLVMBuildExtractElement(gallivm->builder, Mask, lp_build_const_int32(gallivm, 0), ""); Mask = LLVMBuildBitCast(gallivm->builder, Mask, bld_base->base.int_elem_type, ""); WComponent = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_OR, WComponent, Mask); emit_data->output[0] = LLVMBuildInsertElement(gallivm->builder, emit_data->output[0], WComponent, lp_build_const_int32(gallivm, 3), ""); emit_data->output[0] = LLVMBuildBitCast(gallivm->builder, emit_data->output[0], LLVMVectorType(bld_base->base.elem_type, 4), ""); } return; default: break; } } if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_TEX || emit_data->inst->Instruction.Opcode == TGSI_OPCODE_TXP) { LLVMValueRef Vector[4] = { LLVMBuildExtractElement(gallivm->builder, emit_data->args[0], lp_build_const_int32(gallivm, 0), ""), LLVMBuildExtractElement(gallivm->builder, emit_data->args[0], lp_build_const_int32(gallivm, 1), ""), LLVMBuildExtractElement(gallivm->builder, emit_data->args[0], lp_build_const_int32(gallivm, 2), ""), LLVMBuildExtractElement(gallivm->builder, emit_data->args[0], lp_build_const_int32(gallivm, 3), ""), }; switch (emit_data->inst->Texture.Texture) { case TGSI_TEXTURE_2D: case TGSI_TEXTURE_RECT: Vector[2] = Vector[3] = 
LLVMGetUndef(bld_base->base.elem_type); break; case TGSI_TEXTURE_1D: Vector[1] = Vector[2] = Vector[3] = LLVMGetUndef(bld_base->base.elem_type); break; default: break; } args[0] = lp_build_gather_values(gallivm, Vector, 4); } else { args[0] = emit_data->args[0]; } assert(emit_data->arg_count + 2 <= Elements(args)); for (c = 1; c < emit_data->arg_count; ++c) args[c] = emit_data->args[c]; if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_TXF) { args[1] = LLVMBuildShl(gallivm->builder, args[1], lp_build_const_int32(gallivm, 1), ""); args[2] = LLVMBuildShl(gallivm->builder, args[2], lp_build_const_int32(gallivm, 1), ""); args[3] = LLVMBuildShl(gallivm->builder, args[3], lp_build_const_int32(gallivm, 1), ""); } sampler_src = emit_data->inst->Instruction.NumSrcRegs-1; args[c++] = lp_build_const_int32(gallivm, emit_data->inst->Src[sampler_src].Register.Index + R600_MAX_CONST_BUFFERS); args[c++] = lp_build_const_int32(gallivm, emit_data->inst->Src[sampler_src].Register.Index); args[c++] = lp_build_const_int32(gallivm, emit_data->inst->Texture.Texture); if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_TXF && (emit_data->inst->Texture.Texture == TGSI_TEXTURE_2D_MSAA || emit_data->inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY_MSAA)) { switch (emit_data->inst->Texture.Texture) { case TGSI_TEXTURE_2D_MSAA: args[6] = lp_build_const_int32(gallivm, TGSI_TEXTURE_2D); break; case TGSI_TEXTURE_2D_ARRAY_MSAA: args[6] = lp_build_const_int32(gallivm, TGSI_TEXTURE_2D_ARRAY); break; default: break; } if (ctx->has_compressed_msaa_texturing) { LLVMValueRef ldptr_args[10] = { args[0], // Coord args[1], // Offset X args[2], // Offset Y args[3], // Offset Z args[4], args[5], lp_build_const_int32(gallivm, 1), lp_build_const_int32(gallivm, 1), lp_build_const_int32(gallivm, 1), lp_build_const_int32(gallivm, 1) }; LLVMValueRef ptr = lp_build_intrinsic(gallivm->builder, "llvm.R600.ldptr", emit_data->dst_type, ldptr_args, 10, LLVMReadNoneAttribute); LLVMValueRef Tmp = LLVMBuildExtractElement(gallivm->builder, args[0], lp_build_const_int32(gallivm, 3), ""); Tmp = LLVMBuildMul(gallivm->builder, Tmp, lp_build_const_int32(gallivm, 4), ""); LLVMValueRef ResX = LLVMBuildExtractElement(gallivm->builder, ptr, lp_build_const_int32(gallivm, 0), ""); ResX = LLVMBuildBitCast(gallivm->builder, ResX, bld_base->base.int_elem_type, ""); Tmp = LLVMBuildLShr(gallivm->builder, ResX, Tmp, ""); Tmp = LLVMBuildAnd(gallivm->builder, Tmp, lp_build_const_int32(gallivm, 0xF), ""); args[0] = LLVMBuildInsertElement(gallivm->builder, args[0], Tmp, lp_build_const_int32(gallivm, 3), ""); args[c++] = lp_build_const_int32(gallivm, emit_data->inst->Texture.Texture); } } emit_data->output[0] = lp_build_intrinsic(gallivm->builder, action->intr_name, emit_data->dst_type, args, c, LLVMReadNoneAttribute); if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_TXQ && ((emit_data->inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || emit_data->inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY))) if (emit_data->inst->Dst[0].Register.WriteMask & 4) { LLVMValueRef offset = lp_build_const_int32(bld_base->base.gallivm, 0); LLVMValueRef ZLayer = LLVMBuildExtractElement(gallivm->builder, llvm_load_const_buffer(bld_base, offset, LLVM_R600_BUFFER_INFO_CONST_BUFFER), lp_build_const_int32(gallivm, 0), ""); emit_data->output[0] = LLVMBuildInsertElement(gallivm->builder, emit_data->output[0], ZLayer, lp_build_const_int32(gallivm, 2), ""); struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base); ctx->has_txq_cube_array_z_comp = true; } }
static LLVMValueRef
emit_fetch_constant(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle)
{
   struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   struct lp_type type = bld_base->base.type;
   LLVMValueRef res;
   unsigned chan;

   assert(!reg->Register.Indirect);

   /*
    * Get the constant's components.
    */

   res = bld->bld_base.base.undef;
   for (chan = 0; chan < 4; ++chan) {
      LLVMValueRef index;
      LLVMValueRef scalar_ptr;
      LLVMValueRef scalar;
      LLVMValueRef swizzle;

      index = lp_build_const_int32(bld->bld_base.base.gallivm,
                                   reg->Register.Index * 4 + chan);

      scalar_ptr = LLVMBuildGEP(builder, bld->consts_ptr, &index, 1, "");

      scalar = LLVMBuildLoad(builder, scalar_ptr, "");

      lp_build_name(scalar, "const[%u].%c", reg->Register.Index, "xyzw"[chan]);

      /*
       * NOTE: the constants array is always assumed to be RGBA.
       */

      swizzle = lp_build_const_int32(bld->bld_base.base.gallivm,
                                     bld->swizzles[chan]);

      res = LLVMBuildInsertElement(builder, res, scalar, swizzle, "");
   }

   /*
    * Broadcast the first quaternion to all others.
    *
    * XXX: could be factored into a reusable function.
    */
   if (type.length > 4) {
      LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
      unsigned i;

      for (chan = 0; chan < 4; ++chan) {
         shuffles[chan] = lp_build_const_int32(bld->bld_base.base.gallivm, chan);
      }

      for (i = 4; i < type.length; ++i) {
         shuffles[i] = shuffles[i % 4];
      }

      res = LLVMBuildShuffleVector(builder,
                                   res, bld->bld_base.base.undef,
                                   LLVMConstVector(shuffles, type.length),
                                   "");
   }

   return res;
}
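/*
 * Illustrative sketch, not part of the original code: the shuffle index
 * pattern built above to replicate the first group of four channels across
 * a wider vector; e.g. for type.length == 8 the indices come out as
 * {0, 1, 2, 3, 0, 1, 2, 3} (helper name hypothetical).
 */
static void
example_broadcast_quad_indices(unsigned indices[], unsigned length)
{
   unsigned i;
   for (i = 0; i < length; ++i)
      indices[i] = i % 4;   /* repeat the first four lanes */
}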
/* This function returns an i1 1 if the value is not equal to 0 and an i1 0 if the value is equal to 0. */ struct cl2llvm_val_t *cl2llvm_to_bool_ne_0(struct cl2llvm_val_t *value) { LLVMValueRef const_zero; LLVMValueRef zero_vec[16]; LLVMTypeRef switch_type; int i; int veclength; struct cl2llvm_val_t *bool_val = cl2llvm_val_create_w_init(value->val, cl2llvmTypeWrapGetSign(value->type)); /* if value is i1 no conversion necessary */ if (LLVMTypeOf(value->val) == LLVMInt1Type()) return bool_val; /* If value is a vector create a vector of constant zeros, else create a scalar 0. */ if (LLVMGetTypeKind(cl2llvmTypeWrapGetLlvmType(value->type)) == LLVMVectorTypeKind) { switch_type = LLVMGetElementType(cl2llvmTypeWrapGetLlvmType(value->type)); veclength = LLVMGetVectorSize(cl2llvmTypeWrapGetLlvmType(value->type)); switch (LLVMGetTypeKind(LLVMGetElementType(cl2llvmTypeWrapGetLlvmType(value->type)))) { case LLVMIntegerTypeKind: /* Create zero vector */ for (i = 0; i < veclength; i++) zero_vec[i] = LLVMConstInt(switch_type, 0, 0); break; case LLVMFloatTypeKind: case LLVMDoubleTypeKind: case LLVMHalfTypeKind: /* Create zero vector */ for (i = 0; i < veclength; i++) zero_vec[i] = LLVMConstReal(switch_type, 0); break; default: cl2llvm_yyerror("unreachable code reached"); } const_zero = LLVMConstVector(zero_vec, veclength); } else if (LLVMGetTypeKind(cl2llvmTypeWrapGetLlvmType(value->type)) == LLVMIntegerTypeKind) { const_zero = LLVMConstInt(cl2llvmTypeWrapGetLlvmType(value->type), 0, 0); switch_type = cl2llvmTypeWrapGetLlvmType(value->type); } else if (LLVMGetTypeKind(cl2llvmTypeWrapGetLlvmType(value->type)) == LLVMFloatTypeKind || LLVMGetTypeKind(cl2llvmTypeWrapGetLlvmType(value->type)) == LLVMDoubleTypeKind || LLVMGetTypeKind(cl2llvmTypeWrapGetLlvmType(value->type)) == LLVMHalfTypeKind) { const_zero = LLVMConstReal(cl2llvmTypeWrapGetLlvmType(value->type), 0); switch_type = cl2llvmTypeWrapGetLlvmType(value->type); } /* Create comparison */ snprintf(temp_var_name, sizeof temp_var_name, "tmp_%d", temp_var_count++); switch (LLVMGetTypeKind(switch_type)) { case LLVMFloatTypeKind: case LLVMDoubleTypeKind: case LLVMHalfTypeKind: bool_val->val = LLVMBuildFCmp(cl2llvm_builder, LLVMRealONE, value->val, const_zero, temp_var_name); break; case LLVMIntegerTypeKind: bool_val->val = LLVMBuildICmp(cl2llvm_builder, LLVMIntNE, value->val, const_zero, temp_var_name); break; default: cl2llvm_yyerror("unreachable code reached"); break; } cl2llvmTypeWrapSetLlvmType(bool_val->type, LLVMInt1Type()); cl2llvmTypeWrapSetSign(bool_val->type, 0); return bool_val; }
/** * Sample a single texture image with (bi-)(tri-)linear sampling. * Return filtered color as two vectors of 16-bit fixed point values. */ static void lp_build_sample_image_linear(struct lp_build_sample_context *bld, LLVMValueRef int_size, LLVMValueRef row_stride_vec, LLVMValueRef img_stride_vec, LLVMValueRef data_ptr, LLVMValueRef s, LLVMValueRef t, LLVMValueRef r, LLVMValueRef *colors_lo, LLVMValueRef *colors_hi) { const unsigned dims = bld->dims; LLVMBuilderRef builder = bld->gallivm->builder; struct lp_build_context i32, h16, u8n; LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type; LLVMValueRef i32_c8, i32_c128, i32_c255; LLVMValueRef width_vec, height_vec, depth_vec; LLVMValueRef s_ipart, s_fpart, s_fpart_lo, s_fpart_hi; LLVMValueRef t_ipart = NULL, t_fpart = NULL, t_fpart_lo = NULL, t_fpart_hi = NULL; LLVMValueRef r_ipart = NULL, r_fpart = NULL, r_fpart_lo = NULL, r_fpart_hi = NULL; LLVMValueRef x_stride, y_stride, z_stride; LLVMValueRef x_offset0, x_offset1; LLVMValueRef y_offset0, y_offset1; LLVMValueRef z_offset0, z_offset1; LLVMValueRef offset[2][2][2]; /* [z][y][x] */ LLVMValueRef x_subcoord[2], y_subcoord[2], z_subcoord[2]; LLVMValueRef neighbors_lo[2][2][2]; /* [z][y][x] */ LLVMValueRef neighbors_hi[2][2][2]; /* [z][y][x] */ LLVMValueRef packed_lo, packed_hi; unsigned x, y, z; unsigned i, j, k; unsigned numj, numk; lp_build_context_init(&i32, bld->gallivm, lp_type_int_vec(32)); lp_build_context_init(&h16, bld->gallivm, lp_type_ufixed(16)); lp_build_context_init(&u8n, bld->gallivm, lp_type_unorm(8)); i32_vec_type = lp_build_vec_type(bld->gallivm, i32.type); h16_vec_type = lp_build_vec_type(bld->gallivm, h16.type); u8n_vec_type = lp_build_vec_type(bld->gallivm, u8n.type); lp_build_extract_image_sizes(bld, bld->int_size_type, bld->int_coord_type, int_size, &width_vec, &height_vec, &depth_vec); if (bld->static_state->normalized_coords) { LLVMValueRef scaled_size; LLVMValueRef flt_size; /* scale size by 256 (8 fractional bits) */ scaled_size = lp_build_shl_imm(&bld->int_size_bld, int_size, 8); flt_size = lp_build_int_to_float(&bld->float_size_bld, scaled_size); lp_build_unnormalized_coords(bld, flt_size, &s, &t, &r); } else { /* scale coords by 256 (8 fractional bits) */ s = lp_build_mul_imm(&bld->coord_bld, s, 256); if (dims >= 2) t = lp_build_mul_imm(&bld->coord_bld, t, 256); if (dims >= 3) r = lp_build_mul_imm(&bld->coord_bld, r, 256); } /* convert float to int */ s = LLVMBuildFPToSI(builder, s, i32_vec_type, ""); if (dims >= 2) t = LLVMBuildFPToSI(builder, t, i32_vec_type, ""); if (dims >= 3) r = LLVMBuildFPToSI(builder, r, i32_vec_type, ""); /* subtract 0.5 (add -128) */ i32_c128 = lp_build_const_int_vec(bld->gallivm, i32.type, -128); s = LLVMBuildAdd(builder, s, i32_c128, ""); if (dims >= 2) { t = LLVMBuildAdd(builder, t, i32_c128, ""); } if (dims >= 3) { r = LLVMBuildAdd(builder, r, i32_c128, ""); } /* compute floor (shift right 8) */ i32_c8 = lp_build_const_int_vec(bld->gallivm, i32.type, 8); s_ipart = LLVMBuildAShr(builder, s, i32_c8, ""); if (dims >= 2) t_ipart = LLVMBuildAShr(builder, t, i32_c8, ""); if (dims >= 3) r_ipart = LLVMBuildAShr(builder, r, i32_c8, ""); /* compute fractional part (AND with 0xff) */ i32_c255 = lp_build_const_int_vec(bld->gallivm, i32.type, 255); s_fpart = LLVMBuildAnd(builder, s, i32_c255, ""); if (dims >= 2) t_fpart = LLVMBuildAnd(builder, t, i32_c255, ""); if (dims >= 3) r_fpart = LLVMBuildAnd(builder, r, i32_c255, ""); /* get pixel, row and image strides */ x_stride = lp_build_const_vec(bld->gallivm, bld->int_coord_bld.type, 
                                 bld->format_desc->block.bits/8);
   y_stride = row_stride_vec;
   z_stride = img_stride_vec;

   /* do texcoord wrapping and compute texel offsets */
   lp_build_sample_wrap_linear_int(bld,
                                   bld->format_desc->block.width,
                                   s_ipart, width_vec, x_stride,
                                   bld->static_state->pot_width,
                                   bld->static_state->wrap_s,
                                   &x_offset0, &x_offset1,
                                   &x_subcoord[0], &x_subcoord[1]);
   for (z = 0; z < 2; z++) {
      for (y = 0; y < 2; y++) {
         offset[z][y][0] = x_offset0;
         offset[z][y][1] = x_offset1;
      }
   }

   if (dims >= 2) {
      lp_build_sample_wrap_linear_int(bld,
                                      bld->format_desc->block.height,
                                      t_ipart, height_vec, y_stride,
                                      bld->static_state->pot_height,
                                      bld->static_state->wrap_t,
                                      &y_offset0, &y_offset1,
                                      &y_subcoord[0], &y_subcoord[1]);

      for (z = 0; z < 2; z++) {
         for (x = 0; x < 2; x++) {
            offset[z][0][x] = lp_build_add(&bld->int_coord_bld,
                                           offset[z][0][x], y_offset0);
            offset[z][1][x] = lp_build_add(&bld->int_coord_bld,
                                           offset[z][1][x], y_offset1);
         }
      }
   }

   if (dims >= 3) {
      lp_build_sample_wrap_linear_int(bld,
                                      bld->format_desc->block.height,
                                      r_ipart, depth_vec, z_stride,
                                      bld->static_state->pot_depth,
                                      bld->static_state->wrap_r,
                                      &z_offset0, &z_offset1,
                                      &z_subcoord[0], &z_subcoord[1]);

      for (y = 0; y < 2; y++) {
         for (x = 0; x < 2; x++) {
            offset[0][y][x] = lp_build_add(&bld->int_coord_bld,
                                           offset[0][y][x], z_offset0);
            offset[1][y][x] = lp_build_add(&bld->int_coord_bld,
                                           offset[1][y][x], z_offset1);
         }
      }
   }
   else if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
      LLVMValueRef z_offset;
      z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec);
      for (y = 0; y < 2; y++) {
         for (x = 0; x < 2; x++) {
            /* The r coord is the cube face in [0,5] */
            offset[0][y][x] = lp_build_add(&bld->int_coord_bld,
                                           offset[0][y][x], z_offset);
         }
      }
   }

   /*
    * Transform 4 x i32 in
    *
    *   s_fpart = {s0, s1, s2, s3}
    *
    * into 8 x i16
    *
    *   s_fpart = {00, s0, 00, s1, 00, s2, 00, s3}
    *
    * into two 8 x i16
    *
    *   s_fpart_lo = {s0, s0, s0, s0, s1, s1, s1, s1}
    *   s_fpart_hi = {s2, s2, s2, s2, s3, s3, s3, s3}
    *
    * and likewise for t_fpart. There is no risk of losing precision here
    * since the fractional parts only use the lower 8 bits.
*/ s_fpart = LLVMBuildBitCast(builder, s_fpart, h16_vec_type, ""); if (dims >= 2) t_fpart = LLVMBuildBitCast(builder, t_fpart, h16_vec_type, ""); if (dims >= 3) r_fpart = LLVMBuildBitCast(builder, r_fpart, h16_vec_type, ""); { LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context); LLVMValueRef shuffles_lo[LP_MAX_VECTOR_LENGTH]; LLVMValueRef shuffles_hi[LP_MAX_VECTOR_LENGTH]; LLVMValueRef shuffle_lo; LLVMValueRef shuffle_hi; for (j = 0; j < h16.type.length; j += 4) { #ifdef PIPE_ARCH_LITTLE_ENDIAN unsigned subindex = 0; #else unsigned subindex = 1; #endif LLVMValueRef index; index = LLVMConstInt(elem_type, j/2 + subindex, 0); for (i = 0; i < 4; ++i) shuffles_lo[j + i] = index; index = LLVMConstInt(elem_type, h16.type.length/2 + j/2 + subindex, 0); for (i = 0; i < 4; ++i) shuffles_hi[j + i] = index; } shuffle_lo = LLVMConstVector(shuffles_lo, h16.type.length); shuffle_hi = LLVMConstVector(shuffles_hi, h16.type.length); s_fpart_lo = LLVMBuildShuffleVector(builder, s_fpart, h16.undef, shuffle_lo, ""); s_fpart_hi = LLVMBuildShuffleVector(builder, s_fpart, h16.undef, shuffle_hi, ""); if (dims >= 2) { t_fpart_lo = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_lo, ""); t_fpart_hi = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_hi, ""); } if (dims >= 3) { r_fpart_lo = LLVMBuildShuffleVector(builder, r_fpart, h16.undef, shuffle_lo, ""); r_fpart_hi = LLVMBuildShuffleVector(builder, r_fpart, h16.undef, shuffle_hi, ""); } } /* * Fetch the pixels as 4 x 32bit (rgba order might differ): * * rgba0 rgba1 rgba2 rgba3 * * bit cast them into 16 x u8 * * r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3 * * unpack them into two 8 x i16: * * r0 g0 b0 a0 r1 g1 b1 a1 * r2 g2 b2 a2 r3 g3 b3 a3 * * The higher 8 bits of the resulting elements will be zero. */ numj = 1 + (dims >= 2); numk = 1 + (dims >= 3); for (k = 0; k < numk; k++) { for (j = 0; j < numj; j++) { for (i = 0; i < 2; i++) { LLVMValueRef rgba8; if (util_format_is_rgba8_variant(bld->format_desc)) { /* * Given the format is a rgba8, just read the pixels as is, * without any swizzling. Swizzling will be done later. */ rgba8 = lp_build_gather(bld->gallivm, bld->texel_type.length, bld->format_desc->block.bits, bld->texel_type.width, data_ptr, offset[k][j][i]); rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, ""); } else { rgba8 = lp_build_fetch_rgba_aos(bld->gallivm, bld->format_desc, u8n.type, data_ptr, offset[k][j][i], x_subcoord[i], y_subcoord[j]); } /* Expand one 4*rgba8 to two 2*rgba16 */ lp_build_unpack2(bld->gallivm, u8n.type, h16.type, rgba8, &neighbors_lo[k][j][i], &neighbors_hi[k][j][i]); } } } /* * Linear interpolation with 8.8 fixed point. 
*/ if (dims == 1) { /* 1-D lerp */ packed_lo = lp_build_lerp(&h16, s_fpart_lo, neighbors_lo[0][0][0], neighbors_lo[0][0][1]); packed_hi = lp_build_lerp(&h16, s_fpart_hi, neighbors_hi[0][0][0], neighbors_hi[0][0][1]); } else { /* 2-D lerp */ packed_lo = lp_build_lerp_2d(&h16, s_fpart_lo, t_fpart_lo, neighbors_lo[0][0][0], neighbors_lo[0][0][1], neighbors_lo[0][1][0], neighbors_lo[0][1][1]); packed_hi = lp_build_lerp_2d(&h16, s_fpart_hi, t_fpart_hi, neighbors_hi[0][0][0], neighbors_hi[0][0][1], neighbors_hi[0][1][0], neighbors_hi[0][1][1]); if (dims >= 3) { LLVMValueRef packed_lo2, packed_hi2; /* lerp in the second z slice */ packed_lo2 = lp_build_lerp_2d(&h16, s_fpart_lo, t_fpart_lo, neighbors_lo[1][0][0], neighbors_lo[1][0][1], neighbors_lo[1][1][0], neighbors_lo[1][1][1]); packed_hi2 = lp_build_lerp_2d(&h16, s_fpart_hi, t_fpart_hi, neighbors_hi[1][0][0], neighbors_hi[1][0][1], neighbors_hi[1][1][0], neighbors_hi[1][1][1]); /* interp between two z slices */ packed_lo = lp_build_lerp(&h16, r_fpart_lo, packed_lo, packed_lo2); packed_hi = lp_build_lerp(&h16, r_fpart_hi, packed_hi, packed_hi2); } } *colors_lo = packed_lo; *colors_hi = packed_hi; }
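/*
 * Illustrative sketch, not part of the original code: the per-channel
 * arithmetic behind the 8.8 fixed point lerps used above, on plain integers
 * and ignoring any rounding details of lp_build_lerp(). 'frac' is the
 * fractional coordinate already scaled by 256 and masked to [0, 255]; the
 * 2-D case applies this twice along x and then once along y (helper name
 * hypothetical).
 */
static int
example_lerp_8_8(int v0, int v1, int frac)
{
   /* v0 + frac * (v1 - v0), then drop the 8 fractional bits */
   return v0 + ((frac * (v1 - v0)) >> 8);
}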
/** * Sample the texture/mipmap using given image filter and mip filter. * data0_ptr and data1_ptr point to the two mipmap levels to sample * from. width0/1_vec, height0/1_vec, depth0/1_vec indicate their sizes. * If we're using nearest miplevel sampling the '1' values will be null/unused. */ static void lp_build_sample_mipmap(struct lp_build_sample_context *bld, unsigned img_filter, unsigned mip_filter, LLVMValueRef s, LLVMValueRef t, LLVMValueRef r, LLVMValueRef ilevel0, LLVMValueRef ilevel1, LLVMValueRef lod_fpart, LLVMValueRef colors_lo_var, LLVMValueRef colors_hi_var) { LLVMBuilderRef builder = bld->gallivm->builder; LLVMValueRef size0; LLVMValueRef size1; LLVMValueRef row_stride0_vec; LLVMValueRef row_stride1_vec; LLVMValueRef img_stride0_vec; LLVMValueRef img_stride1_vec; LLVMValueRef data_ptr0; LLVMValueRef data_ptr1; LLVMValueRef colors0_lo, colors0_hi; LLVMValueRef colors1_lo, colors1_hi; /* sample the first mipmap level */ lp_build_mipmap_level_sizes(bld, ilevel0, &size0, &row_stride0_vec, &img_stride0_vec); data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0); if (img_filter == PIPE_TEX_FILTER_NEAREST) { lp_build_sample_image_nearest(bld, size0, row_stride0_vec, img_stride0_vec, data_ptr0, s, t, r, &colors0_lo, &colors0_hi); } else { assert(img_filter == PIPE_TEX_FILTER_LINEAR); lp_build_sample_image_linear(bld, size0, row_stride0_vec, img_stride0_vec, data_ptr0, s, t, r, &colors0_lo, &colors0_hi); } /* Store the first level's colors in the output variables */ LLVMBuildStore(builder, colors0_lo, colors_lo_var); LLVMBuildStore(builder, colors0_hi, colors_hi_var); if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { LLVMValueRef h16_scale = lp_build_const_float(bld->gallivm, 256.0); LLVMTypeRef i32_type = LLVMIntTypeInContext(bld->gallivm->context, 32); struct lp_build_if_state if_ctx; LLVMValueRef need_lerp; lod_fpart = LLVMBuildFMul(builder, lod_fpart, h16_scale, ""); lod_fpart = LLVMBuildFPToSI(builder, lod_fpart, i32_type, "lod_fpart.fixed16"); /* need_lerp = lod_fpart > 0 */ need_lerp = LLVMBuildICmp(builder, LLVMIntSGT, lod_fpart, LLVMConstNull(i32_type), "need_lerp"); lp_build_if(&if_ctx, bld->gallivm, need_lerp); { struct lp_build_context h16_bld; lp_build_context_init(&h16_bld, bld->gallivm, lp_type_ufixed(16)); /* sample the second mipmap level */ lp_build_mipmap_level_sizes(bld, ilevel1, &size1, &row_stride1_vec, &img_stride1_vec); data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1); if (img_filter == PIPE_TEX_FILTER_NEAREST) { lp_build_sample_image_nearest(bld, size1, row_stride1_vec, img_stride1_vec, data_ptr1, s, t, r, &colors1_lo, &colors1_hi); } else { lp_build_sample_image_linear(bld, size1, row_stride1_vec, img_stride1_vec, data_ptr1, s, t, r, &colors1_lo, &colors1_hi); } /* interpolate samples from the two mipmap levels */ lod_fpart = LLVMBuildTrunc(builder, lod_fpart, h16_bld.elem_type, ""); lod_fpart = lp_build_broadcast_scalar(&h16_bld, lod_fpart); #if HAVE_LLVM == 0x208 /* This is a work-around for a bug in LLVM 2.8. * Evidently, something goes wrong in the construction of the * lod_fpart short[8] vector. Adding this no-effect shuffle seems * to force the vector to be properly constructed. * Tested with mesa-demos/src/tests/mipmap_limits.c (press t, f). 
*/ { LLVMValueRef shuffles[8], shuffle; int i; assert(h16_bld.type.length <= Elements(shuffles)); for (i = 0; i < h16_bld.type.length; i++) shuffles[i] = lp_build_const_int32(bld->gallivm, 2 * (i & 1)); shuffle = LLVMConstVector(shuffles, h16_bld.type.length); lod_fpart = LLVMBuildShuffleVector(builder, lod_fpart, lod_fpart, shuffle, ""); } #endif colors0_lo = lp_build_lerp(&h16_bld, lod_fpart, colors0_lo, colors1_lo); colors0_hi = lp_build_lerp(&h16_bld, lod_fpart, colors0_hi, colors1_hi); LLVMBuildStore(builder, colors0_lo, colors_lo_var); LLVMBuildStore(builder, colors0_hi, colors_hi_var); } lp_build_endif(&if_ctx); } }
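/*
 * Illustrative sketch, not part of the original code: the mip-level blend
 * performed above, on a single scalar channel. The float lod fraction is
 * converted to 8.8 fixed point and the second mipmap level is only sampled
 * and blended in when that fraction is positive (helper name hypothetical).
 */
static int
example_mip_lerp(int level0, int level1, float lod_fpart)
{
   int frac = (int)(lod_fpart * 256.0f);   /* 8.8 fixed point fraction */
   if (frac <= 0)
      return level0;                       /* nearest enough: skip level 1 */
   return level0 + ((frac * (level1 - level0)) >> 8);
}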
/** * Call intrinsic with arguments adapted to intrinsic vector length. * * Split vectors which are too large for the hw, or expand them if they * are too small, so a caller calling a function which might use intrinsics * doesn't need to do splitting/expansion on its own. * This only supports intrinsics where src and dst types match. */ LLVMValueRef lp_build_intrinsic_binary_anylength(struct gallivm_state *gallivm, const char *name, struct lp_type src_type, unsigned intr_size, LLVMValueRef a, LLVMValueRef b) { unsigned i; struct lp_type intrin_type = src_type; LLVMBuilderRef builder = gallivm->builder; LLVMValueRef i32undef = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context)); LLVMValueRef anative, bnative; unsigned intrin_length = intr_size / src_type.width; intrin_type.length = intrin_length; if (intrin_length > src_type.length) { LLVMValueRef elems[LP_MAX_VECTOR_LENGTH]; LLVMValueRef constvec, tmp; for (i = 0; i < src_type.length; i++) { elems[i] = lp_build_const_int32(gallivm, i); } for (; i < intrin_length; i++) { elems[i] = i32undef; } if (src_type.length == 1) { LLVMTypeRef elem_type = lp_build_elem_type(gallivm, intrin_type); a = LLVMBuildBitCast(builder, a, LLVMVectorType(elem_type, 1), ""); b = LLVMBuildBitCast(builder, b, LLVMVectorType(elem_type, 1), ""); } constvec = LLVMConstVector(elems, intrin_length); anative = LLVMBuildShuffleVector(builder, a, a, constvec, ""); bnative = LLVMBuildShuffleVector(builder, b, b, constvec, ""); tmp = lp_build_intrinsic_binary(builder, name, lp_build_vec_type(gallivm, intrin_type), anative, bnative); if (src_type.length > 1) { constvec = LLVMConstVector(elems, src_type.length); return LLVMBuildShuffleVector(builder, tmp, tmp, constvec, ""); } else { return LLVMBuildExtractElement(builder, tmp, elems[0], ""); } } else if (intrin_length < src_type.length) { unsigned num_vec = src_type.length / intrin_length; LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH]; /* don't support arbitrary size here as this is so yuck */ if (src_type.length % intrin_length) { /* FIXME: This is something which should be supported * but there doesn't seem to be any need for it currently * so crash and burn. */ debug_printf("%s: should handle arbitrary vector size\n", __FUNCTION__); assert(0); return NULL; } for (i = 0; i < num_vec; i++) { anative = lp_build_extract_range(gallivm, a, i*intrin_length, intrin_length); bnative = lp_build_extract_range(gallivm, b, i*intrin_length, intrin_length); tmp[i] = lp_build_intrinsic_binary(builder, name, lp_build_vec_type(gallivm, intrin_type), anative, bnative); } return lp_build_concat(gallivm, tmp, intrin_type, num_vec); } else { return lp_build_intrinsic_binary(builder, name, lp_build_vec_type(gallivm, src_type), a, b); } }
/**
 * Fetch texels from a texture, returning them in SoA layout.
 *
 * \param type  the desired return type for 'rgba'. The vector length
 *              is the number of texels to fetch.
 * \param aligned  if the offset is guaranteed to be aligned to element width
 *
 * \param base_ptr  points to the base of the texture mip tree.
 * \param offset  offset to start of the texture image block. For non-
 *                compressed formats, this simply is an offset to the texel.
 *                For compressed formats, it is an offset to the start of the
 *                compressed data block.
 *
 * \param i, j  the sub-block pixel coordinates. For non-compressed formats
 *              these will always be (0,0). For compressed formats, i will
 *              be in [0, block_width-1] and j will be in [0, block_height-1].
 * \param cache  optional value pointing to a lp_build_format_cache structure
 */
void
lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
                        const struct util_format_description *format_desc,
                        struct lp_type type,
                        boolean aligned,
                        LLVMValueRef base_ptr,
                        LLVMValueRef offset,
                        LLVMValueRef i,
                        LLVMValueRef j,
                        LLVMValueRef cache,
                        LLVMValueRef rgba_out[4])
{
   LLVMBuilderRef builder = gallivm->builder;
   enum pipe_format format = format_desc->format;
   struct lp_type fetch_type;

   if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
       (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
        format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB ||
        format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) &&
       format_desc->block.width == 1 &&
       format_desc->block.height == 1 &&
       format_desc->block.bits <= type.width &&
       (format_desc->channel[0].type != UTIL_FORMAT_TYPE_FLOAT ||
        format_desc->channel[0].size == 32 ||
        format_desc->channel[0].size == 16))
   {
      /*
       * The packed pixel fits into an element of the destination format. Put
       * the packed pixels into a vector and extract each component for all
       * vector elements in parallel.
       */

      LLVMValueRef packed;

      /*
       * gather the texels from the texture
       * Ex: packed = {XYZW, XYZW, XYZW, XYZW}
       */
      assert(format_desc->block.bits <= type.width);
      fetch_type = lp_type_uint(type.width);
      packed = lp_build_gather(gallivm,
                               type.length,
                               format_desc->block.bits,
                               fetch_type,
                               aligned,
                               base_ptr, offset, FALSE);

      /*
       * convert texels to float rgba
       */
      lp_build_unpack_rgba_soa(gallivm,
                               format_desc,
                               type,
                               packed, rgba_out);
      return;
   }

   if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
       (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB) &&
       format_desc->block.width == 1 &&
       format_desc->block.height == 1 &&
       format_desc->block.bits > type.width &&
       ((format_desc->block.bits <= type.width * type.length &&
         format_desc->channel[0].size <= type.width) ||
        (format_desc->channel[0].size == 64 &&
         format_desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT &&
         type.floating)))
   {
      /*
       * Similar to above, but the packed pixel is larger than what fits
       * into an element of the destination format. The packed pixels will be
       * shuffled into SoA vectors appropriately, and then the extraction will
       * be done in parallel as much as possible.
       * Good for 16xn (n > 2) and 32xn (n > 1) formats, care is taken so
       * the gathered vectors can be shuffled easily (even with avx).
       * 64xn float -> 32xn float is handled too but it's a bit special as
       * it does the conversion pre-shuffle.
*/ LLVMValueRef packed[4], dst[4], output[4], shuffles[LP_MAX_VECTOR_WIDTH/32]; struct lp_type fetch_type, gather_type = type; unsigned num_gather, fetch_width, i, j; struct lp_build_context bld; boolean fp64 = format_desc->channel[0].size == 64; lp_build_context_init(&bld, gallivm, type); assert(type.width == 32); assert(format_desc->block.bits > type.width); /* * First, figure out fetch order. */ fetch_width = util_next_power_of_two(format_desc->block.bits); num_gather = fetch_width / type.width; /* * fp64 are treated like fp32 except we fetch twice wide values * (as we shuffle after trunc). The shuffles for that work out * mostly fine (slightly suboptimal for 4-wide, perfect for AVX) * albeit we miss the potential opportunity for hw gather (as it * only handles native size). */ num_gather = fetch_width / type.width; gather_type.width *= num_gather; if (fp64) { num_gather /= 2; } gather_type.length /= num_gather; for (i = 0; i < num_gather; i++) { LLVMValueRef offsetr, shuf_vec; if(num_gather == 4) { for (j = 0; j < gather_type.length; j++) { unsigned idx = i + 4*j; shuffles[j] = lp_build_const_int32(gallivm, idx); } shuf_vec = LLVMConstVector(shuffles, gather_type.length); offsetr = LLVMBuildShuffleVector(builder, offset, offset, shuf_vec, ""); } else if (num_gather == 2) { assert(num_gather == 2); for (j = 0; j < gather_type.length; j++) { unsigned idx = i*2 + (j%2) + (j/2)*4; shuffles[j] = lp_build_const_int32(gallivm, idx); } shuf_vec = LLVMConstVector(shuffles, gather_type.length); offsetr = LLVMBuildShuffleVector(builder, offset, offset, shuf_vec, ""); } else { assert(num_gather == 1); offsetr = offset; } if (gather_type.length == 1) { LLVMValueRef zero = lp_build_const_int32(gallivm, 0); offsetr = LLVMBuildExtractElement(builder, offsetr, zero, ""); } /* * Determine whether to use float or int loads. This is mostly * to outsmart the (stupid) llvm int/float shuffle logic, we * don't really care much if the data is floats or ints... * But llvm will refuse to use single float shuffle with int data * and instead use 3 int shuffles instead, the code looks atrocious. * (Note bitcasts often won't help, as llvm is too smart to be * fooled by that.) * Nobody cares about simd float<->int domain transition penalties, * which usually don't even exist for shuffles anyway. * With 4x32bit (and 3x32bit) fetch, we use float vec (the data is * going into transpose, which is unpacks, so doesn't really matter * much). * With 2x32bit or 4x16bit fetch, we use float vec, since those * go into the weird channel separation shuffle. With floats, * this is (with 128bit vectors): * - 2 movq, 2 movhpd, 2 shufps * With ints it would be: * - 4 movq, 2 punpcklqdq, 4 pshufd, 2 blendw * I've seen texture functions increase in code size by 15% just due * to that (there's lots of such fetches in them...) * (We could chose a different gather order to improve this somewhat * for the int path, but it would basically just drop the blends, * so the float path with this order really is optimal.) * Albeit it is tricky sometimes llvm doesn't ignore the float->int * casts so must avoid them until we're done with the float shuffle... * 3x16bit formats (the same is also true for 3x8) are pretty bad but * there's nothing we can do about them (we could overallocate by * those couple bytes and use unaligned but pot sized load). * Note that this is very much x86 specific. I don't know if this * affect other archs at all. 
*/ if (num_gather > 1) { /* * We always want some float type here (with x86) * due to shuffles being float ones afterwards (albeit for * the num_gather == 4 case int should work fine too * (unless there's some problems with avx but not avx2). */ if (format_desc->channel[0].size == 64) { fetch_type = lp_type_float_vec(64, gather_type.width); } else { fetch_type = lp_type_int_vec(32, gather_type.width); } } else { /* type doesn't matter much */ if (format_desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT && (format_desc->channel[0].size == 32 || format_desc->channel[0].size == 64)) { fetch_type = lp_type_float(gather_type.width); } else { fetch_type = lp_type_uint(gather_type.width); } } /* Now finally gather the values */ packed[i] = lp_build_gather(gallivm, gather_type.length, format_desc->block.bits, fetch_type, aligned, base_ptr, offsetr, FALSE); if (fp64) { struct lp_type conv_type = type; conv_type.width *= 2; packed[i] = LLVMBuildBitCast(builder, packed[i], lp_build_vec_type(gallivm, conv_type), ""); packed[i] = LLVMBuildFPTrunc(builder, packed[i], bld.vec_type, ""); } } /* shuffle the gathered values to SoA */ if (num_gather == 2) { for (i = 0; i < num_gather; i++) { for (j = 0; j < type.length; j++) { unsigned idx = (j%2)*2 + (j/4)*4 + i; if ((j/2)%2) idx += type.length; shuffles[j] = lp_build_const_int32(gallivm, idx); } dst[i] = LLVMBuildShuffleVector(builder, packed[0], packed[1], LLVMConstVector(shuffles, type.length), ""); } } else if (num_gather == 4) { lp_build_transpose_aos(gallivm, lp_int_type(type), packed, dst); } else { assert(num_gather == 1); dst[0] = packed[0]; } /* * And finally unpack exactly as above, except that * chan shift is adjusted and the right vector selected. */ if (!fp64) { for (i = 0; i < num_gather; i++) { dst[i] = LLVMBuildBitCast(builder, dst[i], bld.int_vec_type, ""); } for (i = 0; i < format_desc->nr_channels; i++) { struct util_format_channel_description chan_desc = format_desc->channel[i]; unsigned blockbits = type.width; unsigned vec_nr = chan_desc.shift / type.width; chan_desc.shift %= type.width; output[i] = lp_build_extract_soa_chan(&bld, blockbits, FALSE, chan_desc, dst[vec_nr]); } } else { for (i = 0; i < format_desc->nr_channels; i++) { output[i] = dst[i]; } } lp_build_format_swizzle_soa(format_desc, &bld, output, rgba_out); return; } if (format == PIPE_FORMAT_R11G11B10_FLOAT || format == PIPE_FORMAT_R9G9B9E5_FLOAT) { /* * similar conceptually to above but requiring special * AoS packed -> SoA float conversion code. */ LLVMValueRef packed; struct lp_type fetch_type = lp_type_uint(type.width); assert(type.floating); assert(type.width == 32); packed = lp_build_gather(gallivm, type.length, format_desc->block.bits, fetch_type, aligned, base_ptr, offset, FALSE); if (format == PIPE_FORMAT_R11G11B10_FLOAT) { lp_build_r11g11b10_to_float(gallivm, packed, rgba_out); } else { lp_build_rgb9e5_to_float(gallivm, packed, rgba_out); } return; } if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS && format_desc->block.bits == 64) { /* * special case the format is 64 bits but we only require * 32bit (or 8bit) from each block. */ LLVMValueRef packed; struct lp_type fetch_type = lp_type_uint(type.width); if (format == PIPE_FORMAT_X32_S8X24_UINT) { /* * for stencil simply fix up offsets - could in fact change * base_ptr instead even outside the shader. 
*/ unsigned mask = (1 << 8) - 1; LLVMValueRef s_offset = lp_build_const_int_vec(gallivm, type, 4); offset = LLVMBuildAdd(builder, offset, s_offset, ""); packed = lp_build_gather(gallivm, type.length, 32, fetch_type, aligned, base_ptr, offset, FALSE); packed = LLVMBuildAnd(builder, packed, lp_build_const_int_vec(gallivm, type, mask), ""); } else { assert (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT); packed = lp_build_gather(gallivm, type.length, 32, fetch_type, aligned, base_ptr, offset, TRUE); packed = LLVMBuildBitCast(builder, packed, lp_build_vec_type(gallivm, type), ""); } /* for consistency with lp_build_unpack_rgba_soa() return sss1 or zzz1 */ rgba_out[0] = rgba_out[1] = rgba_out[2] = packed; rgba_out[3] = lp_build_const_vec(gallivm, type, 1.0f); return; } /* * Try calling lp_build_fetch_rgba_aos for all pixels. * Should only really hit subsampled, compressed * (for s3tc srgb too, for rgtc the unorm ones only) by now. * (This is invalid for plain 8unorm formats because we're lazy with * the swizzle since some results would arrive swizzled, some not.) */ if ((format_desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) && (util_format_fits_8unorm(format_desc) || format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) && type.floating && type.width == 32 && (type.length == 1 || (type.length % 4 == 0))) { struct lp_type tmp_type; struct lp_build_context bld; LLVMValueRef packed, rgba[4]; const struct util_format_description *flinear_desc; const struct util_format_description *frgba8_desc; unsigned chan; lp_build_context_init(&bld, gallivm, type); /* * Make sure the conversion in aos really only does convert to rgba8 * and not anything more (so use linear format, adjust type). */ flinear_desc = util_format_description(util_format_linear(format)); memset(&tmp_type, 0, sizeof tmp_type); tmp_type.width = 8; tmp_type.length = type.length * 4; tmp_type.norm = TRUE; packed = lp_build_fetch_rgba_aos(gallivm, flinear_desc, tmp_type, aligned, base_ptr, offset, i, j, cache); packed = LLVMBuildBitCast(builder, packed, bld.int_vec_type, ""); /* * The values are now packed so they match ordinary (srgb) RGBA8 format, * hence need to use matching format for unpack. */ frgba8_desc = util_format_description(PIPE_FORMAT_R8G8B8A8_UNORM); if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) { assert(format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC); frgba8_desc = util_format_description(PIPE_FORMAT_R8G8B8A8_SRGB); } lp_build_unpack_rgba_soa(gallivm, frgba8_desc, type, packed, rgba); /* * We converted 4 channels. Make sure llvm can drop unneeded ones * (luckily the rgba order is fixed, only LA needs special case). */ for (chan = 0; chan < 4; chan++) { enum pipe_swizzle swizzle = format_desc->swizzle[chan]; if (chan == 3 && util_format_is_luminance_alpha(format)) { swizzle = PIPE_SWIZZLE_W; } rgba_out[chan] = lp_build_swizzle_soa_channel(&bld, rgba, swizzle); } return; } /* * Fallback to calling lp_build_fetch_rgba_aos for each pixel. * * This is not the most efficient way of fetching pixels, as we * miss some opportunities to do vectorization, but this is * convenient for formats or scenarios for which there was no * opportunity or incentive to optimize. * * We do NOT want to end up here, this typically is quite terrible, * in particular if the formats have less than 4 channels. 
* * Right now, this should only be hit for: * - RGTC snorm formats * (those miss fast fetch functions hence they are terrible anyway) */ { unsigned k; struct lp_type tmp_type; LLVMValueRef aos_fetch[LP_MAX_VECTOR_WIDTH / 32]; if (gallivm_debug & GALLIVM_DEBUG_PERF) { debug_printf("%s: AoS fetch fallback for %s\n", __FUNCTION__, format_desc->short_name); } tmp_type = type; tmp_type.length = 4; /* * Note that vector transpose can be worse compared to insert/extract * for aos->soa conversion (for formats with 1 or 2 channels). However, * we should try to avoid getting here for just about all formats, so * don't bother. */ /* loop over number of pixels */ for(k = 0; k < type.length; ++k) { LLVMValueRef index = lp_build_const_int32(gallivm, k); LLVMValueRef offset_elem; LLVMValueRef i_elem, j_elem; offset_elem = LLVMBuildExtractElement(builder, offset, index, ""); i_elem = LLVMBuildExtractElement(builder, i, index, ""); j_elem = LLVMBuildExtractElement(builder, j, index, ""); /* Get a single float[4]={R,G,B,A} pixel */ aos_fetch[k] = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type, aligned, base_ptr, offset_elem, i_elem, j_elem, cache); } convert_to_soa(gallivm, aos_fetch, rgba_out, type); } }
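/*
 * Illustrative sketch (not part of the original source): a standalone
 * program that prints the shuffle masks built by the num_gather == 2 path
 * earlier in this function, assuming an 8-wide type. Indices >= type.length
 * select from packed[1], following LLVM's shufflevector convention.
 */
#include <stdio.h>

int main(void)
{
   const unsigned length = 8;             /* stands in for type.length */
   unsigned i, j;

   for (i = 0; i < 2; i++) {              /* num_gather == 2 */
      printf("dst[%u]:", i);
      for (j = 0; j < length; j++) {
         unsigned idx = (j % 2) * 2 + (j / 4) * 4 + i;
         if ((j / 2) % 2)
            idx += length;                /* element comes from packed[1] */
         printf(" %u", idx);
      }
      printf("\n");                       /* dst[0]: 0 2 8 10 4 6 12 14 */
   }
   return 0;
}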
/**
 * Expand the relevant bits of mask_input to a 4-dword mask for the
 * four pixels in a 2x2 quad. This will set the four elements of the
 * quad mask vector to 0 or ~0.
 *
 * \param quad        which quad of the quad group to test, in [0,3]
 * \param mask_input  bitwise mask for the whole 4x4 stamp
 */
static LLVMValueRef
generate_quad_mask(LLVMBuilderRef builder,
                   struct lp_type fs_type,
                   unsigned quad,
                   LLVMValueRef mask_input) /* int32 */
{
   struct lp_type mask_type;
   LLVMTypeRef i32t = LLVMInt32Type();
   LLVMValueRef bits[4];
   LLVMValueRef mask;
   int shift;

   /*
    * XXX: We'll need a different path for 16 x u8
    */
   assert(fs_type.width == 32);
   assert(fs_type.length == 4);
   mask_type = lp_int_type(fs_type);

   /*
    * Shift the quad's upper-left pixel down to bit 0: the quads start
    * at bits 0, 2, 8 and 10 of the 4x4 stamp mask.
    */
   switch (quad) {
   case 0:
      shift = 0;
      break;
   case 1:
      shift = 2;
      break;
   case 2:
      shift = 8;
      break;
   case 3:
      shift = 10;
      break;
   default:
      assert(0);
      shift = 0;
   }

   mask_input = LLVMBuildLShr(builder,
                              mask_input,
                              LLVMConstInt(i32t, shift, 0),
                              "");

   /*
    * mask = { mask_input & bits[i], for i in [0,3] }
    */
   mask = lp_build_broadcast(builder,
                             lp_build_vec_type(mask_type),
                             mask_input);

   bits[0] = LLVMConstInt(i32t, 1 << 0, 0);
   bits[1] = LLVMConstInt(i32t, 1 << 1, 0);
   bits[2] = LLVMConstInt(i32t, 1 << 4, 0);
   bits[3] = LLVMConstInt(i32t, 1 << 5, 0);

   mask = LLVMBuildAnd(builder, mask, LLVMConstVector(bits, 4), "");

   /*
    * mask = mask != 0 ? ~0 : 0
    */
   mask = lp_build_compare(builder,
                           mask_type, PIPE_FUNC_NOTEQUAL,
                           mask, lp_build_const_int_vec(mask_type, 0));

   return mask;
}
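/*
 * Illustrative sketch (not part of the original source): a scalar model of
 * generate_quad_mask() above. The 4x4 stamp mask carries one bit per pixel,
 * four bits per row, so a 2x2 quad's upper-left pixel sits at bit 0, 2, 8 or
 * 10 and the quad's second row lies four bits further on.
 * quad_pixel_mask_scalar is a hypothetical helper name.
 */
#include <stdint.h>

static uint32_t
quad_pixel_mask_scalar(uint32_t mask_input, unsigned quad, unsigned elem)
{
   static const unsigned quad_shift[4] = { 0, 2, 8, 10 };   /* same table */
   static const unsigned pixel_bit[4] = {
      1 << 0, 1 << 1,   /* upper row of the quad */
      1 << 4, 1 << 5    /* lower row of the quad */
   };

   uint32_t m = (mask_input >> quad_shift[quad]) & pixel_bit[elem];
   return m ? ~0u : 0u;   /* matches the PIPE_FUNC_NOTEQUAL compare */
}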
LLVMValueRef lp_build_swizzle_aos(struct lp_build_context *bld, LLVMValueRef a, const unsigned char swizzles[4]) { LLVMBuilderRef builder = bld->gallivm->builder; const struct lp_type type = bld->type; const unsigned n = type.length; unsigned i, j; if (swizzles[0] == PIPE_SWIZZLE_X && swizzles[1] == PIPE_SWIZZLE_Y && swizzles[2] == PIPE_SWIZZLE_Z && swizzles[3] == PIPE_SWIZZLE_W) { return a; } if (swizzles[0] == swizzles[1] && swizzles[1] == swizzles[2] && swizzles[2] == swizzles[3]) { switch (swizzles[0]) { case PIPE_SWIZZLE_X: case PIPE_SWIZZLE_Y: case PIPE_SWIZZLE_Z: case PIPE_SWIZZLE_W: return lp_build_swizzle_scalar_aos(bld, a, swizzles[0], 4); case PIPE_SWIZZLE_0: return bld->zero; case PIPE_SWIZZLE_1: return bld->one; case LP_BLD_SWIZZLE_DONTCARE: return bld->undef; default: assert(0); return bld->undef; } } if (LLVMIsConstant(a) || type.width >= 16) { /* * Shuffle. */ LLVMValueRef undef = LLVMGetUndef(lp_build_elem_type(bld->gallivm, type)); LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context); LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; LLVMValueRef aux[LP_MAX_VECTOR_LENGTH]; memset(aux, 0, sizeof aux); for(j = 0; j < n; j += 4) { for(i = 0; i < 4; ++i) { unsigned shuffle; switch (swizzles[i]) { default: assert(0); /* fall through */ case PIPE_SWIZZLE_X: case PIPE_SWIZZLE_Y: case PIPE_SWIZZLE_Z: case PIPE_SWIZZLE_W: shuffle = j + swizzles[i]; shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0); break; case PIPE_SWIZZLE_0: shuffle = type.length + 0; shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0); if (!aux[0]) { aux[0] = lp_build_const_elem(bld->gallivm, type, 0.0); } break; case PIPE_SWIZZLE_1: shuffle = type.length + 1; shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0); if (!aux[1]) { aux[1] = lp_build_const_elem(bld->gallivm, type, 1.0); } break; case LP_BLD_SWIZZLE_DONTCARE: shuffles[j + i] = LLVMGetUndef(i32t); break; } } } for (i = 0; i < n; ++i) { if (!aux[i]) { aux[i] = undef; } } return LLVMBuildShuffleVector(builder, a, LLVMConstVector(aux, n), LLVMConstVector(shuffles, n), ""); } else { /* * Bit mask and shifts. * * For example, this will convert BGRA to RGBA by doing * * Little endian: * rgba = (bgra & 0x00ff0000) >> 16 * | (bgra & 0xff00ff00) * | (bgra & 0x000000ff) << 16 * * Big endian:A * rgba = (bgra & 0x0000ff00) << 16 * | (bgra & 0x00ff00ff) * | (bgra & 0xff000000) >> 16 * * This is necessary not only for faster cause, but because X86 backend * will refuse shuffles of <4 x i8> vectors */ LLVMValueRef res; struct lp_type type4; unsigned cond = 0; unsigned chan; int shift; /* * Start with a mixture of 1 and 0. */ for (chan = 0; chan < 4; ++chan) { if (swizzles[chan] == PIPE_SWIZZLE_1) { cond |= 1 << chan; } } res = lp_build_select_aos(bld, cond, bld->one, bld->zero, 4); /* * Build a type where each element is an integer that cover the four * channels. */ type4 = type; type4.floating = FALSE; type4.width *= 4; type4.length /= 4; a = LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type4), ""); res = LLVMBuildBitCast(builder, res, lp_build_vec_type(bld->gallivm, type4), ""); /* * Mask and shift the channels, trying to group as many channels in the * same shift as possible. The shift amount is positive for shifts left * and negative for shifts right. */ for (shift = -3; shift <= 3; ++shift) { uint64_t mask = 0; assert(type4.width <= sizeof(mask)*8); /* * Vector element numbers follow the XYZW order, so 0 is always X, etc. 
* After widening 4 times we have: * * 3210 * Little-endian register layout: WZYX * * 0123 * Big-endian register layout: XYZW * * For little-endian, higher-numbered channels are obtained by a shift right * (negative shift amount) and lower-numbered channels by a shift left * (positive shift amount). The opposite is true for big-endian. */ for (chan = 0; chan < 4; ++chan) { if (swizzles[chan] < 4) { /* We need to move channel swizzles[chan] into channel chan */ #ifdef PIPE_ARCH_LITTLE_ENDIAN if (swizzles[chan] - chan == -shift) { mask |= ((1ULL << type.width) - 1) << (swizzles[chan] * type.width); } #else if (swizzles[chan] - chan == shift) { mask |= ((1ULL << type.width) - 1) << (type4.width - type.width) >> (swizzles[chan] * type.width); } #endif } } if (mask) { LLVMValueRef masked; LLVMValueRef shifted; if (0) debug_printf("shift = %i, mask = %" PRIx64 "\n", shift, mask); masked = LLVMBuildAnd(builder, a, lp_build_const_int_vec(bld->gallivm, type4, mask), ""); if (shift > 0) { shifted = LLVMBuildShl(builder, masked, lp_build_const_int_vec(bld->gallivm, type4, shift*type.width), ""); } else if (shift < 0) { shifted = LLVMBuildLShr(builder, masked, lp_build_const_int_vec(bld->gallivm, type4, -shift*type.width), ""); } else { shifted = masked; } res = LLVMBuildOr(builder, res, shifted, ""); } } return LLVMBuildBitCast(builder, res, lp_build_vec_type(bld->gallivm, type), ""); }
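/*
 * Illustrative sketch (not part of the original source): the little-endian
 * mask-and-shift example from the comment above, applied to a single packed
 * 32-bit pixel with 8-bit channels (B in the lowest byte).
 * bgra_to_rgba_scalar is a hypothetical helper name.
 */
#include <stdint.h>

static uint32_t
bgra_to_rgba_scalar(uint32_t bgra)
{
   return ((bgra & 0x00ff0000) >> 16) |   /* move R down to byte 0 */
          ( bgra & 0xff00ff00)        |   /* G and A stay in place */
          ((bgra & 0x000000ff) << 16);    /* move B up to byte 2   */
   /* e.g. 0x44332211 (B=0x11 G=0x22 R=0x33 A=0x44) -> 0x44112233 */
}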
/**
 * Swizzle one channel into all other three channels.
 */
LLVMValueRef
lp_build_swizzle_scalar_aos(struct lp_build_context *bld,
                            LLVMValueRef a,
                            unsigned channel)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   const struct lp_type type = bld->type;
   const unsigned n = type.length;
   unsigned i, j;

   if(a == bld->undef || a == bld->zero || a == bld->one)
      return a;

   /* XXX: SSSE3 has PSHUFB which should be better than bitmasks, but forcing
    * the use of shuffles here actually causes worse results. More
    * investigation is needed. */

   if (type.width >= 16) {
      /*
       * Shuffle.
       */
      LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context);
      LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];

      for(j = 0; j < n; j += 4)
         for(i = 0; i < 4; ++i)
            shuffles[j + i] = LLVMConstInt(elem_type, j + channel, 0);

      return LLVMBuildShuffleVector(builder, a, bld->undef, LLVMConstVector(shuffles, n), "");
   }
   else {
      /*
       * Bit mask and recursive shifts
       *
       *   XYZW XYZW .... XYZW  <= input
       *   0Y00 0Y00 .... 0Y00
       *   YY00 YY00 .... YY00
       *   YYYY YYYY .... YYYY  <= output
       */
      struct lp_type type4;
      const char shifts[4][2] = {
         { 1,  2},
         {-1,  2},
         { 1, -2},
         {-1, -2}
      };
      unsigned i;

      a = LLVMBuildAnd(builder, a,
                       lp_build_const_mask_aos(bld->gallivm, type, 1 << channel), "");

      /*
       * Build a type where each element is an integer that covers the four
       * channels.
       */

      type4 = type;
      type4.floating = FALSE;
      type4.width *= 4;
      type4.length /= 4;

      a = LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type4), "");

      for(i = 0; i < 2; ++i) {
         LLVMValueRef tmp = NULL;
         int shift = shifts[channel][i];

#ifdef PIPE_ARCH_LITTLE_ENDIAN
         shift = -shift;
#endif

         if(shift > 0)
            tmp = LLVMBuildLShr(builder, a,
                                lp_build_const_int_vec(bld->gallivm, type4, shift*type.width), "");
         if(shift < 0)
            tmp = LLVMBuildShl(builder, a,
                               lp_build_const_int_vec(bld->gallivm, type4, -shift*type.width), "");

         assert(tmp);
         if(tmp)
            a = LLVMBuildOr(builder, a, tmp, "");
      }

      return LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type), "");
   }
}
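/*
 * Illustrative sketch (not part of the original source): a scalar model of
 * the "bit mask and recursive shifts" path above, assuming little-endian
 * layout and 8-bit channels packed into a 32-bit pixel (X in the lowest
 * byte). broadcast_channel_scalar is a hypothetical helper name.
 */
#include <stdint.h>

static uint32_t
broadcast_channel_scalar(uint32_t pixel, unsigned channel)
{
   /* same table as above; after the little-endian sign flip a positive
    * value means shift right, a negative one shift left */
   static const signed char shifts[4][2] = {
      { 1,  2}, {-1,  2}, { 1, -2}, {-1, -2}
   };
   uint32_t a = pixel & (0xffu << (channel * 8));    /* isolate the channel */
   unsigned i;

   for (i = 0; i < 2; i++) {
      int shift = -shifts[channel][i];               /* little-endian flip */
      if (shift > 0)
         a |= a >> (shift * 8);
      else
         a |= a << (-shift * 8);
   }
   return a;   /* e.g. broadcast_channel_scalar(0x44332211, 1) == 0x22222222 */
}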
/**
 * Perform the occlusion test and increase the counter.
 * For each channel of the mask vector that is non-zero, add one to the
 * occlusion counter; e.g. if maskvalue is {-1, -1, -1, -1} the counter
 * is incremented by 4.
 *
 * \param type       element type of the mask vector.
 * \param maskvalue  the depth test mask.
 * \param counter    pointer to the uint32 counter.
 */
void
lp_build_occlusion_count(struct gallivm_state *gallivm,
                         struct lp_type type,
                         LLVMValueRef maskvalue,
                         LLVMValueRef counter)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMContextRef context = gallivm->context;
   LLVMValueRef countmask = lp_build_const_int_vec(gallivm, type, 1);
   LLVMValueRef count, newcount;

   assert(type.length <= 16);
   assert(type.floating);

   if(util_cpu_caps.has_sse && type.length == 4) {
      const char *movmskintr = "llvm.x86.sse.movmsk.ps";
      const char *popcntintr = "llvm.ctpop.i32";
      LLVMValueRef bits = LLVMBuildBitCast(builder, maskvalue,
                                           lp_build_vec_type(gallivm, type), "");
      bits = lp_build_intrinsic_unary(builder, movmskintr,
                                      LLVMInt32TypeInContext(context), bits);
      count = lp_build_intrinsic_unary(builder, popcntintr,
                                       LLVMInt32TypeInContext(context), bits);
   }
   else if(util_cpu_caps.has_avx && type.length == 8) {
      const char *movmskintr = "llvm.x86.avx.movmsk.ps.256";
      const char *popcntintr = "llvm.ctpop.i32";
      LLVMValueRef bits = LLVMBuildBitCast(builder, maskvalue,
                                           lp_build_vec_type(gallivm, type), "");
      bits = lp_build_intrinsic_unary(builder, movmskintr,
                                      LLVMInt32TypeInContext(context), bits);
      count = lp_build_intrinsic_unary(builder, popcntintr,
                                       LLVMInt32TypeInContext(context), bits);
   }
   else {
      unsigned i;
      LLVMValueRef countv = LLVMBuildAnd(builder, maskvalue, countmask, "countv");
      LLVMTypeRef counttype = LLVMIntTypeInContext(context, type.length * 8);
      LLVMTypeRef i8vntype = LLVMVectorType(LLVMInt8TypeInContext(context), type.length * 4);
      LLVMValueRef shufflev, countd;
      LLVMValueRef shuffles[16];
      const char *popcntintr = NULL;

      countv = LLVMBuildBitCast(builder, countv, i8vntype, "");

      for (i = 0; i < type.length; i++) {
         shuffles[i] = lp_build_const_int32(gallivm, 4*i);
      }

      shufflev = LLVMConstVector(shuffles, type.length);
      countd = LLVMBuildShuffleVector(builder, countv, LLVMGetUndef(i8vntype), shufflev, "");
      countd = LLVMBuildBitCast(builder, countd, counttype, "countd");

      /*
       * XXX FIXME
       * this is bad on cpus without popcount (on x86 supported by intel
       * nehalem, amd barcelona, and up - not tied to sse42).
       * Would be much faster to just sum the 4 elements of the vector with
       * some horizontal add (shuffle/add/shuffle/add after the initial and).
       */
      switch (type.length) {
      case 4:
         popcntintr = "llvm.ctpop.i32";
         break;
      case 8:
         popcntintr = "llvm.ctpop.i64";
         break;
      case 16:
         popcntintr = "llvm.ctpop.i128";
         break;
      default:
         assert(0);
      }
      count = lp_build_intrinsic_unary(builder, popcntintr, counttype, countd);

      if (type.length > 4) {
         count = LLVMBuildTrunc(builder, count, LLVMIntTypeInContext(context, 32), "");
      }
   }
   newcount = LLVMBuildLoad(builder, counter, "origcount");
   newcount = LLVMBuildAdd(builder, newcount, count, "newcount");
   LLVMBuildStore(builder, newcount, counter);
}
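/*
 * Illustrative sketch (not part of the original source): a scalar model of
 * the SSE path above. Each lane of the depth-test mask is either 0 or ~0;
 * movmskps collects one sign bit per lane and a popcount of those bits is
 * the number of samples that passed, which is then added to the counter.
 * count_passed_scalar is a hypothetical helper; __builtin_popcount is the
 * GCC/Clang builtin.
 */
#include <stdint.h>

static unsigned
count_passed_scalar(const int32_t mask[4])
{
   unsigned movemask = 0;
   unsigned i;

   for (i = 0; i < 4; i++)
      movemask |= ((uint32_t)mask[i] >> 31) << i;   /* sign bit per lane */

   return __builtin_popcount(movemask);             /* 0..4 */
}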
struct cl2llvm_val_t *llvm_type_cast(struct cl2llvm_val_t * original_val, struct cl2llvmTypeWrap *totype_w_sign) { struct cl2llvm_val_t *llvm_val = cl2llvm_val_create(); int i; struct cl2llvmTypeWrap *elem_type; struct cl2llvm_val_t *cast_original_val; LLVMValueRef index; LLVMValueRef vector_addr; LLVMValueRef vector; LLVMValueRef const_elems[16]; LLVMTypeRef fromtype = cl2llvmTypeWrapGetLlvmType(original_val->type); LLVMTypeRef totype = cl2llvmTypeWrapGetLlvmType(totype_w_sign); int fromsign = cl2llvmTypeWrapGetSign(original_val->type); int tosign = cl2llvmTypeWrapGetSign(totype_w_sign); /*By default the return value is the same as the original_val*/ llvm_val->val = original_val->val; cl2llvmTypeWrapSetLlvmType(llvm_val->type, cl2llvmTypeWrapGetLlvmType(original_val->type)); cl2llvmTypeWrapSetSign(llvm_val->type, cl2llvmTypeWrapGetSign(original_val->type)); snprintf(temp_var_name, sizeof temp_var_name, "tmp_%d", temp_var_count++); /* Check that fromtype is not a vector, unless both types are identical. */ if (LLVMGetTypeKind(fromtype) == LLVMVectorTypeKind) { if ((LLVMGetVectorSize(fromtype) != LLVMGetVectorSize(totype) || LLVMGetElementType(fromtype) != LLVMGetElementType(totype)) || fromsign != tosign) { if (LLVMGetTypeKind(totype) == LLVMVectorTypeKind) cl2llvm_yyerror("Casts between vector types are forbidden"); cl2llvm_yyerror("A vector may not be cast to any other type."); } } /* If totype is a vector, create a vector whose components are equal to original_val */ if (LLVMGetTypeKind(totype) == LLVMVectorTypeKind && LLVMGetTypeKind(fromtype) != LLVMVectorTypeKind) { /*Go to entry block and declare vector*/ LLVMPositionBuilder(cl2llvm_builder, cl2llvm_current_function->entry_block, cl2llvm_current_function->branch_instr); snprintf(temp_var_name, sizeof temp_var_name, "tmp_%d", temp_var_count++); vector_addr = LLVMBuildAlloca(cl2llvm_builder, totype, temp_var_name); LLVMPositionBuilderAtEnd(cl2llvm_builder, current_basic_block); /* Load vector */ snprintf(temp_var_name, sizeof temp_var_name, "tmp_%d", temp_var_count++); vector = LLVMBuildLoad(cl2llvm_builder, vector_addr, temp_var_name); /* Create object to represent element type of totype */ elem_type = cl2llvmTypeWrapCreate(LLVMGetElementType(totype), tosign); /* If original_val is constant create a constant vector */ if (LLVMIsConstant(original_val->val)) { cast_original_val = llvm_type_cast(original_val, elem_type); for (i = 0; i < LLVMGetVectorSize(totype); i++) const_elems[i] = cast_original_val->val; vector = LLVMConstVector(const_elems, LLVMGetVectorSize(totype)); llvm_val->val = vector; cl2llvm_val_free(cast_original_val); } /* If original value is not constant insert elements */ else { for (i = 0; i < LLVMGetVectorSize(totype); i++) { index = LLVMConstInt(LLVMInt32Type(), i, 0); cast_original_val = llvm_type_cast(original_val, elem_type); snprintf(temp_var_name, sizeof temp_var_name, "tmp_%d", temp_var_count++); vector = LLVMBuildInsertElement(cl2llvm_builder, vector, cast_original_val->val, index, temp_var_name); cl2llvm_val_free(cast_original_val); } } cl2llvmTypeWrapFree(elem_type); llvm_val->val = vector; } if (fromtype == LLVMInt64Type()) { if (totype == LLVMDoubleType()) { if (fromsign) { llvm_val->val = LLVMBuildSIToFP(cl2llvm_builder, original_val->val, LLVMDoubleType(), temp_var_name); } else { llvm_val->val = LLVMBuildUIToFP(cl2llvm_builder, original_val->val, LLVMDoubleType(), temp_var_name); } cl2llvmTypeWrapSetSign(llvm_val->type, 1); } else if (totype == LLVMFloatType()) { if (fromsign) { llvm_val->val = 
LLVMBuildSIToFP(cl2llvm_builder, original_val->val, LLVMFloatType(), temp_var_name); } else { llvm_val->val = LLVMBuildUIToFP(cl2llvm_builder, original_val->val, LLVMFloatType(), temp_var_name); } cl2llvmTypeWrapSetSign(llvm_val->type, 1); } else if (totype == LLVMHalfType()) { if (fromsign) { llvm_val->val = LLVMBuildSIToFP(cl2llvm_builder, original_val->val, LLVMHalfType(), temp_var_name); } else { llvm_val->val = LLVMBuildUIToFP(cl2llvm_builder, original_val->val, LLVMHalfType(), temp_var_name); } cl2llvmTypeWrapSetSign(llvm_val->type, 1); } else if (totype == LLVMInt64Type()) { if (tosign) cl2llvmTypeWrapSetSign(llvm_val->type, 1); else cl2llvmTypeWrapSetSign(llvm_val->type, 0); temp_var_count--; } else if (totype == LLVMInt32Type()) { llvm_val->val = LLVMBuildTrunc(cl2llvm_builder, original_val->val, LLVMInt32Type(), temp_var_name); if(tosign) cl2llvmTypeWrapSetSign(llvm_val->type, 1); else cl2llvmTypeWrapSetSign(llvm_val->type, 0); } else if (totype == LLVMInt16Type()) { llvm_val->val = LLVMBuildTrunc(cl2llvm_builder, original_val->val, LLVMInt16Type(), temp_var_name); if(tosign) cl2llvmTypeWrapSetSign(llvm_val->type, 1); else cl2llvmTypeWrapSetSign(llvm_val->type, 0); } else if (totype == LLVMInt8Type()) { llvm_val->val = LLVMBuildTrunc(cl2llvm_builder, original_val->val, LLVMInt8Type(), temp_var_name); if(tosign) cl2llvmTypeWrapSetSign(llvm_val->type, 1); else cl2llvmTypeWrapSetSign(llvm_val->type, 0); } else if (totype == LLVMInt1Type()) { llvm_val->val = LLVMBuildTrunc(cl2llvm_builder, original_val->val, LLVMInt1Type(), temp_var_name); if(tosign) cl2llvmTypeWrapSetSign(llvm_val->type, 1); else cl2llvmTypeWrapSetSign(llvm_val->type, 0); } } else if (fromtype == LLVMInt32Type()) { if (totype == LLVMDoubleType()) { if (fromsign) { llvm_val->val = LLVMBuildSIToFP(cl2llvm_builder, original_val->val, LLVMDoubleType(), temp_var_name); } else { llvm_val->val = LLVMBuildUIToFP(cl2llvm_builder, original_val->val, LLVMDoubleType(), temp_var_name); } cl2llvmTypeWrapSetSign(llvm_val->type, 1); } else if (totype == LLVMFloatType()) { if (fromsign) { llvm_val->val = LLVMBuildSIToFP(cl2llvm_builder, original_val->val, LLVMFloatType(), temp_var_name); } else { llvm_val->val = LLVMBuildUIToFP(cl2llvm_builder, original_val->val, LLVMFloatType(), temp_var_name); } cl2llvmTypeWrapSetSign(llvm_val->type, 1); } else if (totype == LLVMHalfType()) { if (fromsign) { llvm_val->val = LLVMBuildSIToFP(cl2llvm_builder, original_val->val, LLVMHalfType(), temp_var_name); } else { llvm_val->val = LLVMBuildUIToFP(cl2llvm_builder, original_val->val, LLVMHalfType(), temp_var_name); } cl2llvmTypeWrapSetSign(llvm_val->type, 1); } else if (totype == LLVMInt64Type()) { if (fromsign) { llvm_val->val = LLVMBuildSExt(cl2llvm_builder, original_val->val, LLVMInt64Type(), temp_var_name); } else { llvm_val->val = LLVMBuildZExt(cl2llvm_builder, original_val->val, LLVMInt64Type(), temp_var_name); } if (tosign) cl2llvmTypeWrapSetSign(llvm_val->type, 1); else cl2llvmTypeWrapSetSign(llvm_val->type, 0); } else if (totype == LLVMInt32Type()) { if(tosign) cl2llvmTypeWrapSetSign(llvm_val->type, 1); else cl2llvmTypeWrapSetSign(llvm_val->type, 0); temp_var_count--; } else if (totype == LLVMInt16Type()) { llvm_val->val = LLVMBuildTrunc(cl2llvm_builder, original_val->val, LLVMInt16Type(), temp_var_name); if(tosign) cl2llvmTypeWrapSetSign(llvm_val->type, 1); else cl2llvmTypeWrapSetSign(llvm_val->type, 0); } else if (totype == LLVMInt8Type()) { llvm_val->val = LLVMBuildTrunc(cl2llvm_builder, original_val->val, LLVMInt8Type(), temp_var_name); 
if(tosign) cl2llvmTypeWrapSetSign(llvm_val->type, 1); else cl2llvmTypeWrapSetSign(llvm_val->type, 0); } else if (totype == LLVMInt1Type()) { llvm_val->val = LLVMBuildTrunc(cl2llvm_builder, original_val->val, LLVMInt1Type(), temp_var_name); if(tosign) cl2llvmTypeWrapSetSign(llvm_val->type, 1); else cl2llvmTypeWrapSetSign(llvm_val->type, 0); } } else if (fromtype == LLVMInt16Type()) { if (totype == LLVMDoubleType()) { if (fromsign) { llvm_val->val = LLVMBuildSIToFP(cl2llvm_builder, original_val->val, LLVMDoubleType(), temp_var_name); } else { llvm_val->val = LLVMBuildUIToFP(cl2llvm_builder, original_val->val, LLVMDoubleType(), temp_var_name); } cl2llvmTypeWrapSetSign(llvm_val->type, 1); } else if (totype == LLVMFloatType()) { if (fromsign) { llvm_val->val = LLVMBuildSIToFP(cl2llvm_builder, original_val->val, LLVMFloatType(), temp_var_name); } else { llvm_val->val = LLVMBuildUIToFP(cl2llvm_builder, original_val->val, LLVMFloatType(), temp_var_name); } cl2llvmTypeWrapSetSign(llvm_val->type, 1); } else if (totype == LLVMHalfType()) { if (fromsign) { llvm_val->val = LLVMBuildSIToFP(cl2llvm_builder, original_val->val, LLVMHalfType(), temp_var_name); } else { llvm_val->val = LLVMBuildUIToFP(cl2llvm_builder, original_val->val, LLVMHalfType(), temp_var_name); } cl2llvmTypeWrapSetSign(llvm_val->type, 1); } else if (totype == LLVMInt64Type()) { if (fromsign) { llvm_val->val = LLVMBuildSExt(cl2llvm_builder, original_val->val, LLVMInt64Type(), temp_var_name); } else { llvm_val->val = LLVMBuildZExt(cl2llvm_builder, original_val->val, LLVMInt64Type(), temp_var_name); } if (tosign) cl2llvmTypeWrapSetSign(llvm_val->type, 1); else cl2llvmTypeWrapSetSign(llvm_val->type, 0); } else if (totype == LLVMInt32Type()) { if (fromsign) { llvm_val->val = LLVMBuildSExt(cl2llvm_builder, original_val->val, LLVMInt32Type(), temp_var_name); } else { llvm_val->val = LLVMBuildZExt(cl2llvm_builder, original_val->val, LLVMInt32Type(), temp_var_name); } if(tosign) cl2llvmTypeWrapSetSign(llvm_val->type, 1); else cl2llvmTypeWrapSetSign(llvm_val->type, 0); } else if (totype == LLVMInt16Type()) { if(tosign) cl2llvmTypeWrapSetSign(llvm_val->type, 1); else cl2llvmTypeWrapSetSign(llvm_val->type, 0); temp_var_count--; } else if (totype == LLVMInt8Type()) { llvm_val->val = LLVMBuildTrunc(cl2llvm_builder, original_val->val, LLVMInt8Type(), temp_var_name); if(tosign) cl2llvmTypeWrapSetSign(llvm_val->type, 1); else cl2llvmTypeWrapSetSign(llvm_val->type, 0); } else if (totype == LLVMInt1Type()) { llvm_val->val = LLVMBuildTrunc(cl2llvm_builder, original_val->val, LLVMInt1Type(), temp_var_name); if(tosign) cl2llvmTypeWrapSetSign(llvm_val->type, 1); else cl2llvmTypeWrapSetSign(llvm_val->type, 0); } } else if (fromtype == LLVMInt8Type()) { if (totype == LLVMDoubleType()) { if (fromsign) { llvm_val->val = LLVMBuildSIToFP(cl2llvm_builder, original_val->val, LLVMDoubleType(), temp_var_name); } else { llvm_val->val = LLVMBuildUIToFP(cl2llvm_builder, original_val->val, LLVMDoubleType(), temp_var_name); } cl2llvmTypeWrapSetSign(llvm_val->type, 1); } else if (totype == LLVMFloatType()) { if (fromsign) { llvm_val->val = LLVMBuildSIToFP(cl2llvm_builder, original_val->val, LLVMFloatType(), temp_var_name); } else { llvm_val->val = LLVMBuildUIToFP(cl2llvm_builder, original_val->val, LLVMFloatType(), temp_var_name); } cl2llvmTypeWrapSetSign(llvm_val->type, 1); } else if (totype == LLVMHalfType()) { if (fromsign) { llvm_val->val = LLVMBuildSIToFP(cl2llvm_builder, original_val->val, LLVMHalfType(), temp_var_name); } else { llvm_val->val = 
LLVMBuildUIToFP(cl2llvm_builder, original_val->val, LLVMHalfType(), temp_var_name); } cl2llvmTypeWrapSetSign(llvm_val->type, 1); } else if (totype == LLVMInt64Type()) { if (fromsign) { llvm_val->val = LLVMBuildSExt(cl2llvm_builder, original_val->val, LLVMInt64Type(), temp_var_name); } else { llvm_val->val = LLVMBuildZExt(cl2llvm_builder, original_val->val, LLVMInt64Type(), temp_var_name); } if (tosign) cl2llvmTypeWrapSetSign(llvm_val->type, 1); else cl2llvmTypeWrapSetSign(llvm_val->type, 0); } else if (totype == LLVMInt32Type()) { if (fromsign) { llvm_val->val = LLVMBuildSExt(cl2llvm_builder, original_val->val, LLVMInt32Type(), temp_var_name); } else { llvm_val->val = LLVMBuildZExt(cl2llvm_builder, original_val->val, LLVMInt32Type(), temp_var_name); } if(tosign) cl2llvmTypeWrapSetSign(llvm_val->type, 1); else cl2llvmTypeWrapSetSign(llvm_val->type, 0); } else if (totype == LLVMInt16Type()) { if (fromsign) { llvm_val->val = LLVMBuildSExt(cl2llvm_builder, original_val->val, LLVMInt16Type(), temp_var_name); } else { llvm_val->val = LLVMBuildZExt(cl2llvm_builder, original_val->val, LLVMInt16Type(), temp_var_name); } if(tosign) cl2llvmTypeWrapSetSign(llvm_val->type, 1); else cl2llvmTypeWrapSetSign(llvm_val->type, 0); } else if (totype == LLVMInt8Type()) { if(tosign) cl2llvmTypeWrapSetSign(llvm_val->type, 1); else cl2llvmTypeWrapSetSign(llvm_val->type, 0); temp_var_count--; } else if (totype == LLVMInt1Type()) { llvm_val->val = LLVMBuildTrunc(cl2llvm_builder, original_val->val, LLVMInt1Type(), temp_var_name); if(tosign) cl2llvmTypeWrapSetSign(llvm_val->type, 1); else cl2llvmTypeWrapSetSign(llvm_val->type, 0); } } else if (fromtype == LLVMInt1Type()) { if (totype == LLVMDoubleType()) { if (fromsign) { llvm_val->val = LLVMBuildSIToFP(cl2llvm_builder, original_val->val, LLVMDoubleType(), temp_var_name); } else { llvm_val->val = LLVMBuildUIToFP(cl2llvm_builder, original_val->val, LLVMDoubleType(), temp_var_name); } cl2llvmTypeWrapSetSign(llvm_val->type, 1); } else if (totype == LLVMFloatType()) { if (fromsign) { llvm_val->val = LLVMBuildSIToFP(cl2llvm_builder, original_val->val, LLVMFloatType(), temp_var_name); } else { llvm_val->val = LLVMBuildUIToFP(cl2llvm_builder, original_val->val, LLVMFloatType(), temp_var_name); } cl2llvmTypeWrapSetSign(llvm_val->type, 1); } else if (totype == LLVMHalfType()) { if (fromsign) { llvm_val->val = LLVMBuildSIToFP(cl2llvm_builder, original_val->val, LLVMHalfType(), temp_var_name); } else { llvm_val->val = LLVMBuildUIToFP(cl2llvm_builder, original_val->val, LLVMHalfType(), temp_var_name); } cl2llvmTypeWrapSetSign(llvm_val->type, 1); } else if (totype == LLVMInt64Type()) { if (fromsign) { llvm_val->val = LLVMBuildSExt(cl2llvm_builder, original_val->val, LLVMInt64Type(), temp_var_name); } else { llvm_val->val = LLVMBuildZExt(cl2llvm_builder, original_val->val, LLVMInt64Type(), temp_var_name); } if (tosign) cl2llvmTypeWrapSetSign(llvm_val->type, 1); else cl2llvmTypeWrapSetSign(llvm_val->type, 0); } else if (totype == LLVMInt32Type()) { if (fromsign) { llvm_val->val = LLVMBuildSExt(cl2llvm_builder, original_val->val, LLVMInt32Type(), temp_var_name); } else { llvm_val->val = LLVMBuildZExt(cl2llvm_builder, original_val->val, LLVMInt32Type(), temp_var_name); } if(tosign) cl2llvmTypeWrapSetSign(llvm_val->type, 1); else cl2llvmTypeWrapSetSign(llvm_val->type, 0); } else if (totype == LLVMInt16Type()) { if (fromsign) { llvm_val->val = LLVMBuildSExt(cl2llvm_builder, original_val->val, LLVMInt16Type(), temp_var_name); } else { llvm_val->val = LLVMBuildZExt(cl2llvm_builder, 
original_val->val, LLVMInt16Type(), temp_var_name); } if(tosign) cl2llvmTypeWrapSetSign(llvm_val->type, 1); else cl2llvmTypeWrapSetSign(llvm_val->type, 0); } else if (totype == LLVMInt8Type()) { if (fromsign) { llvm_val->val = LLVMBuildSExt(cl2llvm_builder, original_val->val, LLVMInt8Type(), temp_var_name); } else { llvm_val->val = LLVMBuildZExt(cl2llvm_builder, original_val->val, LLVMInt8Type(), temp_var_name); } if(tosign) cl2llvmTypeWrapSetSign(llvm_val->type, 1); else cl2llvmTypeWrapSetSign(llvm_val->type, 0); } else if (totype == LLVMInt1Type()) { if(tosign) cl2llvmTypeWrapSetSign(llvm_val->type, 1); else cl2llvmTypeWrapSetSign(llvm_val->type, 0); temp_var_count--; } } /*We now know that from type must be a floating point.*/ /*Floating point to signed integer conversions*/ else if (tosign && LLVMGetTypeKind(totype) == 8) { if (totype == LLVMInt64Type()) { llvm_val->val = LLVMBuildFPToSI(cl2llvm_builder, original_val->val, LLVMInt64Type(), temp_var_name); } else if (totype == LLVMInt32Type()) { llvm_val->val = LLVMBuildFPToSI(cl2llvm_builder, original_val->val, LLVMInt32Type(), temp_var_name); } else if (totype == LLVMInt16Type()) { llvm_val->val = LLVMBuildFPToSI(cl2llvm_builder, original_val->val, LLVMInt16Type(), temp_var_name); } else if (totype == LLVMInt8Type()) { llvm_val->val = LLVMBuildFPToSI(cl2llvm_builder, original_val->val, LLVMInt8Type(), temp_var_name); } else if (totype == LLVMInt1Type()) { llvm_val->val = LLVMBuildFPToSI(cl2llvm_builder, original_val->val, LLVMInt1Type(), temp_var_name); } cl2llvmTypeWrapSetSign(llvm_val->type, 1); } /*Floating point to unsigned integer conversions*/ else if (!tosign) { if (totype == LLVMInt64Type()) { llvm_val->val = LLVMBuildFPToUI(cl2llvm_builder, original_val->val, LLVMInt64Type(), temp_var_name); } else if (totype == LLVMInt32Type()) { llvm_val->val = LLVMBuildFPToUI(cl2llvm_builder, original_val->val, LLVMInt32Type(), temp_var_name); } else if (totype == LLVMInt16Type()) { llvm_val->val = LLVMBuildFPToUI(cl2llvm_builder, original_val->val, LLVMInt16Type(), temp_var_name); } else if (totype == LLVMInt8Type()) { llvm_val->val = LLVMBuildFPToUI(cl2llvm_builder, original_val->val, LLVMInt8Type(), temp_var_name); } else if (totype == LLVMInt1Type()) { llvm_val->val = LLVMBuildFPToUI(cl2llvm_builder, original_val->val, LLVMInt1Type(), temp_var_name); } cl2llvmTypeWrapSetSign(llvm_val->type, 0); } else if (totype == LLVMDoubleType()) { llvm_val->val = LLVMBuildFPExt(cl2llvm_builder, original_val->val, LLVMDoubleType(), temp_var_name); cl2llvmTypeWrapSetSign(llvm_val->type, 1); } else if (totype == LLVMFloatType()) { if (fromtype == LLVMDoubleType()) { llvm_val->val = LLVMBuildFPTrunc(cl2llvm_builder, original_val->val, LLVMFloatType(), temp_var_name); } else if (fromtype == LLVMHalfType()) { llvm_val->val = LLVMBuildFPExt(cl2llvm_builder, original_val->val, LLVMFloatType(), temp_var_name); } cl2llvmTypeWrapSetSign(llvm_val->type, 1); } else if (totype == LLVMHalfType()) { llvm_val->val = LLVMBuildFPTrunc(cl2llvm_builder, original_val->val, LLVMHalfType(), temp_var_name); cl2llvmTypeWrapSetSign(llvm_val->type, 1); } cl2llvmTypeWrapSetLlvmType(llvm_val->type, totype); cl2llvmTypeWrapSetSign(llvm_val->type, tosign); return llvm_val; }
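/*
 * Illustrative sketch (not part of cl2llvm): the cast-selection rule that
 * llvm_type_cast() above spells out case by case, written once against
 * LLVMGetTypeKind()/LLVMGetIntTypeWidth(). It handles only scalar integer
 * and half/float/double types and ignores the sign bookkeeping done on the
 * wrapper; build_scalar_cast and fp_bits are hypothetical helper names.
 * Note that the literal 8 tested in the float-to-signed-integer branch above
 * corresponds to LLVMIntegerTypeKind in the LLVM-C headers this code was
 * written against.
 */
#include <llvm-c/Core.h>

static int
fp_bits(LLVMTypeKind k)
{
   switch (k) {
   case LLVMHalfTypeKind:   return 16;
   case LLVMFloatTypeKind:  return 32;
   case LLVMDoubleTypeKind: return 64;
   default:                 return 0;
   }
}

static LLVMValueRef
build_scalar_cast(LLVMBuilderRef b, LLVMValueRef v,
                  LLVMTypeRef from, LLVMTypeRef to,
                  int fromsign, int tosign)
{
   LLVMTypeKind fk = LLVMGetTypeKind(from);
   LLVMTypeKind tk = LLVMGetTypeKind(to);

   if (fk == LLVMIntegerTypeKind && tk == LLVMIntegerTypeKind) {
      unsigned fw = LLVMGetIntTypeWidth(from);
      unsigned tw = LLVMGetIntTypeWidth(to);
      if (tw < fw)
         return LLVMBuildTrunc(b, v, to, "");
      if (tw > fw)
         return fromsign ? LLVMBuildSExt(b, v, to, "")
                         : LLVMBuildZExt(b, v, to, "");
      return v;                        /* same width: nothing to do */
   }
   if (fk == LLVMIntegerTypeKind)      /* int -> fp */
      return fromsign ? LLVMBuildSIToFP(b, v, to, "")
                      : LLVMBuildUIToFP(b, v, to, "");
   if (tk == LLVMIntegerTypeKind)      /* fp -> int */
      return tosign ? LLVMBuildFPToSI(b, v, to, "")
                    : LLVMBuildFPToUI(b, v, to, "");
   /* fp -> fp: widen, narrow or keep */
   if (fp_bits(tk) > fp_bits(fk))
      return LLVMBuildFPExt(b, v, to, "");
   if (fp_bits(tk) < fp_bits(fk))
      return LLVMBuildFPTrunc(b, v, to, "");
   return v;
}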