 * Do the one or two-sided stencil test comparison.
 * \sa lp_build_stencil_test_single
 * \param face  a floating-point value indicating front (+) or back (-) facing polygon.
 *              If NULL, assume front-facing.
static LLVMValueRef
lp_build_stencil_test(struct lp_build_context *bld,
                      const struct pipe_stencil_state stencil[2],
                      LLVMValueRef stencilRefs[2],
                      LLVMValueRef stencilVals,
                      LLVMValueRef face)
   LLVMValueRef res;


   if (stencil[1].enabled && face) {
      /* do two-sided test */
      struct lp_build_flow_context *flow_ctx;
      struct lp_build_if_state if_ctx;
      LLVMValueRef front_facing;
      LLVMValueRef zero = LLVMConstReal(LLVMFloatType(), 0.0);
      LLVMValueRef result = bld->undef;

      flow_ctx = lp_build_flow_create(bld->builder);

      lp_build_flow_scope_declare(flow_ctx, &result);

      /* front_facing = face > 0.0 */
      front_facing = LLVMBuildFCmp(bld->builder, LLVMRealUGT, face, zero, "");

      lp_build_if(&if_ctx, flow_ctx, bld->builder, front_facing);
         result = lp_build_stencil_test_single(bld, &stencil[0],
                                               stencilRefs[0], stencilVals);
         result = lp_build_stencil_test_single(bld, &stencil[1],
                                               stencilRefs[1], stencilVals);


      res = result;
   else {
      /* do single-side test */
      res = lp_build_stencil_test_single(bld, &stencil[0],
                                         stencilRefs[0], stencilVals);

   return res;
 * Generate code to do cube face selection and compute per-face texcoords.
lp_build_cube_lookup(struct lp_build_sample_context *bld,
                     LLVMValueRef s,
                     LLVMValueRef t,
                     LLVMValueRef r,
                     LLVMValueRef *face,
                     LLVMValueRef *face_s,
                     LLVMValueRef *face_t)
   struct lp_build_context *float_bld = &bld->float_bld;
   struct lp_build_context *coord_bld = &bld->coord_bld;
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef rx, ry, rz;
   LLVMValueRef arx, ary, arz;
   LLVMValueRef c25 = lp_build_const_float(bld->gallivm, 0.25);
   LLVMValueRef arx_ge_ary, arx_ge_arz;
   LLVMValueRef ary_ge_arx, ary_ge_arz;
   LLVMValueRef arx_ge_ary_arz, ary_ge_arx_arz;

   assert(bld->coord_bld.type.length == 4);

    * Use the average of the four pixels' texcoords to choose the face.
   rx = lp_build_mul(float_bld, c25,
                     lp_build_sum_vector(&bld->coord_bld, s));
   ry = lp_build_mul(float_bld, c25,
                     lp_build_sum_vector(&bld->coord_bld, t));
   rz = lp_build_mul(float_bld, c25,
                     lp_build_sum_vector(&bld->coord_bld, r));

   arx = lp_build_abs(float_bld, rx);
   ary = lp_build_abs(float_bld, ry);
   arz = lp_build_abs(float_bld, rz);

    * Compare sign/magnitude of rx,ry,rz to determine face
   arx_ge_ary = LLVMBuildFCmp(builder, LLVMRealUGE, arx, ary, "");
   arx_ge_arz = LLVMBuildFCmp(builder, LLVMRealUGE, arx, arz, "");
   ary_ge_arx = LLVMBuildFCmp(builder, LLVMRealUGE, ary, arx, "");
   ary_ge_arz = LLVMBuildFCmp(builder, LLVMRealUGE, ary, arz, "");

   arx_ge_ary_arz = LLVMBuildAnd(builder, arx_ge_ary, arx_ge_arz, "");
   ary_ge_arx_arz = LLVMBuildAnd(builder, ary_ge_arx, ary_ge_arz, "");

      struct lp_build_if_state if_ctx;
      LLVMValueRef face_s_var;
      LLVMValueRef face_t_var;
      LLVMValueRef face_var;

      face_s_var = lp_build_alloca(bld->gallivm, bld->coord_bld.vec_type, "face_s_var");
      face_t_var = lp_build_alloca(bld->gallivm, bld->coord_bld.vec_type, "face_t_var");
      face_var = lp_build_alloca(bld->gallivm, bld->int_bld.vec_type, "face_var");

      lp_build_if(&if_ctx, bld->gallivm, arx_ge_ary_arz);
         /* +/- X face */
         LLVMValueRef sign = lp_build_sgn(float_bld, rx);
         LLVMValueRef ima = lp_build_cube_ima(coord_bld, s);
         *face_s = lp_build_cube_coord(coord_bld, sign, +1, r, ima);
         *face_t = lp_build_cube_coord(coord_bld, NULL, +1, t, ima);
         *face = lp_build_cube_face(bld, rx,
         LLVMBuildStore(builder, *face_s, face_s_var);
         LLVMBuildStore(builder, *face_t, face_t_var);
         LLVMBuildStore(builder, *face, face_var);
         struct lp_build_if_state if_ctx2;

         lp_build_if(&if_ctx2, bld->gallivm, ary_ge_arx_arz);
            /* +/- Y face */
            LLVMValueRef sign = lp_build_sgn(float_bld, ry);
            LLVMValueRef ima = lp_build_cube_ima(coord_bld, t);
            *face_s = lp_build_cube_coord(coord_bld, NULL, -1, s, ima);
            *face_t = lp_build_cube_coord(coord_bld, sign, -1, r, ima);
            *face = lp_build_cube_face(bld, ry,
            LLVMBuildStore(builder, *face_s, face_s_var);
            LLVMBuildStore(builder, *face_t, face_t_var);
            LLVMBuildStore(builder, *face, face_var);
            /* +/- Z face */
            LLVMValueRef sign = lp_build_sgn(float_bld, rz);
            LLVMValueRef ima = lp_build_cube_ima(coord_bld, r);
            *face_s = lp_build_cube_coord(coord_bld, sign, -1, s, ima);
            *face_t = lp_build_cube_coord(coord_bld, NULL, +1, t, ima);
            *face = lp_build_cube_face(bld, rz,
            LLVMBuildStore(builder, *face_s, face_s_var);
            LLVMBuildStore(builder, *face_t, face_t_var);
            LLVMBuildStore(builder, *face, face_var);


      *face_s = LLVMBuildLoad(builder, face_s_var, "face_s");
      *face_t = LLVMBuildLoad(builder, face_t_var, "face_t");
      *face   = LLVMBuildLoad(builder, face_var, "face");
 * Do a cached lookup.
 * Returns (vectors of) 4x8 rgba aos value
lp_build_fetch_cached_texels(struct gallivm_state *gallivm,
                             const struct util_format_description *format_desc,
                             unsigned n,
                             LLVMValueRef base_ptr,
                             LLVMValueRef offset,
                             LLVMValueRef i,
                             LLVMValueRef j,
                             LLVMValueRef cache)

   LLVMBuilderRef builder = gallivm->builder;
   unsigned count, low_bit, log2size;
   LLVMValueRef color, offset_stored, addr, ptr_addrtrunc, tmp;
   LLVMValueRef ij_index, hash_index, hash_mask, block_index;
   LLVMTypeRef i8t = LLVMInt8TypeInContext(gallivm->context);
   LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
   LLVMTypeRef i64t = LLVMInt64TypeInContext(gallivm->context);
   struct lp_type type;
   struct lp_build_context bld32;
   memset(&type, 0, sizeof type);
   type.width = 32;
   type.length = n;

   assert(format_desc->block.width == 4);
   assert(format_desc->block.height == 4);

   lp_build_context_init(&bld32, gallivm, type);

    * compute hash - we use direct mapped cache, the hash function could
    *                be better but it needs to be simple
    * per-element:
    *    compare offset with offset stored at tag (hash)
    *    if not equal decode/store block, update tag
    *    extract color from cache
    *    assemble result vector

   /* TODO: not ideal with 32bit pointers... */

   low_bit = util_logbase2(format_desc->block.bits / 8);
   log2size = util_logbase2(LP_BUILD_FORMAT_CACHE_SIZE);
   addr = LLVMBuildPtrToInt(builder, base_ptr, i64t, "");
   ptr_addrtrunc = LLVMBuildPtrToInt(builder, base_ptr, i32t, "");
   ptr_addrtrunc = lp_build_broadcast_scalar(&bld32, ptr_addrtrunc);
   /* For the hash function, first mask off the unused lowest bits. Then just
      do some xor with address bits - only use lower 32bits */
   ptr_addrtrunc = LLVMBuildAdd(builder, offset, ptr_addrtrunc, "");
   ptr_addrtrunc = LLVMBuildLShr(builder, ptr_addrtrunc,
                                 lp_build_const_int_vec(gallivm, type, low_bit), "");
   /* This only really makes sense for size 64,128,256 */
   hash_index = ptr_addrtrunc;
   ptr_addrtrunc = LLVMBuildLShr(builder, ptr_addrtrunc,
                                 lp_build_const_int_vec(gallivm, type, 2*log2size), "");
   hash_index = LLVMBuildXor(builder, ptr_addrtrunc, hash_index, "");
   tmp = LLVMBuildLShr(builder, hash_index,
                       lp_build_const_int_vec(gallivm, type, log2size), "");
   hash_index = LLVMBuildXor(builder, hash_index, tmp, "");

   hash_mask = lp_build_const_int_vec(gallivm, type, LP_BUILD_FORMAT_CACHE_SIZE - 1);
   hash_index = LLVMBuildAnd(builder, hash_index, hash_mask, "");
   ij_index = LLVMBuildShl(builder, i, lp_build_const_int_vec(gallivm, type, 2), "");
   ij_index = LLVMBuildAdd(builder, ij_index, j, "");
   block_index = LLVMBuildShl(builder, hash_index,
                              lp_build_const_int_vec(gallivm, type, 4), "");
   block_index = LLVMBuildAdd(builder, ij_index, block_index, "");

   if (n > 1) {
      color = LLVMGetUndef(LLVMVectorType(i32t, n));
      for (count = 0; count < n; count++) {
         LLVMValueRef index, cond, colorx;
         LLVMValueRef block_indexx, hash_indexx, addrx, offsetx, ptr_addrx;
         struct lp_build_if_state if_ctx;

         index = lp_build_const_int32(gallivm, count);
         offsetx = LLVMBuildExtractElement(builder, offset, index, "");
         addrx = LLVMBuildZExt(builder, offsetx, i64t, "");
         addrx = LLVMBuildAdd(builder, addrx, addr, "");
         block_indexx = LLVMBuildExtractElement(builder, block_index, index, "");
         hash_indexx = LLVMBuildLShr(builder, block_indexx,
                                     lp_build_const_int32(gallivm, 4), "");
         offset_stored = lookup_tag_data(gallivm, cache, hash_indexx);
         cond = LLVMBuildICmp(builder, LLVMIntNE, offset_stored, addrx, "");

         lp_build_if(&if_ctx, gallivm, cond);
            ptr_addrx = LLVMBuildIntToPtr(builder, addrx,
                                          LLVMPointerType(i8t, 0), "");
            update_cached_block(gallivm, format_desc, ptr_addrx, hash_indexx, cache);
            update_cache_access(gallivm, cache, 1,

         colorx = lookup_cached_pixel(gallivm, cache, block_indexx);

         color = LLVMBuildInsertElement(builder, color, colorx,
                                        lp_build_const_int32(gallivm, count), "");
   else {
      LLVMValueRef cond;
      struct lp_build_if_state if_ctx;

      tmp = LLVMBuildZExt(builder, offset, i64t, "");
      addr = LLVMBuildAdd(builder, tmp, addr, "");
      offset_stored = lookup_tag_data(gallivm, cache, hash_index);
      cond = LLVMBuildICmp(builder, LLVMIntNE, offset_stored, addr, "");

      lp_build_if(&if_ctx, gallivm, cond);
         tmp = LLVMBuildIntToPtr(builder, addr, LLVMPointerType(i8t, 0), "");
         update_cached_block(gallivm, format_desc, tmp, hash_index, cache);
         update_cache_access(gallivm, cache, 1,

      color = lookup_cached_pixel(gallivm, cache, block_index);
   update_cache_access(gallivm, cache, n,
   return LLVMBuildBitCast(builder, color, LLVMVectorType(i8t, n * 4), "");
 * Generate the code to do inside/outside triangle testing for the
 * four pixels in a 2x2 quad.  This will set the four elements of the
 * quad mask vector to 0 or ~0.
 * \param i  which quad of the quad group to test, in [0,3]
static void
generate_tri_edge_mask(LLVMBuilderRef builder,
                       unsigned i,
                       LLVMValueRef *mask,      /* ivec4, out */
                       LLVMValueRef c0,         /* int32 */
                       LLVMValueRef c1,         /* int32 */
                       LLVMValueRef c2,         /* int32 */
                       LLVMValueRef step0_ptr,  /* ivec4 */
                       LLVMValueRef step1_ptr,  /* ivec4 */
                       LLVMValueRef step2_ptr)  /* ivec4 */
   struct lp_build_if_state ifctx;
   LLVMValueRef not_draw_all;
   struct lp_build_flow_context *flow;
   struct lp_type i32_type;
   LLVMTypeRef i32vec4_type, mask_type;
   LLVMValueRef c0_vec, c1_vec, c2_vec;
   LLVMValueRef in_out_mask;

   assert(i < 4);
   /* int32 vector type */
   memset(&i32_type, 0, sizeof i32_type);
   i32_type.floating = FALSE; /* values are integers */
   i32_type.sign = TRUE;      /* values are signed */
   i32_type.norm = FALSE;     /* values are not normalized */
   i32_type.width = 32;       /* 32-bit int values */
   i32_type.length = 4;       /* 4 elements per vector */

   i32vec4_type = lp_build_int32_vec4_type();

   mask_type = LLVMIntType(32 * 4);

    * Use a conditional here to do detailed pixel in/out testing.
    * We only have to do this if c0 != INT_MIN.
   flow = lp_build_flow_create(builder);

      /* not_draw_all = (c0 != INT_MIN) */
      not_draw_all = LLVMBuildICmp(builder,
                                   LLVMConstInt(LLVMInt32Type(), INT_MIN, 0),

      in_out_mask = lp_build_int_const_scalar(i32_type, ~0);

      lp_build_flow_scope_declare(flow, &in_out_mask);

      /* if (not_draw_all) {... */
      lp_build_if(&ifctx, flow, builder, not_draw_all);
         LLVMValueRef step0_vec, step1_vec, step2_vec;
         LLVMValueRef m0_vec, m1_vec, m2_vec;
         LLVMValueRef index, m;

         /* c0_vec = {c0, c0, c0, c0}
          * Note that we emit this code four times but LLVM optimizes away
          * three instances of it.
         c0_vec = lp_build_broadcast(builder, i32vec4_type, c0);
         c1_vec = lp_build_broadcast(builder, i32vec4_type, c1);
         c2_vec = lp_build_broadcast(builder, i32vec4_type, c2);
         lp_build_name(c0_vec, "edgeconst0vec");
         lp_build_name(c1_vec, "edgeconst1vec");
         lp_build_name(c2_vec, "edgeconst2vec");

         /* load step0vec, step1, step2 vec from memory */
         index = LLVMConstInt(LLVMInt32Type(), i, 0);
         step0_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step0_ptr, &index, 1, ""), "");
         step1_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step1_ptr, &index, 1, ""), "");
         step2_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step2_ptr, &index, 1, ""), "");
         lp_build_name(step0_vec, "step0vec");
         lp_build_name(step1_vec, "step1vec");
         lp_build_name(step2_vec, "step2vec");

         /* m0_vec = step0_ptr[i] > c0_vec */
         m0_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step0_vec, c0_vec);
         m1_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step1_vec, c1_vec);
         m2_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step2_vec, c2_vec);

         /* in_out_mask = m0_vec & m1_vec & m2_vec */
         m = LLVMBuildAnd(builder, m0_vec, m1_vec, "");
         in_out_mask = LLVMBuildAnd(builder, m, m2_vec, "");
         lp_build_name(in_out_mask, "inoutmaskvec");


   /* This is the initial alive/dead pixel mask for a quad of four pixels.
    * It's an int[4] vector with each word set to 0 or ~0.
    * Words will get cleared when pixels fail the Z test, etc.
   *mask = in_out_mask;
 * Texture sampling in AoS format.  Used when sampling common 32-bit/texel
 * formats.  1D/2D/3D/cube texture supported.  All mipmap sampling modes
 * but only limited texture coord wrap modes.
lp_build_sample_aos(struct lp_build_sample_context *bld,
                    unsigned unit,
                    LLVMValueRef s,
                    LLVMValueRef t,
                    LLVMValueRef r,
                    const LLVMValueRef *ddx,
                    const LLVMValueRef *ddy,
                    LLVMValueRef lod_bias, /* optional */
                    LLVMValueRef explicit_lod, /* optional */
                    LLVMValueRef texel_out[4])
   struct lp_build_context *int_bld = &bld->int_bld;
   LLVMBuilderRef builder = bld->gallivm->builder;
   const unsigned mip_filter = bld->static_state->min_mip_filter;
   const unsigned min_filter = bld->static_state->min_img_filter;
   const unsigned mag_filter = bld->static_state->mag_img_filter;
   const unsigned dims = bld->dims;
   LLVMValueRef lod_ipart = NULL, lod_fpart = NULL;
   LLVMValueRef ilevel0, ilevel1 = NULL;
   LLVMValueRef packed, packed_lo, packed_hi;
   LLVMValueRef unswizzled[4];
   LLVMValueRef face_ddx[4], face_ddy[4];
   struct lp_build_context h16_bld;
   LLVMValueRef first_level;
   LLVMValueRef i32t_zero = lp_build_const_int32(bld->gallivm, 0);

   /* we only support the common/simple wrap modes at this time */
   if (dims >= 2)
   if (dims >= 3)

   /* make 16-bit fixed-pt builder context */
   lp_build_context_init(&h16_bld, bld->gallivm, lp_type_ufixed(16));

   /* cube face selection, compute per-face coords, etc. */
   if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
      LLVMValueRef face, face_s, face_t;
      lp_build_cube_lookup(bld, s, t, r, &face, &face_s, &face_t);
      s = face_s; /* vec */
      t = face_t; /* vec */
      /* use 'r' to indicate cube face */
      r = lp_build_broadcast_scalar(&bld->int_coord_bld, face); /* vec */

      /* recompute ddx, ddy using the new (s,t) face texcoords */
      face_ddx[0] = lp_build_scalar_ddx(&bld->coord_bld, s);
      face_ddx[1] = lp_build_scalar_ddx(&bld->coord_bld, t);
      face_ddx[2] = NULL;
      face_ddx[3] = NULL;
      face_ddy[0] = lp_build_scalar_ddy(&bld->coord_bld, s);
      face_ddy[1] = lp_build_scalar_ddy(&bld->coord_bld, t);
      face_ddy[2] = NULL;
      face_ddy[3] = NULL;
      ddx = face_ddx;
      ddy = face_ddy;

    * Compute the level of detail (float).
   if (min_filter != mag_filter ||
       mip_filter != PIPE_TEX_MIPFILTER_NONE) {
      /* Need to compute lod either to choose mipmap levels or to
       * distinguish between minification/magnification with one mipmap level.
      lp_build_lod_selector(bld, unit, ddx, ddy,
                            lod_bias, explicit_lod,
                            &lod_ipart, &lod_fpart);
   } else {
      lod_ipart = i32t_zero;

    * Compute integer mipmap level(s) to fetch texels from: ilevel0, ilevel1
   switch (mip_filter) {
      assert(0 && "bad mip_filter value in lp_build_sample_aos()");
      /* fall-through */
      /* always use mip level 0 */
      if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
         /* XXX this is a work-around for an apparent bug in LLVM 2.7.
          * We should be able to set ilevel0 = const(0) but that causes
          * bad x86 code to be emitted.
         lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0);
      else {
         first_level = bld->dynamic_state->first_level(bld->dynamic_state,
                                                       bld->gallivm, unit);
         ilevel0 = first_level;
      lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0);
      lp_build_linear_mip_levels(bld, unit,
                                 lod_ipart, &lod_fpart,
                                 &ilevel0, &ilevel1);

    * Get/interpolate texture colors.

   packed_lo = lp_build_alloca(bld->gallivm, h16_bld.vec_type, "packed_lo");
   packed_hi = lp_build_alloca(bld->gallivm, h16_bld.vec_type, "packed_hi");

   if (min_filter == mag_filter) {
      /* no need to distinguish between minification and magnification */
                             min_filter, mip_filter,
                             s, t, r,
                             ilevel0, ilevel1, lod_fpart,
                             packed_lo, packed_hi);
   else {
      /* Emit conditional to choose min image filter or mag image filter
       * depending on the lod being >= 0 or < 0, respectively.
      struct lp_build_if_state if_ctx;
      LLVMValueRef minify;

      /* minify = lod >= 0.0 */
      minify = LLVMBuildICmp(builder, LLVMIntSGE,
                             lod_ipart, int_bld->zero, "");

      lp_build_if(&if_ctx, bld->gallivm, minify);
         /* Use the minification filter */
                                min_filter, mip_filter,
                                s, t, r,
                                ilevel0, ilevel1, lod_fpart,
                                packed_lo, packed_hi);
         /* Use the magnification filter */
                                mag_filter, PIPE_TEX_MIPFILTER_NONE,
                                s, t, r,
                                ilevel0, NULL, NULL,
                                packed_lo, packed_hi);

    * combine the values stored in 'packed_lo' and 'packed_hi' variables
    * into 'packed'
   packed = lp_build_pack2(bld->gallivm,
                           h16_bld.type, lp_type_unorm(8),
                           LLVMBuildLoad(builder, packed_lo, ""),
                           LLVMBuildLoad(builder, packed_hi, ""));

    * Convert to SoA and swizzle.
                             packed, unswizzled);

   if (util_format_is_rgba8_variant(bld->format_desc)) {
                                  unswizzled, texel_out);
   else {
      texel_out[0] = unswizzled[0];
      texel_out[1] = unswizzled[1];
      texel_out[2] = unswizzled[2];
      texel_out[3] = unswizzled[3];
 * Sample the texture/mipmap using given image filter and mip filter.
 * data0_ptr and data1_ptr point to the two mipmap levels to sample
 * from.  width0/1_vec, height0/1_vec, depth0/1_vec indicate their sizes.
 * If we're using nearest miplevel sampling the '1' values will be null/unused.
static void
lp_build_sample_mipmap(struct lp_build_sample_context *bld,
                       unsigned img_filter,
                       unsigned mip_filter,
                       LLVMValueRef s,
                       LLVMValueRef t,
                       LLVMValueRef r,
                       LLVMValueRef ilevel0,
                       LLVMValueRef ilevel1,
                       LLVMValueRef lod_fpart,
                       LLVMValueRef colors_lo_var,
                       LLVMValueRef colors_hi_var)
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef size0;
   LLVMValueRef size1;
   LLVMValueRef row_stride0_vec;
   LLVMValueRef row_stride1_vec;
   LLVMValueRef img_stride0_vec;
   LLVMValueRef img_stride1_vec;
   LLVMValueRef data_ptr0;
   LLVMValueRef data_ptr1;
   LLVMValueRef colors0_lo, colors0_hi;
   LLVMValueRef colors1_lo, colors1_hi;

   /* sample the first mipmap level */
   lp_build_mipmap_level_sizes(bld, ilevel0,
                               &row_stride0_vec, &img_stride0_vec);
   data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
   if (img_filter == PIPE_TEX_FILTER_NEAREST) {
                                    row_stride0_vec, img_stride0_vec,
                                    data_ptr0, s, t, r,
                                    &colors0_lo, &colors0_hi);
   else {
      assert(img_filter == PIPE_TEX_FILTER_LINEAR);
                                   row_stride0_vec, img_stride0_vec,
                                   data_ptr0, s, t, r,
                                   &colors0_lo, &colors0_hi);

   /* Store the first level's colors in the output variables */
   LLVMBuildStore(builder, colors0_lo, colors_lo_var);
   LLVMBuildStore(builder, colors0_hi, colors_hi_var);

   if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
      LLVMValueRef h16_scale = lp_build_const_float(bld->gallivm, 256.0);
      LLVMTypeRef i32_type = LLVMIntTypeInContext(bld->gallivm->context, 32);
      struct lp_build_if_state if_ctx;
      LLVMValueRef need_lerp;

      lod_fpart = LLVMBuildFMul(builder, lod_fpart, h16_scale, "");
      lod_fpart = LLVMBuildFPToSI(builder, lod_fpart, i32_type, "lod_fpart.fixed16");

      /* need_lerp = lod_fpart > 0 */
      need_lerp = LLVMBuildICmp(builder, LLVMIntSGT,
                                lod_fpart, LLVMConstNull(i32_type),

      lp_build_if(&if_ctx, bld->gallivm, need_lerp);
         struct lp_build_context h16_bld;

         lp_build_context_init(&h16_bld, bld->gallivm, lp_type_ufixed(16));

         /* sample the second mipmap level */
         lp_build_mipmap_level_sizes(bld, ilevel1,
                                     &row_stride1_vec, &img_stride1_vec);
         data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
         if (img_filter == PIPE_TEX_FILTER_NEAREST) {
                                          row_stride1_vec, img_stride1_vec,
                                          data_ptr1, s, t, r,
                                          &colors1_lo, &colors1_hi);
         else {
                                         row_stride1_vec, img_stride1_vec,
                                         data_ptr1, s, t, r,
                                         &colors1_lo, &colors1_hi);

         /* interpolate samples from the two mipmap levels */

         lod_fpart = LLVMBuildTrunc(builder, lod_fpart, h16_bld.elem_type, "");
         lod_fpart = lp_build_broadcast_scalar(&h16_bld, lod_fpart);

#if HAVE_LLVM == 0x208
         /* This is a work-around for a bug in LLVM 2.8.
          * Evidently, something goes wrong in the construction of the
          * lod_fpart short[8] vector.  Adding this no-effect shuffle seems
          * to force the vector to be properly constructed.
          * Tested with mesa-demos/src/tests/mipmap_limits.c (press t, f).
            LLVMValueRef shuffles[8], shuffle;
            int i;
            assert(h16_bld.type.length <= Elements(shuffles));
            for (i = 0; i < h16_bld.type.length; i++)
               shuffles[i] = lp_build_const_int32(bld->gallivm, 2 * (i & 1));
            shuffle = LLVMConstVector(shuffles, h16_bld.type.length);
            lod_fpart = LLVMBuildShuffleVector(builder,
                                               lod_fpart, lod_fpart,
                                               shuffle, "");

         colors0_lo = lp_build_lerp(&h16_bld, lod_fpart,
                                    colors0_lo, colors1_lo);
         colors0_hi = lp_build_lerp(&h16_bld, lod_fpart,
                                    colors0_hi, colors1_hi);

         LLVMBuildStore(builder, colors0_lo, colors_lo_var);
         LLVMBuildStore(builder, colors0_hi, colors_hi_var);