Example #1
/**
 * Normalized 8bit multiplication.
 *
 * - alpha plus one
 *
 *     makes the following approximation to the division (Sree)
 *    
 *       a*b/255 ~= (a*(b + 1)) >> 8
 *    
 *     which is the fastest method that satisfies the following OpenGL criteria
 *    
 *       0*0 = 0 and 255*255 = 255
 *
 * - geometric series
 *
 *     takes the geometric series approximation to the division
 *
 *       t/255 = (t >> 8) + (t >> 16) + (t >> 24) ..
 *
 *     in this case just the first two terms to fit in 16bit arithmetic
 *
 *       t/255 ~= (t + (t >> 8)) >> 8
 *
 *     note that just by itself it doesn't satisfy the OpenGL criteria, as
 *     255*255 = 254, so the special case b = 255 must be accounted for, or
 *     rounding must be used
 *
 * - geometric series plus rounding
 *
 *     when using a geometric series division instead of truncating the result
 *     use roundoff in the approximation (Jim Blinn)
 *
 *       t/255 ~= ((t + 0x80) + ((t + 0x80) >> 8)) >> 8
 *
 *     achieving the exact results
 *
 * @sa Alvy Ray Smith, Image Compositing Fundamentals, Tech Memo 4, Aug 15, 1995, 
 *     ftp://ftp.alvyray.com/Acrobat/4_Comp.pdf
 * @sa Michael Herf, The "double blend trick", May 2000, 
 *     http://www.stereopsis.com/doubleblend.html
 */
static LLVMValueRef
lp_build_mul_u8n(LLVMBuilderRef builder,
                 struct lp_type i16_type,
                 LLVMValueRef a, LLVMValueRef b)
{
   LLVMValueRef c8;
   LLVMValueRef ab;

   c8 = lp_build_int_const_scalar(i16_type, 8);
   
#if 0
   
   /* a*b/255 ~= (a*(b + 1)) >> 8 */
   b = LLVMBuildAdd(builder, b, lp_build_int_const_scalar(i16_type, 1), "");
   ab = LLVMBuildMul(builder, a, b, "");

#else
   
   /* ab/255 ~= ((ab + 0x80) + ((ab + 0x80) >> 8)) >> 8 */
   ab = LLVMBuildMul(builder, a, b, "");
   ab = LLVMBuildAdd(builder, ab, lp_build_int_const_scalar(i16_type, 0x80), "");
   ab = LLVMBuildAdd(builder, ab, LLVMBuildLShr(builder, ab, c8, ""), "");

#endif
   
   ab = LLVMBuildLShr(builder, ab, c8, "");

   return ab;
}
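/*
 * A standalone sanity check of the arithmetic above (a sketch, not part of
 * the original source; plain C, no LLVM needed). It verifies that Blinn's
 * rounded geometric-series form reproduces round(a*b/255) exactly over the
 * whole 8-bit range, and that the "alpha plus one" variant meets the
 * 0*0 = 0 and 255*255 = 255 endpoint criteria.
 */
#include <assert.h>

static unsigned mul_u8n_round(unsigned a, unsigned b)
{
   unsigned t = a * b + 0x80;
   return (t + (t >> 8)) >> 8;          /* geometric series plus rounding */
}

static unsigned mul_u8n_fast(unsigned a, unsigned b)
{
   return (a * (b + 1)) >> 8;           /* alpha plus one */
}

int main(void)
{
   unsigned a, b;
   for (a = 0; a <= 255; ++a)
      for (b = 0; b <= 255; ++b)
         assert(mul_u8n_round(a, b) == (2 * a * b + 255) / 510);
   assert(mul_u8n_fast(0, 0) == 0);
   assert(mul_u8n_fast(255, 255) == 255);
   return 0;
}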
Example #2
/**
 * Special case for converting clamped IEEE-754 floats to unsigned norms.
 *
 * The mathematical voodoo below may seem excessive but it is actually
 * paramount we do it this way for several reasons. First, there is no
 * single precision FP to unsigned integer conversion Intel SSE
 * instruction. Second, even if there was, since the FP's mantissa takes
 * only a fraction of register bits the typical scale-and-cast approach
 * would require double precision for accurate results, and therefore
 * half the throughput.
 *
 * Although the result values can be scaled to an arbitrary bit width
 * specified by dst_width, the actual result type will have the same
 * width as src_type.
 */
LLVMValueRef
lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder,
                                        struct lp_type src_type,
                                        unsigned dst_width,
                                        LLVMValueRef src)
{
   LLVMTypeRef int_vec_type = lp_build_int_vec_type(src_type);
   LLVMValueRef res;
   unsigned mantissa;
   unsigned n;
   unsigned long long ubound;
   unsigned long long mask;
   double scale;
   double bias;

   assert(src_type.floating);

   mantissa = lp_mantissa(src_type);

   /* We cannot carry more bits than the mantissa */
   n = MIN2(mantissa, dst_width);

   /* These magic coefficients make the desired result appear in the
    * least significant bits of the mantissa.
    */
   ubound = ((unsigned long long)1 << n);
   mask = ubound - 1;
   scale = (double)mask/ubound;
   bias = (double)((unsigned long long)1 << (mantissa - n));

   res = LLVMBuildMul(builder, src, lp_build_const_scalar(src_type, scale), "");
   res = LLVMBuildAdd(builder, res, lp_build_const_scalar(src_type, bias), "");
   res = LLVMBuildBitCast(builder, res, int_vec_type, "");

   if(dst_width > n) {
      int shift = dst_width - n;
      res = LLVMBuildShl(builder, res, lp_build_int_const_scalar(src_type, shift), "");

      /* TODO: Fill in the empty lower bits for additional precision? */
#if 0
      {
         LLVMValueRef msb;
         msb = LLVMBuildLShr(builder, res, lp_build_int_const_scalar(src_type, dst_width - 1), "");
         msb = LLVMBuildShl(builder, msb, lp_build_int_const_scalar(src_type, shift), "");
         msb = LLVMBuildSub(builder, msb, lp_build_int_const_scalar(src_type, 1), "");
         res = LLVMBuildOr(builder, res, msb, "");
      }
#elif 0
      while(shift > 0) {
         res = LLVMBuildOr(builder, res, LLVMBuildLShr(builder, res, lp_build_int_const_scalar(src_type, n), ""), "");
         shift -= n;
         n *= 2;
      }
#endif
   }
   else
      res = LLVMBuildAnd(builder, res, lp_build_int_const_scalar(src_type, mask), "");

   return res;
}
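/*
 * A scalar model of the bias trick above (a sketch, not from the original
 * source; assumes IEEE-754 single precision, round-to-nearest, and an input
 * already clamped to [0, 1]). Adding 2^(23-n) to the scaled value pushes the
 * n result bits into the least significant bits of the mantissa, where an
 * integer mask can read them back.
 */
#include <assert.h>
#include <stdint.h>
#include <string.h>

static uint32_t clamped_float_to_unorm(float x, unsigned n)
{
   uint32_t ubound = 1u << n;           /* n must not exceed 23 mantissa bits */
   uint32_t mask = ubound - 1;
   float scale = (float)mask / (float)ubound;
   float bias = (float)(1u << (23 - n));
   float biased = x * scale + bias;
   uint32_t bits;
   memcpy(&bits, &biased, sizeof bits); /* the bitcast in the code above */
   return bits & mask;
}

int main(void)
{
   assert(clamped_float_to_unorm(0.0f, 8) == 0);
   assert(clamped_float_to_unorm(0.5f, 8) == 128);
   assert(clamped_float_to_unorm(1.0f, 8) == 255);
   return 0;
}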
Example #3
/**
 * Extract Y, U, V channels from packed YUYV.
 * @param packed  is a <n x i32> vector with the packed YUYV blocks
 * @param i  is a <n x i32> vector with the x pixel coordinate (0 or 1)
 */
static void
yuyv_to_yuv_soa(struct gallivm_state *gallivm,
                unsigned n,
                LLVMValueRef packed,
                LLVMValueRef i,
                LLVMValueRef *y,
                LLVMValueRef *u,
                LLVMValueRef *v)
{
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_type type;
   LLVMValueRef mask;

   memset(&type, 0, sizeof type);
   type.width = 32;
   type.length = n;

   assert(lp_check_value(type, packed));
   assert(lp_check_value(type, i));

   /*
    * y = (yuyv >> 16*i) & 0xff
    * u = (yuyv >> 8   ) & 0xff
    * v = (yuyv >> 24  ) & 0xff
    */

#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
   /*
    * Avoid a shift with a per-element count: x86 has no support for it,
    * so it gets translated to roughly 5 instructions per element. Didn't
    * measure performance but this cuts shader size by quite a bit (less
    * difference if the cpu has no sse4.1 support).
    */
   if (util_cpu_caps.has_sse2 && n == 4) {
      LLVMValueRef sel, tmp;
      struct lp_build_context bld32;

      lp_build_context_init(&bld32, gallivm, type);

      tmp = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 16), "");
      sel = lp_build_compare(gallivm, type, PIPE_FUNC_EQUAL, i, lp_build_const_int_vec(gallivm, type, 0));
      *y = lp_build_select(&bld32, sel, packed, tmp);
   } else
#endif
   {
      LLVMValueRef shift;
      shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, 16), "");
      *y = LLVMBuildLShr(builder, packed, shift, "");
   }

   *u = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 8), "");
   *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 24), "");

   mask = lp_build_const_int_vec(gallivm, type, 0xff);

   *y = LLVMBuildAnd(builder, *y, mask, "y");
   *u = LLVMBuildAnd(builder, *u, mask, "u");
   *v = LLVMBuildAnd(builder, *v, mask, "v");
}
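/*
 * For reference, a scalar equivalent of the extraction above (a sketch, not
 * from the original source; assumes the little-endian Y0 U Y1 V byte order
 * implied by the shifts, with i in {0, 1} selecting the luma byte).
 */
#include <stdint.h>

static void yuyv_to_yuv(uint32_t yuyv, unsigned i,
                        uint8_t *y, uint8_t *u, uint8_t *v)
{
   *y = (yuyv >> (16 * i)) & 0xff;
   *u = (yuyv >> 8) & 0xff;
   *v = (yuyv >> 24) & 0xff;
}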
Example #4
/**
 * Compute the offset of a pixel.
 *
 * x, y, y_stride are vectors
 */
LLVMValueRef
lp_build_sample_offset(struct lp_build_context *bld,
                       const struct util_format_description *format_desc,
                       LLVMValueRef x,
                       LLVMValueRef y,
                       LLVMValueRef y_stride,
                       LLVMValueRef data_ptr)
{
   LLVMValueRef x_stride;
   LLVMValueRef offset;

   x_stride = lp_build_const_scalar(bld->type, format_desc->block.bits/8);

   if(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
      LLVMValueRef x_lo, x_hi;
      LLVMValueRef y_lo, y_hi;
      LLVMValueRef x_stride_lo, x_stride_hi;
      LLVMValueRef y_stride_lo, y_stride_hi;
      LLVMValueRef x_offset_lo, x_offset_hi;
      LLVMValueRef y_offset_lo, y_offset_hi;
      LLVMValueRef offset_lo, offset_hi;

      x_lo = LLVMBuildAnd(bld->builder, x, bld->one, "");
      y_lo = LLVMBuildAnd(bld->builder, y, bld->one, "");

      x_hi = LLVMBuildLShr(bld->builder, x, bld->one, "");
      y_hi = LLVMBuildLShr(bld->builder, y, bld->one, "");

      x_stride_lo = x_stride;
      y_stride_lo = lp_build_const_scalar(bld->type, 2*format_desc->block.bits/8);

      x_stride_hi = lp_build_const_scalar(bld->type, 4*format_desc->block.bits/8);
      y_stride_hi = LLVMBuildShl(bld->builder, y_stride, bld->one, "");

      x_offset_lo = lp_build_mul(bld, x_lo, x_stride_lo);
      y_offset_lo = lp_build_mul(bld, y_lo, y_stride_lo);
      offset_lo = lp_build_add(bld, x_offset_lo, y_offset_lo);

      x_offset_hi = lp_build_mul(bld, x_hi, x_stride_hi);
      y_offset_hi = lp_build_mul(bld, y_hi, y_stride_hi);
      offset_hi = lp_build_add(bld, x_offset_hi, y_offset_hi);

      offset = lp_build_add(bld, offset_hi, offset_lo);
   }
   else {
      LLVMValueRef x_offset;
      LLVMValueRef y_offset;

      x_offset = lp_build_mul(bld, x, x_stride);
      y_offset = lp_build_mul(bld, y, y_stride);

      offset = lp_build_add(bld, x_offset, y_offset);
   }

   return offset;
}
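/*
 * A scalar model of the Z/S special case above (a sketch, not from the
 * original source): the strides imply a layout where pixels are swizzled
 * in 2x2 blocks, so the low bit of each coordinate addresses within a
 * block and the high bits step between blocks. bpp is block.bits/8.
 */
#include <stdint.h>

static uint32_t zs_sample_offset(uint32_t x, uint32_t y,
                                 uint32_t y_stride, uint32_t bpp)
{
   uint32_t x_lo = x & 1, y_lo = y & 1;     /* position inside a 2x2 block */
   uint32_t x_hi = x >> 1, y_hi = y >> 1;   /* block position */

   /* inside a block: x steps bpp bytes, y steps 2*bpp bytes */
   uint32_t offset_lo = x_lo * bpp + y_lo * 2 * bpp;
   /* between blocks: a block occupies 4*bpp bytes, and a row of blocks
    * covers two scanlines */
   uint32_t offset_hi = x_hi * 4 * bpp + y_hi * (y_stride << 1);

   return offset_hi + offset_lo;
}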
Example #5
LLVMValueRef gen_name(struct node *ast)
{
	LLVMValueRef func, ptr, val;
	LLVMTypeRef type;

	ptr = lvalue(ast);
	type = LLVMTypeOf(ptr);

	if (LLVMGetTypeKind(type) == LLVMLabelTypeKind) {
		func = LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder));
		return LLVMBuildPtrToInt(builder,
				LLVMBlockAddress(func, (LLVMBasicBlockRef)ptr),
				TYPE_INT,
				"");
	}

	type = LLVMGetElementType(LLVMTypeOf(ptr));

	switch (LLVMGetTypeKind(type)) {
	case LLVMIntegerTypeKind:
		val = LLVMBuildLoad(builder, ptr, ast->val);

		if (LLVMIsAGlobalValue(ptr))
			val = LLVMBuildLShr(builder, val, CONST(WORDPOW), "");

		return val;

	default:
		generror("unexpected type '%s'", LLVMPrintTypeToString(type));
		return NULL;
	}
}
Example #6
LLVMValueRef build_t_from_tag(struct llvm_ctx *ctx, LLVMValueRef mr0)
{
	LLVMValueRef t = LLVMBuildAnd(ctx->builder,
		LLVMBuildLShr(ctx->builder, mr0, CONST_WORD(6), "tag.t.raw"),
		CONST_WORD(0x3f), "tag.t");
	return LLVMBuildTruncOrBitCast(ctx->builder, t, ctx->i32t, "tag.t.int");
}
Example #7
/**
 * Convert a vector of rgba8 values into 32bit wide SoA vectors.
 *
 * \param dst_type  The desired return type. For pure integer formats
 *                  this should be a 32bit wide int or uint vector type,
 *                  otherwise a float vector type.
 *
 * \param packed    The rgba8 values to unpack.
 *
 * \param rgba      The 4 SoA return vectors.
 */
void
lp_build_rgba8_to_fi32_soa(struct gallivm_state *gallivm,
                           struct lp_type dst_type,
                           LLVMValueRef packed,
                           LLVMValueRef *rgba)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef mask = lp_build_const_int_vec(gallivm, dst_type, 0xff);
   unsigned chan;

   /* XXX technically shouldn't use that for uint dst_type */
   packed = LLVMBuildBitCast(builder, packed,
                             lp_build_int_vec_type(gallivm, dst_type), "");

   /* Decode the input vector components */
   for (chan = 0; chan < 4; ++chan) {
      unsigned start = chan*8;
      unsigned stop = start + 8;
      LLVMValueRef input;

      input = packed;

      if (start)
         input = LLVMBuildLShr(builder, input,
                               lp_build_const_int_vec(gallivm, dst_type, start), "");

      if (stop < 32)
         input = LLVMBuildAnd(builder, input, mask, "");

      if (dst_type.floating)
         input = lp_build_unsigned_norm_to_float(gallivm, 8, dst_type, input);

      rgba[chan] = input;
   }
}
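/*
 * A scalar equivalent of the unpack above for the floating-point case (a
 * sketch, not from the original source): each channel is one byte lane of
 * the packed word, normalized to [0, 1].
 */
#include <stdint.h>

static void rgba8_to_f32(uint32_t packed, float rgba[4])
{
   unsigned chan;
   for (chan = 0; chan < 4; ++chan) {
      uint32_t bits = (packed >> (chan * 8)) & 0xff;   /* LShr + And */
      rgba[chan] = (float)bits / 255.0f;  /* unsigned norm -> float */
   }
}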
Example #8
LLVMValueRef gen_right(struct node *ast)
{
	return LLVMBuildLShr(builder,
			codegen(ast->one),
			codegen(ast->two),
			"");
}
Example #9
static void emit_ushr(const struct lp_build_tgsi_action *action,
		      struct lp_build_tgsi_context *bld_base,
		      struct lp_build_emit_data *emit_data)
{
	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
	emit_data->output[emit_data->chan] = LLVMBuildLShr(builder,
			emit_data->args[0], emit_data->args[1], "");
}
Example #10
static LLVMValueRef lvalue_to_rvalue(LLVMValueRef lvalue)
{
	/*
	* TODO: Make sure all addresses are word-aligned
	*       (autos, vectors, strings, etc.)
	*/
	lvalue = LLVMBuildPtrToInt(builder, lvalue, TYPE_INT, "");

	return LLVMBuildLShr(builder, lvalue, CONST(WORDPOW), "");
}
Example #11
static LLVMValueRef gen_digestof_value(compile_t* c, LLVMValueRef value)
{
  LLVMTypeRef type = LLVMTypeOf(value);

  switch(LLVMGetTypeKind(type))
  {
    case LLVMFloatTypeKind:
      value = LLVMBuildBitCast(c->builder, value, c->i32, "");
      return LLVMBuildZExt(c->builder, value, c->i64, "");

    case LLVMDoubleTypeKind:
      return LLVMBuildBitCast(c->builder, value, c->i64, "");

    case LLVMIntegerTypeKind:
    {
      uint32_t width = LLVMGetIntTypeWidth(type);

      if(width < 64)
      {
        value = LLVMBuildZExt(c->builder, value, c->i64, "");
      } else if(width == 128) {
        LLVMValueRef shift = LLVMConstInt(c->i128, 64, false);
        LLVMValueRef high = LLVMBuildLShr(c->builder, value, shift, "");
        high = LLVMBuildTrunc(c->builder, high, c->i64, "");
        value = LLVMBuildTrunc(c->builder, value, c->i64, "");
        value = LLVMBuildXor(c->builder, value, high, "");
      }

      return value;
    }

    case LLVMStructTypeKind:
    {
      uint32_t count = LLVMCountStructElementTypes(type);
      LLVMValueRef result = LLVMConstInt(c->i64, 0, false);

      for(uint32_t i = 0; i < count; i++)
      {
        LLVMValueRef elem = LLVMBuildExtractValue(c->builder, value, i, "");
        elem = gen_digestof_value(c, elem);
        result = LLVMBuildXor(c->builder, result, elem, "");
      }

      return result;
    }

    case LLVMPointerTypeKind:
      return LLVMBuildPtrToInt(c->builder, value, c->i64, "");

    default: {}
  }

  assert(0);
  return NULL;
}
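/*
 * The 128-bit integer case above folds the value to 64 bits by xor-ing the
 * halves. A scalar sketch (not from the original source; unsigned __int128
 * is a GCC/Clang extension standing in for the compiler's i128 type):
 */
#include <stdint.h>

static uint64_t digest_u128(unsigned __int128 v)
{
   uint64_t high = (uint64_t)(v >> 64);   /* LLVMBuildLShr + LLVMBuildTrunc */
   uint64_t low = (uint64_t)v;            /* LLVMBuildTrunc */
   return low ^ high;                     /* LLVMBuildXor */
}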
Example #12
/*
 * gen_shift
 *
 * Shifts are a little tricky, since LLVM has explicit left-shift and
 * right-shift instructions, which take non-negative shift values.  BLISS,
 * on the other hand, has a single shift operator and generates right-shifts
 * when the RHS is negative.  If the RHS is a constant, we can do the translation
 * here; otherwise, we have to build a conditional to check at runtime.
 */
static LLVMValueRef
gen_shift (gencodectx_t gctx, expr_node_t *lhs, expr_node_t *rhs, LLVMTypeRef neededtype)
{
    LLVMBuilderRef builder = gctx->curfn->builder;
    LLVMTypeRef inttype = gctx->fullwordtype;
    LLVMValueRef lval, rval, result, test;

    lval = (lhs == 0 ? 0 : llvmgen_expression(gctx, lhs, inttype));

    if (expr_type(rhs) == EXPTYPE_PRIM_LIT) {
        long count = expr_litval(rhs);
        if (count < 0) {
            rval = LLVMConstInt(inttype, -count, 0);
            result = LLVMBuildLShr(builder, lval, rval, llvmgen_temp(gctx));
        } else {
            rval = LLVMConstInt(inttype, count, 0);
            result = LLVMBuildShl(builder, lval, rval, llvmgen_temp(gctx));
        }
    } else {
        LLVMBasicBlockRef exitblock = llvmgen_exitblock_create(gctx, 0);
        LLVMBasicBlockRef lshiftblk, rshiftblk;
        llvm_btrack_t *bt = llvmgen_btrack_create(gctx, exitblock);

        lshiftblk = LLVMInsertBasicBlockInContext(gctx->llvmctx, exitblock, llvmgen_label(gctx));
        rshiftblk = LLVMInsertBasicBlockInContext(gctx->llvmctx, exitblock, llvmgen_label(gctx));

        rval = llvmgen_expression(gctx, rhs, inttype);
        test = LLVMBuildICmp(builder, LLVMIntSLT, rval, LLVMConstNull(inttype), llvmgen_temp(gctx));
        LLVMBuildCondBr(builder, test, rshiftblk, lshiftblk);
        LLVMPositionBuilderAtEnd(builder, lshiftblk);
        result = LLVMBuildShl(builder, lval, rval, llvmgen_temp(gctx));
        llvmgen_btrack_update(gctx, bt, result);
        LLVMPositionBuilderAtEnd(builder, rshiftblk);
        rval = LLVMBuildNeg(builder, rval, llvmgen_temp(gctx));
        result = LLVMBuildLShr(builder, lval, rval, llvmgen_temp(gctx));
        llvmgen_btrack_update(gctx, bt, result);
        result = llvmgen_btrack_finalize(gctx, bt, inttype);
    }

    return llvmgen_adjustval(gctx, result, neededtype, 0);

} /* gen_shift */
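/*
 * A scalar model of the BLISS shift semantics implemented above (a sketch,
 * not from the original source; assumes |count| is smaller than the word
 * size, since larger shifts are undefined in C).
 */
#include <stdint.h>

static uint64_t bliss_shift(uint64_t value, long count)
{
   if (count < 0)
      return value >> -count;   /* the rshiftblk / LLVMBuildLShr path */
   return value << count;       /* the lshiftblk / LLVMBuildShl path */
}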
Example #13
/**
 * Extract Y, U, V channels from packed UYVY.
 * @param packed  is a <n x i32> vector with the packed UYVY blocks
 * @param i  is a <n x i32> vector with the x pixel coordinate (0 or 1)
 */
static void
uyvy_to_yuv_soa(LLVMBuilderRef builder,
                unsigned n,
                LLVMValueRef packed,
                LLVMValueRef i,
                LLVMValueRef *y,
                LLVMValueRef *u,
                LLVMValueRef *v)
{
   struct lp_type type;
   LLVMValueRef shift, mask;

   memset(&type, 0, sizeof type);
   type.width = 32;
   type.length = n;

   assert(lp_check_value(type, packed));
   assert(lp_check_value(type, i));

   /*
    * y = (uyvy >> 16*i) & 0xff
    * u = (uyvy        ) & 0xff
    * v = (uyvy >> 16  ) & 0xff
    */

   shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(type, 16), "");
   shift = LLVMBuildAdd(builder, shift, lp_build_const_int_vec(type, 8), "");
   *y = LLVMBuildLShr(builder, packed, shift, "");
   *u = packed;
   *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(type, 16), "");

   mask = lp_build_const_int_vec(type, 0xff);

   *y = LLVMBuildAnd(builder, *y, mask, "y");
   *u = LLVMBuildAnd(builder, *u, mask, "u");
   *v = LLVMBuildAnd(builder, *v, mask, "v");
}
Example #14
static LLVMValueRef gen_digestof_int64(compile_t* c, LLVMValueRef value)
{
  pony_assert(LLVMTypeOf(value) == c->i64);

  if(target_is_ilp32(c->opt->triple))
  {
    LLVMValueRef shift = LLVMConstInt(c->i64, 32, false);
    LLVMValueRef high = LLVMBuildLShr(c->builder, value, shift, "");
    high = LLVMBuildTrunc(c->builder, high, c->i32, "");
    value = LLVMBuildTrunc(c->builder, value, c->i32, "");
    value = LLVMBuildXor(c->builder, value, high, "");
  }

  return value;
}
Example #15
/**
 * Inverse of lp_build_clamped_float_to_unsigned_norm above.
 * Ex: src = { i32, i32, i32, i32 } with values in range [0, 2^src_width-1]
 * return {float, float, float, float} with values in range [0, 1].
 */
LLVMValueRef
lp_build_unsigned_norm_to_float(LLVMBuilderRef builder,
                                unsigned src_width,
                                struct lp_type dst_type,
                                LLVMValueRef src)
{
   LLVMTypeRef vec_type = lp_build_vec_type(dst_type);
   LLVMTypeRef int_vec_type = lp_build_int_vec_type(dst_type);
   LLVMValueRef bias_;
   LLVMValueRef res;
   unsigned mantissa;
   unsigned n;
   unsigned long long ubound;
   unsigned long long mask;
   double scale;
   double bias;

   assert(dst_type.floating);

   mantissa = lp_mantissa(dst_type);

   n = MIN2(mantissa, src_width);

   ubound = ((unsigned long long)1 << n);
   mask = ubound - 1;
   scale = (double)ubound/mask;
   bias = (double)((unsigned long long)1 << (mantissa - n));

   res = src;

   if(src_width > mantissa) {
      int shift = src_width - mantissa;
      res = LLVMBuildLShr(builder, res, lp_build_const_int_vec(dst_type, shift), "");
   }

   bias_ = lp_build_const_vec(dst_type, bias);

   res = LLVMBuildOr(builder,
                     res,
                     LLVMBuildBitCast(builder, bias_, int_vec_type, ""), "");

   res = LLVMBuildBitCast(builder, res, vec_type, "");

   res = LLVMBuildFSub(builder, res, bias_, "");
   res = LLVMBuildFMul(builder, res, lp_build_const_vec(dst_type, scale), "");

   return res;
}
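/*
 * A scalar model of the OR-the-bias trick above, specialized to 8-bit
 * sources (a sketch, not from the original source; assumes IEEE-754 single
 * precision). OR-ing the n source bits into the mantissa of 2^(23-n)
 * yields bias + src * 2^-n, so subtracting the bias and rescaling by
 * 2^n/(2^n - 1) leaves src/(2^n - 1).
 */
#include <assert.h>
#include <stdint.h>
#include <string.h>

static float unorm8_to_float(uint32_t src)
{
   float bias = (float)(1u << 15);      /* 2^(23 - 8) */
   uint32_t bits;
   float f;

   memcpy(&bits, &bias, sizeof bits);
   bits |= src;                         /* src must fit in 8 bits */
   memcpy(&f, &bits, sizeof f);

   f -= bias;                           /* now f == src * 2^-8 */
   return f * (256.0f / 255.0f);        /* rescale to src / 255 */
}

int main(void)
{
   assert(unorm8_to_float(0) == 0.0f);
   assert(unorm8_to_float(255) == 1.0f);
   return 0;
}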
Example #16
/**
 * Compute the partial offset of a pixel block along an arbitrary axis.
 *
 * @param coord   coordinate in pixels
 * @param stride  number of bytes between rows of successive pixel blocks
 * @param block_length  number of pixels in a pixels block along the coordinate
 *                      axis
 * @param out_offset    resulting relative offset of the pixel block in bytes
 * @param out_subcoord  resulting sub-block pixel coordinate
 */
void
lp_build_sample_partial_offset(struct lp_build_context *bld,
                               unsigned block_length,
                               LLVMValueRef coord,
                               LLVMValueRef stride,
                               LLVMValueRef *out_offset,
                               LLVMValueRef *out_subcoord)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef offset;
   LLVMValueRef subcoord;

   if (block_length == 1) {
      subcoord = bld->zero;
   }
   else {
      /*
       * Pixel blocks have power of two dimensions. LLVM should convert the
       * rem/div to bit arithmetic.
       * TODO: Verify this.
       * It does indeed, BUT it transforms it to scalar (and back) when
       * doing so (using roughly extract, shift/and, mov, unpack; llvm 2.7).
       * The generated code looks seriously unfunny and is quite expensive.
       */
#if 0
      LLVMValueRef block_width = lp_build_const_int_vec(bld->type, block_length);
      subcoord = LLVMBuildURem(builder, coord, block_width, "");
      coord    = LLVMBuildUDiv(builder, coord, block_width, "");
#else
      unsigned logbase2 = util_logbase2(block_length);
      LLVMValueRef block_shift = lp_build_const_int_vec(bld->gallivm, bld->type, logbase2);
      LLVMValueRef block_mask = lp_build_const_int_vec(bld->gallivm, bld->type, block_length - 1);
      subcoord = LLVMBuildAnd(builder, coord, block_mask, "");
      coord = LLVMBuildLShr(builder, coord, block_shift, "");
#endif
   }

   offset = lp_build_mul(bld, coord, stride);

   assert(out_offset);
   assert(out_subcoord);

   *out_offset = offset;
   *out_subcoord = subcoord;
}
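/*
 * The power-of-two identities relied on above, checked in scalar form (a
 * sketch, not from the original source): for unsigned coord and
 * block_length == 1 << k, the rem/div pair equals the mask/shift pair.
 */
#include <assert.h>
#include <stdint.h>

int main(void)
{
   const uint32_t block_length = 4;   /* must be a power of two */
   const unsigned logbase2 = 2;       /* util_logbase2(block_length) */
   uint32_t coord;

   for (coord = 0; coord < 4096; ++coord) {
      assert((coord & (block_length - 1)) == coord % block_length);
      assert((coord >> logbase2) == coord / block_length);
   }
   return 0;
}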
Example #17
/**
 * Shift right.
 */
LLVMValueRef
lp_build_shr(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   const struct lp_type type = bld->type;
   LLVMValueRef res;

   assert(!type.floating);

   assert(lp_check_value(type, a));
   assert(lp_check_value(type, b));

   if (type.sign) {
      res = LLVMBuildAShr(builder, a, b, "");
   } else {
      res = LLVMBuildLShr(builder, a, b, "");
   }

   return res;
}
Example #18
/**
 * Codegen equivalent for u_minify().
 * Return max(1, base_size >> level);
 */
LLVMValueRef
lp_build_minify(struct lp_build_context *bld,
                LLVMValueRef base_size,
                LLVMValueRef level)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   assert(lp_check_value(bld->type, base_size));
   assert(lp_check_value(bld->type, level));

   if (level == bld->zero) {
      /* if we're using mipmap level zero, no minification is needed */
      return base_size;
   }
   else {
      LLVMValueRef size =
         LLVMBuildLShr(builder, base_size, level, "minify");
      assert(bld->type.sign);
      size = lp_build_max(bld, size, bld->one);
      return size;
   }
}
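/*
 * For reference, a scalar sketch of what gallium's u_minify() computes
 * (illustrative, not copied from the original source):
 */
static unsigned u_minify_sketch(unsigned base_size, unsigned level)
{
   unsigned size = base_size >> level;
   return size ? size : 1;   /* max(1, base_size >> level) */
}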
Example #19
static void emit_up2h(const struct lp_build_tgsi_action *action,
		      struct lp_build_tgsi_context *bld_base,
		      struct lp_build_emit_data *emit_data)
{
	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
	LLVMContextRef context = bld_base->base.gallivm->context;
	struct lp_build_context *uint_bld = &bld_base->uint_bld;
	LLVMTypeRef fp16, i16;
	LLVMValueRef const16, input, val;
	unsigned i;

	fp16 = LLVMHalfTypeInContext(context);
	i16 = LLVMInt16TypeInContext(context);
	const16 = lp_build_const_int32(uint_bld->gallivm, 16);
	input = emit_data->args[0];

	for (i = 0; i < 2; i++) {
		val = i == 1 ? LLVMBuildLShr(builder, input, const16, "") : input;
		val = LLVMBuildTrunc(builder, val, i16, "");
		val = LLVMBuildBitCast(builder, val, fp16, "");
		emit_data->output[i] =
			LLVMBuildFPExt(builder, val, bld_base->base.elem_type, "");
	}
}
Example #20
LLVMValueRef gen_shr(compile_t* c, ast_t* left, ast_t* right)
{
  ast_t* type = ast_type(left);
  bool sign = is_signed(c->opt, type);

  LLVMValueRef l_value = gen_expr(c, left);
  LLVMValueRef r_value = gen_expr(c, right);

  if((l_value == NULL) || (r_value == NULL))
    return NULL;

  if(LLVMIsConstant(l_value) && LLVMIsConstant(r_value))
  {
    if(sign)
      return LLVMConstAShr(l_value, r_value);

    return LLVMConstLShr(l_value, r_value);
  }

  if(sign)
    return LLVMBuildAShr(c->builder, l_value, r_value, "");

  return LLVMBuildLShr(c->builder, l_value, r_value, "");
}
/**
 * Unpack a single pixel into its RGBA components.
 *
 * @param desc  the pixel format for the packed pixel value
 * @param packed integer pixel in a format such as PIPE_FORMAT_B8G8R8A8_UNORM
 *
 * @return RGBA in a float[4] or ubyte[4] or ushort[4] vector.
 */
static INLINE LLVMValueRef
lp_build_unpack_arith_rgba_aos(struct gallivm_state *gallivm,
                               const struct util_format_description *desc,
                               LLVMValueRef packed)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef shifted, casted, scaled, masked;
   LLVMValueRef shifts[4];
   LLVMValueRef masks[4];
   LLVMValueRef scales[4];

   boolean normalized;
   boolean needs_uitofp;
   unsigned shift;
   unsigned i;

   /* TODO: Support more formats */
   assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
   assert(desc->block.width == 1);
   assert(desc->block.height == 1);
   assert(desc->block.bits <= 32);

   /* Do the intermediate integer computations with 32bit integers since it
    * matches floating point size */
   assert (LLVMTypeOf(packed) == LLVMInt32TypeInContext(gallivm->context));

   /* Broadcast the packed value to all four channels
    * before: packed = BGRA
    * after: packed = {BGRA, BGRA, BGRA, BGRA}
    */
   packed = LLVMBuildInsertElement(builder,
                                   LLVMGetUndef(LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4)),
                                   packed,
                                   LLVMConstNull(LLVMInt32TypeInContext(gallivm->context)),
                                   "");
   packed = LLVMBuildShuffleVector(builder,
                                   packed,
                                   LLVMGetUndef(LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4)),
                                   LLVMConstNull(LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4)),
                                   "");

   /* Initialize vector constants */
   normalized = FALSE;
   needs_uitofp = FALSE;
   shift = 0;

   /* Loop over 4 color components */
   for (i = 0; i < 4; ++i) {
      unsigned bits = desc->channel[i].size;

      if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) {
         shifts[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
         masks[i] = LLVMConstNull(LLVMInt32TypeInContext(gallivm->context));
         scales[i] =  LLVMConstNull(LLVMFloatTypeInContext(gallivm->context));
      }
      else {
         unsigned long long mask = (1ULL << bits) - 1;

         assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED);

         if (bits == 32) {
            needs_uitofp = TRUE;
         }

         shifts[i] = lp_build_const_int32(gallivm, shift);
         masks[i] = lp_build_const_int32(gallivm, mask);

         if (desc->channel[i].normalized) {
            scales[i] = lp_build_const_float(gallivm, 1.0 / mask);
            normalized = TRUE;
         }
         else
            scales[i] =  lp_build_const_float(gallivm, 1.0);
      }

      shift += bits;
   }

   /* Ex: convert packed = {BGRA, BGRA, BGRA, BGRA}
    * into masked = {B, G, R, A}
    */
   shifted = LLVMBuildLShr(builder, packed, LLVMConstVector(shifts, 4), "");
   masked = LLVMBuildAnd(builder, shifted, LLVMConstVector(masks, 4), "");


   if (!needs_uitofp) {
      /* UIToFP can't be expressed in SSE2 */
      casted = LLVMBuildSIToFP(builder, masked, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4), "");
   } else {
      casted = LLVMBuildUIToFP(builder, masked, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4), "");
   }

   /* At this point 'casted' may be a vector of floats such as
    * {255.0, 255.0, 255.0, 255.0}.  Next, if the pixel values are normalized
    * we'll scale this to {1.0, 1.0, 1.0, 1.0}.
    */

   if (normalized)
      scaled = LLVMBuildFMul(builder, casted, LLVMConstVector(scales, 4), "");
   else
      scaled = casted;

   return scaled;
}
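/*
 * A scalar model of the unpack above for the doc comment's
 * PIPE_FORMAT_B8G8R8A8_UNORM example (a sketch, not from the original
 * source): shift each channel down, mask it, convert to float, and scale
 * by 1/mask for normalized channels.
 */
#include <stdint.h>

static void unpack_b8g8r8a8_unorm(uint32_t packed, float bgra[4])
{
   unsigned shift = 0;
   unsigned i;

   for (i = 0; i < 4; ++i) {
      uint32_t bits = (packed >> shift) & 0xff;   /* LShr + And */
      bgra[i] = (float)bits * (1.0f / 255.0f);    /* normalize */
      shift += 8;
   }
}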
Example #22
/**
 * Generic type conversion.
 *
 * TODO: Take a precision argument, or even better, add a new precision member
 * to the lp_type union.
 */
void
lp_build_conv(struct gallivm_state *gallivm,
              struct lp_type src_type,
              struct lp_type dst_type,
              const LLVMValueRef *src, unsigned num_srcs,
              LLVMValueRef *dst, unsigned num_dsts)
{
    LLVMBuilderRef builder = gallivm->builder;
    struct lp_type tmp_type;
    LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH];
    unsigned num_tmps;
    unsigned i;

    /* We must not lose or gain channels, only precision. */
    assert(src_type.length * num_srcs == dst_type.length * num_dsts);

    assert(src_type.length <= LP_MAX_VECTOR_LENGTH);
    assert(dst_type.length <= LP_MAX_VECTOR_LENGTH);
    assert(num_srcs <= LP_MAX_VECTOR_LENGTH);
    assert(num_dsts <= LP_MAX_VECTOR_LENGTH);

    tmp_type = src_type;
    for(i = 0; i < num_srcs; ++i) {
        assert(lp_check_value(src_type, src[i]));
        tmp[i] = src[i];
    }
    num_tmps = num_srcs;


    /* Special case 4x4f --> 1x16ub
     */
    if (src_type.floating == 1 &&
            src_type.fixed    == 0 &&
            src_type.sign     == 1 &&
            src_type.norm     == 0 &&
            src_type.width    == 32 &&
            src_type.length   == 4 &&

            dst_type.floating == 0 &&
            dst_type.fixed    == 0 &&
            dst_type.sign     == 0 &&
            dst_type.norm     == 1 &&
            dst_type.width    == 8 &&
            dst_type.length   == 16 &&

            4 * num_dsts      == num_srcs &&

            util_cpu_caps.has_sse2)
    {
        struct lp_build_context bld;
        struct lp_type int16_type = dst_type;
        struct lp_type int32_type = dst_type;
        LLVMValueRef const_255f;
        unsigned i, j;

        lp_build_context_init(&bld, gallivm, src_type);

        int16_type.width *= 2;
        int16_type.length /= 2;
        int16_type.sign = 1;

        int32_type.width *= 4;
        int32_type.length /= 4;
        int32_type.sign = 1;

        const_255f = lp_build_const_vec(gallivm, src_type, 255.0f);

        for (i = 0; i < num_dsts; ++i, src += 4) {
            LLVMValueRef lo, hi;

            for (j = 0; j < 4; ++j) {
                tmp[j] = LLVMBuildFMul(builder, src[j], const_255f, "");
                tmp[j] = lp_build_iround(&bld, tmp[j]);
            }

            /* relying on clamping behavior of sse2 intrinsics here */
            lo = lp_build_pack2(gallivm, int32_type, int16_type, tmp[0], tmp[1]);
            hi = lp_build_pack2(gallivm, int32_type, int16_type, tmp[2], tmp[3]);
            dst[i] = lp_build_pack2(gallivm, int16_type, dst_type, lo, hi);
        }

        return;
    }

    /* Special case 2x8f --> 1x16ub
     */
    else if (src_type.floating == 1 &&
             src_type.fixed    == 0 &&
             src_type.sign     == 1 &&
             src_type.norm     == 0 &&
             src_type.width    == 32 &&
             src_type.length   == 8 &&

             dst_type.floating == 0 &&
             dst_type.fixed    == 0 &&
             dst_type.sign     == 0 &&
             dst_type.norm     == 1 &&
             dst_type.width    == 8 &&
             dst_type.length   == 16 &&

             2 * num_dsts      == num_srcs &&

             util_cpu_caps.has_avx) {

        struct lp_build_context bld;
        struct lp_type int16_type = dst_type;
        struct lp_type int32_type = dst_type;
        LLVMValueRef const_255f;
        unsigned i;

        lp_build_context_init(&bld, gallivm, src_type);

        int16_type.width *= 2;
        int16_type.length /= 2;
        int16_type.sign = 1;

        int32_type.width *= 4;
        int32_type.length /= 4;
        int32_type.sign = 1;

        const_255f = lp_build_const_vec(gallivm, src_type, 255.0f);

        for (i = 0; i < num_dsts; ++i, src += 2) {
            LLVMValueRef lo, hi, a, b;

            a = LLVMBuildFMul(builder, src[0], const_255f, "");
            b = LLVMBuildFMul(builder, src[1], const_255f, "");

            a = lp_build_iround(&bld, a);
            b = lp_build_iround(&bld, b);

            tmp[0] = lp_build_extract_range(gallivm, a, 0, 4);
            tmp[1] = lp_build_extract_range(gallivm, a, 4, 4);
            tmp[2] = lp_build_extract_range(gallivm, b, 0, 4);
            tmp[3] = lp_build_extract_range(gallivm, b, 4, 4);

            /* relying on clamping behavior of sse2 intrinsics here */
            lo = lp_build_pack2(gallivm, int32_type, int16_type, tmp[0], tmp[1]);
            hi = lp_build_pack2(gallivm, int32_type, int16_type, tmp[2], tmp[3]);
            dst[i] = lp_build_pack2(gallivm, int16_type, dst_type, lo, hi);
        }
        return;
    }

    /* Pre convert half-floats to floats
     */
    else if (src_type.floating && src_type.width == 16)
    {
        for(i = 0; i < num_tmps; ++i)
            tmp[i] = lp_build_half_to_float(gallivm, src_type, tmp[i]);

        tmp_type.width = 32;
    }

    /*
     * Clamp if necessary
     */

    if(memcmp(&src_type, &dst_type, sizeof src_type) != 0) {
        struct lp_build_context bld;
        double src_min = lp_const_min(src_type);
        double dst_min = lp_const_min(dst_type);
        double src_max = lp_const_max(src_type);
        double dst_max = lp_const_max(dst_type);
        LLVMValueRef thres;

        lp_build_context_init(&bld, gallivm, tmp_type);

        if(src_min < dst_min) {
            if(dst_min == 0.0)
                thres = bld.zero;
            else
                thres = lp_build_const_vec(gallivm, src_type, dst_min);
            for(i = 0; i < num_tmps; ++i)
                tmp[i] = lp_build_max(&bld, tmp[i], thres);
        }

        if(src_max > dst_max) {
            if(dst_max == 1.0)
                thres = bld.one;
            else
                thres = lp_build_const_vec(gallivm, src_type, dst_max);
            for(i = 0; i < num_tmps; ++i)
                tmp[i] = lp_build_min(&bld, tmp[i], thres);
        }
    }

    /*
     * Scale to the narrowest range
     */

    if(dst_type.floating) {
        /* Nothing to do */
    }
    else if(tmp_type.floating) {
        if(!dst_type.fixed && !dst_type.sign && dst_type.norm) {
            for(i = 0; i < num_tmps; ++i) {
                tmp[i] = lp_build_clamped_float_to_unsigned_norm(gallivm,
                         tmp_type,
                         dst_type.width,
                         tmp[i]);
            }
            tmp_type.floating = FALSE;
        }
        else {
            double dst_scale = lp_const_scale(dst_type);
            LLVMTypeRef tmp_vec_type;

            if (dst_scale != 1.0) {
                LLVMValueRef scale = lp_build_const_vec(gallivm, tmp_type, dst_scale);
                for(i = 0; i < num_tmps; ++i)
                    tmp[i] = LLVMBuildFMul(builder, tmp[i], scale, "");
            }

            /* Use an equally sized integer for intermediate computations */
            tmp_type.floating = FALSE;
            tmp_vec_type = lp_build_vec_type(gallivm, tmp_type);
            for(i = 0; i < num_tmps; ++i) {
#if 0
                if(dst_type.sign)
                    tmp[i] = LLVMBuildFPToSI(builder, tmp[i], tmp_vec_type, "");
                else
                    tmp[i] = LLVMBuildFPToUI(builder, tmp[i], tmp_vec_type, "");
#else
                /* FIXME: there is no SSE counterpart for LLVMBuildFPToUI */
                tmp[i] = LLVMBuildFPToSI(builder, tmp[i], tmp_vec_type, "");
#endif
            }
        }
    }
    else {
        unsigned src_shift = lp_const_shift(src_type);
        unsigned dst_shift = lp_const_shift(dst_type);
        unsigned src_offset = lp_const_offset(src_type);
        unsigned dst_offset = lp_const_offset(dst_type);

        /* Compensate for different offsets */
        if (dst_offset > src_offset && src_type.width > dst_type.width) {
            for (i = 0; i < num_tmps; ++i) {
                LLVMValueRef shifted;
                LLVMValueRef shift = lp_build_const_int_vec(gallivm, tmp_type, src_shift - 1);
                if(src_type.sign)
                    shifted = LLVMBuildAShr(builder, tmp[i], shift, "");
                else
                    shifted = LLVMBuildLShr(builder, tmp[i], shift, "");

                tmp[i] = LLVMBuildSub(builder, tmp[i], shifted, "");
            }
        }

        if(src_shift > dst_shift) {
            LLVMValueRef shift = lp_build_const_int_vec(gallivm, tmp_type,
                                 src_shift - dst_shift);
            for(i = 0; i < num_tmps; ++i)
                if(src_type.sign)
                    tmp[i] = LLVMBuildAShr(builder, tmp[i], shift, "");
                else
                    tmp[i] = LLVMBuildLShr(builder, tmp[i], shift, "");
        }
    }

    /*
     * Truncate or expand bit width
     *
     * No data conversion should happen here, although the sign bits are
     * crucial to avoid bad clamping.
     */

    {
        struct lp_type new_type;

        new_type = tmp_type;
        new_type.sign   = dst_type.sign;
        new_type.width  = dst_type.width;
        new_type.length = dst_type.length;

        lp_build_resize(gallivm, tmp_type, new_type, tmp, num_srcs, tmp, num_dsts);

        tmp_type = new_type;
        num_tmps = num_dsts;
    }

    /*
     * Scale to the widest range
     */

    if(src_type.floating) {
        /* Nothing to do */
    }
    else if(!src_type.floating && dst_type.floating) {
        if(!src_type.fixed && !src_type.sign && src_type.norm) {
            for(i = 0; i < num_tmps; ++i) {
                tmp[i] = lp_build_unsigned_norm_to_float(gallivm,
                         src_type.width,
                         dst_type,
                         tmp[i]);
            }
            tmp_type.floating = TRUE;
        }
        else {
            double src_scale = lp_const_scale(src_type);
            LLVMTypeRef tmp_vec_type;

            /* Use an equally sized float for intermediate computations */
            tmp_type.floating = TRUE;
            tmp_type.sign = TRUE;
            tmp_vec_type = lp_build_vec_type(gallivm, tmp_type);
            for(i = 0; i < num_tmps; ++i) {
#if 0
                if(dst_type.sign)
                    tmp[i] = LLVMBuildSIToFP(builder, tmp[i], tmp_vec_type, "");
                else
                    tmp[i] = LLVMBuildUIToFP(builder, tmp[i], tmp_vec_type, "");
#else
                /* FIXME: there is no SSE counterpart for LLVMBuildUIToFP */
                tmp[i] = LLVMBuildSIToFP(builder, tmp[i], tmp_vec_type, "");
#endif
            }

            if (src_scale != 1.0) {
                LLVMValueRef scale = lp_build_const_vec(gallivm, tmp_type, 1.0/src_scale);
                for(i = 0; i < num_tmps; ++i)
                    tmp[i] = LLVMBuildFMul(builder, tmp[i], scale, "");
            }
        }
    }
    else {
        unsigned src_shift = lp_const_shift(src_type);
        unsigned dst_shift = lp_const_shift(dst_type);
        unsigned src_offset = lp_const_offset(src_type);
        unsigned dst_offset = lp_const_offset(dst_type);

        if (src_shift < dst_shift) {
            LLVMValueRef pre_shift[LP_MAX_VECTOR_LENGTH];
            LLVMValueRef shift = lp_build_const_int_vec(gallivm, tmp_type, dst_shift - src_shift);

            for (i = 0; i < num_tmps; ++i) {
                pre_shift[i] = tmp[i];
                tmp[i] = LLVMBuildShl(builder, tmp[i], shift, "");
            }

            /* Compensate for different offsets */
            if (dst_offset > src_offset) {
                for (i = 0; i < num_tmps; ++i) {
                    tmp[i] = LLVMBuildSub(builder, tmp[i], pre_shift[i], "");
                }
            }
        }
    }

    for(i = 0; i < num_dsts; ++i) {
        dst[i] = tmp[i];
        assert(lp_check_value(dst_type, dst[i]));
    }
}
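/*
 * A scalar model of the float -> 8-bit unorm fast paths above (a sketch,
 * not from the original source): scale to [0, 255], round to nearest, and
 * rely on saturation when packing down to bytes, mirroring the clamping
 * behavior of the sse2 pack intrinsics the code depends on.
 */
#include <math.h>
#include <stdint.h>

static uint8_t float_to_ub(float f)
{
   long i = lrintf(f * 255.0f);   /* scale and round, like lp_build_iround */
   if (i < 0)
      i = 0;                      /* saturate, like the pack intrinsics */
   if (i > 255)
      i = 255;
   return (uint8_t)i;
}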
Example #23
/**
 * Unpack several pixels in SoA.
 *
 * It takes a vector of packed pixels:
 *
 *   packed = {P0, P1, P2, P3, ..., Pn}
 *
 * And will produce four vectors:
 *
 *   red    = {R0, R1, R2, R3, ..., Rn}
 *   green  = {G0, G1, G2, G3, ..., Gn}
 *   blue   = {B0, B1, B2, B3, ..., Bn}
 *   alpha  = {A0, A1, A2, A3, ..., An}
 *
 * It requires that a packed pixel fits into an element of the output
 * channels. The common case is converting pixels with a depth of 32 bits or
 * less into floats.
 *
 * \param format_desc  the format of the 'packed' incoming pixel vector
 * \param type  the desired type for rgba_out (type.length = n, above)
 * \param packed  the incoming vector of packed pixels
 * \param rgba_out  returns the SoA R,G,B,A vectors
 */
void
lp_build_unpack_rgba_soa(struct gallivm_state *gallivm,
                         const struct util_format_description *format_desc,
                         struct lp_type type,
                         LLVMValueRef packed,
                         LLVMValueRef rgba_out[4])
{
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context bld;
   LLVMValueRef inputs[4];
   unsigned chan;

   assert(format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
   assert(format_desc->block.width == 1);
   assert(format_desc->block.height == 1);
   assert(format_desc->block.bits <= type.width);
   /* FIXME: Support more output types */
   assert(type.width == 32);

   lp_build_context_init(&bld, gallivm, type);

   /* Decode the input vector components */
   for (chan = 0; chan < format_desc->nr_channels; ++chan) {
      const unsigned width = format_desc->channel[chan].size;
      const unsigned start = format_desc->channel[chan].shift;
      const unsigned stop = start + width;
      LLVMValueRef input;

      input = packed;

      switch(format_desc->channel[chan].type) {
      case UTIL_FORMAT_TYPE_VOID:
         input = lp_build_undef(gallivm, type);
         break;

      case UTIL_FORMAT_TYPE_UNSIGNED:
         /*
          * Align the LSB
          */

         if (start) {
            input = LLVMBuildLShr(builder, input, lp_build_const_int_vec(gallivm, type, start), "");
         }

         /*
          * Zero the MSBs
          */

         if (stop < format_desc->block.bits) {
            unsigned mask = ((unsigned long long)1 << width) - 1;
            input = LLVMBuildAnd(builder, input, lp_build_const_int_vec(gallivm, type, mask), "");
         }

         /*
          * Type conversion
          */

         if (type.floating) {
            if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
               assert(width == 8);
               if (format_desc->swizzle[3] == chan) {
                  input = lp_build_unsigned_norm_to_float(gallivm, width, type, input);
               }
               else {
                  struct lp_type conv_type = lp_uint_type(type);
                  input = lp_build_srgb_to_linear(gallivm, conv_type, input);
               }
            }
            else {
               if(format_desc->channel[chan].normalized)
                  input = lp_build_unsigned_norm_to_float(gallivm, width, type, input);
               else
                  input = LLVMBuildSIToFP(builder, input,
                                          lp_build_vec_type(gallivm, type), "");
            }
         }
         else if (format_desc->channel[chan].pure_integer) {
            /* Nothing to do */
         } else {
             /* FIXME */
             assert(0);
         }

         break;

      case UTIL_FORMAT_TYPE_SIGNED:
         /*
          * Align the sign bit first.
          */

         if (stop < type.width) {
            unsigned bits = type.width - stop;
            LLVMValueRef bits_val = lp_build_const_int_vec(gallivm, type, bits);
            input = LLVMBuildShl(builder, input, bits_val, "");
         }

         /*
          * Align the LSB (with an arithmetic shift to preserve the sign)
          */

         if (format_desc->channel[chan].size < type.width) {
            unsigned bits = type.width - format_desc->channel[chan].size;
            LLVMValueRef bits_val = lp_build_const_int_vec(gallivm, type, bits);
            input = LLVMBuildAShr(builder, input, bits_val, "");
         }

         /*
          * Type conversion
          */

         if (type.floating) {
            input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(gallivm, type), "");
            if (format_desc->channel[chan].normalized) {
               double scale = 1.0 / ((1 << (format_desc->channel[chan].size - 1)) - 1);
               LLVMValueRef scale_val = lp_build_const_vec(gallivm, type, scale);
               input = LLVMBuildFMul(builder, input, scale_val, "");
               /* the formula above will produce values below -1.0 for the
                * most negative value, but everything seems happy with that,
                * hence this is disabled for now */
               if (0)
                  input = lp_build_max(&bld, input,
                                       lp_build_const_vec(gallivm, type, -1.0f));
            }
         }
         else if (format_desc->channel[chan].pure_integer) {
            /* Nothing to do */
         } else {
             /* FIXME */
             assert(0);
         }

         break;

      case UTIL_FORMAT_TYPE_FLOAT:
         if (type.floating) {
            assert(start == 0);
            assert(stop == 32);
            assert(type.width == 32);
            input = LLVMBuildBitCast(builder, input, lp_build_vec_type(gallivm, type), "");
         }
         else {
            /* FIXME */
            assert(0);
            input = lp_build_undef(gallivm, type);
         }
         break;

      case UTIL_FORMAT_TYPE_FIXED:
         if (type.floating) {
            double scale = 1.0 / ((1 << (format_desc->channel[chan].size/2)) - 1);
            LLVMValueRef scale_val = lp_build_const_vec(gallivm, type, scale);
            input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(gallivm, type), "");
            input = LLVMBuildFMul(builder, input, scale_val, "");
         }
         else {
            /* FIXME */
            assert(0);
            input = lp_build_undef(gallivm, type);
         }
         break;

      default:
         assert(0);
         input = lp_build_undef(gallivm, type);
         break;
      }

      inputs[chan] = input;
   }

   lp_build_format_swizzle_soa(format_desc, &bld, inputs, rgba_out);
}
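/*
 * The signed-channel alignment above, in scalar form (a sketch, not from
 * the original source; relies on arithmetic right shift of signed values,
 * as mainstream compilers provide). Shift the field's sign bit up to the
 * word's sign bit, then shift back arithmetically to sign-extend.
 */
#include <stdint.h>

static int32_t extract_signed_field(uint32_t packed,
                                    unsigned start, unsigned width)
{
   uint32_t aligned = packed << (32 - start - width);  /* Shl: sign bit up */
   return (int32_t)aligned >> (32 - width);            /* AShr: extend down */
}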
Example #24
/**
 * Special case for converting clamped IEEE-754 floats to unsigned norms.
 *
 * The mathematical voodoo below may seem excessive but it is actually
 * paramount we do it this way for several reasons. First, there is no
 * single precision FP to unsigned integer conversion Intel SSE
 * instruction. Second, even if there was, since the FP's mantissa takes
 * only a fraction of register bits the typical scale-and-cast approach
 * would require double precision for accurate results, and therefore
 * half the throughput.
 *
 * Although the result values can be scaled to an arbitrary bit width
 * specified by dst_width, the actual result type will have the same
 * width as src_type.
 *
 * Ex: src = { float, float, float, float }
 * return { i32, i32, i32, i32 } where each value is in [0, 2^dst_width-1].
 */
LLVMValueRef
lp_build_clamped_float_to_unsigned_norm(struct gallivm_state *gallivm,
                                        struct lp_type src_type,
                                        unsigned dst_width,
                                        LLVMValueRef src)
{
    LLVMBuilderRef builder = gallivm->builder;
    LLVMTypeRef int_vec_type = lp_build_int_vec_type(gallivm, src_type);
    LLVMValueRef res;
    unsigned mantissa;

    assert(src_type.floating);
    assert(dst_width <= src_type.width);
    src_type.sign = FALSE;

    mantissa = lp_mantissa(src_type);

    if (dst_width <= mantissa) {
        /*
         * Apply magic coefficients that make the desired result appear in
         * the least significant bits of the mantissa, with correct rounding.
         *
         * This only works if the destination width fits in the mantissa.
         */

        unsigned long long ubound;
        unsigned long long mask;
        double scale;
        double bias;

        ubound = (1ULL << dst_width);
        mask = ubound - 1;
        scale = (double)mask/ubound;
        bias = (double)(1ULL << (mantissa - dst_width));

        res = LLVMBuildFMul(builder, src, lp_build_const_vec(gallivm, src_type, scale), "");
        res = LLVMBuildFAdd(builder, res, lp_build_const_vec(gallivm, src_type, bias), "");
        res = LLVMBuildBitCast(builder, res, int_vec_type, "");
        res = LLVMBuildAnd(builder, res,
                           lp_build_const_int_vec(gallivm, src_type, mask), "");
    }
    else if (dst_width == (mantissa + 1)) {
        /*
         * The destination width matches exactly what can be represented in
         * floating point (i.e., mantissa + 1 bits). So do a straight
         * multiplication followed by casting. No further rounding is necessary.
         */

        double scale;

        scale = (double)((1ULL << dst_width) - 1);

        res = LLVMBuildFMul(builder, src,
                            lp_build_const_vec(gallivm, src_type, scale), "");
        res = LLVMBuildFPToSI(builder, res, int_vec_type, "");
    }
    else {
        /*
         * The destination exceeds what can be represented in floating
         * point. So multiply by the largest power of two we can get away
         * with, and then subtract the most significant bit to rescale to
         * normalized values.
         *
         * The largest power of two factor we can get away with is
         * (1 << (src_type.width - 1)), because we need to use a signed
         * conversion. In theory it should be (1 << (src_type.width - 2)),
         * but IEEE 754 rules state that INT_MIN should be returned by
         * FPToSI on overflow, which is the correct result for values near
         * 1.0!
         *
         * This means we get (src_type.width - 1) correct bits for values
         * near 0.0, and (mantissa + 1) correct bits for values near 1.0.
         * Equally or more important, we also get exact results for 0.0 and
         * 1.0.
         */

        unsigned n = MIN2(src_type.width - 1, dst_width);

        double scale = (double)(1ULL << n);
        unsigned lshift = dst_width - n;
        unsigned rshift = n;
        LLVMValueRef lshifted;
        LLVMValueRef rshifted;

        res = LLVMBuildFMul(builder, src,
                            lp_build_const_vec(gallivm, src_type, scale), "");
        res = LLVMBuildFPToSI(builder, res, int_vec_type, "");

        /*
         * Align the most significant bit to its final place.
         *
         * This will cause 1.0 to overflow to 0, but the later adjustment will
         * get it right.
         */
        if (lshift) {
            lshifted = LLVMBuildShl(builder, res,
                                    lp_build_const_int_vec(gallivm, src_type,
                                            lshift), "");
        } else {
            lshifted = res;
        }

        /*
         * Align the most significant bit to the right.
         */
        rshifted =  LLVMBuildLShr(builder, res,
                                  lp_build_const_int_vec(gallivm, src_type, rshift),
                                  "");

        /*
         * Subtract the MSB (moved down to the LSB) from the shifted value,
         * thereby rescaling from (1 << dst_width) to ((1 << dst_width) - 1).
         */

        res = LLVMBuildSub(builder, lshifted, rshifted, "");
    }

    return res;
}
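/*
 * A scalar model of the last case above for a 32-bit destination (a
 * sketch, not from the original source): n = 31, so lshift = 1 and
 * rshift = 31. x86's cvttss2si returns INT_MIN on overflow, which is
 * exactly the 1 << 31 bit pattern that values near 1.0 need; the sketch
 * emulates that explicitly to stay portable.
 */
#include <assert.h>
#include <stdint.h>

static uint32_t clamped_float_to_unorm32(float x)
{
   double scaled = (double)x * 2147483648.0;   /* x * 2^31 */
   uint32_t res = scaled >= 2147483648.0 ?
      0x80000000u : (uint32_t)(int32_t)scaled; /* FPToSI, INT_MIN on overflow */
   uint32_t lshifted = res << 1;               /* MSB to its final place */
   uint32_t rshifted = res >> 31;              /* MSB down to the LSB */
   return lshifted - rshifted;                 /* rescale to 2^32 - 1 */
}

int main(void)
{
   assert(clamped_float_to_unorm32(0.0f) == 0u);
   assert(clamped_float_to_unorm32(1.0f) == 0xffffffffu);
   return 0;
}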
Example #25
/**
 * Inverse of lp_build_clamped_float_to_unsigned_norm above.
 * Ex: src = { i32, i32, i32, i32 } with values in range [0, 2^src_width-1]
 * return {float, float, float, float} with values in range [0, 1].
 */
LLVMValueRef
lp_build_unsigned_norm_to_float(struct gallivm_state *gallivm,
                                unsigned src_width,
                                struct lp_type dst_type,
                                LLVMValueRef src)
{
    LLVMBuilderRef builder = gallivm->builder;
    LLVMTypeRef vec_type = lp_build_vec_type(gallivm, dst_type);
    LLVMTypeRef int_vec_type = lp_build_int_vec_type(gallivm, dst_type);
    LLVMValueRef bias_;
    LLVMValueRef res;
    unsigned mantissa;
    unsigned n;
    unsigned long long ubound;
    unsigned long long mask;
    double scale;
    double bias;

    assert(dst_type.floating);

    mantissa = lp_mantissa(dst_type);

    if (src_width <= (mantissa + 1)) {
        /*
         * The source width fits within what can be represented in floating
         * point (i.e., mantissa + 1 bits). So do a straight multiplication
         * followed by casting. No further rounding is necessary.
         */

        scale = 1.0/(double)((1ULL << src_width) - 1);
        res = LLVMBuildSIToFP(builder, src, vec_type, "");
        res = LLVMBuildFMul(builder, res,
                            lp_build_const_vec(gallivm, dst_type, scale), "");
        return res;
    }
    else {
        /*
         * The source width exceeds what can be represented in floating
         * point. So truncate the incoming values.
         */

        n = MIN2(mantissa, src_width);

        ubound = ((unsigned long long)1 << n);
        mask = ubound - 1;
        scale = (double)ubound/mask;
        bias = (double)((unsigned long long)1 << (mantissa - n));

        res = src;

        if (src_width > mantissa) {
            int shift = src_width - mantissa;
            res = LLVMBuildLShr(builder, res,
                                lp_build_const_int_vec(gallivm, dst_type, shift), "");
        }

        bias_ = lp_build_const_vec(gallivm, dst_type, bias);

        res = LLVMBuildOr(builder,
                          res,
                          LLVMBuildBitCast(builder, bias_, int_vec_type, ""), "");

        res = LLVMBuildBitCast(builder, res, vec_type, "");

        res = LLVMBuildFSub(builder, res, bias_, "");
        res = LLVMBuildFMul(builder, res, lp_build_const_vec(gallivm, dst_type, scale), "");
    }

    return res;
}
Example #26
static LLVMValueRef gen_digestof_value(compile_t* c, ast_t* type,
  LLVMValueRef value)
{
  LLVMTypeRef impl_type = LLVMTypeOf(value);

  switch(LLVMGetTypeKind(impl_type))
  {
    case LLVMFloatTypeKind:
      value = LLVMBuildBitCast(c->builder, value, c->i32, "");
      return LLVMBuildZExt(c->builder, value, c->intptr, "");

    case LLVMDoubleTypeKind:
      value = LLVMBuildBitCast(c->builder, value, c->i64, "");
      return gen_digestof_int64(c, value);

    case LLVMIntegerTypeKind:
    {
      uint32_t width = LLVMGetIntTypeWidth(impl_type);

      if(width < 64)
      {
        return LLVMBuildZExt(c->builder, value, c->intptr, "");
      } else if(width == 64) {
        return gen_digestof_int64(c, value);
      } else if(width == 128) {
        LLVMValueRef shift = LLVMConstInt(c->i128, 64, false);
        LLVMValueRef high = LLVMBuildLShr(c->builder, value, shift, "");
        high = LLVMBuildTrunc(c->builder, high, c->i64, "");
        value = LLVMBuildTrunc(c->builder, value, c->i64, "");
        high = gen_digestof_int64(c, high);
        value = gen_digestof_int64(c, value);
        return LLVMBuildXor(c->builder, value, high, "");
      }
      break;
    }

    case LLVMStructTypeKind:
    {
      uint32_t count = LLVMCountStructElementTypes(impl_type);
      LLVMValueRef result = LLVMConstInt(c->intptr, 0, false);
      ast_t* child = ast_child(type);

      for(uint32_t i = 0; i < count; i++)
      {
        LLVMValueRef elem = LLVMBuildExtractValue(c->builder, value, i, "");
        elem = gen_digestof_value(c, child, elem);
        result = LLVMBuildXor(c->builder, result, elem, "");
        child = ast_sibling(child);
      }

      pony_assert(child == NULL);

      return result;
    }

    case LLVMPointerTypeKind:
      if(!is_known(type))
      {
        reach_type_t* t = reach_type(c->reach, type);
        int sub_kind = subtype_kind(t);

        if((sub_kind & SUBTYPE_KIND_BOXED) != 0)
          return gen_digestof_box(c, t, value, sub_kind);
      }

      return LLVMBuildPtrToInt(c->builder, value, c->intptr, "");

    default: {}
  }

  pony_assert(0);
  return NULL;
}
Example #27
/*
 * gen_fetch
 *
 * Generates a load operation for a fetch expression.
 */
static LLVMValueRef
gen_fetch (gencodectx_t gctx, expr_node_t *rhs, LLVMTypeRef neededtype)
{
    LLVMBuilderRef builder = gctx->curfn->builder;
    llvm_accinfo_t accinfo;
    LLVMValueRef addr, val;
    LLVMTypeRef type;
    int shifts_required = 0;
    int signext;

    // For field references with non-zero bit position, or with
    // non-CTCE size, we'll have to do bit shifting to extract
    // the field.
    addr = llvmgen_addr_expression(gctx, rhs, &accinfo);
    if (accinfo.posval != 0 || accinfo.sizeval != 0) {
        type = gctx->fullwordtype;
        if ((accinfo.flags & LLVMGEN_M_ACC_CONSTSIZ)) {
            accinfo.sizeval = LLVMConstInt(gctx->fullwordtype, accinfo.size, 0);
        }
        shifts_required = 1;
    } else if ((accinfo.flags & LLVMGEN_M_ACC_CONSTSIZ)) {
        if (accinfo.size == 0) {
            // XXX signal invalid size
            type = gctx->int1type;
        } else {
            type = LLVMIntTypeInContext(gctx->llvmctx, accinfo.size);
        }
    } else {
        type = gctx->fullwordtype;
    }
    signext = ((accinfo.flags & LLVMGEN_M_SEG_SIGNEXT) != 0);

    // If we're fetching from a register, there's no load instruction
    // required - EXCEPT if this was a scalar BIND, where the BIND
    // value is a pointer that still has to be dereferenced.

    if ((accinfo.segclass == LLVM_REG &&
        (accinfo.flags & LLVMGEN_M_SEG_DEREFED) == 0) &&
        (accinfo.flags & LLVMGEN_M_SEG_BINDPTR) == 0) {
        val = llvmgen_adjustval(gctx, addr, type, signext);
    } else {
        addr = llvmgen_adjustval(gctx, addr, LLVMPointerType(type, 0), 0);
        val = LLVMBuildLoad(builder, addr, llvmgen_temp(gctx));
        if ((accinfo.flags & LLVMGEN_M_SEG_VOLATILE) != 0) LLVMSetVolatile(val, 1);
    }
    if (shifts_required) {
        val = llvmgen_adjustval(gctx, val, gctx->fullwordtype, signext);
        if (signext) {
            val = LLVMBuildAShr(builder, val, accinfo.posval, llvmgen_temp(gctx));
        } else {
            val = LLVMBuildLShr(builder, val, accinfo.posval, llvmgen_temp(gctx));
        }

        if ((accinfo.flags & LLVMGEN_M_ACC_CONSTSIZ) != 0) {
            LLVMTypeRef trunctype = LLVMIntTypeInContext(gctx->llvmctx, accinfo.size);
            val = llvmgen_adjustval(gctx, val, trunctype, signext);
        } else {
            LLVMValueRef neg1 = LLVMConstAllOnes(gctx->fullwordtype);
            LLVMValueRef mask;

            mask = LLVMBuildShl(builder, neg1, accinfo.sizeval, llvmgen_temp(gctx));
            mask = LLVMBuildNot(builder, mask, llvmgen_temp(gctx)); /* ~(~0 << size): low 'size' bits */
            val = LLVMBuildAnd(builder, val, mask, llvmgen_temp(gctx));
            if (signext) {
                val = LLVMBuildSExt(builder, val, gctx->fullwordtype, llvmgen_temp(gctx));
            }
        }
    }

    return llvmgen_adjustval(gctx, val, neededtype, signext);

} /* gen_fetch */
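The shift-and-mask sequence gen_fetch emits corresponds to this plain-C model of a field fetch; a sketch with illustrative names, assuming an unsigned fullword, two's-complement arithmetic shift, and 0 < size < the word width:

#include <stdint.h>

static uintptr_t fetch_field(uintptr_t word, unsigned pos, unsigned size,
                             int signext)
{
    /* AShr for signed fields, LShr otherwise, as in the IR above. */
    uintptr_t val = signext ? (uintptr_t)((intptr_t)word >> pos)
                            : word >> pos;
    uintptr_t mask = ~(~(uintptr_t)0 << size);   /* low 'size' bits */

    val &= mask;
    if (signext && (val & ((uintptr_t)1 << (size - 1))))
        val |= ~mask;                            /* sign-extend the field */
    return val;
}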
Example #28
static void llvm_emit_tex(
	const struct lp_build_tgsi_action * action,
	struct lp_build_tgsi_context * bld_base,
	struct lp_build_emit_data * emit_data)
{
	struct gallivm_state * gallivm = bld_base->base.gallivm;
	LLVMValueRef args[7];
	unsigned c, sampler_src;
	struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);

	if (emit_data->inst->Texture.Texture == TGSI_TEXTURE_BUFFER) {
		switch (emit_data->inst->Instruction.Opcode) {
		case TGSI_OPCODE_TXQ: {
			struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
			ctx->uses_tex_buffers = true;
			bool isEgPlus = (ctx->chip_class >= EVERGREEN);
			LLVMValueRef offset = lp_build_const_int32(bld_base->base.gallivm,
				isEgPlus ? 0 : 1);
			LLVMValueRef cvecval = llvm_load_const_buffer(bld_base, offset,
				LLVM_R600_BUFFER_INFO_CONST_BUFFER);
			if (!isEgPlus) {
				LLVMValueRef maskval[4] = {
					lp_build_const_int32(gallivm, 1),
					lp_build_const_int32(gallivm, 2),
					lp_build_const_int32(gallivm, 3),
					lp_build_const_int32(gallivm, 0),
				};
				LLVMValueRef mask = LLVMConstVector(maskval, 4);
				cvecval = LLVMBuildShuffleVector(gallivm->builder, cvecval, cvecval,
					mask, "");
			}
			emit_data->output[0] = cvecval;
			return;
		}
		case TGSI_OPCODE_TXF: {
			args[0] = LLVMBuildExtractElement(gallivm->builder, emit_data->args[0], lp_build_const_int32(gallivm, 0), "");
			args[1] = lp_build_const_int32(gallivm, R600_MAX_CONST_BUFFERS);
			emit_data->output[0] = build_intrinsic(gallivm->builder,
							"llvm.R600.load.texbuf",
							emit_data->dst_type, args, 2, LLVMReadNoneAttribute);
			if (ctx->chip_class >= EVERGREEN)
				return;
			ctx->uses_tex_buffers = true;
			emit_data->output[0] = LLVMBuildBitCast(gallivm->builder,
				emit_data->output[0], LLVMVectorType(bld_base->base.int_elem_type, 4),
				"");
			LLVMValueRef Mask = llvm_load_const_buffer(bld_base,
				lp_build_const_int32(gallivm, 0),
				LLVM_R600_BUFFER_INFO_CONST_BUFFER);
			Mask = LLVMBuildBitCast(gallivm->builder, Mask,
				LLVMVectorType(bld_base->base.int_elem_type, 4), "");
			emit_data->output[0] = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_AND,
				emit_data->output[0],
				Mask);
			LLVMValueRef WComponent = LLVMBuildExtractElement(gallivm->builder,
				emit_data->output[0], lp_build_const_int32(gallivm, 3), "");
			Mask = llvm_load_const_buffer(bld_base, lp_build_const_int32(gallivm, 1),
				LLVM_R600_BUFFER_INFO_CONST_BUFFER);
			Mask = LLVMBuildExtractElement(gallivm->builder, Mask,
				lp_build_const_int32(gallivm, 0), "");
			Mask = LLVMBuildBitCast(gallivm->builder, Mask,
				bld_base->base.int_elem_type, "");
			WComponent = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_OR,
				WComponent, Mask);
			emit_data->output[0] = LLVMBuildInsertElement(gallivm->builder,
				emit_data->output[0], WComponent, lp_build_const_int32(gallivm, 3), "");
			emit_data->output[0] = LLVMBuildBitCast(gallivm->builder,
				emit_data->output[0], LLVMVectorType(bld_base->base.elem_type, 4), "");
			return;
		}
		default:
			break;
		}
	}

	if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_TEX ||
		emit_data->inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
		LLVMValueRef Vector[4] = {
			LLVMBuildExtractElement(gallivm->builder, emit_data->args[0],
				lp_build_const_int32(gallivm, 0), ""),
			LLVMBuildExtractElement(gallivm->builder, emit_data->args[0],
				lp_build_const_int32(gallivm, 1), ""),
			LLVMBuildExtractElement(gallivm->builder, emit_data->args[0],
				lp_build_const_int32(gallivm, 2), ""),
			LLVMBuildExtractElement(gallivm->builder, emit_data->args[0],
				lp_build_const_int32(gallivm, 3), ""),
		};
		switch (emit_data->inst->Texture.Texture) {
		case TGSI_TEXTURE_2D:
		case TGSI_TEXTURE_RECT:
			Vector[2] = Vector[3] = LLVMGetUndef(bld_base->base.elem_type);
			break;
		case TGSI_TEXTURE_1D:
			Vector[1] = Vector[2] = Vector[3] = LLVMGetUndef(bld_base->base.elem_type);
			break;
		default:
			break;
		}
		args[0] = lp_build_gather_values(gallivm, Vector, 4);
	} else {
		args[0] = emit_data->args[0];
	}

	assert(emit_data->arg_count + 2 <= Elements(args));

	for (c = 1; c < emit_data->arg_count; ++c)
		args[c] = emit_data->args[c];

	if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_TXF) {
		args[1] = LLVMBuildShl(gallivm->builder, args[1], lp_build_const_int32(gallivm, 1), "");
		args[2] = LLVMBuildShl(gallivm->builder, args[2], lp_build_const_int32(gallivm, 1), "");
		args[3] = LLVMBuildShl(gallivm->builder, args[3], lp_build_const_int32(gallivm, 1), "");
	}

	sampler_src = emit_data->inst->Instruction.NumSrcRegs-1;

	args[c++] = lp_build_const_int32(gallivm,
					emit_data->inst->Src[sampler_src].Register.Index + R600_MAX_CONST_BUFFERS);
	args[c++] = lp_build_const_int32(gallivm,
					emit_data->inst->Src[sampler_src].Register.Index);
	args[c++] = lp_build_const_int32(gallivm,
					emit_data->inst->Texture.Texture);

	if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_TXF &&
		(emit_data->inst->Texture.Texture == TGSI_TEXTURE_2D_MSAA ||
		emit_data->inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY_MSAA)) {

		switch (emit_data->inst->Texture.Texture) {
		case TGSI_TEXTURE_2D_MSAA:
			args[6] = lp_build_const_int32(gallivm, TGSI_TEXTURE_2D);
			break;
		case TGSI_TEXTURE_2D_ARRAY_MSAA:
			args[6] = lp_build_const_int32(gallivm, TGSI_TEXTURE_2D_ARRAY);
			break;
		default:
			break;
		}

		if (ctx->has_compressed_msaa_texturing) {
			LLVMValueRef ldptr_args[10] = {
				args[0], // Coord
				args[1], // Offset X
				args[2], // Offset Y
				args[3], // Offset Z
				args[4],
				args[5],
				lp_build_const_int32(gallivm, 1),
				lp_build_const_int32(gallivm, 1),
				lp_build_const_int32(gallivm, 1),
				lp_build_const_int32(gallivm, 1)
			};
			LLVMValueRef ptr = build_intrinsic(gallivm->builder,
				"llvm.R600.ldptr",
				emit_data->dst_type, ldptr_args, 10, LLVMReadNoneAttribute);
			LLVMValueRef Tmp = LLVMBuildExtractElement(gallivm->builder, args[0],
				lp_build_const_int32(gallivm, 3), "");
			Tmp = LLVMBuildMul(gallivm->builder, Tmp,
				lp_build_const_int32(gallivm, 4), "");
			LLVMValueRef ResX = LLVMBuildExtractElement(gallivm->builder, ptr,
				lp_build_const_int32(gallivm, 0), "");
			ResX = LLVMBuildBitCast(gallivm->builder, ResX,
				bld_base->base.int_elem_type, "");
			Tmp = LLVMBuildLShr(gallivm->builder, ResX, Tmp, "");
			Tmp = LLVMBuildAnd(gallivm->builder, Tmp,
				lp_build_const_int32(gallivm, 0xF), "");
			args[0] = LLVMBuildInsertElement(gallivm->builder, args[0], Tmp,
				lp_build_const_int32(gallivm, 3), "");
			args[c++] = lp_build_const_int32(gallivm,
				emit_data->inst->Texture.Texture);
		}
	}

	emit_data->output[0] = build_intrinsic(gallivm->builder,
					action->intr_name,
					emit_data->dst_type, args, c, LLVMReadNoneAttribute);

	if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_TXQ &&
		((emit_data->inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY ||
		emit_data->inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY)))
		if (emit_data->inst->Dst[0].Register.WriteMask & 4) {
			LLVMValueRef offset = lp_build_const_int32(bld_base->base.gallivm, 0);
			LLVMValueRef ZLayer = LLVMBuildExtractElement(gallivm->builder,
				llvm_load_const_buffer(bld_base, offset, CONSTANT_TXQ_BUFFER),
				lp_build_const_int32(gallivm, 0), "");

			emit_data->output[0] = LLVMBuildInsertElement(gallivm->builder, emit_data->output[0], ZLayer, lp_build_const_int32(gallivm, 2), "");
			struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
			ctx->has_txq_cube_array_z_comp = true;
		}
}
Example #29
LLVMValueRef build_label_from_tag(struct llvm_ctx *ctx, LLVMValueRef tag) {
	return LLVMBuildLShr(ctx->builder, tag, CONST_WORD(16), "tag.label");
}
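In plain C the same extraction is just a logical shift; a sketch, assuming 32-bit words with the label held in the tag's upper 16 bits:

#include <stdint.h>

static inline uint32_t label_from_tag(uint32_t tag)
{
    return tag >> 16;   /* the upper half of the tag is the label */
}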
Example #30
static LLVMValueRef get_stritem_len_fn(struct llvm_ctx *ctx)
{
	if(ctx->stritem_len_fn != NULL) return ctx->stritem_len_fn;

	/* returns (i32 len, i32 new_tpos)
	 * params (word *utcbptr, i32 tpos)
	 *
	 * when the returned "new_tpos" > tmax + 1, the result is invalid. The
	 * function should likewise not be called when tpos > tmax + 1.
	 */
	LLVMTypeRef ret_types[2] = { ctx->i32t, ctx->i32t },
		parm_types[2] = { LLVMPointerType(ctx->wordt, 0), ctx->i32t },
		ret_type = LLVMStructTypeInContext(ctx->ctx, ret_types, 2, 0),
		fn_type = LLVMFunctionType(ret_type, parm_types, 2, 0);
	LLVMValueRef fn = LLVMAddFunction(ctx->module, "__muidl_get_stritem_len",
		fn_type);
	LLVMSetVisibility(fn, LLVMHiddenVisibility);
	LLVMSetLinkage(fn, LLVMInternalLinkage);
	V fn_args[2];	/* V is a shorthand typedef for LLVMValueRef */
	LLVMGetParams(fn, fn_args);
	LLVMAddAttribute(fn_args[0], LLVMNoCaptureAttribute);
	for(int i=0; i<2; i++) {
		LLVMAddAttribute(fn_args[i], LLVMInRegAttribute);
	}
	ctx->stritem_len_fn = fn;

	LLVMBuilderRef old_builder = ctx->builder;
	ctx->builder = LLVMCreateBuilderInContext(ctx->ctx);
	LLVMBasicBlockRef entry_bb = LLVMAppendBasicBlockInContext(ctx->ctx, fn,
			"EntryBlock"),
		loop_bb = LLVMAppendBasicBlockInContext(ctx->ctx, fn, "loop"),
		valid_bb = LLVMAppendBasicBlockInContext(ctx->ctx, fn, "valid"),
		exit_bb = LLVMAppendBasicBlockInContext(ctx->ctx, fn, "exit");

	LLVMPositionBuilderAtEnd(ctx->builder, entry_bb);
	LLVMValueRef old_utcb = ctx->utcb, old_tpos = ctx->tpos;
	ctx->utcb = fn_args[0];
	ctx->tpos = fn_args[1];
	LLVMBuildBr(ctx->builder, loop_bb);

	LLVMPositionBuilderAtEnd(ctx->builder, exit_bb);
	LLVMValueRef exit_len_phi = LLVMBuildPhi(ctx->builder, ctx->i32t,
			"exit.len.phi"),
		exit_tpos_phi = LLVMBuildPhi(ctx->builder, ctx->i32t,
			"exit.tpos.phi");
	LLVMValueRef rvals[2] = { exit_len_phi, exit_tpos_phi };
	LLVMBuildAggregateRet(ctx->builder, rvals, 2);

	LLVMPositionBuilderAtEnd(ctx->builder, loop_bb);
	LLVMValueRef len_phi = LLVMBuildPhi(ctx->builder, ctx->i32t, "len.phi"),
		tpos_phi = LLVMBuildPhi(ctx->builder, ctx->i32t, "tpos.phi");
	LLVMAddIncoming(len_phi, &ctx->zero, &entry_bb, 1);
	LLVMAddIncoming(tpos_phi, &ctx->tpos, &entry_bb, 1);
	ctx->tpos = tpos_phi;
	/* test: if *tpos doesn't look like a string item, conk out. */
	LLVMValueRef infoword = build_utcb_load(ctx, ctx->tpos, "si.info");
	LLVMValueRef is_cond = LLVMBuildICmp(ctx->builder, LLVMIntEQ,
		ctx->zero, LLVMBuildAnd(ctx->builder, infoword,
			CONST_WORD(1 << 4), "infoword.si.mask"),
		"infoword.si.cond");
	/* anything + 100 is sure to be > tmax + 1. */
	LLVMValueRef fucked_tpos = LLVMBuildAdd(ctx->builder, tpos_phi,
		CONST_INT(100), "f****d.tpos");
	branch_set_phi(ctx, exit_len_phi, len_phi);
	branch_set_phi(ctx, exit_tpos_phi, fucked_tpos);
	LLVMBuildCondBr(ctx->builder, is_cond, valid_bb, exit_bb);

	LLVMPositionBuilderAtEnd(ctx->builder, valid_bb);
	LLVMValueRef string_length = LLVMBuildTruncOrBitCast(ctx->builder,
			LLVMBuildLShr(ctx->builder, infoword,
				CONST_INT(10), "si.info.len"),
			ctx->i32t, "si.info.len.int"),
		string_j = LLVMBuildTruncOrBitCast(ctx->builder,
			LLVMBuildAnd(ctx->builder, CONST_WORD(0x1f),
				LLVMBuildLShr(ctx->builder, infoword, CONST_WORD(4),
					"si.info.j.shift"),
				"si.info.j.masked"),
			ctx->i32t, "si.info.j"),
		string_c = LLVMBuildTruncOrBitCast(ctx->builder,
			LLVMBuildAnd(ctx->builder, CONST_WORD(1 << 9),
				infoword, "si.info.c.masked"),
			ctx->i32t, "si.info.c.masked.int"),
		c_cond = LLVMBuildICmp(ctx->builder, LLVMIntNE,
			string_c, CONST_WORD(0), "si.info.c.cond"),
		new_len = LLVMBuildAdd(ctx->builder, len_phi,
			LLVMBuildMul(ctx->builder, string_length,
				LLVMBuildAdd(ctx->builder, string_j,
					CONST_INT(1), "j.plus.one"),
				"len.incr"),
			"len.new"),
		new_tpos = LLVMBuildAdd(ctx->builder, ctx->tpos,
			LLVMBuildSelect(ctx->builder, c_cond,
				LLVMBuildAdd(ctx->builder, CONST_INT(2),
					string_j, "cont.tpos.bump"),
				CONST_INT(2), "tpos.bump"),
			"tpos.new");
	LLVMAddIncoming(len_phi, &new_len, &valid_bb, 1);
	LLVMAddIncoming(tpos_phi, &new_tpos, &valid_bb, 1);
	LLVMAddIncoming(exit_len_phi, &new_len, &valid_bb, 1);
	LLVMAddIncoming(exit_tpos_phi, &new_tpos, &valid_bb, 1);
	LLVMBuildCondBr(ctx->builder, c_cond, loop_bb, exit_bb);

	LLVMDisposeBuilder(ctx->builder);
	ctx->builder = old_builder;
	ctx->utcb = old_utcb;
	ctx->tpos = old_tpos;

	return ctx->stritem_len_fn;
}
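The loop the builder constructs can be modelled in plain C roughly as follows; this is a sketch of the IR above, assuming word-indexed UTCB access and the bit layout the masks imply (substring length in bits 10 and up, j in bits 4-8, continuation flag in bit 9), with illustrative names:

#include <stdint.h>

struct stritem_len { uint32_t len, new_tpos; };

static struct stritem_len stritem_len(const uintptr_t *utcb, uint32_t tpos)
{
	uint32_t len = 0;
	for (;;) {
		uintptr_t info = utcb[tpos];
		/* not a string item: return an out-of-range tpos (> tmax + 1). */
		if ((info & ((uintptr_t)1 << 4)) != 0)
			return (struct stritem_len){ len, tpos + 100 };
		uint32_t slen = (uint32_t)(info >> 10);
		uint32_t j = (uint32_t)(info >> 4) & 0x1f;
		int c = (info & ((uintptr_t)1 << 9)) != 0;
		len += slen * (j + 1);	/* j+1 substrings of slen each */
		tpos += c ? 2 + j : 2;	/* step past this string item */
		if (!c)
			return (struct stritem_len){ len, tpos };
	}
}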