Esempio n. 1
0
/**
 * Normalized 8bit multiplication, i.e. a*b/255 computed without a division.
 *
 * Three well known approximations exist:
 *
 * - alpha plus one
 *
 *     approximates the division as (Sree)
 *
 *       a*b/255 ~= (a*(b + 1)) >> 8
 *
 *     which is the fastest method that satisfies the following OpenGL criteria
 *
 *       0*0 = 0 and 255*255 = 255
 *
 * - geometric series
 *
 *     expands the division into a geometric series
 *
 *       t/255 = (t >> 8) + (t >> 16) + (t >> 24) ..
 *
 *     keeping just the first two terms so that it fits in 16bit arithmetic
 *
 *       t/255 ~= (t + (t >> 8)) >> 8
 *
 *     note that by itself it doesn't satisfy the OpenGL criteria, as
 *     255*255 = 254, so either the special case b = 255 must be accounted
 *     for or rounding must be used
 *
 * - geometric series plus rounding
 *
 *     rounds instead of truncating the geometric series result (Jim Blinn)
 *
 *       t/255 ~= (t + (t >> 8) + 0x80) >> 8
 *
 *     achieving the exact results
 *
 * The enabled code path is the last one; the "alpha plus one" variant is
 * kept under "#if 0".
 *
 * @param builder   LLVM instruction builder
 * @param i16_type  type used to build the integer constants
 * @param a         first operand
 * @param b         second operand
 * @return          the built value for the normalized product
 *
 * @sa Alvy Ray Smith, Image Compositing Fundamentals, Tech Memo 4, Aug 15, 1995,
 *     ftp://ftp.alvyray.com/Acrobat/4_Comp.pdf
 * @sa Michael Herf, The "double blend trick", May 2000,
 *     http://www.stereopsis.com/doubleblend.html
 */
static LLVMValueRef
lp_build_mul_u8n(LLVMBuilderRef builder,
                 struct lp_type i16_type,
                 LLVMValueRef a, LLVMValueRef b)
{
   LLVMValueRef shift8 = lp_build_int_const_scalar(i16_type, 8);
   LLVMValueRef prod;

#if 0

   /* a*b/255 ~= (a*(b + 1)) >> 8 */
   b = LLVMBuildAdd(builder, b, lp_build_int_const_scalar(i16_type, 1), "");
   prod = LLVMBuildMul(builder, a, b, "");

#else

   /* ab/255 ~= (ab + (ab >> 8) + 0x80) >> 8 */
   {
      LLVMValueRef high;
      LLVMValueRef half;

      prod = LLVMBuildMul(builder, a, b, "");
      high = LLVMBuildLShr(builder, prod, shift8, "");
      prod = LLVMBuildAdd(builder, prod, high, "");
      half = lp_build_int_const_scalar(i16_type, 0x80);
      prod = LLVMBuildAdd(builder, prod, half, "");
   }

#endif

   /* The final ">> 8" is shared by both variants. */
   return LLVMBuildLShr(builder, prod, shift8, "");
}
Esempio n. 2
0
/*
 * Build a screen-space derivative as the difference between two pixels of
 * a quad.
 *
 * SI implements derivatives using the local data store (LDS).
 * All writes to the LDS happen in all executing threads at
 * the same time. TID is the Thread ID for the current
 * thread and is a value between 0 and 63, representing
 * the thread's position in the wavefront.
 *
 * For the pixel shader, threads are grouped into quads of four pixels.
 * The TIDs of the pixels of a quad are:
 *
 *  +------+------+
 *  |4n + 0|4n + 1|
 *  +------+------+
 *  |4n + 2|4n + 3|
 *  +------+------+
 *
 * So, masking the TID with 0xfffffffc yields the TID of the top left pixel
 * of the quad, masking with 0xfffffffd yields the TID of the top pixel of
 * the current pixel's column, and masking with 0xfffffffe yields the TID
 * of the left pixel of the current pixel's row.
 *
 * Adding 1 yields the TID of the pixel to the right of the left pixel, and
 * adding 2 yields the TID of the pixel below the top pixel.
 *
 * The function computes (thread_id & mask) as the reference TID and
 * (thread_id & mask) + idx as the neighbour TID, fetches "val" as seen by
 * both of them (through ds.bpermute when has_ds_bpermute is set, otherwise
 * through a store/load round trip in "lds"), bitcasts both to f32 and
 * returns neighbour - reference.
 */
LLVMValueRef
ac_build_ddxy(struct ac_llvm_context *ctx,
	      bool has_ds_bpermute,
	      uint32_t mask,
	      int idx,
	      LLVMValueRef lds,
	      LLVMValueRef val)
{
	LLVMValueRef tid, ref_tid, nbr_tid;
	LLVMValueRef ref_val, nbr_val;

	tid = ac_get_thread_id(ctx);

	/* Reference pixel: clear the TID bits selected by the mask. */
	ref_tid = LLVMBuildAnd(ctx->builder, tid,
			       LLVMConstInt(ctx->i32, mask, false), "");

	/* Neighbour pixel: reference TID advanced by idx. */
	nbr_tid = LLVMBuildAdd(ctx->builder, ref_tid,
			       LLVMConstInt(ctx->i32, idx, false), "");

	if (!has_ds_bpermute) {
		/* Every thread publishes its value, then reads the two
		 * slots it is interested in.
		 */
		LLVMValueRef own_slot = ac_build_gep0(ctx, lds, tid);
		LLVMValueRef ref_slot = ac_build_gep0(ctx, lds, ref_tid);
		LLVMValueRef nbr_slot = ac_build_gep0(ctx, lds, nbr_tid);

		LLVMBuildStore(ctx->builder, val, own_slot);
		ref_val = LLVMBuildLoad(ctx->builder, ref_slot, "");
		nbr_val = LLVMBuildLoad(ctx->builder, nbr_slot, "");
	} else {
		LLVMValueRef operands[2];

		/* The first bpermute operand is the source TID times 4. */
		operands[0] = LLVMBuildMul(ctx->builder, ref_tid,
					   LLVMConstInt(ctx->i32, 4, false), "");
		operands[1] = val;
		ref_val = ac_build_intrinsic(ctx,
					     "llvm.amdgcn.ds.bpermute", ctx->i32,
					     operands, 2,
					     AC_FUNC_ATTR_READNONE |
					     AC_FUNC_ATTR_CONVERGENT);

		operands[0] = LLVMBuildMul(ctx->builder, nbr_tid,
					   LLVMConstInt(ctx->i32, 4, false), "");
		nbr_val = ac_build_intrinsic(ctx,
					     "llvm.amdgcn.ds.bpermute", ctx->i32,
					     operands, 2,
					     AC_FUNC_ATTR_READNONE |
					     AC_FUNC_ATTR_CONVERGENT);
	}

	ref_val = LLVMBuildBitCast(ctx->builder, ref_val, ctx->f32, "");
	nbr_val = LLVMBuildBitCast(ctx->builder, nbr_val, ctx->f32, "");

	return LLVMBuildFSub(ctx->builder, nbr_val, ref_val, "");
}
/**
 * Store a tag and four column vectors into the format cache.
 *
 * The tag is stored at cache->tags[hash_index]. The four entries of "col"
 * are stored into the data member starting at element hash_index * 16,
 * each one 4 elements after the previous one, through a pointer bitcast to
 * <4 x i32>*.
 *
 * @param gallivm     gallivm state providing builder and context
 * @param col         array of (at least) four values to store
 * @param tag_value   tag to store
 * @param hash_index  index of the cache entry
 * @param cache       pointer to the cache structure
 */
static void
store_cached_block(struct gallivm_state *gallivm,
                   LLVMValueRef *col,
                   LLVMValueRef tag_value,
                   LLVMValueRef hash_index,
                   LLVMValueRef cache)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMTypeRef i32x4_ptr_type;
   LLVMValueRef gep_idx[3];
   LLVMValueRef slot;
   LLVMValueRef data_index;
   unsigned i = 0;

   i32x4_ptr_type = LLVMPointerType(LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4), 0);

   /* Tag: cache->tags[hash_index] */
   gep_idx[0] = lp_build_const_int32(gallivm, 0);
   gep_idx[1] = lp_build_const_int32(gallivm, LP_BUILD_FORMAT_CACHE_MEMBER_TAGS);
   gep_idx[2] = hash_index;
   slot = LLVMBuildGEP(builder, cache, gep_idx, ARRAY_SIZE(gep_idx), "");
   LLVMBuildStore(builder, tag_value, slot);

   /* Data: four consecutive <4 x i32> stores from hash_index * 16 on */
   gep_idx[1] = lp_build_const_int32(gallivm, LP_BUILD_FORMAT_CACHE_MEMBER_DATA);
   data_index = LLVMBuildMul(builder, hash_index,
                             lp_build_const_int32(gallivm, 16), "");
   while (i < 4) {
      gep_idx[2] = data_index;
      slot = LLVMBuildGEP(builder, cache, gep_idx, ARRAY_SIZE(gep_idx), "");
      slot = LLVMBuildBitCast(builder, slot, i32x4_ptr_type, "");
      LLVMBuildStore(builder, col[i], slot);
      data_index = LLVMBuildAdd(builder, data_index,
                                lp_build_const_int32(gallivm, 4), "");
      i++;
   }
}
Esempio n. 4
0
/**
 * Emit the LLVM instruction for a RIR binary arithmetic expression.
 *
 * Both operands are resolved through bllvm_value_from_rir_value_or_die()
 * before the instruction is built. Division is emitted as an unsigned
 * division (LLVMBuildUDiv).
 *
 * @param expr  RIR expression; its type selects add, sub, mul or div and
 *              its binaryop.a / binaryop.b members are the operands.
 * @param ctx   Traversal context supplying the LLVM builder.
 * @return      The value of the built instruction, or NULL if expr->type
 *              is not one of the four handled operations and
 *              RF_CRITICAL_FAIL() returns control to this function.
 */
struct LLVMOpaqueValue *bllvm_compile_rirbop(const struct rir_expression *expr,
                                             struct llvm_traversal_ctx *ctx)
{
    /* Initialised so that the default branch can never return an
     * indeterminate value. */
    LLVMValueRef ret = NULL;
    LLVMValueRef left = bllvm_value_from_rir_value_or_die(expr->binaryop.a, ctx);
    LLVMValueRef right = bllvm_value_from_rir_value_or_die(expr->binaryop.b, ctx);
    switch(expr->type) {
    case RIR_EXPRESSION_ADD:
        ret = LLVMBuildAdd(ctx->builder, left, right, "");
        break;
    case RIR_EXPRESSION_SUB:
        ret = LLVMBuildSub(ctx->builder, left, right, "");
        break;
    case RIR_EXPRESSION_MUL:
        ret = LLVMBuildMul(ctx->builder, left, right, "");
        break;
    case RIR_EXPRESSION_DIV:
        ret = LLVMBuildUDiv(ctx->builder, left, right, "");
        break;
    default:
        RF_CRITICAL_FAIL("Should never get anything other than binaryop here");
        break;
    }
    return ret;
}
Esempio n. 5
0
// Generate the body of the "_offset" method: given a pointer and an element
// count n, return the pointer advanced by n elements of type t_elem.
// NOTE(review): FIND_METHOD presumably looks up the method and binds `m`
// (used below), and BOX_FUNCTION presumably emits a boxed variant; both are
// macros defined elsewhere - confirm.
static void pointer_offset(compile_t* c, reach_type_t* t, reach_type_t* t_elem)
{
  FIND_METHOD("_offset");

  // Only the first two entries are used: (pointer, element count).
  LLVMTypeRef params[3];
  params[0] = t->use_type;
  params[1] = c->intptr;
  start_function(c, m, t->use_type, params, 2);

  // Set up a constant integer holding the ABI size of one element.
  size_t size = (size_t)LLVMABISizeOfType(c->target_data, t_elem->use_type);
  LLVMValueRef l_size = LLVMConstInt(c->intptr, size, false);

  LLVMValueRef ptr = LLVMGetParam(m->func, 0);
  LLVMValueRef n = LLVMGetParam(m->func, 1);

  // Return ptr + (n * sizeof(elem)), computed in integer arithmetic and
  // converted back to the pointer type.
  LLVMValueRef src = LLVMBuildPtrToInt(c->builder, ptr, c->intptr, "");
  LLVMValueRef offset = LLVMBuildMul(c->builder, n, l_size, "");
  LLVMValueRef result = LLVMBuildAdd(c->builder, src, offset, "");
  result = LLVMBuildIntToPtr(c->builder, result, t->use_type, "");

  LLVMBuildRet(c->builder, result);
  codegen_finishfun(c);

  BOX_FUNCTION();
}
Esempio n. 6
0
// Generate the body of the "_alloc" method: call the runtime function
// "pony_alloc" for len elements of type t_elem and return the result cast
// to the pointer type t.
// NOTE(review): FIND_METHOD presumably looks up the method and binds `m`
// (used below); it is a macro defined elsewhere - confirm.
static void pointer_alloc(compile_t* c, reach_type_t* t,
  reach_type_t* t_elem)
{
  FIND_METHOD("_alloc");

  // Signature: (pointer, element count) -> pointer.
  LLVMTypeRef params[2];
  params[0] = t->use_type;
  params[1] = c->intptr;
  start_function(c, m, t->use_type, params, 2);

  // Set up a constant integer holding the ABI size of one element.
  size_t size = (size_t)LLVMABISizeOfType(c->target_data, t_elem->use_type);
  LLVMValueRef l_size = LLVMConstInt(c->intptr, size, false);

  // pony_alloc(ctx, len * sizeof(elem))
  LLVMValueRef len = LLVMGetParam(m->func, 1);
  LLVMValueRef args[2];
  args[0] = codegen_ctx(c);
  args[1] = LLVMBuildMul(c->builder, len, l_size, "");

  LLVMValueRef result = gencall_runtime(c, "pony_alloc", args, 2, "");
  result = LLVMBuildBitCast(c->builder, result, t->use_type, "");

  LLVMBuildRet(c->builder, result);
  codegen_finishfun(c);
}
Esempio n. 7
0
/*
 * Generate code for a multiplication node.
 *
 * The operands are generated in a fixed order (ast->one first, then
 * ast->two). Calling codegen() directly inside the LLVMBuildMul() argument
 * list would leave the order of the two calls unspecified in C, so the
 * order in which the operand instructions get emitted would depend on the
 * compiler.
 *
 * Returns the value of the built "mul" instruction.
 */
LLVMValueRef gen_mul(struct node *ast)
{
	LLVMValueRef lhs = codegen(ast->one);
	LLVMValueRef rhs = codegen(ast->two);

	return LLVMBuildMul(builder, lhs, rhs, "");
}
Esempio n. 8
0
/**
 * Special case for converting clamped IEEE-754 floats to unsigned norms.
 *
 * The mathematical voodoo below may seem excessive but it is actually
 * paramount we do it this way for several reasons. First, there is no single
 * precision FP to unsigned integer conversion Intel SSE instruction. Second,
 * even if there was, since the FP's mantissa takes only a fraction
 * of register bits the typical scale and cast approach would require double
 * precision for accurate results, and therefore half the throughput.
 *
 * Although the result values can be scaled to an arbitrary bit width specified
 * by dst_width, the actual result type will have the same width.
 *
 * @param builder    LLVM instruction builder
 * @param src_type   type of src; must be floating point (asserted)
 * @param dst_width  number of bits of the unsigned normalized result
 * @param src        value to convert
 * @return           value of the integer vector type matching src_type
 */
LLVMValueRef
lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder,
                                        struct lp_type src_type,
                                        unsigned dst_width,
                                        LLVMValueRef src)
{
   LLVMTypeRef int_vec_type = lp_build_int_vec_type(src_type);
   LLVMValueRef res;
   unsigned mantissa;
   unsigned n;
   unsigned long long ubound;
   unsigned long long mask;
   double scale;
   double bias;

   assert(src_type.floating);

   mantissa = lp_mantissa(src_type);

   /* We cannot carry more bits than the mantissa */
   n = MIN2(mantissa, dst_width);

   /* These magic coefficients will make the desired result appear in the
    * lowest significant bits of the mantissa:
    *   scale = (2^n - 1) / 2^n
    *   bias  = 2^(mantissa - n)
    */
   ubound = ((unsigned long long)1 << n);
   mask = ubound - 1;
   scale = (double)mask/ubound;
   bias = (double)((unsigned long long)1 << (mantissa - n));

   /* res = bitcast(src * scale + bias)
    * NOTE(review): src is floating point, yet the plain LLVMBuildMul /
    * LLVMBuildAdd builders are used rather than the FMul/FAdd ones; confirm
    * this is valid for the LLVM version this file targets.
    */
   res = LLVMBuildMul(builder, src, lp_build_const_scalar(src_type, scale), "");
   res = LLVMBuildAdd(builder, res, lp_build_const_scalar(src_type, bias), "");
   res = LLVMBuildBitCast(builder, res, int_vec_type, "");

   if(dst_width > n) {
      /* More destination bits than we could carry: move the n result bits
       * to the top of the dst_width range.
       */
      int shift = dst_width - n;
      res = LLVMBuildShl(builder, res, lp_build_int_const_scalar(src_type, shift), "");

      /* TODO: Fill in the empty lower bits for additional precision? */
#if 0
      {
         LLVMValueRef msb;
         msb = LLVMBuildLShr(builder, res, lp_build_int_const_scalar(src_type, dst_width - 1), "");
         msb = LLVMBuildShl(builder, msb, lp_build_int_const_scalar(src_type, shift), "");
         msb = LLVMBuildSub(builder, msb, lp_build_int_const_scalar(src_type, 1), "");
         res = LLVMBuildOr(builder, res, msb, "");
      }
#elif 0
      while(shift > 0) {
         res = LLVMBuildOr(builder, res, LLVMBuildLShr(builder, res, lp_build_int_const_scalar(src_type, n), ""), "");
         shift -= n;
         n *= 2;
      }
#endif
   }
   else
      /* Keep only the n low bits, discarding the rest of the float's bits */
      res = LLVMBuildAnd(builder, res, lp_build_int_const_scalar(src_type, mask), "");

   return res;
}
Esempio n. 9
0
/**
 * Extract Y, U, V channels from packed YUYV.
 *
 * @param gallivm  gallivm state providing the builder
 * @param n  is the number of vector elements
 * @param packed  is a <n x i32> vector with the packed YUYV blocks
 * @param i  is a <n x i32> vector with the x pixel coordinate (0 or 1)
 * @param y  receives the <n x i32> Y channel, in the range [0, 255]
 * @param u  receives the <n x i32> U channel, in the range [0, 255]
 * @param v  receives the <n x i32> V channel, in the range [0, 255]
 */
static void
yuyv_to_yuv_soa(struct gallivm_state *gallivm,
                unsigned n,
                LLVMValueRef packed,
                LLVMValueRef i,
                LLVMValueRef *y,
                LLVMValueRef *u,
                LLVMValueRef *v)
{
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_type type;
   LLVMValueRef mask;

   /* 32-bit integer vector type with n elements; all other fields zero */
   memset(&type, 0, sizeof type);
   type.width = 32;
   type.length = n;

   assert(lp_check_value(type, packed));
   assert(lp_check_value(type, i));

   /*
    * y = (yuyv >> 16*i) & 0xff
    * u = (yuyv >> 8   ) & 0xff
    * v = (yuyv >> 24  ) & 0xff
    */

#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
   /*
    * Avoid shift with per-element count.
    * No support on x86, gets translated to roughly 5 instructions
    * per element. Didn't measure performance but cuts shader size
    * by quite a bit (less difference if cpu has no sse4.1 support).
    */
   if (util_cpu_caps.has_sse2 && n == 4) {
      LLVMValueRef sel, tmp;
      struct lp_build_context bld32;

      lp_build_context_init(&bld32, gallivm, type);

      /* Shift everything by 16 and pick the unshifted value where i == 0 */
      tmp = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 16), "");
      sel = lp_build_compare(gallivm, type, PIPE_FUNC_EQUAL, i, lp_build_const_int_vec(gallivm, type, 0));
       *y = lp_build_select(&bld32, sel, packed, tmp);
   } else
#endif
   {
      /* Generic path: per-element shift count of 16*i */
      LLVMValueRef shift;
      shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, 16), "");
      *y = LLVMBuildLShr(builder, packed, shift, "");
   }

   *u = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 8), "");
   *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 24), "");

   /* Keep only the low byte of every channel */
   mask = lp_build_const_int_vec(gallivm, type, 0xff);

   *y = LLVMBuildAnd(builder, *y, mask, "y");
   *u = LLVMBuildAnd(builder, *u, mask, "u");
   *v = LLVMBuildAnd(builder, *v, mask, "v");
}
Esempio n. 10
0
/**
 * Build an approximation of 2^x, optionally exposing its building blocks.
 *
 * x is clamped to [-126.99999, 129.0] and split as
 *
 *    ipart = int(x - 0.5)
 *    fpart = x - ipart
 *
 * 2^ipart is built by adding 127 to ipart, shifting it left by 23 and
 * reinterpreting the bits as a float; 2^fpart is evaluated with the
 * lp_build_exp2_polynomial coefficients.
 *
 * Nothing is emitted for results that are not requested.
 *
 * @param bld              build context; its type must be 32-bit floating
 *                         point (asserted)
 * @param x                exponent
 * @param p_exp2_int_part  if not NULL, receives 2^ipart
 * @param p_frac_part      if not NULL, receives fpart
 * @param p_exp2           if not NULL, receives 2^ipart * poly(fpart)
 */
void
lp_build_exp2_approx(struct lp_build_context *bld,
                     LLVMValueRef x,
                     LLVMValueRef *p_exp2_int_part,
                     LLVMValueRef *p_frac_part,
                     LLVMValueRef *p_exp2)
{
   const struct lp_type type = bld->type;
   LLVMTypeRef vec_type = lp_build_vec_type(type);
   LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
   LLVMValueRef int_part;
   LLVMValueRef frac_part;
   LLVMValueRef pow2_int = NULL;
   LLVMValueRef pow2 = NULL;

   /* Nothing requested, nothing to build */
   if(!p_exp2_int_part && !p_frac_part && !p_exp2)
      return;

   /* TODO: optimize the constant case */
   if(LLVMIsConstant(x))
      debug_printf("%s: inefficient/imprecise constant arithmetic\n",
                   __FUNCTION__);

   assert(type.floating && type.width == 32);

   /* Clamp the exponent */
   x = lp_build_min(bld, x, lp_build_const_scalar(type,  129.0));
   x = lp_build_max(bld, x, lp_build_const_scalar(type, -126.99999));

   /* int_part = int(x - 0.5) */
   int_part = LLVMBuildSub(bld->builder, x, lp_build_const_scalar(type, 0.5f), "");
   int_part = LLVMBuildFPToSI(bld->builder, int_part, int_vec_type, "");

   /* frac_part = x - int_part */
   frac_part = LLVMBuildSIToFP(bld->builder, int_part, vec_type, "");
   frac_part = LLVMBuildSub(bld->builder, x, frac_part, "");

   if(p_exp2_int_part || p_exp2) {
      /* pow2_int = (float) (1 << int_part) */
      pow2_int = LLVMBuildAdd(bld->builder, int_part, lp_build_int_const_scalar(type, 127), "");
      pow2_int = LLVMBuildShl(bld->builder, pow2_int, lp_build_int_const_scalar(type, 23), "");
      pow2_int = LLVMBuildBitCast(bld->builder, pow2_int, vec_type, "");

      if(p_exp2) {
         LLVMValueRef pow2_frac;

         pow2_frac = lp_build_polynomial(bld, frac_part, lp_build_exp2_polynomial,
                                         Elements(lp_build_exp2_polynomial));

         pow2 = LLVMBuildMul(bld->builder, pow2_int, pow2_frac, "");
      }
   }

   if(p_exp2_int_part)
      *p_exp2_int_part = pow2_int;

   if(p_frac_part)
      *p_frac_part = frac_part;

   if(p_exp2)
      *p_exp2 = pow2;
}
Esempio n. 11
0
// Generate the body of the "_delete" method: load the element at the start
// of the pointer, then memmove len elements starting at offset n down to
// the start, and return the element that was loaded.
// NOTE(review): FIND_METHOD presumably looks up the method and binds `m`
// (used below); it is a macro defined elsewhere - confirm.
static void pointer_delete(compile_t* c, reach_type_t* t, reach_type_t* t_elem)
{
  FIND_METHOD("_delete");

  // Signature: (pointer, n, len) -> element.
  LLVMTypeRef params[3];
  params[0] = t->use_type;
  params[1] = c->intptr;
  params[2] = c->intptr;
  start_function(c, m, t_elem->use_type, params, 3);

  // Set up a constant integer holding the ABI size of one element.
  size_t size = (size_t)LLVMABISizeOfType(c->target_data, t_elem->use_type);
  LLVMValueRef l_size = LLVMConstInt(c->intptr, size, false);

  LLVMValueRef ptr = LLVMGetParam(m->func, 0);
  LLVMValueRef n = LLVMGetParam(m->func, 1);
  LLVMValueRef len = LLVMGetParam(m->func, 2);

  // Read ptr[0] before the memmove below overwrites it.
  LLVMValueRef elem_ptr = LLVMBuildBitCast(c->builder, ptr,
    LLVMPointerType(t_elem->use_type, 0), "");
  LLVMValueRef result = LLVMBuildLoad(c->builder, elem_ptr, "");

  // dst = ptr, src = ptr + (n * sizeof(elem)), elen = len * sizeof(elem),
  // all computed in integer arithmetic.
  LLVMValueRef dst = LLVMBuildPtrToInt(c->builder, elem_ptr, c->intptr, "");
  LLVMValueRef offset = LLVMBuildMul(c->builder, n, l_size, "");
  LLVMValueRef src = LLVMBuildAdd(c->builder, dst, offset, "");
  LLVMValueRef elen = LLVMBuildMul(c->builder, len, l_size, "");

  // Intrinsic operands: dst, src, byte count, i32 1, i1 0.
  LLVMValueRef args[5];
  args[0] = LLVMBuildIntToPtr(c->builder, dst, c->void_ptr, "");
  args[1] = LLVMBuildIntToPtr(c->builder, src, c->void_ptr, "");
  args[2] = elen;
  args[3] = LLVMConstInt(c->i32, 1, false);
  args[4] = LLVMConstInt(c->i1, 0, false);

  // llvm.memmove.*(ptr, ptr + (n * sizeof(elem)), len * sizeof(elem))
  // The intrinsic variant is selected by the target's pointer width.
  if(target_is_ilp32(c->opt->triple))
  {
    gencall_runtime(c, "llvm.memmove.p0i8.p0i8.i32", args, 5, "");
  } else {
    gencall_runtime(c, "llvm.memmove.p0i8.p0i8.i64", args, 5, "");
  }

  // Return ptr[0].
  LLVMBuildRet(c->builder, result);
  codegen_finishfun(c);
}
Esempio n. 12
0
/**
 * Inverse of lp_build_clamped_float_to_unsigned_norm above.
 *
 * The (at most mantissa-wide) integer is OR'ed into the bit pattern of the
 * float constant 2^(mantissa - n), the result is reinterpreted as a float,
 * the constant is subtracted again and the remainder is rescaled by
 * 2^n / (2^n - 1), where n = MIN2(mantissa, src_width).
 *
 * @param builder    LLVM instruction builder
 * @param src_width  number of significant bits in src
 * @param dst_type   type of the result
 * @param src        unsigned normalized integer value
 * @return           value of the vector type matching dst_type
 */
LLVMValueRef
lp_build_unsigned_norm_to_float(LLVMBuilderRef builder,
                                unsigned src_width,
                                struct lp_type dst_type,
                                LLVMValueRef src)
{
   LLVMTypeRef float_vec_type = lp_build_vec_type(dst_type);
   LLVMTypeRef int_vec_type = lp_build_int_vec_type(dst_type);
   const unsigned mantissa = lp_mantissa(dst_type);
   const unsigned n = MIN2(mantissa, src_width);
   const unsigned long long ubound = ((unsigned long long)1 << n);
   const unsigned long long mask = ubound - 1;
   const double scale = (double)ubound/mask;
   const double bias = (double)((unsigned long long)1 << (mantissa - n));
   LLVMValueRef bias_val;
   LLVMValueRef bias_bits;
   LLVMValueRef val = src;

   /* Drop the bits that do not fit in the mantissa */
   if(src_width > mantissa) {
      int shift = src_width - mantissa;
      val = LLVMBuildLShr(builder, val, lp_build_int_const_scalar(dst_type, shift), "");
   }

   bias_val = lp_build_const_scalar(dst_type, bias);

   /* Plant the integer into the bit pattern of the bias */
   bias_bits = LLVMBuildBitCast(builder, bias_val, int_vec_type, "");
   val = LLVMBuildOr(builder, val, bias_bits, "");

   val = LLVMBuildBitCast(builder, val, float_vec_type, "");

   /* Remove the bias again and rescale */
   val = LLVMBuildSub(builder, val, bias_val, "");
   val = LLVMBuildMul(builder, val, lp_build_const_scalar(dst_type, scale), "");

   return val;
}
Esempio n. 13
0
/**
 * Recursively translate an AST node into an LLVM value.
 *
 * - AST_BINARY_OP: both operands are visited (left first, then right) and
 *   combined with the add/sub/mul/sdiv instruction matching the operator
 *   character.
 * - AST_INT: yields a 32-bit integer constant.
 *
 * @param node     AST node to translate
 * @param builder  LLVM instruction builder
 * @return  the resulting value, or NULL if node is NULL, has an unknown
 *          type, uses an unknown operator, or one of its operands could
 *          not be translated.
 */
static inline LLVMValueRef LLVM_visit(ASTNode *node, LLVMBuilderRef builder) {
  if (node == NULL) {
    return NULL;
  }

  switch(node->type) {
    case AST_BINARY_OP: {
      ASTBinaryOp *binary_op = (ASTBinaryOp*)node;
      LLVMValueRef a = LLVM_visit(binary_op->lhs, builder);
      LLVMValueRef b = LLVM_visit(binary_op->rhs, builder);
      /* Do not hand a failed operand to the instruction builders. */
      if (a == NULL || b == NULL) {
        return NULL;
      }
      switch(binary_op->op) {
        case '+': return LLVMBuildAdd(builder, a, b, "a + b");
        case '-': return LLVMBuildSub(builder, a, b, "a - b");
        case '*': return LLVMBuildMul(builder, a, b, "a * b");
        case '/': return LLVMBuildSDiv(builder, a, b, "a / b");
        /* An unknown operator must not fall through into the AST_INT
         * case, which would reinterpret this node as an ASTInt. */
        default:  return NULL;
      }
    }
    case AST_INT: {
      return LLVMConstInt(LLVMInt32Type(), ((ASTInt*)node)->value, 0);
    }
    default:
      break;
  }

  /* Unknown node type: always return a defined value. */
  return NULL;
}
Esempio n. 14
0
/*
 * Build the LLVM instruction for an integer binary operator.
 *
 * Bitwise and arithmetic operators map directly onto the matching
 * instruction (division is signed). Relational operators all map onto an
 * integer comparison; only the predicate differs, and the ordered ones use
 * the signed predicates.
 *
 * Returns the built value, or NULL when Op is not an integer binary
 * operator handled here.
 */
static LLVMValueRef
translateIntBinOp(NodeKind Op, LLVMValueRef ValueE1, LLVMValueRef ValueE2) {
  LLVMIntPredicate Pred;

  switch (Op) {
    /* Bitwise and arithmetic operators. */
    case OrOp:
      return LLVMBuildOr(Builder, ValueE1, ValueE2, "");
    case AndOp:
      return LLVMBuildAnd(Builder, ValueE1, ValueE2, "");
    case SumOp:
      return LLVMBuildAdd(Builder, ValueE1, ValueE2, "");
    case SubOp:
      return LLVMBuildSub(Builder, ValueE1, ValueE2, "");
    case MultOp:
      return LLVMBuildMul(Builder, ValueE1, ValueE2, "");
    case DivOp:
      return LLVMBuildSDiv(Builder, ValueE1, ValueE2, "");

    /* Relational operators: pick the predicate, compare below. */
    case LtOp:   Pred = LLVMIntSLT; break;
    case LeOp:   Pred = LLVMIntSLE; break;
    case GtOp:   Pred = LLVMIntSGT; break;
    case GeOp:   Pred = LLVMIntSGE; break;
    case EqOp:   Pred = LLVMIntEQ;  break;
    case DiffOp: Pred = LLVMIntNE;  break;

    default:
      return NULL;
  }

  return LLVMBuildICmp(Builder, Pred, ValueE1, ValueE2, "");
}
Esempio n. 15
0
static LLVMValueRef
emit_array_index(
	struct lp_build_tgsi_soa_context *bld,
	const struct tgsi_full_src_register *reg,
	unsigned swizzle)
{
	struct gallivm_state * gallivm = bld->bld_base.base.gallivm;

	LLVMValueRef addr = LLVMBuildLoad(gallivm->builder,
	bld->addr[reg->Indirect.Index][swizzle], "");
	LLVMValueRef offset = lp_build_const_int32(gallivm, reg->Register.Index);
	LLVMValueRef hw_index = LLVMBuildAdd(gallivm->builder, addr, offset, "");
	LLVMValueRef soa_index = LLVMBuildMul(gallivm->builder, hw_index,
	lp_build_const_int32(gallivm, 4), "");
	LLVMValueRef array_index = LLVMBuildAdd(gallivm->builder, soa_index,
	lp_build_const_int32(gallivm, swizzle), "");

	return array_index;
}
Esempio n. 16
0
// Generate the serialise_trace function for an array type. The generated
// function reserves serialisation space for the array's elements (element
// count times element ABI size) through "pony_serialise_reserve" and then
// traces the elements.
void genprim_array_serialise_trace(compile_t* c, reach_type_t* t)
{
  // Create the function; it uses the C calling convention and has
  // external linkage.
  LLVMValueRef fn = codegen_addfun(c, genname_serialise_trace(t->name),
    c->trace_type);
  t->serialise_trace_fn = fn;

  codegen_startfun(c, fn, NULL, NULL);
  LLVMSetFunctionCallConv(fn, LLVMCCallConv);
  LLVMSetLinkage(fn, LLVMExternalLinkage);

  // Parameters: the context and the array object being serialised.
  LLVMValueRef fn_ctx = LLVMGetParam(fn, 0);
  LLVMValueRef raw_object = LLVMGetParam(fn, 1);
  LLVMValueRef array = LLVMBuildBitCast(c->builder, raw_object, t->use_type,
    "");

  // Field 1 of the array holds the element count.
  LLVMValueRef count = field_value(c, array, 1);

  // The element type is the first type argument of the array type.
  ast_t* elem_ast = ast_child(ast_childidx(t->ast, 2));
  reach_type_t* t_elem = reach_type(c->reach, elem_ast);

  size_t elem_bytes = (size_t)LLVMABISizeOfType(c->target_data,
    t_elem->use_type);
  LLVMValueRef l_elem_bytes = LLVMConstInt(c->intptr, elem_bytes, false);

  // Field 3 of the array holds the pointer to the elements.
  LLVMValueRef elems = field_value(c, array, 3);

  // pony_serialise_reserve(ctx, elems, count * sizeof(elem))
  LLVMValueRef reserve_args[3];
  reserve_args[0] = fn_ctx;
  reserve_args[1] = elems;
  reserve_args[2] = LLVMBuildMul(c->builder, count, l_elem_bytes, "");
  gencall_runtime(c, "pony_serialise_reserve", reserve_args, 3, "");

  // Trace the array elements.
  trace_array_elements(c, t, fn_ctx, array, elems);

  LLVMBuildRetVoid(c->builder);
  codegen_finishfun(c);
}
Esempio n. 17
0
// Generate the body of the "_copy_to" method: memcpy n elements from the
// receiver pointer to a second pointer and return the receiver pointer.
// NOTE(review): FIND_METHOD presumably looks up the method and binds `m`
// (used below), and BOX_FUNCTION presumably emits a boxed variant; both are
// macros defined elsewhere - confirm.
static void pointer_copy_to(compile_t* c, reach_type_t* t,
  reach_type_t* t_elem)
{
  FIND_METHOD("_copy_to");

  // Signature: (source pointer, destination pointer, n) -> pointer.
  LLVMTypeRef params[3];
  params[0] = t->use_type;
  params[1] = t->use_type;
  params[2] = c->intptr;
  start_function(c, m, t->use_type, params, 3);

  // Set up a constant integer holding the ABI size of one element.
  size_t size = (size_t)LLVMABISizeOfType(c->target_data, t_elem->use_type);
  LLVMValueRef l_size = LLVMConstInt(c->intptr, size, false);

  LLVMValueRef ptr = LLVMGetParam(m->func, 0);
  LLVMValueRef ptr2 = LLVMGetParam(m->func, 1);
  LLVMValueRef n = LLVMGetParam(m->func, 2);
  // Number of bytes to copy: n * sizeof(elem).
  LLVMValueRef elen = LLVMBuildMul(c->builder, n, l_size, "");

  // Intrinsic operands: dst (ptr2), src (ptr), byte count, i32 1, i1 0.
  LLVMValueRef args[5];
  args[0] = LLVMBuildBitCast(c->builder, ptr2, c->void_ptr, "");
  args[1] = LLVMBuildBitCast(c->builder, ptr, c->void_ptr, "");
  args[2] = elen;
  args[3] = LLVMConstInt(c->i32, 1, false);
  args[4] = LLVMConstInt(c->i1, 0, false);

  // llvm.memcpy.*(ptr2, ptr, n * sizeof(elem), 1, 0)
  // The intrinsic variant is selected by the target's pointer width.
  if(target_is_ilp32(c->opt->triple))
  {
    gencall_runtime(c, "llvm.memcpy.p0i8.p0i8.i32", args, 5, "");
  } else {
    gencall_runtime(c, "llvm.memcpy.p0i8.p0i8.i64", args, 5, "");
  }

  // Return the source pointer.
  LLVMBuildRet(c->builder, ptr);
  codegen_finishfun(c);

  BOX_FUNCTION();
}
/**
 * Extract Y, U, V channels from packed UYVY.
 *
 * @param builder  LLVM instruction builder
 * @param n  is the number of vector elements
 * @param packed  is a <n x i32> vector with the packed UYVY blocks
 * @param i  is a <n x i32> vector with the x pixel coordinate (0 or 1)
 * @param y  receives the <n x i32> Y channel, in the range [0, 255]
 * @param u  receives the <n x i32> U channel, in the range [0, 255]
 * @param v  receives the <n x i32> V channel, in the range [0, 255]
 */
static void
uyvy_to_yuv_soa(LLVMBuilderRef builder,
                unsigned n,
                LLVMValueRef packed,
                LLVMValueRef i,
                LLVMValueRef *y,
                LLVMValueRef *u,
                LLVMValueRef *v)
{
   struct lp_type type;
   LLVMValueRef y_shift;
   LLVMValueRef byte_mask;

   /* 32-bit integer vector type with n elements; all other fields zero */
   memset(&type, 0, sizeof type);
   type.width = 32;
   type.length = n;

   assert(lp_check_value(type, packed));
   assert(lp_check_value(type, i));

   /*
    * y = (uyvy >> (16*i + 8)) & 0xff
    * u = (uyvy             ) & 0xff
    * v = (uyvy >> 16       ) & 0xff
    */

   /* Per-element shift count for the luma byte: 16*i + 8 */
   y_shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(type, 16), "");
   y_shift = LLVMBuildAdd(builder, y_shift, lp_build_const_int_vec(type, 8), "");

   *y = LLVMBuildLShr(builder, packed, y_shift, "");
   *u = packed;
   *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(type, 16), "");

   /* Keep only the low byte of every channel */
   byte_mask = lp_build_const_int_vec(type, 0xff);

   *y = LLVMBuildAnd(builder, *y, byte_mask, "y");
   *u = LLVMBuildAnd(builder, *u, byte_mask, "u");
   *v = LLVMBuildAnd(builder, *v, byte_mask, "v");
}
Esempio n. 19
0
/**
 * Convert Y, U, V channels to R, G, B using fixed point integer arithmetic.
 *
 * With _y = y - 16, _u = u - 128 and _v = v - 128:
 *
 *    r = (298 * _y            + 409 * _v + 128) >> 8
 *    g = (298 * _y - 100 * _u - 208 * _v + 128) >> 8
 *    b = (298 * _y + 516 * _u            + 128) >> 8
 *
 * each clamped to [0, 255].
 *
 * @param gallivm  gallivm state providing the builder
 * @param n        number of vector elements
 * @param y, u, v  <n x i32> input channels
 * @param r, g, b  receive the <n x i32> output channels
 */
static INLINE void
yuv_to_rgb_soa(struct gallivm_state *gallivm,
               unsigned n,
               LLVMValueRef y, LLVMValueRef u, LLVMValueRef v,
               LLVMValueRef *r, LLVMValueRef *g, LLVMValueRef *b)
{
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_type type;
   struct lp_build_context bld;

   LLVMValueRef zero, shift8, luma_bias, chroma_bias, max255;
   LLVMValueRef coef_y, coef_ug, coef_ub, coef_vr, coef_vg;
   LLVMValueRef luma;

   /* Signed 32-bit integer vector type with n elements */
   memset(&type, 0, sizeof type);
   type.sign = TRUE;
   type.width = 32;
   type.length = n;

   lp_build_context_init(&bld, gallivm, type);

   assert(lp_check_value(type, y));
   assert(lp_check_value(type, u));
   assert(lp_check_value(type, v));

   /*
    * Constants. The chroma bias (128) doubles as the rounding term that is
    * added before the final shift.
    */

   zero        = lp_build_const_int_vec(gallivm, type,   0);
   shift8      = lp_build_const_int_vec(gallivm, type,   8);
   luma_bias   = lp_build_const_int_vec(gallivm, type,  16);
   chroma_bias = lp_build_const_int_vec(gallivm, type, 128);
   max255      = lp_build_const_int_vec(gallivm, type, 255);

   coef_y  = lp_build_const_int_vec(gallivm, type,  298);
   coef_ug = lp_build_const_int_vec(gallivm, type, -100);
   coef_ub = lp_build_const_int_vec(gallivm, type,  516);
   coef_vr = lp_build_const_int_vec(gallivm, type,  409);
   coef_vg = lp_build_const_int_vec(gallivm, type, -208);

   /*
    * Remove the biases:
    *
    *  y -= 16;
    *  u -= 128;
    *  v -= 128;
    */

   y = LLVMBuildSub(builder, y, luma_bias, "");
   u = LLVMBuildSub(builder, u, chroma_bias, "");
   v = LLVMBuildSub(builder, v, chroma_bias, "");

   /*
    * The luma term, including rounding, is shared by all three channels:
    *
    *  luma = 298 * _y + 128
    */

   luma = LLVMBuildMul(builder, y, coef_y, "");
   luma = LLVMBuildAdd(builder, luma, chroma_bias, "");

   /*
    * Chroma terms:
    *
    *  r =             409 * _v
    *  g = -100 * _u - 208 * _v
    *  b =  516 * _u
    */

   *r = LLVMBuildMul(builder, v, coef_vr, "");
   *g = LLVMBuildAdd(builder,
                     LLVMBuildMul(builder, u, coef_ug, ""),
                     LLVMBuildMul(builder, v, coef_vg, ""),
                     "");
   *b = LLVMBuildMul(builder, u, coef_ub, "");

   /* Add the shared luma term */
   *r = LLVMBuildAdd(builder, *r, luma, "");
   *g = LLVMBuildAdd(builder, *g, luma, "");
   *b = LLVMBuildAdd(builder, *b, luma, "");

   /* Back from 8 fractional bits to integers (arithmetic shift) */
   *r = LLVMBuildAShr(builder, *r, shift8, "r");
   *g = LLVMBuildAShr(builder, *g, shift8, "g");
   *b = LLVMBuildAShr(builder, *b, shift8, "b");

   /* Clamp to [0, 255] */
   *r = lp_build_clamp(&bld, *r, zero, max255);
   *g = lp_build_clamp(&bld, *g, zero, max255);
   *b = lp_build_clamp(&bld, *b, zero, max255);
}
/**
 * Emit the LLVM IR for a TGSI texture instruction.
 *
 * Buffer textures are special-cased for TXQ and TXF and return early:
 * - TXQ reads a vector from the buffer-info constant buffer (rotated by one
 *   component on pre-Evergreen chips).
 * - TXF emits "llvm.R600.load.texbuf"; on pre-Evergreen chips the result is
 *   additionally AND'ed with a mask vector and its W component OR'ed with a
 *   value, both loaded from the buffer-info constant buffer.
 *
 * Every other case builds the operand list for the intrinsic named by
 * action->intr_name: the operands taken from emit_data->args, followed by
 * the sampler index + R600_MAX_CONST_BUFFERS, the sampler index and the
 * texture target. TXF on MSAA targets rewrites the texture target operand
 * and, with compressed MSAA texturing, replaces the W coordinate with a
 * nibble fetched through "llvm.R600.ldptr".
 *
 * For TXQ on cube arrays with Z enabled in the write mask, the Z component
 * of the result is replaced by a value loaded from CONSTANT_TXQ_BUFFER.
 *
 * @param action     action whose intr_name names the intrinsic to call
 * @param bld_base   TGSI build context
 * @param emit_data  instruction, operands (args / arg_count) and result
 *                   slot (output[0])
 */
static void llvm_emit_tex(
	const struct lp_build_tgsi_action * action,
	struct lp_build_tgsi_context * bld_base,
	struct lp_build_emit_data * emit_data)
{
	struct gallivm_state * gallivm = bld_base->base.gallivm;
	/*
	 * Room for the copied operands plus up to four appended ones: three
	 * that are always appended and one more on the compressed MSAA
	 * path. The previous 7-entry array was too small for that last
	 * append.
	 */
	LLVMValueRef args[9];
	unsigned c, sampler_src;
	struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);

	if (emit_data->inst->Texture.Texture == TGSI_TEXTURE_BUFFER) {
		switch (emit_data->inst->Instruction.Opcode) {
		case TGSI_OPCODE_TXQ: {
			ctx->uses_tex_buffers = true;
			bool isEgPlus = (ctx->chip_class >= EVERGREEN);
			LLVMValueRef offset = lp_build_const_int32(gallivm,
				isEgPlus ? 0 : 1);
			LLVMValueRef cvecval = llvm_load_const_buffer(bld_base, offset,
				LLVM_R600_BUFFER_INFO_CONST_BUFFER);
			if (!isEgPlus) {
				/* Rotate the components: (x, y, z, w) -> (y, z, w, x) */
				LLVMValueRef maskval[4] = {
					lp_build_const_int32(gallivm, 1),
					lp_build_const_int32(gallivm, 2),
					lp_build_const_int32(gallivm, 3),
					lp_build_const_int32(gallivm, 0),
				};
				LLVMValueRef mask = LLVMConstVector(maskval, 4);
				cvecval = LLVMBuildShuffleVector(gallivm->builder, cvecval, cvecval,
					mask, "");
			}
			emit_data->output[0] = cvecval;
			return;
		}
		case TGSI_OPCODE_TXF: {
			args[0] = LLVMBuildExtractElement(gallivm->builder, emit_data->args[0], lp_build_const_int32(gallivm, 0), "");
			args[1] = lp_build_const_int32(gallivm, R600_MAX_CONST_BUFFERS);
			emit_data->output[0] = build_intrinsic(gallivm->builder,
							"llvm.R600.load.texbuf",
							emit_data->dst_type, args, 2, LLVMReadNoneAttribute);
			if (ctx->chip_class >= EVERGREEN)
				return;
			ctx->uses_tex_buffers = true;

			/* result &= mask vector from the buffer-info constants */
			emit_data->output[0] = LLVMBuildBitCast(gallivm->builder,
				emit_data->output[0], LLVMVectorType(bld_base->base.int_elem_type, 4),
				"");
			LLVMValueRef Mask = llvm_load_const_buffer(bld_base,
				lp_build_const_int32(gallivm, 0),
				LLVM_R600_BUFFER_INFO_CONST_BUFFER);
			Mask = LLVMBuildBitCast(gallivm->builder, Mask,
				LLVMVectorType(bld_base->base.int_elem_type, 4), "");
			emit_data->output[0] = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_AND,
				emit_data->output[0],
				Mask);

			/* result.w |= first component of the next constant */
			LLVMValueRef WComponent = LLVMBuildExtractElement(gallivm->builder,
				emit_data->output[0], lp_build_const_int32(gallivm, 3), "");
			Mask = llvm_load_const_buffer(bld_base, lp_build_const_int32(gallivm, 1),
				LLVM_R600_BUFFER_INFO_CONST_BUFFER);
			Mask = LLVMBuildExtractElement(gallivm->builder, Mask,
				lp_build_const_int32(gallivm, 0), "");
			Mask = LLVMBuildBitCast(gallivm->builder, Mask,
				bld_base->base.int_elem_type, "");
			WComponent = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_OR,
				WComponent, Mask);
			emit_data->output[0] = LLVMBuildInsertElement(gallivm->builder,
				emit_data->output[0], WComponent, lp_build_const_int32(gallivm, 3), "");
			emit_data->output[0] = LLVMBuildBitCast(gallivm->builder,
				emit_data->output[0], LLVMVectorType(bld_base->base.elem_type, 4), "");
			return;
		}
		default:
			break;
		}
	}

	if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_TEX ||
		emit_data->inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
		/* Rebuild the coordinate vector, replacing the components the
		 * texture target does not use with undef.
		 */
		LLVMValueRef Vector[4] = {
			LLVMBuildExtractElement(gallivm->builder, emit_data->args[0],
				lp_build_const_int32(gallivm, 0), ""),
			LLVMBuildExtractElement(gallivm->builder, emit_data->args[0],
				lp_build_const_int32(gallivm, 1), ""),
			LLVMBuildExtractElement(gallivm->builder, emit_data->args[0],
				lp_build_const_int32(gallivm, 2), ""),
			LLVMBuildExtractElement(gallivm->builder, emit_data->args[0],
				lp_build_const_int32(gallivm, 3), ""),
		};
		switch (emit_data->inst->Texture.Texture) {
		case TGSI_TEXTURE_2D:
		case TGSI_TEXTURE_RECT:
			Vector[2] = Vector[3] = LLVMGetUndef(bld_base->base.elem_type);
			break;
		case TGSI_TEXTURE_1D:
			Vector[1] = Vector[2] = Vector[3] = LLVMGetUndef(bld_base->base.elem_type);
			break;
		default:
			break;
		}
		args[0] = lp_build_gather_values(gallivm, Vector, 4);
	} else {
		args[0] = emit_data->args[0];
	}

	/* Up to four more operands are appended below. */
	assert(emit_data->arg_count + 4 <= Elements(args));

	for (c = 1; c < emit_data->arg_count; ++c)
		args[c] = emit_data->args[c];

	if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_TXF) {
		/* The three offsets are passed multiplied by two. */
		args[1] = LLVMBuildShl(gallivm->builder, args[1], lp_build_const_int32(gallivm, 1), "");
		args[2] = LLVMBuildShl(gallivm->builder, args[2], lp_build_const_int32(gallivm, 1), "");
		args[3] = LLVMBuildShl(gallivm->builder, args[3], lp_build_const_int32(gallivm, 1), "");
	}

	/* The sampler is the last source register. */
	sampler_src = emit_data->inst->Instruction.NumSrcRegs-1;

	args[c++] = lp_build_const_int32(gallivm,
					emit_data->inst->Src[sampler_src].Register.Index + R600_MAX_CONST_BUFFERS);
	args[c++] = lp_build_const_int32(gallivm,
					emit_data->inst->Src[sampler_src].Register.Index);
	args[c++] = lp_build_const_int32(gallivm,
					emit_data->inst->Texture.Texture);

	if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_TXF &&
		(emit_data->inst->Texture.Texture == TGSI_TEXTURE_2D_MSAA ||
		emit_data->inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY_MSAA)) {

		/* Pass the non-MSAA equivalent as the texture target.
		 * NOTE(review): slot 6 is the texture target slot only when
		 * four operands were copied above - confirm arg_count is
		 * always 4 for TXF.
		 */
		switch (emit_data->inst->Texture.Texture) {
		case TGSI_TEXTURE_2D_MSAA:
			args[6] = lp_build_const_int32(gallivm, TGSI_TEXTURE_2D);
			break;
		case TGSI_TEXTURE_2D_ARRAY_MSAA:
			args[6] = lp_build_const_int32(gallivm, TGSI_TEXTURE_2D_ARRAY);
			break;
		default:
			break;
		}

		if (ctx->has_compressed_msaa_texturing) {
			LLVMValueRef ldptr_args[10] = {
				args[0], // Coord
				args[1], // Offset X
				args[2], // Offset Y
				args[3], // Offset Z
				args[4],
				args[5],
				lp_build_const_int32(gallivm, 1),
				lp_build_const_int32(gallivm, 1),
				lp_build_const_int32(gallivm, 1),
				lp_build_const_int32(gallivm, 1)
			};
			LLVMValueRef ptr = build_intrinsic(gallivm->builder,
				"llvm.R600.ldptr",
				emit_data->dst_type, ldptr_args, 10, LLVMReadNoneAttribute);

			/* coord.w = (ldptr.x >> (coord.w * 4)) & 0xF */
			LLVMValueRef Tmp = LLVMBuildExtractElement(gallivm->builder, args[0],
				lp_build_const_int32(gallivm, 3), "");
			Tmp = LLVMBuildMul(gallivm->builder, Tmp,
				lp_build_const_int32(gallivm, 4), "");
			LLVMValueRef ResX = LLVMBuildExtractElement(gallivm->builder, ptr,
				lp_build_const_int32(gallivm, 0), "");
			ResX = LLVMBuildBitCast(gallivm->builder, ResX,
				bld_base->base.int_elem_type, "");
			Tmp = LLVMBuildLShr(gallivm->builder, ResX, Tmp, "");
			Tmp = LLVMBuildAnd(gallivm->builder, Tmp,
				lp_build_const_int32(gallivm, 0xF), "");
			args[0] = LLVMBuildInsertElement(gallivm->builder, args[0], Tmp,
				lp_build_const_int32(gallivm, 3), "");
			args[c++] = lp_build_const_int32(gallivm,
				emit_data->inst->Texture.Texture);
		}
	}

	emit_data->output[0] = build_intrinsic(gallivm->builder,
					action->intr_name,
					emit_data->dst_type, args, c, LLVMReadNoneAttribute);

	if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_TXQ &&
		((emit_data->inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY ||
		emit_data->inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY))) {
		if (emit_data->inst->Dst[0].Register.WriteMask & 4) {
			/* Replace result.z with the value from the TXQ constants. */
			LLVMValueRef offset = lp_build_const_int32(gallivm, 0);
			LLVMValueRef ZLayer = LLVMBuildExtractElement(gallivm->builder,
				llvm_load_const_buffer(bld_base, offset, CONSTANT_TXQ_BUFFER),
				lp_build_const_int32(gallivm, 0), "");

			emit_data->output[0] = LLVMBuildInsertElement(gallivm->builder, emit_data->output[0], ZLayer, lp_build_const_int32(gallivm, 2), "");
			ctx->has_txq_cube_array_z_comp = true;
		}
	}
}
Esempio n. 21
0
/*
 * Returns the module-internal helper function "__muidl_get_stritem_len",
 * emitting it on first use and caching it in ctx->stritem_len_fn.
 *
 * Shape of the emitted function:
 *     returns (i32 len, i32 new_tpos)
 *     params  (word *utcbptr, i32 tpos)
 *
 * When the returned "new_tpos" > tmax + 1, the result is invalid. The
 * function should also not be called when tpos > tmax + 1.
 *
 * While the body is emitted, ctx->builder, ctx->utcb and ctx->tpos are
 * pointed at a private builder and at the new function's parameters; all
 * three are restored before returning.
 *
 * NOTE(review): the "looks like a string item" test masks bit 4 of the info
 * word, which is also the lowest bit of the 5-bit j field read from bits
 * 4..8 -- confirm the mask against the item layout.
 */
static LLVMValueRef get_stritem_len_fn(struct llvm_ctx *ctx)
{
	if(ctx->stritem_len_fn != NULL) {
		return ctx->stritem_len_fn;
	}

	/* declare: { i32, i32 } __muidl_get_stritem_len(word *, i32) */
	LLVMTypeRef result_fields[2] = { ctx->i32t, ctx->i32t };
	LLVMTypeRef param_types[2] = { LLVMPointerType(ctx->wordt, 0), ctx->i32t };
	LLVMTypeRef result_type = LLVMStructTypeInContext(ctx->ctx,
		result_fields, 2, 0);
	LLVMTypeRef fn_type = LLVMFunctionType(result_type, param_types, 2, 0);

	LLVMValueRef fn = LLVMAddFunction(ctx->module, "__muidl_get_stritem_len",
		fn_type);
	LLVMSetVisibility(fn, LLVMHiddenVisibility);
	LLVMSetLinkage(fn, LLVMInternalLinkage);

	V params[2];
	LLVMGetParams(fn, params);
	LLVMAddAttribute(params[0], LLVMNoCaptureAttribute);
	LLVMAddAttribute(params[0], LLVMInRegAttribute);
	LLVMAddAttribute(params[1], LLVMInRegAttribute);

	/* cache before emitting the body */
	ctx->stritem_len_fn = fn;

	/* switch the context over to a private builder and the new parameters;
	 * everything saved here is put back at the end.
	 */
	LLVMBuilderRef saved_builder = ctx->builder;
	LLVMValueRef saved_utcb = ctx->utcb;
	LLVMValueRef saved_tpos = ctx->tpos;
	ctx->builder = LLVMCreateBuilderInContext(ctx->ctx);
	LLVMBuilderRef bld = ctx->builder;

	LLVMBasicBlockRef entry_bb = LLVMAppendBasicBlockInContext(ctx->ctx, fn,
		"EntryBlock");
	LLVMBasicBlockRef loop_bb = LLVMAppendBasicBlockInContext(ctx->ctx, fn,
		"loop");
	LLVMBasicBlockRef valid_bb = LLVMAppendBasicBlockInContext(ctx->ctx, fn,
		"valid");
	LLVMBasicBlockRef exit_bb = LLVMAppendBasicBlockInContext(ctx->ctx, fn,
		"exit");

	/* entry: jump straight into the loop. */
	LLVMPositionBuilderAtEnd(bld, entry_bb);
	ctx->utcb = params[0];
	ctx->tpos = params[1];
	LLVMBuildBr(bld, loop_bb);

	/* exit: return whatever (len, tpos) pair the predecessor supplied. */
	LLVMPositionBuilderAtEnd(bld, exit_bb);
	LLVMValueRef exit_len_phi = LLVMBuildPhi(bld, ctx->i32t, "exit.len.phi");
	LLVMValueRef exit_tpos_phi = LLVMBuildPhi(bld, ctx->i32t,
		"exit.tpos.phi");
	LLVMValueRef results[2] = { exit_len_phi, exit_tpos_phi };
	LLVMBuildAggregateRet(bld, results, 2);

	/* loop: length starts at zero, position at the tpos parameter. */
	LLVMPositionBuilderAtEnd(bld, loop_bb);
	LLVMValueRef len_phi = LLVMBuildPhi(bld, ctx->i32t, "len.phi");
	LLVMValueRef tpos_phi = LLVMBuildPhi(bld, ctx->i32t, "tpos.phi");
	LLVMAddIncoming(len_phi, &ctx->zero, &entry_bb, 1);
	LLVMAddIncoming(tpos_phi, &ctx->tpos, &entry_bb, 1);
	ctx->tpos = tpos_phi;

	/* test: if *tpos doesn't look like a string item, leave through the
	 * exit block.
	 */
	LLVMValueRef infoword = build_utcb_load(ctx, ctx->tpos, "si.info");
	LLVMValueRef si_bit = LLVMBuildAnd(bld, infoword, CONST_WORD(1 << 4),
		"infoword.si.mask");
	LLVMValueRef is_cond = LLVMBuildICmp(bld, LLVMIntEQ, ctx->zero, si_bit,
		"infoword.si.cond");
	/* anything + 100 is sure to be > tmax + 1, which marks the result as
	 * invalid for the caller.
	 */
	LLVMValueRef invalid_tpos = LLVMBuildAdd(bld, tpos_phi, CONST_INT(100),
		"f****d.tpos");
	branch_set_phi(ctx, exit_len_phi, len_phi);
	branch_set_phi(ctx, exit_tpos_phi, invalid_tpos);
	LLVMBuildCondBr(bld, is_cond, valid_bb, exit_bb);

	/* valid: pick the info word apart.
	 *   length = infoword >> 10
	 *   j      = (infoword >> 4) & 0x1f
	 *   c      = infoword & (1 << 9)
	 */
	LLVMPositionBuilderAtEnd(bld, valid_bb);
	LLVMValueRef len_bits = LLVMBuildLShr(bld, infoword, CONST_INT(10),
		"si.info.len");
	LLVMValueRef string_length = LLVMBuildTruncOrBitCast(bld, len_bits,
		ctx->i32t, "si.info.len.int");

	LLVMValueRef j_shifted = LLVMBuildLShr(bld, infoword, CONST_WORD(4),
		"si.info.j.shift");
	LLVMValueRef j_masked = LLVMBuildAnd(bld, CONST_WORD(0x1f), j_shifted,
		"si.info.j.masked");
	LLVMValueRef string_j = LLVMBuildTruncOrBitCast(bld, j_masked,
		ctx->i32t, "si.info.j");

	LLVMValueRef c_masked = LLVMBuildAnd(bld, CONST_WORD(1 << 9), infoword,
		"si.info.c.masked");
	LLVMValueRef string_c = LLVMBuildTruncOrBitCast(bld, c_masked,
		ctx->i32t, "si.info.c.masked.int");
	LLVMValueRef c_cond = LLVMBuildICmp(bld, LLVMIntNE, string_c,
		CONST_WORD(0), "si.info.c.cond");

	/* len.new = len + length * (j + 1) */
	LLVMValueRef j_plus_one = LLVMBuildAdd(bld, string_j, CONST_INT(1),
		"j.plus.one");
	LLVMValueRef len_incr = LLVMBuildMul(bld, string_length, j_plus_one,
		"len.incr");
	LLVMValueRef new_len = LLVMBuildAdd(bld, len_phi, len_incr, "len.new");

	/* tpos.new = tpos + (c ? 2 + j : 2) */
	LLVMValueRef cont_bump = LLVMBuildAdd(bld, CONST_INT(2), string_j,
		"cont.tpos.bump");
	LLVMValueRef tpos_bump = LLVMBuildSelect(bld, c_cond, cont_bump,
		CONST_INT(2), "tpos.bump");
	LLVMValueRef new_tpos = LLVMBuildAdd(bld, ctx->tpos, tpos_bump,
		"tpos.new");

	/* the updated pair feeds both the next iteration and the exit block;
	 * the loop repeats for as long as the c bit was set.
	 */
	LLVMAddIncoming(len_phi, &new_len, &valid_bb, 1);
	LLVMAddIncoming(tpos_phi, &new_tpos, &valid_bb, 1);
	LLVMAddIncoming(exit_len_phi, &new_len, &valid_bb, 1);
	LLVMAddIncoming(exit_tpos_phi, &new_tpos, &valid_bb, 1);
	LLVMBuildCondBr(bld, c_cond, loop_bb, exit_bb);

	/* put the caller's emission state back. */
	LLVMDisposeBuilder(ctx->builder);
	ctx->builder = saved_builder;
	ctx->utcb = saved_utcb;
	ctx->tpos = saved_tpos;

	return ctx->stritem_len_fn;
}
Esempio n. 22
0
/*
 * gen_operator_expression
 *
 * Emits LLVM IR for an operator expression node and returns the result
 * adjusted to 'neededtype'.
 *
 * Fetch, assignment and shift are delegated to their own generators.
 * Every other operator evaluates its operands as machine-scalar-width
 * integers and maps onto a single LLVM instruction (two for EQV, which
 * is XOR followed by NOT).  Division and modulo are emitted as the
 * unsigned LLVM operations.  Comparison operators become an icmp whose
 * predicate is picked by llvmgen_predfromop().  An operator that matches
 * none of these is reported through expr_signal() and yields a null
 * constant.
 */
static LLVMValueRef
gen_operator_expression (gencodectx_t gctx, expr_node_t *exp, LLVMTypeRef neededtype)
{
    expr_node_t *left_expr = expr_op_lhs(exp);
    expr_node_t *right_expr = expr_op_rhs(exp);
    optype_t op = expr_op_type(exp);
    LLVMBuilderRef builder = gctx->curfn->builder;
    LLVMTypeRef scalartype;
    LLVMValueRef left, right, value;

    // Operators with dedicated generators never reach the generic path.
    switch (op) {
        case OPER_FETCH:
            return gen_fetch(gctx, right_expr, neededtype);
        case OPER_ASSIGN: {
            LLVMValueRef assigned = llvmgen_assignment(gctx, left_expr, right_expr);
            return llvmgen_adjustval(gctx, assigned, neededtype, 0);
        }
        case OPER_SHIFT:
            return gen_shift(gctx, left_expr, right_expr, neededtype);
        default:
            break;
    }

    scalartype = LLVMIntTypeInContext(gctx->llvmctx, machine_scalar_bits(gctx->mach));

    // Unary operators carry no left operand; the left side is always
    // generated before the right side.
    left = 0;
    if (left_expr != 0) {
        left = llvmgen_expression(gctx, left_expr, scalartype);
    }
    right = llvmgen_expression(gctx, right_expr, scalartype);

    switch (op) {
        case OPER_UNARY_PLUS:
            value = right;
            break;
        case OPER_UNARY_MINUS:
            value = LLVMBuildNeg(builder, right, llvmgen_temp(gctx));
            break;
        case OPER_NOT:
            value = LLVMBuildNot(builder, right, llvmgen_temp(gctx));
            break;
        case OPER_ADD:
            value = LLVMBuildAdd(builder, left, right, llvmgen_temp(gctx));
            break;
        case OPER_SUBTRACT:
            value = LLVMBuildSub(builder, left, right, llvmgen_temp(gctx));
            break;
        case OPER_MULT:
            value = LLVMBuildMul(builder, left, right, llvmgen_temp(gctx));
            break;
        case OPER_DIV:
            value = LLVMBuildUDiv(builder, left, right, llvmgen_temp(gctx));
            break;
        case OPER_MODULO:
            value = LLVMBuildURem(builder, left, right, llvmgen_temp(gctx));
            break;
        case OPER_AND:
            value = LLVMBuildAnd(builder, left, right, llvmgen_temp(gctx));
            break;
        case OPER_OR:
            value = LLVMBuildOr(builder, left, right, llvmgen_temp(gctx));
            break;
        case OPER_XOR:
            value = LLVMBuildXor(builder, left, right, llvmgen_temp(gctx));
            break;
        case OPER_EQV:
            // Equivalence is the complement of exclusive-or.
            value = LLVMBuildXor(builder, left, right, llvmgen_temp(gctx));
            value = LLVMBuildNot(builder, value, llvmgen_temp(gctx));
            break;
        default:
            if (op < OPER_CMP_EQL || op > OPER_CMP_GEQA) {
                // Everything should be covered
                expr_signal(gctx->ectx, STC__INTCMPERR, "gen_operator_expression");
                value = LLVMConstNull(scalartype);
            } else {
                value = LLVMBuildICmp(builder,
                                      llvmgen_predfromop(op, machine_addr_signed(gctx->mach)),
                                      left, right, llvmgen_temp(gctx));
            }
            break;
    }

    return llvmgen_adjustval(gctx, value, neededtype, 0);

} /* gen_operator_expression */
Esempio n. 23
0
/**
 * Emit an approximate base-2 logarithm of a 32-bit float vector.
 *
 * Each output pointer is optional; only the work needed for the requested
 * outputs is emitted, and nothing is emitted when all three are NULL.
 *
 * @param bld           build context; its type must be 32-bit floating point
 * @param x             input value
 * @param p_exp         receives the exponent field of x, still in place
 *                      (bits of x masked with 0x7f800000, integer vector)
 * @param p_floor_log2  receives the unbiased exponent, (exp >> 23) - 127,
 *                      converted to float
 * @param p_log2        receives floor_log2 plus a polynomial in the mantissa
 *
 * See http://www.devmaster.net/forums/showthread.php?p=43580
 */
void
lp_build_log2_approx(struct lp_build_context *bld,
                     LLVMValueRef x,
                     LLVMValueRef *p_exp,
                     LLVMValueRef *p_floor_log2,
                     LLVMValueRef *p_log2)
{
   const struct lp_type type = bld->type;
   LLVMTypeRef float_vec_type = lp_build_vec_type(type);
   LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);

   LLVMValueRef exponent_mask = lp_build_int_const_scalar(type, 0x7f800000);
   LLVMValueRef mantissa_mask = lp_build_int_const_scalar(type, 0x007fffff);
   LLVMValueRef one_bits = LLVMConstBitCast(bld->one, int_vec_type);

   /* Each stage is needed by every stage after it. */
   const int want_log2 = p_log2 != NULL;
   const int want_floor = want_log2 || p_floor_log2 != NULL;
   const int want_exp = want_floor || p_exp != NULL;

   LLVMValueRef x_bits = NULL;
   LLVMValueRef exponent = NULL;
   LLVMValueRef mantissa = NULL;
   LLVMValueRef mantissa_minus_one = NULL;
   LLVMValueRef floor_log2 = NULL;
   LLVMValueRef poly = NULL;
   LLVMValueRef log2_value = NULL;

   if(want_exp) {
      /* TODO: optimize the constant case */
      if(LLVMIsConstant(x))
         debug_printf("%s: inefficient/imprecise constant arithmetic\n",
                      __FUNCTION__);

      assert(type.floating && type.width == 32);

      /* Reinterpret the float bits and isolate the exponent field. */
      x_bits = LLVMBuildBitCast(bld->builder, x, int_vec_type, "");
      exponent = LLVMBuildAnd(bld->builder, x_bits, exponent_mask, "");
   }

   if(want_floor) {
      /* floor(log2(x)) = (float)((exponent >> 23) - 127) */
      floor_log2 = LLVMBuildLShr(bld->builder, exponent,
                                 lp_build_int_const_scalar(type, 23), "");
      floor_log2 = LLVMBuildSub(bld->builder, floor_log2,
                                lp_build_int_const_scalar(type, 127), "");
      floor_log2 = LLVMBuildSIToFP(bld->builder, floor_log2, float_vec_type, "");
   }

   if(want_log2) {
      /* Rebuild the mantissa as a float by giving it the exponent of 1.0. */
      mantissa = LLVMBuildAnd(bld->builder, x_bits, mantissa_mask, "");
      mantissa = LLVMBuildOr(bld->builder, mantissa, one_bits, "");
      mantissa = LLVMBuildBitCast(bld->builder, mantissa, float_vec_type, "");

      poly = lp_build_polynomial(bld, mantissa, lp_build_log2_polynomial,
                                 Elements(lp_build_log2_polynomial));

      /* Multiplying by (mantissa - 1) effectively raises the polynomial
       * degree by one, but ensures that log2(1) == 0.
       */
      mantissa_minus_one = LLVMBuildSub(bld->builder, mantissa, bld->one, "");
      poly = LLVMBuildMul(bld->builder, poly, mantissa_minus_one, "");

      log2_value = LLVMBuildAdd(bld->builder, poly, floor_log2, "");
   }

   if(p_exp)
      *p_exp = exponent;

   if(p_floor_log2)
      *p_floor_log2 = floor_log2;

   if(p_log2)
      *p_log2 = log2_value;
}
Esempio n. 24
0
void genprim_array_deserialise(compile_t* c, reach_type_t* t)
{
  // Create the deserialise function for this array type and start emitting
  // its body with the C calling convention.
  // NOTE(review): the symbol name is produced by genname_serialise(), the
  // same call the serialise function uses -- confirm this is intended.
  t->deserialise_fn = codegen_addfun(c, genname_serialise(t->name),
    c->trace_type);

  codegen_startfun(c, t->deserialise_fn, NULL, NULL);
  LLVMSetFunctionCallConv(t->deserialise_fn, LLVMCCallConv);

  LLVMValueRef ctx = LLVMGetParam(t->deserialise_fn, 0);
  LLVMValueRef arg = LLVMGetParam(t->deserialise_fn, 1);

  LLVMValueRef object = LLVMBuildBitCast(c->builder, arg, t->structure_ptr,
    "");
  gendeserialise_typeid(c, t, object);

  // Field 2 is the allocation count; field 3 currently holds the offset of
  // the serialised contents in place of a pointer.
  LLVMValueRef alloc = field_value(c, object, 2);
  LLVMValueRef stored_offset = field_value(c, object, 3);
  stored_offset = LLVMBuildPtrToInt(c->builder, stored_offset, c->intptr, "");

  // Look up the element type from the array's first type argument.
  ast_t* typeargs = ast_childidx(t->ast, 2);
  ast_t* typearg = ast_child(typeargs);
  reach_type_t* t_elem = reach_type(c->reach, typearg);

  size_t abisize = (size_t)LLVMABISizeOfType(c->target_data, t_elem->use_type);
  LLVMValueRef elem_bytes = LLVMConstInt(c->intptr, abisize, false);

  // Ask the runtime for the block of alloc * element-size bytes at that
  // offset, and write the resulting pointer back into field 3.
  LLVMValueRef block_bytes = LLVMBuildMul(c->builder, alloc, elem_bytes, "");
  LLVMValueRef block_args[3] = { ctx, stored_offset, block_bytes };
  LLVMValueRef ptr = gencall_runtime(c, "pony_deserialise_block", block_args,
    3, "");

  LLVMValueRef ptr_loc = LLVMBuildStructGEP(c->builder, object, 3, "");
  LLVMBuildStore(c->builder, ptr, ptr_loc);

  // For machine-word primitives the block copy is sufficient; every other
  // element type is deserialised one element at a time.
  bool copy_is_enough = (t_elem->underlying == TK_PRIMITIVE) &&
    (t_elem->primitive != NULL);

  if(!copy_is_enough)
  {
    LLVMValueRef size = field_value(c, object, 1);
    ptr = LLVMBuildBitCast(c->builder, ptr,
      LLVMPointerType(t_elem->use_type, 0), "");

    LLVMBasicBlockRef entry_block = LLVMGetInsertBlock(c->builder);
    LLVMBasicBlockRef cond_block = codegen_block(c, "cond");
    LLVMBasicBlockRef body_block = codegen_block(c, "body");
    LLVMBasicBlockRef post_block = codegen_block(c, "post");

    LLVMBuildBr(c->builder, cond_block);

    // cond: loop while index < size. The index is a phi that is zero when
    // arriving from the entry block.
    LLVMPositionBuilderAtEnd(c->builder, cond_block);
    LLVMValueRef index = LLVMBuildPhi(c->builder, c->intptr, "");
    LLVMValueRef zero = LLVMConstInt(c->intptr, 0, false);
    LLVMAddIncoming(index, &zero, &entry_block, 1);
    LLVMValueRef in_range = LLVMBuildICmp(c->builder, LLVMIntULT, index, size,
      "");
    LLVMBuildCondBr(c->builder, in_range, body_block, post_block);

    // body: address element [index] and deserialise it.
    LLVMPositionBuilderAtEnd(c->builder, body_block);
    LLVMValueRef elem_ptr = LLVMBuildGEP(c->builder, ptr, &index, 1, "");
    gendeserialise_element(c, t_elem, false, ctx, elem_ptr);

    // Increment the index. The back edge must come from whichever block the
    // builder is in now, so re-read the insert block before wiring the phi.
    LLVMValueRef one = LLVMConstInt(c->intptr, 1, false);
    LLVMValueRef next_index = LLVMBuildAdd(c->builder, index, one, "");
    body_block = LLVMGetInsertBlock(c->builder);
    LLVMAddIncoming(index, &next_index, &body_block, 1);
    LLVMBuildBr(c->builder, cond_block);

    LLVMPositionBuilderAtEnd(c->builder, post_block);
  }

  LLVMBuildRetVoid(c->builder);
  codegen_finishfun(c);
}
Esempio n. 25
0
void genprim_array_serialise(compile_t* c, reach_type_t* t)
{
  // Create the serialise function for this array type and start emitting
  // its body with the C calling convention.
  t->serialise_fn = codegen_addfun(c, genname_serialise(t->name),
    c->serialise_type);

  codegen_startfun(c, t->serialise_fn, NULL, NULL);
  LLVMSetFunctionCallConv(t->serialise_fn, LLVMCCallConv);

  LLVMValueRef ctx = LLVMGetParam(t->serialise_fn, 0);
  LLVMValueRef arg = LLVMGetParam(t->serialise_fn, 1);
  LLVMValueRef addr = LLVMGetParam(t->serialise_fn, 2);
  LLVMValueRef offset = LLVMGetParam(t->serialise_fn, 3);
  LLVMValueRef mut = LLVMGetParam(t->serialise_fn, 4);

  LLVMValueRef object = LLVMBuildBitCast(c->builder, arg, t->structure_ptr,
    "");

  // Destination of this object's header: addr + offset, as an integer.
  LLVMValueRef addr_int = LLVMBuildPtrToInt(c->builder, addr, c->intptr, "");
  LLVMValueRef offset_addr = LLVMBuildAdd(c->builder, addr_int, offset, "");

  genserialise_typeid(c, t, offset_addr);

  // An opaque array writes nothing beyond the type id: skip straight to the
  // final block in that case.
  LLVMBasicBlockRef contents_block = codegen_block(c, "body");
  LLVMBasicBlockRef done_block = codegen_block(c, "post");

  LLVMValueRef not_opaque = LLVMBuildICmp(c->builder, LLVMIntNE, mut,
    LLVMConstInt(c->i32, PONY_TRACE_OPAQUE, false), "");
  LLVMBuildCondBr(c->builder, not_opaque, contents_block, done_block);
  LLVMPositionBuilderAtEnd(c->builder, contents_block);

  // The size goes into both the size and alloc slots, effectively rewriting
  // alloc to be the same as size.
  LLVMValueRef size = field_value(c, object, 1);

  LLVMValueRef size_loc = field_loc(c, offset_addr, t->structure,
    c->intptr, 1);
  LLVMBuildStore(c->builder, size, size_loc);

  LLVMValueRef alloc_loc = field_loc(c, offset_addr, t->structure,
    c->intptr, 2);
  LLVMBuildStore(c->builder, size, alloc_loc);

  // The pointer slot receives the serialised offset of the contents.
  LLVMValueRef ptr = field_value(c, object, 3);

  // The resulting offset will only be invalid (i.e. have the high bit set) if
  // the size is zero. For an opaque array, we don't serialise the contents,
  // so we don't get here, so we don't end up with an invalid offset.
  LLVMValueRef offset_args[2] = { ctx, ptr };
  LLVMValueRef ptr_offset = gencall_runtime(c, "pony_serialise_offset",
    offset_args, 2, "");

  LLVMValueRef ptr_loc = field_loc(c, offset_addr, t->structure, c->intptr, 3);
  LLVMBuildStore(c->builder, ptr_offset, ptr_loc);

  // Destination of the contents: ptr_offset + addr, as an integer.
  LLVMValueRef base_int = LLVMBuildPtrToInt(c->builder, addr, c->intptr, "");
  LLVMValueRef contents_addr = LLVMBuildAdd(c->builder, ptr_offset, base_int,
    "");

  // Look up the element type from the array's first type argument.
  ast_t* typeargs = ast_childidx(t->ast, 2);
  ast_t* typearg = ast_child(typeargs);
  reach_type_t* t_elem = reach_type(c->reach, typearg);

  size_t abisize = (size_t)LLVMABISizeOfType(c->target_data, t_elem->use_type);
  LLVMValueRef elem_bytes = LLVMConstInt(c->intptr, abisize, false);

  if((t_elem->underlying == TK_PRIMITIVE) && (t_elem->primitive != NULL))
  {
    // Machine-word elements: copy size * element-size bytes in one memcpy.
    // The two trailing arguments are the i32 1 and i1 0 operands of the
    // five-argument memcpy intrinsic.
    LLVMValueRef copy_args[5];
    copy_args[0] = LLVMBuildIntToPtr(c->builder, contents_addr, c->void_ptr,
      "");
    copy_args[1] = LLVMBuildBitCast(c->builder, ptr, c->void_ptr, "");
    copy_args[2] = LLVMBuildMul(c->builder, size, elem_bytes, "");
    copy_args[3] = LLVMConstInt(c->i32, 1, false);
    copy_args[4] = LLVMConstInt(c->i1, 0, false);

    // The intrinsic name depends on the width of the length operand.
    const char* memcpy_name = target_is_ilp32(c->opt->triple) ?
      "llvm.memcpy.p0i8.p0i8.i32" : "llvm.memcpy.p0i8.p0i8.i64";
    gencall_runtime(c, memcpy_name, copy_args, 5, "");
  } else {
    ptr = LLVMBuildBitCast(c->builder, ptr,
      LLVMPointerType(t_elem->use_type, 0), "");

    LLVMBasicBlockRef entry_block = LLVMGetInsertBlock(c->builder);
    LLVMBasicBlockRef cond_block = codegen_block(c, "cond");
    LLVMBasicBlockRef loop_block = codegen_block(c, "body");
    LLVMBasicBlockRef after_block = codegen_block(c, "post");

    // The running destination address lives in a stack slot so that each
    // iteration can load and advance it.
    LLVMValueRef cursor_var = LLVMBuildAlloca(c->builder, c->intptr, "");
    LLVMBuildStore(c->builder, contents_addr, cursor_var);

    LLVMBuildBr(c->builder, cond_block);

    // cond: loop while index < size. The index is a phi that is zero when
    // arriving from the entry block.
    LLVMPositionBuilderAtEnd(c->builder, cond_block);
    LLVMValueRef index = LLVMBuildPhi(c->builder, c->intptr, "");
    LLVMValueRef zero = LLVMConstInt(c->intptr, 0, false);
    LLVMAddIncoming(index, &zero, &entry_block, 1);
    LLVMValueRef in_range = LLVMBuildICmp(c->builder, LLVMIntULT, index, size,
      "");
    LLVMBuildCondBr(c->builder, in_range, loop_block, after_block);

    // body: serialise element [index] at the current destination address,
    // then advance the destination by one element size.
    LLVMPositionBuilderAtEnd(c->builder, loop_block);
    LLVMValueRef elem_ptr = LLVMBuildGEP(c->builder, ptr, &index, 1, "");

    LLVMValueRef cursor = LLVMBuildLoad(c->builder, cursor_var, "");
    genserialise_element(c, t_elem, false, ctx, elem_ptr, cursor);
    LLVMValueRef next_cursor = LLVMBuildAdd(c->builder, cursor, elem_bytes,
      "");
    LLVMBuildStore(c->builder, next_cursor, cursor_var);

    // Increment the index. The back edge must come from whichever block the
    // builder is in now, so re-read the insert block before wiring the phi.
    LLVMValueRef one = LLVMConstInt(c->intptr, 1, false);
    LLVMValueRef next_index = LLVMBuildAdd(c->builder, index, one, "");
    loop_block = LLVMGetInsertBlock(c->builder);
    LLVMAddIncoming(index, &next_index, &loop_block, 1);
    LLVMBuildBr(c->builder, cond_block);

    LLVMPositionBuilderAtEnd(c->builder, after_block);
  }

  // Both content paths fall through to the shared final block.
  LLVMBuildBr(c->builder, done_block);
  LLVMPositionBuilderAtEnd(c->builder, done_block);
  LLVMBuildRetVoid(c->builder);
  codegen_finishfun(c);
}
Esempio n. 26
0
/**
 * Generate a * b for the build context's type.
 *
 * - Operands identical to the context's zero, one or undef values are
 *   resolved without emitting anything.
 * - Normalized, non-fixed integers of width 8 are widened to 16 bits,
 *   multiplied with lp_build_mul_u8n() and packed back. Other normalized
 *   integer widths are not implemented (assert).
 * - Fixed-point results are shifted right by half the type width,
 *   arithmetically for signed types and logically otherwise.
 * - Two constant operands are folded with the LLVMConst* functions instead
 *   of emitting instructions.
 */
LLVMValueRef
lp_build_mul(struct lp_build_context *bld,
             LLVMValueRef a,
             LLVMValueRef b)
{
   const struct lp_type type = bld->type;
   LLVMValueRef fixed_shift;
   LLVMValueRef product;
   int fold;

   /* Trivial operands */
   if(a == bld->zero || b == bld->zero)
      return bld->zero;
   if(a == bld->one)
      return b;
   if(b == bld->one)
      return a;
   if(a == bld->undef || b == bld->undef)
      return bld->undef;

   /* Normalized integers */
   if(!type.floating && !type.fixed && type.norm) {
      if(type.width == 8) {
         struct lp_type wide_type = lp_wider_type(type);
         LLVMValueRef a_lo, a_hi, b_lo, b_hi, prod_lo, prod_hi;

         lp_build_unpack2(bld->builder, type, wide_type, a, &a_lo, &a_hi);
         lp_build_unpack2(bld->builder, type, wide_type, b, &b_lo, &b_hi);

         /* PMULLW, PSRLW, PADDW */
         prod_lo = lp_build_mul_u8n(bld->builder, wide_type, a_lo, b_lo);
         prod_hi = lp_build_mul_u8n(bld->builder, wide_type, a_hi, b_hi);

         return lp_build_pack2(bld->builder, wide_type, type, prod_lo, prod_hi);
      }

      /* FIXME */
      assert(0);
   }

   /* Only fixed-point products need rescaling. */
   fixed_shift = type.fixed ? lp_build_int_const_scalar(type, type.width/2)
                            : NULL;

   fold = LLVMIsConstant(a) && LLVMIsConstant(b);

   if(fold)
      product = LLVMConstMul(a, b);
   else
      product = LLVMBuildMul(bld->builder, a, b, "");

   if(!fixed_shift)
      return product;

   if(fold)
      return type.sign ? LLVMConstAShr(product, fixed_shift)
                       : LLVMConstLShr(product, fixed_shift);

   return type.sign ? LLVMBuildAShr(bld->builder, product, fixed_shift, "")
                    : LLVMBuildLShr(bld->builder, product, fixed_shift, "");
}
Esempio n. 27
0
/**
 * Generic type conversion.
 *
 * Converts num_srcs vectors of src_type into num_dsts vectors of dst_type.
 * The conversion runs in four stages:
 *
 *   1. clamp the values to the destination range (skipped when the two
 *      types are identical),
 *   2. scale to the narrower of the two ranges,
 *   3. truncate (pack) or expand (unpack) the bit width,
 *   4. scale to the wider of the two ranges.
 *
 * Both types must describe the same total register width, and the total
 * number of channels must be the same on both sides.
 *
 * \param builder   where the instructions are emitted
 * \param src_type  type of every source vector
 * \param dst_type  type of every destination vector
 * \param src       source vectors
 * \param num_srcs  number of source vectors, at most LP_MAX_VECTOR_LENGTH
 * \param dst       receives the converted vectors
 * \param num_dsts  number of destination vectors, at most
 *                  LP_MAX_VECTOR_LENGTH
 *
 * TODO: Take a precision argument, or even better, add a new precision member
 * to the lp_type union.
 */
void
lp_build_conv(LLVMBuilderRef builder,
              struct lp_type src_type,
              struct lp_type dst_type,
              const LLVMValueRef *src, unsigned num_srcs,
              LLVMValueRef *dst, unsigned num_dsts)
{
   struct lp_type tmp_type;
   LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH];
   unsigned num_tmps;
   unsigned i;

   /* Register width must remain constant */
   assert(src_type.width * src_type.length == dst_type.width * dst_type.length);

   /* We must not lose or gain channels. Only precision */
   assert(src_type.length * num_srcs == dst_type.length * num_dsts);

   assert(src_type.length <= LP_MAX_VECTOR_LENGTH);
   assert(dst_type.length <= LP_MAX_VECTOR_LENGTH);

   /* tmp[] holds up to num_srcs values on entry and up to num_dsts values
    * after unpacking, so both counts must fit in it.
    */
   assert(num_srcs <= LP_MAX_VECTOR_LENGTH);
   assert(num_dsts <= LP_MAX_VECTOR_LENGTH);

   tmp_type = src_type;
   for(i = 0; i < num_srcs; ++i)
      tmp[i] = src[i];
   num_tmps = num_srcs;

   /*
    * Clamp if necessary
    */

   if(memcmp(&src_type, &dst_type, sizeof src_type) != 0) {
      struct lp_build_context bld;
      double src_min = lp_const_min(src_type);
      double dst_min = lp_const_min(dst_type);
      double src_max = lp_const_max(src_type);
      double dst_max = lp_const_max(dst_type);
      LLVMValueRef thres;

      lp_build_context_init(&bld, builder, tmp_type);

      /* Raise values below the destination minimum */
      if(src_min < dst_min) {
         if(dst_min == 0.0)
            thres = bld.zero;
         else
            thres = lp_build_const_scalar(src_type, dst_min);
         for(i = 0; i < num_tmps; ++i)
            tmp[i] = lp_build_max(&bld, tmp[i], thres);
      }

      /* Lower values above the destination maximum */
      if(src_max > dst_max) {
         if(dst_max == 1.0)
            thres = bld.one;
         else
            thres = lp_build_const_scalar(src_type, dst_max);
         for(i = 0; i < num_tmps; ++i)
            tmp[i] = lp_build_min(&bld, tmp[i], thres);
      }
   }

   /*
    * Scale to the narrowest range
    */

   if(dst_type.floating) {
      /* Nothing to do */
   }
   else if(tmp_type.floating) {
      /* float -> integer */
      if(!dst_type.fixed && !dst_type.sign && dst_type.norm) {
         /* Unsigned normalized destinations have a dedicated path */
         for(i = 0; i < num_tmps; ++i) {
            tmp[i] = lp_build_clamped_float_to_unsigned_norm(builder,
                                                             tmp_type,
                                                             dst_type.width,
                                                             tmp[i]);
         }
         tmp_type.floating = FALSE;
      }
      else {
         double dst_scale = lp_const_scale(dst_type);
         LLVMTypeRef tmp_vec_type;

         if (dst_scale != 1.0) {
            LLVMValueRef scale = lp_build_const_scalar(tmp_type, dst_scale);
            for(i = 0; i < num_tmps; ++i)
               tmp[i] = LLVMBuildMul(builder, tmp[i], scale, "");
         }

         /* Use an equally sized integer for intermediate computations */
         tmp_type.floating = FALSE;
         tmp_vec_type = lp_build_vec_type(tmp_type);
         for(i = 0; i < num_tmps; ++i) {
#if 0
            if(dst_type.sign)
               tmp[i] = LLVMBuildFPToSI(builder, tmp[i], tmp_vec_type, "");
            else
               tmp[i] = LLVMBuildFPToUI(builder, tmp[i], tmp_vec_type, "");
#else
            /* FIXME: there is no SSE counterpart for LLVMBuildFPToUI */
            tmp[i] = LLVMBuildFPToSI(builder, tmp[i], tmp_vec_type, "");
#endif
         }
      }
   }
   else {
      /* integer -> integer: drop the excess low bits before narrowing */
      unsigned src_shift = lp_const_shift(src_type);
      unsigned dst_shift = lp_const_shift(dst_type);

      /* FIXME: compensate different offsets too */
      if(src_shift > dst_shift) {
         LLVMValueRef shift = lp_build_int_const_scalar(tmp_type, src_shift - dst_shift);
         for(i = 0; i < num_tmps; ++i)
            if(src_type.sign)
               tmp[i] = LLVMBuildAShr(builder, tmp[i], shift, "");
            else
               tmp[i] = LLVMBuildLShr(builder, tmp[i], shift, "");
      }
   }

   /*
    * Truncate or expand bit width
    */

   assert(!tmp_type.floating || tmp_type.width == dst_type.width);

   if(tmp_type.width > dst_type.width) {
      /* Narrowing packs every intermediate into a single destination */
      assert(num_dsts == 1);
      tmp[0] = lp_build_pack(builder, tmp_type, dst_type, TRUE, tmp, num_tmps);
      tmp_type.width = dst_type.width;
      tmp_type.length = dst_type.length;
      num_tmps = 1;
   }

   if(tmp_type.width < dst_type.width) {
      /* Widening unpacks a single intermediate into num_dsts values */
      assert(num_tmps == 1);
      lp_build_unpack(builder, tmp_type, dst_type, tmp[0], tmp, num_dsts);
      tmp_type.width = dst_type.width;
      tmp_type.length = dst_type.length;
      num_tmps = num_dsts;
   }

   assert(tmp_type.width == dst_type.width);
   assert(tmp_type.length == dst_type.length);
   assert(num_tmps == num_dsts);

   /*
    * Scale to the widest range
    */

   if(src_type.floating) {
      /* Nothing to do */
   }
   else if(dst_type.floating) {
      /* integer -> float */
      if(!src_type.fixed && !src_type.sign && src_type.norm) {
         /* Unsigned normalized sources have a dedicated path */
         for(i = 0; i < num_tmps; ++i) {
            tmp[i] = lp_build_unsigned_norm_to_float(builder,
                                                     src_type.width,
                                                     dst_type,
                                                     tmp[i]);
         }
         tmp_type.floating = TRUE;
      }
      else {
         double src_scale = lp_const_scale(src_type);
         LLVMTypeRef tmp_vec_type;

         /* Switch the intermediate type to an equally sized signed float */
         tmp_type.floating = TRUE;
         tmp_type.sign = TRUE;
         tmp_vec_type = lp_build_vec_type(tmp_type);
         for(i = 0; i < num_tmps; ++i) {
#if 0
            if(dst_type.sign)
               tmp[i] = LLVMBuildSIToFP(builder, tmp[i], tmp_vec_type, "");
            else
               tmp[i] = LLVMBuildUIToFP(builder, tmp[i], tmp_vec_type, "");
#else
            /* FIXME: there is no SSE counterpart for LLVMBuildUIToFP */
            tmp[i] = LLVMBuildSIToFP(builder, tmp[i], tmp_vec_type, "");
#endif
         }

         if (src_scale != 1.0) {
            LLVMValueRef scale = lp_build_const_scalar(tmp_type, 1.0/src_scale);
            for(i = 0; i < num_tmps; ++i)
               tmp[i] = LLVMBuildMul(builder, tmp[i], scale, "");
         }
      }
   }
   else {
      /* integer -> integer: add the missing low bits after widening */
      unsigned src_shift = lp_const_shift(src_type);
      unsigned dst_shift = lp_const_shift(dst_type);

      /* FIXME: compensate different offsets too */
      if(src_shift < dst_shift) {
         LLVMValueRef shift = lp_build_int_const_scalar(tmp_type, dst_shift - src_shift);
         for(i = 0; i < num_tmps; ++i)
            tmp[i] = LLVMBuildShl(builder, tmp[i], shift, "");
      }
   }

   for(i = 0; i < num_dsts; ++i)
      dst[i] = tmp[i];
}
Esempio n. 28
0
/**
 * Store depth/stencil values.
 *
 * The incoming values are swizzled (typically n 2x2 quads) while the buffer
 * is stored linear, so the vector is split into two half-length vectors:
 * the first is stored at the computed offset, the second one depth_stride
 * further. If a mask is given, a select against the values previously read
 * from the framebuffer is done before the store; otherwise it is a plain
 * store.
 *
 * \param gallivm  code generation state; its builder receives the emitted IR
 * \param z_src_type  the data type of the fragment depth/stencil values
 * \param format_desc  description of the depth/stencil surface
 * \param mask  the alive/dead pixel mask for the quad (vector), may be NULL
 * \param z_fb  z values read from fb (with padding)
 * \param s_fb  s values read from fb (with padding)
 * \param loop_counter  the current loop iteration
 * \param depth_ptr  pointer to the depth/stencil values of this 4x4 block
 * \param depth_stride  stride of the depth/stencil buffer
 * \param z_value  the depth values to store (with padding)
 * \param s_value  the stencil values to store (with padding)
 */
void
lp_build_depth_stencil_write_swizzled(struct gallivm_state *gallivm,
                                      struct lp_type z_src_type,
                                      const struct util_format_description *format_desc,
                                      struct lp_build_mask_context *mask,
                                      LLVMValueRef z_fb,
                                      LLVMValueRef s_fb,
                                      LLVMValueRef loop_counter,
                                      LLVMValueRef depth_ptr,
                                      LLVMValueRef depth_stride,
                                      LLVMValueRef z_value,
                                      LLVMValueRef s_value)
{
   LLVMBuilderRef builder = gallivm->builder;
   const unsigned zs_bits = format_desc->block.bits;
   const unsigned zs_bytes = zs_bits / 8;
   struct lp_type zs_type = lp_depth_type(format_desc, z_src_type.length);
   struct lp_type z_type = zs_type;
   struct lp_type half_type = zs_type;
   struct lp_build_context z_bld;
   LLVMValueRef row_swizzle[LP_MAX_VECTOR_LENGTH / 4];
   LLVMValueRef row0_offset, row1_offset;
   LLVMValueRef row0_ptr, row1_ptr;
   LLVMValueRef row0_val, row1_val;
   LLVMTypeRef half_ptr_type;
   unsigned i;

   /* Each store covers half of the lanes. */
   half_type.length /= 2;
   half_ptr_type = LLVMPointerType(lp_build_vec_type(gallivm, half_type), 0);

   /* The select context works on lanes as wide as the fragment values. */
   z_type.width = z_src_type.width;
   lp_build_context_init(&z_bld, gallivm, z_type);

   /*
    * This is far from ideal, at least for late depth write we should do this
    * outside the fs loop to avoid all the swizzle stuff.
    */
   if (z_src_type.length != 4) {
      LLVMValueRef counter_x2 = LLVMBuildShl(builder, loop_counter,
                                             lp_build_const_int32(gallivm, 1), "");
      assert(z_src_type.length == 8);
      row0_offset = LLVMBuildMul(builder, counter_x2, depth_stride, "");
      /*
       * We store 2x4 values, and need to swizzle them (order
       * 0,1,4,5,2,3,6,7) - not so hot with avx unfortunately.
       */
      for (i = 0; i < 8; i++) {
         row_swizzle[i] = lp_build_const_int32(gallivm,
                                               (i & 1) | ((i & 2) << 1) | ((i & 4) >> 1));
      }
   }
   else {
      /* Bit 0 of the counter selects the horizontal step, bit 1 the vertical. */
      LLVMValueRef counter_lsb = LLVMBuildAnd(builder, loop_counter,
                                              lp_build_const_int32(gallivm, 1), "");
      LLVMValueRef counter_msb = LLVMBuildAnd(builder, loop_counter,
                                              lp_build_const_int32(gallivm, 2), "");
      LLVMValueRef y_offset = LLVMBuildMul(builder, counter_msb,
                                           depth_stride, "");
      row0_offset = LLVMBuildMul(builder, counter_lsb,
                                 lp_build_const_int32(gallivm, zs_bytes * 2), "");
      row0_offset = LLVMBuildAdd(builder, row0_offset, y_offset, "");
   }

   /* The second half goes one stride after the first. */
   row1_offset = LLVMBuildAdd(builder, row0_offset, depth_stride, "");

   row0_ptr = LLVMBuildGEP(builder, depth_ptr, &row0_offset, 1, "");
   row0_ptr = LLVMBuildBitCast(builder, row0_ptr, half_ptr_type, "");
   row1_ptr = LLVMBuildGEP(builder, depth_ptr, &row1_offset, 1, "");
   row1_ptr = LLVMBuildBitCast(builder, row1_ptr, half_ptr_type, "");

   if (zs_bits > 32) {
      /* Give stencil the same vector type as depth for select/interleave. */
      s_value = LLVMBuildBitCast(builder, s_value, z_bld.vec_type, "");
   }

   if (mask) {
      LLVMValueRef alive = lp_build_mask_value(mask);

      z_value = lp_build_select(&z_bld, alive, z_value, z_fb);
      if (zs_bits > 32) {
         s_fb = LLVMBuildBitCast(builder, s_fb, z_bld.vec_type, "");
         s_value = lp_build_select(&z_bld, alive, s_value, s_fb);
      }
   }

   if (zs_type.width < z_src_type.width) {
      /* Truncate ZS values (e.g., when writing to Z16_UNORM) */
      z_value = LLVMBuildTrunc(builder, z_value,
                               lp_build_int_vec_type(gallivm, zs_type), "");
   }

   if (zs_bits > 32) {
      /* Depth and stencil are separate vectors: interleave them per pixel. */
      if (z_src_type.length == 4) {
         row0_val = lp_build_interleave2(gallivm, z_type,
                                         z_value, s_value, 0);
         row1_val = lp_build_interleave2(gallivm, z_type,
                                         z_value, s_value, 1);
      }
      else {
         LLVMValueRef zs_swizzle[LP_MAX_VECTOR_LENGTH / 2];

         assert(z_src_type.length == 8);
         for (i = 0; i < 8; i++) {
            const unsigned lane = (i & 1) | ((i & 2) << 1) | ((i & 4) >> 1);

            /* Even slots pick from z_value, odd slots from s_value. */
            zs_swizzle[2 * i] = lp_build_const_int32(gallivm, lane);
            zs_swizzle[2 * i + 1] = lp_build_const_int32(gallivm,
                                                         lane + z_src_type.length);
         }
         row0_val = LLVMBuildShuffleVector(builder, z_value, s_value,
                                           LLVMConstVector(&zs_swizzle[0],
                                                           z_src_type.length), "");
         row1_val = LLVMBuildShuffleVector(builder, z_value, s_value,
                                           LLVMConstVector(&zs_swizzle[8],
                                                           z_src_type.length), "");
      }
      row0_val = LLVMBuildBitCast(builder, row0_val,
                                  lp_build_vec_type(gallivm, half_type), "");
      row1_val = LLVMBuildBitCast(builder, row1_val,
                                  lp_build_vec_type(gallivm, half_type), "");
   }
   else {
      if (z_src_type.length == 4) {
         row0_val = lp_build_extract_range(gallivm, z_value, 0, 2);
         row1_val = lp_build_extract_range(gallivm, z_value, 2, 2);
      }
      else {
         assert(z_src_type.length == 8);
         row0_val = LLVMBuildShuffleVector(builder, z_value, z_value,
                                           LLVMConstVector(&row_swizzle[0],
                                                           half_type.length), "");
         row1_val = LLVMBuildShuffleVector(builder, z_value, z_value,
                                           LLVMConstVector(&row_swizzle[4],
                                                           half_type.length), "");
      }
   }

   LLVMBuildStore(builder, row0_val, row0_ptr);
   LLVMBuildStore(builder, row1_val, row1_ptr);
}
Esempio n. 29
0
/**
 * Load depth/stencil values.
 * The stored values are linear, swizzle them.
 *
 * Two half-length vectors are loaded, one at the computed offset and one
 * depth_stride further, and shuffled into a single full-length vector.
 * When the format is wider than 32 bits that vector is further split into
 * its even lanes (returned in z_fb) and odd lanes (returned in s_fb).
 *
 * \param gallivm  code generation state; its builder receives the emitted IR
 * \param z_src_type  the data type of the fragment depth/stencil values
 * \param format_desc  description of the depth/stencil surface
 * \param depth_ptr  pointer to the depth/stencil values of this 4x4 block
 * \param depth_stride  stride of the depth/stencil buffer
 * \param z_fb  [out] contains z values loaded from fb (may include padding)
 * \param s_fb  [out] contains s values loaded from fb (may include padding)
 * \param loop_counter  the current loop iteration
 */
void
lp_build_depth_stencil_load_swizzled(struct gallivm_state *gallivm,
                                     struct lp_type z_src_type,
                                     const struct util_format_description *format_desc,
                                     LLVMValueRef depth_ptr,
                                     LLVMValueRef depth_stride,
                                     LLVMValueRef *z_fb,
                                     LLVMValueRef *s_fb,
                                     LLVMValueRef loop_counter)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH / 4];
   LLVMValueRef zs_dst1, zs_dst2;
   LLVMValueRef zs_dst_ptr;
   LLVMValueRef depth_offset1, depth_offset2;
   LLVMTypeRef load_ptr_type;
   struct lp_type zs_type = lp_depth_type(format_desc, z_src_type.length);
   struct lp_type zs_load_type = zs_type;
   unsigned i;

   /* Each load fetches half of the lanes. */
   zs_load_type.length = zs_load_type.length / 2;
   load_ptr_type = LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0);

   if (z_src_type.length == 4) {
      const unsigned depth_bytes = format_desc->block.bits / 8;
      /* Bit 0 of the counter selects the horizontal step, bit 1 the vertical. */
      LLVMValueRef looplsb = LLVMBuildAnd(builder, loop_counter,
                                          lp_build_const_int32(gallivm, 1), "");
      LLVMValueRef loopmsb = LLVMBuildAnd(builder, loop_counter,
                                          lp_build_const_int32(gallivm, 2), "");
      LLVMValueRef offset2 = LLVMBuildMul(builder, loopmsb,
                                          depth_stride, "");
      depth_offset1 = LLVMBuildMul(builder, looplsb,
                                   lp_build_const_int32(gallivm, depth_bytes * 2), "");
      depth_offset1 = LLVMBuildAdd(builder, depth_offset1, offset2, "");

      /* just concatenate the loaded 2x2 values into 4-wide vector */
      for (i = 0; i < 4; i++) {
         shuffles[i] = lp_build_const_int32(gallivm, i);
      }
   }
   else {
      LLVMValueRef loopx2 = LLVMBuildShl(builder, loop_counter,
                                         lp_build_const_int32(gallivm, 1), "");
      assert(z_src_type.length == 8);
      depth_offset1 = LLVMBuildMul(builder, loopx2, depth_stride, "");
      /*
       * We load 2x4 values, and need to swizzle them (order
       * 0,1,4,5,2,3,6,7) - not so hot with avx unfortunately.
       */
      for (i = 0; i < 8; i++) {
         shuffles[i] = lp_build_const_int32(gallivm, (i&1) + (i&2) * 2 + (i&4) / 2);
      }
   }

   /* The second half lives one stride after the first. */
   depth_offset2 = LLVMBuildAdd(builder, depth_offset1, depth_stride, "");

   /* Load current z/stencil values from z/stencil buffer */
   zs_dst_ptr = LLVMBuildGEP(builder, depth_ptr, &depth_offset1, 1, "");
   zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr, load_ptr_type, "");
   zs_dst1 = LLVMBuildLoad(builder, zs_dst_ptr, "");
   zs_dst_ptr = LLVMBuildGEP(builder, depth_ptr, &depth_offset2, 1, "");
   zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr, load_ptr_type, "");
   zs_dst2 = LLVMBuildLoad(builder, zs_dst_ptr, "");

   /* Until split or extended below, both outputs are the same vector. */
   *z_fb = LLVMBuildShuffleVector(builder, zs_dst1, zs_dst2,
                                  LLVMConstVector(shuffles, zs_type.length), "");
   *s_fb = *z_fb;

   if (format_desc->block.bits < z_src_type.width) {
      /* Extend destination ZS values (e.g., when reading from Z16_UNORM) */
      *z_fb = LLVMBuildZExt(builder, *z_fb,
                            lp_build_int_vec_type(gallivm, z_src_type), "");
   }
   else if (format_desc->block.bits > 32) {
      /* rely on llvm to handle too wide vector we have here nicely */
      struct lp_type typex2 = zs_type;
      struct lp_type s_type = zs_type;
      LLVMValueRef shuffles1[LP_MAX_VECTOR_LENGTH / 4];
      LLVMValueRef shuffles2[LP_MAX_VECTOR_LENGTH / 4];
      LLVMValueRef tmp;

      /* View every element as two half-width lanes. */
      typex2.width = typex2.width / 2;
      typex2.length = typex2.length * 2;
      s_type.width = s_type.width / 2;
      s_type.floating = 0;

      tmp = LLVMBuildBitCast(builder, *z_fb,
                             lp_build_vec_type(gallivm, typex2), "");

      /* Even lanes become z_fb, odd lanes become s_fb. */
      for (i = 0; i < zs_type.length; i++) {
         shuffles1[i] = lp_build_const_int32(gallivm, i * 2);
         shuffles2[i] = lp_build_const_int32(gallivm, i * 2 + 1);
      }
      *z_fb = LLVMBuildShuffleVector(builder, tmp, tmp,
                                     LLVMConstVector(shuffles1, zs_type.length), "");
      *s_fb = LLVMBuildShuffleVector(builder, tmp, tmp,
                                     LLVMConstVector(shuffles2, zs_type.length), "");
      *s_fb = LLVMBuildBitCast(builder, *s_fb,
                               lp_build_vec_type(gallivm, s_type), "");
   }

   /*
    * Label each result once. s_fb goes first so that, when both outputs
    * still refer to the same value, "z_dst" is the label applied last
    * (assuming a later lp_build_name on the same value replaces the
    * earlier label).
    */
   lp_build_name(*s_fb, "s_dst");
   lp_build_name(*z_fb, "z_dst");
}