Esempio n. 1
0
static LLVMValueRef gen_digestof_value(compile_t* c, LLVMValueRef value)
{
  LLVMTypeRef type = LLVMTypeOf(value);

  switch(LLVMGetTypeKind(type))
  {
    case LLVMFloatTypeKind:
      value = LLVMBuildBitCast(c->builder, value, c->i32, "");
      return LLVMBuildZExt(c->builder, value, c->i64, "");

    case LLVMDoubleTypeKind:
      return LLVMBuildBitCast(c->builder, value, c->i64, "");

    case LLVMIntegerTypeKind:
    {
      uint32_t width = LLVMGetIntTypeWidth(type);

      if(width < 64)
      {
        value = LLVMBuildZExt(c->builder, value, c->i64, "");
      } else if(width == 128) {
        LLVMValueRef shift = LLVMConstInt(c->i128, 64, false);
        LLVMValueRef high = LLVMBuildLShr(c->builder, value, shift, "");
        high = LLVMBuildTrunc(c->builder, high, c->i64, "");
        value = LLVMBuildTrunc(c->builder, value, c->i64, "");
        value = LLVMBuildXor(c->builder, value, high, "");
      }

      return value;
    }

    case LLVMStructTypeKind:
    {
      uint32_t count = LLVMCountStructElementTypes(type);
      LLVMValueRef result = LLVMConstInt(c->i64, 0, false);

      for(uint32_t i = 0; i < count; i++)
      {
        LLVMValueRef elem = LLVMBuildExtractValue(c->builder, value, i, "");
        elem = gen_digestof_value(c, elem);
        result = LLVMBuildXor(c->builder, result, elem, "");
      }

      return result;
    }

    case LLVMPointerTypeKind:
      return LLVMBuildPtrToInt(c->builder, value, c->i64, "");

    default: {}
  }

  assert(0);
  return NULL;
}
Esempio n. 2
0
static void emit_pk2h(const struct lp_build_tgsi_action *action,
		      struct lp_build_tgsi_context *bld_base,
		      struct lp_build_emit_data *emit_data)
{
	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
	LLVMContextRef context = bld_base->base.gallivm->context;
	struct lp_build_context *uint_bld = &bld_base->uint_bld;
	LLVMTypeRef fp16, i16;
	LLVMValueRef const16, comp[2];
	unsigned i;

	fp16 = LLVMHalfTypeInContext(context);
	i16 = LLVMInt16TypeInContext(context);
	const16 = lp_build_const_int32(uint_bld->gallivm, 16);

	for (i = 0; i < 2; i++) {
		comp[i] = LLVMBuildFPTrunc(builder, emit_data->args[i], fp16, "");
		comp[i] = LLVMBuildBitCast(builder, comp[i], i16, "");
		comp[i] = LLVMBuildZExt(builder, comp[i], uint_bld->elem_type, "");
	}

	comp[1] = LLVMBuildShl(builder, comp[1], const16, "");
	comp[0] = LLVMBuildOr(builder, comp[0], comp[1], "");

	emit_data->output[emit_data->chan] = comp[0];
}
Esempio n. 3
0
/**
 * Converts int16 half-float to float32
 * Note this can be performed in 1 instruction if vcvtph2ps exists (f16c/cvt16)
 * [llvm.x86.vcvtph2ps / _mm_cvtph_ps]
 *
 * @param src           value to convert
 *
 */
LLVMValueRef
lp_build_half_to_float(struct gallivm_state *gallivm,
                       LLVMValueRef src)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMTypeRef src_type = LLVMTypeOf(src);
   unsigned src_length = LLVMGetTypeKind(src_type) == LLVMVectorTypeKind ?
                            LLVMGetVectorSize(src_type) : 1;

   struct lp_type f32_type = lp_type_float_vec(32, 32 * src_length);
   struct lp_type i32_type = lp_type_int_vec(32, 32 * src_length);
   LLVMTypeRef int_vec_type = lp_build_vec_type(gallivm, i32_type);
   LLVMValueRef h;

   if (util_cpu_caps.has_f16c && HAVE_LLVM >= 0x0301 &&
       (src_length == 4 || src_length == 8)) {
      const char *intrinsic = NULL;
      if (src_length == 4) {
         src = lp_build_pad_vector(gallivm, src, 8);
         intrinsic = "llvm.x86.vcvtph2ps.128";
      }
      else {
         intrinsic = "llvm.x86.vcvtph2ps.256";
      }
      return lp_build_intrinsic_unary(builder, intrinsic,
                                      lp_build_vec_type(gallivm, f32_type), src);
   }

   /* Convert int16 vector to int32 vector by zero ext (might generate bad code) */
   h = LLVMBuildZExt(builder, src, int_vec_type, "");
   return lp_build_smallfloat_to_float(gallivm, f32_type, h, 10, 5, 0, true);
}
Esempio n. 4
0
/**
 * Gather one element from scatter positions in memory.
 *
 * @sa lp_build_gather()
 */
LLVMValueRef
lp_build_gather_elem(struct gallivm_state *gallivm,
                     unsigned length,
                     unsigned src_width,
                     unsigned dst_width,
                     LLVMValueRef base_ptr,
                     LLVMValueRef offsets,
                     unsigned i)
{
   LLVMTypeRef src_type = LLVMIntTypeInContext(gallivm->context, src_width);
   LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0);
   LLVMTypeRef dst_elem_type = LLVMIntTypeInContext(gallivm->context, dst_width);
   LLVMValueRef ptr;
   LLVMValueRef res;

   assert(LLVMTypeOf(base_ptr) == LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0));

   ptr = lp_build_gather_elem_ptr(gallivm, length, base_ptr, offsets, i);
   ptr = LLVMBuildBitCast(gallivm->builder, ptr, src_ptr_type, "");
   res = LLVMBuildLoad(gallivm->builder, ptr, "");

   assert(src_width <= dst_width);
   if (src_width > dst_width)
      res = LLVMBuildTrunc(gallivm->builder, res, dst_elem_type, "");
   if (src_width < dst_width)
      res = LLVMBuildZExt(gallivm->builder, res, dst_elem_type, "");

   return res;
}
Esempio n. 5
0
static LLVMValueRef
translateBinOp(SymbolTable *TyTable, SymbolTable *ValTable, ASTNode *Node) {
  ASTNode *NodeE1 = (ASTNode*) ptrVectorGet(&(Node->Child), 0),
          *NodeE2 = (ASTNode*) ptrVectorGet(&(Node->Child), 1);

  LLVMValueRef ValueE1Ptr = translateExpr(TyTable, ValTable, NodeE1),
               ValueE2Ptr = translateExpr(TyTable, ValTable, NodeE2);

  LLVMValueRef ValueE1 = LLVMBuildLoad(Builder, ValueE1Ptr, "binop.ld.e1."),
               ValueE2 = LLVMBuildLoad(Builder, ValueE2Ptr, "binop.ld.e2.");

  LLVMValueRef ResultVal = NULL;
  switch (LLVMGetTypeKind(LLVMTypeOf(ValueE1))) {
    case LLVMIntegerTypeKind: ResultVal = translateIntBinOp   (Node->Kind, ValueE1, ValueE2); break;
    case LLVMFloatTypeKind:   ResultVal = translateFloatBinOp (Node->Kind, ValueE1, ValueE2); break;
    case LLVMStructTypeKind:  ResultVal = translateStructBinOp(Node->Kind, ValueE1Ptr, ValueE2Ptr); break;
    case LLVMPointerTypeKind: ResultVal = translateStringBinOp(Node->Kind, ValueE1, ValueE2); break;

    default: return NULL;
  }

  switch (LLVMGetTypeKind(LLVMTypeOf(ResultVal))) {
    case LLVMIntegerTypeKind: ResultVal = LLVMBuildZExt(Builder, ResultVal, LLVMInt32Type(), ""); break;
    default: break;
  }
  return wrapValue(ResultVal);
}
Esempio n. 6
0
LLVMValueRef gen_not(compile_t* c, ast_t* ast)
{
  LLVMValueRef value = gen_expr(c, ast);

  if(value == NULL)
    return NULL;

  ast_t* type = ast_type(ast);

  if(is_bool(type))
  {
    if(LLVMIsAConstantInt(value))
    {
      if(is_always_true(value))
        return LLVMConstInt(c->ibool, 0, false);

      return LLVMConstInt(c->ibool, 1, false);
    }

    LLVMValueRef test = LLVMBuildICmp(c->builder, LLVMIntEQ, value,
      LLVMConstInt(c->ibool, 0, false), "");
    return LLVMBuildZExt(c->builder, test, c->ibool, "");
  }

  if(LLVMIsAConstantInt(value))
    return LLVMConstNot(value);

  return LLVMBuildNot(c->builder, value, "");
}
Esempio n. 7
0
LLVMValueRef gendesc_fieldptr(compile_t* c, LLVMValueRef ptr,
  LLVMValueRef field_info)
{
  LLVMValueRef offset = LLVMBuildExtractValue(c->builder, field_info, 0, "");
  offset = LLVMBuildZExt(c->builder, offset, c->intptr, "");
  return LLVMBuildInBoundsGEP(c->builder, ptr, &offset, 1, "");
}
Esempio n. 8
0
/**
 * Gather one element from scatter positions in memory.
 *
 * @sa lp_build_gather()
 */
LLVMValueRef
lp_build_gather_elem(struct gallivm_state *gallivm,
                     unsigned length,
                     unsigned src_width,
                     unsigned dst_width,
                     boolean aligned,
                     LLVMValueRef base_ptr,
                     LLVMValueRef offsets,
                     unsigned i,
                     boolean vector_justify)
{
   LLVMTypeRef src_type = LLVMIntTypeInContext(gallivm->context, src_width);
   LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0);
   LLVMTypeRef dst_elem_type = LLVMIntTypeInContext(gallivm->context, dst_width);
   LLVMValueRef ptr;
   LLVMValueRef res;

   assert(LLVMTypeOf(base_ptr) == LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0));

   ptr = lp_build_gather_elem_ptr(gallivm, length, base_ptr, offsets, i);
   ptr = LLVMBuildBitCast(gallivm->builder, ptr, src_ptr_type, "");
   res = LLVMBuildLoad(gallivm->builder, ptr, "");

   /* XXX
    * On some archs we probably really want to avoid having to deal
    * with alignments lower than 4 bytes (if fetch size is a power of
    * two >= 32). On x86 it doesn't matter, however.
    * We should be able to guarantee full alignment for any kind of texture
    * fetch (except ARB_texture_buffer_range, oops), but not vertex fetch
    * (there's PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY and friends
    * but I don't think that's quite what we wanted).
    * For ARB_texture_buffer_range, PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT
    * looks like a good fit, but it seems this cap bit (and OpenGL) aren't
    * enforcing what we want (which is what d3d10 does, the offset needs to
    * be aligned to element size, but GL has bytes regardless of element
    * size which would only leave us with minimum alignment restriction of 16
    * which doesn't make much sense if the type isn't 4x32bit). Due to
    * translation of offsets to first_elem in sampler_views it actually seems
    * gallium could not do anything else except 16 no matter what...
    */
  if (!aligned) {
      LLVMSetAlignment(res, 1);
   }

   assert(src_width <= dst_width);
   if (src_width > dst_width) {
      res = LLVMBuildTrunc(gallivm->builder, res, dst_elem_type, "");
   } else if (src_width < dst_width) {
      res = LLVMBuildZExt(gallivm->builder, res, dst_elem_type, "");
      if (vector_justify) {
#ifdef PIPE_ARCH_BIG_ENDIAN
         res = LLVMBuildShl(gallivm->builder, res,
                            LLVMConstInt(dst_elem_type, dst_width - src_width, 0), "");
#endif
      }
   }

   return res;
}
Esempio n. 9
0
static void number_conversion(compile_t* c, num_conv_t* from, num_conv_t* to,
  bool native128)
{
  if(!native128 &&
    ((from->is_float && (to->size > 64)) ||
    (to->is_float && (from->size > 64)))
    )
  {
    return;
  }

  reach_type_t* t = reach_type_name(c->reach, from->type_name);

  if(t == NULL)
    return;

  FIND_METHOD(to->fun_name);
  start_function(c, m, to->type, &from->type, 1);

  LLVMValueRef arg = LLVMGetParam(m->func, 0);
  LLVMValueRef result;

  if(from->is_float)
  {
    if(to->is_float)
    {
      if(from->size < to->size)
        result = LLVMBuildFPExt(c->builder, arg, to->type, "");
      else if(from->size > to->size)
        result = LLVMBuildFPTrunc(c->builder, arg, to->type, "");
      else
        result = arg;
    } else if(to->is_signed) {
      result = LLVMBuildFPToSI(c->builder, arg, to->type, "");
    } else {
      result = LLVMBuildFPToUI(c->builder, arg, to->type, "");
    }
  } else if(to->is_float) {
    if(from->is_signed)
      result = LLVMBuildSIToFP(c->builder, arg, to->type, "");
    else
      result = LLVMBuildUIToFP(c->builder, arg, to->type, "");
  } else if(from->size > to->size) {
      result = LLVMBuildTrunc(c->builder, arg, to->type, "");
  } else if(from->size < to->size) {
    if(from->is_signed)
      result = LLVMBuildSExt(c->builder, arg, to->type, "");
    else
      result = LLVMBuildZExt(c->builder, arg, to->type, "");
  } else {
    result = arg;
  }

  LLVMBuildRet(c->builder, result);
  codegen_finishfun(c);

  BOX_FUNCTION();
}
Esempio n. 10
0
LLVMValueRef gendesc_ptr_to_fields(compile_t* c, LLVMValueRef object,
  LLVMValueRef desc)
{
  // Skip the descriptor.
  LLVMValueRef offset = desc_field(c, desc, DESC_FIELD_OFFSET);
  offset = LLVMBuildZExt(c->builder, offset, c->intptr, "");

  LLVMValueRef base = LLVMBuildBitCast(c->builder, object, c->void_ptr, "");
  return LLVMBuildInBoundsGEP(c->builder, base, &offset, 1, "");
}
Esempio n. 11
0
LLVMValueRef gen_eq_rvalue(compile_t* c, ast_t* left, LLVMValueRef r_value)
{
  ast_t* type = ast_type(left);
  bool sign = is_signed(type);
  LLVMValueRef l_value = gen_expr(c, left);

  LLVMValueRef test = make_cmp_value(c, sign, l_value, r_value,
    LLVMRealOEQ, LLVMIntEQ, LLVMIntEQ);

  return LLVMBuildZExt(c->builder, test, c->ibool, "");
}
Esempio n. 12
0
LLVMValueRef gen_not(struct node *ast)
{
	LLVMValueRef truth;

	truth = LLVMBuildICmp(builder,
			LLVMIntEQ,
			codegen(ast->one),
			CONST(0),
			"");

	return LLVMBuildZExt(builder, truth, TYPE_INT, "");
}
Esempio n. 13
0
LLVMValueRef gen_ge(struct node *ast)
{
	LLVMValueRef truth;

	truth = LLVMBuildICmp(builder,
			LLVMIntSGE,
			codegen(ast->one),
			codegen(ast->two),
			"");

	return LLVMBuildZExt(builder, truth, TYPE_INT, "");
}
Esempio n. 14
0
LLVMValueRef gendesc_ptr_to_fields(compile_t* c, LLVMValueRef object,
  LLVMValueRef desc)
{
  // Skip the descriptor.
  LLVMValueRef offset = desc_field(c, desc, DESC_FIELD_OFFSET);
  offset = LLVMBuildZExt(c->builder, offset, c->intptr, "");

  LLVMValueRef base = LLVMBuildPtrToInt(c->builder, object, c->intptr, "");
  LLVMValueRef result = LLVMBuildAdd(c->builder, base, offset, "");

  // Return as a c->intptr.
  return result;
}
Esempio n. 15
0
static LLVMValueRef make_cmp(compile_t* c, ast_t* left, ast_t* right,
  LLVMRealPredicate cmp_f, LLVMIntPredicate cmp_si, LLVMIntPredicate cmp_ui)
{
  ast_t* type = ast_type(left);
  bool sign = is_signed(type);

  LLVMValueRef l_value = gen_expr(c, left);
  LLVMValueRef r_value = gen_expr(c, right);

  LLVMValueRef test = make_cmp_value(c, sign, l_value, r_value,
    cmp_f, cmp_si, cmp_ui);

  return LLVMBuildZExt(c->builder, test, c->ibool, "");
}
Esempio n. 16
0
static void maybe_is_none(compile_t* c, reach_type_t* t)
{
  // Returns true if the receiver is null.
  FIND_METHOD("is_none");
  start_function(c, m, c->ibool, &t->use_type, 1);

  LLVMValueRef receiver = LLVMGetParam(m->func, 0);
  LLVMValueRef test = LLVMBuildIsNull(c->builder, receiver, "");
  LLVMValueRef value = LLVMBuildZExt(c->builder, test, c->ibool, "");

  LLVMBuildRet(c->builder, value);
  codegen_finishfun(c);

  BOX_FUNCTION();
}
Esempio n. 17
0
/**
 * Converts int16 half-float to float32
 * Note this can be performed in 1 instruction if vcvtph2ps exists (sse5 i think?)
 * [llvm.x86.vcvtph2ps / _mm_cvtph_ps]
 *
 * @param src_type      <vector> type of int16
 * @param src           value to convert
 *
 * ref http://fgiesen.wordpress.com/2012/03/28/half-to-float-done-quic/
 */
LLVMValueRef
lp_build_half_to_float(struct gallivm_state *gallivm,
                       struct lp_type src_type,
                       LLVMValueRef src)
{
    struct lp_type f32_type = lp_type_float_vec(32, 32 * src_type.length);
    struct lp_type i32_type = lp_type_int_vec(32, 32 * src_type.length);

    LLVMBuilderRef builder = gallivm->builder;
    LLVMTypeRef int_vec_type = lp_build_vec_type(gallivm, i32_type);
    LLVMTypeRef float_vec_type = lp_build_vec_type(gallivm, f32_type);

    /* Constants */
    LLVMValueRef i32_13          = lp_build_const_int_vec(gallivm, i32_type, 13);
    LLVMValueRef i32_16          = lp_build_const_int_vec(gallivm, i32_type, 16);
    LLVMValueRef i32_mask_nosign = lp_build_const_int_vec(gallivm, i32_type, 0x7fff);
    LLVMValueRef i32_was_infnan  = lp_build_const_int_vec(gallivm, i32_type, 0x7bff);
    LLVMValueRef i32_exp_infnan  = lp_build_const_int_vec(gallivm, i32_type, 0xff << 23);
    LLVMValueRef f32_magic       = LLVMBuildBitCast(builder,
                                   lp_build_const_int_vec(gallivm, i32_type, (254 - 15) << 23),
                                   float_vec_type, "");

    /* Convert int16 vector to int32 vector by zero ext */
    LLVMValueRef h             = LLVMBuildZExt(builder, src, int_vec_type, "");

    /* Exponent / mantissa bits */
    LLVMValueRef expmant       = LLVMBuildAnd(builder, i32_mask_nosign, h, "");
    LLVMValueRef shifted       = LLVMBuildBitCast(builder, LLVMBuildShl(builder, expmant, i32_13, ""), float_vec_type, "");

    /* Exponent adjust */
    LLVMValueRef scaled        = LLVMBuildBitCast(builder, LLVMBuildFMul(builder, shifted, f32_magic, ""), int_vec_type, "");

    /* Make sure Inf/NaN survive */
    LLVMValueRef b_wasinfnan   = lp_build_compare(gallivm, i32_type, PIPE_FUNC_GREATER, expmant, i32_was_infnan);
    LLVMValueRef infnanexp     = LLVMBuildAnd(builder, b_wasinfnan, i32_exp_infnan, "");

    /* Sign bit */
    LLVMValueRef justsign      = LLVMBuildXor(builder, h, expmant, "");
    LLVMValueRef sign          = LLVMBuildShl(builder, justsign, i32_16, "");

    /* Combine result */
    LLVMValueRef sign_inf      = LLVMBuildOr(builder, sign, infnanexp, "");
    LLVMValueRef final         = LLVMBuildOr(builder, scaled, sign_inf, "");

    /* Cast from int32 vector to float32 vector */
    return LLVMBuildBitCast(builder, final, float_vec_type, "");
}
Esempio n. 18
0
/**
 * lp_build_assert.
 *
 * Build an assertion in LLVM IR by building a function call to the
 * lp_assert() function above.
 *
 * \param condition should be an 'i1' or 'i32' value
 * \param msg  a string to print if the assertion fails.
 */
LLVMValueRef
lp_build_assert(LLVMBuilderRef builder, LLVMValueRef condition,
                const char *msg)
{
   LLVMModuleRef module;
   LLVMTypeRef arg_types[2];
   LLVMValueRef msg_string, assert_func, params[2], r;

   module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(
                            LLVMGetInsertBlock(builder)));

   msg_string = lp_build_const_string_variable(module, msg, strlen(msg) + 1);

   arg_types[0] = LLVMInt32Type();
   arg_types[1] = LLVMPointerType(LLVMInt8Type(), 0);

   /* lookup the lp_assert function */
   assert_func = LLVMGetNamedFunction(module, "lp_assert");

   /* Create the assertion function if not found */
   if (!assert_func) {
      LLVMTypeRef func_type =
         LLVMFunctionType(LLVMVoidType(), arg_types, 2, 0);

      assert_func = LLVMAddFunction(module, "lp_assert", func_type);
      LLVMSetFunctionCallConv(assert_func, LLVMCCallConv);
      LLVMSetLinkage(assert_func, LLVMExternalLinkage);
      LLVMAddGlobalMapping(lp_build_engine, assert_func,
                           func_to_pointer((func_pointer)lp_assert));
   }
   assert(assert_func);

   /* build function call param list */
   params[0] = LLVMBuildZExt(builder, condition, arg_types[0], "");
   params[1] = LLVMBuildBitCast(builder, msg_string, arg_types[1], "");

   /* check arg types */
   assert(LLVMTypeOf(params[0]) == arg_types[0]);
   assert(LLVMTypeOf(params[1]) == arg_types[1]);

   r = LLVMBuildCall(builder, assert_func, params, 2, "");

   return r;
}
Esempio n. 19
0
LLVMValueRef gen_is(compile_t* c, ast_t* ast)
{
  AST_GET_CHILDREN(ast, left, right);
  ast_t* left_type = ast_type(left);
  ast_t* right_type = ast_type(right);

  LLVMValueRef l_value = gen_expr(c, left);
  LLVMValueRef r_value = gen_expr(c, right);

  if((l_value == NULL) || (r_value == NULL))
    return NULL;

  codegen_debugloc(c, ast);
  LLVMValueRef result = gen_is_value(c, left_type, right_type,
    l_value, r_value);
  LLVMValueRef value = LLVMBuildZExt(c->builder, result, c->ibool, "");
  codegen_debugloc(c, NULL);
  return value;
}
Esempio n. 20
0
/**
 * Gather elements from scatter positions in memory into a single vector.
 *
 * @param src_width src element width
 * @param dst_width result element width (source will be expanded to fit)
 * @param length length of the offsets,
 * @param base_ptr base pointer, should be a i8 pointer type.
 * @param offsets vector with offsets
 */
LLVMValueRef
lp_build_gather(LLVMBuilderRef builder,
                unsigned length,
                unsigned src_width,
                unsigned dst_width,
                LLVMValueRef base_ptr,
                LLVMValueRef offsets)
{
   LLVMTypeRef src_type = LLVMIntType(src_width);
   LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0);
   LLVMTypeRef dst_elem_type = LLVMIntType(dst_width);
   LLVMTypeRef dst_vec_type = LLVMVectorType(dst_elem_type, length);
   LLVMValueRef res;
   unsigned i;

   res = LLVMGetUndef(dst_vec_type);
   for(i = 0; i < length; ++i) {
      LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
      LLVMValueRef elem_offset;
      LLVMValueRef elem_ptr;
      LLVMValueRef elem;

      elem_offset = LLVMBuildExtractElement(builder, offsets, index, "");
      elem_ptr = LLVMBuildGEP(builder, base_ptr, &elem_offset, 1, "");
      elem_ptr = LLVMBuildBitCast(builder, elem_ptr, src_ptr_type, "");
      elem = LLVMBuildLoad(builder, elem_ptr, "");

      assert(src_width <= dst_width);
      if(src_width > dst_width)
         elem = LLVMBuildTrunc(builder, elem, dst_elem_type, "");
      if(src_width < dst_width)
         elem = LLVMBuildZExt(builder, elem, dst_elem_type, "");

      res = LLVMBuildInsertElement(builder, res, elem, index, "");
   }

   return res;
}
Esempio n. 21
0
/*
 * Do a cached lookup.
 *
 * Returns (vectors of) 4x8 rgba aos value
 */
LLVMValueRef
lp_build_fetch_cached_texels(struct gallivm_state *gallivm,
                             const struct util_format_description *format_desc,
                             unsigned n,
                             LLVMValueRef base_ptr,
                             LLVMValueRef offset,
                             LLVMValueRef i,
                             LLVMValueRef j,
                             LLVMValueRef cache)

{
   LLVMBuilderRef builder = gallivm->builder;
   unsigned count, low_bit, log2size;
   LLVMValueRef color, offset_stored, addr, ptr_addrtrunc, tmp;
   LLVMValueRef ij_index, hash_index, hash_mask, block_index;
   LLVMTypeRef i8t = LLVMInt8TypeInContext(gallivm->context);
   LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
   LLVMTypeRef i64t = LLVMInt64TypeInContext(gallivm->context);
   struct lp_type type;
   struct lp_build_context bld32;
   memset(&type, 0, sizeof type);
   type.width = 32;
   type.length = n;

   assert(format_desc->block.width == 4);
   assert(format_desc->block.height == 4);

   lp_build_context_init(&bld32, gallivm, type);

   /*
    * compute hash - we use direct mapped cache, the hash function could
    *                be better but it needs to be simple
    * per-element:
    *    compare offset with offset stored at tag (hash)
    *    if not equal decode/store block, update tag
    *    extract color from cache
    *    assemble result vector
    */

   /* TODO: not ideal with 32bit pointers... */

   low_bit = util_logbase2(format_desc->block.bits / 8);
   log2size = util_logbase2(LP_BUILD_FORMAT_CACHE_SIZE);
   addr = LLVMBuildPtrToInt(builder, base_ptr, i64t, "");
   ptr_addrtrunc = LLVMBuildPtrToInt(builder, base_ptr, i32t, "");
   ptr_addrtrunc = lp_build_broadcast_scalar(&bld32, ptr_addrtrunc);
   /* For the hash function, first mask off the unused lowest bits. Then just
      do some xor with address bits - only use lower 32bits */
   ptr_addrtrunc = LLVMBuildAdd(builder, offset, ptr_addrtrunc, "");
   ptr_addrtrunc = LLVMBuildLShr(builder, ptr_addrtrunc,
                                 lp_build_const_int_vec(gallivm, type, low_bit), "");
   /* This only really makes sense for size 64,128,256 */
   hash_index = ptr_addrtrunc;
   ptr_addrtrunc = LLVMBuildLShr(builder, ptr_addrtrunc,
                                 lp_build_const_int_vec(gallivm, type, 2*log2size), "");
   hash_index = LLVMBuildXor(builder, ptr_addrtrunc, hash_index, "");
   tmp = LLVMBuildLShr(builder, hash_index,
                       lp_build_const_int_vec(gallivm, type, log2size), "");
   hash_index = LLVMBuildXor(builder, hash_index, tmp, "");

   hash_mask = lp_build_const_int_vec(gallivm, type, LP_BUILD_FORMAT_CACHE_SIZE - 1);
   hash_index = LLVMBuildAnd(builder, hash_index, hash_mask, "");
   ij_index = LLVMBuildShl(builder, i, lp_build_const_int_vec(gallivm, type, 2), "");
   ij_index = LLVMBuildAdd(builder, ij_index, j, "");
   block_index = LLVMBuildShl(builder, hash_index,
                              lp_build_const_int_vec(gallivm, type, 4), "");
   block_index = LLVMBuildAdd(builder, ij_index, block_index, "");

   if (n > 1) {
      color = LLVMGetUndef(LLVMVectorType(i32t, n));
      for (count = 0; count < n; count++) {
         LLVMValueRef index, cond, colorx;
         LLVMValueRef block_indexx, hash_indexx, addrx, offsetx, ptr_addrx;
         struct lp_build_if_state if_ctx;

         index = lp_build_const_int32(gallivm, count);
         offsetx = LLVMBuildExtractElement(builder, offset, index, "");
         addrx = LLVMBuildZExt(builder, offsetx, i64t, "");
         addrx = LLVMBuildAdd(builder, addrx, addr, "");
         block_indexx = LLVMBuildExtractElement(builder, block_index, index, "");
         hash_indexx = LLVMBuildLShr(builder, block_indexx,
                                     lp_build_const_int32(gallivm, 4), "");
         offset_stored = lookup_tag_data(gallivm, cache, hash_indexx);
         cond = LLVMBuildICmp(builder, LLVMIntNE, offset_stored, addrx, "");

         lp_build_if(&if_ctx, gallivm, cond);
         {
            ptr_addrx = LLVMBuildIntToPtr(builder, addrx,
                                          LLVMPointerType(i8t, 0), "");
            update_cached_block(gallivm, format_desc, ptr_addrx, hash_indexx, cache);
#if LP_BUILD_FORMAT_CACHE_DEBUG
            update_cache_access(gallivm, cache, 1,
                                LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_MISS);
#endif
         }
         lp_build_endif(&if_ctx);

         colorx = lookup_cached_pixel(gallivm, cache, block_indexx);

         color = LLVMBuildInsertElement(builder, color, colorx,
                                        lp_build_const_int32(gallivm, count), "");
      }
   }
   else {
      LLVMValueRef cond;
      struct lp_build_if_state if_ctx;

      tmp = LLVMBuildZExt(builder, offset, i64t, "");
      addr = LLVMBuildAdd(builder, tmp, addr, "");
      offset_stored = lookup_tag_data(gallivm, cache, hash_index);
      cond = LLVMBuildICmp(builder, LLVMIntNE, offset_stored, addr, "");

      lp_build_if(&if_ctx, gallivm, cond);
      {
         tmp = LLVMBuildIntToPtr(builder, addr, LLVMPointerType(i8t, 0), "");
         update_cached_block(gallivm, format_desc, tmp, hash_index, cache);
#if LP_BUILD_FORMAT_CACHE_DEBUG
         update_cache_access(gallivm, cache, 1,
                             LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_MISS);
#endif
      }
      lp_build_endif(&if_ctx);

      color = lookup_cached_pixel(gallivm, cache, block_index);
   }
#if LP_BUILD_FORMAT_CACHE_DEBUG
   update_cache_access(gallivm, cache, n,
                       LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_TOTAL);
#endif
   return LLVMBuildBitCast(builder, color, LLVMVectorType(i8t, n * 4), "");
}
Esempio n. 22
0
File: genexe.c Progetto: volth/ponyc
static void gen_main(compile_t* c, gentype_t* main_g, gentype_t* env_g)
{
  LLVMTypeRef params[3];
  params[0] = c->i32;
  params[1] = LLVMPointerType(LLVMPointerType(c->i8, 0), 0);
  params[2] = LLVMPointerType(LLVMPointerType(c->i8, 0), 0);

  LLVMTypeRef ftype = LLVMFunctionType(c->i32, params, 3, false);
  LLVMValueRef func = LLVMAddFunction(c->module, "main", ftype);

  codegen_startfun(c, func, false);

  LLVMValueRef args[3];
  args[0] = LLVMGetParam(func, 0);
  LLVMSetValueName(args[0], "argc");

  args[1] = LLVMGetParam(func, 1);
  LLVMSetValueName(args[1], "argv");

  args[2] = LLVMGetParam(func, 2);
  LLVMSetValueName(args[1], "envp");

  // Initialise the pony runtime with argc and argv, getting a new argc.
  args[0] = gencall_runtime(c, "pony_init", args, 2, "argc");

  // Create the main actor and become it.
  LLVMValueRef main_actor = create_main(c, main_g);

  // Create an Env on the main actor's heap.
  const char* env_name = "Env";
  const char* env_create = genname_fun(env_name, "_create", NULL);

  LLVMValueRef env_args[4];
  env_args[0] = gencall_alloc(c, env_g);
  env_args[1] = LLVMBuildZExt(c->builder, args[0], c->i64, "");
  env_args[2] = args[1];
  env_args[3] = args[2];

  LLVMValueRef env = gencall_runtime(c, env_create, env_args, 4, "env");
  LLVMSetInstructionCallConv(env, GEN_CALLCONV);

  // Run primitive initialisers using the main actor's heap.
  primitive_call(c, stringtab("_init"), env);

  // Create a type for the message.
  LLVMTypeRef f_params[4];
  f_params[0] = c->i32;
  f_params[1] = c->i32;
  f_params[2] = c->void_ptr;
  f_params[3] = LLVMTypeOf(env);
  LLVMTypeRef msg_type = LLVMStructTypeInContext(c->context, f_params, 4,
    false);
  LLVMTypeRef msg_type_ptr = LLVMPointerType(msg_type, 0);

  // Allocate the message, setting its size and ID.
  uint32_t index = genfun_vtable_index(c, main_g, stringtab("create"), NULL);

  size_t msg_size = LLVMABISizeOfType(c->target_data, msg_type);
  args[0] = LLVMConstInt(c->i32, pool_index(msg_size), false);
  args[1] = LLVMConstInt(c->i32, index, false);
  LLVMValueRef msg = gencall_runtime(c, "pony_alloc_msg", args, 2, "");
  LLVMValueRef msg_ptr = LLVMBuildBitCast(c->builder, msg, msg_type_ptr, "");

  // Set the message contents.
  LLVMValueRef env_ptr = LLVMBuildStructGEP(c->builder, msg_ptr, 3, "");
  LLVMBuildStore(c->builder, env, env_ptr);

  // Trace the message.
  gencall_runtime(c, "pony_gc_send", NULL, 0, "");
  const char* env_trace = genname_trace(env_name);

  args[0] = LLVMBuildBitCast(c->builder, env, c->object_ptr, "");
  args[1] = LLVMGetNamedFunction(c->module, env_trace);
  gencall_runtime(c, "pony_traceobject", args, 2, "");
  gencall_runtime(c, "pony_send_done", NULL, 0, "");

  // Send the message.
  args[0] = main_actor;
  args[1] = msg;
  gencall_runtime(c, "pony_sendv", args, 2, "");

  // Start the runtime.
  LLVMValueRef zero = LLVMConstInt(c->i32, 0, false);
  LLVMValueRef rc = gencall_runtime(c, "pony_start", &zero, 1, "");

  // Run primitive finalisers. We create a new main actor as a context to run
  // the finalisers in, but we do not initialise or schedule it.
  LLVMValueRef final_actor = create_main(c, main_g);
  primitive_call(c, stringtab("_final"), NULL);
  args[0] = final_actor;
  gencall_runtime(c, "pony_destroy", args, 1, "");

  // Return the runtime exit code.
  LLVMBuildRet(c->builder, rc);

  codegen_finishfun(c);

  // External linkage for main().
  LLVMSetLinkage(func, LLVMExternalLinkage);
}
Esempio n. 23
0
/**
 * Load depth/stencil values.
 * The stored values are linear, swizzle them.
 *
 * \param type  the data type of the fragment depth/stencil values
 * \param format_desc  description of the depth/stencil surface
 * \param loop_counter  the current loop iteration
 * \param depth_ptr  pointer to the depth/stencil values of this 4x4 block
 * \param depth_stride  stride of the depth/stencil buffer
 * \param z_fb  contains z values loaded from fb (may include padding)
 * \param s_fb  contains s values loaded from fb (may include padding)
 */
void
lp_build_depth_stencil_load_swizzled(struct gallivm_state *gallivm,
                                     struct lp_type z_src_type,
                                     const struct util_format_description *format_desc,
                                     LLVMValueRef depth_ptr,
                                     LLVMValueRef depth_stride,
                                     LLVMValueRef *z_fb,
                                     LLVMValueRef *s_fb,
                                     LLVMValueRef loop_counter)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH / 4];
   LLVMValueRef zs_dst1, zs_dst2;
   LLVMValueRef zs_dst_ptr;
   LLVMValueRef depth_offset1, depth_offset2;
   LLVMTypeRef load_ptr_type;
   unsigned depth_bytes = format_desc->block.bits / 8;
   struct lp_type zs_type = lp_depth_type(format_desc, z_src_type.length);
   struct lp_type zs_load_type = zs_type;

   zs_load_type.length = zs_load_type.length / 2;
   load_ptr_type = LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0);

   if (z_src_type.length == 4) {
      unsigned i;
      LLVMValueRef looplsb = LLVMBuildAnd(builder, loop_counter,
                                          lp_build_const_int32(gallivm, 1), "");
      LLVMValueRef loopmsb = LLVMBuildAnd(builder, loop_counter,
                                          lp_build_const_int32(gallivm, 2), "");
      LLVMValueRef offset2 = LLVMBuildMul(builder, loopmsb,
                                          depth_stride, "");
      depth_offset1 = LLVMBuildMul(builder, looplsb,
                                   lp_build_const_int32(gallivm, depth_bytes * 2), "");
      depth_offset1 = LLVMBuildAdd(builder, depth_offset1, offset2, "");

      /* just concatenate the loaded 2x2 values into 4-wide vector */
      for (i = 0; i < 4; i++) {
         shuffles[i] = lp_build_const_int32(gallivm, i);
      }
   }
   else {
      unsigned i;
      LLVMValueRef loopx2 = LLVMBuildShl(builder, loop_counter,
                                         lp_build_const_int32(gallivm, 1), "");
      assert(z_src_type.length == 8);
      depth_offset1 = LLVMBuildMul(builder, loopx2, depth_stride, "");
      /*
       * We load 2x4 values, and need to swizzle them (order
       * 0,1,4,5,2,3,6,7) - not so hot with avx unfortunately.
       */
      for (i = 0; i < 8; i++) {
         shuffles[i] = lp_build_const_int32(gallivm, (i&1) + (i&2) * 2 + (i&4) / 2);
      }
   }

   depth_offset2 = LLVMBuildAdd(builder, depth_offset1, depth_stride, "");

   /* Load current z/stencil values from z/stencil buffer */
   zs_dst_ptr = LLVMBuildGEP(builder, depth_ptr, &depth_offset1, 1, "");
   zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr, load_ptr_type, "");
   zs_dst1 = LLVMBuildLoad(builder, zs_dst_ptr, "");
   zs_dst_ptr = LLVMBuildGEP(builder, depth_ptr, &depth_offset2, 1, "");
   zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr, load_ptr_type, "");
   zs_dst2 = LLVMBuildLoad(builder, zs_dst_ptr, "");

   *z_fb = LLVMBuildShuffleVector(builder, zs_dst1, zs_dst2,
                                  LLVMConstVector(shuffles, zs_type.length), "");
   *s_fb = *z_fb;

   if (format_desc->block.bits < z_src_type.width) {
      /* Extend destination ZS values (e.g., when reading from Z16_UNORM) */
      *z_fb = LLVMBuildZExt(builder, *z_fb,
                            lp_build_int_vec_type(gallivm, z_src_type), "");
   }

   else if (format_desc->block.bits > 32) {
      /* rely on llvm to handle too wide vector we have here nicely */
      unsigned i;
      struct lp_type typex2 = zs_type;
      struct lp_type s_type = zs_type;
      LLVMValueRef shuffles1[LP_MAX_VECTOR_LENGTH / 4];
      LLVMValueRef shuffles2[LP_MAX_VECTOR_LENGTH / 4];
      LLVMValueRef tmp;

      typex2.width = typex2.width / 2;
      typex2.length = typex2.length * 2;
      s_type.width = s_type.width / 2;
      s_type.floating = 0;

      tmp = LLVMBuildBitCast(builder, *z_fb,
                             lp_build_vec_type(gallivm, typex2), "");

      for (i = 0; i < zs_type.length; i++) {
         shuffles1[i] = lp_build_const_int32(gallivm, i * 2);
         shuffles2[i] = lp_build_const_int32(gallivm, i * 2 + 1);
      }
      *z_fb = LLVMBuildShuffleVector(builder, tmp, tmp,
                                     LLVMConstVector(shuffles1, zs_type.length), "");
      *s_fb = LLVMBuildShuffleVector(builder, tmp, tmp,
                                     LLVMConstVector(shuffles2, zs_type.length), "");
      *s_fb = LLVMBuildBitCast(builder, *s_fb,
                               lp_build_vec_type(gallivm, s_type), "");
      lp_build_name(*s_fb, "s_dst");
   }

   lp_build_name(*z_fb, "z_dst");
   lp_build_name(*s_fb, "s_dst");
   lp_build_name(*z_fb, "z_dst");
}
Esempio n. 24
0
/*
 * Create a function that deforms a tuple of type desc up to natts columns.
 */
LLVMValueRef
slot_compile_deform(LLVMJitContext *context, TupleDesc desc, int natts)
{
	char	   *funcname;

	LLVMModuleRef mod;
	LLVMBuilderRef b;

	LLVMTypeRef deform_sig;
	LLVMValueRef v_deform_fn;

	LLVMBasicBlockRef b_entry;
	LLVMBasicBlockRef b_adjust_unavail_cols;
	LLVMBasicBlockRef b_find_start;

	LLVMBasicBlockRef b_out;
	LLVMBasicBlockRef b_dead;
	LLVMBasicBlockRef *attcheckattnoblocks;
	LLVMBasicBlockRef *attstartblocks;
	LLVMBasicBlockRef *attisnullblocks;
	LLVMBasicBlockRef *attcheckalignblocks;
	LLVMBasicBlockRef *attalignblocks;
	LLVMBasicBlockRef *attstoreblocks;

	LLVMValueRef v_offp;

	LLVMValueRef v_tupdata_base;
	LLVMValueRef v_tts_values;
	LLVMValueRef v_tts_nulls;
	LLVMValueRef v_slotoffp;
	LLVMValueRef v_slowp;
	LLVMValueRef v_nvalidp;
	LLVMValueRef v_nvalid;
	LLVMValueRef v_maxatt;

	LLVMValueRef v_slot;

	LLVMValueRef v_tupleheaderp;
	LLVMValueRef v_tuplep;
	LLVMValueRef v_infomask1;
	LLVMValueRef v_infomask2;
	LLVMValueRef v_bits;

	LLVMValueRef v_hoff;

	LLVMValueRef v_hasnulls;

	/* last column (0 indexed) guaranteed to exist */
	int			guaranteed_column_number = -1;

	/* current known alignment */
	int			known_alignment = 0;

	/* if true, known_alignment describes definite offset of column */
	bool		attguaranteedalign = true;

	int			attnum;

	mod = llvm_mutable_module(context);

	funcname = llvm_expand_funcname(context, "deform");

	/*
	 * Check which columns do have to exist, so we don't have to check the
	 * rows natts unnecessarily.
	 */
	for (attnum = 0; attnum < desc->natts; attnum++)
	{
		Form_pg_attribute att = TupleDescAttr(desc, attnum);

		/*
		 * If the column is possibly missing, we can't rely on its (or
		 * subsequent) NOT NULL constraints to indicate minimum attributes in
		 * the tuple, so stop here.
		 */
		if (att->atthasmissing)
			break;

		/*
		 * Column is NOT NULL and there've been no preceding missing columns,
		 * it's guaranteed that all columns up to here exist at least in the
		 * NULL bitmap.
		 */
		if (att->attnotnull)
			guaranteed_column_number = attnum;
	}

	/* Create the signature and function */
	{
		LLVMTypeRef param_types[1];

		param_types[0] = l_ptr(StructTupleTableSlot);

		deform_sig = LLVMFunctionType(LLVMVoidType(), param_types,
									  lengthof(param_types), 0);
	}
	v_deform_fn = LLVMAddFunction(mod, funcname, deform_sig);
	LLVMSetLinkage(v_deform_fn, LLVMInternalLinkage);
	LLVMSetParamAlignment(LLVMGetParam(v_deform_fn, 0), MAXIMUM_ALIGNOF);
	llvm_copy_attributes(AttributeTemplate, v_deform_fn);

	b_entry =
		LLVMAppendBasicBlock(v_deform_fn, "entry");
	b_adjust_unavail_cols =
		LLVMAppendBasicBlock(v_deform_fn, "adjust_unavail_cols");
	b_find_start =
		LLVMAppendBasicBlock(v_deform_fn, "find_startblock");
	b_out =
		LLVMAppendBasicBlock(v_deform_fn, "outblock");
	b_dead =
		LLVMAppendBasicBlock(v_deform_fn, "deadblock");

	b = LLVMCreateBuilder();

	attcheckattnoblocks = palloc(sizeof(LLVMBasicBlockRef) * natts);
	attstartblocks = palloc(sizeof(LLVMBasicBlockRef) * natts);
	attisnullblocks = palloc(sizeof(LLVMBasicBlockRef) * natts);
	attcheckalignblocks = palloc(sizeof(LLVMBasicBlockRef) * natts);
	attalignblocks = palloc(sizeof(LLVMBasicBlockRef) * natts);
	attstoreblocks = palloc(sizeof(LLVMBasicBlockRef) * natts);

	known_alignment = 0;

	LLVMPositionBuilderAtEnd(b, b_entry);

	/* perform allocas first, llvm only converts those to registers */
	v_offp = LLVMBuildAlloca(b, TypeSizeT, "v_offp");

	v_slot = LLVMGetParam(v_deform_fn, 0);

	v_tts_values =
		l_load_struct_gep(b, v_slot, FIELDNO_TUPLETABLESLOT_VALUES,
						  "tts_values");
	v_tts_nulls =
		l_load_struct_gep(b, v_slot, FIELDNO_TUPLETABLESLOT_ISNULL,
						  "tts_ISNULL");

	v_slotoffp = LLVMBuildStructGEP(b, v_slot, FIELDNO_TUPLETABLESLOT_OFF, "");
	v_slowp = LLVMBuildStructGEP(b, v_slot, FIELDNO_TUPLETABLESLOT_SLOW, "");
	v_nvalidp = LLVMBuildStructGEP(b, v_slot, FIELDNO_TUPLETABLESLOT_NVALID, "");

	v_tupleheaderp =
		l_load_struct_gep(b, v_slot, FIELDNO_TUPLETABLESLOT_TUPLE,
						  "tupleheader");
	v_tuplep =
		l_load_struct_gep(b, v_tupleheaderp, FIELDNO_HEAPTUPLEDATA_DATA,
						  "tuple");
	v_bits =
		LLVMBuildBitCast(b,
						 LLVMBuildStructGEP(b, v_tuplep,
											FIELDNO_HEAPTUPLEHEADERDATA_BITS,
											""),
						 l_ptr(LLVMInt8Type()),
						 "t_bits");
	v_infomask1 =
		l_load_struct_gep(b, v_tuplep,
						  FIELDNO_HEAPTUPLEHEADERDATA_INFOMASK,
						  "infomask1");
	v_infomask2 =
		l_load_struct_gep(b,
						  v_tuplep, FIELDNO_HEAPTUPLEHEADERDATA_INFOMASK2,
						  "infomask2");

	/* t_infomask & HEAP_HASNULL */
	v_hasnulls =
		LLVMBuildICmp(b, LLVMIntNE,
					  LLVMBuildAnd(b,
								   l_int16_const(HEAP_HASNULL),
								   v_infomask1, ""),
					  l_int16_const(0),
					  "hasnulls");

	/* t_infomask2 & HEAP_NATTS_MASK */
	v_maxatt = LLVMBuildAnd(b,
							l_int16_const(HEAP_NATTS_MASK),
							v_infomask2,
							"maxatt");

	v_hoff =
		l_load_struct_gep(b, v_tuplep,
						  FIELDNO_HEAPTUPLEHEADERDATA_HOFF,
						  "t_hoff");

	v_tupdata_base =
		LLVMBuildGEP(b,
					 LLVMBuildBitCast(b,
									  v_tuplep,
									  l_ptr(LLVMInt8Type()),
									  ""),
					 &v_hoff, 1,
					 "v_tupdata_base");

	/*
	 * Load tuple start offset from slot. Will be reset below in case there's
	 * no existing deformed columns in slot.
	 */
	{
		LLVMValueRef v_off_start;

		v_off_start = LLVMBuildLoad(b, v_slotoffp, "v_slot_off");
		v_off_start = LLVMBuildZExt(b, v_off_start, TypeSizeT, "");
		LLVMBuildStore(b, v_off_start, v_offp);
	}

	/* build the basic block for each attribute, need them as jump target */
	for (attnum = 0; attnum < natts; attnum++)
	{
		attcheckattnoblocks[attnum] =
			l_bb_append_v(v_deform_fn, "block.attr.%d.attcheckattno", attnum);
		attstartblocks[attnum] =
			l_bb_append_v(v_deform_fn, "block.attr.%d.start", attnum);
		attisnullblocks[attnum] =
			l_bb_append_v(v_deform_fn, "block.attr.%d.attisnull", attnum);
		attcheckalignblocks[attnum] =
			l_bb_append_v(v_deform_fn, "block.attr.%d.attcheckalign", attnum);
		attalignblocks[attnum] =
			l_bb_append_v(v_deform_fn, "block.attr.%d.align", attnum);
		attstoreblocks[attnum] =
			l_bb_append_v(v_deform_fn, "block.attr.%d.store", attnum);
	}

	/*
	 * Check if's guaranteed the all the desired attributes are available in
	 * tuple. If so, we can start deforming. If not, need to make sure to
	 * fetch the missing columns.
	 */
	if ((natts - 1) <= guaranteed_column_number)
	{
		/* just skip through unnecessary blocks */
		LLVMBuildBr(b, b_adjust_unavail_cols);
		LLVMPositionBuilderAtEnd(b, b_adjust_unavail_cols);
		LLVMBuildBr(b, b_find_start);
	}
	else
	{
		LLVMValueRef v_params[3];

		/* branch if not all columns available */
		LLVMBuildCondBr(b,
						LLVMBuildICmp(b, LLVMIntULT,
									  v_maxatt,
									  l_int16_const(natts),
									  ""),
						b_adjust_unavail_cols,
						b_find_start);

		/* if not, memset tts_isnull of relevant cols to true */
		LLVMPositionBuilderAtEnd(b, b_adjust_unavail_cols);

		v_params[0] = v_slot;
		v_params[1] = LLVMBuildZExt(b, v_maxatt, LLVMInt32Type(), "");
		v_params[2] = l_int32_const(natts);
		LLVMBuildCall(b, llvm_get_decl(mod, FuncSlotGetmissingattrs),
					  v_params, lengthof(v_params), "");
		LLVMBuildBr(b, b_find_start);
	}

	LLVMPositionBuilderAtEnd(b, b_find_start);

	v_nvalid = LLVMBuildLoad(b, v_nvalidp, "");

	/*
	 * Build switch to go from nvalid to the right startblock.  Callers
	 * currently don't have the knowledge, but it'd be good for performance to
	 * avoid this check when it's known that the slot is empty (e.g. in scan
	 * nodes).
	 */
	if (true)
	{
		LLVMValueRef v_switch = LLVMBuildSwitch(b, v_nvalid,
												b_dead, natts);

		for (attnum = 0; attnum < natts; attnum++)
		{
			LLVMValueRef v_attno = l_int32_const(attnum);

			LLVMAddCase(v_switch, v_attno, attcheckattnoblocks[attnum]);
		}

	}
	else
	{
		/* jump from entry block to first block */
		LLVMBuildBr(b, attcheckattnoblocks[0]);
	}

	LLVMPositionBuilderAtEnd(b, b_dead);
	LLVMBuildUnreachable(b);

	/*
	 * Iterate over each attribute that needs to be deformed, build code to
	 * deform it.
	 */
	for (attnum = 0; attnum < natts; attnum++)
	{
		Form_pg_attribute att = TupleDescAttr(desc, attnum);
		LLVMValueRef v_incby;
		int			alignto;
		LLVMValueRef l_attno = l_int16_const(attnum);
		LLVMValueRef v_attdatap;
		LLVMValueRef v_resultp;

		/* build block checking whether we did all the necessary attributes */
		LLVMPositionBuilderAtEnd(b, attcheckattnoblocks[attnum]);

		/*
		 * If this is the first attribute, slot->tts_nvalid was 0. Therefore
		 * reset offset to 0 to, it be from a previous execution.
		 */
		if (attnum == 0)
		{
			LLVMBuildStore(b, l_sizet_const(0), v_offp);
		}

		/*
		 * Build check whether column is available (i.e. whether the tuple has
		 * that many columns stored). We can avoid the branch if we know
		 * there's a subsequent NOT NULL column.
		 */
		if (attnum <= guaranteed_column_number)
		{
			LLVMBuildBr(b, attstartblocks[attnum]);
		}
		else
		{
			LLVMValueRef v_islast;

			v_islast = LLVMBuildICmp(b, LLVMIntUGE,
									 l_attno,
									 v_maxatt,
									 "heap_natts");
			LLVMBuildCondBr(b, v_islast, b_out, attstartblocks[attnum]);
		}
		LLVMPositionBuilderAtEnd(b, attstartblocks[attnum]);

		/*
		 * Check for nulls if necessary. No need to take missing attributes
		 * into account, because in case they're present the heaptuple's natts
		 * would have indicated that a slot_getmissingattrs() is needed.
		 */
		if (!att->attnotnull)
		{
			LLVMBasicBlockRef b_ifnotnull;
			LLVMBasicBlockRef b_ifnull;
			LLVMBasicBlockRef b_next;
			LLVMValueRef v_attisnull;
			LLVMValueRef v_nullbyteno;
			LLVMValueRef v_nullbytemask;
			LLVMValueRef v_nullbyte;
			LLVMValueRef v_nullbit;

			b_ifnotnull = attcheckalignblocks[attnum];
			b_ifnull = attisnullblocks[attnum];

			if (attnum + 1 == natts)
				b_next = b_out;
			else
				b_next = attcheckattnoblocks[attnum + 1];

			v_nullbyteno = l_int32_const(attnum >> 3);
			v_nullbytemask = l_int8_const(1 << ((attnum) & 0x07));
			v_nullbyte = l_load_gep1(b, v_bits, v_nullbyteno, "attnullbyte");

			v_nullbit = LLVMBuildICmp(b,
									  LLVMIntEQ,
									  LLVMBuildAnd(b, v_nullbyte, v_nullbytemask, ""),
									  l_int8_const(0),
									  "attisnull");

			v_attisnull = LLVMBuildAnd(b, v_hasnulls, v_nullbit, "");

			LLVMBuildCondBr(b, v_attisnull, b_ifnull, b_ifnotnull);

			LLVMPositionBuilderAtEnd(b, b_ifnull);

			/* store null-byte */
			LLVMBuildStore(b,
						   l_int8_const(1),
						   LLVMBuildGEP(b, v_tts_nulls, &l_attno, 1, ""));
			/* store zero datum */
			LLVMBuildStore(b,
						   l_sizet_const(0),
						   LLVMBuildGEP(b, v_tts_values, &l_attno, 1, ""));

			LLVMBuildBr(b, b_next);
			attguaranteedalign = false;
		}
		else
		{
Esempio n. 25
0
/**
 * Gather elements from scatter positions in memory into a single vector.
 * Use for fetching texels from a texture.
 * For SSE, typical values are length=4, src_width=32, dst_width=32.
 *
 * When src_width < dst_width, the return value can be justified in
 * one of two ways:
 * "integer justification" is used when the caller treats the destination
 * as a packed integer bitmask, as described by the channels' "shift" and
 * "width" fields;
 * "vector justification" is used when the caller casts the destination
 * to a vector and needs channel X to be in vector element 0.
 *
 * @param length length of the offsets
 * @param src_width src element width in bits
 * @param dst_type result element type (src will be expanded to fit,
 *        but truncation is not allowed)
 *        (this may be a vector, must be pot sized)
 * @param aligned whether the data is guaranteed to be aligned (to src_width)
 * @param base_ptr base pointer, needs to be a i8 pointer type.
 * @param offsets vector with offsets
 * @param vector_justify select vector rather than integer justification
 */
LLVMValueRef
lp_build_gather(struct gallivm_state *gallivm,
                unsigned length,
                unsigned src_width,
                struct lp_type dst_type,
                boolean aligned,
                LLVMValueRef base_ptr,
                LLVMValueRef offsets,
                boolean vector_justify)
{
   LLVMValueRef res;
   boolean need_expansion = src_width < dst_type.width * dst_type.length;
   boolean vec_fetch;
   struct lp_type fetch_type, fetch_dst_type;
   LLVMTypeRef src_type;

   assert(src_width <= dst_type.width * dst_type.length);

   /*
    * This is quite a mess...
    * Figure out if the fetch should be done as:
    * a) scalar or vector
    * b) float or int
    *
    * As an example, for a 96bit fetch expanded into 4x32bit, it is better
    * to use (3x32bit) vector type (then pad the vector). Otherwise, the
    * zext will cause extra instructions.
    * However, the same isn't true for 3x16bit (the codegen for that is
    * completely worthless on x86 simd, and for 3x8bit is is way worse
    * still, don't try that... (To get really good code out of llvm for
    * these cases, the only way is to decompose the fetches manually
    * into 1x32bit/1x16bit, or 1x16/1x8bit respectively, although the latter
    * case requires sse41, otherwise simple scalar zext is way better.
    * But probably not important enough, so don't bother.)
    * Also, we try to honor the floating bit of destination (but isn't
    * possible if caller asks for instance for 2x32bit dst_type with
    * 48bit fetch - the idea would be to use 3x16bit fetch, pad and
    * cast to 2x32f type, so the fetch is always int and on top of that
    * we avoid the vec pad and use scalar zext due the above mentioned
    * issue).
    * Note this is optimized for x86 sse2 and up backend. Could be tweaked
    * for other archs if necessary...
    */
   if (((src_width % 32) == 0) && ((src_width % dst_type.width) == 0) &&
       (dst_type.length > 1)) {
      /* use vector fetch (if dst_type is vector) */
      vec_fetch = TRUE;
      if (dst_type.floating) {
         fetch_type = lp_type_float_vec(dst_type.width, src_width);
      } else {
         fetch_type = lp_type_int_vec(dst_type.width, src_width);
      }
      /* intentionally not using lp_build_vec_type here */
      src_type = LLVMVectorType(lp_build_elem_type(gallivm, fetch_type),
                                fetch_type.length);
      fetch_dst_type = fetch_type;
      fetch_dst_type.length = dst_type.length;
    } else {
      /* use scalar fetch */
      vec_fetch = FALSE;
      if (dst_type.floating && ((src_width == 32) || (src_width == 64))) {
         fetch_type = lp_type_float(src_width);
      } else {
         fetch_type = lp_type_int(src_width);
      }
      src_type = lp_build_vec_type(gallivm, fetch_type);
      fetch_dst_type = fetch_type;
      fetch_dst_type.width = dst_type.width * dst_type.length;
   }

   if (length == 1) {
      /* Scalar */
      res = lp_build_gather_elem_vec(gallivm, length,
                                     src_width, src_type, fetch_dst_type,
                                     aligned, base_ptr, offsets, 0,
                                     vector_justify);
      return LLVMBuildBitCast(gallivm->builder, res,
                              lp_build_vec_type(gallivm, dst_type), "");
      /*
       * Excluding expansion from these paths because if you need it for
       * 32bit/64bit fetches you're doing it wrong (this is gather, not
       * conversion) and it would be awkward for floats.
       */
   } else if (util_cpu_caps.has_avx2 && !need_expansion &&
              src_width == 32 && (length == 4 || length == 8)) {
      return lp_build_gather_avx2(gallivm, length, src_width, dst_type,
                                  base_ptr, offsets);
   /*
    * This looks bad on paper wrt throughtput/latency on Haswell.
    * Even on Broadwell it doesn't look stellar.
    * Albeit no measurements were done (but tested to work).
    * Should definitely enable on Skylake.
    * (In general, should be more of a win if the fetch is 256bit wide -
    * this is true for the 32bit case above too.)
    */
   } else if (0 && util_cpu_caps.has_avx2 && !need_expansion &&
              src_width == 64 && (length == 2 || length == 4)) {
      return lp_build_gather_avx2(gallivm, length, src_width, dst_type,
                                  base_ptr, offsets);
   } else {
      /* Vector */

      LLVMValueRef elems[LP_MAX_VECTOR_WIDTH / 8];
      unsigned i;
      boolean vec_zext = FALSE;
      struct lp_type res_type, gather_res_type;
      LLVMTypeRef res_t, gather_res_t;

      res_type = fetch_dst_type;
      res_type.length *= length;
      gather_res_type = res_type;

      if (src_width == 16 && dst_type.width == 32 && dst_type.length == 1) {
         /*
          * Note that llvm is never able to optimize zext/insert combos
          * directly (i.e. zero the simd reg, then place the elements into
          * the appropriate place directly). (I think this has to do with
          * scalar/vector transition.) And scalar 16->32bit zext simd loads
          * aren't possible (instead loading to scalar reg first).
          * No idea about other archs...
          * We could do this manually, but instead we just use a vector
          * zext, which is simple enough (and, in fact, llvm might optimize
          * this away).
          * (We're not trying that with other bit widths as that might not be
          * easier, in particular with 8 bit values at least with only sse2.)
          */
         assert(vec_fetch == FALSE);
         gather_res_type.width /= 2;
         fetch_dst_type = fetch_type;
         src_type = lp_build_vec_type(gallivm, fetch_type);
         vec_zext = TRUE;
      }
      res_t = lp_build_vec_type(gallivm, res_type);
      gather_res_t = lp_build_vec_type(gallivm, gather_res_type);
      res = LLVMGetUndef(gather_res_t);
      for (i = 0; i < length; ++i) {
         LLVMValueRef index = lp_build_const_int32(gallivm, i);
         elems[i] = lp_build_gather_elem_vec(gallivm, length,
                                             src_width, src_type, fetch_dst_type,
                                             aligned, base_ptr, offsets, i,
                                             vector_justify);
         if (!vec_fetch) {
            res = LLVMBuildInsertElement(gallivm->builder, res, elems[i], index, "");
         }
      }
      if (vec_zext) {
         res = LLVMBuildZExt(gallivm->builder, res, res_t, "");
         if (vector_justify) {
#ifdef PIPE_ARCH_BIG_ENDIAN
            unsigned sv = dst_type.width - src_width;
            res = LLVMBuildShl(gallivm->builder, res,
                               lp_build_const_int_vec(gallivm, res_type, sv), "");
#endif
         }
      }
      if (vec_fetch) {
         /*
          * Do bitcast now otherwise llvm might get some funny ideas wrt
          * float/int types...
          */
         for (i = 0; i < length; i++) {
            elems[i] = LLVMBuildBitCast(gallivm->builder, elems[i],
                                        lp_build_vec_type(gallivm, dst_type), "");
         }
         res = lp_build_concat(gallivm, elems, dst_type, length);
      } else {
         struct lp_type really_final_type = dst_type;
         assert(res_type.length * res_type.width ==
                dst_type.length * dst_type.width * length);
         really_final_type.length *= length;
         res = LLVMBuildBitCast(gallivm->builder, res,
                                lp_build_vec_type(gallivm, really_final_type), "");
      }
   }

   return res;
}
Esempio n. 26
0
static LLVMValueRef box_is_box(compile_t* c, ast_t* left_type,
  LLVMValueRef l_value, LLVMValueRef r_value, int possible_boxes)
{
  pony_assert(LLVMGetTypeKind(LLVMTypeOf(l_value)) == LLVMPointerTypeKind);
  pony_assert(LLVMGetTypeKind(LLVMTypeOf(r_value)) == LLVMPointerTypeKind);

  LLVMBasicBlockRef this_block = LLVMGetInsertBlock(c->builder);
  LLVMBasicBlockRef checkbox_block = codegen_block(c, "is_checkbox");
  LLVMBasicBlockRef box_block = codegen_block(c, "is_box");
  LLVMBasicBlockRef num_block = NULL;
  if((possible_boxes & BOXED_SUBTYPES_NUMERIC) != 0)
    num_block = codegen_block(c, "is_num");
  LLVMBasicBlockRef tuple_block = NULL;
  if((possible_boxes & BOXED_SUBTYPES_TUPLE) != 0)
    tuple_block = codegen_block(c, "is_tuple");
  LLVMBasicBlockRef post_block = codegen_block(c, "is_post");

  LLVMValueRef eq_addr = LLVMBuildICmp(c->builder, LLVMIntEQ, l_value, r_value,
    "");
  LLVMBuildCondBr(c->builder, eq_addr, post_block, checkbox_block);

  // Check whether we have two boxed objects of the same type.
  LLVMPositionBuilderAtEnd(c->builder, checkbox_block);
  LLVMValueRef l_desc = gendesc_fetch(c, l_value);
  LLVMValueRef r_desc = gendesc_fetch(c, r_value);
  LLVMValueRef same_type = LLVMBuildICmp(c->builder, LLVMIntEQ, l_desc, r_desc,
    "");
  LLVMValueRef l_typeid = NULL;
  if((possible_boxes & BOXED_SUBTYPES_UNBOXED) != 0)
  {
    l_typeid = gendesc_typeid(c, l_value);
    LLVMValueRef boxed_mask = LLVMConstInt(c->i32, 1, false);
    LLVMValueRef left_boxed = LLVMBuildAnd(c->builder, l_typeid, boxed_mask,
      "");
    LLVMValueRef zero = LLVMConstInt(c->i32, 0, false);
    left_boxed = LLVMBuildICmp(c->builder, LLVMIntEQ, left_boxed, zero, "");
    LLVMValueRef both_boxed = LLVMBuildAnd(c->builder, same_type, left_boxed,
      "");
    LLVMBuildCondBr(c->builder, both_boxed, box_block, post_block);
  } else {
    LLVMBuildCondBr(c->builder, same_type, box_block, post_block);
  }

  // Check whether it's a numeric primitive or a tuple.
  LLVMPositionBuilderAtEnd(c->builder, box_block);
  if((possible_boxes & BOXED_SUBTYPES_BOXED) == BOXED_SUBTYPES_BOXED)
  {
    if(l_typeid == NULL)
      l_typeid = gendesc_typeid(c, l_value);
    LLVMValueRef num_mask = LLVMConstInt(c->i32, 2, false);
    LLVMValueRef boxed_num = LLVMBuildAnd(c->builder, l_typeid, num_mask, "");
    LLVMValueRef zero = LLVMConstInt(c->i32, 0, false);
    boxed_num = LLVMBuildICmp(c->builder, LLVMIntEQ, boxed_num, zero, "");
    LLVMBuildCondBr(c->builder, boxed_num, num_block, tuple_block);
  } else if((possible_boxes & BOXED_SUBTYPES_NUMERIC) != 0) {
    LLVMBuildBr(c->builder, num_block);
  } else {
    pony_assert((possible_boxes & BOXED_SUBTYPES_TUPLE) != 0);
    LLVMBuildBr(c->builder, tuple_block);
  }

  LLVMValueRef args[3];
  LLVMValueRef is_num = NULL;
  if(num_block != NULL)
  {
    // Get the machine word size and memcmp without unboxing.
    LLVMPositionBuilderAtEnd(c->builder, num_block);
    if(l_typeid == NULL)
      l_typeid = gendesc_typeid(c, l_value);
    LLVMValueRef num_sizes = LLVMBuildBitCast(c->builder, c->numeric_sizes,
      c->void_ptr, "");
    args[0] = LLVMBuildZExt(c->builder, l_typeid, c->intptr, "");
    LLVMValueRef size = LLVMBuildInBoundsGEP(c->builder, num_sizes, args, 1,
      "");
    size = LLVMBuildBitCast(c->builder, size, LLVMPointerType(c->i32, 0), "");
    size = LLVMBuildLoad(c->builder, size, "");
    LLVMSetAlignment(size, 4);
    LLVMValueRef one = LLVMConstInt(c->i32, 1, false);
    args[0] = LLVMBuildInBoundsGEP(c->builder, l_value, &one, 1, "");
    args[0] = LLVMBuildBitCast(c->builder, args[0], c->void_ptr, "");
    args[1] = LLVMBuildInBoundsGEP(c->builder, r_value, &one, 1, "");
    args[1] = LLVMBuildBitCast(c->builder, args[1], c->void_ptr, "");
    args[2] = LLVMBuildZExt(c->builder, size, c->intptr, "");
    is_num = gencall_runtime(c, "memcmp", args, 3, "");
    is_num = LLVMBuildICmp(c->builder, LLVMIntEQ, is_num,
      LLVMConstInt(c->i32, 0, false), "");
    LLVMBuildBr(c->builder, post_block);
  }

  LLVMValueRef is_tuple = NULL;
  if(tuple_block != NULL)
  {
    // Call the type-specific __is function, which will unbox the tuples.
    LLVMPositionBuilderAtEnd(c->builder, tuple_block);
    reach_type_t* r_left = reach_type(c->reach, left_type);
    reach_method_t* is_fn = reach_method(r_left, TK_BOX, stringtab("__is"),
      NULL);
    pony_assert(is_fn != NULL);
    LLVMValueRef func = gendesc_vtable(c, l_value, is_fn->vtable_index);
    LLVMTypeRef params[2];
    params[0] = c->object_ptr;
    params[1] = c->object_ptr;
    LLVMTypeRef type = LLVMFunctionType(c->i1, params, 2, false);
    func = LLVMBuildBitCast(c->builder, func, LLVMPointerType(type, 0), "");
    args[0] = l_value;
    args[1] = r_value;
    is_tuple = codegen_call(c, func, args, 2);
    LLVMBuildBr(c->builder, post_block);
  }

  LLVMPositionBuilderAtEnd(c->builder, post_block);
  LLVMValueRef phi = LLVMBuildPhi(c->builder, c->i1, "");
  LLVMValueRef one = LLVMConstInt(c->i1, 1, false);
  LLVMValueRef zero = LLVMConstInt(c->i1, 0, false);
  LLVMAddIncoming(phi, &one, &this_block, 1);
  if(is_num != NULL)
    LLVMAddIncoming(phi, &is_num, &num_block, 1);
  if(is_tuple != NULL)
    LLVMAddIncoming(phi, &is_tuple, &tuple_block, 1);
  LLVMAddIncoming(phi, &zero, &checkbox_block, 1);
  return phi;
}
Esempio n. 27
0
static LLVMValueRef gen_digestof_value(compile_t* c, ast_t* type,
  LLVMValueRef value)
{
  LLVMTypeRef impl_type = LLVMTypeOf(value);

  switch(LLVMGetTypeKind(impl_type))
  {
    case LLVMFloatTypeKind:
      value = LLVMBuildBitCast(c->builder, value, c->i32, "");
      return LLVMBuildZExt(c->builder, value, c->intptr, "");

    case LLVMDoubleTypeKind:
      value = LLVMBuildBitCast(c->builder, value, c->i64, "");
      return gen_digestof_int64(c, value);

    case LLVMIntegerTypeKind:
    {
      uint32_t width = LLVMGetIntTypeWidth(impl_type);

      if(width < 64)
      {
        return LLVMBuildZExt(c->builder, value, c->intptr, "");
      } else if(width == 64) {
        return gen_digestof_int64(c, value);
      } else if(width == 128) {
        LLVMValueRef shift = LLVMConstInt(c->i128, 64, false);
        LLVMValueRef high = LLVMBuildLShr(c->builder, value, shift, "");
        high = LLVMBuildTrunc(c->builder, high, c->i64, "");
        value = LLVMBuildTrunc(c->builder, value, c->i64, "");
        high = gen_digestof_int64(c, high);
        value = gen_digestof_int64(c, value);
        return LLVMBuildXor(c->builder, value, high, "");
      }
      break;
    }

    case LLVMStructTypeKind:
    {
      uint32_t count = LLVMCountStructElementTypes(impl_type);
      LLVMValueRef result = LLVMConstInt(c->intptr, 0, false);
      ast_t* child = ast_child(type);

      for(uint32_t i = 0; i < count; i++)
      {
        LLVMValueRef elem = LLVMBuildExtractValue(c->builder, value, i, "");
        elem = gen_digestof_value(c, child, elem);
        result = LLVMBuildXor(c->builder, result, elem, "");
        child = ast_sibling(child);
      }

      pony_assert(child == NULL);

      return result;
    }

    case LLVMPointerTypeKind:
      if(!is_known(type))
      {
        reach_type_t* t = reach_type(c->reach, type);
        int sub_kind = subtype_kind(t);

        if((sub_kind & SUBTYPE_KIND_BOXED) != 0)
          return gen_digestof_box(c, t, value, sub_kind);
      }

      return LLVMBuildPtrToInt(c->builder, value, c->intptr, "");

    default: {}
  }

  pony_assert(0);
  return NULL;
}
Esempio n. 28
0
/**
 * Gather one element from scatter positions in memory.
 * Nearly the same as above, however the individual elements
 * may be vectors themselves, and fetches may be float type.
 * Can also do pad vector instead of ZExt.
 *
 * @sa lp_build_gather()
 */
static LLVMValueRef
lp_build_gather_elem_vec(struct gallivm_state *gallivm,
                         unsigned length,
                         unsigned src_width,
                         LLVMTypeRef src_type,
                         struct lp_type dst_type,
                         boolean aligned,
                         LLVMValueRef base_ptr,
                         LLVMValueRef offsets,
                         unsigned i,
                         boolean vector_justify)
{
   LLVMValueRef ptr, res;
   LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0);
   assert(LLVMTypeOf(base_ptr) == LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0));

   ptr = lp_build_gather_elem_ptr(gallivm, length, base_ptr, offsets, i);
   ptr = LLVMBuildBitCast(gallivm->builder, ptr, src_ptr_type, "");
   res = LLVMBuildLoad(gallivm->builder, ptr, "");

   /* XXX
    * On some archs we probably really want to avoid having to deal
    * with alignments lower than 4 bytes (if fetch size is a power of
    * two >= 32). On x86 it doesn't matter, however.
    * We should be able to guarantee full alignment for any kind of texture
    * fetch (except ARB_texture_buffer_range, oops), but not vertex fetch
    * (there's PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY and friends
    * but I don't think that's quite what we wanted).
    * For ARB_texture_buffer_range, PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT
    * looks like a good fit, but it seems this cap bit (and OpenGL) aren't
    * enforcing what we want (which is what d3d10 does, the offset needs to
    * be aligned to element size, but GL has bytes regardless of element
    * size which would only leave us with minimum alignment restriction of 16
    * which doesn't make much sense if the type isn't 4x32bit). Due to
    * translation of offsets to first_elem in sampler_views it actually seems
    * gallium could not do anything else except 16 no matter what...
    */
   if (!aligned) {
      LLVMSetAlignment(res, 1);
   } else if (!util_is_power_of_two(src_width)) {
      /*
       * Full alignment is impossible, assume the caller really meant
       * the individual elements were aligned (e.g. 3x32bit format).
       * And yes the generated code may otherwise crash, llvm will
       * really assume 128bit alignment with a 96bit fetch (I suppose
       * that makes sense as it can just assume the upper 32bit to be
       * whatever).
       * Maybe the caller should be able to explicitly set this, but
       * this should cover all the 3-channel formats.
       */
      if (((src_width / 24) * 24 == src_width) &&
           util_is_power_of_two(src_width / 24)) {
          LLVMSetAlignment(res, src_width / 24);
      } else {
         LLVMSetAlignment(res, 1);
      }
   }

   assert(src_width <= dst_type.width * dst_type.length);
   if (src_width < dst_type.width * dst_type.length) {
      if (dst_type.length > 1) {
         res = lp_build_pad_vector(gallivm, res, dst_type.length);
         /*
          * vector_justify hopefully a non-issue since we only deal
          * with src_width >= 32 here?
          */
      } else {
         LLVMTypeRef dst_elem_type = lp_build_vec_type(gallivm, dst_type);

         /*
          * Only valid if src_ptr_type is int type...
          */
         res = LLVMBuildZExt(gallivm->builder, res, dst_elem_type, "");

#ifdef PIPE_ARCH_BIG_ENDIAN
         if (vector_justify) {
         res = LLVMBuildShl(gallivm->builder, res,
                            LLVMConstInt(dst_elem_type,
                                         dst_type.width - src_width, 0), "");
         }
         if (src_width == 48) {
            /* Load 3x16 bit vector.
             * The sequence of loads on big-endian hardware proceeds as follows.
             * 16-bit fields are denoted by X, Y, Z, and 0.  In memory, the sequence
             * of three fields appears in the order X, Y, Z.
             *
             * Load 32-bit word: 0.0.X.Y
             * Load 16-bit halfword: 0.0.0.Z
             * Rotate left: 0.X.Y.0
             * Bitwise OR: 0.X.Y.Z
             *
             * The order in which we need the fields in the result is 0.Z.Y.X,
             * the same as on little-endian; permute 16-bit fields accordingly
             * within 64-bit register:
             */
            LLVMValueRef shuffles[4] = {
               lp_build_const_int32(gallivm, 2),
               lp_build_const_int32(gallivm, 1),
               lp_build_const_int32(gallivm, 0),
               lp_build_const_int32(gallivm, 3),
            };
            res = LLVMBuildBitCast(gallivm->builder, res,
                                   lp_build_vec_type(gallivm, lp_type_uint_vec(16, 4*16)), "");
            res = LLVMBuildShuffleVector(gallivm->builder, res, res, LLVMConstVector(shuffles, 4), "");
            res = LLVMBuildBitCast(gallivm->builder, res, dst_elem_type, "");
         }
#endif
      }
   }
   return res;
}
/**
 * Truncate or expand the bitwidth.
 *
 * NOTE: Getting the right sign flags is crucial here, as we employ some
 * intrinsics that do saturation.
 */
void
lp_build_resize(struct gallivm_state *gallivm,
                struct lp_type src_type,
                struct lp_type dst_type,
                const LLVMValueRef *src, unsigned num_srcs,
                LLVMValueRef *dst, unsigned num_dsts)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH];
   unsigned i;

   /*
    * We don't support float <-> int conversion here. That must be done
    * before/after calling this function.
    */
   assert(src_type.floating == dst_type.floating);

   /*
    * We don't support double <-> float conversion yet, although it could be
    * added with little effort.
    */
   assert((!src_type.floating && !dst_type.floating) ||
          src_type.width == dst_type.width);

   /* We must not loose or gain channels. Only precision */
   assert(src_type.length * num_srcs == dst_type.length * num_dsts);

   /* We don't support M:N conversion, only 1:N, M:1, or 1:1 */
   assert(num_srcs == 1 || num_dsts == 1);

   assert(src_type.length <= LP_MAX_VECTOR_LENGTH);
   assert(dst_type.length <= LP_MAX_VECTOR_LENGTH);
   assert(num_srcs <= LP_MAX_VECTOR_LENGTH);
   assert(num_dsts <= LP_MAX_VECTOR_LENGTH);

   if (src_type.width > dst_type.width) {
      /*
       * Truncate bit width.
       */

      assert(num_dsts == 1);

      if (src_type.width * src_type.length == dst_type.width * dst_type.length) {
        /*
         * Register width remains constant -- use vector packing intrinsics
         */
         tmp[0] = lp_build_pack(gallivm, src_type, dst_type, TRUE, src, num_srcs);
      }
      else {
         if (src_type.width / dst_type.width > num_srcs) {
            /*
            * First change src vectors size (with shuffle) so they have the
            * same size as the destination vector, then pack normally.
            * Note: cannot use cast/extract because llvm generates atrocious code.
            */
            unsigned size_ratio = (src_type.width * src_type.length) /
                                  (dst_type.length * dst_type.width);
            unsigned new_length = src_type.length / size_ratio;

            for (i = 0; i < size_ratio * num_srcs; i++) {
               unsigned start_index = (i % size_ratio) * new_length;
               tmp[i] = lp_build_extract_range(gallivm, src[i / size_ratio],
                                               start_index, new_length);
            }
            num_srcs *= size_ratio;
            src_type.length = new_length;
            tmp[0] = lp_build_pack(gallivm, src_type, dst_type, TRUE, tmp, num_srcs);
         }
         else {
            /*
             * Truncate bit width but expand vector size - first pack
             * then expand simply because this should be more AVX-friendly
             * for the cases we probably hit.
             */
            unsigned size_ratio = (dst_type.width * dst_type.length) /
                                  (src_type.length * src_type.width);
            unsigned num_pack_srcs = num_srcs / size_ratio;
            dst_type.length = dst_type.length / size_ratio;

            for (i = 0; i < size_ratio; i++) {
               tmp[i] = lp_build_pack(gallivm, src_type, dst_type, TRUE,
                                      &src[i*num_pack_srcs], num_pack_srcs);
            }
            tmp[0] = lp_build_concat(gallivm, tmp, dst_type, size_ratio);
         }
      }
   }
   else if (src_type.width < dst_type.width) {
      /*
       * Expand bit width.
       */

      assert(num_srcs == 1);

      if (src_type.width * src_type.length == dst_type.width * dst_type.length) {
         /*
          * Register width remains constant -- use vector unpack intrinsics
          */
         lp_build_unpack(gallivm, src_type, dst_type, src[0], tmp, num_dsts);
      }
      else {
         /*
          * Do it element-wise.
          */
         assert(src_type.length * num_srcs == dst_type.length * num_dsts);

         for (i = 0; i < num_dsts; i++) {
            tmp[i] = lp_build_undef(gallivm, dst_type);
         }

         for (i = 0; i < src_type.length; ++i) {
            unsigned j = i / dst_type.length;
            LLVMValueRef srcindex = lp_build_const_int32(gallivm, i);
            LLVMValueRef dstindex = lp_build_const_int32(gallivm, i % dst_type.length);
            LLVMValueRef val = LLVMBuildExtractElement(builder, src[0], srcindex, "");

            if (src_type.sign && dst_type.sign) {
               val = LLVMBuildSExt(builder, val, lp_build_elem_type(gallivm, dst_type), "");
            } else {
               val = LLVMBuildZExt(builder, val, lp_build_elem_type(gallivm, dst_type), "");
            }
            tmp[j] = LLVMBuildInsertElement(builder, tmp[j], val, dstindex, "");
         }
      }
   }
   else {
      /*
       * No-op
       */

      assert(num_srcs == 1);
      assert(num_dsts == 1);

      tmp[0] = src[0];
   }

   for(i = 0; i < num_dsts; ++i)
      dst[i] = tmp[i];
}
Esempio n. 30
0
struct cl2llvm_val_t *llvm_type_cast(struct cl2llvm_val_t * original_val, 
	struct cl2llvmTypeWrap *totype_w_sign)
{
	struct cl2llvm_val_t *llvm_val = cl2llvm_val_create();

	int i;
	struct cl2llvmTypeWrap *elem_type;
	struct cl2llvm_val_t *cast_original_val;
	LLVMValueRef index;
	LLVMValueRef vector_addr;
	LLVMValueRef vector;
	LLVMValueRef const_elems[16];
	LLVMTypeRef fromtype = cl2llvmTypeWrapGetLlvmType(original_val->type);
	LLVMTypeRef totype = cl2llvmTypeWrapGetLlvmType(totype_w_sign);
	int fromsign = cl2llvmTypeWrapGetSign(original_val->type);
	int tosign = cl2llvmTypeWrapGetSign(totype_w_sign);

	/*By default the return value is the same as the original_val*/
	llvm_val->val = original_val->val;
	cl2llvmTypeWrapSetLlvmType(llvm_val->type, cl2llvmTypeWrapGetLlvmType(original_val->type));
	cl2llvmTypeWrapSetSign(llvm_val->type, cl2llvmTypeWrapGetSign(original_val->type));
	
	snprintf(temp_var_name, sizeof temp_var_name,
		"tmp_%d", temp_var_count++);
		
	/* Check that fromtype is not a vector, unless both types are identical. */
	if (LLVMGetTypeKind(fromtype) == LLVMVectorTypeKind)
	{
		if ((LLVMGetVectorSize(fromtype) != LLVMGetVectorSize(totype) 
			|| LLVMGetElementType(fromtype) 
			!= LLVMGetElementType(totype)) 
			|| fromsign != tosign)
		{
			if (LLVMGetTypeKind(totype) == LLVMVectorTypeKind)
				cl2llvm_yyerror("Casts between vector types are forbidden");
			cl2llvm_yyerror("A vector may not be cast to any other type.");
		}
	}

	/* If totype is a vector, create a vector whose components are equal to 
	original_val */

	if (LLVMGetTypeKind(totype) == LLVMVectorTypeKind
		&& LLVMGetTypeKind(fromtype) != LLVMVectorTypeKind)
	{
		/*Go to entry block and declare vector*/
		LLVMPositionBuilder(cl2llvm_builder, cl2llvm_current_function->entry_block,
			cl2llvm_current_function->branch_instr);
		
		snprintf(temp_var_name, sizeof temp_var_name,
			"tmp_%d", temp_var_count++);
			
		vector_addr = LLVMBuildAlloca(cl2llvm_builder, 
			totype, temp_var_name);
		LLVMPositionBuilderAtEnd(cl2llvm_builder, current_basic_block);

		/* Load vector */
		snprintf(temp_var_name, sizeof temp_var_name,
			"tmp_%d", temp_var_count++);
	
		vector = LLVMBuildLoad(cl2llvm_builder, vector_addr, temp_var_name);
		
		/* Create object to represent element type of totype */
		elem_type = cl2llvmTypeWrapCreate(LLVMGetElementType(totype), tosign);

		/* If original_val is constant create a constant vector */
		if (LLVMIsConstant(original_val->val))
		{
			cast_original_val = llvm_type_cast(original_val, elem_type);
			for (i = 0; i < LLVMGetVectorSize(totype); i++)
				const_elems[i] = cast_original_val->val;

			vector = LLVMConstVector(const_elems, 	
				LLVMGetVectorSize(totype));
			llvm_val->val = vector;

			cl2llvm_val_free(cast_original_val);
		}
		/* If original value is not constant insert elements */
		else
		{
			for (i = 0; i < LLVMGetVectorSize(totype); i++)
			{
				index = LLVMConstInt(LLVMInt32Type(), i, 0);
				cast_original_val = llvm_type_cast(original_val, elem_type);
				snprintf(temp_var_name, sizeof temp_var_name,
					"tmp_%d", temp_var_count++);
	
				vector = LLVMBuildInsertElement(cl2llvm_builder, 
					vector, cast_original_val->val, index, temp_var_name);
				cl2llvm_val_free(cast_original_val);
			}
		}
		cl2llvmTypeWrapFree(elem_type);
		llvm_val->val = vector;
	}


	if (fromtype == LLVMInt64Type())
	{
		if (totype == LLVMDoubleType())
		{
			if (fromsign)
			{
				llvm_val->val =
						LLVMBuildSIToFP(cl2llvm_builder,
						  original_val->val, LLVMDoubleType(),
					temp_var_name);
			}
			else
			{
				llvm_val->val =
					LLVMBuildUIToFP(cl2llvm_builder,
					  original_val->val, LLVMDoubleType(),
					temp_var_name);
			}
			cl2llvmTypeWrapSetSign(llvm_val->type, 1);
		}
		else if (totype == LLVMFloatType())
		{
			if (fromsign)
			{
				llvm_val->val =
					LLVMBuildSIToFP(cl2llvm_builder,
					  original_val->val, LLVMFloatType(),
					temp_var_name);
			}
			else
			{
				llvm_val->val =
					LLVMBuildUIToFP(cl2llvm_builder,
					  original_val->val, LLVMFloatType(),
					temp_var_name);
			}
			cl2llvmTypeWrapSetSign(llvm_val->type, 1);
		}
		else if (totype == LLVMHalfType())
		{
			if (fromsign)
			{
				llvm_val->val =
					LLVMBuildSIToFP(cl2llvm_builder,
					  original_val->val, LLVMHalfType(),
					temp_var_name);
			}
			else
			{
				llvm_val->val =
					LLVMBuildUIToFP(cl2llvm_builder,
					  original_val->val, LLVMHalfType(),
					temp_var_name);
			}
			cl2llvmTypeWrapSetSign(llvm_val->type, 1);
		}
		else if (totype == LLVMInt64Type())
		{
			if (tosign)
				cl2llvmTypeWrapSetSign(llvm_val->type, 1);
			else
				cl2llvmTypeWrapSetSign(llvm_val->type, 0);
			temp_var_count--;
		}
		else if (totype == LLVMInt32Type())
		{
			llvm_val->val = LLVMBuildTrunc(cl2llvm_builder,
				  original_val->val, LLVMInt32Type(), temp_var_name);
			if(tosign)
				cl2llvmTypeWrapSetSign(llvm_val->type, 1);
			else
				cl2llvmTypeWrapSetSign(llvm_val->type, 0);
		}
		else if (totype == LLVMInt16Type())
		{
			llvm_val->val = LLVMBuildTrunc(cl2llvm_builder,
				  original_val->val, LLVMInt16Type(), temp_var_name);
			if(tosign)
				cl2llvmTypeWrapSetSign(llvm_val->type, 1);
			else
				cl2llvmTypeWrapSetSign(llvm_val->type, 0);
		}
		else if (totype == LLVMInt8Type())
		{
			llvm_val->val = LLVMBuildTrunc(cl2llvm_builder,
				  original_val->val, LLVMInt8Type(), temp_var_name);
			if(tosign)
				cl2llvmTypeWrapSetSign(llvm_val->type, 1);
			else
				cl2llvmTypeWrapSetSign(llvm_val->type, 0);
		}
		else if (totype == LLVMInt1Type())
		{
			llvm_val->val = LLVMBuildTrunc(cl2llvm_builder,
				  original_val->val, LLVMInt1Type(), temp_var_name);
			if(tosign)
				cl2llvmTypeWrapSetSign(llvm_val->type, 1);
			else
				cl2llvmTypeWrapSetSign(llvm_val->type, 0);
		}
			
	}
	else if (fromtype == LLVMInt32Type())
	{
		if (totype == LLVMDoubleType())
		{
			if (fromsign)
			{
				llvm_val->val =
					LLVMBuildSIToFP(cl2llvm_builder,
					  original_val->val, LLVMDoubleType(),
					temp_var_name);
			}
			else
			{
				llvm_val->val =
					LLVMBuildUIToFP(cl2llvm_builder,
					  original_val->val, LLVMDoubleType(),
					temp_var_name);
			}
			cl2llvmTypeWrapSetSign(llvm_val->type, 1);
		}
		else if (totype == LLVMFloatType())
		{
			if (fromsign)
			{
				llvm_val->val =
					LLVMBuildSIToFP(cl2llvm_builder,
					  original_val->val, LLVMFloatType(),
					temp_var_name);
			}
			else
			{
				llvm_val->val =
					LLVMBuildUIToFP(cl2llvm_builder,
					  original_val->val, LLVMFloatType(),
					temp_var_name);
			}
			cl2llvmTypeWrapSetSign(llvm_val->type, 1);
		}
		else if (totype == LLVMHalfType())
		{
			if (fromsign)
			{
				llvm_val->val =
					LLVMBuildSIToFP(cl2llvm_builder,
					  original_val->val, LLVMHalfType(),
					temp_var_name);
			}
			else
			{
				llvm_val->val =
					LLVMBuildUIToFP(cl2llvm_builder,
					  original_val->val, LLVMHalfType(),
					temp_var_name);
			}
			cl2llvmTypeWrapSetSign(llvm_val->type, 1);
		}
		else if (totype == LLVMInt64Type())
		{
			if (fromsign)
			{
				llvm_val->val = LLVMBuildSExt(cl2llvm_builder,
					  original_val->val, LLVMInt64Type(),
					temp_var_name);
			}
			else
			{
				llvm_val->val = LLVMBuildZExt(cl2llvm_builder,
					  original_val->val, LLVMInt64Type(),
					temp_var_name);
			}
			if (tosign)
				cl2llvmTypeWrapSetSign(llvm_val->type, 1);
			else
				cl2llvmTypeWrapSetSign(llvm_val->type, 0);
		}
		else if (totype == LLVMInt32Type())
		{
			if(tosign)
				cl2llvmTypeWrapSetSign(llvm_val->type, 1);
			else
				cl2llvmTypeWrapSetSign(llvm_val->type, 0);
			temp_var_count--;
		}
		else if (totype == LLVMInt16Type())
		{
			llvm_val->val = LLVMBuildTrunc(cl2llvm_builder,
				  original_val->val, LLVMInt16Type(), temp_var_name);
			if(tosign)
				cl2llvmTypeWrapSetSign(llvm_val->type, 1);
			else
				cl2llvmTypeWrapSetSign(llvm_val->type, 0);
		}
		else if (totype == LLVMInt8Type())
		{
			llvm_val->val = LLVMBuildTrunc(cl2llvm_builder,
				 original_val->val, LLVMInt8Type(), temp_var_name);
			if(tosign)
				cl2llvmTypeWrapSetSign(llvm_val->type, 1);
			else
				cl2llvmTypeWrapSetSign(llvm_val->type, 0);
		}
		else if (totype == LLVMInt1Type())
		{
			llvm_val->val = LLVMBuildTrunc(cl2llvm_builder,
				  original_val->val, LLVMInt1Type(), temp_var_name);
			if(tosign)
				cl2llvmTypeWrapSetSign(llvm_val->type, 1);
			else
				cl2llvmTypeWrapSetSign(llvm_val->type, 0);
		}
			
	}
	else if (fromtype == LLVMInt16Type())
	{
		if (totype == LLVMDoubleType())
		{
			if (fromsign)
			{
				llvm_val->val =
					LLVMBuildSIToFP(cl2llvm_builder,
					  original_val->val, LLVMDoubleType(),
					temp_var_name);
			}
			else
			{
				llvm_val->val =
					LLVMBuildUIToFP(cl2llvm_builder,
					  original_val->val, LLVMDoubleType(),
					temp_var_name);
			}
			cl2llvmTypeWrapSetSign(llvm_val->type, 1);
		}
		else if (totype == LLVMFloatType())
		{
			if (fromsign)
			{
				llvm_val->val =
					LLVMBuildSIToFP(cl2llvm_builder,
					  original_val->val, LLVMFloatType(),
					temp_var_name);
			}
			else
			{
				llvm_val->val =
					LLVMBuildUIToFP(cl2llvm_builder,
					  original_val->val, LLVMFloatType(),
					temp_var_name);
			}
			cl2llvmTypeWrapSetSign(llvm_val->type, 1);
		}
		else if (totype == LLVMHalfType())
		{
			if (fromsign)
			{
				llvm_val->val =
					LLVMBuildSIToFP(cl2llvm_builder,
					  original_val->val, LLVMHalfType(),
					temp_var_name);
			}
			else
			{
				llvm_val->val =
					LLVMBuildUIToFP(cl2llvm_builder,
					  original_val->val, LLVMHalfType(),
					temp_var_name);
			}
			cl2llvmTypeWrapSetSign(llvm_val->type, 1);
		}
		else if (totype == LLVMInt64Type())
		{
			if (fromsign)
			{
				llvm_val->val = LLVMBuildSExt(cl2llvm_builder,
					  original_val->val, LLVMInt64Type(),
					temp_var_name);
			}
			else
			{
				llvm_val->val = LLVMBuildZExt(cl2llvm_builder,
					  original_val->val, LLVMInt64Type(),
					temp_var_name);
			}
			if (tosign)
				cl2llvmTypeWrapSetSign(llvm_val->type, 1);
			else
				cl2llvmTypeWrapSetSign(llvm_val->type, 0);
		}
		else if (totype == LLVMInt32Type())
		{
			if (fromsign)
			{
				llvm_val->val = LLVMBuildSExt(cl2llvm_builder,
					  original_val->val, LLVMInt32Type(),
					temp_var_name);
			}
			else
			{
				llvm_val->val = LLVMBuildZExt(cl2llvm_builder,
					  original_val->val, LLVMInt32Type(),
					temp_var_name);
			}
			if(tosign)
				cl2llvmTypeWrapSetSign(llvm_val->type, 1);
			else
				cl2llvmTypeWrapSetSign(llvm_val->type, 0);
		}
		else if (totype == LLVMInt16Type())
		{
			if(tosign)
				cl2llvmTypeWrapSetSign(llvm_val->type, 1);
			else
				cl2llvmTypeWrapSetSign(llvm_val->type, 0);
			temp_var_count--;
		}
		else if (totype == LLVMInt8Type())
		{
			llvm_val->val = LLVMBuildTrunc(cl2llvm_builder,
				  original_val->val, LLVMInt8Type(), temp_var_name);
			if(tosign)
				cl2llvmTypeWrapSetSign(llvm_val->type, 1);
			else
				cl2llvmTypeWrapSetSign(llvm_val->type, 0);
		}
		else if (totype == LLVMInt1Type())
		{
			llvm_val->val = LLVMBuildTrunc(cl2llvm_builder,
				  original_val->val, LLVMInt1Type(), temp_var_name);
			if(tosign)
				cl2llvmTypeWrapSetSign(llvm_val->type, 1);
			else
				cl2llvmTypeWrapSetSign(llvm_val->type, 0);
		}
			
	}
	else if (fromtype == LLVMInt8Type())
	{
		if (totype == LLVMDoubleType())
		{
			if (fromsign)
			{
				llvm_val->val =
					LLVMBuildSIToFP(cl2llvm_builder,
					  original_val->val, LLVMDoubleType(),
					temp_var_name);
			}
			else
			{
				llvm_val->val =
					LLVMBuildUIToFP(cl2llvm_builder,
					  original_val->val, LLVMDoubleType(),
					temp_var_name);
			}
			cl2llvmTypeWrapSetSign(llvm_val->type, 1);
		}
		else if (totype == LLVMFloatType())
		{
			if (fromsign)
			{
				llvm_val->val =
					LLVMBuildSIToFP(cl2llvm_builder,
					  original_val->val, LLVMFloatType(),
					temp_var_name);
			}
			else
			{
				llvm_val->val =
					LLVMBuildUIToFP(cl2llvm_builder,
					  original_val->val, LLVMFloatType(),
					temp_var_name);
			}
			cl2llvmTypeWrapSetSign(llvm_val->type, 1);
		}
		else if (totype == LLVMHalfType())
		{
			if (fromsign)
			{
				llvm_val->val =
					LLVMBuildSIToFP(cl2llvm_builder,
					  original_val->val, LLVMHalfType(),
					temp_var_name);
			}
			else
			{
				llvm_val->val =
					LLVMBuildUIToFP(cl2llvm_builder,
					  original_val->val, LLVMHalfType(),
					temp_var_name);
			}
			cl2llvmTypeWrapSetSign(llvm_val->type, 1);
		}
		else if (totype == LLVMInt64Type())
		{
			if (fromsign)
			{
				llvm_val->val = LLVMBuildSExt(cl2llvm_builder,
					  original_val->val, LLVMInt64Type(),
					temp_var_name);
			}
			else
			{
				llvm_val->val = LLVMBuildZExt(cl2llvm_builder,
					  original_val->val, LLVMInt64Type(),
					temp_var_name);
			}
			if (tosign)
				cl2llvmTypeWrapSetSign(llvm_val->type, 1);
			else
				cl2llvmTypeWrapSetSign(llvm_val->type, 0);
		}
		else if (totype == LLVMInt32Type())
		{
			if (fromsign)
			{
				llvm_val->val = LLVMBuildSExt(cl2llvm_builder,
					  original_val->val, LLVMInt32Type(),
					temp_var_name);
			}
			else
			{
				llvm_val->val = LLVMBuildZExt(cl2llvm_builder,
					  original_val->val, LLVMInt32Type(),
					temp_var_name);
			}
			if(tosign)
				cl2llvmTypeWrapSetSign(llvm_val->type, 1);
			else
				cl2llvmTypeWrapSetSign(llvm_val->type, 0);
		}
		else if (totype == LLVMInt16Type())
		{
			if (fromsign)
			{
				llvm_val->val = LLVMBuildSExt(cl2llvm_builder,
					  original_val->val, LLVMInt16Type(),
					temp_var_name);
			}
			else
			{
				llvm_val->val = LLVMBuildZExt(cl2llvm_builder,
					  original_val->val, LLVMInt16Type(),
					temp_var_name);
			}
			if(tosign)
				cl2llvmTypeWrapSetSign(llvm_val->type, 1);
			else
				cl2llvmTypeWrapSetSign(llvm_val->type, 0);
		}
		else if (totype == LLVMInt8Type())
		{
			if(tosign)
				cl2llvmTypeWrapSetSign(llvm_val->type, 1);
			else
				cl2llvmTypeWrapSetSign(llvm_val->type, 0);
			temp_var_count--;
		}
		else if (totype == LLVMInt1Type())
		{
			llvm_val->val = LLVMBuildTrunc(cl2llvm_builder,
				  original_val->val, LLVMInt1Type(), temp_var_name);
			if(tosign)
				cl2llvmTypeWrapSetSign(llvm_val->type, 1);
			else
				cl2llvmTypeWrapSetSign(llvm_val->type, 0);
		}
			
	}
	else if (fromtype == LLVMInt1Type())
	{
		if (totype == LLVMDoubleType())
		{
			if (fromsign)
			{
				llvm_val->val =
					LLVMBuildSIToFP(cl2llvm_builder,
					  original_val->val, LLVMDoubleType(),
					temp_var_name);
			}
			else
			{
				llvm_val->val =
					LLVMBuildUIToFP(cl2llvm_builder,
					  original_val->val, LLVMDoubleType(),
					temp_var_name);
			}
			cl2llvmTypeWrapSetSign(llvm_val->type, 1);
		}
		else if (totype == LLVMFloatType())
		{
			if (fromsign)
			{
				llvm_val->val =
					LLVMBuildSIToFP(cl2llvm_builder,
					  original_val->val, LLVMFloatType(),
					temp_var_name);
			}
			else
			{
				llvm_val->val =
					LLVMBuildUIToFP(cl2llvm_builder,
					  original_val->val, LLVMFloatType(),
					temp_var_name);
			}
			cl2llvmTypeWrapSetSign(llvm_val->type, 1);
		}
		else if (totype == LLVMHalfType())
		{
			if (fromsign)
			{
				llvm_val->val =
					LLVMBuildSIToFP(cl2llvm_builder,
					  original_val->val, LLVMHalfType(),
					temp_var_name);
			}
			else
			{
				llvm_val->val =
					LLVMBuildUIToFP(cl2llvm_builder,
					  original_val->val, LLVMHalfType(),
					temp_var_name);
			}
			cl2llvmTypeWrapSetSign(llvm_val->type, 1);
		}
		else if (totype == LLVMInt64Type())
		{
			if (fromsign)
			{
				llvm_val->val = LLVMBuildSExt(cl2llvm_builder,
					  original_val->val, LLVMInt64Type(),
					temp_var_name);
			}
			else
			{
				llvm_val->val = LLVMBuildZExt(cl2llvm_builder,
					  original_val->val, LLVMInt64Type(),
					temp_var_name);
			}
			if (tosign)
				cl2llvmTypeWrapSetSign(llvm_val->type, 1);
			else
				cl2llvmTypeWrapSetSign(llvm_val->type, 0);
		}
		else if (totype == LLVMInt32Type())
		{
			if (fromsign)
			{
				llvm_val->val = LLVMBuildSExt(cl2llvm_builder,
					  original_val->val, LLVMInt32Type(),
					temp_var_name);
			}
			else
			{
				llvm_val->val = LLVMBuildZExt(cl2llvm_builder,
					  original_val->val, LLVMInt32Type(),
					temp_var_name);
			}
			if(tosign)
				cl2llvmTypeWrapSetSign(llvm_val->type, 1);
			else
				cl2llvmTypeWrapSetSign(llvm_val->type, 0);
		}
		else if (totype == LLVMInt16Type())
		{
			if (fromsign)
			{
				llvm_val->val = LLVMBuildSExt(cl2llvm_builder,
					  original_val->val, LLVMInt16Type(),
					temp_var_name);
			}
			else
			{
				llvm_val->val = LLVMBuildZExt(cl2llvm_builder,
					  original_val->val, LLVMInt16Type(),
					temp_var_name);
			}
			if(tosign)
				cl2llvmTypeWrapSetSign(llvm_val->type, 1);
			else
				cl2llvmTypeWrapSetSign(llvm_val->type, 0);
		}
		else if (totype == LLVMInt8Type())
		{
			if (fromsign)
			{
				llvm_val->val = LLVMBuildSExt(cl2llvm_builder,
					  original_val->val, LLVMInt8Type(),
					temp_var_name);
			}
			else
			{
				llvm_val->val = LLVMBuildZExt(cl2llvm_builder,
					  original_val->val, LLVMInt8Type(),
					temp_var_name);
			}
			if(tosign)
				cl2llvmTypeWrapSetSign(llvm_val->type, 1);
			else
				cl2llvmTypeWrapSetSign(llvm_val->type, 0);
		}
		else if (totype == LLVMInt1Type())
		{
			if(tosign)
				cl2llvmTypeWrapSetSign(llvm_val->type, 1);
			else
				cl2llvmTypeWrapSetSign(llvm_val->type, 0);
			temp_var_count--;
		}			
	}

	/*We now know that from type must be a floating point.*/

	/*Floating point to signed integer conversions*/
	else if (tosign && LLVMGetTypeKind(totype) == 8)
	{
		if (totype == LLVMInt64Type())
		{
			llvm_val->val = LLVMBuildFPToSI(cl2llvm_builder, 
				  original_val->val, LLVMInt64Type(), temp_var_name);
		}
		else if (totype == LLVMInt32Type())
		{
			llvm_val->val = LLVMBuildFPToSI(cl2llvm_builder, 
				  original_val->val, LLVMInt32Type(), temp_var_name);
		}
		else if (totype == LLVMInt16Type())
		{
			llvm_val->val = LLVMBuildFPToSI(cl2llvm_builder, 
				  original_val->val, LLVMInt16Type(), temp_var_name);
		}
		else if (totype == LLVMInt8Type())
		{
			llvm_val->val = LLVMBuildFPToSI(cl2llvm_builder, 
				  original_val->val, LLVMInt8Type(), temp_var_name);
		}
		else if (totype == LLVMInt1Type())
		{
			llvm_val->val = LLVMBuildFPToSI(cl2llvm_builder, 
				  original_val->val, LLVMInt1Type(), temp_var_name);
		}
		cl2llvmTypeWrapSetSign(llvm_val->type, 1);
	}
	/*Floating point to unsigned integer conversions*/
	else if (!tosign)
	{
		if (totype == LLVMInt64Type())
		{
			llvm_val->val = LLVMBuildFPToUI(cl2llvm_builder, 
				  original_val->val, LLVMInt64Type(), temp_var_name);
		}
		else if (totype == LLVMInt32Type())
		{
			llvm_val->val = LLVMBuildFPToUI(cl2llvm_builder, 
				  original_val->val, LLVMInt32Type(), temp_var_name);
		}
		else if (totype == LLVMInt16Type())
		{
			llvm_val->val = LLVMBuildFPToUI(cl2llvm_builder, 
				  original_val->val, LLVMInt16Type(), temp_var_name);
		}
		else if (totype == LLVMInt8Type())
		{
			llvm_val->val = LLVMBuildFPToUI(cl2llvm_builder, 
				  original_val->val, LLVMInt8Type(), temp_var_name);
		}
		else if (totype == LLVMInt1Type())
		{
			llvm_val->val = LLVMBuildFPToUI(cl2llvm_builder, 
				  original_val->val, LLVMInt1Type(), temp_var_name);
		}
		cl2llvmTypeWrapSetSign(llvm_val->type, 0);
	}
	else if (totype == LLVMDoubleType())
	{
		llvm_val->val = LLVMBuildFPExt(cl2llvm_builder, 
			  original_val->val, LLVMDoubleType(), temp_var_name);
		cl2llvmTypeWrapSetSign(llvm_val->type, 1);
	}
	else if (totype == LLVMFloatType())
	{
		if (fromtype == LLVMDoubleType())
		{
			llvm_val->val = LLVMBuildFPTrunc(cl2llvm_builder, 
				  original_val->val, LLVMFloatType(), temp_var_name);
		}
		else if (fromtype == LLVMHalfType())
		{
			llvm_val->val = LLVMBuildFPExt(cl2llvm_builder, 
				  original_val->val, LLVMFloatType(), temp_var_name);
		}
		cl2llvmTypeWrapSetSign(llvm_val->type, 1);
	}
	else if (totype == LLVMHalfType())
	{
		llvm_val->val = LLVMBuildFPTrunc(cl2llvm_builder, 
			  original_val->val, LLVMHalfType(), temp_var_name);
		cl2llvmTypeWrapSetSign(llvm_val->type, 1);
	}
	cl2llvmTypeWrapSetLlvmType(llvm_val->type, totype);
	cl2llvmTypeWrapSetSign(llvm_val->type, tosign);
	
	return llvm_val;
}