Пример #1
0
/**
 * Build the per-pixel x and y offset vectors for one coefficient vector.
 *
 * Lane i of each output receives quad_offset_x[i] / quad_offset_y[i] plus a
 * base offset selected by quad_start_index: bit 0 adds 2 to x, bit 1 adds 2
 * to y.
 *
 * @param coeff_bld         build context; supplies vector length and undef
 * @param quad_start_index  index whose two low bits select the base offset
 * @param pixoffx           receives the x offset vector
 * @param pixoffy           receives the y offset vector
 */
static void
calc_offsets(struct lp_build_context *coeff_bld,
             unsigned quad_start_index,
             LLVMValueRef *pixoffx,
             LLVMValueRef *pixoffy)
{
   struct gallivm_state *gallivm = coeff_bld->gallivm;
   LLVMBuilderRef builder = coeff_bld->gallivm->builder;
   const unsigned lane_count = coeff_bld->type.length;
   unsigned lane;

   /* Start from undefined vectors and fill them in one lane at a time. */
   *pixoffx = coeff_bld->undef;
   *pixoffy = coeff_bld->undef;

   for (lane = 0; lane < lane_count; lane++) {
      LLVMValueRef lane_index = lp_build_const_int32(gallivm, lane);
      /* lane is always below lane_count, so it indexes the tables directly */
      LLVMValueRef off_x = lp_build_const_float(gallivm,
                                                quad_offset_x[lane] +
                                                (quad_start_index & 1) * 2);
      LLVMValueRef off_y = lp_build_const_float(gallivm,
                                                quad_offset_y[lane] +
                                                (quad_start_index & 2));

      *pixoffx = LLVMBuildInsertElement(builder, *pixoffx, off_x,
                                        lane_index, "");
      *pixoffy = LLVMBuildInsertElement(builder, *pixoffy, off_y,
                                        lane_index, "");
   }
}
Пример #2
0
/**
 * Gather elements from scattered memory positions into a single vector.
 * Used for fetching texels from a texture.
 * For SSE, typical values are length=4, src_width=32, dst_width=32.
 *
 * When length is 1 the single fetched element is returned directly instead
 * of being wrapped in a vector.
 *
 * @param gallivm    gallivm state providing the LLVM context and builder
 * @param length     number of offsets (and of result elements)
 * @param src_width  source element width in bits
 * @param dst_width  result element width in bits (src is expanded to fit)
 * @param base_ptr   base pointer, should be an i8 pointer type
 * @param offsets    vector with offsets
 * @return the gathered scalar (length == 1) or vector
 */
LLVMValueRef
lp_build_gather(struct gallivm_state *gallivm,
                unsigned length,
                unsigned src_width,
                unsigned dst_width,
                LLVMValueRef base_ptr,
                LLVMValueRef offsets)
{
   LLVMTypeRef elem_type;
   LLVMTypeRef vec_type;
   LLVMValueRef gathered;
   unsigned lane;

   /* Scalar: one fetch, nothing to assemble. */
   if (length == 1) {
      return lp_build_gather_elem(gallivm, length,
                                  src_width, dst_width,
                                  base_ptr, offsets, 0);
   }

   /* Vector: fetch every lane and insert it into the result. */
   elem_type = LLVMIntTypeInContext(gallivm->context, dst_width);
   vec_type = LLVMVectorType(elem_type, length);
   gathered = LLVMGetUndef(vec_type);

   for (lane = 0; lane < length; ++lane) {
      LLVMValueRef lane_index = lp_build_const_int32(gallivm, lane);
      LLVMValueRef fetched = lp_build_gather_elem(gallivm, length,
                                                  src_width, dst_width,
                                                  base_ptr, offsets, lane);

      gathered = LLVMBuildInsertElement(gallivm->builder, gathered,
                                        fetched, lane_index, "");
   }

   return gathered;
}
Пример #3
0
/**
 * Pack value_count values, taken every value_stride entries from the
 * values array, into one vector.
 *
 * With a single value no vector is built and the value itself is returned.
 * When load is set, each array entry is loaded through LLVMBuildLoad
 * before use.
 *
 * @param ctx           context providing the builder and the i32 type
 * @param values        array of values (or of pointers when load is set)
 * @param value_count   number of elements to gather; must not be 0
 * @param value_stride  distance, in array entries, between gathered values
 * @param load          load through each entry instead of using it directly
 * @return the single value, or a vector of value_count elements
 */
LLVMValueRef
ac_build_gather_values_extended(struct ac_llvm_context *ctx,
				LLVMValueRef *values,
				unsigned value_count,
				unsigned value_stride,
				bool load)
{
	LLVMBuilderRef builder = ctx->builder;
	LLVMValueRef result = NULL;
	unsigned n;

	if (value_count == 1) {
		return load ? LLVMBuildLoad(builder, values[0], "")
			    : values[0];
	}

	if (value_count == 0)
		unreachable("value_count is 0");

	for (n = 0; n < value_count; n++) {
		LLVMValueRef elem = values[n * value_stride];
		LLVMValueRef position;

		if (load)
			elem = LLVMBuildLoad(builder, elem, "");

		/* The vector type is only known once the first element is. */
		if (n == 0) {
			LLVMTypeRef vec_type =
				LLVMVectorType(LLVMTypeOf(elem), value_count);
			result = LLVMGetUndef(vec_type);
		}

		position = LLVMConstInt(ctx->i32, n, false);
		result = LLVMBuildInsertElement(builder, result, elem,
						position, "");
	}

	return result;
}
Пример #4
0
/**
 * Replicate a scalar into every element of a vector of type vec_type.
 *
 * If vec_type is not a vector type, the scalar is returned unchanged.
 *
 * @param gallivm   gallivm state providing the LLVM context and builder
 * @param vec_type  requested result type
 * @param scalar    value to replicate; its type must match the element type
 * @return the scalar itself, or a vector holding it in every element
 */
LLVMValueRef
lp_build_broadcast(struct gallivm_state *gallivm,
                   LLVMTypeRef vec_type,
                   LLVMValueRef scalar)
{
   LLVMBuilderRef builder;
   LLVMTypeRef index_type;
   LLVMTypeRef mask_type;
   LLVMValueRef undef_vec;
   LLVMValueRef seeded;
   unsigned num_elems;

   /* Scalar result requested: nothing to broadcast. */
   if (LLVMGetTypeKind(vec_type) != LLVMVectorTypeKind) {
      assert(vec_type == LLVMTypeOf(scalar));
      return scalar;
   }

   builder = gallivm->builder;
   num_elems = LLVMGetVectorSize(vec_type);
   undef_vec = LLVMGetUndef(vec_type);
   /* The shuffle mask is always made of int32 elements */
   index_type = LLVMInt32TypeInContext(gallivm->context);
   mask_type = LLVMVectorType(index_type, num_elems);

   assert(LLVMGetElementType(vec_type) == LLVMTypeOf(scalar));

   /* Put the scalar in element 0, then select element 0 for every lane. */
   seeded = LLVMBuildInsertElement(builder, undef_vec, scalar,
                                   LLVMConstNull(index_type), "");
   return LLVMBuildShuffleVector(builder, seeded, undef_vec,
                                 LLVMConstNull(mask_type), "");
}
Пример #5
0
static LLVMValueRef
emit_array_fetch(
	struct lp_build_tgsi_context *bld_base,
	unsigned File, enum tgsi_opcode_type type,
	struct tgsi_declaration_range range,
	unsigned swizzle)
{
	struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
	struct gallivm_state * gallivm = bld->bld_base.base.gallivm;
	LLVMBuilderRef builder = bld_base->base.gallivm->builder;

	unsigned i, size = range.Last - range.First + 1;
	LLVMTypeRef vec = LLVMVectorType(tgsi2llvmtype(bld_base, type), size);
	LLVMValueRef result = LLVMGetUndef(vec);

	struct tgsi_full_src_register tmp_reg = {};
	tmp_reg.Register.File = File;

	for (i = 0; i < size; ++i) {
		tmp_reg.Register.Index = i + range.First;
		LLVMValueRef temp = emit_fetch(bld_base, &tmp_reg, type, swizzle);
		result = LLVMBuildInsertElement(builder, result, temp,
			lp_build_const_int32(gallivm, i), "");
	}
	return result;
}
Пример #6
0
/**
 * Apply a scalar intrinsic to every element of the argument vectors.
 *
 * For each element index, the matching element is extracted from every
 * argument, the intrinsic is called on those scalars, and the scalar result
 * is inserted into the result vector at the same index.
 *
 * @param gallivm   gallivm state providing the builder
 * @param name      intrinsic name
 * @param ret_type  vector type of the result
 * @param args      argument vectors
 * @param num_args  number of arguments; at most LP_MAX_FUNC_ARGS
 * @return vector of per-element intrinsic results
 */
LLVMValueRef
lp_build_intrinsic_map(struct gallivm_state *gallivm,
                       const char *name,
                       LLVMTypeRef ret_type,
                       LLVMValueRef *args,
                       unsigned num_args)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMTypeRef scalar_type = LLVMGetElementType(ret_type);
   const unsigned lane_count = LLVMGetVectorSize(ret_type);
   LLVMValueRef mapped;
   unsigned lane;

   assert(num_args <= LP_MAX_FUNC_ARGS);

   mapped = LLVMGetUndef(ret_type);

   for (lane = 0; lane < lane_count; ++lane) {
      LLVMValueRef lane_index = lp_build_const_int32(gallivm, lane);
      LLVMValueRef scalar_args[LP_MAX_FUNC_ARGS];
      LLVMValueRef scalar_res;
      unsigned arg;

      /* Pull this lane out of every argument vector. */
      for (arg = 0; arg < num_args; ++arg) {
         scalar_args[arg] = LLVMBuildExtractElement(builder, args[arg],
                                                    lane_index, "");
      }

      scalar_res = lp_build_intrinsic(builder, name, scalar_type,
                                      scalar_args, num_args, 0);
      mapped = LLVMBuildInsertElement(builder, mapped, scalar_res,
                                      lane_index, "");
   }

   return mapped;
}
Пример #7
0
/**
 * Replicate a scalar into every element of a vector of type vec_type.
 *
 * If vec_type is not a vector type, the scalar is returned unchanged.
 * For HAVE_LLVM >= 0x207 the broadcast is an insert into element 0 followed
 * by an all-zero shuffle; otherwise the scalar is inserted into each element
 * in turn.
 *
 * @param gallivm   gallivm state providing the LLVM context and builder
 * @param vec_type  requested result type
 * @param scalar    value to replicate; its type must match the element type
 * @return the scalar itself, or a vector holding it in every element
 */
LLVMValueRef
lp_build_broadcast(struct gallivm_state *gallivm,
                   LLVMTypeRef vec_type,
                   LLVMValueRef scalar)
{
   LLVMBuilderRef builder;
   LLVMTypeRef index_type;
   LLVMValueRef undef_vec;
   LLVMValueRef filled;
   unsigned num_elems;

   /* Scalar result requested: nothing to broadcast. */
   if (LLVMGetTypeKind(vec_type) != LLVMVectorTypeKind) {
      assert(vec_type == LLVMTypeOf(scalar));
      return scalar;
   }

   builder = gallivm->builder;
   num_elems = LLVMGetVectorSize(vec_type);
   undef_vec = LLVMGetUndef(vec_type);
   index_type = LLVMInt32TypeInContext(gallivm->context);

   assert(LLVMGetElementType(vec_type) == LLVMTypeOf(scalar));

   if (HAVE_LLVM >= 0x207) {
      /* The shuffle mask is always made of int32 elements */
      LLVMTypeRef mask_type = LLVMVectorType(index_type, num_elems);

      filled = LLVMBuildInsertElement(builder, undef_vec, scalar,
                                      LLVMConstNull(index_type), "");
      filled = LLVMBuildShuffleVector(builder, filled, undef_vec,
                                      LLVMConstNull(mask_type), "");
   } else {
      /* XXX: The above path provokes a bug in LLVM 2.6 */
      unsigned lane;

      filled = undef_vec;
      for (lane = 0; lane < num_elems; ++lane) {
         filled = LLVMBuildInsertElement(builder, filled, scalar,
                                         lp_build_const_int32(gallivm, lane),
                                         "");
      }
   }

   return filled;
}
Пример #8
0
/**
 * Pack an array of values into a single vector.
 *
 * The element type of the result is the type of values[0].
 *
 * @param gallivm      gallivm state providing the builder
 * @param values       values to pack, in element order
 * @param value_count  number of values, and length of the result vector
 * @return vector whose element i is values[i]
 */
LLVMValueRef
lp_build_gather_values(struct gallivm_state * gallivm,
                       LLVMValueRef * values,
                       unsigned value_count)
{
   LLVMTypeRef elem_type = LLVMTypeOf(values[0]);
   LLVMTypeRef packed_type = LLVMVectorType(elem_type, value_count);
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef packed = LLVMGetUndef(packed_type);
   unsigned slot = 0;

   while (slot < value_count) {
      LLVMValueRef slot_index = lp_build_const_int32(gallivm, slot);

      packed = LLVMBuildInsertElement(builder, packed, values[slot],
                                      slot_index, "");
      slot++;
   }

   return packed;
}
Пример #9
0
/**
 * Expand src from its own length to dst_length.
 *
 * A non-vector src becomes element 0 of an otherwise undefined vector of
 * dst_length elements. A vector src that already has dst_length elements is
 * returned unchanged. Otherwise the src elements are kept in place and the
 * remaining elements are taken from an undefined vector.
 *
 * @param gallivm     gallivm state providing the builder
 * @param src         scalar or vector to pad
 * @param dst_length  requested length; at least the src length and at most
 *                    LP_MAX_VECTOR_LENGTH
 * @return the padded vector
 */
LLVMValueRef
lp_build_pad_vector(struct gallivm_state *gallivm,
                    LLVMValueRef src,
                    unsigned dst_length)
{
   LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
   LLVMTypeRef src_type = LLVMTypeOf(src);
   LLVMValueRef filler;
   unsigned src_length;
   unsigned lane;

   if (LLVMGetTypeKind(src_type) != LLVMVectorTypeKind) {
      /* Can't use ShuffleVector on non-vector type */
      filler = LLVMGetUndef(LLVMVectorType(src_type, dst_length));
      return LLVMBuildInsertElement(gallivm->builder, filler, src,
                                    lp_build_const_int32(gallivm, 0), "");
   }

   filler = LLVMGetUndef(src_type);
   src_length = LLVMGetVectorSize(src_type);

   assert(dst_length <= Elements(elems));
   assert(dst_length >= src_length);

   if (src_length == dst_length) {
      return src;
   }

   /*
    * Shuffle mask: lanes below src_length pick the matching src element,
    * every remaining lane picks index src_length, i.e. the first element of
    * the undefined second operand.
    */
   for (lane = 0; lane < dst_length; ++lane) {
      const unsigned pick = (lane < src_length) ? lane : src_length;

      elems[lane] = lp_build_const_int32(gallivm, pick);
   }

   /* Combine the two vectors */
   return LLVMBuildShuffleVector(gallivm->builder, src, filler,
                                 LLVMConstVector(elems, dst_length), "");
}
Пример #10
0
/**
 * Gather elements from scattered memory positions into a single vector.
 *
 * Each offset is applied to base_ptr, the resulting pointer is cast to a
 * pointer to a src_width-bit integer, and the loaded value is converted to
 * dst_width bits before being inserted into the result.
 *
 * @param builder    LLVM IR builder
 * @param length     number of offsets (and of result elements)
 * @param src_width  source element width in bits
 * @param dst_width  result element width in bits (source is expanded to fit)
 * @param base_ptr   base pointer, should be an i8 pointer type
 * @param offsets    vector with offsets
 * @return vector of length dst_width-bit integers
 */
LLVMValueRef
lp_build_gather(LLVMBuilderRef builder,
                unsigned length,
                unsigned src_width,
                unsigned dst_width,
                LLVMValueRef base_ptr,
                LLVMValueRef offsets)
{
   LLVMTypeRef load_type = LLVMIntType(src_width);
   LLVMTypeRef load_ptr_type = LLVMPointerType(load_type, 0);
   LLVMTypeRef out_elem_type = LLVMIntType(dst_width);
   LLVMTypeRef out_vec_type = LLVMVectorType(out_elem_type, length);
   LLVMValueRef gathered = LLVMGetUndef(out_vec_type);
   unsigned lane;

   for (lane = 0; lane < length; ++lane) {
      LLVMValueRef lane_index = LLVMConstInt(LLVMInt32Type(), lane, 0);
      LLVMValueRef offset;
      LLVMValueRef addr;
      LLVMValueRef value;

      /* Address of this lane's element: base + offsets[lane]. */
      offset = LLVMBuildExtractElement(builder, offsets, lane_index, "");
      addr = LLVMBuildGEP(builder, base_ptr, &offset, 1, "");
      addr = LLVMBuildBitCast(builder, addr, load_ptr_type, "");
      value = LLVMBuildLoad(builder, addr, "");

      /* Bring the loaded value to the destination width. */
      assert(src_width <= dst_width);
      if (src_width > dst_width) {
         value = LLVMBuildTrunc(builder, value, out_elem_type, "");
      } else if (src_width < dst_width) {
         value = LLVMBuildZExt(builder, value, out_elem_type, "");
      }

      gathered = LLVMBuildInsertElement(builder, gathered, value,
                                        lane_index, "");
   }

   return gathered;
}
/**
 * Emit LLVM IR for a TGSI texture instruction.
 *
 * Buffer textures are handled first: TXQ returns a vector loaded from the
 * buffer-info constant buffer, and TXF emits the "llvm.R600.load.texbuf"
 * intrinsic (with extra masking when chip_class is below EVERGREEN). Every
 * other case builds an argument list and calls the intrinsic named by
 * action->intr_name. The result is stored in emit_data->output[0].
 *
 * \param action     supplies the intrinsic name for the generic path
 * \param bld_base   TGSI build context
 * \param emit_data  instruction, input arguments and output slot
 */
static void llvm_emit_tex(
	const struct lp_build_tgsi_action * action,
	struct lp_build_tgsi_context * bld_base,
	struct lp_build_emit_data * emit_data)
{
	struct gallivm_state * gallivm = bld_base->base.gallivm;
	LLVMValueRef args[7];
	unsigned c, sampler_src;
	struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);

	/* Buffer textures: TXQ and TXF are fully handled here and return. */
	if (emit_data->inst->Texture.Texture == TGSI_TEXTURE_BUFFER) {
		switch (emit_data->inst->Instruction.Opcode) {
		case TGSI_OPCODE_TXQ: {
			/* NOTE(review): this ctx shadows the function-scope ctx above. */
			struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
			ctx->uses_tex_buffers = true;
			bool isEgPlus = (ctx->chip_class >= EVERGREEN);
			/* Constant-buffer offset is 0 on EVERGREEN and later, 1 before. */
			LLVMValueRef offset = lp_build_const_int32(bld_base->base.gallivm,
				isEgPlus ? 0 : 1);
			LLVMValueRef cvecval = llvm_load_const_buffer(bld_base, offset,
				LLVM_R600_BUFFER_INFO_CONST_BUFFER);
			if (!isEgPlus) {
				/* Rotate the components: result = {v[1], v[2], v[3], v[0]}. */
				LLVMValueRef maskval[4] = {
					lp_build_const_int32(gallivm, 1),
					lp_build_const_int32(gallivm, 2),
					lp_build_const_int32(gallivm, 3),
					lp_build_const_int32(gallivm, 0),
				};
				LLVMValueRef mask = LLVMConstVector(maskval, 4);
				cvecval = LLVMBuildShuffleVector(gallivm->builder, cvecval, cvecval,
					mask, "");
			}
			emit_data->output[0] = cvecval;
			return;
		}
		case TGSI_OPCODE_TXF: {
			/* Fetch using component 0 of the first argument. */
			args[0] = LLVMBuildExtractElement(gallivm->builder, emit_data->args[0], lp_build_const_int32(gallivm, 0), "");
			args[1] = lp_build_const_int32(gallivm, R600_MAX_CONST_BUFFERS);
			emit_data->output[0] = build_intrinsic(gallivm->builder,
							"llvm.R600.load.texbuf",
							emit_data->dst_type, args, 2, LLVMReadNoneAttribute);
			/* EVERGREEN and later need no further fix-up. */
			if (ctx->chip_class >= EVERGREEN)
				return;
			ctx->uses_tex_buffers = true;
			/* NOTE(review): LLVMDumpValue looks like leftover debug output - confirm. */
			LLVMDumpValue(emit_data->output[0]);
			/* AND the integer view of the result with the vector at offset 0. */
			emit_data->output[0] = LLVMBuildBitCast(gallivm->builder,
				emit_data->output[0], LLVMVectorType(bld_base->base.int_elem_type, 4),
				"");
			LLVMValueRef Mask = llvm_load_const_buffer(bld_base,
				lp_build_const_int32(gallivm, 0),
				LLVM_R600_BUFFER_INFO_CONST_BUFFER);
			Mask = LLVMBuildBitCast(gallivm->builder, Mask,
				LLVMVectorType(bld_base->base.int_elem_type, 4), "");
			emit_data->output[0] = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_AND,
				emit_data->output[0],
				Mask);
			/* OR component 3 with element 0 of the vector at offset 1. */
			LLVMValueRef WComponent = LLVMBuildExtractElement(gallivm->builder,
				emit_data->output[0], lp_build_const_int32(gallivm, 3), "");
			Mask = llvm_load_const_buffer(bld_base, lp_build_const_int32(gallivm, 1),
				LLVM_R600_BUFFER_INFO_CONST_BUFFER);
			Mask = LLVMBuildExtractElement(gallivm->builder, Mask,
				lp_build_const_int32(gallivm, 0), "");
			Mask = LLVMBuildBitCast(gallivm->builder, Mask,
				bld_base->base.int_elem_type, "");
			WComponent = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_OR,
				WComponent, Mask);
			emit_data->output[0] = LLVMBuildInsertElement(gallivm->builder,
				emit_data->output[0], WComponent, lp_build_const_int32(gallivm, 3), "");
			/* Cast back to the base element type. */
			emit_data->output[0] = LLVMBuildBitCast(gallivm->builder,
				emit_data->output[0], LLVMVectorType(bld_base->base.elem_type, 4), "");
		}
			return;
		default:
			break;
		}
	}

	/*
	 * TEX/TXP: rebuild the coordinate vector, replacing the trailing
	 * components with undef for 1D, 2D and RECT targets.
	 */
	if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_TEX ||
		emit_data->inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
		LLVMValueRef Vector[4] = {
			LLVMBuildExtractElement(gallivm->builder, emit_data->args[0],
				lp_build_const_int32(gallivm, 0), ""),
			LLVMBuildExtractElement(gallivm->builder, emit_data->args[0],
				lp_build_const_int32(gallivm, 1), ""),
			LLVMBuildExtractElement(gallivm->builder, emit_data->args[0],
				lp_build_const_int32(gallivm, 2), ""),
			LLVMBuildExtractElement(gallivm->builder, emit_data->args[0],
				lp_build_const_int32(gallivm, 3), ""),
		};
		switch (emit_data->inst->Texture.Texture) {
		case TGSI_TEXTURE_2D:
		case TGSI_TEXTURE_RECT:
			Vector[2] = Vector[3] = LLVMGetUndef(bld_base->base.elem_type);
			break;
		case TGSI_TEXTURE_1D:
			Vector[1] = Vector[2] = Vector[3] = LLVMGetUndef(bld_base->base.elem_type);
			break;
		default:
			break;
		}
		args[0] = lp_build_gather_values(gallivm, Vector, 4);
	} else {
		args[0] = emit_data->args[0];
	}

	/*
	 * NOTE(review): this reserves 2 extra slots, but 3 arguments are appended
	 * below (4 on the compressed-MSAA path) - confirm args[7] cannot overflow.
	 */
	assert(emit_data->arg_count + 2 <= Elements(args));

	/* Copy the remaining inputs; c is left at the first free slot. */
	for (c = 1; c < emit_data->arg_count; ++c)
		args[c] = emit_data->args[c];

	/* TXF: shift args[1..3] left by one bit. */
	if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_TXF) {
		args[1] = LLVMBuildShl(gallivm->builder, args[1], lp_build_const_int32(gallivm, 1), "");
		args[2] = LLVMBuildShl(gallivm->builder, args[2], lp_build_const_int32(gallivm, 1), "");
		args[3] = LLVMBuildShl(gallivm->builder, args[3], lp_build_const_int32(gallivm, 1), "");
	}

	/* The sampler is the last source register of the instruction. */
	sampler_src = emit_data->inst->Instruction.NumSrcRegs-1;

	/* Append: sampler index + R600_MAX_CONST_BUFFERS, sampler index, texture target. */
	args[c++] = lp_build_const_int32(gallivm,
					emit_data->inst->Src[sampler_src].Register.Index + R600_MAX_CONST_BUFFERS);
	args[c++] = lp_build_const_int32(gallivm,
					emit_data->inst->Src[sampler_src].Register.Index);
	args[c++] = lp_build_const_int32(gallivm,
					emit_data->inst->Texture.Texture);

	/* TXF on MSAA targets. */
	if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_TXF &&
		(emit_data->inst->Texture.Texture == TGSI_TEXTURE_2D_MSAA ||
		emit_data->inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY_MSAA)) {

		/*
		 * Replace args[6] with the non-MSAA equivalent target.
		 * NOTE(review): presumably slot 6 is the texture-target argument
		 * appended above - confirm against arg_count for TXF.
		 */
		switch (emit_data->inst->Texture.Texture) {
		case TGSI_TEXTURE_2D_MSAA:
			args[6] = lp_build_const_int32(gallivm, TGSI_TEXTURE_2D);
			break;
		case TGSI_TEXTURE_2D_ARRAY_MSAA:
			args[6] = lp_build_const_int32(gallivm, TGSI_TEXTURE_2D_ARRAY);
			break;
		default:
			break;
		}

		if (ctx->has_compressed_msaa_texturing) {
			LLVMValueRef ldptr_args[10] = {
				args[0], // Coord
				args[1], // Offset X
				args[2], // Offset Y
				args[3], // Offset Z
				args[4],
				args[5],
				lp_build_const_int32(gallivm, 1),
				lp_build_const_int32(gallivm, 1),
				lp_build_const_int32(gallivm, 1),
				lp_build_const_int32(gallivm, 1)
			};
			LLVMValueRef ptr = build_intrinsic(gallivm->builder,
				"llvm.R600.ldptr",
				emit_data->dst_type, ldptr_args, 10, LLVMReadNoneAttribute);
			/*
			 * New component 3 of the coordinate:
			 *   (ldptr.x >> (coord[3] * 4)) & 0xF
			 */
			LLVMValueRef Tmp = LLVMBuildExtractElement(gallivm->builder, args[0],
				lp_build_const_int32(gallivm, 3), "");
			Tmp = LLVMBuildMul(gallivm->builder, Tmp,
				lp_build_const_int32(gallivm, 4), "");
			LLVMValueRef ResX = LLVMBuildExtractElement(gallivm->builder, ptr,
				lp_build_const_int32(gallivm, 0), "");
			ResX = LLVMBuildBitCast(gallivm->builder, ResX,
				bld_base->base.int_elem_type, "");
			Tmp = LLVMBuildLShr(gallivm->builder, ResX, Tmp, "");
			Tmp = LLVMBuildAnd(gallivm->builder, Tmp,
				lp_build_const_int32(gallivm, 0xF), "");
			args[0] = LLVMBuildInsertElement(gallivm->builder, args[0], Tmp,
				lp_build_const_int32(gallivm, 3), "");
			/* One more trailing argument: the original texture target. */
			args[c++] = lp_build_const_int32(gallivm,
				emit_data->inst->Texture.Texture);
		}
	}

	/* Generic path: call the intrinsic named by the action with c arguments. */
	emit_data->output[0] = build_intrinsic(gallivm->builder,
					action->intr_name,
					emit_data->dst_type, args, c, LLVMReadNoneAttribute);

	/*
	 * TXQ on cube arrays: when writemask bit 2 is set, overwrite component 2
	 * of the result with element 0 of the vector loaded from
	 * CONSTANT_TXQ_BUFFER at offset 0.
	 */
	if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_TXQ &&
		((emit_data->inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY ||
		emit_data->inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY)))
		if (emit_data->inst->Dst[0].Register.WriteMask & 4) {
			LLVMValueRef offset = lp_build_const_int32(bld_base->base.gallivm, 0);
			LLVMValueRef ZLayer = LLVMBuildExtractElement(gallivm->builder,
				llvm_load_const_buffer(bld_base, offset, CONSTANT_TXQ_BUFFER),
				lp_build_const_int32(gallivm, 0), "");

			emit_data->output[0] = LLVMBuildInsertElement(gallivm->builder, emit_data->output[0], ZLayer, lp_build_const_int32(gallivm, 2), "");
			/* NOTE(review): this ctx shadows the function-scope ctx above. */
			struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
			ctx->has_txq_cube_array_z_comp = true;
		}
}
Пример #12
0
/**
 * Generate code to compute the coordinate gradient (rho).
 *
 * The per-dimension maximum of |ddx| and |ddy| is scaled by the texture
 * size minified by the first level, and the largest resulting component is
 * returned.
 *
 * \param bld   sample build context
 * \param unit  texture unit passed to the first_level query
 * \param ddx   partial derivatives of (s, t, r, q) with respect to X
 * \param ddy   partial derivatives of (s, t, r, q) with respect to Y
 *
 * XXX: The resulting rho is scalar, so we ignore all but the first element of
 * derivatives that are passed by the shader.
 */
static LLVMValueRef
lp_build_rho(struct lp_build_sample_context *bld,
             unsigned unit,
             const LLVMValueRef ddx[4],
             const LLVMValueRef ddy[4])
{
   struct lp_build_context *int_size_bld = &bld->int_size_bld;
   struct lp_build_context *float_size_bld = &bld->float_size_bld;
   struct lp_build_context *float_bld = &bld->float_bld;
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMTypeRef i32_type = LLVMInt32TypeInContext(bld->gallivm->context);
   LLVMValueRef idx_s = LLVMConstInt(i32_type, 0, 0);
   LLVMValueRef idx_t = LLVMConstInt(i32_type, 1, 0);
   LLVMValueRef idx_r = LLVMConstInt(i32_type, 2, 0);
   const unsigned dims = bld->dims;
   LLVMValueRef grad_x, grad_y;
   LLVMValueRef grad_max;
   LLVMValueRef level, level_vec;
   LLVMValueRef size_i, size_f;
   LLVMValueRef comp_s, comp_t;
   LLVMValueRef rho;

   /* Collect the derivatives of the used coordinates into grad_x / grad_y. */
   if (dims <= 1) {
      grad_x = ddx[0];
      grad_y = ddy[0];
   }
   else {
      grad_x = LLVMBuildInsertElement(builder, float_size_bld->undef,
                                      ddx[0], idx_s, "");
      grad_y = LLVMBuildInsertElement(builder, float_size_bld->undef,
                                      ddy[0], idx_s, "");

      grad_x = LLVMBuildInsertElement(builder, grad_x, ddx[1], idx_t, "");
      grad_y = LLVMBuildInsertElement(builder, grad_y, ddy[1], idx_t, "");

      if (dims >= 3) {
         grad_x = LLVMBuildInsertElement(builder, grad_x, ddx[2], idx_r, "");
         grad_y = LLVMBuildInsertElement(builder, grad_y, ddy[2], idx_r, "");
      }
   }

   /* Per-dimension max(|d/dx|, |d/dy|). */
   grad_x = lp_build_abs(float_size_bld, grad_x);
   grad_y = lp_build_abs(float_size_bld, grad_y);
   grad_max = lp_build_max(float_size_bld, grad_x, grad_y);

   /* Scale by the texture size at the first level. */
   level = bld->dynamic_state->first_level(bld->dynamic_state,
                                           bld->gallivm, unit);
   level_vec = lp_build_broadcast_scalar(&bld->int_size_bld, level);
   size_i = lp_build_minify(int_size_bld, bld->int_size, level_vec);
   size_f = lp_build_int_to_float(float_size_bld, size_i);

   grad_max = lp_build_mul(float_size_bld, grad_max, size_f);

   /* A single dimension needs no reduction. */
   if (dims <= 1) {
      return grad_max;
   }

   /* Reduce to the largest component. */
   comp_s = LLVMBuildExtractElement(builder, grad_max, idx_s, "");
   comp_t = LLVMBuildExtractElement(builder, grad_max, idx_t, "");
   rho = lp_build_max(float_bld, comp_s, comp_t);

   if (dims >= 3) {
      LLVMValueRef comp_r = LLVMBuildExtractElement(builder, grad_max,
                                                    idx_r, "");
      rho = lp_build_max(float_bld, rho, comp_r);
   }

   return rho;
}
Пример #13
0
/**
 * Initialize the bld->a, dadq fields.  This involves fetching
 * those values from the arrays which are passed into the JIT function.
 *
 * For every attribute and every channel enabled in its mask, bld->a gets a
 * freshly allocated slot holding the start value and bld->dadq gets the
 * per-pixel delta vector.
 *
 * \param bld       interpolation context to fill in
 * \param a0_ptr    array the a0 coefficients are loaded from
 * \param dadx_ptr  array the da/dx coefficients are loaded from
 * \param dady_ptr  array the da/dy coefficients are loaded from
 */
static void
coeffs_init(struct lp_build_interp_soa_context *bld,
            LLVMValueRef a0_ptr,
            LLVMValueRef dadx_ptr,
            LLVMValueRef dady_ptr)
{
   struct lp_build_context *coeff_bld = &bld->coeff_bld;
   struct lp_build_context *setup_bld = &bld->setup_bld;
   struct gallivm_state *gallivm = coeff_bld->gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef pixoffx, pixoffy;
   unsigned attrib;
   unsigned chan;
   unsigned i;

   /* Per-pixel x/y offset vectors, filled from the quad_offset tables. */
   pixoffx = coeff_bld->undef;
   pixoffy = coeff_bld->undef;
   for (i = 0; i < coeff_bld->type.length; i++) {
      LLVMValueRef nr = lp_build_const_int32(gallivm, i);
      LLVMValueRef pixxf = lp_build_const_float(gallivm, quad_offset_x[i]);
      LLVMValueRef pixyf = lp_build_const_float(gallivm, quad_offset_y[i]);
      pixoffx = LLVMBuildInsertElement(builder, pixoffx, pixxf, nr, "");
      pixoffy = LLVMBuildInsertElement(builder, pixoffy, pixyf, nr, "");
   }


   for (attrib = 0; attrib < bld->num_attribs; ++attrib) {
      const unsigned mask = bld->mask[attrib];
      const unsigned interp = bld->interp[attrib];
      /* Index of this attribute's first channel in the coefficient arrays. */
      LLVMValueRef index = lp_build_const_int32(gallivm,
                                attrib * TGSI_NUM_CHANNELS);
      LLVMValueRef ptr;
      /* Coefficients that are not loaded below stay zero. */
      LLVMValueRef dadxaos = setup_bld->zero;
      LLVMValueRef dadyaos = setup_bld->zero;
      LLVMValueRef a0aos = setup_bld->zero;

      /* always fetch all 4 values for performance/simplicity */
      switch (interp) {
      case LP_INTERP_PERSPECTIVE:
         /* fall-through */

      case LP_INTERP_LINEAR:
         /* Load the x and y derivative vectors, then fall into the a0 load. */
         ptr = LLVMBuildGEP(builder, dadx_ptr, &index, 1, "");
         ptr = LLVMBuildBitCast(builder, ptr,
               LLVMPointerType(setup_bld->vec_type, 0), "");
         dadxaos = LLVMBuildLoad(builder, ptr, "");

         ptr = LLVMBuildGEP(builder, dady_ptr, &index, 1, "");
         ptr = LLVMBuildBitCast(builder, ptr,
               LLVMPointerType(setup_bld->vec_type, 0), "");
         dadyaos = LLVMBuildLoad(builder, ptr, "");

         attrib_name(dadxaos, attrib, 0, ".dadxaos");
         attrib_name(dadyaos, attrib, 0, ".dadyaos");
         /* fall-through */

      case LP_INTERP_CONSTANT:
      case LP_INTERP_FACING:
         /* Load the a0 vector. */
         ptr = LLVMBuildGEP(builder, a0_ptr, &index, 1, "");
         ptr = LLVMBuildBitCast(builder, ptr,
               LLVMPointerType(setup_bld->vec_type, 0), "");
         a0aos = LLVMBuildLoad(builder, ptr, "");
         attrib_name(a0aos, attrib, 0, ".a0aos");
         break;

      case LP_INTERP_POSITION:
         /* Nothing to do as the position coeffs are already setup in slot 0 */
         continue;

      default:
         assert(0);
         break;
      }

      /*
       * a = a0 + (x * dadx + y * dady)
       * a0aos is the attrib value at top left corner of stamp
       */
      if (interp != LP_INTERP_CONSTANT &&
          interp != LP_INTERP_FACING) {
         LLVMValueRef x = lp_build_broadcast_scalar(setup_bld, bld->x);
         LLVMValueRef y = lp_build_broadcast_scalar(setup_bld, bld->y);
         a0aos = lp_build_fmuladd(builder, x, dadxaos, a0aos);
         a0aos = lp_build_fmuladd(builder, y, dadyaos, a0aos);
      }

      /*
       * dadq = {0, dadx, dady, dadx + dady}
       * for two quads (side by side) this is:
       * {0, dadx, dady, dadx+dady, 2*dadx, 2*dadx+dady, 3*dadx+dady}
       */
      for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
         /* this generates a CRAPLOAD of shuffles... */
         if (mask & (1 << chan)) {
            LLVMValueRef dadx, dady;
            LLVMValueRef dadq, dadq2;
            LLVMValueRef a;
            LLVMValueRef chan_index = lp_build_const_int32(gallivm, chan);

            if (attrib == 0 && chan == 0) {
               /* Attribute 0, channel 0: start at bld->x with a unit x step. */
               a = bld->x;
               if (bld->pos_offset) {
                  a = LLVMBuildFAdd(builder, a, lp_build_const_float(gallivm, bld->pos_offset), "");
               }
               a = lp_build_broadcast_scalar(coeff_bld, a);
               dadx = coeff_bld->one;
               dady = coeff_bld->zero;
            }
            else if (attrib == 0 && chan == 1) {
               /* Attribute 0, channel 1: start at bld->y with a unit y step. */
               a = bld->y;
               if (bld->pos_offset) {
                  a = LLVMBuildFAdd(builder, a, lp_build_const_float(gallivm, bld->pos_offset), "");
               }
               a = lp_build_broadcast_scalar(coeff_bld, a);
               dady = coeff_bld->one;
               dadx = coeff_bld->zero;
            }
            else {
               /* Broadcast this channel of the loaded coefficient vectors. */
               dadx = lp_build_extract_broadcast(gallivm, setup_bld->type,
                                              coeff_bld->type, dadxaos, chan_index);
               dady = lp_build_extract_broadcast(gallivm, setup_bld->type,
                                              coeff_bld->type, dadyaos, chan_index);

               /*
                * a = {a, a, a, a}
                */
               a = lp_build_extract_broadcast(gallivm, setup_bld->type,
                                              coeff_bld->type, a0aos, chan_index);
            }

            /* dadq = dadx * pixoffx + dady * pixoffy */
            dadx = LLVMBuildFMul(builder, dadx, pixoffx, "");
            dady = LLVMBuildFMul(builder, dady, pixoffy, "");
            dadq = LLVMBuildFAdd(builder, dadx, dady, "");

            /*
             * Compute the attrib values on the upper-left corner of each
             * group of quads.
             * Note that if we process 2 quads at once this does not
             * do exactly what we want.
             * We need to access elem 0 and 2 respectively later if we process
             * 2 quads at once.
             */

            if (interp != LP_INTERP_CONSTANT &&
                interp != LP_INTERP_FACING) {
               /* a += 2 * dadq */
               dadq2 = LLVMBuildFAdd(builder, dadq, dadq, "");
               a = LLVMBuildFAdd(builder, a, dadq2, "");
	    }

#if PERSPECTIVE_DIVIDE_PER_QUAD
            /*
             * a *= 1 / w
             */

            /*
             * XXX since we're only going to access elements 0,2 out of 8
             * if we have 8-wide vectors we should do the division only 4-wide.
             * a is really a 2-elements in a 4-wide vector disguised as 8-wide
             * in this case.
             */
            if (interp == LP_INTERP_PERSPECTIVE) {
               LLVMValueRef w = bld->a[0][3];
               assert(attrib != 0);
               assert(bld->mask[0] & TGSI_WRITEMASK_W);
               /* The reciprocal of w is computed once and cached in bld->oow. */
               if (!bld->oow) {
                  bld->oow = lp_build_rcp(coeff_bld, w);
                  lp_build_name(bld->oow, "oow");
               }
               a = lp_build_mul(coeff_bld, a, bld->oow);
            }
#endif

            attrib_name(a, attrib, chan, ".a");
            attrib_name(dadq, attrib, chan, ".dadq");

            /* a is stored into a fresh alloca slot; dadq is kept as a value. */
            bld->a[attrib][chan] = lp_build_alloca(gallivm,
                                                   LLVMTypeOf(a), "");
            LLVMBuildStore(builder, a, bld->a[attrib][chan]);
            bld->dadq[attrib][chan] = dadq;
         }
      }
   }
}
/**
 * Fetch a constant register as an AoS vector.
 *
 * The four channels of the constant are loaded one by one from
 * bld->consts_ptr and inserted at the positions given by bld->swizzles.
 * When the vector type holds more than four elements, the first four are
 * then replicated across the rest of the vector.
 *
 * \param bld_base  TGSI build context
 * \param reg       source register; indirect addressing is not supported
 * \param stype     unused
 * \param swizzle   unused
 * \return vector holding the fetched constant
 */
static LLVMValueRef
emit_fetch_constant(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle)
{
   struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   struct lp_type type = bld_base->base.type;
   LLVMValueRef fetched;
   unsigned comp;

   assert(!reg->Register.Indirect);

   /*
    * Load the four constant components.
    */

   fetched = bld->bld_base.base.undef;
   for (comp = 0; comp < 4; ++comp) {
      LLVMValueRef src_index;
      LLVMValueRef src_ptr;
      LLVMValueRef value;
      LLVMValueRef dst_index;

      src_index = lp_build_const_int32(bld->bld_base.base.gallivm,
                                       reg->Register.Index * 4 + comp);
      src_ptr = LLVMBuildGEP(builder, bld->consts_ptr, &src_index, 1, "");
      value = LLVMBuildLoad(builder, src_ptr, "");

      lp_build_name(value, "const[%u].%c", reg->Register.Index, "xyzw"[comp]);

      /*
       * NOTE: constants array is always assumed to be RGBA
       */

      dst_index = lp_build_const_int32(bld->bld_base.base.gallivm,
                                       bld->swizzles[comp]);

      fetched = LLVMBuildInsertElement(builder, fetched, value, dst_index, "");
   }

   /*
    * Replicate the first group of four elements into all the others.
    *
    * XXX: could be factored into a reusable function.
    */

   if (type.length > 4) {
      LLVMValueRef mask[LP_MAX_VECTOR_LENGTH];
      unsigned lane;

      for (lane = 0; lane < 4; ++lane) {
         mask[lane] = lp_build_const_int32(bld->bld_base.base.gallivm, lane);
      }

      /* Every further lane reuses the index of its position within a group. */
      for (lane = 4; lane < type.length; ++lane) {
         mask[lane] = mask[lane % 4];
      }

      fetched = LLVMBuildShuffleVector(builder,
                                       fetched, bld->bld_base.base.undef,
                                       LLVMConstVector(mask, type.length),
                                       "");
   }

   return fetched;
}
Пример #15
0
/**
 * Register fetch.
 */
static LLVMValueRef
emit_fetch(
   struct lp_build_tgsi_aos_context *bld,
   const struct tgsi_full_instruction *inst,
   unsigned src_op)
{
   struct lp_type type = bld->base.type;
   const struct tgsi_full_src_register *reg = &inst->Src[src_op];
   LLVMValueRef res;
   unsigned chan;

   assert(!reg->Register.Indirect);

   /*
    * Fetch the from the register file.
    */

   switch (reg->Register.File) {
   case TGSI_FILE_CONSTANT:
      /*
       * Get the constants components
       */

      res = bld->base.undef;
      for (chan = 0; chan < 4; ++chan) {
         LLVMValueRef index;
         LLVMValueRef scalar_ptr;
         LLVMValueRef scalar;
         LLVMValueRef swizzle;

         index = LLVMConstInt(LLVMInt32Type(),
                              reg->Register.Index*4 + chan,
                              0);

         scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr,
                                   &index, 1, "");

         scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");

         lp_build_name(scalar, "const[%u].%c", reg->Register.Index, "xyzw"[chan]);

         /*
          * NOTE: constants array is always assumed to be RGBA
          */

         swizzle = LLVMConstInt(LLVMInt32Type(), chan, 0);

         res = LLVMBuildInsertElement(bld->base.builder, res, scalar, swizzle, "");
      }

      /*
       * Broadcast the first quaternion to all others.
       *
       * XXX: could be factored into a reusable function.
       */

      if (type.length > 4) {
         LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
         unsigned i;

         for (chan = 0; chan < 4; ++chan) {
            shuffles[chan] = LLVMConstInt(LLVMInt32Type(), chan, 0);
         }

         for (i = 4; i < type.length; ++i) {
            shuffles[i] = shuffles[i % 4];
         }

         res = LLVMBuildShuffleVector(bld->base.builder,
                                      res, bld->base.undef,
                                      LLVMConstVector(shuffles, type.length),
                                      "");
      }
      break;

   case TGSI_FILE_IMMEDIATE:
      res = bld->immediates[reg->Register.Index];
      assert(res);
      break;

   case TGSI_FILE_INPUT:
      res = bld->inputs[reg->Register.Index];
      assert(res);
      break;

   case TGSI_FILE_TEMPORARY:
      {
         LLVMValueRef temp_ptr;
         temp_ptr = bld->temps[reg->Register.Index];
         res = LLVMBuildLoad(bld->base.builder, temp_ptr, "");
         if (!res)
            return bld->base.undef;
      }
      break;

   default:
      assert(0 && "invalid src register in emit_fetch()");
      return bld->base.undef;
   }

   /*
    * Apply sign modifier.
    */

   if (reg->Register.Absolute) {
      res = lp_build_abs(&bld->base, res);
   }

   if(reg->Register.Negate) {
      res = lp_build_negate(&bld->base, res);
   }

   /*
    * Swizzle the argument
    */

   res = swizzle_aos(bld, res,
                     reg->Register.SwizzleX,
                     reg->Register.SwizzleY,
                     reg->Register.SwizzleZ,
                     reg->Register.SwizzleW);

   return res;
}
/**
 * Build code to compare two values 'a' and 'b' of 'type' using the given func.
 *
 * \param gallivm  code generation state; IR is emitted through its builder
 * \param type     type of both operands
 * \param func     one of PIPE_FUNC_x
 * \param a        left-hand operand
 * \param b        right-hand operand
 *
 * \return a value of the integer vector type matching 'type' whose elements
 *         are 0 for false or ~0 for true.  An undef value is returned for
 *         an unhandled func or (in the old SSE2 path) element width.
 *
 * PIPE_FUNC_NEVER and PIPE_FUNC_ALWAYS are answered with constants without
 * emitting any instruction.  The generic floating point path uses the
 * unordered LLVM predicates (LLVMRealU*).
 */
LLVMValueRef
lp_build_compare(struct gallivm_state *gallivm,
                 const struct lp_type type,
                 unsigned func,
                 LLVMValueRef a,
                 LLVMValueRef b)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMTypeRef int_vec_type = lp_build_int_vec_type(gallivm, type);
   LLVMValueRef zeros = LLVMConstNull(int_vec_type);
   LLVMValueRef ones = LLVMConstAllOnes(int_vec_type);
   LLVMValueRef cond;
   LLVMValueRef res;

   assert(func >= PIPE_FUNC_NEVER);
   assert(func <= PIPE_FUNC_ALWAYS);
   assert(lp_check_value(type, a));
   assert(lp_check_value(type, b));

   /* Trivial functions: no comparison needs to be emitted at all. */
   if(func == PIPE_FUNC_NEVER)
      return zeros;
   if(func == PIPE_FUNC_ALWAYS)
      return ones;

#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
   /*
    * There are no unsigned integer comparison instructions in SSE.
    * Only report it; the comparison itself is still generated below.
    */

   if (!type.floating && !type.sign &&
       type.width * type.length == 128 &&
       util_cpu_caps.has_sse2 &&
       (func == PIPE_FUNC_LESS ||
        func == PIPE_FUNC_LEQUAL ||
        func == PIPE_FUNC_GREATER ||
        func == PIPE_FUNC_GEQUAL) &&
       (gallivm_debug & GALLIVM_DEBUG_PERF)) {
         debug_printf("%s: inefficient <%u x i%u> unsigned comparison\n",
                      __FUNCTION__, type.length, type.width);
   }
#endif

#if HAVE_LLVM < 0x0207
#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
   if(type.width * type.length == 128) {
      if(type.floating && util_cpu_caps.has_sse) {
         /* float[4] comparison */
         LLVMTypeRef vec_type = lp_build_vec_type(gallivm, type);
         LLVMValueRef args[3];
         unsigned cc;
         boolean swap;

         /*
          * 'cc' is the immediate passed to llvm.x86.sse.cmp.ps.  GREATER and
          * GEQUAL reuse the LESS/LEQUAL codes with the operands swapped.
          */
         swap = FALSE;
         switch(func) {
         case PIPE_FUNC_EQUAL:
            cc = 0;
            break;
         case PIPE_FUNC_NOTEQUAL:
            cc = 4;
            break;
         case PIPE_FUNC_LESS:
            cc = 1;
            break;
         case PIPE_FUNC_LEQUAL:
            cc = 2;
            break;
         case PIPE_FUNC_GREATER:
            cc = 1;
            swap = TRUE;
            break;
         case PIPE_FUNC_GEQUAL:
            cc = 2;
            swap = TRUE;
            break;
         default:
            assert(0);
            return lp_build_undef(gallivm, type);
         }

         if(swap) {
            args[0] = b;
            args[1] = a;
         }
         else {
            args[0] = a;
            args[1] = b;
         }

         args[2] = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), cc, 0);
         res = lp_build_intrinsic(builder,
                                  "llvm.x86.sse.cmp.ps",
                                  vec_type,
                                  args, 3);
         res = LLVMBuildBitCast(builder, res, int_vec_type, "");
         return res;
      }
      else if(util_cpu_caps.has_sse2) {
         /* int[4] comparison */

         /*
          * Recipe per PIPE_FUNC_x (indexed by func): optionally swap the
          * operands, run pcmpeq or pcmpgt, optionally invert the result.
          */
         static const struct {
            unsigned swap:1;
            unsigned eq:1;
            unsigned gt:1;
            unsigned not:1;
         } table[] = {
            {0, 0, 0, 1}, /* PIPE_FUNC_NEVER */
            {1, 0, 1, 0}, /* PIPE_FUNC_LESS */
            {0, 1, 0, 0}, /* PIPE_FUNC_EQUAL */
            {0, 0, 1, 1}, /* PIPE_FUNC_LEQUAL */
            {0, 0, 1, 0}, /* PIPE_FUNC_GREATER */
            {0, 1, 0, 1}, /* PIPE_FUNC_NOTEQUAL */
            {1, 0, 1, 1}, /* PIPE_FUNC_GEQUAL */
            {0, 0, 0, 0}  /* PIPE_FUNC_ALWAYS */
         };
         const char *pcmpeq;
         const char *pcmpgt;
         LLVMValueRef args[2];
         LLVMTypeRef vec_type = lp_build_vec_type(gallivm, type);

         switch (type.width) {
         case 8:
            pcmpeq = "llvm.x86.sse2.pcmpeq.b";
            pcmpgt = "llvm.x86.sse2.pcmpgt.b";
            break;
         case 16:
            pcmpeq = "llvm.x86.sse2.pcmpeq.w";
            pcmpgt = "llvm.x86.sse2.pcmpgt.w";
            break;
         case 32:
            pcmpeq = "llvm.x86.sse2.pcmpeq.d";
            pcmpgt = "llvm.x86.sse2.pcmpgt.d";
            break;
         default:
            assert(0);
            return lp_build_undef(gallivm, type);
         }

         /* There are no unsigned comparison instructions. So flip the sign bit
          * so that the results match.
          */
         if (table[func].gt && !type.sign) {
            LLVMValueRef msb = lp_build_const_int_vec(gallivm, type, (unsigned long long)1 << (type.width - 1));
            a = LLVMBuildXor(builder, a, msb, "");
            b = LLVMBuildXor(builder, b, msb, "");
         }

         if(table[func].swap) {
            args[0] = b;
            args[1] = a;
         }
         else {
            args[0] = a;
            args[1] = b;
         }

         if(table[func].eq)
            res = lp_build_intrinsic(builder, pcmpeq, vec_type, args, 2);
         else if (table[func].gt)
            res = lp_build_intrinsic(builder, pcmpgt, vec_type, args, 2);
         else
            res = LLVMConstNull(vec_type);

         if(table[func].not)
            res = LLVMBuildNot(builder, res, "");

         return res;
      }
   } /* if (type.width * type.length == 128) */
#endif
#endif /* HAVE_LLVM < 0x0207 */

   /* XXX: It is not clear if we should use the ordered or unordered operators */

   if(type.floating) {
      LLVMRealPredicate op;
      switch(func) {
      case PIPE_FUNC_NEVER:
         op = LLVMRealPredicateFalse;
         break;
      case PIPE_FUNC_ALWAYS:
         op = LLVMRealPredicateTrue;
         break;
      case PIPE_FUNC_EQUAL:
         op = LLVMRealUEQ;
         break;
      case PIPE_FUNC_NOTEQUAL:
         op = LLVMRealUNE;
         break;
      case PIPE_FUNC_LESS:
         op = LLVMRealULT;
         break;
      case PIPE_FUNC_LEQUAL:
         op = LLVMRealULE;
         break;
      case PIPE_FUNC_GREATER:
         op = LLVMRealUGT;
         break;
      case PIPE_FUNC_GEQUAL:
         op = LLVMRealUGE;
         break;
      default:
         assert(0);
         return lp_build_undef(gallivm, type);
      }

#if HAVE_LLVM >= 0x0207
      /* Compare, then sign-extend the i1 result(s) to 0 / ~0. */
      cond = LLVMBuildFCmp(builder, op, a, b, "");
      res = LLVMBuildSExt(builder, cond, int_vec_type, "");
#else
      if (type.length == 1) {
         cond = LLVMBuildFCmp(builder, op, a, b, "");
         res = LLVMBuildSExt(builder, cond, int_vec_type, "");
      }
      else {
         unsigned i;

         res = LLVMGetUndef(int_vec_type);

         /*
          * Performance notice only: gate it on GALLIVM_DEBUG_PERF exactly
          * like the integer fallback below, instead of printing it every
          * time a float vector comparison is generated.
          */
         if (gallivm_debug & GALLIVM_DEBUG_PERF) {
            debug_printf("%s: warning: using slow element-wise float"
                         " vector comparison\n", __FUNCTION__);
         }

         /* Compare element by element and assemble the mask vector. */
         for (i = 0; i < type.length; ++i) {
            LLVMValueRef index = lp_build_const_int32(gallivm, i);
            cond = LLVMBuildFCmp(builder, op,
                                 LLVMBuildExtractElement(builder, a, index, ""),
                                 LLVMBuildExtractElement(builder, b, index, ""),
                                 "");
            cond = LLVMBuildSelect(builder, cond,
                                   LLVMConstExtractElement(ones, index),
                                   LLVMConstExtractElement(zeros, index),
                                   "");
            res = LLVMBuildInsertElement(builder, res, cond, index, "");
         }
      }
#endif
   }
   else {
      LLVMIntPredicate op;
      switch(func) {
      case PIPE_FUNC_EQUAL:
         op = LLVMIntEQ;
         break;
      case PIPE_FUNC_NOTEQUAL:
         op = LLVMIntNE;
         break;
      case PIPE_FUNC_LESS:
         op = type.sign ? LLVMIntSLT : LLVMIntULT;
         break;
      case PIPE_FUNC_LEQUAL:
         op = type.sign ? LLVMIntSLE : LLVMIntULE;
         break;
      case PIPE_FUNC_GREATER:
         op = type.sign ? LLVMIntSGT : LLVMIntUGT;
         break;
      case PIPE_FUNC_GEQUAL:
         op = type.sign ? LLVMIntSGE : LLVMIntUGE;
         break;
      default:
         assert(0);
         return lp_build_undef(gallivm, type);
      }

#if HAVE_LLVM >= 0x0207
      /* Compare, then sign-extend the i1 result(s) to 0 / ~0. */
      cond = LLVMBuildICmp(builder, op, a, b, "");
      res = LLVMBuildSExt(builder, cond, int_vec_type, "");
#else
      if (type.length == 1) {
         cond = LLVMBuildICmp(builder, op, a, b, "");
         res = LLVMBuildSExt(builder, cond, int_vec_type, "");
      }
      else {
         unsigned i;

         res = LLVMGetUndef(int_vec_type);

         if (gallivm_debug & GALLIVM_DEBUG_PERF) {
            debug_printf("%s: using slow element-wise int"
                         " vector comparison\n", __FUNCTION__);
         }

         /* Compare element by element and assemble the mask vector. */
         for(i = 0; i < type.length; ++i) {
            LLVMValueRef index = lp_build_const_int32(gallivm, i);
            cond = LLVMBuildICmp(builder, op,
                                 LLVMBuildExtractElement(builder, a, index, ""),
                                 LLVMBuildExtractElement(builder, b, index, ""),
                                 "");
            cond = LLVMBuildSelect(builder, cond,
                                   LLVMConstExtractElement(ones, index),
                                   LLVMConstExtractElement(zeros, index),
                                   "");
            res = LLVMBuildInsertElement(builder, res, cond, index, "");
         }
      }
#endif
   }

   return res;
}
Пример #17
0
/*
 * Fetch texels of a 4x4-block format through the direct mapped format cache.
 *
 * For every element the cache tag selected by a hash of the block address is
 * compared with the block address; on mismatch the block is decoded into the
 * cache and the tag updated.  The texel (i, j) is then read from the cache.
 *
 * n is the number of elements (1 = scalar path), base_ptr/offset together
 * give the block address, i and j select the texel inside the block.
 *
 * Returns (vectors of) 4x8 rgba aos value
 */
LLVMValueRef
lp_build_fetch_cached_texels(struct gallivm_state *gallivm,
                             const struct util_format_description *format_desc,
                             unsigned n,
                             LLVMValueRef base_ptr,
                             LLVMValueRef offset,
                             LLVMValueRef i,
                             LLVMValueRef j,
                             LLVMValueRef cache)

{
   LLVMBuilderRef b = gallivm->builder;
   LLVMTypeRef i8_type = LLVMInt8TypeInContext(gallivm->context);
   LLVMTypeRef i32_type = LLVMInt32TypeInContext(gallivm->context);
   LLVMTypeRef i64_type = LLVMInt64TypeInContext(gallivm->context);
   LLVMValueRef base64, mixed, upper, folded, slot, slot_mask;
   LLVMValueRef texel_in_block, cache_index, result;
   unsigned block_bytes_log2, cache_size_log2;
   struct lp_build_context bld32;
   struct lp_type type;

   memset(&type, 0, sizeof type);
   type.width = 32;
   type.length = n;

   assert(format_desc->block.width == 4);
   assert(format_desc->block.height == 4);

   lp_build_context_init(&bld32, gallivm, type);

   /*
    * The cache is direct mapped; the hash has to stay cheap, so it is just
    * a few shifts and xors of the (truncated) block address.
    */

   /* TODO: not ideal with 32bit pointers... */

   block_bytes_log2 = util_logbase2(format_desc->block.bits / 8);
   cache_size_log2 = util_logbase2(LP_BUILD_FORMAT_CACHE_SIZE);

   base64 = LLVMBuildPtrToInt(b, base_ptr, i64_type, "");

   /* Only the low 32 bits of the address take part in the hash. */
   mixed = LLVMBuildPtrToInt(b, base_ptr, i32_type, "");
   mixed = lp_build_broadcast_scalar(&bld32, mixed);
   mixed = LLVMBuildAdd(b, offset, mixed, "");

   /* Drop the low bits that are unused for block-sized steps. */
   mixed = LLVMBuildLShr(b, mixed,
                         lp_build_const_int_vec(gallivm, type, block_bytes_log2), "");

   /* This only really makes sense for size 64,128,256 */
   upper = LLVMBuildLShr(b, mixed,
                         lp_build_const_int_vec(gallivm, type, 2*cache_size_log2), "");
   slot = LLVMBuildXor(b, upper, mixed, "");
   folded = LLVMBuildLShr(b, slot,
                          lp_build_const_int_vec(gallivm, type, cache_size_log2), "");
   slot = LLVMBuildXor(b, slot, folded, "");

   slot_mask = lp_build_const_int_vec(gallivm, type, LP_BUILD_FORMAT_CACHE_SIZE - 1);
   slot = LLVMBuildAnd(b, slot, slot_mask, "");

   /* texel index inside the block is i * 4 + j, cache index is slot * 16 + that */
   texel_in_block = LLVMBuildShl(b, i, lp_build_const_int_vec(gallivm, type, 2), "");
   texel_in_block = LLVMBuildAdd(b, texel_in_block, j, "");
   cache_index = LLVMBuildShl(b, slot,
                              lp_build_const_int_vec(gallivm, type, 4), "");
   cache_index = LLVMBuildAdd(b, texel_in_block, cache_index, "");

   if (n <= 1) {
      /* Scalar: a single lookup, no extract/insert needed. */
      struct lp_build_if_state miss;
      LLVMValueRef wide_offset, block_addr, tag, is_miss;

      wide_offset = LLVMBuildZExt(b, offset, i64_type, "");
      block_addr = LLVMBuildAdd(b, wide_offset, base64, "");
      tag = lookup_tag_data(gallivm, cache, slot);
      is_miss = LLVMBuildICmp(b, LLVMIntNE, tag, block_addr, "");

      lp_build_if(&miss, gallivm, is_miss);
      {
         LLVMValueRef block_ptr = LLVMBuildIntToPtr(b, block_addr,
                                                    LLVMPointerType(i8_type, 0), "");
         update_cached_block(gallivm, format_desc, block_ptr, slot, cache);
#if LP_BUILD_FORMAT_CACHE_DEBUG
         update_cache_access(gallivm, cache, 1,
                             LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_MISS);
#endif
      }
      lp_build_endif(&miss);

      result = lookup_cached_pixel(gallivm, cache, cache_index);
   }
   else {
      /* Vector: handle every lane separately and assemble the result. */
      unsigned lane;

      result = LLVMGetUndef(LLVMVectorType(i32_type, n));
      for (lane = 0; lane < n; lane++) {
         struct lp_build_if_state miss;
         LLVMValueRef lane_idx, lane_addr, lane_cache_index, lane_slot;
         LLVMValueRef tag, is_miss, texel;

         lane_idx = lp_build_const_int32(gallivm, lane);

         lane_addr = LLVMBuildExtractElement(b, offset, lane_idx, "");
         lane_addr = LLVMBuildZExt(b, lane_addr, i64_type, "");
         lane_addr = LLVMBuildAdd(b, lane_addr, base64, "");

         lane_cache_index = LLVMBuildExtractElement(b, cache_index, lane_idx, "");
         /* undo the "* 16" to get back the cache slot of this lane */
         lane_slot = LLVMBuildLShr(b, lane_cache_index,
                                   lp_build_const_int32(gallivm, 4), "");

         tag = lookup_tag_data(gallivm, cache, lane_slot);
         is_miss = LLVMBuildICmp(b, LLVMIntNE, tag, lane_addr, "");

         lp_build_if(&miss, gallivm, is_miss);
         {
            LLVMValueRef block_ptr = LLVMBuildIntToPtr(b, lane_addr,
                                                       LLVMPointerType(i8_type, 0), "");
            update_cached_block(gallivm, format_desc, block_ptr, lane_slot, cache);
#if LP_BUILD_FORMAT_CACHE_DEBUG
            update_cache_access(gallivm, cache, 1,
                                LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_MISS);
#endif
         }
         lp_build_endif(&miss);

         texel = lookup_cached_pixel(gallivm, cache, lane_cache_index);

         result = LLVMBuildInsertElement(b, result, texel, lane_idx, "");
      }
   }
#if LP_BUILD_FORMAT_CACHE_DEBUG
   update_cache_access(gallivm, cache, n,
                       LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_TOTAL);
#endif
   return LLVMBuildBitCast(b, result, LLVMVectorType(i8_type, n * 4), "");
}
Пример #18
0
/**
 * Unpack a single packed pixel into four per-channel values using
 * shift/mask/convert/scale arithmetic.
 *
 * Only plain 1x1 formats of at most 32 bits with unsigned (or void)
 * channels are handled; this is asserted.
 *
 * @param gallivm  code generation state
 * @param desc     the pixel format for the packed pixel value
 * @param packed   32bit integer pixel in a format such as
 *                 PIPE_FORMAT_B8G8R8A8_UNORM
 *
 * @return a float[4] vector with one element per format channel; normalized
 *         channels are scaled by 1 / channel-max.
 */
static INLINE LLVMValueRef
lp_build_unpack_arith_rgba_aos(struct gallivm_state *gallivm,
                               const struct util_format_description *desc,
                               LLVMValueRef packed)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMTypeRef i32_type = LLVMInt32TypeInContext(gallivm->context);
   LLVMTypeRef f32_type = LLVMFloatTypeInContext(gallivm->context);
   LLVMTypeRef i32x4_type = LLVMVectorType(i32_type, 4);
   LLVMTypeRef f32x4_type = LLVMVectorType(f32_type, 4);
   LLVMValueRef chan_shift[4];
   LLVMValueRef chan_mask[4];
   LLVMValueRef chan_scale[4];
   LLVMValueRef channels, as_float;
   boolean any_normalized = FALSE;
   boolean has_32bit_chan = FALSE;
   unsigned bit_pos = 0;
   unsigned chan;

   /* TODO: Support more formats */
   assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
   assert(desc->block.width == 1);
   assert(desc->block.height == 1);
   assert(desc->block.bits <= 32);

   /* The integer math is done on 32bit values, same size as the floats. */
   assert (LLVMTypeOf(packed) == i32_type);

   /* Splat the pixel over a 4-vector:
    * before: packed = BGRA
    * after: packed = {BGRA, BGRA, BGRA, BGRA}
    */
   packed = LLVMBuildInsertElement(builder,
                                   LLVMGetUndef(i32x4_type),
                                   packed,
                                   LLVMConstNull(i32_type),
                                   "");
   packed = LLVMBuildShuffleVector(builder,
                                   packed,
                                   LLVMGetUndef(i32x4_type),
                                   LLVMConstNull(i32x4_type),
                                   "");

   /* Per-channel shift amount, bit mask and normalization factor. */
   for (chan = 0; chan < 4; chan++) {
      const unsigned width = desc->channel[chan].size;

      if (desc->channel[chan].type != UTIL_FORMAT_TYPE_VOID) {
         unsigned long long max_value = (1ULL << width) - 1;

         assert(desc->channel[chan].type == UTIL_FORMAT_TYPE_UNSIGNED);

         /* a full 32bit channel does not fit a signed conversion */
         if (width == 32)
            has_32bit_chan = TRUE;

         chan_shift[chan] = lp_build_const_int32(gallivm, bit_pos);
         chan_mask[chan] = lp_build_const_int32(gallivm, max_value);

         if (desc->channel[chan].normalized) {
            chan_scale[chan] = lp_build_const_float(gallivm, 1.0 / max_value);
            any_normalized = TRUE;
         }
         else {
            chan_scale[chan] = lp_build_const_float(gallivm, 1.0);
         }
      }
      else {
         /* padding: shift is irrelevant, mask and scale clear the lane */
         chan_shift[chan] = LLVMGetUndef(i32_type);
         chan_mask[chan] = LLVMConstNull(i32_type);
         chan_scale[chan] = LLVMConstNull(f32_type);
      }

      bit_pos += width;
   }

   /* Ex: {BGRA, BGRA, BGRA, BGRA} -> {B, G, R, A} */
   channels = LLVMBuildLShr(builder, packed, LLVMConstVector(chan_shift, 4), "");
   channels = LLVMBuildAnd(builder, channels, LLVMConstVector(chan_mask, 4), "");

   /* UIToFP can't be expressed in SSE2, so prefer the signed conversion
    * unless a 32bit wide channel requires the unsigned one. */
   as_float = has_32bit_chan
      ? LLVMBuildUIToFP(builder, channels, f32x4_type, "")
      : LLVMBuildSIToFP(builder, channels, f32x4_type, "");

   /* 'as_float' may now hold e.g. {255.0, 255.0, 255.0, 255.0}; for
    * normalized formats bring that down to {1.0, 1.0, 1.0, 1.0}.
    */
   return any_normalized
      ? LLVMBuildFMul(builder, as_float, LLVMConstVector(chan_scale, 4), "")
      : as_float;
}
/**
 * Truncate or expand the bit width of the elements of one or more vectors.
 *
 * The total number of channels is preserved; either several sources are
 * narrowed into one destination, or one source is widened into several
 * destinations, or the single source is passed through unchanged.
 *
 * NOTE: Getting the right sign flags is crucial here, as we employ some
 * intrinsics that do saturation.
 *
 * @param gallivm   code generation state
 * @param src_type  type of every source vector
 * @param dst_type  type of every destination vector
 * @param src       source vectors
 * @param num_srcs  number of source vectors
 * @param dst       receives the destination vectors
 * @param num_dsts  number of destination vectors
 */
void
lp_build_resize(struct gallivm_state *gallivm,
                struct lp_type src_type,
                struct lp_type dst_type,
                const LLVMValueRef *src, unsigned num_srcs,
                LLVMValueRef *dst, unsigned num_dsts)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef out[LP_MAX_VECTOR_LENGTH];
   /* total register size in bits, taken before any type is adjusted below */
   const unsigned src_bits = src_type.width * src_type.length;
   const unsigned dst_bits = dst_type.width * dst_type.length;
   unsigned k;

   /*
    * Float <-> int conversion is out of scope; callers do it before or
    * after calling this function.
    */
   assert(src_type.floating == dst_type.floating);

   /*
    * Double <-> float conversion is not supported yet, although it could be
    * added with little effort.
    */
   assert((!src_type.floating && !dst_type.floating) ||
          src_type.width == dst_type.width);

   /* We must not lose or gain channels. Only precision */
   assert(src_type.length * num_srcs == dst_type.length * num_dsts);

   /* No M:N conversion, only 1:N, M:1, or 1:1 */
   assert(num_srcs == 1 || num_dsts == 1);

   assert(src_type.length <= LP_MAX_VECTOR_LENGTH);
   assert(dst_type.length <= LP_MAX_VECTOR_LENGTH);
   assert(num_srcs <= LP_MAX_VECTOR_LENGTH);
   assert(num_dsts <= LP_MAX_VECTOR_LENGTH);

   if (src_type.width == dst_type.width) {
      /*
       * Same width: nothing to do.
       */

      assert(num_srcs == 1);
      assert(num_dsts == 1);

      out[0] = src[0];
   }
   else if (src_type.width > dst_type.width) {
      /*
       * Narrow the elements.
       */

      assert(num_dsts == 1);

      if (src_bits == dst_bits) {
         /*
          * Same register size on both sides -- vector packing intrinsics
          */
         out[0] = lp_build_pack(gallivm, src_type, dst_type, TRUE, src, num_srcs);
      }
      else if (src_type.width / dst_type.width > num_srcs) {
         /*
          * Split the source vectors (with shuffle) into pieces of the
          * destination vector's size first, then pack normally.
          * Note: cannot use cast/extract because llvm generates atrocious code.
          */
         unsigned pieces = src_bits / dst_bits;
         unsigned piece_length = src_type.length / pieces;

         for (k = 0; k < pieces * num_srcs; k++) {
            unsigned first_elem = (k % pieces) * piece_length;
            out[k] = lp_build_extract_range(gallivm, src[k / pieces],
                                            first_elem, piece_length);
         }
         num_srcs *= pieces;
         src_type.length = piece_length;
         out[0] = lp_build_pack(gallivm, src_type, dst_type, TRUE, out, num_srcs);
      }
      else {
         /*
          * Narrower elements but a bigger vector - pack groups of sources
          * first and concatenate afterwards, simply because this should be
          * more AVX-friendly for the cases we probably hit.
          */
         unsigned groups = dst_bits / src_bits;
         unsigned srcs_per_group = num_srcs / groups;

         dst_type.length = dst_type.length / groups;

         for (k = 0; k < groups; k++) {
            out[k] = lp_build_pack(gallivm, src_type, dst_type, TRUE,
                                   &src[k*srcs_per_group], srcs_per_group);
         }
         out[0] = lp_build_concat(gallivm, out, dst_type, groups);
      }
   }
   else {
      /*
       * Widen the elements.
       */

      assert(num_srcs == 1);

      if (src_bits == dst_bits) {
         /*
          * Same register size on both sides -- vector unpack intrinsics
          */
         lp_build_unpack(gallivm, src_type, dst_type, src[0], out, num_dsts);
      }
      else {
         /*
          * Fall back to extending one element at a time.
          */
         assert(src_type.length * num_srcs == dst_type.length * num_dsts);

         for (k = 0; k < num_dsts; k++)
            out[k] = lp_build_undef(gallivm, dst_type);

         for (k = 0; k < src_type.length; k++) {
            unsigned which_dst = k / dst_type.length;
            LLVMValueRef from = lp_build_const_int32(gallivm, k);
            LLVMValueRef to = lp_build_const_int32(gallivm, k % dst_type.length);
            LLVMValueRef elem = LLVMBuildExtractElement(builder, src[0], from, "");

            /* sign-extend only when both sides are signed */
            elem = (src_type.sign && dst_type.sign)
               ? LLVMBuildSExt(builder, elem, lp_build_elem_type(gallivm, dst_type), "")
               : LLVMBuildZExt(builder, elem, lp_build_elem_type(gallivm, dst_type), "");

            out[which_dst] = LLVMBuildInsertElement(builder, out[which_dst], elem, to, "");
         }
      }
   }

   for (k = 0; k < num_dsts; k++)
      dst[k] = out[k];
}
Пример #20
0
/**
 * Gather elements from scatter positions in memory into a single vector.
 * Use for fetching texels from a texture.
 * For SSE, typical values are length=4, src_width=32, dst_width=32.
 *
 * When src_width < dst_width, the return value can be justified in
 * one of two ways:
 * "integer justification" is used when the caller treats the destination
 * as a packed integer bitmask, as described by the channels' "shift" and
 * "width" fields;
 * "vector justification" is used when the caller casts the destination
 * to a vector and needs channel X to be in vector element 0.
 *
 * @param gallivm code generation state
 * @param length length of the offsets
 * @param src_width src element width in bits
 * @param dst_type result element type (src will be expanded to fit,
 *        but truncation is not allowed)
 *        (this may be a vector, must be pot sized)
 * @param aligned whether the data is guaranteed to be aligned (to src_width)
 * @param base_ptr base pointer, needs to be a i8 pointer type.
 * @param offsets vector with offsets
 * @param vector_justify select vector rather than integer justification
 *
 * @return the gathered data; on the non-AVX2 paths it is bitcast to
 *         dst_type (length == 1) or to dst_type with its length multiplied
 *         by 'length' (scalar-fetch vector path), or it is the result of
 *         lp_build_concat() over the per-offset fetches (vector-fetch path).
 */
LLVMValueRef
lp_build_gather(struct gallivm_state *gallivm,
                unsigned length,
                unsigned src_width,
                struct lp_type dst_type,
                boolean aligned,
                LLVMValueRef base_ptr,
                LLVMValueRef offsets,
                boolean vector_justify)
{
   LLVMValueRef res;
   /* true when the fetched bits are fewer than one full dst_type value */
   boolean need_expansion = src_width < dst_type.width * dst_type.length;
   boolean vec_fetch;
   struct lp_type fetch_type, fetch_dst_type;
   LLVMTypeRef src_type;

   /* truncation is not supported */
   assert(src_width <= dst_type.width * dst_type.length);

   /*
    * This is quite a mess...
    * Figure out if the fetch should be done as:
    * a) scalar or vector
    * b) float or int
    *
    * As an example, for a 96bit fetch expanded into 4x32bit, it is better
    * to use (3x32bit) vector type (then pad the vector). Otherwise, the
    * zext will cause extra instructions.
    * However, the same isn't true for 3x16bit (the codegen for that is
    * completely worthless on x86 simd, and for 3x8bit it is way worse
    * still, don't try that... (To get really good code out of llvm for
    * these cases, the only way is to decompose the fetches manually
    * into 1x32bit/1x16bit, or 1x16/1x8bit respectively, although the latter
    * case requires sse41, otherwise simple scalar zext is way better.
    * But probably not important enough, so don't bother.)
    * Also, we try to honor the floating bit of destination (but isn't
    * possible if caller asks for instance for 2x32bit dst_type with
    * 48bit fetch - the idea would be to use 3x16bit fetch, pad and
    * cast to 2x32f type, so the fetch is always int and on top of that
    * we avoid the vec pad and use scalar zext due the above mentioned
    * issue).
    * Note this is optimized for x86 sse2 and up backend. Could be tweaked
    * for other archs if necessary...
    */
   if (((src_width % 32) == 0) && ((src_width % dst_type.width) == 0) &&
       (dst_type.length > 1)) {
      /* use vector fetch (if dst_type is vector) */
      vec_fetch = TRUE;
      if (dst_type.floating) {
         fetch_type = lp_type_float_vec(dst_type.width, src_width);
      } else {
         fetch_type = lp_type_int_vec(dst_type.width, src_width);
      }
      /* intentionally not using lp_build_vec_type here */
      src_type = LLVMVectorType(lp_build_elem_type(gallivm, fetch_type),
                                fetch_type.length);
      /* the fetched vector is later widened to the destination's length */
      fetch_dst_type = fetch_type;
      fetch_dst_type.length = dst_type.length;
    } else {
      /* use scalar fetch */
      vec_fetch = FALSE;
      if (dst_type.floating && ((src_width == 32) || (src_width == 64))) {
         fetch_type = lp_type_float(src_width);
      } else {
         fetch_type = lp_type_int(src_width);
      }
      src_type = lp_build_vec_type(gallivm, fetch_type);
      /* one scalar as wide as a whole dst_type value */
      fetch_dst_type = fetch_type;
      fetch_dst_type.width = dst_type.width * dst_type.length;
   }

   if (length == 1) {
      /* Scalar */
      res = lp_build_gather_elem_vec(gallivm, length,
                                     src_width, src_type, fetch_dst_type,
                                     aligned, base_ptr, offsets, 0,
                                     vector_justify);
      return LLVMBuildBitCast(gallivm->builder, res,
                              lp_build_vec_type(gallivm, dst_type), "");
      /*
       * (This note concerns the AVX2 gather branches that follow.)
       * Excluding expansion from these paths because if you need it for
       * 32bit/64bit fetches you're doing it wrong (this is gather, not
       * conversion) and it would be awkward for floats.
       */
   } else if (util_cpu_caps.has_avx2 && !need_expansion &&
              src_width == 32 && (length == 4 || length == 8)) {
      return lp_build_gather_avx2(gallivm, length, src_width, dst_type,
                                  base_ptr, offsets);
   /*
    * The 64bit variant below is deliberately disabled (the "0 &&").
    * This looks bad on paper wrt throughput/latency on Haswell.
    * Even on Broadwell it doesn't look stellar.
    * Albeit no measurements were done (but tested to work).
    * Should definitely enable on Skylake.
    * (In general, should be more of a win if the fetch is 256bit wide -
    * this is true for the 32bit case above too.)
    */
   } else if (0 && util_cpu_caps.has_avx2 && !need_expansion &&
              src_width == 64 && (length == 2 || length == 4)) {
      return lp_build_gather_avx2(gallivm, length, src_width, dst_type,
                                  base_ptr, offsets);
   } else {
      /* Vector */

      LLVMValueRef elems[LP_MAX_VECTOR_WIDTH / 8];
      unsigned i;
      boolean vec_zext = FALSE;
      struct lp_type res_type, gather_res_type;
      LLVMTypeRef res_t, gather_res_t;

      /*
       * res_type: type of the assembled result before the final bitcast.
       * gather_res_type: type the fetched elements are inserted into; it
       * only differs from res_type in the vec_zext case below.
       */
      res_type = fetch_dst_type;
      res_type.length *= length;
      gather_res_type = res_type;

      if (src_width == 16 && dst_type.width == 32 && dst_type.length == 1) {
         /*
          * Note that llvm is never able to optimize zext/insert combos
          * directly (i.e. zero the simd reg, then place the elements into
          * the appropriate place directly). (I think this has to do with
          * scalar/vector transition.) And scalar 16->32bit zext simd loads
          * aren't possible (instead loading to scalar reg first).
          * No idea about other archs...
          * We could do this manually, but instead we just use a vector
          * zext, which is simple enough (and, in fact, llvm might optimize
          * this away).
          * (We're not trying that with other bit widths as that might not be
          * easier, in particular with 8 bit values at least with only sse2.)
          */
         assert(vec_fetch == FALSE);
         /* gather unexpanded 16bit elements, widen the whole vector later */
         gather_res_type.width /= 2;
         fetch_dst_type = fetch_type;
         src_type = lp_build_vec_type(gallivm, fetch_type);
         vec_zext = TRUE;
      }
      res_t = lp_build_vec_type(gallivm, res_type);
      gather_res_t = lp_build_vec_type(gallivm, gather_res_type);
      res = LLVMGetUndef(gather_res_t);
      /* fetch one element per offset; scalar fetches go straight into res */
      for (i = 0; i < length; ++i) {
         LLVMValueRef index = lp_build_const_int32(gallivm, i);
         elems[i] = lp_build_gather_elem_vec(gallivm, length,
                                             src_width, src_type, fetch_dst_type,
                                             aligned, base_ptr, offsets, i,
                                             vector_justify);
         if (!vec_fetch) {
            res = LLVMBuildInsertElement(gallivm->builder, res, elems[i], index, "");
         }
      }
      if (vec_zext) {
         res = LLVMBuildZExt(gallivm->builder, res, res_t, "");
         if (vector_justify) {
#ifdef PIPE_ARCH_BIG_ENDIAN
            /* on big endian, shift the value up into the high bits */
            unsigned sv = dst_type.width - src_width;
            res = LLVMBuildShl(gallivm->builder, res,
                               lp_build_const_int_vec(gallivm, res_type, sv), "");
#endif
         }
      }
      if (vec_fetch) {
         /*
          * Do bitcast now otherwise llvm might get some funny ideas wrt
          * float/int types...
          */
         for (i = 0; i < length; i++) {
            elems[i] = LLVMBuildBitCast(gallivm->builder, elems[i],
                                        lp_build_vec_type(gallivm, dst_type), "");
         }
         res = lp_build_concat(gallivm, elems, dst_type, length);
      } else {
         /* reinterpret the assembled bits as 'length' dst_type values */
         struct lp_type really_final_type = dst_type;
         assert(res_type.length * res_type.width ==
                dst_type.length * dst_type.width * length);
         really_final_type.length *= length;
         res = LLVMBuildBitCast(gallivm->builder, res,
                                lp_build_vec_type(gallivm, really_final_type), "");
      }
   }

   return res;
}
Пример #21
0
/**
 * Emit IR converting float32 value(s) to half-float bit patterns held in
 * 16-bit integers.
 *
 * If the CPU reports F16C, LLVM is at least 3.1 and the input has 4 or 8
 * elements, a single vcvtps2ph is used
 * [llvm.x86.vcvtps2ph / _mm_cvtps_ph].  Otherwise the generic
 * float -> small-float conversion is emitted and its 32-bit result is
 * truncated to 16 bits.
 *
 * Infs and NaNs are preserved; rounding is towards zero (trunc).
 *
 * @param gallivm       state supplying the LLVM context and builder
 * @param src           float32 scalar or vector to convert
 * @return              the converted value(s) as 16-bit integers
 */
LLVMValueRef
lp_build_float_to_half(struct gallivm_state *gallivm,
                       LLVMValueRef src)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMTypeRef src_llvm_type = LLVMTypeOf(src);
   struct lp_type i32_type, i16_type;
   LLVMValueRef result;
   unsigned length = 1;

   /* A non-vector input is handled as a single element. */
   if (LLVMGetTypeKind(src_llvm_type) == LLVMVectorTypeKind) {
      length = LLVMGetVectorSize(src_llvm_type);
   }

   i32_type = lp_type_int_vec(32, 32 * length);
   i16_type = lp_type_int_vec(16, 16 * length);

   if (!util_cpu_caps.has_f16c || HAVE_LLVM < 0x0301 ||
       (length != 4 && length != 8)) {
      /* Generic path: 10 mantissa bits, 5 exponent bits. */
      LLVMValueRef half_in_i32;

      half_in_i32 = lp_build_float_to_smallfloat(gallivm, i32_type, src,
                                                 10, 5, 0, true);
      /* Narrow int32 -> int16 with a trunc (might generate bad code). */
      result = LLVMBuildTrunc(builder, half_in_i32,
                              lp_build_vec_type(gallivm, i16_type), "");
   }
   else {
      /* Hardware path: both intrinsic variants return 8 x i16. */
      struct lp_type i16x8_type = lp_type_int_vec(16, 16 * 8);
      unsigned rounding = 3; /* same as LP_BUILD_ROUND_TRUNCATE */
      LLVMTypeRef int32_t_ref = LLVMInt32TypeInContext(gallivm->context);
      const char *intrinsic_name = (length == 4)
                                 ? "llvm.x86.vcvtps2ph.128"
                                 : "llvm.x86.vcvtps2ph.256";

      result = lp_build_intrinsic_binary(builder, intrinsic_name,
                                         lp_build_vec_type(gallivm, i16x8_type),
                                         src,
                                         LLVMConstInt(int32_t_ref, rounding, 0));

      /* The 128-bit variant only fills the low four lanes. */
      if (length == 4) {
         result = lp_build_extract_range(gallivm, result, 0, 4);
      }
   }

   /*
    * Debugging aid (disabled): compare against util_float_to_half()
    * called element by element and print both results.
    */
   if (0) {
      LLVMTypeRef int32_t_ref = LLVMInt32TypeInContext(gallivm->context);
      LLVMTypeRef int16_t_ref = LLVMInt16TypeInContext(gallivm->context);
      LLVMTypeRef float_t_ref = LLVMFloatTypeInContext(gallivm->context);
      LLVMValueRef expected = LLVMGetUndef(LLVMVectorType(int16_t_ref, length));
      LLVMTypeRef ref_fn_type = LLVMFunctionType(int16_t_ref, &float_t_ref, 1, 0);
      LLVMValueRef ref_fn;
      unsigned lane;

      ref_fn = lp_build_const_int_pointer(gallivm,
                  func_to_pointer((func_pointer)util_float_to_half));
      ref_fn = LLVMBuildBitCast(builder, ref_fn,
                                LLVMPointerType(ref_fn_type, 0),
                                "util_float_to_half");

      for (lane = 0; lane < length; ++lane) {
         LLVMValueRef lane_idx = LLVMConstInt(int32_t_ref, lane, 0);
         LLVMValueRef lane_f32 = LLVMBuildExtractElement(builder, src, lane_idx, "");
#if 0
         /* XXX: not really supported by backends */
         LLVMValueRef lane_f16 = lp_build_intrinsic_unary(builder, "llvm.convert.to.fp16", int16_t_ref, lane_f32);
#else
         LLVMValueRef lane_f16 = LLVMBuildCall(builder, ref_fn, &lane_f32, 1, "");
#endif
         expected = LLVMBuildInsertElement(builder, expected, lane_f16, lane_idx, "");
      }

      lp_build_print_value(gallivm, "src  = ", src);
      lp_build_print_value(gallivm, "llvm = ", result);
      lp_build_print_value(gallivm, "util = ", expected);
      lp_build_printf(gallivm, "\n");
   }

   return result;
}
Пример #22
0
/**
 * Fetch texels from a texture and return them in SoA layout.
 *
 * Several strategies are tried in order, from fully vectorized gathers of
 * packed pixels down to a per-pixel AoS fetch that is transposed by hand.
 *
 * \param format_desc  description of the texture format being read.
 * \param type  the desired return type for 'rgba'.  The vector length
 *              is the number of texels to fetch.
 *
 * \param base_ptr  points to the base of the texture mip tree.
 * \param offset    offset to start of the texture image block.  For non-
 *                  compressed formats, this simply is an offset to the texel.
 *                  For compressed formats, it is an offset to the start of the
 *                  compressed data block.
 *
 * \param i, j  the sub-block pixel coordinates.  For non-compressed formats
 *              these will always be (0,0).  For compressed formats, i will
 *              be in [0, block_width-1] and j will be in [0, block_height-1].
 * \param rgba_out  receives one vector per channel (R, G, B, A).
 */
void
lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
                        const struct util_format_description *format_desc,
                        struct lp_type type,
                        LLVMValueRef base_ptr,
                        LLVMValueRef offset,
                        LLVMValueRef i,
                        LLVMValueRef j,
                        LLVMValueRef rgba_out[4])
{
   LLVMBuilderRef builder = gallivm->builder;

   /*
    * Case 1: a plain packed pixel that fits into one element of the
    * destination type.  Gather all packed pixels into one vector
    * (e.g. {XYZW, XYZW, XYZW, XYZW}) and unpack every component for all
    * elements in parallel.
    */
   if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
       (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
        format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB ||
        format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) &&
       format_desc->block.width == 1 &&
       format_desc->block.height == 1 &&
       format_desc->block.bits <= type.width &&
       (format_desc->channel[0].type != UTIL_FORMAT_TYPE_FLOAT ||
        format_desc->channel[0].size == 32))
   {
      LLVMValueRef texels;

      assert(format_desc->block.bits <= type.width);

      texels = lp_build_gather(gallivm,
                               type.length,
                               format_desc->block.bits,
                               type.width,
                               base_ptr, offset, FALSE);

      /* Convert the packed texels to float rgba. */
      lp_build_unpack_rgba_soa(gallivm, format_desc, type,
                               texels, rgba_out);
      return;
   }

   /*
    * Case 2: packed small-float formats.  Conceptually the same as
    * above, but they need dedicated AoS packed -> SoA float converters.
    */
   if (format_desc->format == PIPE_FORMAT_R11G11B10_FLOAT ||
       format_desc->format == PIPE_FORMAT_R9G9B9E5_FLOAT) {
      LLVMValueRef texels;

      assert(type.floating);
      assert(type.width == 32);

      texels = lp_build_gather(gallivm, type.length,
                               format_desc->block.bits,
                               type.width, base_ptr, offset,
                               FALSE);

      if (format_desc->format != PIPE_FORMAT_R11G11B10_FLOAT) {
         lp_build_rgb9e5_to_float(gallivm, texels, rgba_out);
      }
      else {
         lp_build_r11g11b10_to_float(gallivm, texels, rgba_out);
      }
      return;
   }

   /*
    * Case 3: 64-bit depth/stencil blocks, of which only 32 bits (or 8
    * bits for stencil) are needed per texel.
    */
   if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS &&
       format_desc->block.bits == 64) {
      LLVMValueRef zs;

      if (format_desc->format != PIPE_FORMAT_X32_S8X24_UINT) {
         assert (format_desc->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT);

         zs = lp_build_gather(gallivm, type.length,
                              32, type.width, base_ptr, offset, TRUE);
         zs = LLVMBuildBitCast(builder, zs,
                               lp_build_vec_type(gallivm, type), "");
      }
      else {
         /*
          * Stencil: just advance the offsets by 4 bytes and keep the low
          * 8 bits.  (base_ptr could instead be adjusted, even outside the
          * shader.)
          */
         unsigned stencil_mask = (1 << 8) - 1;
         LLVMValueRef four = lp_build_const_int_vec(gallivm, type, 4);
         LLVMValueRef stencil_offset;

         stencil_offset = LLVMBuildAdd(builder, offset, four, "");
         zs = lp_build_gather(gallivm, type.length,
                              32, type.width, base_ptr, stencil_offset,
                              FALSE);
         zs = LLVMBuildAnd(builder, zs,
                           lp_build_const_int_vec(gallivm, type,
                                                  stencil_mask), "");
      }

      /* for consistency with lp_build_unpack_rgba_soa() return sss1 or zzz1 */
      rgba_out[0] = zs;
      rgba_out[1] = zs;
      rgba_out[2] = zs;
      rgba_out[3] = lp_build_const_vec(gallivm, type, 1.0f);
      return;
   }

   /*
    * Case 4: formats that fit 8-bit unorm.  Fetch every pixel at once
    * through lp_build_fetch_rgba_aos() and convert the rgba8 result.
    */
   if (util_format_fits_8unorm(format_desc) &&
       type.floating && type.width == 32 &&
       (type.length == 1 || (type.length % 4 == 0))) {
      struct lp_type rgba8_type;
      LLVMValueRef rgba8;

      memset(&rgba8_type, 0, sizeof rgba8_type);
      rgba8_type.width = 8;
      rgba8_type.length = type.length * 4;
      rgba8_type.norm = TRUE;

      rgba8 = lp_build_fetch_rgba_aos(gallivm, format_desc, rgba8_type,
                                      base_ptr, offset, i, j);

      lp_build_rgba8_to_fi32_soa(gallivm, type, rgba8, rgba_out);
      return;
   }

   /*
    * Case 5: last resort, call lp_build_fetch_rgba_aos() once per pixel.
    *
    * Vectorization opportunities are lost here, but it is convenient for
    * formats or scenarios for which there was no opportunity or incentive
    * to optimize.
    */
   {
      struct lp_type pixel_type;
      unsigned pixel, channel;

      if (gallivm_debug & GALLIVM_DEBUG_PERF) {
         debug_printf("%s: scalar unpacking of %s\n",
                      __FUNCTION__, format_desc->short_name);
      }

      /* One pixel is a 4-element vector of the requested element type. */
      pixel_type = type;
      pixel_type.length = 4;

      for (channel = 0; channel < 4; ++channel) {
         rgba_out[channel] = lp_build_undef(gallivm, type);
      }

      for (pixel = 0; pixel < type.length; ++pixel) {
         LLVMValueRef pixel_idx = lp_build_const_int32(gallivm, pixel);
         LLVMValueRef pixel_offset;
         LLVMValueRef pixel_i;
         LLVMValueRef pixel_j;
         LLVMValueRef rgba;

         pixel_offset = LLVMBuildExtractElement(builder, offset,
                                                pixel_idx, "");
         pixel_i = LLVMBuildExtractElement(builder, i, pixel_idx, "");
         pixel_j = LLVMBuildExtractElement(builder, j, pixel_idx, "");

         /* A single {R,G,B,A} pixel in AoS form. */
         rgba = lp_build_fetch_rgba_aos(gallivm, format_desc, pixel_type,
                                        base_ptr, pixel_offset,
                                        pixel_i, pixel_j);

         /* Scatter its channels into slot 'pixel' of each SoA vector. */
         for (channel = 0; channel < 4; ++channel) {
            LLVMValueRef channel_idx = lp_build_const_int32(gallivm, channel);
            LLVMValueRef channel_val;

            channel_val = LLVMBuildExtractElement(builder, rgba,
                                                  channel_idx, "");
            rgba_out[channel] = LLVMBuildInsertElement(builder,
                                                       rgba_out[channel],
                                                       channel_val,
                                                       pixel_idx, "");
         }
      }
   }
}
Пример #23
0
	/*
	 * Store the computed result of 'inst' to its first destination
	 * register, one enabled channel at a time: optionally saturate the
	 * value, then write it to the address, output or temporary storage
	 * selected by reg->Register.File.
	 */
	TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
		LLVMValueRef value = dst[chan_index];

		if (inst->Instruction.Saturate != TGSI_SAT_NONE) {
			/*
			 * Saturation is emitted as a CLAMP opcode with three
			 * arguments: the value, args[1] chosen by the saturate mode
			 * below (0 or -1) and args[2] fixed to one - presumably the
			 * lower and upper bounds respectively.
			 */
			struct lp_build_emit_data clamp_emit_data;

			memset(&clamp_emit_data, 0, sizeof(clamp_emit_data));
			clamp_emit_data.arg_count = 3;
			clamp_emit_data.args[0] = value;
			clamp_emit_data.args[2] = base.one;

			switch(inst->Instruction.Saturate) {
			case TGSI_SAT_ZERO_ONE:
				clamp_emit_data.args[1] = base.zero;
				break;
			case TGSI_SAT_MINUS_PLUS_ONE:
				clamp_emit_data.args[1] = LLVMConstReal(
						base.elem_type, -1.0f);
				break;
			default:
				assert(0);
			}
			value = lp_build_emit_llvm(bld_base, TGSI_OPCODE_CLAMP,
						&clamp_emit_data);
		}

		/*
		 * Address registers are stored as-is, before the float bitcast
		 * applied to every other register file below.
		 */
		if (reg->Register.File == TGSI_FILE_ADDRESS) {
			temp_ptr = bld->addr[reg->Register.Index][chan_index];
			LLVMBuildStore(builder, value, temp_ptr);
			continue;
		}
	
		value = bitcast(bld_base, TGSI_TYPE_FLOAT, value);

		if (reg->Register.Indirect) {
			/*
			 * Indirect store: fetch the whole declared array range for
			 * this channel as one vector, insert the new value at the
			 * dynamically computed index (relative to range.First), then
			 * write every element of the vector back to its register.
			 */
			struct tgsi_declaration_range range = get_array_range(bld_base,
				reg->Register.File, &reg->Indirect);

        		unsigned i, size = range.Last - range.First + 1;
			LLVMValueRef array = LLVMBuildInsertElement(builder,
				emit_array_fetch(bld_base, reg->Register.File, TGSI_TYPE_FLOAT, range, chan_index),
				value,  emit_array_index(bld, &reg->Indirect, reg->Register.Index - range.First), "");

        		for (i = 0; i < size; ++i) {
				switch(reg->Register.File) {
				case TGSI_FILE_OUTPUT:
					temp_ptr = bld->outputs[i + range.First][chan_index];
					break;

				case TGSI_FILE_TEMPORARY:
					temp_ptr = lp_get_temp_ptr_soa(bld, i + range.First, chan_index);
					break;

				default:
					/* Other register files are not stored; this leaves
					 * the enclosing function without emitting anything. */
					return;
				}
				value = LLVMBuildExtractElement(builder, array, 
					lp_build_const_int32(gallivm, i), "");
				LLVMBuildStore(builder, value, temp_ptr);
			}

		} else {
			/* Direct store: a single register slot addressed by index. */
			switch(reg->Register.File) {
			case TGSI_FILE_OUTPUT:
				temp_ptr = bld->outputs[reg->Register.Index][chan_index];
				break;

			case TGSI_FILE_TEMPORARY:
				temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, chan_index);
				break;

			default:
				/* Other register files are not stored; this leaves
				 * the enclosing function without emitting anything. */
				return;
			}
			LLVMBuildStore(builder, value, temp_ptr);
		}
	}
Пример #24
0
/**
 * Fetch one or more pixels as RGBA in AoS layout.
 *
 * The strategies below are tried in order; the first whose preconditions
 * hold is used.  If none applies the function asserts and returns undef.
 *
 * \param format_desc  describes format of the image we're fetching from
 * \param type  the desired result type; its length must be a multiple of 4
 *              and type.length / 4 pixels are fetched
 * \param base_ptr  base pointer the offsets are applied to
 * \param offset  offset of the pixel block (or the texel if uncompressed)
 *                from base_ptr, one per pixel
 * \param i, j  the sub-block pixel coordinates.  For non-compressed formats
 *              these will always be (0, 0).
 * \return  a vector of 'type' holding the RGBA values of each pixel
 *          (4 elements per pixel).
 */
LLVMValueRef
lp_build_fetch_rgba_aos(struct gallivm_state *gallivm,
                        const struct util_format_description *format_desc,
                        struct lp_type type,
                        LLVMValueRef base_ptr,
                        LLVMValueRef offset,
                        LLVMValueRef i,
                        LLVMValueRef j)
{
   LLVMBuilderRef builder = gallivm->builder;
   /* Each pixel occupies four consecutive elements of the result. */
   unsigned num_pixels = type.length / 4;
   struct lp_build_context bld;

   assert(type.length <= LP_MAX_VECTOR_LENGTH);
   assert(type.length % 4 == 0);

   lp_build_context_init(&bld, gallivm, type);

   /*
    * Trivial case
    *
    * The format matches the type (apart from a swizzle) so no need for
    * scaling or converting.
    */

   if (format_matches_type(format_desc, type) &&
       format_desc->block.bits <= type.width * 4 &&
       util_is_power_of_two(format_desc->block.bits)) {
      LLVMValueRef packed;

      /*
       * Gather each pixel block into a (type.width * 4)-bit element, then
       * reinterpret the result as the requested vector type.
       */

      packed = lp_build_gather(gallivm, type.length/4,
                               format_desc->block.bits, type.width*4,
                               base_ptr, offset);

      assert(format_desc->block.bits <= type.width * type.length);

      packed = LLVMBuildBitCast(gallivm->builder, packed,
                                lp_build_vec_type(gallivm, type), "");

      return lp_build_format_swizzle_aos(format_desc, &bld, packed);
   }

   /*
    * Bit arithmetic
    *
    * Plain, non-mixed bitmask formats of at most 32 bits whose first or
    * second channel is unsigned.
    */

   if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
       (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
        format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) &&
       format_desc->block.width == 1 &&
       format_desc->block.height == 1 &&
       util_is_power_of_two(format_desc->block.bits) &&
       format_desc->block.bits <= 32 &&
       format_desc->is_bitmask &&
       !format_desc->is_mixed &&
       (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED ||
        format_desc->channel[1].type == UTIL_FORMAT_TYPE_UNSIGNED)) {

      /* num_pixels <= LP_MAX_VECTOR_LENGTH/4 follows from the assert above. */
      LLVMValueRef tmps[LP_MAX_VECTOR_LENGTH/4];
      LLVMValueRef res;
      unsigned k;

      /*
       * Unpack a pixel at a time into a <4 x float> RGBA vector
       */

      for (k = 0; k < num_pixels; ++k) {
         LLVMValueRef packed;

         packed = lp_build_gather_elem(gallivm, num_pixels,
                                       format_desc->block.bits, 32,
                                       base_ptr, offset, k);

         tmps[k] = lp_build_unpack_arith_rgba_aos(gallivm,
                                                  format_desc,
                                                  packed);
      }

      /*
       * Type conversion.
       *
       * TODO: We could avoid floating conversion for integer to
       * integer conversions.
       */

      if (gallivm_debug & GALLIVM_DEBUG_PERF && !type.floating) {
         debug_printf("%s: unpacking %s with floating point\n",
                      __FUNCTION__, format_desc->short_name);
      }

      /* Convert the num_pixels float4 vectors into one vector of 'type'. */
      lp_build_conv(gallivm,
                    lp_float32_vec4_type(),
                    type,
                    tmps, num_pixels, &res, 1);

      return lp_build_format_swizzle_aos(format_desc, &bld, res);
   }

   /*
    * YUV / subsampled formats
    */

   if (format_desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
      struct lp_type tmp_type;
      LLVMValueRef tmp;

      /* The subsampled fetch is converted from 8-bit normalized data. */
      memset(&tmp_type, 0, sizeof tmp_type);
      tmp_type.width = 8;
      tmp_type.length = num_pixels * 4;
      tmp_type.norm = TRUE;

      tmp = lp_build_fetch_subsampled_rgba_aos(gallivm,
                                               format_desc,
                                               num_pixels,
                                               base_ptr,
                                               offset,
                                               i, j);

      /* Convert in place from the 8-bit unorm type to the requested type. */
      lp_build_conv(gallivm,
                    tmp_type, type,
                    &tmp, 1, &tmp, 1);

      return tmp;
   }

   /*
    * Fallback to util_format_description::fetch_rgba_8unorm().
    *
    * Only taken when the requested type is itself unsigned 8-bit
    * normalized, so the callee's output needs no conversion.
    */

   if (format_desc->fetch_rgba_8unorm &&
       !type.floating && type.width == 8 && !type.sign && type.norm) {
      /*
       * Fallback to calling util_format_description::fetch_rgba_8unorm.
       *
       * This is definitely not the most efficient way of fetching pixels, as
       * we miss the opportunity to do vectorization, but it is
       * convenient for formats or scenarios for which there was no
       * opportunity or incentive to optimize.
       */

      LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
      char name[256];
      LLVMTypeRef i8t = LLVMInt8TypeInContext(gallivm->context);
      LLVMTypeRef pi8t = LLVMPointerType(i8t, 0);
      LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
      LLVMValueRef function;
      LLVMValueRef tmp_ptr;
      LLVMValueRef tmp;
      LLVMValueRef res;
      LLVMValueRef callee;
      unsigned k;

      util_snprintf(name, sizeof name, "util_format_%s_fetch_rgba_8unorm",
                    format_desc->short_name);

      if (gallivm_debug & GALLIVM_DEBUG_PERF) {
         debug_printf("%s: falling back to %s\n", __FUNCTION__, name);
      }

      /*
       * Declare and bind format_desc->fetch_rgba_8unorm().
       *
       * The declaration is only created once per module; later calls find
       * it by name.
       */

      function = LLVMGetNamedFunction(module, name);
      if (!function) {
         /*
          * Function to call looks like:
          *   fetch(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j)
          */
         LLVMTypeRef ret_type;
         LLVMTypeRef arg_types[4];
         LLVMTypeRef function_type;

         ret_type = LLVMVoidTypeInContext(gallivm->context);
         arg_types[0] = pi8t;
         arg_types[1] = pi8t;
         arg_types[2] = i32t;
         arg_types[3] = i32t;
         function_type = LLVMFunctionType(ret_type, arg_types,
                                          Elements(arg_types), 0);
         function = LLVMAddFunction(module, name, function_type);

         LLVMSetFunctionCallConv(function, LLVMCCallConv);
         LLVMSetLinkage(function, LLVMExternalLinkage);

         assert(LLVMIsDeclaration(function));
      }

      /* make const pointer for the C fetch_rgba_8unorm function */
      callee = lp_build_const_int_pointer(gallivm,
         func_to_pointer((func_pointer) format_desc->fetch_rgba_8unorm));

      /*
       * Cast the callee pointer to the function's type.  From here on the
       * module declaration is only used for its type; the call goes
       * through the constant address.
       */
      function = LLVMBuildBitCast(builder, callee,
                                  LLVMTypeOf(function), "cast callee");

      /* Scratch i32 that the callee fills through an i8 pointer. */
      tmp_ptr = lp_build_alloca(gallivm, i32t, "");

      res = LLVMGetUndef(LLVMVectorType(i32t, num_pixels));

      /*
       * Invoke format_desc->fetch_rgba_8unorm() for each pixel and insert
       * the 32-bit result as element k of the AoS result vector.
       */

      for (k = 0; k < num_pixels; ++k) {
         LLVMValueRef index = lp_build_const_int32(gallivm, k);
         LLVMValueRef args[4];

         args[0] = LLVMBuildBitCast(builder, tmp_ptr, pi8t, "");
         args[1] = lp_build_gather_elem_ptr(gallivm, num_pixels,
                                            base_ptr, offset, k);

         /* With a single pixel i and j are used directly, not extracted. */
         if (num_pixels == 1) {
            args[2] = i;
            args[3] = j;
         }
         else {
            args[2] = LLVMBuildExtractElement(builder, i, index, "");
            args[3] = LLVMBuildExtractElement(builder, j, index, "");
         }

         LLVMBuildCall(builder, function, args, Elements(args), "");

         tmp = LLVMBuildLoad(builder, tmp_ptr, "");

         if (num_pixels == 1) {
            res = tmp;
         }
         else {
            res = LLVMBuildInsertElement(builder, res, tmp, index, "");
         }
      }

      /* Bitcast from <n x i32> to <4n x i8> */
      res = LLVMBuildBitCast(builder, res, bld.vec_type, "");

      return res;
   }


   /*
    * Fallback to util_format_description::fetch_rgba_float().
    */

   if (format_desc->fetch_rgba_float) {
      /*
       * Fallback to calling util_format_description::fetch_rgba_float.
       *
       * This is definitely not the most efficient way of fetching pixels, as
       * we miss the opportunity to do vectorization, but it is
       * convenient for formats or scenarios for which there was no
       * opportunity or incentive to optimize.
       */

      LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder)));
      char name[256];
      LLVMTypeRef f32t = LLVMFloatTypeInContext(gallivm->context);
      LLVMTypeRef f32x4t = LLVMVectorType(f32t, 4);
      LLVMTypeRef pf32t = LLVMPointerType(f32t, 0);
      LLVMTypeRef pi8t = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);
      LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
      LLVMValueRef function;
      LLVMValueRef tmp_ptr;
      /* num_pixels <= LP_MAX_VECTOR_LENGTH/4 follows from the assert above. */
      LLVMValueRef tmps[LP_MAX_VECTOR_LENGTH/4];
      LLVMValueRef res;
      LLVMValueRef callee;
      unsigned k;

      util_snprintf(name, sizeof name, "util_format_%s_fetch_rgba_float",
                    format_desc->short_name);

      if (gallivm_debug & GALLIVM_DEBUG_PERF) {
         debug_printf("%s: falling back to %s\n", __FUNCTION__, name);
      }

      /*
       * Declare and bind format_desc->fetch_rgba_float().
       *
       * The declaration is only created once per module; later calls find
       * it by name.
       */

      function = LLVMGetNamedFunction(module, name);
      if (!function) {
         /*
          * Function to call looks like:
          *   fetch(float *dst, const uint8_t *src, unsigned i, unsigned j)
          */
         LLVMTypeRef ret_type;
         LLVMTypeRef arg_types[4];
         LLVMTypeRef function_type;

         ret_type = LLVMVoidTypeInContext(gallivm->context);
         arg_types[0] = pf32t;
         arg_types[1] = pi8t;
         arg_types[2] = i32t;
         arg_types[3] = i32t;
         function_type = LLVMFunctionType(ret_type, arg_types,
                                          Elements(arg_types), 0);
         function = LLVMAddFunction(module, name, function_type);

         LLVMSetFunctionCallConv(function, LLVMCCallConv);
         LLVMSetLinkage(function, LLVMExternalLinkage);

         assert(LLVMIsDeclaration(function));
      }

      /* Note: we're using this casting here instead of LLVMAddGlobalMapping()
       * to work around a bug in LLVM 2.6.
       */

      /* make const pointer for the C fetch_rgba_float function */
      callee = lp_build_const_int_pointer(gallivm,
         func_to_pointer((func_pointer) format_desc->fetch_rgba_float));

      /*
       * Cast the callee pointer to the function's type.  From here on the
       * module declaration is only used for its type; the call goes
       * through the constant address.
       */
      function = LLVMBuildBitCast(builder, callee,
                                  LLVMTypeOf(function), "cast callee");


      /* Scratch <4 x float> that the callee fills through a float pointer. */
      tmp_ptr = lp_build_alloca(gallivm, f32x4t, "");

      /*
       * Invoke format_desc->fetch_rgba_float() for each pixel and keep the
       * resulting float4 in tmps[k] for the conversion below.
       */

      for (k = 0; k < num_pixels; ++k) {
         LLVMValueRef args[4];

         args[0] = LLVMBuildBitCast(builder, tmp_ptr, pf32t, "");
         args[1] = lp_build_gather_elem_ptr(gallivm, num_pixels,
                                            base_ptr, offset, k);

         /* With a single pixel i and j are used directly, not extracted. */
         if (num_pixels == 1) {
            args[2] = i;
            args[3] = j;
         }
         else {
            LLVMValueRef index = lp_build_const_int32(gallivm, k);
            args[2] = LLVMBuildExtractElement(builder, i, index, "");
            args[3] = LLVMBuildExtractElement(builder, j, index, "");
         }

         LLVMBuildCall(builder, function, args, Elements(args), "");

         tmps[k] = LLVMBuildLoad(builder, tmp_ptr, "");
      }

      /* Convert the num_pixels float4 vectors into one vector of 'type'. */
      lp_build_conv(gallivm,
                    lp_float32_vec4_type(),
                    type,
                    tmps, num_pixels, &res, 1);

      return res;
   }

   /* No strategy can handle this format/type combination. */
   assert(0);
   return lp_build_undef(gallivm, type);
}
Пример #25
0
/* Rank of 'type' in the ordering i1 < i8 < i16 < i32 < i64, or -1 if 'type'
 * is not one of these five global-context integer types. Comparing ranks
 * tells whether a conversion between two integer types widens or narrows. */
static int llvm_type_cast_int_rank(LLVMTypeRef type)
{
	if (type == LLVMInt1Type())
		return 0;
	if (type == LLVMInt8Type())
		return 1;
	if (type == LLVMInt16Type())
		return 2;
	if (type == LLVMInt32Type())
		return 3;
	if (type == LLVMInt64Type())
		return 4;
	return -1;
}

/* Non-zero if 'type' is the global-context half, float or double type. */
static int llvm_type_cast_is_fp(LLVMTypeRef type)
{
	return type == LLVMHalfType()
		|| type == LLVMFloatType()
		|| type == LLVMDoubleType();
}

/* Emit the conversion of integer value 'val' (whose type has rank
 * 'from_rank' and signedness 'fromsign') to 'totype', naming the instruction
 * with the current 'temp_var_name'. Returns the converted value, or NULL if
 * no instruction was emitted (same type, or unsupported target type). */
static LLVMValueRef llvm_type_cast_from_int(LLVMValueRef val, int from_rank,
	int fromsign, LLVMTypeRef totype)
{
	int to_rank = llvm_type_cast_int_rank(totype);

	/* Integer to floating point: signedness of the source decides */
	if (llvm_type_cast_is_fp(totype))
	{
		if (fromsign)
			return LLVMBuildSIToFP(cl2llvm_builder, val, totype,
				temp_var_name);
		return LLVMBuildUIToFP(cl2llvm_builder, val, totype,
			temp_var_name);
	}

	/* Target is neither a supported float nor a supported integer */
	if (to_rank < 0)
		return NULL;

	/* Widening: sign- or zero-extend depending on the source sign */
	if (to_rank > from_rank)
	{
		if (fromsign)
			return LLVMBuildSExt(cl2llvm_builder, val, totype,
				temp_var_name);
		return LLVMBuildZExt(cl2llvm_builder, val, totype,
			temp_var_name);
	}

	/* Narrowing */
	if (to_rank < from_rank)
		return LLVMBuildTrunc(cl2llvm_builder, val, totype,
			temp_var_name);

	/* Same width: nothing is emitted, so hand back the temporary name
	 * number that the caller reserved. */
	temp_var_count--;
	return NULL;
}

/* Emit the conversion of a non-integer (expected: floating point) value 'val'
 * of type 'fromtype' to 'totype', naming the instruction with the current
 * 'temp_var_name'. Returns the converted value, or NULL if no instruction
 * was emitted. */
static LLVMValueRef llvm_type_cast_from_fp(LLVMValueRef val,
	LLVMTypeRef fromtype, LLVMTypeRef totype, int tosign)
{
	/* Floating point to integer. Both the signed and the unsigned case
	 * require an integer target, so that a floating-point target is never
	 * swallowed here just because its wrapper carries sign 0. */
	if (LLVMGetTypeKind(totype) == LLVMIntegerTypeKind)
	{
		if (llvm_type_cast_int_rank(totype) < 0)
			return NULL;
		if (tosign)
			return LLVMBuildFPToSI(cl2llvm_builder, val, totype,
				temp_var_name);
		return LLVMBuildFPToUI(cl2llvm_builder, val, totype,
			temp_var_name);
	}

	/* Identity cast: nothing to emit */
	if (fromtype == totype)
		return NULL;

	/* Floating point to floating point */
	if (totype == LLVMDoubleType())
		return LLVMBuildFPExt(cl2llvm_builder, val, totype,
			temp_var_name);
	if (totype == LLVMFloatType())
	{
		if (fromtype == LLVMDoubleType())
			return LLVMBuildFPTrunc(cl2llvm_builder, val, totype,
				temp_var_name);
		if (fromtype == LLVMHalfType())
			return LLVMBuildFPExt(cl2llvm_builder, val, totype,
				temp_var_name);
		return NULL;
	}
	if (totype == LLVMHalfType())
		return LLVMBuildFPTrunc(cl2llvm_builder, val, totype,
			temp_var_name);

	return NULL;
}

/*
 * Cast 'original_val' to the type described by 'totype_w_sign'.
 *
 * original_val   - value to convert; it is neither modified nor freed.
 * totype_w_sign  - wrapper holding the target LLVM type and its signedness.
 *
 * Returns a value obtained from cl2llvm_val_create() whose type wrapper is
 * set to the target LLVM type and sign. The recursive uses below release
 * such results with cl2llvm_val_free().
 *
 * Supported conversions:
 *  - scalar integer (i1/i8/i16/i32/i64) to integer or half/float/double;
 *  - half/float/double to integer or to another of these float types;
 *  - scalar to vector, by replicating the converted scalar into every
 *    component;
 *  - vector to the identical vector type (no-op). Any other cast from a
 *    vector is reported through cl2llvm_yyerror().
 *
 * Instruction names are taken from the file-level 'temp_var_name' buffer and
 * numbered with 'temp_var_count'.
 */
struct cl2llvm_val_t *llvm_type_cast(struct cl2llvm_val_t * original_val, 
	struct cl2llvmTypeWrap *totype_w_sign)
{
	struct cl2llvm_val_t *llvm_val = cl2llvm_val_create();

	LLVMValueRef const_elems[16];
	const unsigned max_const_elems =
		sizeof const_elems / sizeof const_elems[0];
	LLVMValueRef converted;
	LLVMTypeRef fromtype = cl2llvmTypeWrapGetLlvmType(original_val->type);
	LLVMTypeRef totype = cl2llvmTypeWrapGetLlvmType(totype_w_sign);
	int fromsign = cl2llvmTypeWrapGetSign(original_val->type);
	int tosign = cl2llvmTypeWrapGetSign(totype_w_sign);
	int from_is_vector = LLVMGetTypeKind(fromtype) == LLVMVectorTypeKind;
	int to_is_vector = LLVMGetTypeKind(totype) == LLVMVectorTypeKind;
	int from_rank;

	/* By default the result is the same as original_val */
	llvm_val->val = original_val->val;
	cl2llvmTypeWrapSetLlvmType(llvm_val->type, fromtype);
	cl2llvmTypeWrapSetSign(llvm_val->type, fromsign);

	/* Reserve a name for the conversion instruction */
	snprintf(temp_var_name, sizeof temp_var_name,
		"tmp_%d", temp_var_count++);

	/* A vector may only be cast to the identical vector type. The vector
	 * properties of totype are queried only once it is known to be a
	 * vector; they are not valid queries on a scalar type. */
	if (from_is_vector)
	{
		if (!to_is_vector
			|| LLVMGetVectorSize(fromtype) != LLVMGetVectorSize(totype)
			|| LLVMGetElementType(fromtype) != LLVMGetElementType(totype)
			|| fromsign != tosign)
		{
			/* NOTE(review): both messages are issued for a
			 * vector-to-vector cast only if cl2llvm_yyerror()
			 * returns; kept as in the original code. */
			if (to_is_vector)
				cl2llvm_yyerror("Casts between vector types are forbidden");
			cl2llvm_yyerror("A vector may not be cast to any other type.");
		}
	}

	/* Scalar to vector: build a vector whose components all equal the
	 * scalar converted to the vector's element type. */
	if (to_is_vector && !from_is_vector)
	{
		struct cl2llvmTypeWrap *elem_type;
		struct cl2llvm_val_t *cast_original_val;
		LLVMValueRef vector_addr;
		LLVMValueRef vector;
		LLVMValueRef index;
		unsigned vec_size = LLVMGetVectorSize(totype);
		unsigned i;

		/* Declare the vector in the entry block, then return to the
		 * current block. */
		LLVMPositionBuilder(cl2llvm_builder,
			cl2llvm_current_function->entry_block,
			cl2llvm_current_function->branch_instr);

		snprintf(temp_var_name, sizeof temp_var_name,
			"tmp_%d", temp_var_count++);

		vector_addr = LLVMBuildAlloca(cl2llvm_builder,
			totype, temp_var_name);
		LLVMPositionBuilderAtEnd(cl2llvm_builder, current_basic_block);

		/* Load the (not yet initialized) vector as a starting value */
		snprintf(temp_var_name, sizeof temp_var_name,
			"tmp_%d", temp_var_count++);

		vector = LLVMBuildLoad(cl2llvm_builder, vector_addr,
			temp_var_name);

		/* Type wrapper describing one component of totype */
		elem_type = cl2llvmTypeWrapCreate(LLVMGetElementType(totype),
			tosign);

		if (LLVMIsConstant(original_val->val)
			&& vec_size <= max_const_elems)
		{
			/* Constant scalar: assemble a constant vector. Vectors
			 * wider than the local buffer take the generic path
			 * below instead of overflowing it. */
			cast_original_val = llvm_type_cast(original_val,
				elem_type);
			for (i = 0; i < vec_size; i++)
				const_elems[i] = cast_original_val->val;

			vector = LLVMConstVector(const_elems, vec_size);

			cl2llvm_val_free(cast_original_val);
		}
		else
		{
			/* Otherwise insert the converted scalar into each
			 * component in turn. */
			for (i = 0; i < vec_size; i++)
			{
				index = LLVMConstInt(LLVMInt32Type(), i, 0);
				cast_original_val = llvm_type_cast(original_val,
					elem_type);
				snprintf(temp_var_name, sizeof temp_var_name,
					"tmp_%d", temp_var_count++);

				vector = LLVMBuildInsertElement(cl2llvm_builder,
					vector, cast_original_val->val, index,
					temp_var_name);
				cl2llvm_val_free(cast_original_val);
			}
		}
		cl2llvmTypeWrapFree(elem_type);
		llvm_val->val = vector;
	}

	/* Scalar conversions. Sources that are not one of the five supported
	 * integer types are handled as floating point. Neither helper emits
	 * anything when totype is a vector, so a value built above is kept. */
	from_rank = llvm_type_cast_int_rank(fromtype);
	if (from_rank >= 0)
		converted = llvm_type_cast_from_int(original_val->val,
			from_rank, fromsign, totype);
	else
		converted = llvm_type_cast_from_fp(original_val->val,
			fromtype, totype, tosign);

	if (converted)
		llvm_val->val = converted;

	/* The result always carries the requested type and signedness */
	cl2llvmTypeWrapSetLlvmType(llvm_val->type, totype);
	cl2llvmTypeWrapSetSign(llvm_val->type, tosign);

	return llvm_val;
}