static void llvm_load_input(
	struct radeon_llvm_context * ctx,
	unsigned input_index,
	const struct tgsi_full_declaration *decl)
{
	const struct r600_shader_io * input = &ctx->r600_inputs[input_index];
	unsigned chan;
	int two_side = (ctx->two_side && input->name == TGSI_SEMANTIC_COLOR);
	LLVMValueRef v;
	boolean require_interp_intrinsic = ctx->chip_class >= EVERGREEN &&
		ctx->type == TGSI_PROCESSOR_FRAGMENT;

	if (require_interp_intrinsic && input->spi_sid) {
		v = llvm_load_input_vector(ctx, input->lds_pos, input->ij_index,
			(input->interpolate > 0));
	} else
		v = LLVMGetParam(ctx->main_fn, input->gpr);

	if (two_side) {
		struct r600_shader_io * back_input =
			&ctx->r600_inputs[input->back_color_input];
		LLVMValueRef v2;
		LLVMValueRef face = LLVMGetParam(ctx->main_fn, ctx->face_gpr);
		face = LLVMBuildExtractElement(ctx->gallivm.builder, face,
			lp_build_const_int32(&(ctx->gallivm), 0), "");

		if (require_interp_intrinsic && back_input->spi_sid)
			v2 = llvm_load_input_vector(ctx, back_input->lds_pos,
				back_input->ij_index, (back_input->interpolate > 0));
		else
			v2 = LLVMGetParam(ctx->main_fn, back_input->gpr);
		v = llvm_face_select_helper(ctx, face, v, v2);
	}

	for (chan = 0; chan < 4; chan++) {
		unsigned soa_index = radeon_llvm_reg_index_soa(input_index, chan);

		ctx->inputs[soa_index] = LLVMBuildExtractElement(ctx->gallivm.builder, v,
			lp_build_const_int32(&(ctx->gallivm), chan), "");

		if (input->name == TGSI_SEMANTIC_POSITION &&
				ctx->type == TGSI_PROCESSOR_FRAGMENT && chan == 3) {
		/* RCP for fragcoord.w */
		ctx->inputs[soa_index] = LLVMBuildFDiv(ctx->gallivm.builder,
				lp_build_const_float(&(ctx->gallivm), 1.0f),
				ctx->inputs[soa_index], "");
	}
	}
}
Exemplo n.º 2
0
LLVMValueRef
lp_build_intrinsic_map(struct gallivm_state *gallivm,
                       const char *name,
                       LLVMTypeRef ret_type,
                       LLVMValueRef *args,
                       unsigned num_args)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMTypeRef ret_elem_type = LLVMGetElementType(ret_type);
   unsigned n = LLVMGetVectorSize(ret_type);
   unsigned i, j;
   LLVMValueRef res;

   assert(num_args <= LP_MAX_FUNC_ARGS);

   res = LLVMGetUndef(ret_type);
   for(i = 0; i < n; ++i) {
      LLVMValueRef index = lp_build_const_int32(gallivm, i);
      LLVMValueRef arg_elems[LP_MAX_FUNC_ARGS];
      LLVMValueRef res_elem;
      for(j = 0; j < num_args; ++j)
         arg_elems[j] = LLVMBuildExtractElement(builder, args[j], index, "");
      res_elem = lp_build_intrinsic(builder, name, ret_elem_type, arg_elems, num_args, 0);
      res = LLVMBuildInsertElement(builder, res, res_elem, index, "");
   }

   return res;
}
Exemplo n.º 3
0
/**
 * Pack first element of aos values,
 * pad out to destination size.
 * i.e. x1 _ _ _ x2 _ _ _ will become x1 x2 _ _
 */
LLVMValueRef
lp_build_pack_aos_scalars(struct gallivm_state *gallivm,
                          struct lp_type src_type,
                          struct lp_type dst_type,
                          const LLVMValueRef src)
{
   LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
   LLVMValueRef undef = LLVMGetUndef(i32t);
   LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
   unsigned num_src = src_type.length / 4;
   unsigned num_dst = dst_type.length;
   unsigned i;

   assert(num_src <= num_dst);

   for (i = 0; i < num_src; i++) {
      shuffles[i] = LLVMConstInt(i32t, i * 4, 0);
   }
   for (i = num_src; i < num_dst; i++) {
      shuffles[i] = undef;
   }

   if (num_dst == 1) {
      return LLVMBuildExtractElement(gallivm->builder, src, shuffles[0], "");
   }
   else {
      return LLVMBuildShuffleVector(gallivm->builder, src, src,
                                    LLVMConstVector(shuffles, num_dst), "");
   }
}
Exemplo n.º 4
0
void expand_vectors(struct list_t *elem_list)
{
	int index;
	int vec_index;
	struct cl2llvm_val_t *cl2llvm_index;
	struct cl2llvm_val_t *current_vec_elem;
	struct cl2llvm_val_t *current_elem;
	
	LIST_FOR_EACH(elem_list, index)
	{
		current_elem = list_get(elem_list, index);
		if (LLVMGetTypeKind(cl2llvmTypeWrapGetLlvmType(current_elem->type)) == LLVMVectorTypeKind)
		{

			for(vec_index = 0; vec_index < LLVMGetVectorSize(cl2llvmTypeWrapGetLlvmType(current_elem->type)); vec_index++)
			{
				cl2llvm_index = cl2llvm_val_create_w_init( LLVMConstInt(
					LLVMInt32Type(), vec_index, 0), 1);


				snprintf(temp_var_name, sizeof(temp_var_name),
					"tmp_%d", temp_var_count++);

				current_vec_elem = cl2llvm_val_create_w_init( LLVMBuildExtractElement(cl2llvm_builder, current_elem->val, cl2llvm_index->val, temp_var_name), cl2llvmTypeWrapGetSign(current_elem->type));
				list_insert(elem_list, index + vec_index, current_vec_elem);
			cl2llvm_val_free(cl2llvm_index);
			}
			cl2llvm_val_free(current_elem);
			list_remove(elem_list, current_elem);
		}
	}
Exemplo n.º 5
0
/**
 * Print a intt[4] vector.
 */
LLVMValueRef
lp_build_print_ivec4(struct gallivm_state *gallivm,
                    const char *msg, LLVMValueRef vec)
{
   LLVMBuilderRef builder = gallivm->builder;
   char format[1000];
   LLVMValueRef x, y, z, w;

   x = LLVMBuildExtractElement(builder, vec, lp_build_const_int32(gallivm, 0), "");
   y = LLVMBuildExtractElement(builder, vec, lp_build_const_int32(gallivm, 1), "");
   z = LLVMBuildExtractElement(builder, vec, lp_build_const_int32(gallivm, 2), "");
   w = LLVMBuildExtractElement(builder, vec, lp_build_const_int32(gallivm, 3), "");

   util_snprintf(format, sizeof(format), "%s %%i %%i %%i %%i\n", msg);
   return lp_build_printf(gallivm, format, x, y, z, w);
}
static void llvm_load_system_value(
		struct radeon_llvm_context * ctx,
		unsigned index,
		const struct tgsi_full_declaration *decl)
{
	unsigned chan;

	switch (decl->Semantic.Name) {
	case TGSI_SEMANTIC_INSTANCEID: chan = 3; break;
	case TGSI_SEMANTIC_VERTEXID: chan = 0; break;
	default: assert(!"unknown system value");
	}

#if HAVE_LLVM >= 0x0304
	ctx->system_values[index] = LLVMBuildExtractElement(ctx->gallivm.builder,
		LLVMGetParam(ctx->main_fn, 0), lp_build_const_int32(&(ctx->gallivm), chan),
		"");
#else
	LLVMValueRef reg = lp_build_const_int32(
			ctx->soa.bld_base.base.gallivm, chan);
	ctx->system_values[index] = build_intrinsic(
			ctx->soa.bld_base.base.gallivm->builder,
			"llvm.R600.load.input",
			ctx->soa.bld_base.base.elem_type, &reg, 1,
			LLVMReadNoneAttribute);
#endif
}
Exemplo n.º 7
0
static void
emit_store(
	struct lp_build_tgsi_context * bld_base,
	const struct tgsi_full_instruction * inst,
	const struct tgsi_opcode_info * info,
	LLVMValueRef dst[4])
{
	struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
	struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
	struct lp_build_context base = bld->bld_base.base;
	const struct tgsi_full_dst_register *reg = &inst->Dst[0];
	LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
	LLVMValueRef temp_ptr;
	unsigned chan, chan_index;
	boolean is_vec_store = FALSE;
	if (dst[0]) {
		LLVMTypeKind k = LLVMGetTypeKind(LLVMTypeOf(dst[0]));
		is_vec_store = (k == LLVMVectorTypeKind);
	}

	if (is_vec_store) {
		LLVMValueRef values[4] = {};
		TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan) {
			LLVMValueRef index = lp_build_const_int32(gallivm, chan);
			values[chan]  = LLVMBuildExtractElement(gallivm->builder,
							dst[0], index, "");
		}
		bld_base->emit_store(bld_base, inst, info, values);
		return;
	}
Exemplo n.º 8
0
/**
 * Combined extract and broadcast (mere shuffle in most cases)
 */
LLVMValueRef
lp_build_extract_broadcast(struct gallivm_state *gallivm,
                           struct lp_type src_type,
                           struct lp_type dst_type,
                           LLVMValueRef vector,
                           LLVMValueRef index)
{
   LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
   LLVMValueRef res;

   assert(src_type.floating == dst_type.floating);
   assert(src_type.width    == dst_type.width);

   assert(lp_check_value(src_type, vector));
   assert(LLVMTypeOf(index) == i32t);

   if (src_type.length == 1) {
      if (dst_type.length == 1) {
         /*
          * Trivial scalar -> scalar.
          */

         res = vector;
      }
      else {
         /*
          * Broadcast scalar -> vector.
          */

         res = lp_build_broadcast(gallivm,
                                  lp_build_vec_type(gallivm, dst_type),
                                  vector);
      }
   }
   else {
      if (dst_type.length > 1) {
         /*
          * shuffle - result can be of different length.
          */

         LLVMValueRef shuffle;
         shuffle = lp_build_broadcast(gallivm,
                                      LLVMVectorType(i32t, dst_type.length),
                                      index);
         res = LLVMBuildShuffleVector(gallivm->builder, vector,
                                      LLVMGetUndef(lp_build_vec_type(gallivm, src_type)),
                                      shuffle, "");
      }
      else {
         /*
          * Trivial extract scalar from vector.
          */
          res = LLVMBuildExtractElement(gallivm->builder, vector, index, "");
      }
   }

   return res;
}
Exemplo n.º 9
0
/**
 * Print a LLVM value of any type
 */
LLVMValueRef
lp_build_print_value(struct gallivm_state *gallivm,
                     const char *msg,
                     LLVMValueRef value)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMTypeKind type_kind;
   LLVMTypeRef type_ref;
   LLVMValueRef params[2 + LP_MAX_VECTOR_LENGTH];
   char type_fmt[4] = " %x";
   char format[2 + 3 * LP_MAX_VECTOR_LENGTH + 2] = "%s";
   unsigned length;
   unsigned i;

   type_ref = LLVMTypeOf(value);
   type_kind = LLVMGetTypeKind(type_ref);

   if (type_kind == LLVMVectorTypeKind) {
      length = LLVMGetVectorSize(type_ref);

      type_ref = LLVMGetElementType(type_ref);
      type_kind = LLVMGetTypeKind(type_ref);
   } else {
      length = 1;
   }

   if (type_kind == LLVMFloatTypeKind || type_kind == LLVMDoubleTypeKind) {
      type_fmt[2] = 'f';
   } else if (type_kind == LLVMIntegerTypeKind) {
      if (LLVMGetIntTypeWidth(type_ref) == 8) {
         type_fmt[2] = 'u';
      } else {
         type_fmt[2] = 'i';
      }
   } else {
      /* Unsupported type */
      assert(0);
   }

   /* Create format string and arguments */
   assert(strlen(format) + strlen(type_fmt) * length + 2 <= sizeof format);

   params[1] = lp_build_const_string(gallivm, msg);
   if (length == 1) {
      util_strncat(format, type_fmt, sizeof(format) - strlen(format) - 1);
      params[2] = value;
   } else {
      for (i = 0; i < length; ++i) {
         util_strncat(format, type_fmt, sizeof(format) - strlen(format) - 1);
         params[2 + i] = LLVMBuildExtractElement(builder, value, lp_build_const_int32(gallivm, i), "");
      }
   }

   util_strncat(format, "\n", sizeof(format) - strlen(format) - 1);

   params[0] = lp_build_const_string(gallivm, format);
   return lp_build_print_args(gallivm, 2 + length, params);
}
Exemplo n.º 10
0
static LLVMValueRef
emit_fetch(
	struct lp_build_tgsi_context *bld_base,
	const struct tgsi_full_src_register *reg,
	enum tgsi_opcode_type type,
	unsigned swizzle)
{
	struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
	struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
	LLVMValueRef result, ptr;

	if (swizzle == ~0) {
		LLVMValueRef values[TGSI_NUM_CHANNELS];
		unsigned chan;
		for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
			values[chan] = emit_fetch(bld_base, reg, type, chan);
		}
		return lp_build_gather_values(bld_base->base.gallivm, values,
					      TGSI_NUM_CHANNELS);
	}

	if (reg->Register.Indirect) {
		struct tgsi_declaration_range range = get_array_range(bld_base,
			reg->Register.File, &reg->Indirect);
		return LLVMBuildExtractElement(builder,
			emit_array_fetch(bld_base, reg->Register.File, type, range, swizzle),
			emit_array_index(bld, &reg->Indirect, reg->Register.Index - range.First),
			"");
	}

	switch(reg->Register.File) {
	case TGSI_FILE_IMMEDIATE: {
		LLVMTypeRef ctype = tgsi2llvmtype(bld_base, type);
		return LLVMConstBitCast(bld->immediates[reg->Register.Index][swizzle], ctype);
	}

	case TGSI_FILE_INPUT:
		result = ctx->inputs[radeon_llvm_reg_index_soa(reg->Register.Index, swizzle)];
		break;

	case TGSI_FILE_TEMPORARY:
		ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle);
		result = LLVMBuildLoad(builder, ptr, "");
		break;

	case TGSI_FILE_OUTPUT:
		ptr = lp_get_output_ptr(bld, reg->Register.Index, swizzle);
		result = LLVMBuildLoad(builder, ptr, "");
		break;

	default:
		return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
	}

	return bitcast(bld_base, type, result);
}
Exemplo n.º 11
0
static void declare_input_vs(
	struct si_shader_context * si_shader_ctx,
	unsigned input_index,
	const struct tgsi_full_declaration *decl)
{
	struct lp_build_context * base = &si_shader_ctx->radeon_bld.soa.bld_base.base;
	unsigned divisor = si_shader_ctx->shader->key.vs.instance_divisors[input_index];

	unsigned chan;

	LLVMValueRef t_list_ptr;
	LLVMValueRef t_offset;
	LLVMValueRef t_list;
	LLVMValueRef attribute_offset;
	LLVMValueRef buffer_index;
	LLVMValueRef args[3];
	LLVMTypeRef vec4_type;
	LLVMValueRef input;

	/* Load the T list */
	t_list_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_VERTEX_BUFFER);

	t_offset = lp_build_const_int32(base->gallivm, input_index);

	t_list = build_indexed_load(si_shader_ctx, t_list_ptr, t_offset);

	/* Build the attribute offset */
	attribute_offset = lp_build_const_int32(base->gallivm, 0);

	if (divisor) {
		/* Build index from instance ID, start instance and divisor */
		si_shader_ctx->shader->shader.uses_instanceid = true;
		buffer_index = get_instance_index(&si_shader_ctx->radeon_bld, divisor);
	} else {
		/* Load the buffer index, which is always stored in VGPR0
		 * for Vertex Shaders */
		buffer_index = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_VERTEX_ID);
	}

	vec4_type = LLVMVectorType(base->elem_type, 4);
	args[0] = t_list;
	args[1] = attribute_offset;
	args[2] = buffer_index;
	input = build_intrinsic(base->gallivm->builder,
		"llvm.SI.vs.load.input", vec4_type, args, 3,
		LLVMReadNoneAttribute | LLVMNoUnwindAttribute);

	/* Break up the vec4 into individual components */
	for (chan = 0; chan < 4; chan++) {
		LLVMValueRef llvm_chan = lp_build_const_int32(base->gallivm, chan);
		/* XXX: Use a helper function for this.  There is one in
 		 * tgsi_llvm.c. */
		si_shader_ctx->radeon_bld.inputs[radeon_llvm_reg_index_soa(input_index, chan)] =
				LLVMBuildExtractElement(base->gallivm->builder,
				input, llvm_chan, "");
	}
}
static LLVMValueRef
llvm_load_input_vector(
	struct radeon_llvm_context * ctx, unsigned location, unsigned ijregs,
	boolean interp)
{
		LLVMTypeRef VecType;
		LLVMValueRef Args[3] = {
			lp_build_const_int32(&(ctx->gallivm), location)
		};
		unsigned ArgCount = 1;
		if (interp) {
			VecType = LLVMVectorType(ctx->soa.bld_base.base.elem_type, 2);
			LLVMValueRef IJIndex = LLVMGetParam(ctx->main_fn, ijregs / 2);
			Args[ArgCount++] = LLVMBuildExtractElement(ctx->gallivm.builder, IJIndex,
				lp_build_const_int32(&(ctx->gallivm), 2 * (ijregs % 2)), "");
			Args[ArgCount++] = LLVMBuildExtractElement(ctx->gallivm.builder, IJIndex,
				lp_build_const_int32(&(ctx->gallivm), 2 * (ijregs % 2) + 1), "");
			LLVMValueRef HalfVec[2] = {
				build_intrinsic(ctx->gallivm.builder, "llvm.R600.interp.xy",
					VecType, Args, ArgCount, LLVMReadNoneAttribute),
				build_intrinsic(ctx->gallivm.builder, "llvm.R600.interp.zw",
					VecType, Args, ArgCount, LLVMReadNoneAttribute)
			};
			LLVMValueRef MaskInputs[4] = {
				lp_build_const_int32(&(ctx->gallivm), 0),
				lp_build_const_int32(&(ctx->gallivm), 1),
				lp_build_const_int32(&(ctx->gallivm), 2),
				lp_build_const_int32(&(ctx->gallivm), 3)
			};
			LLVMValueRef Mask = LLVMConstVector(MaskInputs, 4);
			return LLVMBuildShuffleVector(ctx->gallivm.builder, HalfVec[0], HalfVec[1],
				Mask, "");
		} else {
			VecType = LLVMVectorType(ctx->soa.bld_base.base.elem_type, 4);
			return build_intrinsic(ctx->gallivm.builder, "llvm.R600.interp.const",
				VecType, Args, ArgCount, LLVMReadNoneAttribute);
		}
}
static void llvm_load_system_value(
		struct radeon_llvm_context * ctx,
		unsigned index,
		const struct tgsi_full_declaration *decl)
{
	unsigned chan;

	switch (decl->Semantic.Name) {
	case TGSI_SEMANTIC_INSTANCEID: chan = 3; break;
	case TGSI_SEMANTIC_VERTEXID: chan = 0; break;
	default: assert(!"unknown system value");
	}

	ctx->system_values[index] = LLVMBuildExtractElement(ctx->gallivm.builder,
		LLVMGetParam(ctx->main_fn, 0), lp_build_const_int32(&(ctx->gallivm), chan),
		"");
}
static LLVMValueRef llvm_fetch_const(
	struct lp_build_tgsi_context * bld_base,
	const struct tgsi_full_src_register *reg,
	enum tgsi_opcode_type type,
	unsigned swizzle)
{
	LLVMValueRef offset = lp_build_const_int32(bld_base->base.gallivm, reg->Register.Index);
	if (reg->Register.Indirect) {
		struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
		LLVMValueRef index = LLVMBuildLoad(bld_base->base.gallivm->builder, bld->addr[reg->Indirect.Index][reg->Indirect.Swizzle], "");
		offset = LLVMBuildAdd(bld_base->base.gallivm->builder, offset, index, "");
	}
	unsigned ConstantAddressSpace = CONSTANT_BUFFER_0_ADDR_SPACE ;
	if (reg->Register.Dimension) {
		ConstantAddressSpace += reg->Dimension.Index;
	}
	LLVMValueRef cvecval = llvm_load_const_buffer(bld_base, offset, ConstantAddressSpace);
	LLVMValueRef cval = LLVMBuildExtractElement(bld_base->base.gallivm->builder, cvecval, lp_build_const_int32(bld_base->base.gallivm, swizzle), "");
	return bitcast(bld_base, type, cval);
}
/**
 * Return a vector with elements src[start:start+size]
 * Most useful for getting half the values out of a 256bit sized vector,
 * otherwise may cause data rearrangement to happen.
 */
LLVMValueRef
lp_build_extract_range(struct gallivm_state *gallivm,
                       LLVMValueRef src,
                       unsigned start,
                       unsigned size)
{
   LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
   unsigned i;

   assert(size <= Elements(elems));

   for (i = 0; i < size; ++i)
      elems[i] = lp_build_const_int32(gallivm, i + start);

   if (size == 1) {
      return LLVMBuildExtractElement(gallivm->builder, src, elems[0], "");
   }
   else {
      return LLVMBuildShuffleVector(gallivm->builder, src, src,
                                    LLVMConstVector(elems, size), "");
   }
}
Exemplo n.º 16
0
/**
 * Gather elements from scatter positions in memory into a single vector.
 *
 * @param src_width src element width
 * @param dst_width result element width (source will be expanded to fit)
 * @param length length of the offsets,
 * @param base_ptr base pointer, should be a i8 pointer type.
 * @param offsets vector with offsets
 */
LLVMValueRef
lp_build_gather(LLVMBuilderRef builder,
                unsigned length,
                unsigned src_width,
                unsigned dst_width,
                LLVMValueRef base_ptr,
                LLVMValueRef offsets)
{
   LLVMTypeRef src_type = LLVMIntType(src_width);
   LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0);
   LLVMTypeRef dst_elem_type = LLVMIntType(dst_width);
   LLVMTypeRef dst_vec_type = LLVMVectorType(dst_elem_type, length);
   LLVMValueRef res;
   unsigned i;

   res = LLVMGetUndef(dst_vec_type);
   for(i = 0; i < length; ++i) {
      LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
      LLVMValueRef elem_offset;
      LLVMValueRef elem_ptr;
      LLVMValueRef elem;

      elem_offset = LLVMBuildExtractElement(builder, offsets, index, "");
      elem_ptr = LLVMBuildGEP(builder, base_ptr, &elem_offset, 1, "");
      elem_ptr = LLVMBuildBitCast(builder, elem_ptr, src_ptr_type, "");
      elem = LLVMBuildLoad(builder, elem_ptr, "");

      assert(src_width <= dst_width);
      if(src_width > dst_width)
         elem = LLVMBuildTrunc(builder, elem, dst_elem_type, "");
      if(src_width < dst_width)
         elem = LLVMBuildZExt(builder, elem, dst_elem_type, "");

      res = LLVMBuildInsertElement(builder, res, elem, index, "");
   }

   return res;
}
Exemplo n.º 17
0
static void si_llvm_cube_to_2d_coords(struct lp_build_tgsi_context *bld_base,
				      LLVMValueRef *in, LLVMValueRef *out)
{
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	LLVMBuilderRef builder = gallivm->builder;
	LLVMTypeRef type = bld_base->base.elem_type;
	LLVMValueRef coords[4];
	LLVMValueRef mad_args[3];
	LLVMValueRef v;
	unsigned i;

	v = build_cube_intrinsic(gallivm, in);

	for (i = 0; i < 4; ++i)
		coords[i] = LLVMBuildExtractElement(builder, v,
						    lp_build_const_int32(gallivm, i), "");

	coords[2] = lp_build_intrinsic(builder, "llvm.fabs.f32",
			type, &coords[2], 1, LLVMReadNoneAttribute);
	coords[2] = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_RCP, coords[2]);

	mad_args[1] = coords[2];
	mad_args[2] = LLVMConstReal(type, 1.5);

	mad_args[0] = coords[0];
	coords[0] = lp_build_emit_llvm_ternary(bld_base, TGSI_OPCODE_MAD,
			mad_args[0], mad_args[1], mad_args[2]);

	mad_args[0] = coords[1];
	coords[1] = lp_build_emit_llvm_ternary(bld_base, TGSI_OPCODE_MAD,
			mad_args[0], mad_args[1], mad_args[2]);

	/* apply xyz = yxw swizzle to cooords */
	out[0] = coords[1];
	out[1] = coords[0];
	out[2] = coords[3];
}
Exemplo n.º 18
0
/**
 * Get the pointer to one element from scatter positions in memory.
 *
 * @sa lp_build_gather()
 */
LLVMValueRef
lp_build_gather_elem_ptr(struct gallivm_state *gallivm,
                         unsigned length,
                         LLVMValueRef base_ptr,
                         LLVMValueRef offsets,
                         unsigned i)
{
   LLVMValueRef offset;
   LLVMValueRef ptr;

   assert(LLVMTypeOf(base_ptr) == LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0));

   if (length == 1) {
      assert(i == 0);
      offset = offsets;
   } else {
      LLVMValueRef index = lp_build_const_int32(gallivm, i);
      offset = LLVMBuildExtractElement(gallivm->builder, offsets, index, "");
   }

   ptr = LLVMBuildGEP(gallivm->builder, base_ptr, &offset, 1, "");

   return ptr;
}
Exemplo n.º 19
0
/* TBUFFER_STORE_FORMAT_{X,XY,XYZ,XYZW} <- the suffix is selected by num_channels=1..4.
 * The type of vdata must be one of i32 (num_channels=1), v2i32 (num_channels=2),
 * or v4i32 (num_channels=3,4).
 */
void
ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
			    LLVMValueRef rsrc,
			    LLVMValueRef vdata,
			    unsigned num_channels,
			    LLVMValueRef voffset,
			    LLVMValueRef soffset,
			    unsigned inst_offset,
			    bool glc,
			    bool slc,
			    bool writeonly_memory,
			    bool has_add_tid)
{
	/* TODO: Fix stores with ADD_TID and remove the "has_add_tid" flag. */
	if (!has_add_tid) {
		/* Split 3 channel stores, becase LLVM doesn't support 3-channel
		 * intrinsics. */
		if (num_channels == 3) {
			LLVMValueRef v[3], v01;

			for (int i = 0; i < 3; i++) {
				v[i] = LLVMBuildExtractElement(ctx->builder, vdata,
						LLVMConstInt(ctx->i32, i, 0), "");
			}
			v01 = ac_build_gather_values(ctx, v, 2);

			ac_build_buffer_store_dword(ctx, rsrc, v01, 2, voffset,
						    soffset, inst_offset, glc, slc,
						    writeonly_memory, has_add_tid);
			ac_build_buffer_store_dword(ctx, rsrc, v[2], 1, voffset,
						    soffset, inst_offset + 8,
						    glc, slc,
						    writeonly_memory, has_add_tid);
			return;
		}

		unsigned func = CLAMP(num_channels, 1, 3) - 1;
		static const char *types[] = {"f32", "v2f32", "v4f32"};
		char name[256];
		LLVMValueRef offset = soffset;

		if (inst_offset)
			offset = LLVMBuildAdd(ctx->builder, offset,
					      LLVMConstInt(ctx->i32, inst_offset, 0), "");
		if (voffset)
			offset = LLVMBuildAdd(ctx->builder, offset, voffset, "");

		LLVMValueRef args[] = {
			bitcast_to_float(ctx, vdata),
			LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""),
			LLVMConstInt(ctx->i32, 0, 0),
			offset,
			LLVMConstInt(ctx->i1, glc, 0),
			LLVMConstInt(ctx->i1, slc, 0),
		};

		snprintf(name, sizeof(name), "llvm.amdgcn.buffer.store.%s",
			 types[func]);

		ac_build_intrinsic(ctx, name, ctx->voidt,
				   args, ARRAY_SIZE(args),
				   writeonly_memory ?
					   AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY :
					   AC_FUNC_ATTR_WRITEONLY);
		return;
	}

	static unsigned dfmt[] = {
		V_008F0C_BUF_DATA_FORMAT_32,
		V_008F0C_BUF_DATA_FORMAT_32_32,
		V_008F0C_BUF_DATA_FORMAT_32_32_32,
		V_008F0C_BUF_DATA_FORMAT_32_32_32_32
	};
	assert(num_channels >= 1 && num_channels <= 4);

	LLVMValueRef args[] = {
		rsrc,
		vdata,
		LLVMConstInt(ctx->i32, num_channels, 0),
		voffset ? voffset : LLVMGetUndef(ctx->i32),
		soffset,
		LLVMConstInt(ctx->i32, inst_offset, 0),
		LLVMConstInt(ctx->i32, dfmt[num_channels - 1], 0),
		LLVMConstInt(ctx->i32, V_008F0C_BUF_NUM_FORMAT_UINT, 0),
		LLVMConstInt(ctx->i32, voffset != NULL, 0),
		LLVMConstInt(ctx->i32, 0, 0), /* idxen */
		LLVMConstInt(ctx->i32, glc, 0),
		LLVMConstInt(ctx->i32, slc, 0),
		LLVMConstInt(ctx->i32, 0, 0), /* tfe*/
	};

	/* The instruction offset field has 12 bits */
	assert(voffset || inst_offset < (1 << 12));

	/* The intrinsic is overloaded, we need to add a type suffix for overloading to work. */
	unsigned func = CLAMP(num_channels, 1, 3) - 1;
	const char *types[] = {"i32", "v2i32", "v4i32"};
	char name[256];
	snprintf(name, sizeof(name), "llvm.SI.tbuffer.store.%s", types[func]);

	ac_build_intrinsic(ctx, name, ctx->voidt,
			   args, ARRAY_SIZE(args),
			   AC_FUNC_ATTR_LEGACY);
}
static void llvm_emit_tex(
	const struct lp_build_tgsi_action * action,
	struct lp_build_tgsi_context * bld_base,
	struct lp_build_emit_data * emit_data)
{
	struct gallivm_state * gallivm = bld_base->base.gallivm;
	LLVMValueRef args[7];
	unsigned c, sampler_src;
	struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);

	if (emit_data->inst->Texture.Texture == TGSI_TEXTURE_BUFFER) {
		switch (emit_data->inst->Instruction.Opcode) {
		case TGSI_OPCODE_TXQ: {
			struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
			ctx->uses_tex_buffers = true;
			bool isEgPlus = (ctx->chip_class >= EVERGREEN);
			LLVMValueRef offset = lp_build_const_int32(bld_base->base.gallivm,
				isEgPlus ? 0 : 1);
			LLVMValueRef cvecval = llvm_load_const_buffer(bld_base, offset,
				LLVM_R600_BUFFER_INFO_CONST_BUFFER);
			if (!isEgPlus) {
				LLVMValueRef maskval[4] = {
					lp_build_const_int32(gallivm, 1),
					lp_build_const_int32(gallivm, 2),
					lp_build_const_int32(gallivm, 3),
					lp_build_const_int32(gallivm, 0),
				};
				LLVMValueRef mask = LLVMConstVector(maskval, 4);
				cvecval = LLVMBuildShuffleVector(gallivm->builder, cvecval, cvecval,
					mask, "");
			}
			emit_data->output[0] = cvecval;
			return;
		}
		case TGSI_OPCODE_TXF: {
			args[0] = LLVMBuildExtractElement(gallivm->builder, emit_data->args[0], lp_build_const_int32(gallivm, 0), "");
			args[1] = lp_build_const_int32(gallivm, R600_MAX_CONST_BUFFERS);
			emit_data->output[0] = build_intrinsic(gallivm->builder,
							"llvm.R600.load.texbuf",
							emit_data->dst_type, args, 2, LLVMReadNoneAttribute);
			if (ctx->chip_class >= EVERGREEN)
				return;
			ctx->uses_tex_buffers = true;
			LLVMDumpValue(emit_data->output[0]);
			emit_data->output[0] = LLVMBuildBitCast(gallivm->builder,
				emit_data->output[0], LLVMVectorType(bld_base->base.int_elem_type, 4),
				"");
			LLVMValueRef Mask = llvm_load_const_buffer(bld_base,
				lp_build_const_int32(gallivm, 0),
				LLVM_R600_BUFFER_INFO_CONST_BUFFER);
			Mask = LLVMBuildBitCast(gallivm->builder, Mask,
				LLVMVectorType(bld_base->base.int_elem_type, 4), "");
			emit_data->output[0] = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_AND,
				emit_data->output[0],
				Mask);
			LLVMValueRef WComponent = LLVMBuildExtractElement(gallivm->builder,
				emit_data->output[0], lp_build_const_int32(gallivm, 3), "");
			Mask = llvm_load_const_buffer(bld_base, lp_build_const_int32(gallivm, 1),
				LLVM_R600_BUFFER_INFO_CONST_BUFFER);
			Mask = LLVMBuildExtractElement(gallivm->builder, Mask,
				lp_build_const_int32(gallivm, 0), "");
			Mask = LLVMBuildBitCast(gallivm->builder, Mask,
				bld_base->base.int_elem_type, "");
			WComponent = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_OR,
				WComponent, Mask);
			emit_data->output[0] = LLVMBuildInsertElement(gallivm->builder,
				emit_data->output[0], WComponent, lp_build_const_int32(gallivm, 3), "");
			emit_data->output[0] = LLVMBuildBitCast(gallivm->builder,
				emit_data->output[0], LLVMVectorType(bld_base->base.elem_type, 4), "");
		}
			return;
		default:
			break;
		}
	}

	if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_TEX ||
		emit_data->inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
		LLVMValueRef Vector[4] = {
			LLVMBuildExtractElement(gallivm->builder, emit_data->args[0],
				lp_build_const_int32(gallivm, 0), ""),
			LLVMBuildExtractElement(gallivm->builder, emit_data->args[0],
				lp_build_const_int32(gallivm, 1), ""),
			LLVMBuildExtractElement(gallivm->builder, emit_data->args[0],
				lp_build_const_int32(gallivm, 2), ""),
			LLVMBuildExtractElement(gallivm->builder, emit_data->args[0],
				lp_build_const_int32(gallivm, 3), ""),
		};
		switch (emit_data->inst->Texture.Texture) {
		case TGSI_TEXTURE_2D:
		case TGSI_TEXTURE_RECT:
			Vector[2] = Vector[3] = LLVMGetUndef(bld_base->base.elem_type);
			break;
		case TGSI_TEXTURE_1D:
			Vector[1] = Vector[2] = Vector[3] = LLVMGetUndef(bld_base->base.elem_type);
			break;
		default:
			break;
		}
		args[0] = lp_build_gather_values(gallivm, Vector, 4);
	} else {
		args[0] = emit_data->args[0];
	}

	assert(emit_data->arg_count + 2 <= Elements(args));

	for (c = 1; c < emit_data->arg_count; ++c)
		args[c] = emit_data->args[c];

	if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_TXF) {
		args[1] = LLVMBuildShl(gallivm->builder, args[1], lp_build_const_int32(gallivm, 1), "");
		args[2] = LLVMBuildShl(gallivm->builder, args[2], lp_build_const_int32(gallivm, 1), "");
		args[3] = LLVMBuildShl(gallivm->builder, args[3], lp_build_const_int32(gallivm, 1), "");
	}

	sampler_src = emit_data->inst->Instruction.NumSrcRegs-1;

	args[c++] = lp_build_const_int32(gallivm,
					emit_data->inst->Src[sampler_src].Register.Index + R600_MAX_CONST_BUFFERS);
	args[c++] = lp_build_const_int32(gallivm,
					emit_data->inst->Src[sampler_src].Register.Index);
	args[c++] = lp_build_const_int32(gallivm,
					emit_data->inst->Texture.Texture);

	if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_TXF &&
		(emit_data->inst->Texture.Texture == TGSI_TEXTURE_2D_MSAA ||
		emit_data->inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY_MSAA)) {

		switch (emit_data->inst->Texture.Texture) {
		case TGSI_TEXTURE_2D_MSAA:
			args[6] = lp_build_const_int32(gallivm, TGSI_TEXTURE_2D);
			break;
		case TGSI_TEXTURE_2D_ARRAY_MSAA:
			args[6] = lp_build_const_int32(gallivm, TGSI_TEXTURE_2D_ARRAY);
			break;
		default:
			break;
		}

		if (ctx->has_compressed_msaa_texturing) {
			LLVMValueRef ldptr_args[10] = {
				args[0], // Coord
				args[1], // Offset X
				args[2], // Offset Y
				args[3], // Offset Z
				args[4],
				args[5],
				lp_build_const_int32(gallivm, 1),
				lp_build_const_int32(gallivm, 1),
				lp_build_const_int32(gallivm, 1),
				lp_build_const_int32(gallivm, 1)
			};
			LLVMValueRef ptr = build_intrinsic(gallivm->builder,
				"llvm.R600.ldptr",
				emit_data->dst_type, ldptr_args, 10, LLVMReadNoneAttribute);
			LLVMValueRef Tmp = LLVMBuildExtractElement(gallivm->builder, args[0],
				lp_build_const_int32(gallivm, 3), "");
			Tmp = LLVMBuildMul(gallivm->builder, Tmp,
				lp_build_const_int32(gallivm, 4), "");
			LLVMValueRef ResX = LLVMBuildExtractElement(gallivm->builder, ptr,
				lp_build_const_int32(gallivm, 0), "");
			ResX = LLVMBuildBitCast(gallivm->builder, ResX,
				bld_base->base.int_elem_type, "");
			Tmp = LLVMBuildLShr(gallivm->builder, ResX, Tmp, "");
			Tmp = LLVMBuildAnd(gallivm->builder, Tmp,
				lp_build_const_int32(gallivm, 0xF), "");
			args[0] = LLVMBuildInsertElement(gallivm->builder, args[0], Tmp,
				lp_build_const_int32(gallivm, 3), "");
			args[c++] = lp_build_const_int32(gallivm,
				emit_data->inst->Texture.Texture);
		}
	}

	emit_data->output[0] = build_intrinsic(gallivm->builder,
					action->intr_name,
					emit_data->dst_type, args, c, LLVMReadNoneAttribute);

	if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_TXQ &&
		((emit_data->inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY ||
		emit_data->inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY)))
		if (emit_data->inst->Dst[0].Register.WriteMask & 4) {
			LLVMValueRef offset = lp_build_const_int32(bld_base->base.gallivm, 0);
			LLVMValueRef ZLayer = LLVMBuildExtractElement(gallivm->builder,
				llvm_load_const_buffer(bld_base, offset, CONSTANT_TXQ_BUFFER),
				lp_build_const_int32(gallivm, 0), "");

			emit_data->output[0] = LLVMBuildInsertElement(gallivm->builder, emit_data->output[0], ZLayer, lp_build_const_int32(gallivm, 2), "");
			struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
			ctx->has_txq_cube_array_z_comp = true;
		}
}
Exemplo n.º 21
0
/**
 * Pack a single pixel.
 *
 * @param rgba 4 float vector with the unpacked components.
 *
 * XXX: This is mostly for reference and testing -- operating a single pixel at
 * a time is rarely if ever needed.
 */
LLVMValueRef
lp_build_pack_rgba_aos(struct gallivm_state *gallivm,
                       const struct util_format_description *desc,
                       LLVMValueRef rgba)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMTypeRef type;
   LLVMValueRef packed = NULL;
   LLVMValueRef swizzles[4];
   LLVMValueRef shifted, casted, scaled, unswizzled;
   LLVMValueRef shifts[4];
   LLVMValueRef scales[4];
   boolean normalized;
   unsigned shift;
   unsigned i, j;

   assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
   assert(desc->block.width == 1);
   assert(desc->block.height == 1);

   type = LLVMIntTypeInContext(gallivm->context, desc->block.bits);

   /* Unswizzle the color components into the source vector. */
   for (i = 0; i < 4; ++i) {
      for (j = 0; j < 4; ++j) {
         if (desc->swizzle[j] == i)
            break;
      }
      if (j < 4)
         swizzles[i] = lp_build_const_int32(gallivm, j);
      else
         swizzles[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
   }

   unswizzled = LLVMBuildShuffleVector(builder, rgba,
                                       LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4)),
                                       LLVMConstVector(swizzles, 4), "");

   normalized = FALSE;
   shift = 0;
   for (i = 0; i < 4; ++i) {
      unsigned bits = desc->channel[i].size;

      if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) {
         shifts[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
         scales[i] =  LLVMGetUndef(LLVMFloatTypeInContext(gallivm->context));
      }
      else {
         unsigned mask = (1 << bits) - 1;

         assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED);
         assert(bits < 32);

         shifts[i] = lp_build_const_int32(gallivm, shift);

         if (desc->channel[i].normalized) {
            scales[i] = lp_build_const_float(gallivm, mask);
            normalized = TRUE;
         }
         else
            scales[i] = lp_build_const_float(gallivm, 1.0);
      }

      shift += bits;
   }

   if (normalized)
      scaled = LLVMBuildFMul(builder, unswizzled, LLVMConstVector(scales, 4), "");
   else
      scaled = unswizzled;

   casted = LLVMBuildFPToSI(builder, scaled, LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4), "");

   shifted = LLVMBuildShl(builder, casted, LLVMConstVector(shifts, 4), "");
   
   /* Bitwise or all components */
   for (i = 0; i < 4; ++i) {
      if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) {
         LLVMValueRef component = LLVMBuildExtractElement(builder, shifted,
                                               lp_build_const_int32(gallivm, i), "");
         if (packed)
            packed = LLVMBuildOr(builder, packed, component, "");
         else
            packed = component;
      }
   }

   if (!packed)
      packed = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));

   if (desc->block.bits < 32)
      packed = LLVMBuildTrunc(builder, packed, type, "");

   return packed;
}
Exemplo n.º 22
0
/**
 * Converts float32 to int16 half-float
 * Note this can be performed in 1 instruction if vcvtps2ph exists (f16c/cvt16)
 * [llvm.x86.vcvtps2ph / _mm_cvtps_ph]
 *
 * @param src           value to convert
 *
 * Convert float32 to half floats, preserving Infs and NaNs,
 * with rounding towards zero (trunc).
 */
LLVMValueRef
lp_build_float_to_half(struct gallivm_state *gallivm,
                       LLVMValueRef src)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMTypeRef f32_vec_type = LLVMTypeOf(src);
   unsigned length = LLVMGetTypeKind(f32_vec_type) == LLVMVectorTypeKind
                   ? LLVMGetVectorSize(f32_vec_type) : 1;
   struct lp_type i32_type = lp_type_int_vec(32, 32 * length);
   struct lp_type i16_type = lp_type_int_vec(16, 16 * length);
   LLVMValueRef result;

   if (util_cpu_caps.has_f16c && HAVE_LLVM >= 0x0301 &&
       (length == 4 || length == 8)) {
      struct lp_type i168_type = lp_type_int_vec(16, 16 * 8);
      unsigned mode = 3; /* same as LP_BUILD_ROUND_TRUNCATE */
      LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
      const char *intrinsic = NULL;
      if (length == 4) {
         intrinsic = "llvm.x86.vcvtps2ph.128";
      }
      else {
         intrinsic = "llvm.x86.vcvtps2ph.256";
      }
      result = lp_build_intrinsic_binary(builder, intrinsic,
                                         lp_build_vec_type(gallivm, i168_type),
                                         src, LLVMConstInt(i32t, mode, 0));
      if (length == 4) {
         result = lp_build_extract_range(gallivm, result, 0, 4);
      }
   }

   else {
      result = lp_build_float_to_smallfloat(gallivm, i32_type, src, 10, 5, 0, true);
      /* Convert int32 vector to int16 vector by trunc (might generate bad code) */
      result = LLVMBuildTrunc(builder, result, lp_build_vec_type(gallivm, i16_type), "");
   }

   /*
    * Debugging code.
    */
   if (0) {
     LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
     LLVMTypeRef i16t = LLVMInt16TypeInContext(gallivm->context);
     LLVMTypeRef f32t = LLVMFloatTypeInContext(gallivm->context);
     LLVMValueRef ref_result = LLVMGetUndef(LLVMVectorType(i16t, length));
     unsigned i;

     LLVMTypeRef func_type = LLVMFunctionType(i16t, &f32t, 1, 0);
     LLVMValueRef func = lp_build_const_int_pointer(gallivm, func_to_pointer((func_pointer)util_float_to_half));
     func = LLVMBuildBitCast(builder, func, LLVMPointerType(func_type, 0), "util_float_to_half");

     for (i = 0; i < length; ++i) {
        LLVMValueRef index = LLVMConstInt(i32t, i, 0);
        LLVMValueRef f32 = LLVMBuildExtractElement(builder, src, index, "");
#if 0
        /* XXX: not really supported by backends */
        LLVMValueRef f16 = lp_build_intrinsic_unary(builder, "llvm.convert.to.fp16", i16t, f32);
#else
        LLVMValueRef f16 = LLVMBuildCall(builder, func, &f32, 1, "");
#endif
        ref_result = LLVMBuildInsertElement(builder, ref_result, f16, index, "");
     }

     lp_build_print_value(gallivm, "src  = ", src);
     lp_build_print_value(gallivm, "llvm = ", result);
     lp_build_print_value(gallivm, "util = ", ref_result);
     lp_build_printf(gallivm, "\n");
  }

   return result;
}
/**
 * Build code to compare two values 'a' and 'b' of 'type' using the given func.
 * \param func  one of PIPE_FUNC_x
 * The result values will be 0 for false or ~0 for true.
 */
LLVMValueRef
lp_build_compare(struct gallivm_state *gallivm,
                 const struct lp_type type,
                 unsigned func,
                 LLVMValueRef a,
                 LLVMValueRef b)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMTypeRef int_vec_type = lp_build_int_vec_type(gallivm, type);
   LLVMValueRef zeros = LLVMConstNull(int_vec_type);
   LLVMValueRef ones = LLVMConstAllOnes(int_vec_type);
   LLVMValueRef cond;
   LLVMValueRef res;

   assert(func >= PIPE_FUNC_NEVER);
   assert(func <= PIPE_FUNC_ALWAYS);
   assert(lp_check_value(type, a));
   assert(lp_check_value(type, b));

   if(func == PIPE_FUNC_NEVER)
      return zeros;
   if(func == PIPE_FUNC_ALWAYS)
      return ones;

#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
   /*
    * There are no unsigned integer comparison instructions in SSE.
    */

   if (!type.floating && !type.sign &&
       type.width * type.length == 128 &&
       util_cpu_caps.has_sse2 &&
       (func == PIPE_FUNC_LESS ||
        func == PIPE_FUNC_LEQUAL ||
        func == PIPE_FUNC_GREATER ||
        func == PIPE_FUNC_GEQUAL) &&
       (gallivm_debug & GALLIVM_DEBUG_PERF)) {
         debug_printf("%s: inefficient <%u x i%u> unsigned comparison\n",
                      __FUNCTION__, type.length, type.width);
   }
#endif

#if HAVE_LLVM < 0x0207
#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
   if(type.width * type.length == 128) {
      if(type.floating && util_cpu_caps.has_sse) {
         /* float[4] comparison */
         LLVMTypeRef vec_type = lp_build_vec_type(gallivm, type);
         LLVMValueRef args[3];
         unsigned cc;
         boolean swap;

         swap = FALSE;
         switch(func) {
         case PIPE_FUNC_EQUAL:
            cc = 0;
            break;
         case PIPE_FUNC_NOTEQUAL:
            cc = 4;
            break;
         case PIPE_FUNC_LESS:
            cc = 1;
            break;
         case PIPE_FUNC_LEQUAL:
            cc = 2;
            break;
         case PIPE_FUNC_GREATER:
            cc = 1;
            swap = TRUE;
            break;
         case PIPE_FUNC_GEQUAL:
            cc = 2;
            swap = TRUE;
            break;
         default:
            assert(0);
            return lp_build_undef(gallivm, type);
         }

         if(swap) {
            args[0] = b;
            args[1] = a;
         }
         else {
            args[0] = a;
            args[1] = b;
         }

         args[2] = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), cc, 0);
         res = lp_build_intrinsic(builder,
                                  "llvm.x86.sse.cmp.ps",
                                  vec_type,
                                  args, 3);
         res = LLVMBuildBitCast(builder, res, int_vec_type, "");
         return res;
      }
      else if(util_cpu_caps.has_sse2) {
         /* int[4] comparison */
         static const struct {
            unsigned swap:1;
            unsigned eq:1;
            unsigned gt:1;
            unsigned not:1;
         } table[] = {
            {0, 0, 0, 1}, /* PIPE_FUNC_NEVER */
            {1, 0, 1, 0}, /* PIPE_FUNC_LESS */
            {0, 1, 0, 0}, /* PIPE_FUNC_EQUAL */
            {0, 0, 1, 1}, /* PIPE_FUNC_LEQUAL */
            {0, 0, 1, 0}, /* PIPE_FUNC_GREATER */
            {0, 1, 0, 1}, /* PIPE_FUNC_NOTEQUAL */
            {1, 0, 1, 1}, /* PIPE_FUNC_GEQUAL */
            {0, 0, 0, 0}  /* PIPE_FUNC_ALWAYS */
         };
         const char *pcmpeq;
         const char *pcmpgt;
         LLVMValueRef args[2];
         LLVMValueRef res;
         LLVMTypeRef vec_type = lp_build_vec_type(gallivm, type);

         switch (type.width) {
         case 8:
            pcmpeq = "llvm.x86.sse2.pcmpeq.b";
            pcmpgt = "llvm.x86.sse2.pcmpgt.b";
            break;
         case 16:
            pcmpeq = "llvm.x86.sse2.pcmpeq.w";
            pcmpgt = "llvm.x86.sse2.pcmpgt.w";
            break;
         case 32:
            pcmpeq = "llvm.x86.sse2.pcmpeq.d";
            pcmpgt = "llvm.x86.sse2.pcmpgt.d";
            break;
         default:
            assert(0);
            return lp_build_undef(gallivm, type);
         }

         /* There are no unsigned comparison instructions. So flip the sign bit
          * so that the results match.
          */
         if (table[func].gt && !type.sign) {
            LLVMValueRef msb = lp_build_const_int_vec(gallivm, type, (unsigned long long)1 << (type.width - 1));
            a = LLVMBuildXor(builder, a, msb, "");
            b = LLVMBuildXor(builder, b, msb, "");
         }

         if(table[func].swap) {
            args[0] = b;
            args[1] = a;
         }
         else {
            args[0] = a;
            args[1] = b;
         }

         if(table[func].eq)
            res = lp_build_intrinsic(builder, pcmpeq, vec_type, args, 2);
         else if (table[func].gt)
            res = lp_build_intrinsic(builder, pcmpgt, vec_type, args, 2);
         else
            res = LLVMConstNull(vec_type);

         if(table[func].not)
            res = LLVMBuildNot(builder, res, "");

         return res;
      }
   } /* if (type.width * type.length == 128) */
#endif
#endif /* HAVE_LLVM < 0x0207 */

   /* XXX: It is not clear if we should use the ordered or unordered operators */

   if(type.floating) {
      LLVMRealPredicate op;
      switch(func) {
      case PIPE_FUNC_NEVER:
         op = LLVMRealPredicateFalse;
         break;
      case PIPE_FUNC_ALWAYS:
         op = LLVMRealPredicateTrue;
         break;
      case PIPE_FUNC_EQUAL:
         op = LLVMRealUEQ;
         break;
      case PIPE_FUNC_NOTEQUAL:
         op = LLVMRealUNE;
         break;
      case PIPE_FUNC_LESS:
         op = LLVMRealULT;
         break;
      case PIPE_FUNC_LEQUAL:
         op = LLVMRealULE;
         break;
      case PIPE_FUNC_GREATER:
         op = LLVMRealUGT;
         break;
      case PIPE_FUNC_GEQUAL:
         op = LLVMRealUGE;
         break;
      default:
         assert(0);
         return lp_build_undef(gallivm, type);
      }

#if HAVE_LLVM >= 0x0207
      cond = LLVMBuildFCmp(builder, op, a, b, "");
      res = LLVMBuildSExt(builder, cond, int_vec_type, "");
#else
      if (type.length == 1) {
         cond = LLVMBuildFCmp(builder, op, a, b, "");
         res = LLVMBuildSExt(builder, cond, int_vec_type, "");
      }
      else {
         unsigned i;

         res = LLVMGetUndef(int_vec_type);

         debug_printf("%s: warning: using slow element-wise float"
                      " vector comparison\n", __FUNCTION__);
         for (i = 0; i < type.length; ++i) {
            LLVMValueRef index = lp_build_const_int32(gallivm, i);
            cond = LLVMBuildFCmp(builder, op,
                                 LLVMBuildExtractElement(builder, a, index, ""),
                                 LLVMBuildExtractElement(builder, b, index, ""),
                                 "");
            cond = LLVMBuildSelect(builder, cond,
                                   LLVMConstExtractElement(ones, index),
                                   LLVMConstExtractElement(zeros, index),
                                   "");
            res = LLVMBuildInsertElement(builder, res, cond, index, "");
         }
      }
#endif
   }
   else {
      LLVMIntPredicate op;
      switch(func) {
      case PIPE_FUNC_EQUAL:
         op = LLVMIntEQ;
         break;
      case PIPE_FUNC_NOTEQUAL:
         op = LLVMIntNE;
         break;
      case PIPE_FUNC_LESS:
         op = type.sign ? LLVMIntSLT : LLVMIntULT;
         break;
      case PIPE_FUNC_LEQUAL:
         op = type.sign ? LLVMIntSLE : LLVMIntULE;
         break;
      case PIPE_FUNC_GREATER:
         op = type.sign ? LLVMIntSGT : LLVMIntUGT;
         break;
      case PIPE_FUNC_GEQUAL:
         op = type.sign ? LLVMIntSGE : LLVMIntUGE;
         break;
      default:
         assert(0);
         return lp_build_undef(gallivm, type);
      }

#if HAVE_LLVM >= 0x0207
      cond = LLVMBuildICmp(builder, op, a, b, "");
      res = LLVMBuildSExt(builder, cond, int_vec_type, "");
#else
      if (type.length == 1) {
         cond = LLVMBuildICmp(builder, op, a, b, "");
         res = LLVMBuildSExt(builder, cond, int_vec_type, "");
      }
      else {
         unsigned i;

         res = LLVMGetUndef(int_vec_type);

         if (gallivm_debug & GALLIVM_DEBUG_PERF) {
            debug_printf("%s: using slow element-wise int"
                         " vector comparison\n", __FUNCTION__);
         }

         for(i = 0; i < type.length; ++i) {
            LLVMValueRef index = lp_build_const_int32(gallivm, i);
            cond = LLVMBuildICmp(builder, op,
                                 LLVMBuildExtractElement(builder, a, index, ""),
                                 LLVMBuildExtractElement(builder, b, index, ""),
                                 "");
            cond = LLVMBuildSelect(builder, cond,
                                   LLVMConstExtractElement(ones, index),
                                   LLVMConstExtractElement(zeros, index),
                                   "");
            res = LLVMBuildInsertElement(builder, res, cond, index, "");
         }
      }
#endif
   }

   return res;
}
Exemplo n.º 24
0
/*
 * Do a cached lookup.
 *
 * Returns (vectors of) 4x8 rgba aos value
 */
LLVMValueRef
lp_build_fetch_cached_texels(struct gallivm_state *gallivm,
                             const struct util_format_description *format_desc,
                             unsigned n,
                             LLVMValueRef base_ptr,
                             LLVMValueRef offset,
                             LLVMValueRef i,
                             LLVMValueRef j,
                             LLVMValueRef cache)

{
   LLVMBuilderRef builder = gallivm->builder;
   unsigned count, low_bit, log2size;
   LLVMValueRef color, offset_stored, addr, ptr_addrtrunc, tmp;
   LLVMValueRef ij_index, hash_index, hash_mask, block_index;
   LLVMTypeRef i8t = LLVMInt8TypeInContext(gallivm->context);
   LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
   LLVMTypeRef i64t = LLVMInt64TypeInContext(gallivm->context);
   struct lp_type type;
   struct lp_build_context bld32;
   memset(&type, 0, sizeof type);
   type.width = 32;
   type.length = n;

   assert(format_desc->block.width == 4);
   assert(format_desc->block.height == 4);

   lp_build_context_init(&bld32, gallivm, type);

   /*
    * compute hash - we use direct mapped cache, the hash function could
    *                be better but it needs to be simple
    * per-element:
    *    compare offset with offset stored at tag (hash)
    *    if not equal decode/store block, update tag
    *    extract color from cache
    *    assemble result vector
    */

   /* TODO: not ideal with 32bit pointers... */

   low_bit = util_logbase2(format_desc->block.bits / 8);
   log2size = util_logbase2(LP_BUILD_FORMAT_CACHE_SIZE);
   addr = LLVMBuildPtrToInt(builder, base_ptr, i64t, "");
   ptr_addrtrunc = LLVMBuildPtrToInt(builder, base_ptr, i32t, "");
   ptr_addrtrunc = lp_build_broadcast_scalar(&bld32, ptr_addrtrunc);
   /* For the hash function, first mask off the unused lowest bits. Then just
      do some xor with address bits - only use lower 32bits */
   ptr_addrtrunc = LLVMBuildAdd(builder, offset, ptr_addrtrunc, "");
   ptr_addrtrunc = LLVMBuildLShr(builder, ptr_addrtrunc,
                                 lp_build_const_int_vec(gallivm, type, low_bit), "");
   /* This only really makes sense for size 64,128,256 */
   hash_index = ptr_addrtrunc;
   ptr_addrtrunc = LLVMBuildLShr(builder, ptr_addrtrunc,
                                 lp_build_const_int_vec(gallivm, type, 2*log2size), "");
   hash_index = LLVMBuildXor(builder, ptr_addrtrunc, hash_index, "");
   tmp = LLVMBuildLShr(builder, hash_index,
                       lp_build_const_int_vec(gallivm, type, log2size), "");
   hash_index = LLVMBuildXor(builder, hash_index, tmp, "");

   hash_mask = lp_build_const_int_vec(gallivm, type, LP_BUILD_FORMAT_CACHE_SIZE - 1);
   hash_index = LLVMBuildAnd(builder, hash_index, hash_mask, "");
   ij_index = LLVMBuildShl(builder, i, lp_build_const_int_vec(gallivm, type, 2), "");
   ij_index = LLVMBuildAdd(builder, ij_index, j, "");
   block_index = LLVMBuildShl(builder, hash_index,
                              lp_build_const_int_vec(gallivm, type, 4), "");
   block_index = LLVMBuildAdd(builder, ij_index, block_index, "");

   if (n > 1) {
      color = LLVMGetUndef(LLVMVectorType(i32t, n));
      for (count = 0; count < n; count++) {
         LLVMValueRef index, cond, colorx;
         LLVMValueRef block_indexx, hash_indexx, addrx, offsetx, ptr_addrx;
         struct lp_build_if_state if_ctx;

         index = lp_build_const_int32(gallivm, count);
         offsetx = LLVMBuildExtractElement(builder, offset, index, "");
         addrx = LLVMBuildZExt(builder, offsetx, i64t, "");
         addrx = LLVMBuildAdd(builder, addrx, addr, "");
         block_indexx = LLVMBuildExtractElement(builder, block_index, index, "");
         hash_indexx = LLVMBuildLShr(builder, block_indexx,
                                     lp_build_const_int32(gallivm, 4), "");
         offset_stored = lookup_tag_data(gallivm, cache, hash_indexx);
         cond = LLVMBuildICmp(builder, LLVMIntNE, offset_stored, addrx, "");

         lp_build_if(&if_ctx, gallivm, cond);
         {
            ptr_addrx = LLVMBuildIntToPtr(builder, addrx,
                                          LLVMPointerType(i8t, 0), "");
            update_cached_block(gallivm, format_desc, ptr_addrx, hash_indexx, cache);
#if LP_BUILD_FORMAT_CACHE_DEBUG
            update_cache_access(gallivm, cache, 1,
                                LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_MISS);
#endif
         }
         lp_build_endif(&if_ctx);

         colorx = lookup_cached_pixel(gallivm, cache, block_indexx);

         color = LLVMBuildInsertElement(builder, color, colorx,
                                        lp_build_const_int32(gallivm, count), "");
      }
   }
   else {
      LLVMValueRef cond;
      struct lp_build_if_state if_ctx;

      tmp = LLVMBuildZExt(builder, offset, i64t, "");
      addr = LLVMBuildAdd(builder, tmp, addr, "");
      offset_stored = lookup_tag_data(gallivm, cache, hash_index);
      cond = LLVMBuildICmp(builder, LLVMIntNE, offset_stored, addr, "");

      lp_build_if(&if_ctx, gallivm, cond);
      {
         tmp = LLVMBuildIntToPtr(builder, addr, LLVMPointerType(i8t, 0), "");
         update_cached_block(gallivm, format_desc, tmp, hash_index, cache);
#if LP_BUILD_FORMAT_CACHE_DEBUG
         update_cache_access(gallivm, cache, 1,
                             LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_MISS);
#endif
      }
      lp_build_endif(&if_ctx);

      color = lookup_cached_pixel(gallivm, cache, block_index);
   }
#if LP_BUILD_FORMAT_CACHE_DEBUG
   update_cache_access(gallivm, cache, n,
                       LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_TOTAL);
#endif
   return LLVMBuildBitCast(builder, color, LLVMVectorType(i8t, n * 4), "");
}
/**
 * Truncate or expand the bitwidth.
 *
 * NOTE: Getting the right sign flags is crucial here, as we employ some
 * intrinsics that do saturation.
 */
void
lp_build_resize(struct gallivm_state *gallivm,
                struct lp_type src_type,
                struct lp_type dst_type,
                const LLVMValueRef *src, unsigned num_srcs,
                LLVMValueRef *dst, unsigned num_dsts)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH];
   unsigned i;

   /*
    * We don't support float <-> int conversion here. That must be done
    * before/after calling this function.
    */
   assert(src_type.floating == dst_type.floating);

   /*
    * We don't support double <-> float conversion yet, although it could be
    * added with little effort.
    */
   assert((!src_type.floating && !dst_type.floating) ||
          src_type.width == dst_type.width);

   /* We must not loose or gain channels. Only precision */
   assert(src_type.length * num_srcs == dst_type.length * num_dsts);

   /* We don't support M:N conversion, only 1:N, M:1, or 1:1 */
   assert(num_srcs == 1 || num_dsts == 1);

   assert(src_type.length <= LP_MAX_VECTOR_LENGTH);
   assert(dst_type.length <= LP_MAX_VECTOR_LENGTH);
   assert(num_srcs <= LP_MAX_VECTOR_LENGTH);
   assert(num_dsts <= LP_MAX_VECTOR_LENGTH);

   if (src_type.width > dst_type.width) {
      /*
       * Truncate bit width.
       */

      assert(num_dsts == 1);

      if (src_type.width * src_type.length == dst_type.width * dst_type.length) {
        /*
         * Register width remains constant -- use vector packing intrinsics
         */
         tmp[0] = lp_build_pack(gallivm, src_type, dst_type, TRUE, src, num_srcs);
      }
      else {
         if (src_type.width / dst_type.width > num_srcs) {
            /*
            * First change src vectors size (with shuffle) so they have the
            * same size as the destination vector, then pack normally.
            * Note: cannot use cast/extract because llvm generates atrocious code.
            */
            unsigned size_ratio = (src_type.width * src_type.length) /
                                  (dst_type.length * dst_type.width);
            unsigned new_length = src_type.length / size_ratio;

            for (i = 0; i < size_ratio * num_srcs; i++) {
               unsigned start_index = (i % size_ratio) * new_length;
               tmp[i] = lp_build_extract_range(gallivm, src[i / size_ratio],
                                               start_index, new_length);
            }
            num_srcs *= size_ratio;
            src_type.length = new_length;
            tmp[0] = lp_build_pack(gallivm, src_type, dst_type, TRUE, tmp, num_srcs);
         }
         else {
            /*
             * Truncate bit width but expand vector size - first pack
             * then expand simply because this should be more AVX-friendly
             * for the cases we probably hit.
             */
            unsigned size_ratio = (dst_type.width * dst_type.length) /
                                  (src_type.length * src_type.width);
            unsigned num_pack_srcs = num_srcs / size_ratio;
            dst_type.length = dst_type.length / size_ratio;

            for (i = 0; i < size_ratio; i++) {
               tmp[i] = lp_build_pack(gallivm, src_type, dst_type, TRUE,
                                      &src[i*num_pack_srcs], num_pack_srcs);
            }
            tmp[0] = lp_build_concat(gallivm, tmp, dst_type, size_ratio);
         }
      }
   }
   else if (src_type.width < dst_type.width) {
      /*
       * Expand bit width.
       */

      assert(num_srcs == 1);

      if (src_type.width * src_type.length == dst_type.width * dst_type.length) {
         /*
          * Register width remains constant -- use vector unpack intrinsics
          */
         lp_build_unpack(gallivm, src_type, dst_type, src[0], tmp, num_dsts);
      }
      else {
         /*
          * Do it element-wise.
          */
         assert(src_type.length * num_srcs == dst_type.length * num_dsts);

         for (i = 0; i < num_dsts; i++) {
            tmp[i] = lp_build_undef(gallivm, dst_type);
         }

         for (i = 0; i < src_type.length; ++i) {
            unsigned j = i / dst_type.length;
            LLVMValueRef srcindex = lp_build_const_int32(gallivm, i);
            LLVMValueRef dstindex = lp_build_const_int32(gallivm, i % dst_type.length);
            LLVMValueRef val = LLVMBuildExtractElement(builder, src[0], srcindex, "");

            if (src_type.sign && dst_type.sign) {
               val = LLVMBuildSExt(builder, val, lp_build_elem_type(gallivm, dst_type), "");
            } else {
               val = LLVMBuildZExt(builder, val, lp_build_elem_type(gallivm, dst_type), "");
            }
            tmp[j] = LLVMBuildInsertElement(builder, tmp[j], val, dstindex, "");
         }
      }
   }
   else {
      /*
       * No-op
       */

      assert(num_srcs == 1);
      assert(num_dsts == 1);

      tmp[0] = src[0];
   }

   for(i = 0; i < num_dsts; ++i)
      dst[i] = tmp[i];
}
Exemplo n.º 26
0
/**
 * Generate code to compute coordinate gradient (rho).
 * \param ddx  partial derivatives of (s, t, r, q) with respect to X
 * \param ddy  partial derivatives of (s, t, r, q) with respect to Y
 *
 * XXX: The resulting rho is scalar, so we ignore all but the first element of
 * derivatives that are passed by the shader.
 */
static LLVMValueRef
lp_build_rho(struct lp_build_sample_context *bld,
             unsigned unit,
             const LLVMValueRef ddx[4],
             const LLVMValueRef ddy[4])
{
   struct lp_build_context *int_size_bld = &bld->int_size_bld;
   struct lp_build_context *float_size_bld = &bld->float_size_bld;
   struct lp_build_context *float_bld = &bld->float_bld;
   const unsigned dims = bld->dims;
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
   LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0);
   LLVMValueRef index1 = LLVMConstInt(i32t, 1, 0);
   LLVMValueRef index2 = LLVMConstInt(i32t, 2, 0);
   LLVMValueRef dsdx, dsdy, dtdx, dtdy, drdx, drdy;
   LLVMValueRef rho_x, rho_y;
   LLVMValueRef rho_vec;
   LLVMValueRef int_size, float_size;
   LLVMValueRef rho;
   LLVMValueRef first_level, first_level_vec;

   dsdx = ddx[0];
   dsdy = ddy[0];

   if (dims <= 1) {
      rho_x = dsdx;
      rho_y = dsdy;
   }
   else {
      rho_x = float_size_bld->undef;
      rho_y = float_size_bld->undef;

      rho_x = LLVMBuildInsertElement(builder, rho_x, dsdx, index0, "");
      rho_y = LLVMBuildInsertElement(builder, rho_y, dsdy, index0, "");

      dtdx = ddx[1];
      dtdy = ddy[1];

      rho_x = LLVMBuildInsertElement(builder, rho_x, dtdx, index1, "");
      rho_y = LLVMBuildInsertElement(builder, rho_y, dtdy, index1, "");

      if (dims >= 3) {
         drdx = ddx[2];
         drdy = ddy[2];

         rho_x = LLVMBuildInsertElement(builder, rho_x, drdx, index2, "");
         rho_y = LLVMBuildInsertElement(builder, rho_y, drdy, index2, "");
      }
   }

   rho_x = lp_build_abs(float_size_bld, rho_x);
   rho_y = lp_build_abs(float_size_bld, rho_y);

   rho_vec = lp_build_max(float_size_bld, rho_x, rho_y);

   first_level = bld->dynamic_state->first_level(bld->dynamic_state,
                                                 bld->gallivm, unit);
   first_level_vec = lp_build_broadcast_scalar(&bld->int_size_bld, first_level);
   int_size = lp_build_minify(int_size_bld, bld->int_size, first_level_vec);
   float_size = lp_build_int_to_float(float_size_bld, int_size);

   rho_vec = lp_build_mul(float_size_bld, rho_vec, float_size);

   if (dims <= 1) {
      rho = rho_vec;
   }
   else {
      if (dims >= 2) {
         LLVMValueRef rho_s, rho_t, rho_r;

         rho_s = LLVMBuildExtractElement(builder, rho_vec, index0, "");
         rho_t = LLVMBuildExtractElement(builder, rho_vec, index1, "");

         rho = lp_build_max(float_bld, rho_s, rho_t);
         if (dims >= 3) {
            rho_r = LLVMBuildExtractElement(builder, rho_vec, index2, "");
            rho = lp_build_max(float_bld, rho, rho_r);
         }
      }
   }

   return rho;
}
Exemplo n.º 27
0
/**
 * Fetch a texels from a texture, returning them in SoA layout.
 *
 * \param type  the desired return type for 'rgba'.  The vector length
 *              is the number of texels to fetch
 *
 * \param base_ptr  points to the base of the texture mip tree.
 * \param offset    offset to start of the texture image block.  For non-
 *                  compressed formats, this simply is an offset to the texel.
 *                  For compressed formats, it is an offset to the start of the
 *                  compressed data block.
 *
 * \param i, j  the sub-block pixel coordinates.  For non-compressed formats
 *              these will always be (0,0).  For compressed formats, i will
 *              be in [0, block_width-1] and j will be in [0, block_height-1].
 */
void
lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
                        const struct util_format_description *format_desc,
                        struct lp_type type,
                        LLVMValueRef base_ptr,
                        LLVMValueRef offset,
                        LLVMValueRef i,
                        LLVMValueRef j,
                        LLVMValueRef rgba_out[4])
{
   LLVMBuilderRef builder = gallivm->builder;

   if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
       (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
        format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB ||
        format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) &&
       format_desc->block.width == 1 &&
       format_desc->block.height == 1 &&
       format_desc->block.bits <= type.width &&
       (format_desc->channel[0].type != UTIL_FORMAT_TYPE_FLOAT ||
        format_desc->channel[0].size == 32))
   {
      /*
       * The packed pixel fits into an element of the destination format. Put
       * the packed pixels into a vector and extract each component for all
       * vector elements in parallel.
       */

      LLVMValueRef packed;

      /*
       * gather the texels from the texture
       * Ex: packed = {XYZW, XYZW, XYZW, XYZW}
       */
      assert(format_desc->block.bits <= type.width);
      packed = lp_build_gather(gallivm,
                               type.length,
                               format_desc->block.bits,
                               type.width,
                               base_ptr, offset, FALSE);

      /*
       * convert texels to float rgba
       */
      lp_build_unpack_rgba_soa(gallivm,
                               format_desc,
                               type,
                               packed, rgba_out);
      return;
   }

   if (format_desc->format == PIPE_FORMAT_R11G11B10_FLOAT ||
       format_desc->format == PIPE_FORMAT_R9G9B9E5_FLOAT) {
      /*
       * similar conceptually to above but requiring special
       * AoS packed -> SoA float conversion code.
       */
      LLVMValueRef packed;

      assert(type.floating);
      assert(type.width == 32);

      packed = lp_build_gather(gallivm, type.length,
                               format_desc->block.bits,
                               type.width, base_ptr, offset,
                               FALSE);
      if (format_desc->format == PIPE_FORMAT_R11G11B10_FLOAT) {
         lp_build_r11g11b10_to_float(gallivm, packed, rgba_out);
      }
      else {
         lp_build_rgb9e5_to_float(gallivm, packed, rgba_out);
      }
      return;
   }

   if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS &&
       format_desc->block.bits == 64) {
      /*
       * special case the format is 64 bits but we only require
       * 32bit (or 8bit) from each block.
       */
      LLVMValueRef packed;

      if (format_desc->format == PIPE_FORMAT_X32_S8X24_UINT) {
         /*
          * for stencil simply fix up offsets - could in fact change
          * base_ptr instead even outside the shader.
          */
         unsigned mask = (1 << 8) - 1;
         LLVMValueRef s_offset = lp_build_const_int_vec(gallivm, type, 4);
         offset = LLVMBuildAdd(builder, offset, s_offset, "");
         packed = lp_build_gather(gallivm, type.length,
                                  32, type.width, base_ptr, offset, FALSE);
         packed = LLVMBuildAnd(builder, packed,
                               lp_build_const_int_vec(gallivm, type, mask), "");
      }
      else {
         assert (format_desc->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT);
         packed = lp_build_gather(gallivm, type.length,
                                  32, type.width, base_ptr, offset, TRUE);
         packed = LLVMBuildBitCast(builder, packed,
                                   lp_build_vec_type(gallivm, type), "");
      }
      /* for consistency with lp_build_unpack_rgba_soa() return sss1 or zzz1 */
      rgba_out[0] = rgba_out[1] = rgba_out[2] = packed;
      rgba_out[3] = lp_build_const_vec(gallivm, type, 1.0f);
      return;
   }

   /*
    * Try calling lp_build_fetch_rgba_aos for all pixels.
    */

   if (util_format_fits_8unorm(format_desc) &&
       type.floating && type.width == 32 &&
       (type.length == 1 || (type.length % 4 == 0))) {
      struct lp_type tmp_type;
      LLVMValueRef tmp;

      memset(&tmp_type, 0, sizeof tmp_type);
      tmp_type.width = 8;
      tmp_type.length = type.length * 4;
      tmp_type.norm = TRUE;

      tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type,
                                    base_ptr, offset, i, j);

      lp_build_rgba8_to_fi32_soa(gallivm,
                                type,
                                tmp,
                                rgba_out);

      return;
   }

   /*
    * Fallback to calling lp_build_fetch_rgba_aos for each pixel.
    *
    * This is not the most efficient way of fetching pixels, as we
    * miss some opportunities to do vectorization, but this is
    * convenient for formats or scenarios for which there was no
    * opportunity or incentive to optimize.
    */

   {
      unsigned k, chan;
      struct lp_type tmp_type;

      if (gallivm_debug & GALLIVM_DEBUG_PERF) {
         debug_printf("%s: scalar unpacking of %s\n",
                      __FUNCTION__, format_desc->short_name);
      }

      tmp_type = type;
      tmp_type.length = 4;

      for (chan = 0; chan < 4; ++chan) {
         rgba_out[chan] = lp_build_undef(gallivm, type);
      }

      /* loop over number of pixels */
      for(k = 0; k < type.length; ++k) {
         LLVMValueRef index = lp_build_const_int32(gallivm, k);
         LLVMValueRef offset_elem;
         LLVMValueRef i_elem, j_elem;
         LLVMValueRef tmp;

         offset_elem = LLVMBuildExtractElement(builder, offset,
                                               index, "");

         i_elem = LLVMBuildExtractElement(builder, i, index, "");
         j_elem = LLVMBuildExtractElement(builder, j, index, "");

         /* Get a single float[4]={R,G,B,A} pixel */
         tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type,
                                       base_ptr, offset_elem,
                                       i_elem, j_elem);

         /*
          * Insert the AoS tmp value channels into the SoA result vectors at
          * position = 'index'.
          */
         for (chan = 0; chan < 4; ++chan) {
            LLVMValueRef chan_val = lp_build_const_int32(gallivm, chan),
            tmp_chan = LLVMBuildExtractElement(builder, tmp, chan_val, "");
            rgba_out[chan] = LLVMBuildInsertElement(builder, rgba_out[chan],
                                                    tmp_chan, index, "");
         }
      }
   }
}
Exemplo n.º 28
0
/**
 * Generate code to compute texture level of detail (lambda).
 * \param ddx  partial derivatives of (s, t, r, q) with respect to X
 * \param ddy  partial derivatives of (s, t, r, q) with respect to Y
 * \param lod_bias  optional float vector with the shader lod bias
 * \param explicit_lod  optional float vector with the explicit lod
 * \param width  scalar int texture width
 * \param height  scalar int texture height
 * \param depth  scalar int texture depth
 *
 * XXX: The resulting lod is scalar, so ignore all but the first element of
 * derivatives, lod_bias, etc that are passed by the shader.
 */
void
lp_build_lod_selector(struct lp_build_sample_context *bld,
                      unsigned unit,
                      const LLVMValueRef ddx[4],
                      const LLVMValueRef ddy[4],
                      LLVMValueRef lod_bias, /* optional */
                      LLVMValueRef explicit_lod, /* optional */
                      unsigned mip_filter,
                      LLVMValueRef *out_lod_ipart,
                      LLVMValueRef *out_lod_fpart)

{
   LLVMBuilderRef builder = bld->gallivm->builder;
   struct lp_build_context *float_bld = &bld->float_bld;
   LLVMValueRef lod;

   *out_lod_ipart = bld->int_bld.zero;
   *out_lod_fpart = bld->float_bld.zero;

   if (bld->static_state->min_max_lod_equal) {
      /* User is forcing sampling from a particular mipmap level.
       * This is hit during mipmap generation.
       */
      LLVMValueRef min_lod =
         bld->dynamic_state->min_lod(bld->dynamic_state, bld->gallivm, unit);

      lod = min_lod;
   }
   else {
      LLVMValueRef sampler_lod_bias =
         bld->dynamic_state->lod_bias(bld->dynamic_state, bld->gallivm, unit);
      LLVMValueRef index0 = lp_build_const_int32(bld->gallivm, 0);

      if (explicit_lod) {
         lod = LLVMBuildExtractElement(builder, explicit_lod,
                                       index0, "");
      }
      else {
         LLVMValueRef rho;

         rho = lp_build_rho(bld, unit, ddx, ddy);

         /*
          * Compute lod = log2(rho)
          */

         if (!lod_bias &&
             !bld->static_state->lod_bias_non_zero &&
             !bld->static_state->apply_max_lod &&
             !bld->static_state->apply_min_lod) {
            /*
             * Special case when there are no post-log2 adjustments, which
             * saves instructions but keeping the integer and fractional lod
             * computations separate from the start.
             */

            if (mip_filter == PIPE_TEX_MIPFILTER_NONE ||
                mip_filter == PIPE_TEX_MIPFILTER_NEAREST) {
               *out_lod_ipart = lp_build_ilog2(float_bld, rho);
               *out_lod_fpart = bld->float_bld.zero;
               return;
            }
            if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR &&
                !(gallivm_debug & GALLIVM_DEBUG_NO_BRILINEAR)) {
               lp_build_brilinear_rho(float_bld, rho, BRILINEAR_FACTOR,
                                      out_lod_ipart, out_lod_fpart);
               return;
            }
         }

         if (0) {
            lod = lp_build_log2(float_bld, rho);
         }
         else {
            lod = lp_build_fast_log2(float_bld, rho);
         }

         /* add shader lod bias */
         if (lod_bias) {
            lod_bias = LLVMBuildExtractElement(builder, lod_bias,
                                               index0, "");
            lod = LLVMBuildFAdd(builder, lod, lod_bias, "shader_lod_bias");
         }
      }

      /* add sampler lod bias */
      if (bld->static_state->lod_bias_non_zero)
         lod = LLVMBuildFAdd(builder, lod, sampler_lod_bias, "sampler_lod_bias");


      /* clamp lod */
      if (bld->static_state->apply_max_lod) {
         LLVMValueRef max_lod =
            bld->dynamic_state->max_lod(bld->dynamic_state, bld->gallivm, unit);

         lod = lp_build_min(float_bld, lod, max_lod);
      }
      if (bld->static_state->apply_min_lod) {
         LLVMValueRef min_lod =
            bld->dynamic_state->min_lod(bld->dynamic_state, bld->gallivm, unit);

         lod = lp_build_max(float_bld, lod, min_lod);
      }
   }

   if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
      if (!(gallivm_debug & GALLIVM_DEBUG_NO_BRILINEAR)) {
         lp_build_brilinear_lod(float_bld, lod, BRILINEAR_FACTOR,
                                out_lod_ipart, out_lod_fpart);
      }
      else {
         lp_build_ifloor_fract(float_bld, lod, out_lod_ipart, out_lod_fpart);
      }

      lp_build_name(*out_lod_fpart, "lod_fpart");
   }
   else {
      *out_lod_ipart = lp_build_iround(float_bld, lod);
   }

   lp_build_name(*out_lod_ipart, "lod_ipart");

   return;
}
Exemplo n.º 29
0
/**
 * Fetch a pixel into a 4 float AoS.
 *
 * \param format_desc  describes format of the image we're fetching from
 * \param ptr  address of the pixel block (or the texel if uncompressed)
 * \param i, j  the sub-block pixel coordinates.  For non-compressed formats
 *              these will always be (0, 0).
 * \return  a 4 element vector with the pixel's RGBA values.
 */
LLVMValueRef
lp_build_fetch_rgba_aos(struct gallivm_state *gallivm,
                        const struct util_format_description *format_desc,
                        struct lp_type type,
                        LLVMValueRef base_ptr,
                        LLVMValueRef offset,
                        LLVMValueRef i,
                        LLVMValueRef j)
{
   LLVMBuilderRef builder = gallivm->builder;
   unsigned num_pixels = type.length / 4;
   struct lp_build_context bld;

   assert(type.length <= LP_MAX_VECTOR_LENGTH);
   assert(type.length % 4 == 0);

   lp_build_context_init(&bld, gallivm, type);

   /*
    * Trivial case
    *
    * The format matches the type (apart of a swizzle) so no need for
    * scaling or converting.
    */

   if (format_matches_type(format_desc, type) &&
       format_desc->block.bits <= type.width * 4 &&
       util_is_power_of_two(format_desc->block.bits)) {
      LLVMValueRef packed;

      /*
       * The format matches the type (apart of a swizzle) so no need for
       * scaling or converting.
       */

      packed = lp_build_gather(gallivm, type.length/4,
                               format_desc->block.bits, type.width*4,
                               base_ptr, offset);

      assert(format_desc->block.bits <= type.width * type.length);

      packed = LLVMBuildBitCast(gallivm->builder, packed,
                                lp_build_vec_type(gallivm, type), "");

      return lp_build_format_swizzle_aos(format_desc, &bld, packed);
   }

   /*
    * Bit arithmetic
    */

   if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
       (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
        format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) &&
       format_desc->block.width == 1 &&
       format_desc->block.height == 1 &&
       util_is_power_of_two(format_desc->block.bits) &&
       format_desc->block.bits <= 32 &&
       format_desc->is_bitmask &&
       !format_desc->is_mixed &&
       (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED ||
        format_desc->channel[1].type == UTIL_FORMAT_TYPE_UNSIGNED)) {

      LLVMValueRef tmps[LP_MAX_VECTOR_LENGTH/4];
      LLVMValueRef res;
      unsigned k;

      /*
       * Unpack a pixel at a time into a <4 x float> RGBA vector
       */

      for (k = 0; k < num_pixels; ++k) {
         LLVMValueRef packed;

         packed = lp_build_gather_elem(gallivm, num_pixels,
                                       format_desc->block.bits, 32,
                                       base_ptr, offset, k);

         tmps[k] = lp_build_unpack_arith_rgba_aos(gallivm,
                                                  format_desc,
                                                  packed);
      }

      /*
       * Type conversion.
       *
       * TODO: We could avoid floating conversion for integer to
       * integer conversions.
       */

      if (gallivm_debug & GALLIVM_DEBUG_PERF && !type.floating) {
         debug_printf("%s: unpacking %s with floating point\n",
                      __FUNCTION__, format_desc->short_name);
      }

      lp_build_conv(gallivm,
                    lp_float32_vec4_type(),
                    type,
                    tmps, num_pixels, &res, 1);

      return lp_build_format_swizzle_aos(format_desc, &bld, res);
   }

   /*
    * YUV / subsampled formats
    */

   if (format_desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
      struct lp_type tmp_type;
      LLVMValueRef tmp;

      memset(&tmp_type, 0, sizeof tmp_type);
      tmp_type.width = 8;
      tmp_type.length = num_pixels * 4;
      tmp_type.norm = TRUE;

      tmp = lp_build_fetch_subsampled_rgba_aos(gallivm,
                                               format_desc,
                                               num_pixels,
                                               base_ptr,
                                               offset,
                                               i, j);

      lp_build_conv(gallivm,
                    tmp_type, type,
                    &tmp, 1, &tmp, 1);

      return tmp;
   }

   /*
    * Fallback to util_format_description::fetch_rgba_8unorm().
    */

   if (format_desc->fetch_rgba_8unorm &&
       !type.floating && type.width == 8 && !type.sign && type.norm) {
      /*
       * Fallback to calling util_format_description::fetch_rgba_8unorm.
       *
       * This is definitely not the most efficient way of fetching pixels, as
       * we miss the opportunity to do vectorization, but this it is a
       * convenient for formats or scenarios for which there was no opportunity
       * or incentive to optimize.
       */

      LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
      char name[256];
      LLVMTypeRef i8t = LLVMInt8TypeInContext(gallivm->context);
      LLVMTypeRef pi8t = LLVMPointerType(i8t, 0);
      LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
      LLVMValueRef function;
      LLVMValueRef tmp_ptr;
      LLVMValueRef tmp;
      LLVMValueRef res;
      LLVMValueRef callee;
      unsigned k;

      util_snprintf(name, sizeof name, "util_format_%s_fetch_rgba_8unorm",
                    format_desc->short_name);

      if (gallivm_debug & GALLIVM_DEBUG_PERF) {
         debug_printf("%s: falling back to %s\n", __FUNCTION__, name);
      }

      /*
       * Declare and bind format_desc->fetch_rgba_8unorm().
       */

      function = LLVMGetNamedFunction(module, name);
      if (!function) {
         /*
          * Function to call looks like:
          *   fetch(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j)
          */
         LLVMTypeRef ret_type;
         LLVMTypeRef arg_types[4];
         LLVMTypeRef function_type;

         ret_type = LLVMVoidTypeInContext(gallivm->context);
         arg_types[0] = pi8t;
         arg_types[1] = pi8t;
         arg_types[2] = i32t;
         arg_types[3] = i32t;
         function_type = LLVMFunctionType(ret_type, arg_types,
                                          Elements(arg_types), 0);
         function = LLVMAddFunction(module, name, function_type);

         LLVMSetFunctionCallConv(function, LLVMCCallConv);
         LLVMSetLinkage(function, LLVMExternalLinkage);

         assert(LLVMIsDeclaration(function));
      }

      /* make const pointer for the C fetch_rgba_float function */
      callee = lp_build_const_int_pointer(gallivm,
         func_to_pointer((func_pointer) format_desc->fetch_rgba_8unorm));

      /* cast the callee pointer to the function's type */
      function = LLVMBuildBitCast(builder, callee,
                                  LLVMTypeOf(function), "cast callee");

      tmp_ptr = lp_build_alloca(gallivm, i32t, "");

      res = LLVMGetUndef(LLVMVectorType(i32t, num_pixels));

      /*
       * Invoke format_desc->fetch_rgba_8unorm() for each pixel and insert the result
       * in the SoA vectors.
       */

      for (k = 0; k < num_pixels; ++k) {
         LLVMValueRef index = lp_build_const_int32(gallivm, k);
         LLVMValueRef args[4];

         args[0] = LLVMBuildBitCast(builder, tmp_ptr, pi8t, "");
         args[1] = lp_build_gather_elem_ptr(gallivm, num_pixels,
                                            base_ptr, offset, k);

         if (num_pixels == 1) {
            args[2] = i;
            args[3] = j;
         }
         else {
            args[2] = LLVMBuildExtractElement(builder, i, index, "");
            args[3] = LLVMBuildExtractElement(builder, j, index, "");
         }

         LLVMBuildCall(builder, function, args, Elements(args), "");

         tmp = LLVMBuildLoad(builder, tmp_ptr, "");

         if (num_pixels == 1) {
            res = tmp;
         }
         else {
            res = LLVMBuildInsertElement(builder, res, tmp, index, "");
         }
      }

      /* Bitcast from <n x i32> to <4n x i8> */
      res = LLVMBuildBitCast(builder, res, bld.vec_type, "");

      return res;
   }


   /*
    * Fallback to util_format_description::fetch_rgba_float().
    */

   if (format_desc->fetch_rgba_float) {
      /*
       * Fallback to calling util_format_description::fetch_rgba_float.
       *
       * This is definitely not the most efficient way of fetching pixels, as
       * we miss the opportunity to do vectorization, but this it is a
       * convenient for formats or scenarios for which there was no opportunity
       * or incentive to optimize.
       */

      LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder)));
      char name[256];
      LLVMTypeRef f32t = LLVMFloatTypeInContext(gallivm->context);
      LLVMTypeRef f32x4t = LLVMVectorType(f32t, 4);
      LLVMTypeRef pf32t = LLVMPointerType(f32t, 0);
      LLVMTypeRef pi8t = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);
      LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
      LLVMValueRef function;
      LLVMValueRef tmp_ptr;
      LLVMValueRef tmps[LP_MAX_VECTOR_LENGTH/4];
      LLVMValueRef res;
      LLVMValueRef callee;
      unsigned k;

      util_snprintf(name, sizeof name, "util_format_%s_fetch_rgba_float",
                    format_desc->short_name);

      if (gallivm_debug & GALLIVM_DEBUG_PERF) {
         debug_printf("%s: falling back to %s\n", __FUNCTION__, name);
      }

      /*
       * Declare and bind format_desc->fetch_rgba_float().
       */

      function = LLVMGetNamedFunction(module, name);
      if (!function) {
         /*
          * Function to call looks like:
          *   fetch(float *dst, const uint8_t *src, unsigned i, unsigned j)
          */
         LLVMTypeRef ret_type;
         LLVMTypeRef arg_types[4];
         LLVMTypeRef function_type;

         ret_type = LLVMVoidTypeInContext(gallivm->context);
         arg_types[0] = pf32t;
         arg_types[1] = pi8t;
         arg_types[2] = i32t;
         arg_types[3] = i32t;
         function_type = LLVMFunctionType(ret_type, arg_types,
                                          Elements(arg_types), 0);
         function = LLVMAddFunction(module, name, function_type);

         LLVMSetFunctionCallConv(function, LLVMCCallConv);
         LLVMSetLinkage(function, LLVMExternalLinkage);

         assert(LLVMIsDeclaration(function));
      }

      /* Note: we're using this casting here instead of LLVMAddGlobalMapping()
       * to work around a bug in LLVM 2.6.
       */

      /* make const pointer for the C fetch_rgba_float function */
      callee = lp_build_const_int_pointer(gallivm,
         func_to_pointer((func_pointer) format_desc->fetch_rgba_float));

      /* cast the callee pointer to the function's type */
      function = LLVMBuildBitCast(builder, callee,
                                  LLVMTypeOf(function), "cast callee");


      tmp_ptr = lp_build_alloca(gallivm, f32x4t, "");

      /*
       * Invoke format_desc->fetch_rgba_float() for each pixel and insert the result
       * in the SoA vectors.
       */

      for (k = 0; k < num_pixels; ++k) {
         LLVMValueRef args[4];

         args[0] = LLVMBuildBitCast(builder, tmp_ptr, pf32t, "");
         args[1] = lp_build_gather_elem_ptr(gallivm, num_pixels,
                                            base_ptr, offset, k);

         if (num_pixels == 1) {
            args[2] = i;
            args[3] = j;
         }
         else {
            LLVMValueRef index = lp_build_const_int32(gallivm, k);
            args[2] = LLVMBuildExtractElement(builder, i, index, "");
            args[3] = LLVMBuildExtractElement(builder, j, index, "");
         }

         LLVMBuildCall(builder, function, args, Elements(args), "");

         tmps[k] = LLVMBuildLoad(builder, tmp_ptr, "");
      }

      lp_build_conv(gallivm,
                    lp_float32_vec4_type(),
                    type,
                    tmps, num_pixels, &res, 1);

      return res;
   }

   assert(0);
   return lp_build_undef(gallivm, type);
}
Exemplo n.º 30
0
	TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
		LLVMValueRef value = dst[chan_index];

		if (inst->Instruction.Saturate != TGSI_SAT_NONE) {
			struct lp_build_emit_data clamp_emit_data;

			memset(&clamp_emit_data, 0, sizeof(clamp_emit_data));
			clamp_emit_data.arg_count = 3;
			clamp_emit_data.args[0] = value;
			clamp_emit_data.args[2] = base.one;

			switch(inst->Instruction.Saturate) {
			case TGSI_SAT_ZERO_ONE:
				clamp_emit_data.args[1] = base.zero;
				break;
			case TGSI_SAT_MINUS_PLUS_ONE:
				clamp_emit_data.args[1] = LLVMConstReal(
						base.elem_type, -1.0f);
				break;
			default:
				assert(0);
			}
			value = lp_build_emit_llvm(bld_base, TGSI_OPCODE_CLAMP,
						&clamp_emit_data);
		}

		if (reg->Register.File == TGSI_FILE_ADDRESS) {
			temp_ptr = bld->addr[reg->Register.Index][chan_index];
			LLVMBuildStore(builder, value, temp_ptr);
			continue;
		}
	
		value = bitcast(bld_base, TGSI_TYPE_FLOAT, value);

		if (reg->Register.Indirect) {
			struct tgsi_declaration_range range = get_array_range(bld_base,
				reg->Register.File, &reg->Indirect);

        		unsigned i, size = range.Last - range.First + 1;
			LLVMValueRef array = LLVMBuildInsertElement(builder,
				emit_array_fetch(bld_base, reg->Register.File, TGSI_TYPE_FLOAT, range, chan_index),
				value,  emit_array_index(bld, &reg->Indirect, reg->Register.Index - range.First), "");

        		for (i = 0; i < size; ++i) {
				switch(reg->Register.File) {
				case TGSI_FILE_OUTPUT:
					temp_ptr = bld->outputs[i + range.First][chan_index];
					break;

				case TGSI_FILE_TEMPORARY:
					temp_ptr = lp_get_temp_ptr_soa(bld, i + range.First, chan_index);
					break;

				default:
					return;
				}
				value = LLVMBuildExtractElement(builder, array, 
					lp_build_const_int32(gallivm, i), "");
				LLVMBuildStore(builder, value, temp_ptr);
			}

		} else {
			switch(reg->Register.File) {
			case TGSI_FILE_OUTPUT:
				temp_ptr = bld->outputs[reg->Register.Index][chan_index];
				break;

			case TGSI_FILE_TEMPORARY:
				temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, chan_index);
				break;

			default:
				return;
			}
			LLVMBuildStore(builder, value, temp_ptr);
		}
	}