Пример #1
0
/**
 * Gather one element from scatter positions in memory.
 *
 * @sa lp_build_gather()
 */
LLVMValueRef
lp_build_gather_elem(struct gallivm_state *gallivm,
                     unsigned length,
                     unsigned src_width,
                     unsigned dst_width,
                     LLVMValueRef base_ptr,
                     LLVMValueRef offsets,
                     unsigned i)
{
   LLVMTypeRef src_type = LLVMIntTypeInContext(gallivm->context, src_width);
   LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0);
   LLVMTypeRef dst_elem_type = LLVMIntTypeInContext(gallivm->context, dst_width);
   LLVMValueRef ptr;
   LLVMValueRef res;

   assert(LLVMTypeOf(base_ptr) == LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0));

   ptr = lp_build_gather_elem_ptr(gallivm, length, base_ptr, offsets, i);
   ptr = LLVMBuildBitCast(gallivm->builder, ptr, src_ptr_type, "");
   res = LLVMBuildLoad(gallivm->builder, ptr, "");

   assert(src_width <= dst_width);
   if (src_width > dst_width)
      res = LLVMBuildTrunc(gallivm->builder, res, dst_elem_type, "");
   if (src_width < dst_width)
      res = LLVMBuildZExt(gallivm->builder, res, dst_elem_type, "");

   return res;
}
Пример #2
0
/**
 * Generates LLVM IR to call debug_printf.
 */
static LLVMValueRef
lp_build_print_args(struct gallivm_state* gallivm,
                    int argcount,
                    LLVMValueRef* args)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMContextRef context = gallivm->context;
   LLVMValueRef func_printf;
   LLVMTypeRef printf_type;
   int i;

   assert(args);
   assert(argcount > 0);
   assert(LLVMTypeOf(args[0]) == LLVMPointerType(LLVMInt8TypeInContext(context), 0));

   /* Cast any float arguments to doubles as printf expects */
   for (i = 1; i < argcount; i++) {
      LLVMTypeRef type = LLVMTypeOf(args[i]);

      if (LLVMGetTypeKind(type) == LLVMFloatTypeKind)
         args[i] = LLVMBuildFPExt(builder, args[i], LLVMDoubleTypeInContext(context), "");
   }

   printf_type = LLVMFunctionType(LLVMInt32TypeInContext(context), NULL, 0, 1);
   func_printf = lp_build_const_int_pointer(gallivm, func_to_pointer((func_pointer)debug_printf));
   func_printf = LLVMBuildBitCast(builder, func_printf, LLVMPointerType(printf_type, 0), "debug_printf");

   return LLVMBuildCall(builder, func_printf, args, argcount, "");
}
Пример #3
0
/**
 * Gather one element from scatter positions in memory.
 *
 * @sa lp_build_gather()
 */
LLVMValueRef
lp_build_gather_elem(struct gallivm_state *gallivm,
                     unsigned length,
                     unsigned src_width,
                     unsigned dst_width,
                     boolean aligned,
                     LLVMValueRef base_ptr,
                     LLVMValueRef offsets,
                     unsigned i,
                     boolean vector_justify)
{
   LLVMTypeRef src_type = LLVMIntTypeInContext(gallivm->context, src_width);
   LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0);
   LLVMTypeRef dst_elem_type = LLVMIntTypeInContext(gallivm->context, dst_width);
   LLVMValueRef ptr;
   LLVMValueRef res;

   assert(LLVMTypeOf(base_ptr) == LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0));

   ptr = lp_build_gather_elem_ptr(gallivm, length, base_ptr, offsets, i);
   ptr = LLVMBuildBitCast(gallivm->builder, ptr, src_ptr_type, "");
   res = LLVMBuildLoad(gallivm->builder, ptr, "");

   /* XXX
    * On some archs we probably really want to avoid having to deal
    * with alignments lower than 4 bytes (if fetch size is a power of
    * two >= 32). On x86 it doesn't matter, however.
    * We should be able to guarantee full alignment for any kind of texture
    * fetch (except ARB_texture_buffer_range, oops), but not vertex fetch
    * (there's PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY and friends
    * but I don't think that's quite what we wanted).
    * For ARB_texture_buffer_range, PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT
    * looks like a good fit, but it seems this cap bit (and OpenGL) aren't
    * enforcing what we want (which is what d3d10 does, the offset needs to
    * be aligned to element size, but GL has bytes regardless of element
    * size which would only leave us with minimum alignment restriction of 16
    * which doesn't make much sense if the type isn't 4x32bit). Due to
    * translation of offsets to first_elem in sampler_views it actually seems
    * gallium could not do anything else except 16 no matter what...
    */
  if (!aligned) {
      LLVMSetAlignment(res, 1);
   }

   assert(src_width <= dst_width);
   if (src_width > dst_width) {
      res = LLVMBuildTrunc(gallivm->builder, res, dst_elem_type, "");
   } else if (src_width < dst_width) {
      res = LLVMBuildZExt(gallivm->builder, res, dst_elem_type, "");
      if (vector_justify) {
#ifdef PIPE_ARCH_BIG_ENDIAN
         res = LLVMBuildShl(gallivm->builder, res,
                            LLVMConstInt(dst_elem_type, dst_width - src_width, 0), "");
#endif
      }
   }

   return res;
}
Пример #4
0
LLVMTypeRef ty_class_indirect()
{
    if(indirect_struct_type)
        return indirect_struct_type;

    indirect_struct_type = LLVMStructCreateNamed(utl_get_current_context(), "__egl_class_indirect");

    LLVMTypeRef tys[] = {
        LLVMInt32TypeInContext(utl_get_current_context()),
        LLVMPointerType(LLVMPointerType(LLVMInt8TypeInContext(utl_get_current_context()), 0), 0),
        LLVMPointerType(LLVMInt64TypeInContext(utl_get_current_context()), 0),
        LLVMPointerType(LLVMPointerType(LLVMInt8TypeInContext(utl_get_current_context()), 0), 0)
    };

    LLVMStructSetBody(indirect_struct_type, tys, 4, 0);
    return indirect_struct_type;
}
Пример #5
0
/* returns a LLVM representation corresponding to the C translation of the
 * given IDL type.
 */
LLVMTypeRef llvm_value_type(struct llvm_ctx *ctx, IDL_tree type)
{
	if(type == NULL) return LLVMVoidTypeInContext(ctx->ctx);
	switch(IDL_NODE_TYPE(type)) {
		case IDLN_TYPE_INTEGER: {
			static short bitlens[] = {
				[IDL_INTEGER_TYPE_SHORT] = 16,
				[IDL_INTEGER_TYPE_LONG] = 32,
				[IDL_INTEGER_TYPE_LONGLONG] = 64,
			};
			int t = IDL_TYPE_INTEGER(type).f_type;
			assert(t < G_N_ELEMENTS(bitlens));
			return LLVMIntTypeInContext(ctx->ctx, bitlens[t]);
		}

		case IDLN_NATIVE: {
			/* each of these is the size of a single word, which is all LLVM
			 * wants to know.
			 */
			if(IS_WORD_TYPE(type) || IS_FPAGE_TYPE(type)
				|| IS_TIME_TYPE(type))
			{
				return ctx->wordt;
			} else {
				fprintf(stderr, "%s: native type `%s' not supported\n",
					__FUNCTION__, NATIVE_NAME(type));
				abort();
			}
			break;
		}

		case IDLN_TYPE_FLOAT:
			switch(IDL_TYPE_FLOAT(type).f_type) {
				case IDL_FLOAT_TYPE_FLOAT:
					return LLVMFloatTypeInContext(ctx->ctx);
				case IDL_FLOAT_TYPE_DOUBLE:
					return LLVMDoubleTypeInContext(ctx->ctx);
				case IDL_FLOAT_TYPE_LONGDOUBLE:
					return LLVMFP128TypeInContext(ctx->ctx);
			}
			g_assert_not_reached();

		case IDLN_TYPE_BOOLEAN:
		case IDLN_TYPE_OCTET:
		case IDLN_TYPE_CHAR:
			return LLVMInt8TypeInContext(ctx->ctx);

		case IDLN_TYPE_WIDE_CHAR:
			return ctx->i32t;

		case IDLN_TYPE_ENUM: return LLVMInt16TypeInContext(ctx->ctx);

		default:
			NOTDEFINED(type);
	}
}
Пример #6
0
LLVMValueRef 
lp_build_const_string_variable(LLVMModuleRef module,
                               LLVMContextRef context,
                               const char *str, int len)
{
   LLVMValueRef string = LLVMAddGlobal(module, LLVMArrayType(LLVMInt8TypeInContext(context), len + 1), "");
   LLVMSetGlobalConstant(string, TRUE);
   LLVMSetLinkage(string, LLVMInternalLinkage);
   LLVMSetInitializer(string, LLVMConstStringInContext(context, str, len + 1, TRUE));
   return string;
}
Пример #7
0
LLVMTypeRef ett_closure_type(EagleComplexType *type) { if(!ET_IS_CLOSURE(type)) return NULL;
    EagleFunctionType *ft = (EagleFunctionType *)type;

    LLVMTypeRef *tys = malloc(sizeof(LLVMTypeRef) * (ft->pct + 1));
    int i;
    for(i = 1; i < ft->pct + 1; i++)
        tys[i] = ett_llvm_type(ft->params[i - 1]);
    tys[0] = LLVMPointerType(LLVMInt8TypeInContext(utl_get_current_context()), 0);

    LLVMTypeRef out = LLVMFunctionType(ett_llvm_type(ft->retType), tys, ft->pct + 1, 0);
    free(tys);
    return out;
}
Пример #8
0
/**
 * @param n  is the number of pixels processed
 * @param packed  is a <n x i32> vector with the packed YUYV blocks
 * @param i  is a <n x i32> vector with the x pixel coordinate (0 or 1)
 * @return  a <4*n x i8> vector with the pixel RGBA values in AoS
 */
LLVMValueRef
lp_build_fetch_subsampled_rgba_aos(struct gallivm_state *gallivm,
                                   const struct util_format_description *format_desc,
                                   unsigned n,
                                   LLVMValueRef base_ptr,
                                   LLVMValueRef offset,
                                   LLVMValueRef i,
                                   LLVMValueRef j)
{
   LLVMValueRef packed;
   LLVMValueRef rgba;
   struct lp_type fetch_type;

   assert(format_desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED);
   assert(format_desc->block.bits == 32);
   assert(format_desc->block.width == 2);
   assert(format_desc->block.height == 1);

   fetch_type = lp_type_uint(32);
   packed = lp_build_gather(gallivm, n, 32, fetch_type, TRUE, base_ptr, offset, FALSE);

   (void)j;

   switch (format_desc->format) {
   case PIPE_FORMAT_UYVY:
      rgba = uyvy_to_rgba_aos(gallivm, n, packed, i);
      break;
   case PIPE_FORMAT_YUYV:
      rgba = yuyv_to_rgba_aos(gallivm, n, packed, i);
      break;
   case PIPE_FORMAT_R8G8_B8G8_UNORM:
      rgba = rgbg_to_rgba_aos(gallivm, n, packed, i);
      break;
   case PIPE_FORMAT_G8R8_G8B8_UNORM:
      rgba = grgb_to_rgba_aos(gallivm, n, packed, i);
      break;
   case PIPE_FORMAT_G8R8_B8R8_UNORM:
      rgba = grbr_to_rgba_aos(gallivm, n, packed, i);
      break;
   case PIPE_FORMAT_R8G8_R8B8_UNORM:
      rgba = rgrb_to_rgba_aos(gallivm, n, packed, i);
      break;
   default:
      assert(0);
      rgba =  LLVMGetUndef(LLVMVectorType(LLVMInt8TypeInContext(gallivm->context), 4*n));
      break;
   }

   return rgba;
}
Пример #9
0
static LLVMValueRef
rgb_to_rgba_aos(struct gallivm_state *gallivm,
                unsigned n,
                LLVMValueRef r, LLVMValueRef g, LLVMValueRef b)
{
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_type type;
   LLVMValueRef a;
   LLVMValueRef rgba;

   memset(&type, 0, sizeof type);
   type.sign = TRUE;
   type.width = 32;
   type.length = n;

   assert(lp_check_value(type, r));
   assert(lp_check_value(type, g));
   assert(lp_check_value(type, b));

   /*
    * Make a 4 x unorm8 vector
    */

#ifdef PIPE_ARCH_LITTLE_ENDIAN
   r = r;
   g = LLVMBuildShl(builder, g, lp_build_const_int_vec(gallivm, type, 8), "");
   b = LLVMBuildShl(builder, b, lp_build_const_int_vec(gallivm, type, 16), "");
   a = lp_build_const_int_vec(gallivm, type, 0xff000000);
#else
   r = LLVMBuildShl(builder, r, lp_build_const_int_vec(gallivm, type, 24), "");
   g = LLVMBuildShl(builder, g, lp_build_const_int_vec(gallivm, type, 16), "");
   b = LLVMBuildShl(builder, b, lp_build_const_int_vec(gallivm, type, 8), "");
   a = lp_build_const_int_vec(gallivm, type, 0x000000ff);
#endif

   rgba = r;
   rgba = LLVMBuildOr(builder, rgba, g, "");
   rgba = LLVMBuildOr(builder, rgba, b, "");
   rgba = LLVMBuildOr(builder, rgba, a, "");

   rgba = LLVMBuildBitCast(builder, rgba,
                           LLVMVectorType(LLVMInt8TypeInContext(gallivm->context), 4*n), "");

   return rgba;
}
Пример #10
0
/* Initialize module-independent parts of the context.
 *
 * The caller is responsible for initializing ctx::module and ctx::builder.
 */
void
ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context)
{
	LLVMValueRef args[1];

	ctx->context = context;
	ctx->module = NULL;
	ctx->builder = NULL;

	ctx->voidt = LLVMVoidTypeInContext(ctx->context);
	ctx->i1 = LLVMInt1TypeInContext(ctx->context);
	ctx->i8 = LLVMInt8TypeInContext(ctx->context);
	ctx->i16 = LLVMIntTypeInContext(ctx->context, 16);
	ctx->i32 = LLVMIntTypeInContext(ctx->context, 32);
	ctx->i64 = LLVMIntTypeInContext(ctx->context, 64);
	ctx->f16 = LLVMHalfTypeInContext(ctx->context);
	ctx->f32 = LLVMFloatTypeInContext(ctx->context);
	ctx->f64 = LLVMDoubleTypeInContext(ctx->context);
	ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
	ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
	ctx->v8i32 = LLVMVectorType(ctx->i32, 8);

	ctx->i32_0 = LLVMConstInt(ctx->i32, 0, false);
	ctx->i32_1 = LLVMConstInt(ctx->i32, 1, false);
	ctx->f32_0 = LLVMConstReal(ctx->f32, 0.0);
	ctx->f32_1 = LLVMConstReal(ctx->f32, 1.0);

	ctx->range_md_kind = LLVMGetMDKindIDInContext(ctx->context,
						     "range", 5);

	ctx->invariant_load_md_kind = LLVMGetMDKindIDInContext(ctx->context,
							       "invariant.load", 14);

	ctx->fpmath_md_kind = LLVMGetMDKindIDInContext(ctx->context, "fpmath", 6);

	args[0] = LLVMConstReal(ctx->f32, 2.5);
	ctx->fpmath_md_2p5_ulp = LLVMMDNodeInContext(ctx->context, args, 1);

	ctx->uniform_md_kind = LLVMGetMDKindIDInContext(ctx->context,
							"amdgpu.uniform", 14);

	ctx->empty_md = LLVMMDNodeInContext(ctx->context, NULL, 0);
}
Пример #11
0
/**
 * Get the pointer to one element from scatter positions in memory.
 *
 * @sa lp_build_gather()
 */
LLVMValueRef
lp_build_gather_elem_ptr(struct gallivm_state *gallivm,
                         unsigned length,
                         LLVMValueRef base_ptr,
                         LLVMValueRef offsets,
                         unsigned i)
{
   LLVMValueRef offset;
   LLVMValueRef ptr;

   assert(LLVMTypeOf(base_ptr) == LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0));

   if (length == 1) {
      assert(i == 0);
      offset = offsets;
   } else {
      LLVMValueRef index = lp_build_const_int32(gallivm, i);
      offset = LLVMBuildExtractElement(gallivm->builder, offsets, index, "");
   }

   ptr = LLVMBuildGEP(gallivm->builder, base_ptr, &offset, 1, "");

   return ptr;
}
Пример #12
0
static void init_runtime(compile_t* c)
{
  c->str_builtin = stringtab("$0");
  c->str_Bool = stringtab("Bool");
  c->str_I8 = stringtab("I8");
  c->str_I16 = stringtab("I16");
  c->str_I32 = stringtab("I32");
  c->str_I64 = stringtab("I64");
  c->str_I128 = stringtab("I128");
  c->str_ILong = stringtab("ILong");
  c->str_ISize = stringtab("ISize");
  c->str_U8 = stringtab("U8");
  c->str_U16 = stringtab("U16");
  c->str_U32 = stringtab("U32");
  c->str_U64 = stringtab("U64");
  c->str_U128 = stringtab("U128");
  c->str_ULong = stringtab("ULong");
  c->str_USize = stringtab("USize");
  c->str_F32 = stringtab("F32");
  c->str_F64 = stringtab("F64");
  c->str_Pointer = stringtab("Pointer");
  c->str_Maybe = stringtab("MaybePointer");
  c->str_DoNotOptimise = stringtab("DoNotOptimise");
  c->str_Array = stringtab("Array");
  c->str_String = stringtab("String");
  c->str_Platform = stringtab("Platform");
  c->str_Main = stringtab("Main");
  c->str_Env = stringtab("Env");

  c->str_add = stringtab("add");
  c->str_sub = stringtab("sub");
  c->str_mul = stringtab("mul");
  c->str_div = stringtab("div");
  c->str_mod = stringtab("mod");
  c->str_neg = stringtab("neg");
  c->str_and = stringtab("op_and");
  c->str_or = stringtab("op_or");
  c->str_xor = stringtab("op_xor");
  c->str_not = stringtab("op_not");
  c->str_shl = stringtab("shl");
  c->str_shr = stringtab("shr");
  c->str_eq = stringtab("eq");
  c->str_ne = stringtab("ne");
  c->str_lt = stringtab("lt");
  c->str_le = stringtab("le");
  c->str_ge = stringtab("ge");
  c->str_gt = stringtab("gt");

  c->str_this = stringtab("this");
  c->str_create = stringtab("create");
  c->str__create = stringtab("_create");
  c->str__init = stringtab("_init");
  c->str__final = stringtab("_final");
  c->str__event_notify = stringtab("_event_notify");

  LLVMTypeRef type;
  LLVMTypeRef params[5];
  LLVMValueRef value;

  c->void_type = LLVMVoidTypeInContext(c->context);
  c->ibool = LLVMInt8TypeInContext(c->context);
  c->i1 = LLVMInt1TypeInContext(c->context);
  c->i8 = LLVMInt8TypeInContext(c->context);
  c->i16 = LLVMInt16TypeInContext(c->context);
  c->i32 = LLVMInt32TypeInContext(c->context);
  c->i64 = LLVMInt64TypeInContext(c->context);
  c->i128 = LLVMIntTypeInContext(c->context, 128);
  c->f32 = LLVMFloatTypeInContext(c->context);
  c->f64 = LLVMDoubleTypeInContext(c->context);
  c->intptr = LLVMIntPtrTypeInContext(c->context, c->target_data);

  // i8*
  c->void_ptr = LLVMPointerType(c->i8, 0);

  // forward declare object
  c->object_type = LLVMStructCreateNamed(c->context, "__object");
  c->object_ptr = LLVMPointerType(c->object_type, 0);

  // padding required in an actor between the descriptor and fields
  c->actor_pad = LLVMArrayType(c->i8, PONY_ACTOR_PAD_SIZE);

  // message
  params[0] = c->i32; // size
  params[1] = c->i32; // id
  c->msg_type = LLVMStructCreateNamed(c->context, "__message");
  c->msg_ptr = LLVMPointerType(c->msg_type, 0);
  LLVMStructSetBody(c->msg_type, params, 2, false);

  // trace
  // void (*)(i8*, __object*)
  params[0] = c->void_ptr;
  params[1] = c->object_ptr;
  c->trace_type = LLVMFunctionType(c->void_type, params, 2, false);
  c->trace_fn = LLVMPointerType(c->trace_type, 0);

  // serialise
  // void (*)(i8*, __object*, i8*, intptr, i32)
  params[0] = c->void_ptr;
  params[1] = c->object_ptr;
  params[2] = c->void_ptr;
  params[3] = c->intptr;
  params[4] = c->i32;
  c->serialise_type = LLVMFunctionType(c->void_type, params, 5, false);
  c->serialise_fn = LLVMPointerType(c->serialise_type, 0);

  // dispatch
  // void (*)(i8*, __object*, $message*)
  params[0] = c->void_ptr;
  params[1] = c->object_ptr;
  params[2] = c->msg_ptr;
  c->dispatch_type = LLVMFunctionType(c->void_type, params, 3, false);
  c->dispatch_fn = LLVMPointerType(c->dispatch_type, 0);

  // void (*)(__object*)
  params[0] = c->object_ptr;
  c->final_fn = LLVMPointerType(
    LLVMFunctionType(c->void_type, params, 1, false), 0);

  // descriptor, opaque version
  // We need this in order to build our own structure.
  const char* desc_name = genname_descriptor(NULL);
  c->descriptor_type = LLVMStructCreateNamed(c->context, desc_name);
  c->descriptor_ptr = LLVMPointerType(c->descriptor_type, 0);

  // field descriptor
  // Also needed to build a descriptor structure.
  params[0] = c->i32;
  params[1] = c->descriptor_ptr;
  c->field_descriptor = LLVMStructTypeInContext(c->context, params, 2, false);

  // descriptor, filled in
  gendesc_basetype(c, c->descriptor_type);

  // define object
  params[0] = c->descriptor_ptr;
  LLVMStructSetBody(c->object_type, params, 1, false);

  // $i8* pony_ctx()
  type = LLVMFunctionType(c->void_ptr, NULL, 0, false);
  value = LLVMAddFunction(c->module, "pony_ctx", type);
  LLVMAddFunctionAttr(value, LLVMNoUnwindAttribute);
  LLVMAddFunctionAttr(value, LLVMReadNoneAttribute);

  // __object* pony_create(i8*, __Desc*)
  params[0] = c->void_ptr;
  params[1] = c->descriptor_ptr;
  type = LLVMFunctionType(c->object_ptr, params, 2, false);
  value = LLVMAddFunction(c->module, "pony_create", type);
  LLVMAddFunctionAttr(value, LLVMNoUnwindAttribute);
  LLVMSetReturnNoAlias(value);
  LLVMSetDereferenceable(value, 0, PONY_ACTOR_PAD_SIZE);

  // void ponyint_destroy(__object*)
  params[0] = c->object_ptr;
  type = LLVMFunctionType(c->void_type, params, 1, false);
  value = LLVMAddFunction(c->module, "ponyint_destroy", type);
  LLVMAddFunctionAttr(value, LLVMNoUnwindAttribute);

  // void pony_sendv(i8*, __object*, $message*);
  params[0] = c->void_ptr;
  params[1] = c->object_ptr;
  params[2] = c->msg_ptr;
  type = LLVMFunctionType(c->void_type, params, 3, false);
  value = LLVMAddFunction(c->module, "pony_sendv", type);
  LLVMAddFunctionAttr(value, LLVMNoUnwindAttribute);

  // i8* pony_alloc(i8*, intptr)
  params[0] = c->void_ptr;
  params[1] = c->intptr;
  type = LLVMFunctionType(c->void_ptr, params, 2, false);
  value = LLVMAddFunction(c->module, "pony_alloc", type);
  LLVMAddFunctionAttr(value, LLVMNoUnwindAttribute);
  LLVMSetReturnNoAlias(value);
#if PONY_LLVM >= 307
  LLVMSetDereferenceableOrNull(value, 0, HEAP_MIN);
#endif

  // i8* pony_alloc_small(i8*, i32)
  params[0] = c->void_ptr;
  params[1] = c->i32;
  type = LLVMFunctionType(c->void_ptr, params, 2, false);
  value = LLVMAddFunction(c->module, "pony_alloc_small", type);
  LLVMAddFunctionAttr(value, LLVMNoUnwindAttribute);
  LLVMSetReturnNoAlias(value);
  LLVMSetDereferenceable(value, 0, HEAP_MIN);

  // i8* pony_alloc_large(i8*, intptr)
  params[0] = c->void_ptr;
  params[1] = c->intptr;
  type = LLVMFunctionType(c->void_ptr, params, 2, false);
  value = LLVMAddFunction(c->module, "pony_alloc_large", type);
  LLVMAddFunctionAttr(value, LLVMNoUnwindAttribute);
  LLVMSetReturnNoAlias(value);
  LLVMSetDereferenceable(value, 0, HEAP_MAX + 1);

  // i8* pony_realloc(i8*, i8*, intptr)
  params[0] = c->void_ptr;
  params[1] = c->void_ptr;
  params[2] = c->intptr;
  type = LLVMFunctionType(c->void_ptr, params, 3, false);
  value = LLVMAddFunction(c->module, "pony_realloc", type);
  LLVMAddFunctionAttr(value, LLVMNoUnwindAttribute);
  LLVMSetReturnNoAlias(value);
#if PONY_LLVM >= 307
  LLVMSetDereferenceableOrNull(value, 0, HEAP_MIN);
#endif

  // i8* pony_alloc_final(i8*, intptr, c->final_fn)
  params[0] = c->void_ptr;
  params[1] = c->intptr;
  params[2] = c->final_fn;
  type = LLVMFunctionType(c->void_ptr, params, 3, false);
  value = LLVMAddFunction(c->module, "pony_alloc_final", type);
  LLVMAddFunctionAttr(value, LLVMNoUnwindAttribute);
  LLVMSetReturnNoAlias(value);
#if PONY_LLVM >= 307
  LLVMSetDereferenceableOrNull(value, 0, HEAP_MIN);
#endif

  // $message* pony_alloc_msg(i32, i32)
  params[0] = c->i32;
  params[1] = c->i32;
  type = LLVMFunctionType(c->msg_ptr, params, 2, false);
  value = LLVMAddFunction(c->module, "pony_alloc_msg", type);
  LLVMAddFunctionAttr(value, LLVMNoUnwindAttribute);
  LLVMSetReturnNoAlias(value);

  // void pony_trace(i8*, i8*)
  params[0] = c->void_ptr;
  params[1] = c->void_ptr;
  type = LLVMFunctionType(c->void_type, params, 2, false);
  value = LLVMAddFunction(c->module, "pony_trace", type);
  LLVMAddFunctionAttr(value, LLVMNoUnwindAttribute);

  // i8* pony_traceobject(i8*, __object*, __Desc*, i32)
  params[0] = c->void_ptr;
  params[1] = c->object_ptr;
  params[2] = c->descriptor_ptr;
  params[3] = c->i32;
  type = LLVMFunctionType(c->void_ptr, params, 4, false);
  value = LLVMAddFunction(c->module, "pony_traceknown", type);
  LLVMAddFunctionAttr(value, LLVMNoUnwindAttribute);

  // i8* pony_traceunknown(i8*, __object*, i32)
  params[0] = c->void_ptr;
  params[1] = c->object_ptr;
  params[2] = c->i32;
  type = LLVMFunctionType(c->void_ptr, params, 3, false);
  value = LLVMAddFunction(c->module, "pony_traceunknown", type);
  LLVMAddFunctionAttr(value, LLVMNoUnwindAttribute);

  // void pony_gc_send(i8*)
  params[0] = c->void_ptr;
  type = LLVMFunctionType(c->void_type, params, 1, false);
  value = LLVMAddFunction(c->module, "pony_gc_send", type);
  LLVMAddFunctionAttr(value, LLVMNoUnwindAttribute);

  // void pony_gc_recv(i8*)
  params[0] = c->void_ptr;
  type = LLVMFunctionType(c->void_type, params, 1, false);
  value = LLVMAddFunction(c->module, "pony_gc_recv", type);
  LLVMAddFunctionAttr(value, LLVMNoUnwindAttribute);

  // void pony_send_done(i8*)
  params[0] = c->void_ptr;
  type = LLVMFunctionType(c->void_type, params, 1, false);
  value = LLVMAddFunction(c->module, "pony_send_done", type);
  LLVMAddFunctionAttr(value, LLVMNoUnwindAttribute);

  // void pony_recv_done(i8*)
  params[0] = c->void_ptr;
  type = LLVMFunctionType(c->void_type, params, 1, false);
  value = LLVMAddFunction(c->module, "pony_recv_done", type);
  LLVMAddFunctionAttr(value, LLVMNoUnwindAttribute);

  // void pony_serialise_reserve(i8*, i8*, intptr)
  params[0] = c->void_ptr;
  params[1] = c->void_ptr;
  params[2] = c->intptr;
  type = LLVMFunctionType(c->void_type, params, 3, false);
  value = LLVMAddFunction(c->module, "pony_serialise_reserve", type);
  LLVMAddFunctionAttr(value, LLVMNoUnwindAttribute);

  // intptr pony_serialise_offset(i8*, i8*)
  params[0] = c->void_ptr;
  params[1] = c->void_ptr;
  type = LLVMFunctionType(c->intptr, params, 2, false);
  value = LLVMAddFunction(c->module, "pony_serialise_offset", type);
  LLVMAddFunctionAttr(value, LLVMNoUnwindAttribute);

  // i8* pony_deserialise_offset(i8*, __desc*, intptr)
  params[0] = c->void_ptr;
  params[1] = c->descriptor_ptr;
  params[2] = c->intptr;
  type = LLVMFunctionType(c->void_ptr, params, 3, false);
  value = LLVMAddFunction(c->module, "pony_deserialise_offset", type);

  // i8* pony_deserialise_block(i8*, intptr, intptr)
  params[0] = c->void_ptr;
  params[1] = c->intptr;
  params[2] = c->intptr;
  type = LLVMFunctionType(c->void_ptr, params, 3, false);
  value = LLVMAddFunction(c->module, "pony_deserialise_block", type);

  // i32 pony_init(i32, i8**)
  params[0] = c->i32;
  params[1] = LLVMPointerType(c->void_ptr, 0);
  type = LLVMFunctionType(c->i32, params, 2, false);
  value = LLVMAddFunction(c->module, "pony_init", type);
  LLVMAddFunctionAttr(value, LLVMNoUnwindAttribute);

  // void pony_become(i8*, __object*)
  params[0] = c->void_ptr;
  params[1] = c->object_ptr;
  type = LLVMFunctionType(c->void_type, params, 2, false);
  value = LLVMAddFunction(c->module, "pony_become", type);
  LLVMAddFunctionAttr(value, LLVMNoUnwindAttribute);

  // i32 pony_start(i32)
  params[0] = c->i32;
  type = LLVMFunctionType(c->i32, params, 1, false);
  value = LLVMAddFunction(c->module, "pony_start", type);
  LLVMAddFunctionAttr(value, LLVMNoUnwindAttribute);

  // void pony_throw()
  type = LLVMFunctionType(c->void_type, NULL, 0, false);
  LLVMAddFunction(c->module, "pony_throw", type);

  // i32 pony_personality_v0(...)
  type = LLVMFunctionType(c->i32, NULL, 0, true);
  c->personality = LLVMAddFunction(c->module, "pony_personality_v0", type);

  // void llvm.memcpy.*(i8*, i8*, i32/64, i32, i1)
  params[0] = c->void_ptr;
  params[1] = c->void_ptr;
  params[3] = c->i32;
  params[4] = c->i1;
  if(target_is_ilp32(c->opt->triple))
  {
    params[2] = c->i32;
    type = LLVMFunctionType(c->void_type, params, 5, false);
    value = LLVMAddFunction(c->module, "llvm.memcpy.p0i8.p0i8.i32", type);
  } else {
    params[2] = c->i64;
    type = LLVMFunctionType(c->void_type, params, 5, false);
    value = LLVMAddFunction(c->module, "llvm.memcpy.p0i8.p0i8.i64", type);
  }
  LLVMAddFunctionAttr(value, LLVMNoUnwindAttribute);

  // void llvm.memmove.*(i8*, i8*, i32/64, i32, i1)
  params[0] = c->void_ptr;
  params[1] = c->void_ptr;
  params[3] = c->i32;
  params[4] = c->i1;
  if(target_is_ilp32(c->opt->triple))
  {
    params[2] = c->i32;
    type = LLVMFunctionType(c->void_type, params, 5, false);
    value = LLVMAddFunction(c->module, "llvm.memmove.p0i8.p0i8.i32", type);
  } else {
    params[2] = c->i64;
    type = LLVMFunctionType(c->void_type, params, 5, false);
    value = LLVMAddFunction(c->module, "llvm.memmove.p0i8.p0i8.i64", type);
  }
  LLVMAddFunctionAttr(value, LLVMNoUnwindAttribute);
}
Пример #13
0
LLVMTypeRef ett_llvm_type(EagleComplexType *type)
{
    switch(type->type)
    {
        case ETVoid:
            return LLVMVoidTypeInContext(utl_get_current_context());
        case ETFloat:
            return LLVMFloatTypeInContext(utl_get_current_context());
        case ETDouble:
            return LLVMDoubleTypeInContext(utl_get_current_context());
        case ETInt1:
            return LLVMInt1TypeInContext(utl_get_current_context());
        case ETGeneric: // In practice this doesn't matter
        case ETAny:
        case ETInt8:
        case ETUInt8:
            return LLVMInt8TypeInContext(utl_get_current_context());
        case ETInt16:
        case ETUInt16:
            return LLVMInt16TypeInContext(utl_get_current_context());
        case ETInt32:
        case ETUInt32:
            return LLVMInt32TypeInContext(utl_get_current_context());
        case ETInt64:
        case ETUInt64:
            return LLVMInt64TypeInContext(utl_get_current_context());
        case ETCString:
            return LLVMPointerType(LLVMInt8TypeInContext(utl_get_current_context()), 0);
        case ETEnum:
            return LLVMInt64TypeInContext(utl_get_current_context());
        case ETGenerator:
        {
            if(generator_type)
                return generator_type;

            LLVMTypeRef ptmp[2];
            ptmp[0] = LLVMPointerType(LLVMInt8TypeInContext(utl_get_current_context()), 0);
            ptmp[1] = LLVMPointerType(LLVMInt8TypeInContext(utl_get_current_context()), 0);

            generator_type = LLVMStructCreateNamed(utl_get_current_context(), "__egl_gen_strct");
            LLVMStructSetBody(generator_type, ptmp, 2, 0);
            return generator_type;
        }
        case ETClass:
        case ETStruct:
        {
            EagleStructType *st = (EagleStructType *)type;
            LLVMTypeRef loaded = LLVMGetTypeByName(the_module, st->name);
            if(loaded)
                return loaded;

            return NULL;
           // LLVMTypeRef ty = LLVMStructTypeInContext(utl_get_current_context(),
        }
        case ETInterface:
        {
            return LLVMInt8TypeInContext(utl_get_current_context());
        }
        case ETPointer:
        {
            EaglePointerType *pt = (EaglePointerType *)type;
            if(pt->counted || pt->weak)
            {
                LLVMTypeRef ptmp[2];
                ptmp[0] = LLVMPointerType(LLVMInt8TypeInContext(utl_get_current_context()), 0);
                ptmp[1] = LLVMInt1TypeInContext(utl_get_current_context());

                LLVMTypeRef tys[6];
                tys[0] = LLVMInt64TypeInContext(utl_get_current_context());
                tys[1] = LLVMInt16TypeInContext(utl_get_current_context());
                tys[2] = LLVMInt16TypeInContext(utl_get_current_context());
                tys[3] = LLVMPointerType(LLVMInt8TypeInContext(utl_get_current_context()), 0);
                tys[4] = LLVMPointerType(LLVMFunctionType(LLVMVoidTypeInContext(utl_get_current_context()), ptmp, 2, 0), 0);
                tys[5] = ett_llvm_type(pt->to);

                return LLVMPointerType(ty_get_counted(LLVMStructTypeInContext(utl_get_current_context(), tys, 6, 0)), 0);
            }
            return LLVMPointerType(ett_llvm_type(((EaglePointerType *)type)->to), 0);
        }
        case ETArray:
            {
                EagleArrayType *at = (EagleArrayType *)type;
                if(at->ct < 0)
                    return LLVMPointerType(ett_llvm_type(at->of), 0);
                else
                    return LLVMArrayType(ett_llvm_type(at->of), at->ct);
            }
        case ETFunction:
            {
                EagleFunctionType *ft = (EagleFunctionType *)type;
                if(ET_IS_CLOSURE(type))
                {
                    LLVMTypeRef tys[2];
                    tys[0] = LLVMPointerType(LLVMInt8TypeInContext(utl_get_current_context()), 0);
                    tys[1] = LLVMPointerType(LLVMInt8TypeInContext(utl_get_current_context()), 0);
                    return LLVMStructTypeInContext(utl_get_current_context(), tys, 2, 0);
                }

                LLVMTypeRef *tys = malloc(sizeof(LLVMTypeRef) * ft->pct);
                int i;
                for(i = 0; i < ft->pct; i++)
                    tys[i] = ett_llvm_type(ft->params[i]);
                LLVMTypeRef out = LLVMFunctionType(ett_llvm_type(ft->retType), tys, ft->pct, 0);
                free(tys);
                return out;
            }
        default:
            return NULL;
    }
}
Пример #14
0
static void
lp_jit_create_types(struct lp_fragment_shader_variant *lp)
{
   struct gallivm_state *gallivm = lp->gallivm;
   LLVMContextRef lc = gallivm->context;
   LLVMTypeRef viewport_type, texture_type, sampler_type;

   /* struct lp_jit_viewport */
   {
      LLVMTypeRef elem_types[LP_JIT_VIEWPORT_NUM_FIELDS];

      elem_types[LP_JIT_VIEWPORT_MIN_DEPTH] =
      elem_types[LP_JIT_VIEWPORT_MAX_DEPTH] = LLVMFloatTypeInContext(lc);

      viewport_type = LLVMStructTypeInContext(lc, elem_types,
                                              Elements(elem_types), 0);

      LP_CHECK_MEMBER_OFFSET(struct lp_jit_viewport, min_depth,
                             gallivm->target, viewport_type,
                             LP_JIT_VIEWPORT_MIN_DEPTH);
      LP_CHECK_MEMBER_OFFSET(struct lp_jit_viewport, max_depth,
                             gallivm->target, viewport_type,
                             LP_JIT_VIEWPORT_MAX_DEPTH);
      LP_CHECK_STRUCT_SIZE(struct lp_jit_viewport,
                           gallivm->target, viewport_type);
   }

   /* struct lp_jit_texture */
   {
      LLVMTypeRef elem_types[LP_JIT_TEXTURE_NUM_FIELDS];

      elem_types[LP_JIT_TEXTURE_WIDTH]  =
      elem_types[LP_JIT_TEXTURE_HEIGHT] =
      elem_types[LP_JIT_TEXTURE_DEPTH] =
      elem_types[LP_JIT_TEXTURE_FIRST_LEVEL] =
      elem_types[LP_JIT_TEXTURE_LAST_LEVEL] = LLVMInt32TypeInContext(lc);
      elem_types[LP_JIT_TEXTURE_BASE] = LLVMPointerType(LLVMInt8TypeInContext(lc), 0);
      elem_types[LP_JIT_TEXTURE_ROW_STRIDE] =
      elem_types[LP_JIT_TEXTURE_IMG_STRIDE] =
      elem_types[LP_JIT_TEXTURE_MIP_OFFSETS] =
         LLVMArrayType(LLVMInt32TypeInContext(lc), LP_MAX_TEXTURE_LEVELS);

      texture_type = LLVMStructTypeInContext(lc, elem_types,
                                             Elements(elem_types), 0);

      LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, width,
                             gallivm->target, texture_type,
                             LP_JIT_TEXTURE_WIDTH);
      LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, height,
                             gallivm->target, texture_type,
                             LP_JIT_TEXTURE_HEIGHT);
      LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, depth,
                             gallivm->target, texture_type,
                             LP_JIT_TEXTURE_DEPTH);
      LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, first_level,
                             gallivm->target, texture_type,
                             LP_JIT_TEXTURE_FIRST_LEVEL);
      LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, last_level,
                             gallivm->target, texture_type,
                             LP_JIT_TEXTURE_LAST_LEVEL);
      LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, base,
                             gallivm->target, texture_type,
                             LP_JIT_TEXTURE_BASE);
      LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, row_stride,
                             gallivm->target, texture_type,
                             LP_JIT_TEXTURE_ROW_STRIDE);
      LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, img_stride,
                             gallivm->target, texture_type,
                             LP_JIT_TEXTURE_IMG_STRIDE);
      LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, mip_offsets,
                             gallivm->target, texture_type,
                             LP_JIT_TEXTURE_MIP_OFFSETS);
      LP_CHECK_STRUCT_SIZE(struct lp_jit_texture,
                           gallivm->target, texture_type);
   }

   /* struct lp_jit_sampler */
   {
      LLVMTypeRef elem_types[LP_JIT_SAMPLER_NUM_FIELDS];
      elem_types[LP_JIT_SAMPLER_MIN_LOD] =
      elem_types[LP_JIT_SAMPLER_MAX_LOD] =
      elem_types[LP_JIT_SAMPLER_LOD_BIAS] = LLVMFloatTypeInContext(lc);
      elem_types[LP_JIT_SAMPLER_BORDER_COLOR] =
         LLVMArrayType(LLVMFloatTypeInContext(lc), 4);

      sampler_type = LLVMStructTypeInContext(lc, elem_types,
                                             Elements(elem_types), 0);

      LP_CHECK_MEMBER_OFFSET(struct lp_jit_sampler, min_lod,
                             gallivm->target, sampler_type,
                             LP_JIT_SAMPLER_MIN_LOD);
      LP_CHECK_MEMBER_OFFSET(struct lp_jit_sampler, max_lod,
                             gallivm->target, sampler_type,
                             LP_JIT_SAMPLER_MAX_LOD);
      LP_CHECK_MEMBER_OFFSET(struct lp_jit_sampler, lod_bias,
                             gallivm->target, sampler_type,
                             LP_JIT_SAMPLER_LOD_BIAS);
      LP_CHECK_MEMBER_OFFSET(struct lp_jit_sampler, border_color,
                             gallivm->target, sampler_type,
                             LP_JIT_SAMPLER_BORDER_COLOR);
      LP_CHECK_STRUCT_SIZE(struct lp_jit_sampler,
                           gallivm->target, sampler_type);
   }

   /* struct lp_jit_context */
   {
      LLVMTypeRef elem_types[LP_JIT_CTX_COUNT];
      LLVMTypeRef context_type;

      elem_types[LP_JIT_CTX_CONSTANTS] =
         LLVMArrayType(LLVMPointerType(LLVMFloatTypeInContext(lc), 0), LP_MAX_TGSI_CONST_BUFFERS);
      elem_types[LP_JIT_CTX_NUM_CONSTANTS] =
            LLVMArrayType(LLVMInt32TypeInContext(lc), LP_MAX_TGSI_CONST_BUFFERS);
      elem_types[LP_JIT_CTX_ALPHA_REF] = LLVMFloatTypeInContext(lc);
      elem_types[LP_JIT_CTX_STENCIL_REF_FRONT] =
      elem_types[LP_JIT_CTX_STENCIL_REF_BACK] = LLVMInt32TypeInContext(lc);
      elem_types[LP_JIT_CTX_U8_BLEND_COLOR] = LLVMPointerType(LLVMInt8TypeInContext(lc), 0);
      elem_types[LP_JIT_CTX_F_BLEND_COLOR] = LLVMPointerType(LLVMFloatTypeInContext(lc), 0);
      elem_types[LP_JIT_CTX_VIEWPORTS] = LLVMPointerType(viewport_type, 0);
      elem_types[LP_JIT_CTX_TEXTURES] = LLVMArrayType(texture_type,
                                                      PIPE_MAX_SHADER_SAMPLER_VIEWS);
      elem_types[LP_JIT_CTX_SAMPLERS] = LLVMArrayType(sampler_type,
                                                      PIPE_MAX_SAMPLERS);

      context_type = LLVMStructTypeInContext(lc, elem_types,
                                             Elements(elem_types), 0);

      LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, constants,
                             gallivm->target, context_type,
                             LP_JIT_CTX_CONSTANTS);
      LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, num_constants,
                             gallivm->target, context_type,
                             LP_JIT_CTX_NUM_CONSTANTS);
      LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, alpha_ref_value,
                             gallivm->target, context_type,
                             LP_JIT_CTX_ALPHA_REF);
      LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, stencil_ref_front,
                             gallivm->target, context_type,
                             LP_JIT_CTX_STENCIL_REF_FRONT);
      LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, stencil_ref_back,
                             gallivm->target, context_type,
                             LP_JIT_CTX_STENCIL_REF_BACK);
      LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, u8_blend_color,
                             gallivm->target, context_type,
                             LP_JIT_CTX_U8_BLEND_COLOR);
      LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, f_blend_color,
                             gallivm->target, context_type,
                             LP_JIT_CTX_F_BLEND_COLOR);
      LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, viewports,
                             gallivm->target, context_type,
                             LP_JIT_CTX_VIEWPORTS);
      LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, textures,
                             gallivm->target, context_type,
                             LP_JIT_CTX_TEXTURES);
      LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, samplers,
                             gallivm->target, context_type,
                             LP_JIT_CTX_SAMPLERS);
      LP_CHECK_STRUCT_SIZE(struct lp_jit_context,
                           gallivm->target, context_type);

      lp->jit_context_ptr_type = LLVMPointerType(context_type, 0);
   }

   /* struct lp_jit_thread_data */
   {
      LLVMTypeRef elem_types[LP_JIT_THREAD_DATA_COUNT];
      LLVMTypeRef thread_data_type;

      elem_types[LP_JIT_THREAD_DATA_COUNTER] = LLVMInt64TypeInContext(lc);
      elem_types[LP_JIT_THREAD_DATA_RASTER_STATE_VIEWPORT_INDEX] =
            LLVMInt32TypeInContext(lc);

      thread_data_type = LLVMStructTypeInContext(lc, elem_types,
                                                 Elements(elem_types), 0);

      lp->jit_thread_data_ptr_type = LLVMPointerType(thread_data_type, 0);
   }

   if (gallivm_debug & GALLIVM_DEBUG_IR) {
      LLVMDumpModule(gallivm->module);
   }
}
Пример #15
0
static void init_runtime(compile_t* c)
{
  c->str_builtin = stringtab("$0");
  c->str_Bool = stringtab("Bool");
  c->str_I8 = stringtab("I8");
  c->str_I16 = stringtab("I16");
  c->str_I32 = stringtab("I32");
  c->str_I64 = stringtab("I64");
  c->str_I128 = stringtab("I128");
  c->str_ILong = stringtab("ILong");
  c->str_ISize = stringtab("ISize");
  c->str_U8 = stringtab("U8");
  c->str_U16 = stringtab("U16");
  c->str_U32 = stringtab("U32");
  c->str_U64 = stringtab("U64");
  c->str_U128 = stringtab("U128");
  c->str_ULong = stringtab("ULong");
  c->str_USize = stringtab("USize");
  c->str_F32 = stringtab("F32");
  c->str_F64 = stringtab("F64");
  c->str_Pointer = stringtab("Pointer");
  c->str_Maybe = stringtab("MaybePointer");
  c->str_DoNotOptimise = stringtab("DoNotOptimise");
  c->str_Array = stringtab("Array");
  c->str_String = stringtab("String");
  c->str_Platform = stringtab("Platform");
  c->str_Main = stringtab("Main");
  c->str_Env = stringtab("Env");

  c->str_add = stringtab("add");
  c->str_sub = stringtab("sub");
  c->str_mul = stringtab("mul");
  c->str_div = stringtab("div");
  c->str_mod = stringtab("mod");
  c->str_neg = stringtab("neg");
  c->str_add_unsafe = stringtab("add_unsafe");
  c->str_sub_unsafe = stringtab("sub_unsafe");
  c->str_mul_unsafe = stringtab("mul_unsafe");
  c->str_div_unsafe = stringtab("div_unsafe");
  c->str_mod_unsafe = stringtab("mod_unsafe");
  c->str_neg_unsafe = stringtab("neg_unsafe");
  c->str_and = stringtab("op_and");
  c->str_or = stringtab("op_or");
  c->str_xor = stringtab("op_xor");
  c->str_not = stringtab("op_not");
  c->str_shl = stringtab("shl");
  c->str_shr = stringtab("shr");
  c->str_shl_unsafe = stringtab("shl_unsafe");
  c->str_shr_unsafe = stringtab("shr_unsafe");
  c->str_eq = stringtab("eq");
  c->str_ne = stringtab("ne");
  c->str_lt = stringtab("lt");
  c->str_le = stringtab("le");
  c->str_ge = stringtab("ge");
  c->str_gt = stringtab("gt");
  c->str_eq_unsafe = stringtab("eq_unsafe");
  c->str_ne_unsafe = stringtab("ne_unsafe");
  c->str_lt_unsafe = stringtab("lt_unsafe");
  c->str_le_unsafe = stringtab("le_unsafe");
  c->str_ge_unsafe = stringtab("ge_unsafe");
  c->str_gt_unsafe = stringtab("gt_unsafe");

  c->str_this = stringtab("this");
  c->str_create = stringtab("create");
  c->str__create = stringtab("_create");
  c->str__init = stringtab("_init");
  c->str__final = stringtab("_final");
  c->str__event_notify = stringtab("_event_notify");
  c->str__serialise_space = stringtab("_serialise_space");
  c->str__serialise = stringtab("_serialise");
  c->str__deserialise = stringtab("_deserialise");

  LLVMTypeRef type;
  LLVMTypeRef params[5];
  LLVMValueRef value;

  c->void_type = LLVMVoidTypeInContext(c->context);
  c->i1 = LLVMInt1TypeInContext(c->context);
  c->i8 = LLVMInt8TypeInContext(c->context);
  c->i16 = LLVMInt16TypeInContext(c->context);
  c->i32 = LLVMInt32TypeInContext(c->context);
  c->i64 = LLVMInt64TypeInContext(c->context);
  c->i128 = LLVMIntTypeInContext(c->context, 128);
  c->f32 = LLVMFloatTypeInContext(c->context);
  c->f64 = LLVMDoubleTypeInContext(c->context);
  c->intptr = LLVMIntPtrTypeInContext(c->context, c->target_data);

  // i8*
  c->void_ptr = LLVMPointerType(c->i8, 0);

  // forward declare object
  c->object_type = LLVMStructCreateNamed(c->context, "__object");
  c->object_ptr = LLVMPointerType(c->object_type, 0);

  // padding required in an actor between the descriptor and fields
  c->actor_pad = LLVMArrayType(c->i8, PONY_ACTOR_PAD_SIZE);

  // message
  params[0] = c->i32; // size
  params[1] = c->i32; // id
  c->msg_type = LLVMStructCreateNamed(c->context, "__message");
  c->msg_ptr = LLVMPointerType(c->msg_type, 0);
  LLVMStructSetBody(c->msg_type, params, 2, false);

  // trace
  // void (*)(i8*, __object*)
  params[0] = c->void_ptr;
  params[1] = c->object_ptr;
  c->trace_type = LLVMFunctionType(c->void_type, params, 2, false);
  c->trace_fn = LLVMPointerType(c->trace_type, 0);

  // serialise
  // void (*)(i8*, __object*, i8*, intptr, i32)
  params[0] = c->void_ptr;
  params[1] = c->object_ptr;
  params[2] = c->void_ptr;
  params[3] = c->intptr;
  params[4] = c->i32;
  c->serialise_type = LLVMFunctionType(c->void_type, params, 5, false);
  c->serialise_fn = LLVMPointerType(c->serialise_type, 0);

  // serialise_space
  // i64 (__object*)
  params[0] = c->object_ptr;
  c->custom_serialise_space_fn = LLVMPointerType(
    LLVMFunctionType(c->i64, params, 1, false), 0);

  // custom_deserialise
  // void (*)(__object*, void*)
  params[0] = c->object_ptr;
  params[1] = c->void_ptr;
  c->custom_deserialise_fn = LLVMPointerType(
  LLVMFunctionType(c->void_type, params, 2, false), 0);

  // dispatch
  // void (*)(i8*, __object*, $message*)
  params[0] = c->void_ptr;
  params[1] = c->object_ptr;
  params[2] = c->msg_ptr;
  c->dispatch_type = LLVMFunctionType(c->void_type, params, 3, false);
  c->dispatch_fn = LLVMPointerType(c->dispatch_type, 0);

  // void (*)(__object*)
  params[0] = c->object_ptr;
  c->final_fn = LLVMPointerType(
    LLVMFunctionType(c->void_type, params, 1, false), 0);

  // descriptor, opaque version
  // We need this in order to build our own structure.
  const char* desc_name = genname_descriptor(NULL);
  c->descriptor_type = LLVMStructCreateNamed(c->context, desc_name);
  c->descriptor_ptr = LLVMPointerType(c->descriptor_type, 0);

  // field descriptor
  // Also needed to build a descriptor structure.
  params[0] = c->i32;
  params[1] = c->descriptor_ptr;
  c->field_descriptor = LLVMStructTypeInContext(c->context, params, 2, false);

  // descriptor, filled in
  gendesc_basetype(c, c->descriptor_type);

  // define object
  params[0] = c->descriptor_ptr;
  LLVMStructSetBody(c->object_type, params, 1, false);

#if PONY_LLVM >= 309
  LLVM_DECLARE_ATTRIBUTEREF(nounwind_attr, nounwind, 0);
  LLVM_DECLARE_ATTRIBUTEREF(readnone_attr, readnone, 0);
  LLVM_DECLARE_ATTRIBUTEREF(readonly_attr, readonly, 0);
  LLVM_DECLARE_ATTRIBUTEREF(inacc_or_arg_mem_attr,
    inaccessiblemem_or_argmemonly, 0);
  LLVM_DECLARE_ATTRIBUTEREF(noalias_attr, noalias, 0);
  LLVM_DECLARE_ATTRIBUTEREF(noreturn_attr, noreturn, 0);
  LLVM_DECLARE_ATTRIBUTEREF(deref_actor_attr, dereferenceable,
    PONY_ACTOR_PAD_SIZE + (target_is_ilp32(c->opt->triple) ? 4 : 8));
  LLVM_DECLARE_ATTRIBUTEREF(align_pool_attr, align, ponyint_pool_size(0));
  LLVM_DECLARE_ATTRIBUTEREF(align_heap_attr, align, HEAP_MIN);
  LLVM_DECLARE_ATTRIBUTEREF(deref_or_null_alloc_attr, dereferenceable_or_null,
    HEAP_MIN);
  LLVM_DECLARE_ATTRIBUTEREF(deref_alloc_small_attr, dereferenceable, HEAP_MIN);
  LLVM_DECLARE_ATTRIBUTEREF(deref_alloc_large_attr, dereferenceable,
    HEAP_MAX << 1);
#endif

  // i8* pony_ctx()
  type = LLVMFunctionType(c->void_ptr, NULL, 0, false);
  value = LLVMAddFunction(c->module, "pony_ctx", type);
#if PONY_LLVM >= 309
  LLVMAddAttributeAtIndex(value, LLVMAttributeFunctionIndex, nounwind_attr);
  LLVMAddAttributeAtIndex(value, LLVMAttributeFunctionIndex, readnone_attr);
#else
  LLVMAddFunctionAttr(value, LLVMNoUnwindAttribute);
  LLVMAddFunctionAttr(value, LLVMReadNoneAttribute);
#endif

  // __object* pony_create(i8*, __Desc*)
  params[0] = c->void_ptr;
  params[1] = c->descriptor_ptr;
  type = LLVMFunctionType(c->object_ptr, params, 2, false);
  value = LLVMAddFunction(c->module, "pony_create", type);
#if PONY_LLVM >= 309
  LLVMAddAttributeAtIndex(value, LLVMAttributeFunctionIndex, nounwind_attr);
  LLVMAddAttributeAtIndex(value, LLVMAttributeFunctionIndex,
    inacc_or_arg_mem_attr);
  LLVMAddAttributeAtIndex(value, LLVMAttributeReturnIndex, noalias_attr);
  LLVMAddAttributeAtIndex(value, LLVMAttributeReturnIndex, deref_actor_attr);
  LLVMAddAttributeAtIndex(value, LLVMAttributeReturnIndex, align_pool_attr);
#else
  LLVMAddFunctionAttr(value, LLVMNoUnwindAttribute);
#  if PONY_LLVM >= 308
  LLVMSetInaccessibleMemOrArgMemOnly(value);
#  endif
  LLVMSetReturnNoAlias(value);
  LLVMSetDereferenceable(value, 0, PONY_ACTOR_PAD_SIZE +
    (target_is_ilp32(c->opt->triple) ? 4 : 8));
#endif

  // void ponyint_destroy(__object*)
  params[0] = c->object_ptr;
  type = LLVMFunctionType(c->void_type, params, 1, false);
  value = LLVMAddFunction(c->module, "ponyint_destroy", type);
#if PONY_LLVM >= 309
  LLVMAddAttributeAtIndex(value, LLVMAttributeFunctionIndex, nounwind_attr);
  LLVMAddAttributeAtIndex(value, LLVMAttributeFunctionIndex,
    inacc_or_arg_mem_attr);
#else
  LLVMAddFunctionAttr(value, LLVMNoUnwindAttribute);
#  if PONY_LLVM >= 308
  LLVMSetInaccessibleMemOrArgMemOnly(value);
#  endif
#endif

  // void pony_sendv(i8*, __object*, $message*, $message*)
  params[0] = c->void_ptr;
  params[1] = c->object_ptr;
  params[2] = c->msg_ptr;
  params[3] = c->msg_ptr;
  type = LLVMFunctionType(c->void_type, params, 4, false);
  value = LLVMAddFunction(c->module, "pony_sendv", type);
#if PONY_LLVM >= 309
  LLVMAddAttributeAtIndex(value, LLVMAttributeFunctionIndex, nounwind_attr);
  LLVMAddAttributeAtIndex(value, LLVMAttributeFunctionIndex,
    inacc_or_arg_mem_attr);
#else
  LLVMAddFunctionAttr(value, LLVMNoUnwindAttribute);
#  if PONY_LLVM >= 308
  LLVMSetInaccessibleMemOrArgMemOnly(value);
#  endif
#endif

  // void pony_sendv_single(i8*, __object*, $message*, $message*)
  params[0] = c->void_ptr;
  params[1] = c->object_ptr;
  params[2] = c->msg_ptr;
  params[3] = c->msg_ptr;
  type = LLVMFunctionType(c->void_type, params, 4, false);
  value = LLVMAddFunction(c->module, "pony_sendv_single", type);
#if PONY_LLVM >= 309
  LLVMAddAttributeAtIndex(value, LLVMAttributeFunctionIndex, nounwind_attr);
  LLVMAddAttributeAtIndex(value, LLVMAttributeFunctionIndex,
    inacc_or_arg_mem_attr);
#else
  LLVMAddFunctionAttr(value, LLVMNoUnwindAttribute);
#  if PONY_LLVM >= 308
  LLVMSetInaccessibleMemOrArgMemOnly(value);
#  endif
#endif

  // i8* pony_alloc(i8*, intptr)
  params[0] = c->void_ptr;
  params[1] = c->intptr;
  type = LLVMFunctionType(c->void_ptr, params, 2, false);
  value = LLVMAddFunction(c->module, "pony_alloc", type);
#if PONY_LLVM >= 309
  LLVMAddAttributeAtIndex(value, LLVMAttributeFunctionIndex, nounwind_attr);
  LLVMAddAttributeAtIndex(value, LLVMAttributeFunctionIndex,
    inacc_or_arg_mem_attr);
  LLVMAddAttributeAtIndex(value, LLVMAttributeReturnIndex, noalias_attr);
  LLVMAddAttributeAtIndex(value, LLVMAttributeReturnIndex,
    deref_or_null_alloc_attr);
  LLVMAddAttributeAtIndex(value, LLVMAttributeReturnIndex, align_heap_attr);
#else
  LLVMAddFunctionAttr(value, LLVMNoUnwindAttribute);
#  if PONY_LLVM >= 308
  LLVMSetInaccessibleMemOrArgMemOnly(value);
#  endif
  LLVMSetReturnNoAlias(value);
  LLVMSetDereferenceableOrNull(value, 0, HEAP_MIN);
#endif

  // i8* pony_alloc_small(i8*, i32)
  params[0] = c->void_ptr;
  params[1] = c->i32;
  type = LLVMFunctionType(c->void_ptr, params, 2, false);
  value = LLVMAddFunction(c->module, "pony_alloc_small", type);
#if PONY_LLVM >= 309
  LLVMAddAttributeAtIndex(value, LLVMAttributeFunctionIndex, nounwind_attr);
  LLVMAddAttributeAtIndex(value, LLVMAttributeFunctionIndex,
    inacc_or_arg_mem_attr);
  LLVMAddAttributeAtIndex(value, LLVMAttributeReturnIndex, noalias_attr);
  LLVMAddAttributeAtIndex(value, LLVMAttributeReturnIndex,
    deref_alloc_small_attr);
  LLVMAddAttributeAtIndex(value, LLVMAttributeReturnIndex, align_heap_attr);
#else
  LLVMAddFunctionAttr(value, LLVMNoUnwindAttribute);
#  if PONY_LLVM >= 308
  LLVMSetInaccessibleMemOrArgMemOnly(value);
#  endif
  LLVMSetReturnNoAlias(value);
  LLVMSetDereferenceable(value, 0, HEAP_MIN);
#endif

  // i8* pony_alloc_large(i8*, intptr)
  params[0] = c->void_ptr;
  params[1] = c->intptr;
  type = LLVMFunctionType(c->void_ptr, params, 2, false);
  value = LLVMAddFunction(c->module, "pony_alloc_large", type);
#if PONY_LLVM >= 309
  LLVMAddAttributeAtIndex(value, LLVMAttributeFunctionIndex, nounwind_attr);
  LLVMAddAttributeAtIndex(value, LLVMAttributeFunctionIndex,
    inacc_or_arg_mem_attr);
  LLVMAddAttributeAtIndex(value, LLVMAttributeReturnIndex, noalias_attr);
  LLVMAddAttributeAtIndex(value, LLVMAttributeReturnIndex,
    deref_alloc_large_attr);
  LLVMAddAttributeAtIndex(value, LLVMAttributeReturnIndex, align_heap_attr);
#else
  LLVMAddFunctionAttr(value, LLVMNoUnwindAttribute);
#  if PONY_LLVM >= 308
  LLVMSetInaccessibleMemOrArgMemOnly(value);
#  endif
  LLVMSetReturnNoAlias(value);
  LLVMSetDereferenceable(value, 0, HEAP_MAX << 1);
#endif

  // i8* pony_realloc(i8*, i8*, intptr)
  params[0] = c->void_ptr;
  params[1] = c->void_ptr;
  params[2] = c->intptr;
  type = LLVMFunctionType(c->void_ptr, params, 3, false);
  value = LLVMAddFunction(c->module, "pony_realloc", type);
#if PONY_LLVM >= 309
  LLVMAddAttributeAtIndex(value, LLVMAttributeFunctionIndex, nounwind_attr);
  LLVMAddAttributeAtIndex(value, LLVMAttributeFunctionIndex,
    inacc_or_arg_mem_attr);
  LLVMAddAttributeAtIndex(value, LLVMAttributeReturnIndex, noalias_attr);
  LLVMAddAttributeAtIndex(value, LLVMAttributeReturnIndex,
    deref_or_null_alloc_attr);
  LLVMAddAttributeAtIndex(value, LLVMAttributeReturnIndex, align_heap_attr);
#else
  LLVMAddFunctionAttr(value, LLVMNoUnwindAttribute);
#  if PONY_LLVM >= 308
  LLVMSetInaccessibleMemOrArgMemOnly(value);
#  endif
  LLVMSetReturnNoAlias(value);
  LLVMSetDereferenceableOrNull(value, 0, HEAP_MIN);
#endif

  // i8* pony_alloc_final(i8*, intptr)
  params[0] = c->void_ptr;
  params[1] = c->intptr;
  type = LLVMFunctionType(c->void_ptr, params, 2, false);
  value = LLVMAddFunction(c->module, "pony_alloc_final", type);
#if PONY_LLVM >= 309
  LLVMAddAttributeAtIndex(value, LLVMAttributeFunctionIndex, nounwind_attr);
  LLVMAddAttributeAtIndex(value, LLVMAttributeFunctionIndex,
    inacc_or_arg_mem_attr);
  LLVMAddAttributeAtIndex(value, LLVMAttributeReturnIndex, noalias_attr);
  LLVMAddAttributeAtIndex(value, LLVMAttributeReturnIndex,
    deref_or_null_alloc_attr);
  LLVMAddAttributeAtIndex(value, LLVMAttributeReturnIndex, align_heap_attr);
#else
  LLVMAddFunctionAttr(value, LLVMNoUnwindAttribute);
#  if PONY_LLVM >= 308
  LLVMSetInaccessibleMemOrArgMemOnly(value);
#  endif
  LLVMSetReturnNoAlias(value);
  LLVMSetDereferenceableOrNull(value, 0, HEAP_MIN);
#endif

  // i8* pony_alloc_small_final(i8*, i32)
  params[0] = c->void_ptr;
  params[1] = c->i32;
  type = LLVMFunctionType(c->void_ptr, params, 2, false);
  value = LLVMAddFunction(c->module, "pony_alloc_small_final", type);
#if PONY_LLVM >= 309
  LLVMAddAttributeAtIndex(value, LLVMAttributeFunctionIndex, nounwind_attr);
  LLVMAddAttributeAtIndex(value, LLVMAttributeFunctionIndex,
    inacc_or_arg_mem_attr);
  LLVMAddAttributeAtIndex(value, LLVMAttributeReturnIndex, noalias_attr);
  LLVMAddAttributeAtIndex(value, LLVMAttributeReturnIndex,
    deref_alloc_small_attr);
  LLVMAddAttributeAtIndex(value, LLVMAttributeReturnIndex, align_heap_attr);
#else
  LLVMAddFunctionAttr(value, LLVMNoUnwindAttribute);
#  if PONY_LLVM >= 308
  LLVMSetInaccessibleMemOrArgMemOnly(value);
#  endif
  LLVMSetReturnNoAlias(value);
  LLVMSetDereferenceable(value, 0, HEAP_MIN);
#endif

  // i8* pony_alloc_large_final(i8*, intptr)
  params[0] = c->void_ptr;
  params[1] = c->intptr;
  type = LLVMFunctionType(c->void_ptr, params, 2, false);
  value = LLVMAddFunction(c->module, "pony_alloc_large_final", type);
#if PONY_LLVM >= 309
  LLVMAddAttributeAtIndex(value, LLVMAttributeFunctionIndex, nounwind_attr);
  LLVMAddAttributeAtIndex(value, LLVMAttributeFunctionIndex,
    inacc_or_arg_mem_attr);
  LLVMAddAttributeAtIndex(value, LLVMAttributeReturnIndex, noalias_attr);
  LLVMAddAttributeAtIndex(value, LLVMAttributeReturnIndex,
    deref_alloc_large_attr);
  LLVMAddAttributeAtIndex(value, LLVMAttributeReturnIndex, align_heap_attr);
#else
  LLVMAddFunctionAttr(value, LLVMNoUnwindAttribute);
#  if PONY_LLVM >= 308
  LLVMSetInaccessibleMemOrArgMemOnly(value);
#  endif
  LLVMSetReturnNoAlias(value);
  LLVMSetDereferenceable(value, 0, HEAP_MAX << 1);
#endif

  // $message* pony_alloc_msg(i32, i32)
  params[0] = c->i32;
  params[1] = c->i32;
  type = LLVMFunctionType(c->msg_ptr, params, 2, false);
  value = LLVMAddFunction(c->module, "pony_alloc_msg", type);
#if PONY_LLVM >= 309
  LLVMAddAttributeAtIndex(value, LLVMAttributeFunctionIndex, nounwind_attr);
  LLVMAddAttributeAtIndex(value, LLVMAttributeFunctionIndex,
    inacc_or_arg_mem_attr);
  LLVMAddAttributeAtIndex(value, LLVMAttributeReturnIndex, noalias_attr);
  LLVMAddAttributeAtIndex(value, LLVMAttributeReturnIndex, align_pool_attr);
#else
  LLVMAddFunctionAttr(value, LLVMNoUnwindAttribute);
#  if PONY_LLVM >= 308
  LLVMSetInaccessibleMemOrArgMemOnly(value);
#  endif
  LLVMSetReturnNoAlias(value);
#endif

  // void pony_trace(i8*, i8*)
  params[0] = c->void_ptr;
  params[1] = c->void_ptr;
  type = LLVMFunctionType(c->void_type, params, 2, false);
  value = LLVMAddFunction(c->module, "pony_trace", type);
#if PONY_LLVM >= 309
  LLVMAddAttributeAtIndex(value, LLVMAttributeFunctionIndex, nounwind_attr);
  LLVMAddAttributeAtIndex(value, LLVMAttributeFunctionIndex,
    inacc_or_arg_mem_attr);
  LLVMAddAttributeAtIndex(value, 2, readnone_attr);
#else
  LLVMAddFunctionAttr(value, LLVMNoUnwindAttribute);
#  if PONY_LLVM >= 308
  LLVMSetInaccessibleMemOrArgMemOnly(value);
#  endif
  value = LLVMGetParam(value, 1);
  LLVMAddAttribute(value, LLVMReadNoneAttribute);
#endif

  // void pony_traceknown(i8*, __object*, __Desc*, i32)
  params[0] = c->void_ptr;
  params[1] = c->object_ptr;
  params[2] = c->descriptor_ptr;
  params[3] = c->i32;
  type = LLVMFunctionType(c->void_type, params, 4, false);
  value = LLVMAddFunction(c->module, "pony_traceknown", type);
#if PONY_LLVM >= 309
  LLVMAddAttributeAtIndex(value, LLVMAttributeFunctionIndex, nounwind_attr);
  LLVMAddAttributeAtIndex(value, LLVMAttributeFunctionIndex,
    inacc_or_arg_mem_attr);
  LLVMAddAttributeAtIndex(value, 2, readonly_attr);
#else
  LLVMAddFunctionAttr(value, LLVMNoUnwindAttribute);
#  if PONY_LLVM >= 308
  LLVMSetInaccessibleMemOrArgMemOnly(value);
#  endif
  value = LLVMGetParam(value, 1);
  LLVMAddAttribute(value, LLVMReadOnlyAttribute);
#endif

  // void pony_traceunknown(i8*, __object*, i32)
  params[0] = c->void_ptr;
  params[1] = c->object_ptr;
  params[2] = c->i32;
  type = LLVMFunctionType(c->void_type, params, 3, false);
  value = LLVMAddFunction(c->module, "pony_traceunknown", type);
#if PONY_LLVM >= 309
  LLVMAddAttributeAtIndex(value, LLVMAttributeFunctionIndex, nounwind_attr);
  LLVMAddAttributeAtIndex(value, LLVMAttributeFunctionIndex,
    inacc_or_arg_mem_attr);
  LLVMAddAttributeAtIndex(value, 2, readonly_attr);
#else
  LLVMAddFunctionAttr(value, LLVMNoUnwindAttribute);
#  if PONY_LLVM >= 308
  LLVMSetInaccessibleMemOrArgMemOnly(value);
#  endif
  value = LLVMGetParam(value, 1);
  LLVMAddAttribute(value, LLVMReadOnlyAttribute);
#endif

  // void pony_gc_send(i8*)
  params[0] = c->void_ptr;
  type = LLVMFunctionType(c->void_type, params, 1, false);
  value = LLVMAddFunction(c->module, "pony_gc_send", type);
#if PONY_LLVM >= 309
  LLVMAddAttributeAtIndex(value, LLVMAttributeFunctionIndex, nounwind_attr);
  LLVMAddAttributeAtIndex(value, LLVMAttributeFunctionIndex,
    inacc_or_arg_mem_attr);
#else
  LLVMAddFunctionAttr(value, LLVMNoUnwindAttribute);
#  if PONY_LLVM >= 308
  LLVMSetInaccessibleMemOrArgMemOnly(value);
#  endif
#endif

  // void pony_gc_recv(i8*)
  params[0] = c->void_ptr;
  type = LLVMFunctionType(c->void_type, params, 1, false);
  value = LLVMAddFunction(c->module, "pony_gc_recv", type);
#if PONY_LLVM >= 309
  LLVMAddAttributeAtIndex(value, LLVMAttributeFunctionIndex, nounwind_attr);
  LLVMAddAttributeAtIndex(value, LLVMAttributeFunctionIndex,
    inacc_or_arg_mem_attr);
#else
  LLVMAddFunctionAttr(value, LLVMNoUnwindAttribute);
#  if PONY_LLVM >= 308
  LLVMSetInaccessibleMemOrArgMemOnly(value);
#  endif
#endif

  // void pony_send_done(i8*)
  params[0] = c->void_ptr;
  type = LLVMFunctionType(c->void_type, params, 1, false);
  value = LLVMAddFunction(c->module, "pony_send_done", type);
#if PONY_LLVM >= 309
  LLVMAddAttributeAtIndex(value, LLVMAttributeFunctionIndex, nounwind_attr);
#else
  LLVMAddFunctionAttr(value, LLVMNoUnwindAttribute);
#endif

  // void pony_recv_done(i8*)
  params[0] = c->void_ptr;
  type = LLVMFunctionType(c->void_type, params, 1, false);
  value = LLVMAddFunction(c->module, "pony_recv_done", type);
#if PONY_LLVM >= 309
  LLVMAddAttributeAtIndex(value, LLVMAttributeFunctionIndex, nounwind_attr);
#else
  LLVMAddFunctionAttr(value, LLVMNoUnwindAttribute);
#endif

  // void pony_serialise_reserve(i8*, i8*, intptr)
  params[0] = c->void_ptr;
  params[1] = c->void_ptr;
  params[2] = c->intptr;
  type = LLVMFunctionType(c->void_type, params, 3, false);
  value = LLVMAddFunction(c->module, "pony_serialise_reserve", type);
#if PONY_LLVM >= 309
  LLVMAddAttributeAtIndex(value, LLVMAttributeFunctionIndex, nounwind_attr);
  LLVMAddAttributeAtIndex(value, LLVMAttributeFunctionIndex,
    inacc_or_arg_mem_attr);
  LLVMAddAttributeAtIndex(value, 2, readnone_attr);
#else
  LLVMAddFunctionAttr(value, LLVMNoUnwindAttribute);
#  if PONY_LLVM >= 308
  LLVMSetInaccessibleMemOrArgMemOnly(value);
#  endif
  value = LLVMGetParam(value, 1);
  LLVMAddAttribute(value, LLVMReadNoneAttribute);
#endif

  // intptr pony_serialise_offset(i8*, i8*)
  params[0] = c->void_ptr;
  params[1] = c->void_ptr;
  type = LLVMFunctionType(c->intptr, params, 2, false);
  value = LLVMAddFunction(c->module, "pony_serialise_offset", type);
#if PONY_LLVM >= 309
  LLVMAddAttributeAtIndex(value, LLVMAttributeFunctionIndex, nounwind_attr);
  LLVMAddAttributeAtIndex(value, LLVMAttributeFunctionIndex,
    inacc_or_arg_mem_attr);
  LLVMAddAttributeAtIndex(value, 2, readonly_attr);
#else
  LLVMAddFunctionAttr(value, LLVMNoUnwindAttribute);
#  if PONY_LLVM >= 308
  LLVMSetInaccessibleMemOrArgMemOnly(value);
#  endif
  value = LLVMGetParam(value, 1);
  LLVMAddAttribute(value, LLVMReadOnlyAttribute);
#endif

  // i8* pony_deserialise_offset(i8*, __desc*, intptr)
  params[0] = c->void_ptr;
  params[1] = c->descriptor_ptr;
  params[2] = c->intptr;
  type = LLVMFunctionType(c->void_ptr, params, 3, false);
  value = LLVMAddFunction(c->module, "pony_deserialise_offset", type);
#if PONY_LLVM >= 309
  LLVMAddAttributeAtIndex(value, LLVMAttributeFunctionIndex,
    inacc_or_arg_mem_attr);
#elif PONY_LLVM == 308
  LLVMSetInaccessibleMemOrArgMemOnly(value);
#endif

  // i8* pony_deserialise_block(i8*, intptr, intptr)
  params[0] = c->void_ptr;
  params[1] = c->intptr;
  params[2] = c->intptr;
  type = LLVMFunctionType(c->void_ptr, params, 3, false);
  value = LLVMAddFunction(c->module, "pony_deserialise_block", type);
#if PONY_LLVM >= 309
  LLVMAddAttributeAtIndex(value, LLVMAttributeFunctionIndex,
    inacc_or_arg_mem_attr);
  LLVMAddAttributeAtIndex(value, LLVMAttributeReturnIndex, noalias_attr);
#else
#  if PONY_LLVM >= 308
  LLVMSetInaccessibleMemOrArgMemOnly(value);
#  endif
  LLVMSetReturnNoAlias(value);
#endif

  // i32 pony_init(i32, i8**)
  params[0] = c->i32;
  params[1] = LLVMPointerType(c->void_ptr, 0);
  type = LLVMFunctionType(c->i32, params, 2, false);
  value = LLVMAddFunction(c->module, "pony_init", type);
#if PONY_LLVM >= 309
  LLVMAddAttributeAtIndex(value, LLVMAttributeFunctionIndex, nounwind_attr);
  LLVMAddAttributeAtIndex(value, LLVMAttributeFunctionIndex,
    inacc_or_arg_mem_attr);
#else
  LLVMAddFunctionAttr(value, LLVMNoUnwindAttribute);
#  if PONY_LLVM >= 308
  LLVMSetInaccessibleMemOrArgMemOnly(value);
#  endif
#endif

  // void pony_become(i8*, __object*)
  params[0] = c->void_ptr;
  params[1] = c->object_ptr;
  type = LLVMFunctionType(c->void_type, params, 2, false);
  value = LLVMAddFunction(c->module, "pony_become", type);
#if PONY_LLVM >= 309
  LLVMAddAttributeAtIndex(value, LLVMAttributeFunctionIndex, nounwind_attr);
  LLVMAddAttributeAtIndex(value, LLVMAttributeFunctionIndex,
    inacc_or_arg_mem_attr);
#else
  LLVMAddFunctionAttr(value, LLVMNoUnwindAttribute);
#  if PONY_LLVM >= 308
  LLVMSetInaccessibleMemOrArgMemOnly(value);
#  endif
#endif

  // i32 pony_start(i32, i32)
  params[0] = c->i32;
  params[1] = c->i32;
  type = LLVMFunctionType(c->i32, params, 2, false);
  value = LLVMAddFunction(c->module, "pony_start", type);
#if PONY_LLVM >= 309
  LLVMAddAttributeAtIndex(value, LLVMAttributeFunctionIndex, nounwind_attr);
  LLVMAddAttributeAtIndex(value, LLVMAttributeFunctionIndex,
    inacc_or_arg_mem_attr);
#else
  LLVMAddFunctionAttr(value, LLVMNoUnwindAttribute);
#  if PONY_LLVM >= 308
  LLVMSetInaccessibleMemOrArgMemOnly(value);
#  endif
#endif

  // i32 pony_get_exitcode()
  type = LLVMFunctionType(c->i32, NULL, 0, false);
  value = LLVMAddFunction(c->module, "pony_get_exitcode", type);
#if PONY_LLVM >= 309
  LLVMAddAttributeAtIndex(value, LLVMAttributeFunctionIndex, nounwind_attr);
  LLVMAddAttributeAtIndex(value, LLVMAttributeFunctionIndex, readonly_attr);
#else
  LLVMAddFunctionAttr(value, LLVMNoUnwindAttribute);
  LLVMAddFunctionAttr(value, LLVMReadOnlyAttribute);
#endif

  // void pony_throw()
  type = LLVMFunctionType(c->void_type, NULL, 0, false);
  value = LLVMAddFunction(c->module, "pony_throw", type);
#if PONY_LLVM >= 309
  LLVMAddAttributeAtIndex(value, LLVMAttributeFunctionIndex, noreturn_attr);
#else
  LLVMAddFunctionAttr(value, LLVMNoReturnAttribute);
#endif

  // i32 pony_personality_v0(...)
  type = LLVMFunctionType(c->i32, NULL, 0, true);
  c->personality = LLVMAddFunction(c->module, "pony_personality_v0", type);

  // i32 memcmp(i8*, i8*, intptr)
  params[0] = c->void_ptr;
  params[1] = c->void_ptr;
  params[2] = c->intptr;
  type = LLVMFunctionType(c->i32, params, 3, false);
  value = LLVMAddFunction(c->module, "memcmp", type);
#if PONY_LLVM >= 309
  LLVMAddAttributeAtIndex(value, LLVMAttributeFunctionIndex, nounwind_attr);
  LLVMAddAttributeAtIndex(value, LLVMAttributeFunctionIndex, readonly_attr);
  LLVMAddAttributeAtIndex(value, 1, readonly_attr);
  LLVMAddAttributeAtIndex(value, 2, readonly_attr);
#else
  LLVMAddFunctionAttr(value, LLVMNoUnwindAttribute);
  LLVMAddFunctionAttr(value, LLVMReadOnlyAttribute);
  LLVMValueRef param = LLVMGetParam(value, 0);
  LLVMAddAttribute(param, LLVMReadOnlyAttribute);
  param = LLVMGetParam(value, 1);
  LLVMAddAttribute(param, LLVMReadOnlyAttribute);
#endif
}
Пример #16
0
static void init_runtime(compile_t* c)
{
  c->str_1 = stringtab("$1");
  c->str_Bool = stringtab("Bool");
  c->str_I8 = stringtab("I8");
  c->str_I16 = stringtab("I16");
  c->str_I32 = stringtab("I32");
  c->str_I64 = stringtab("I64");
  c->str_I128 = stringtab("I128");
  c->str_U8 = stringtab("U8");
  c->str_U16 = stringtab("U16");
  c->str_U32 = stringtab("U32");
  c->str_U64 = stringtab("U64");
  c->str_U128 = stringtab("U128");
  c->str_F32 = stringtab("F32");
  c->str_F64 = stringtab("F64");
  c->str_Pointer = stringtab("Pointer");
  c->str_Array = stringtab("Array");
  c->str_Platform = stringtab("Platform");

  c->str_add = stringtab("add");
  c->str_sub = stringtab("sub");
  c->str_mul = stringtab("mul");
  c->str_div = stringtab("div");
  c->str_mod = stringtab("mod");
  c->str_neg = stringtab("neg");
  c->str_and = stringtab("op_and");
  c->str_or = stringtab("op_or");
  c->str_xor = stringtab("op_xor");
  c->str_not = stringtab("op_not");
  c->str_shl = stringtab("shl");
  c->str_shr = stringtab("shr");
  c->str_eq = stringtab("eq");
  c->str_ne = stringtab("ne");
  c->str_lt = stringtab("lt");
  c->str_le = stringtab("le");
  c->str_ge = stringtab("ge");
  c->str_gt = stringtab("gt");

  LLVMTypeRef type;
  LLVMTypeRef params[4];
  LLVMValueRef value;

  c->void_type = LLVMVoidTypeInContext(c->context);
  c->i1 = LLVMInt1TypeInContext(c->context);
  c->i8 = LLVMInt8TypeInContext(c->context);
  c->i16 = LLVMInt16TypeInContext(c->context);
  c->i32 = LLVMInt32TypeInContext(c->context);
  c->i64 = LLVMInt64TypeInContext(c->context);
  c->i128 = LLVMIntTypeInContext(c->context, 128);
  c->f32 = LLVMFloatTypeInContext(c->context);
  c->f64 = LLVMDoubleTypeInContext(c->context);
  c->intptr = LLVMIntPtrTypeInContext(c->context, c->target_data);

  // i8*
  c->void_ptr = LLVMPointerType(c->i8, 0);

  // forward declare object
  c->object_type = LLVMStructCreateNamed(c->context, "$object");
  c->object_ptr = LLVMPointerType(c->object_type, 0);

  // padding required in an actor between the descriptor and fields
  c->actor_pad = LLVMArrayType(c->i8, PONY_ACTOR_PAD_SIZE);

  // message
  params[0] = c->i32; // size
  params[1] = c->i32; // id
  c->msg_type = LLVMStructCreateNamed(c->context, "$message");
  c->msg_ptr = LLVMPointerType(c->msg_type, 0);
  LLVMStructSetBody(c->msg_type, params, 2, false);

  // trace
  // void (*)($object*)
  params[0] = c->object_ptr;
  c->trace_type = LLVMFunctionType(c->void_type, params, 1, false);
  c->trace_fn = LLVMPointerType(c->trace_type, 0);

  // dispatch
  // void (*)($object*, $message*)
  params[0] = c->object_ptr;
  params[1] = c->msg_ptr;
  c->dispatch_type = LLVMFunctionType(c->void_type, params, 2, false);
  c->dispatch_fn = LLVMPointerType(c->dispatch_type, 0);

  // void (*)($object*)
  params[0] = c->object_ptr;
  c->final_fn = LLVMPointerType(
    LLVMFunctionType(c->void_type, params, 1, false), 0);

  // descriptor, opaque version
  // We need this in order to build our own structure.
  const char* desc_name = genname_descriptor(NULL);
  c->descriptor_type = LLVMStructCreateNamed(c->context, desc_name);
  c->descriptor_ptr = LLVMPointerType(c->descriptor_type, 0);

  // field descriptor
  // Also needed to build a descriptor structure.
  params[0] = c->i32;
  params[1] = c->descriptor_ptr;
  c->field_descriptor = LLVMStructTypeInContext(c->context, params, 2, false);

  // descriptor, filled in
  c->descriptor_type = gendesc_type(c, NULL);

  // define object
  params[0] = c->descriptor_ptr;
  LLVMStructSetBody(c->object_type, params, 1, false);

  // $object* pony_create($desc*)
  params[0] = c->descriptor_ptr;
  type = LLVMFunctionType(c->object_ptr, params, 1, false);
  value = LLVMAddFunction(c->module, "pony_create", type);
  LLVMAddFunctionAttr(value, LLVMNoUnwindAttribute);
  LLVMSetReturnNoAlias(value);

  // void pony_destroy($object*)
  params[0] = c->object_ptr;
  type = LLVMFunctionType(c->void_type, params, 1, false);
  value = LLVMAddFunction(c->module, "pony_destroy", type);
  LLVMAddFunctionAttr(value, LLVMNoUnwindAttribute);
  //LLVMSetReturnNoAlias(value);

  // void pony_sendv($object*, $message*);
  params[0] = c->object_ptr;
  params[1] = c->msg_ptr;
  type = LLVMFunctionType(c->void_type, params, 2, false);
  value = LLVMAddFunction(c->module, "pony_sendv", type);
  LLVMAddFunctionAttr(value, LLVMNoUnwindAttribute);

  // i8* pony_alloc(i64)
  params[0] = c->i64;
  type = LLVMFunctionType(c->void_ptr, params, 1, false);
  value = LLVMAddFunction(c->module, "pony_alloc", type);
  LLVMAddFunctionAttr(value, LLVMNoUnwindAttribute);
  LLVMSetReturnNoAlias(value);

  // i8* pony_realloc(i8*, i64)
  params[0] = c->void_ptr;
  params[1] = c->i64;
  type = LLVMFunctionType(c->void_ptr, params, 2, false);
  value = LLVMAddFunction(c->module, "pony_realloc", type);
  LLVMAddFunctionAttr(value, LLVMNoUnwindAttribute);
  LLVMSetReturnNoAlias(value);

  // i8* pony_alloc_final(i64, c->final_fn)
  params[0] = c->i64;
  params[1] = c->final_fn;
  type = LLVMFunctionType(c->void_ptr, params, 2, false);
  value = LLVMAddFunction(c->module, "pony_alloc_final", type);
  LLVMAddFunctionAttr(value, LLVMNoUnwindAttribute);
  LLVMSetReturnNoAlias(value);

  // $message* pony_alloc_msg(i32, i32)
  params[0] = c->i32;
  params[1] = c->i32;
  type = LLVMFunctionType(c->msg_ptr, params, 2, false);
  value = LLVMAddFunction(c->module, "pony_alloc_msg", type);
  LLVMAddFunctionAttr(value, LLVMNoUnwindAttribute);
  LLVMSetReturnNoAlias(value);

  // void pony_trace(i8*)
  params[0] = c->void_ptr;
  type = LLVMFunctionType(c->void_type, params, 1, false);
  value = LLVMAddFunction(c->module, "pony_trace", type);
  LLVMAddFunctionAttr(value, LLVMNoUnwindAttribute);

  // void pony_traceactor($object*)
  params[0] = c->object_ptr;
  type = LLVMFunctionType(c->void_type, params, 1, false);
  value = LLVMAddFunction(c->module, "pony_traceactor", type);
  LLVMAddFunctionAttr(value, LLVMNoUnwindAttribute);

  // void pony_traceobject($object*, trace_fn)
  params[0] = c->object_ptr;
  params[1] = c->trace_fn;
  type = LLVMFunctionType(c->void_type, params, 2, false);
  value = LLVMAddFunction(c->module, "pony_traceobject", type);
  LLVMAddFunctionAttr(value, LLVMNoUnwindAttribute);

  // void pony_traceunknown($object*)
  params[0] = c->object_ptr;
  type = LLVMFunctionType(c->void_type, params, 1, false);
  value = LLVMAddFunction(c->module, "pony_traceunknown", type);
  LLVMAddFunctionAttr(value, LLVMNoUnwindAttribute);

  // void pony_trace_tag_or_actor($object*)
  params[0] = c->object_ptr;
  type = LLVMFunctionType(c->void_type, params, 1, false);
  value = LLVMAddFunction(c->module, "pony_trace_tag_or_actor", type);
  LLVMAddFunctionAttr(value, LLVMNoUnwindAttribute);

  // void pony_gc_send()
  type = LLVMFunctionType(c->void_type, NULL, 0, false);
  value = LLVMAddFunction(c->module, "pony_gc_send", type);
  LLVMAddFunctionAttr(value, LLVMNoUnwindAttribute);

  // void pony_gc_recv()
  type = LLVMFunctionType(c->void_type, NULL, 0, false);
  value = LLVMAddFunction(c->module, "pony_gc_recv", type);
  LLVMAddFunctionAttr(value, LLVMNoUnwindAttribute);

  // void pony_send_done()
  type = LLVMFunctionType(c->void_type, NULL, 0, false);
  value = LLVMAddFunction(c->module, "pony_send_done", type);
  LLVMAddFunctionAttr(value, LLVMNoUnwindAttribute);

  // void pony_recv_done()
  type = LLVMFunctionType(c->void_type, NULL, 0, false);
  value = LLVMAddFunction(c->module, "pony_recv_done", type);
  LLVMAddFunctionAttr(value, LLVMNoUnwindAttribute);

  // i32 pony_init(i32, i8**)
  params[0] = c->i32;
  params[1] = LLVMPointerType(c->void_ptr, 0);
  type = LLVMFunctionType(c->i32, params, 2, false);
  value = LLVMAddFunction(c->module, "pony_init", type);
  LLVMAddFunctionAttr(value, LLVMNoUnwindAttribute);

  // void pony_become($object*)
  params[0] = c->object_ptr;
  type = LLVMFunctionType(c->void_type, params, 1, false);
  value = LLVMAddFunction(c->module, "pony_become", type);
  LLVMAddFunctionAttr(value, LLVMNoUnwindAttribute);

  // i32 pony_start(i32)
  params[0] = c->i32;
  type = LLVMFunctionType(c->i32, params, 1, false);
  value = LLVMAddFunction(c->module, "pony_start", type);
  LLVMAddFunctionAttr(value, LLVMNoUnwindAttribute);

  // void pony_throw()
  type = LLVMFunctionType(c->void_type, NULL, 0, false);
  LLVMAddFunction(c->module, "pony_throw", type);

  // i32 pony_personality_v0(...)
  type = LLVMFunctionType(c->i32, NULL, 0, true);
  c->personality = LLVMAddFunction(c->module, "pony_personality_v0", type);

  // i8* memcpy(...)
  type = LLVMFunctionType(c->void_ptr, NULL, 0, true);
  value = LLVMAddFunction(c->module, "memcpy", type);
  LLVMAddFunctionAttr(value, LLVMNoUnwindAttribute);

  // i8* memmove(...)
  type = LLVMFunctionType(c->void_ptr, NULL, 0, true);
  value = LLVMAddFunction(c->module, "memmove", type);
  LLVMAddFunctionAttr(value, LLVMNoUnwindAttribute);
}
Пример #17
0
static LLVMValueRef
lp_build_gather_avx2(struct gallivm_state *gallivm,
                     unsigned length,
                     unsigned src_width,
                     struct lp_type dst_type,
                     LLVMValueRef base_ptr,
                     LLVMValueRef offsets)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMTypeRef src_type, src_vec_type;
   LLVMValueRef res;
   struct lp_type res_type = dst_type;
   res_type.length *= length;

   if (dst_type.floating) {
      src_type = src_width == 64 ? LLVMDoubleTypeInContext(gallivm->context) :
                                   LLVMFloatTypeInContext(gallivm->context);
   } else {
      src_type = LLVMIntTypeInContext(gallivm->context, src_width);
   }
   src_vec_type = LLVMVectorType(src_type, length);

   /* XXX should allow hw scaling (can handle i8, i16, i32, i64 for x86) */
   assert(LLVMTypeOf(base_ptr) == LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0));

   if (0) {
      /*
       * XXX: This will cause LLVM pre 3.7 to hang; it works on LLVM 3.8 but
       * will not use the AVX2 gather instrinsics (even with llvm 4.0), at
       * least with Haswell. See
       * http://lists.llvm.org/pipermail/llvm-dev/2016-January/094448.html
       * And the generated code doing the emulation is quite a bit worse
       * than what we get by doing it ourselves too.
       */
      LLVMTypeRef i32_type = LLVMIntTypeInContext(gallivm->context, 32);
      LLVMTypeRef i32_vec_type = LLVMVectorType(i32_type, length);
      LLVMTypeRef i1_type = LLVMIntTypeInContext(gallivm->context, 1);
      LLVMTypeRef i1_vec_type = LLVMVectorType(i1_type, length);
      LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0);
      LLVMValueRef src_ptr;

      base_ptr = LLVMBuildBitCast(builder, base_ptr, src_ptr_type, "");

      /* Rescale offsets from bytes to elements */
      LLVMValueRef scale = LLVMConstInt(i32_type, src_width/8, 0);
      scale = lp_build_broadcast(gallivm, i32_vec_type, scale);
      assert(LLVMTypeOf(offsets) == i32_vec_type);
      offsets = LLVMBuildSDiv(builder, offsets, scale, "");

      src_ptr = LLVMBuildGEP(builder, base_ptr, &offsets, 1, "vector-gep");

      char intrinsic[64];
      util_snprintf(intrinsic, sizeof intrinsic, "llvm.masked.gather.v%u%s%u",
                    length, dst_type.floating ? "f" : "i", src_width);
      LLVMValueRef alignment = LLVMConstInt(i32_type, src_width/8, 0);
      LLVMValueRef mask = LLVMConstAllOnes(i1_vec_type);
      LLVMValueRef passthru = LLVMGetUndef(src_vec_type);

      LLVMValueRef args[] = { src_ptr, alignment, mask, passthru };

      res = lp_build_intrinsic(builder, intrinsic, src_vec_type, args, 4, 0);
   } else {
      LLVMTypeRef i8_type = LLVMIntTypeInContext(gallivm->context, 8);
      const char *intrinsic = NULL;
      unsigned l_idx = 0;

      assert(src_width == 32 || src_width == 64);
      if (src_width == 32) {
         assert(length == 4 || length == 8);
      } else {
         assert(length == 2 || length == 4);
      }

      static const char *intrinsics[2][2][2] = {

         {{"llvm.x86.avx2.gather.d.d",
           "llvm.x86.avx2.gather.d.d.256"},
          {"llvm.x86.avx2.gather.d.q",
           "llvm.x86.avx2.gather.d.q.256"}},

         {{"llvm.x86.avx2.gather.d.ps",
           "llvm.x86.avx2.gather.d.ps.256"},
          {"llvm.x86.avx2.gather.d.pd",
           "llvm.x86.avx2.gather.d.pd.256"}},
      };

      if ((src_width == 32 && length == 8) ||
          (src_width == 64 && length == 4)) {
         l_idx = 1;
      }
      intrinsic = intrinsics[dst_type.floating][src_width == 64][l_idx];

      LLVMValueRef passthru = LLVMGetUndef(src_vec_type);
      LLVMValueRef mask = LLVMConstAllOnes(src_vec_type);
      mask = LLVMConstBitCast(mask, src_vec_type);
      LLVMValueRef scale = LLVMConstInt(i8_type, 1, 0);

      LLVMValueRef args[] = { passthru, base_ptr, offsets, mask, scale };

      res = lp_build_intrinsic(builder, intrinsic, src_vec_type, args, 5, 0);
   }
   res = LLVMBuildBitCast(builder, res, lp_build_vec_type(gallivm, res_type), "");

   return res;
}
/**
 * Return mask ? a : b;
 *
 * mask is a bitwise mask, composed of 0 or ~0 for each element. Any other value
 * will yield unpredictable results.
 */
LLVMValueRef
lp_build_select(struct lp_build_context *bld,
                LLVMValueRef mask,
                LLVMValueRef a,
                LLVMValueRef b)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMContextRef lc = bld->gallivm->context;
   struct lp_type type = bld->type;
   LLVMValueRef res;

   assert(lp_check_value(type, a));
   assert(lp_check_value(type, b));

   if(a == b)
      return a;

   if (type.length == 1) {
      mask = LLVMBuildTrunc(builder, mask, LLVMInt1TypeInContext(lc), "");
      res = LLVMBuildSelect(builder, mask, a, b, "");
   }
   else if (0) {
      /* Generate a vector select.
       *
       * XXX: Using vector selects would avoid emitting intrinsics, but they aren't
       * properly supported yet.
       *
       * LLVM 3.0 includes experimental support provided the -promote-elements
       * options is passed to LLVM's command line (e.g., via
       * llvm::cl::ParseCommandLineOptions), but resulting code quality is much
       * worse, probably because some optimization passes don't know how to
       * handle vector selects.
       *
       * See also:
       * - http://lists.cs.uiuc.edu/pipermail/llvmdev/2011-October/043659.html
       */

      /* Convert the mask to a vector of booleans.
       * XXX: There are two ways to do this. Decide what's best.
       */
      if (1) {
         LLVMTypeRef bool_vec_type = LLVMVectorType(LLVMInt1TypeInContext(lc), type.length);
         mask = LLVMBuildTrunc(builder, mask, bool_vec_type, "");
      } else {
         mask = LLVMBuildICmp(builder, LLVMIntNE, mask, LLVMConstNull(bld->int_vec_type), "");
      }
      res = LLVMBuildSelect(builder, mask, a, b, "");
   }
   else if (((util_cpu_caps.has_sse4_1 &&
              type.width * type.length == 128) ||
             (util_cpu_caps.has_avx &&
              type.width * type.length == 256 && type.width >= 32)) &&
            !LLVMIsConstant(a) &&
            !LLVMIsConstant(b) &&
            !LLVMIsConstant(mask)) {
      const char *intrinsic;
      LLVMTypeRef arg_type;
      LLVMValueRef args[3];

      /*
       *  There's only float blend in AVX but can just cast i32/i64
       *  to float.
       */
      if (type.width * type.length == 256) {
         if (type.width == 64) {
           intrinsic = "llvm.x86.avx.blendv.pd.256";
           arg_type = LLVMVectorType(LLVMDoubleTypeInContext(lc), 4);
         }
         else {
            intrinsic = "llvm.x86.avx.blendv.ps.256";
            arg_type = LLVMVectorType(LLVMFloatTypeInContext(lc), 8);
         }
      }
      else if (type.floating &&
               type.width == 64) {
         intrinsic = "llvm.x86.sse41.blendvpd";
         arg_type = LLVMVectorType(LLVMDoubleTypeInContext(lc), 2);
      } else if (type.floating &&
                 type.width == 32) {
         intrinsic = "llvm.x86.sse41.blendvps";
         arg_type = LLVMVectorType(LLVMFloatTypeInContext(lc), 4);
      } else {
         intrinsic = "llvm.x86.sse41.pblendvb";
         arg_type = LLVMVectorType(LLVMInt8TypeInContext(lc), 16);
      }

      if (arg_type != bld->int_vec_type) {
         mask = LLVMBuildBitCast(builder, mask, arg_type, "");
      }

      if (arg_type != bld->vec_type) {
         a = LLVMBuildBitCast(builder, a, arg_type, "");
         b = LLVMBuildBitCast(builder, b, arg_type, "");
      }

      args[0] = b;
      args[1] = a;
      args[2] = mask;

      res = lp_build_intrinsic(builder, intrinsic,
                               arg_type, args, Elements(args));

      if (arg_type != bld->vec_type) {
         res = LLVMBuildBitCast(builder, res, bld->vec_type, "");
      }
   }
   else {
      res = lp_build_select_bitwise(bld, mask, a, b);
   }

   return res;
}
Пример #19
0
/*
 * Do a cached lookup.
 *
 * Returns (vectors of) 4x8 rgba aos value
 */
LLVMValueRef
lp_build_fetch_cached_texels(struct gallivm_state *gallivm,
                             const struct util_format_description *format_desc,
                             unsigned n,
                             LLVMValueRef base_ptr,
                             LLVMValueRef offset,
                             LLVMValueRef i,
                             LLVMValueRef j,
                             LLVMValueRef cache)

{
   LLVMBuilderRef builder = gallivm->builder;
   unsigned count, low_bit, log2size;
   LLVMValueRef color, offset_stored, addr, ptr_addrtrunc, tmp;
   LLVMValueRef ij_index, hash_index, hash_mask, block_index;
   LLVMTypeRef i8t = LLVMInt8TypeInContext(gallivm->context);
   LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
   LLVMTypeRef i64t = LLVMInt64TypeInContext(gallivm->context);
   struct lp_type type;
   struct lp_build_context bld32;
   memset(&type, 0, sizeof type);
   type.width = 32;
   type.length = n;

   assert(format_desc->block.width == 4);
   assert(format_desc->block.height == 4);

   lp_build_context_init(&bld32, gallivm, type);

   /*
    * compute hash - we use direct mapped cache, the hash function could
    *                be better but it needs to be simple
    * per-element:
    *    compare offset with offset stored at tag (hash)
    *    if not equal decode/store block, update tag
    *    extract color from cache
    *    assemble result vector
    */

   /* TODO: not ideal with 32bit pointers... */

   low_bit = util_logbase2(format_desc->block.bits / 8);
   log2size = util_logbase2(LP_BUILD_FORMAT_CACHE_SIZE);
   addr = LLVMBuildPtrToInt(builder, base_ptr, i64t, "");
   ptr_addrtrunc = LLVMBuildPtrToInt(builder, base_ptr, i32t, "");
   ptr_addrtrunc = lp_build_broadcast_scalar(&bld32, ptr_addrtrunc);
   /* For the hash function, first mask off the unused lowest bits. Then just
      do some xor with address bits - only use lower 32bits */
   ptr_addrtrunc = LLVMBuildAdd(builder, offset, ptr_addrtrunc, "");
   ptr_addrtrunc = LLVMBuildLShr(builder, ptr_addrtrunc,
                                 lp_build_const_int_vec(gallivm, type, low_bit), "");
   /* This only really makes sense for size 64,128,256 */
   hash_index = ptr_addrtrunc;
   ptr_addrtrunc = LLVMBuildLShr(builder, ptr_addrtrunc,
                                 lp_build_const_int_vec(gallivm, type, 2*log2size), "");
   hash_index = LLVMBuildXor(builder, ptr_addrtrunc, hash_index, "");
   tmp = LLVMBuildLShr(builder, hash_index,
                       lp_build_const_int_vec(gallivm, type, log2size), "");
   hash_index = LLVMBuildXor(builder, hash_index, tmp, "");

   hash_mask = lp_build_const_int_vec(gallivm, type, LP_BUILD_FORMAT_CACHE_SIZE - 1);
   hash_index = LLVMBuildAnd(builder, hash_index, hash_mask, "");
   ij_index = LLVMBuildShl(builder, i, lp_build_const_int_vec(gallivm, type, 2), "");
   ij_index = LLVMBuildAdd(builder, ij_index, j, "");
   block_index = LLVMBuildShl(builder, hash_index,
                              lp_build_const_int_vec(gallivm, type, 4), "");
   block_index = LLVMBuildAdd(builder, ij_index, block_index, "");

   if (n > 1) {
      color = LLVMGetUndef(LLVMVectorType(i32t, n));
      for (count = 0; count < n; count++) {
         LLVMValueRef index, cond, colorx;
         LLVMValueRef block_indexx, hash_indexx, addrx, offsetx, ptr_addrx;
         struct lp_build_if_state if_ctx;

         index = lp_build_const_int32(gallivm, count);
         offsetx = LLVMBuildExtractElement(builder, offset, index, "");
         addrx = LLVMBuildZExt(builder, offsetx, i64t, "");
         addrx = LLVMBuildAdd(builder, addrx, addr, "");
         block_indexx = LLVMBuildExtractElement(builder, block_index, index, "");
         hash_indexx = LLVMBuildLShr(builder, block_indexx,
                                     lp_build_const_int32(gallivm, 4), "");
         offset_stored = lookup_tag_data(gallivm, cache, hash_indexx);
         cond = LLVMBuildICmp(builder, LLVMIntNE, offset_stored, addrx, "");

         lp_build_if(&if_ctx, gallivm, cond);
         {
            ptr_addrx = LLVMBuildIntToPtr(builder, addrx,
                                          LLVMPointerType(i8t, 0), "");
            update_cached_block(gallivm, format_desc, ptr_addrx, hash_indexx, cache);
#if LP_BUILD_FORMAT_CACHE_DEBUG
            update_cache_access(gallivm, cache, 1,
                                LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_MISS);
#endif
         }
         lp_build_endif(&if_ctx);

         colorx = lookup_cached_pixel(gallivm, cache, block_indexx);

         color = LLVMBuildInsertElement(builder, color, colorx,
                                        lp_build_const_int32(gallivm, count), "");
      }
   }
   else {
      LLVMValueRef cond;
      struct lp_build_if_state if_ctx;

      tmp = LLVMBuildZExt(builder, offset, i64t, "");
      addr = LLVMBuildAdd(builder, tmp, addr, "");
      offset_stored = lookup_tag_data(gallivm, cache, hash_index);
      cond = LLVMBuildICmp(builder, LLVMIntNE, offset_stored, addr, "");

      lp_build_if(&if_ctx, gallivm, cond);
      {
         tmp = LLVMBuildIntToPtr(builder, addr, LLVMPointerType(i8t, 0), "");
         update_cached_block(gallivm, format_desc, tmp, hash_index, cache);
#if LP_BUILD_FORMAT_CACHE_DEBUG
         update_cache_access(gallivm, cache, 1,
                             LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_MISS);
#endif
      }
      lp_build_endif(&if_ctx);

      color = lookup_cached_pixel(gallivm, cache, block_index);
   }
#if LP_BUILD_FORMAT_CACHE_DEBUG
   update_cache_access(gallivm, cache, n,
                       LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_TOTAL);
#endif
   return LLVMBuildBitCast(builder, color, LLVMVectorType(i8t, n * 4), "");
}
Пример #20
0
static void
update_cached_block(struct gallivm_state *gallivm,
                    const struct util_format_description *format_desc,
                    LLVMValueRef ptr_addr,
                    LLVMValueRef hash_index,
                    LLVMValueRef cache)

{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMTypeRef i8t = LLVMInt8TypeInContext(gallivm->context);
   LLVMTypeRef pi8t = LLVMPointerType(i8t, 0);
   LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
   LLVMTypeRef i32x4 = LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4);
   LLVMValueRef function;
   LLVMValueRef tag_value, tmp_ptr;
   LLVMValueRef col[4];
   unsigned i, j;

   /*
    * Use format_desc->fetch_rgba_8unorm() for each pixel in the block.
    * This doesn't actually make any sense whatsoever, someone would need
    * to write a function doing this for all pixels in a block (either as
    * an external c function or with generated code). Don't ask.
    */

   {
      /*
       * Function to call looks like:
       *   fetch(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j)
       */
      LLVMTypeRef ret_type;
      LLVMTypeRef arg_types[4];
      LLVMTypeRef function_type;

      assert(format_desc->fetch_rgba_8unorm);

      ret_type = LLVMVoidTypeInContext(gallivm->context);
      arg_types[0] = pi8t;
      arg_types[1] = pi8t;
      arg_types[2] = i32t;
      arg_types[3] = i32t;
      function_type = LLVMFunctionType(ret_type, arg_types,
                                       ARRAY_SIZE(arg_types), 0);

      /* make const pointer for the C fetch_rgba_8unorm function */
      function = lp_build_const_int_pointer(gallivm,
         func_to_pointer((func_pointer) format_desc->fetch_rgba_8unorm));

      /* cast the callee pointer to the function's type */
      function = LLVMBuildBitCast(builder, function,
                                  LLVMPointerType(function_type, 0),
                                  "cast callee");
   }

   tmp_ptr = lp_build_array_alloca(gallivm, i32x4,
                                   lp_build_const_int32(gallivm, 16),
                                   "tmp_decode_store");
   tmp_ptr = LLVMBuildBitCast(builder, tmp_ptr, pi8t, "");

   /*
    * Invoke format_desc->fetch_rgba_8unorm() for each pixel.
    * This is going to be really really slow.
    * Note: the block store format is actually
    * x0y0x0y1x0y2x0y3 x1y0x1y1x1y2x1y3 ...
    */
   for (i = 0; i < 4; ++i) {
      for (j = 0; j < 4; ++j) {
         LLVMValueRef args[4];
         LLVMValueRef dst_offset = lp_build_const_int32(gallivm, (i * 4 + j) * 4);

         /*
          * Note we actually supply a pointer to the start of the block,
          * not the start of the texture.
          */
         args[0] = LLVMBuildGEP(gallivm->builder, tmp_ptr, &dst_offset, 1, "");
         args[1] = ptr_addr;
         args[2] = LLVMConstInt(i32t, i, 0);
         args[3] = LLVMConstInt(i32t, j, 0);
         LLVMBuildCall(builder, function, args, ARRAY_SIZE(args), "");
      }
   }

   /* Finally store the block - pointless mem copy + update tag. */
   tmp_ptr = LLVMBuildBitCast(builder, tmp_ptr, LLVMPointerType(i32x4, 0), "");
   for (i = 0; i < 4; ++i) {
      LLVMValueRef tmp_offset = lp_build_const_int32(gallivm, i);
      LLVMValueRef ptr = LLVMBuildGEP(gallivm->builder, tmp_ptr, &tmp_offset, 1, "");
      col[i] = LLVMBuildLoad(builder, ptr, "");
   }

   tag_value = LLVMBuildPtrToInt(gallivm->builder, ptr_addr,
                                 LLVMInt64TypeInContext(gallivm->context), "");
   store_cached_block(gallivm, col, tag_value, hash_index, cache);
}
Пример #21
0
// Generates the function prototype but overrides the name. This is used by
// the metadata to create external APIs to C.
//
// node          - The node.
// module        - The compilation unit this node is a part of.
// function_name - The name of the function to generate.
// is_external   - A flag stating if this is an external prototype.
// value         - A pointer to where the LLVM value should be returned to.
//
// Returns 0 if successful, otherwise returns -1.
int qip_ast_function_codegen_prototype_with_name(qip_ast_node *node,
                                                 qip_module *module,
                                                 bstring function_name,
                                                 bool is_external,
                                                 LLVMValueRef *value)
{
    int rc;
    unsigned int i;
    LLVMValueRef func = NULL;
    bstring arg_type_name = NULL;
    bstring return_type_name = NULL;

    LLVMContextRef context = LLVMGetModuleContext(module->llvm_module);

    // Determine the number of arguments. External calls always have an
    // additional first argument that is the module reference.
    unsigned int offset = (is_external ? 1 : 0);
    unsigned int total_arg_count = node->function.arg_count + offset;

    // Check for an existing prototype.
    func = LLVMGetNamedFunction(module->llvm_module, bdata(function_name));
    
    // If a prototype exists then simply verify it matches and return it.
    if(func != NULL) {
        check(LLVMCountBasicBlocks(func) == 0, "Illegal function redefinition");
        check(LLVMCountParams(func) == total_arg_count, "Function prototype already exists with different arguments");
    }
    // If there is no prototype then create one.
    else {
        // Dynamically generate the return type of the function if it is missing.
        if(node->function.return_type == NULL) {
            rc = qip_ast_function_generate_return_type(node, module);
            check(rc == 0, "Unable to generate return type for function");
        }

        // Create a list of function argument types.
        qip_ast_node *arg;
        LLVMTypeRef *params = malloc(sizeof(LLVMTypeRef) * total_arg_count);

        // Create module argument for external calls.
        if(is_external) {
            params[0] = LLVMPointerType(LLVMInt8TypeInContext(context), 0);
        }

        // Create arguments.
        for(i=0; i<node->function.arg_count; i++) {
            qip_ast_node *arg = node->function.args[i];
            LLVMTypeRef param = NULL;
            rc = qip_module_get_type_ref(module, arg->farg.var_decl->var_decl.type, NULL, &param);
            check(rc == 0, "Unable to determine function argument type");
        
            // Pass argument as reference if this is a complex type.
            if(qip_module_is_complex_type(module, param)) {
                params[i+offset] = LLVMPointerType(param, 0);
            }
            // Otherwise pass it by value.
            else {
                params[i+offset] = param;
            }
        }

        // Determine return type.
        LLVMTypeRef return_type;
        rc = qip_module_get_type_ref(module, node->function.return_type, NULL, &return_type);
        check(rc == 0, "Unable to determine function return type");

        if(qip_module_is_complex_type(module, return_type)) {
            return_type = LLVMPointerType(return_type, 0);
        }

        // Create function type.
        LLVMTypeRef funcType = LLVMFunctionType(return_type, params, total_arg_count, false);
        check(funcType != NULL, "Unable to create function type");

        // Create function.
        func = LLVMAddFunction(module->llvm_module, bdata(function_name), funcType);
        check(func != NULL, "Unable to create function");
    
        // Assign module argument name.
        if(is_external) {
            LLVMSetValueName(LLVMGetParam(func, 0), "module");
        }
    
        // Assign names to function arguments.
        for(i=0; i<node->function.arg_count; i++) {
            arg = node->function.args[i];
            LLVMValueRef param = LLVMGetParam(func, i+offset);
            LLVMSetValueName(param, bdata(arg->farg.var_decl->var_decl.name));
        }
    }
    
    // Return function prototype;
    *value = func;
    
    bdestroy(arg_type_name);
    bdestroy(return_type_name);
    return 0;

error:
    bdestroy(arg_type_name);
    bdestroy(return_type_name);
    if(func) LLVMDeleteFunction(func);
    *value = NULL;
    return -1;
}
Пример #22
0
/**
 * Perform the occlusion test and increase the counter.
 * Test the depth mask. Add the number of channel which has none zero mask
 * into the occlusion counter. e.g. maskvalue is {-1, -1, -1, -1}.
 * The counter will add 4.
 *
 * \param type holds element type of the mask vector.
 * \param maskvalue is the depth test mask.
 * \param counter is a pointer of the uint32 counter.
 */
void
lp_build_occlusion_count(struct gallivm_state *gallivm,
                         struct lp_type type,
                         LLVMValueRef maskvalue,
                         LLVMValueRef counter)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMContextRef context = gallivm->context;
   LLVMValueRef countmask = lp_build_const_int_vec(gallivm, type, 1);
   LLVMValueRef count, newcount;

   assert(type.length <= 16);
   assert(type.floating);

   if(util_cpu_caps.has_sse && type.length == 4) {
      const char *movmskintr = "llvm.x86.sse.movmsk.ps";
      const char *popcntintr = "llvm.ctpop.i32";
      LLVMValueRef bits = LLVMBuildBitCast(builder, maskvalue,
                                           lp_build_vec_type(gallivm, type), "");
      bits = lp_build_intrinsic_unary(builder, movmskintr,
                                      LLVMInt32TypeInContext(context), bits);
      count = lp_build_intrinsic_unary(builder, popcntintr,
                                       LLVMInt32TypeInContext(context), bits);
   }
   else if(util_cpu_caps.has_avx && type.length == 8) {
      const char *movmskintr = "llvm.x86.avx.movmsk.ps.256";
      const char *popcntintr = "llvm.ctpop.i32";
      LLVMValueRef bits = LLVMBuildBitCast(builder, maskvalue,
                                           lp_build_vec_type(gallivm, type), "");
      bits = lp_build_intrinsic_unary(builder, movmskintr,
                                      LLVMInt32TypeInContext(context), bits);
      count = lp_build_intrinsic_unary(builder, popcntintr,
                                       LLVMInt32TypeInContext(context), bits);
   }
   else {
      unsigned i;
      LLVMValueRef countv = LLVMBuildAnd(builder, maskvalue, countmask, "countv");
      LLVMTypeRef counttype = LLVMIntTypeInContext(context, type.length * 8);
      LLVMTypeRef i8vntype = LLVMVectorType(LLVMInt8TypeInContext(context), type.length * 4);
      LLVMValueRef shufflev, countd;
      LLVMValueRef shuffles[16];
      const char *popcntintr = NULL;

      countv = LLVMBuildBitCast(builder, countv, i8vntype, "");

       for (i = 0; i < type.length; i++) {
          shuffles[i] = lp_build_const_int32(gallivm, 4*i);
       }

       shufflev = LLVMConstVector(shuffles, type.length);
       countd = LLVMBuildShuffleVector(builder, countv, LLVMGetUndef(i8vntype), shufflev, "");
       countd = LLVMBuildBitCast(builder, countd, counttype, "countd");

       /*
        * XXX FIXME
        * this is bad on cpus without popcount (on x86 supported by intel
        * nehalem, amd barcelona, and up - not tied to sse42).
        * Would be much faster to just sum the 4 elements of the vector with
        * some horizontal add (shuffle/add/shuffle/add after the initial and).
        */
       switch (type.length) {
       case 4:
          popcntintr = "llvm.ctpop.i32";
          break;
       case 8:
          popcntintr = "llvm.ctpop.i64";
          break;
       case 16:
          popcntintr = "llvm.ctpop.i128";
          break;
       default:
          assert(0);
       }
       count = lp_build_intrinsic_unary(builder, popcntintr, counttype, countd);

       if (type.length > 4) {
          count = LLVMBuildTrunc(builder, count, LLVMIntTypeInContext(context, 32), "");
       }
   }
   newcount = LLVMBuildLoad(builder, counter, "origcount");
   newcount = LLVMBuildAdd(builder, newcount, count, "newcount");
   LLVMBuildStore(builder, newcount, counter);
}
Пример #23
0
/**
 * Fetch a pixel into a 4 float AoS.
 *
 * \param format_desc  describes format of the image we're fetching from
 * \param ptr  address of the pixel block (or the texel if uncompressed)
 * \param i, j  the sub-block pixel coordinates.  For non-compressed formats
 *              these will always be (0, 0).
 * \return  a 4 element vector with the pixel's RGBA values.
 */
LLVMValueRef
lp_build_fetch_rgba_aos(struct gallivm_state *gallivm,
                        const struct util_format_description *format_desc,
                        struct lp_type type,
                        LLVMValueRef base_ptr,
                        LLVMValueRef offset,
                        LLVMValueRef i,
                        LLVMValueRef j)
{
   LLVMBuilderRef builder = gallivm->builder;
   unsigned num_pixels = type.length / 4;
   struct lp_build_context bld;

   assert(type.length <= LP_MAX_VECTOR_LENGTH);
   assert(type.length % 4 == 0);

   lp_build_context_init(&bld, gallivm, type);

   /*
    * Trivial case
    *
    * The format matches the type (apart of a swizzle) so no need for
    * scaling or converting.
    */

   if (format_matches_type(format_desc, type) &&
       format_desc->block.bits <= type.width * 4 &&
       util_is_power_of_two(format_desc->block.bits)) {
      LLVMValueRef packed;

      /*
       * The format matches the type (apart of a swizzle) so no need for
       * scaling or converting.
       */

      packed = lp_build_gather(gallivm, type.length/4,
                               format_desc->block.bits, type.width*4,
                               base_ptr, offset);

      assert(format_desc->block.bits <= type.width * type.length);

      packed = LLVMBuildBitCast(gallivm->builder, packed,
                                lp_build_vec_type(gallivm, type), "");

      return lp_build_format_swizzle_aos(format_desc, &bld, packed);
   }

   /*
    * Bit arithmetic
    */

   if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
       (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
        format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) &&
       format_desc->block.width == 1 &&
       format_desc->block.height == 1 &&
       util_is_power_of_two(format_desc->block.bits) &&
       format_desc->block.bits <= 32 &&
       format_desc->is_bitmask &&
       !format_desc->is_mixed &&
       (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED ||
        format_desc->channel[1].type == UTIL_FORMAT_TYPE_UNSIGNED)) {

      LLVMValueRef tmps[LP_MAX_VECTOR_LENGTH/4];
      LLVMValueRef res;
      unsigned k;

      /*
       * Unpack a pixel at a time into a <4 x float> RGBA vector
       */

      for (k = 0; k < num_pixels; ++k) {
         LLVMValueRef packed;

         packed = lp_build_gather_elem(gallivm, num_pixels,
                                       format_desc->block.bits, 32,
                                       base_ptr, offset, k);

         tmps[k] = lp_build_unpack_arith_rgba_aos(gallivm,
                                                  format_desc,
                                                  packed);
      }

      /*
       * Type conversion.
       *
       * TODO: We could avoid floating conversion for integer to
       * integer conversions.
       */

      if (gallivm_debug & GALLIVM_DEBUG_PERF && !type.floating) {
         debug_printf("%s: unpacking %s with floating point\n",
                      __FUNCTION__, format_desc->short_name);
      }

      lp_build_conv(gallivm,
                    lp_float32_vec4_type(),
                    type,
                    tmps, num_pixels, &res, 1);

      return lp_build_format_swizzle_aos(format_desc, &bld, res);
   }

   /*
    * YUV / subsampled formats
    */

   if (format_desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
      struct lp_type tmp_type;
      LLVMValueRef tmp;

      memset(&tmp_type, 0, sizeof tmp_type);
      tmp_type.width = 8;
      tmp_type.length = num_pixels * 4;
      tmp_type.norm = TRUE;

      tmp = lp_build_fetch_subsampled_rgba_aos(gallivm,
                                               format_desc,
                                               num_pixels,
                                               base_ptr,
                                               offset,
                                               i, j);

      lp_build_conv(gallivm,
                    tmp_type, type,
                    &tmp, 1, &tmp, 1);

      return tmp;
   }

   /*
    * Fallback to util_format_description::fetch_rgba_8unorm().
    */

   if (format_desc->fetch_rgba_8unorm &&
       !type.floating && type.width == 8 && !type.sign && type.norm) {
      /*
       * Fallback to calling util_format_description::fetch_rgba_8unorm.
       *
       * This is definitely not the most efficient way of fetching pixels, as
       * we miss the opportunity to do vectorization, but this it is a
       * convenient for formats or scenarios for which there was no opportunity
       * or incentive to optimize.
       */

      LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
      char name[256];
      LLVMTypeRef i8t = LLVMInt8TypeInContext(gallivm->context);
      LLVMTypeRef pi8t = LLVMPointerType(i8t, 0);
      LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
      LLVMValueRef function;
      LLVMValueRef tmp_ptr;
      LLVMValueRef tmp;
      LLVMValueRef res;
      LLVMValueRef callee;
      unsigned k;

      util_snprintf(name, sizeof name, "util_format_%s_fetch_rgba_8unorm",
                    format_desc->short_name);

      if (gallivm_debug & GALLIVM_DEBUG_PERF) {
         debug_printf("%s: falling back to %s\n", __FUNCTION__, name);
      }

      /*
       * Declare and bind format_desc->fetch_rgba_8unorm().
       */

      function = LLVMGetNamedFunction(module, name);
      if (!function) {
         /*
          * Function to call looks like:
          *   fetch(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j)
          */
         LLVMTypeRef ret_type;
         LLVMTypeRef arg_types[4];
         LLVMTypeRef function_type;

         ret_type = LLVMVoidTypeInContext(gallivm->context);
         arg_types[0] = pi8t;
         arg_types[1] = pi8t;
         arg_types[2] = i32t;
         arg_types[3] = i32t;
         function_type = LLVMFunctionType(ret_type, arg_types,
                                          Elements(arg_types), 0);
         function = LLVMAddFunction(module, name, function_type);

         LLVMSetFunctionCallConv(function, LLVMCCallConv);
         LLVMSetLinkage(function, LLVMExternalLinkage);

         assert(LLVMIsDeclaration(function));
      }

      /* make const pointer for the C fetch_rgba_float function */
      callee = lp_build_const_int_pointer(gallivm,
         func_to_pointer((func_pointer) format_desc->fetch_rgba_8unorm));

      /* cast the callee pointer to the function's type */
      function = LLVMBuildBitCast(builder, callee,
                                  LLVMTypeOf(function), "cast callee");

      tmp_ptr = lp_build_alloca(gallivm, i32t, "");

      res = LLVMGetUndef(LLVMVectorType(i32t, num_pixels));

      /*
       * Invoke format_desc->fetch_rgba_8unorm() for each pixel and insert the result
       * in the SoA vectors.
       */

      for (k = 0; k < num_pixels; ++k) {
         LLVMValueRef index = lp_build_const_int32(gallivm, k);
         LLVMValueRef args[4];

         args[0] = LLVMBuildBitCast(builder, tmp_ptr, pi8t, "");
         args[1] = lp_build_gather_elem_ptr(gallivm, num_pixels,
                                            base_ptr, offset, k);

         if (num_pixels == 1) {
            args[2] = i;
            args[3] = j;
         }
         else {
            args[2] = LLVMBuildExtractElement(builder, i, index, "");
            args[3] = LLVMBuildExtractElement(builder, j, index, "");
         }

         LLVMBuildCall(builder, function, args, Elements(args), "");

         tmp = LLVMBuildLoad(builder, tmp_ptr, "");

         if (num_pixels == 1) {
            res = tmp;
         }
         else {
            res = LLVMBuildInsertElement(builder, res, tmp, index, "");
         }
      }

      /* Bitcast from <n x i32> to <4n x i8> */
      res = LLVMBuildBitCast(builder, res, bld.vec_type, "");

      return res;
   }


   /*
    * Fallback to util_format_description::fetch_rgba_float().
    */

   if (format_desc->fetch_rgba_float) {
      /*
       * Fallback to calling util_format_description::fetch_rgba_float.
       *
       * This is definitely not the most efficient way of fetching pixels, as
       * we miss the opportunity to do vectorization, but this it is a
       * convenient for formats or scenarios for which there was no opportunity
       * or incentive to optimize.
       */

      LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder)));
      char name[256];
      LLVMTypeRef f32t = LLVMFloatTypeInContext(gallivm->context);
      LLVMTypeRef f32x4t = LLVMVectorType(f32t, 4);
      LLVMTypeRef pf32t = LLVMPointerType(f32t, 0);
      LLVMTypeRef pi8t = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);
      LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
      LLVMValueRef function;
      LLVMValueRef tmp_ptr;
      LLVMValueRef tmps[LP_MAX_VECTOR_LENGTH/4];
      LLVMValueRef res;
      LLVMValueRef callee;
      unsigned k;

      util_snprintf(name, sizeof name, "util_format_%s_fetch_rgba_float",
                    format_desc->short_name);

      if (gallivm_debug & GALLIVM_DEBUG_PERF) {
         debug_printf("%s: falling back to %s\n", __FUNCTION__, name);
      }

      /*
       * Declare and bind format_desc->fetch_rgba_float().
       */

      function = LLVMGetNamedFunction(module, name);
      if (!function) {
         /*
          * Function to call looks like:
          *   fetch(float *dst, const uint8_t *src, unsigned i, unsigned j)
          */
         LLVMTypeRef ret_type;
         LLVMTypeRef arg_types[4];
         LLVMTypeRef function_type;

         ret_type = LLVMVoidTypeInContext(gallivm->context);
         arg_types[0] = pf32t;
         arg_types[1] = pi8t;
         arg_types[2] = i32t;
         arg_types[3] = i32t;
         function_type = LLVMFunctionType(ret_type, arg_types,
                                          Elements(arg_types), 0);
         function = LLVMAddFunction(module, name, function_type);

         LLVMSetFunctionCallConv(function, LLVMCCallConv);
         LLVMSetLinkage(function, LLVMExternalLinkage);

         assert(LLVMIsDeclaration(function));
      }

      /* Note: we're using this casting here instead of LLVMAddGlobalMapping()
       * to work around a bug in LLVM 2.6.
       */

      /* make const pointer for the C fetch_rgba_float function */
      callee = lp_build_const_int_pointer(gallivm,
         func_to_pointer((func_pointer) format_desc->fetch_rgba_float));

      /* cast the callee pointer to the function's type */
      function = LLVMBuildBitCast(builder, callee,
                                  LLVMTypeOf(function), "cast callee");


      tmp_ptr = lp_build_alloca(gallivm, f32x4t, "");

      /*
       * Invoke format_desc->fetch_rgba_float() for each pixel and insert the result
       * in the SoA vectors.
       */

      for (k = 0; k < num_pixels; ++k) {
         LLVMValueRef args[4];

         args[0] = LLVMBuildBitCast(builder, tmp_ptr, pf32t, "");
         args[1] = lp_build_gather_elem_ptr(gallivm, num_pixels,
                                            base_ptr, offset, k);

         if (num_pixels == 1) {
            args[2] = i;
            args[3] = j;
         }
         else {
            LLVMValueRef index = lp_build_const_int32(gallivm, k);
            args[2] = LLVMBuildExtractElement(builder, i, index, "");
            args[3] = LLVMBuildExtractElement(builder, j, index, "");
         }

         LLVMBuildCall(builder, function, args, Elements(args), "");

         tmps[k] = LLVMBuildLoad(builder, tmp_ptr, "");
      }

      lp_build_conv(gallivm,
                    lp_float32_vec4_type(),
                    type,
                    tmps, num_pixels, &res, 1);

      return res;
   }

   assert(0);
   return lp_build_undef(gallivm, type);
}
/**
 * Build code to compare two values 'a' and 'b' of 'type' using the given func.
 * \param func  one of PIPE_FUNC_x
 * The result values will be 0 for false or ~0 for true.
 */
LLVMValueRef
lp_build_compare(struct gallivm_state *gallivm,
                 const struct lp_type type,
                 unsigned func,
                 LLVMValueRef a,
                 LLVMValueRef b)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMTypeRef int_vec_type = lp_build_int_vec_type(gallivm, type);
   LLVMValueRef zeros = LLVMConstNull(int_vec_type);
   LLVMValueRef ones = LLVMConstAllOnes(int_vec_type);
   LLVMValueRef cond;
   LLVMValueRef res;

   assert(func >= PIPE_FUNC_NEVER);
   assert(func <= PIPE_FUNC_ALWAYS);
   assert(lp_check_value(type, a));
   assert(lp_check_value(type, b));

   if(func == PIPE_FUNC_NEVER)
      return zeros;
   if(func == PIPE_FUNC_ALWAYS)
      return ones;

#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
   /*
    * There are no unsigned integer comparison instructions in SSE.
    */

   if (!type.floating && !type.sign &&
       type.width * type.length == 128 &&
       util_cpu_caps.has_sse2 &&
       (func == PIPE_FUNC_LESS ||
        func == PIPE_FUNC_LEQUAL ||
        func == PIPE_FUNC_GREATER ||
        func == PIPE_FUNC_GEQUAL) &&
       (gallivm_debug & GALLIVM_DEBUG_PERF)) {
         debug_printf("%s: inefficient <%u x i%u> unsigned comparison\n",
                      __FUNCTION__, type.length, type.width);
   }
#endif

#if HAVE_LLVM < 0x0207
#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
   if(type.width * type.length == 128) {
      if(type.floating && util_cpu_caps.has_sse) {
         /* float[4] comparison */
         LLVMTypeRef vec_type = lp_build_vec_type(gallivm, type);
         LLVMValueRef args[3];
         unsigned cc;
         boolean swap;

         swap = FALSE;
         switch(func) {
         case PIPE_FUNC_EQUAL:
            cc = 0;
            break;
         case PIPE_FUNC_NOTEQUAL:
            cc = 4;
            break;
         case PIPE_FUNC_LESS:
            cc = 1;
            break;
         case PIPE_FUNC_LEQUAL:
            cc = 2;
            break;
         case PIPE_FUNC_GREATER:
            cc = 1;
            swap = TRUE;
            break;
         case PIPE_FUNC_GEQUAL:
            cc = 2;
            swap = TRUE;
            break;
         default:
            assert(0);
            return lp_build_undef(gallivm, type);
         }

         if(swap) {
            args[0] = b;
            args[1] = a;
         }
         else {
            args[0] = a;
            args[1] = b;
         }

         args[2] = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), cc, 0);
         res = lp_build_intrinsic(builder,
                                  "llvm.x86.sse.cmp.ps",
                                  vec_type,
                                  args, 3);
         res = LLVMBuildBitCast(builder, res, int_vec_type, "");
         return res;
      }
      else if(util_cpu_caps.has_sse2) {
         /* int[4] comparison */
         static const struct {
            unsigned swap:1;
            unsigned eq:1;
            unsigned gt:1;
            unsigned not:1;
         } table[] = {
            {0, 0, 0, 1}, /* PIPE_FUNC_NEVER */
            {1, 0, 1, 0}, /* PIPE_FUNC_LESS */
            {0, 1, 0, 0}, /* PIPE_FUNC_EQUAL */
            {0, 0, 1, 1}, /* PIPE_FUNC_LEQUAL */
            {0, 0, 1, 0}, /* PIPE_FUNC_GREATER */
            {0, 1, 0, 1}, /* PIPE_FUNC_NOTEQUAL */
            {1, 0, 1, 1}, /* PIPE_FUNC_GEQUAL */
            {0, 0, 0, 0}  /* PIPE_FUNC_ALWAYS */
         };
         const char *pcmpeq;
         const char *pcmpgt;
         LLVMValueRef args[2];
         LLVMValueRef res;
         LLVMTypeRef vec_type = lp_build_vec_type(gallivm, type);

         switch (type.width) {
         case 8:
            pcmpeq = "llvm.x86.sse2.pcmpeq.b";
            pcmpgt = "llvm.x86.sse2.pcmpgt.b";
            break;
         case 16:
            pcmpeq = "llvm.x86.sse2.pcmpeq.w";
            pcmpgt = "llvm.x86.sse2.pcmpgt.w";
            break;
         case 32:
            pcmpeq = "llvm.x86.sse2.pcmpeq.d";
            pcmpgt = "llvm.x86.sse2.pcmpgt.d";
            break;
         default:
            assert(0);
            return lp_build_undef(gallivm, type);
         }

         /* There are no unsigned comparison instructions. So flip the sign bit
          * so that the results match.
          */
         if (table[func].gt && !type.sign) {
            LLVMValueRef msb = lp_build_const_int_vec(gallivm, type, (unsigned long long)1 << (type.width - 1));
            a = LLVMBuildXor(builder, a, msb, "");
            b = LLVMBuildXor(builder, b, msb, "");
         }

         if(table[func].swap) {
            args[0] = b;
            args[1] = a;
         }
         else {
            args[0] = a;
            args[1] = b;
         }

         if(table[func].eq)
            res = lp_build_intrinsic(builder, pcmpeq, vec_type, args, 2);
         else if (table[func].gt)
            res = lp_build_intrinsic(builder, pcmpgt, vec_type, args, 2);
         else
            res = LLVMConstNull(vec_type);

         if(table[func].not)
            res = LLVMBuildNot(builder, res, "");

         return res;
      }
   } /* if (type.width * type.length == 128) */
#endif
#endif /* HAVE_LLVM < 0x0207 */

   /* XXX: It is not clear if we should use the ordered or unordered operators */

   if(type.floating) {
      LLVMRealPredicate op;
      switch(func) {
      case PIPE_FUNC_NEVER:
         op = LLVMRealPredicateFalse;
         break;
      case PIPE_FUNC_ALWAYS:
         op = LLVMRealPredicateTrue;
         break;
      case PIPE_FUNC_EQUAL:
         op = LLVMRealUEQ;
         break;
      case PIPE_FUNC_NOTEQUAL:
         op = LLVMRealUNE;
         break;
      case PIPE_FUNC_LESS:
         op = LLVMRealULT;
         break;
      case PIPE_FUNC_LEQUAL:
         op = LLVMRealULE;
         break;
      case PIPE_FUNC_GREATER:
         op = LLVMRealUGT;
         break;
      case PIPE_FUNC_GEQUAL:
         op = LLVMRealUGE;
         break;
      default:
         assert(0);
         return lp_build_undef(gallivm, type);
      }

#if HAVE_LLVM >= 0x0207
      cond = LLVMBuildFCmp(builder, op, a, b, "");
      res = LLVMBuildSExt(builder, cond, int_vec_type, "");
#else
      if (type.length == 1) {
         cond = LLVMBuildFCmp(builder, op, a, b, "");
         res = LLVMBuildSExt(builder, cond, int_vec_type, "");
      }
      else {
         unsigned i;

         res = LLVMGetUndef(int_vec_type);

         debug_printf("%s: warning: using slow element-wise float"
                      " vector comparison\n", __FUNCTION__);
         for (i = 0; i < type.length; ++i) {
            LLVMValueRef index = lp_build_const_int32(gallivm, i);
            cond = LLVMBuildFCmp(builder, op,
                                 LLVMBuildExtractElement(builder, a, index, ""),
                                 LLVMBuildExtractElement(builder, b, index, ""),
                                 "");
            cond = LLVMBuildSelect(builder, cond,
                                   LLVMConstExtractElement(ones, index),
                                   LLVMConstExtractElement(zeros, index),
                                   "");
            res = LLVMBuildInsertElement(builder, res, cond, index, "");
         }
      }
#endif
   }
   else {
      LLVMIntPredicate op;
      switch(func) {
      case PIPE_FUNC_EQUAL:
         op = LLVMIntEQ;
         break;
      case PIPE_FUNC_NOTEQUAL:
         op = LLVMIntNE;
         break;
      case PIPE_FUNC_LESS:
         op = type.sign ? LLVMIntSLT : LLVMIntULT;
         break;
      case PIPE_FUNC_LEQUAL:
         op = type.sign ? LLVMIntSLE : LLVMIntULE;
         break;
      case PIPE_FUNC_GREATER:
         op = type.sign ? LLVMIntSGT : LLVMIntUGT;
         break;
      case PIPE_FUNC_GEQUAL:
         op = type.sign ? LLVMIntSGE : LLVMIntUGE;
         break;
      default:
         assert(0);
         return lp_build_undef(gallivm, type);
      }

#if HAVE_LLVM >= 0x0207
      cond = LLVMBuildICmp(builder, op, a, b, "");
      res = LLVMBuildSExt(builder, cond, int_vec_type, "");
#else
      if (type.length == 1) {
         cond = LLVMBuildICmp(builder, op, a, b, "");
         res = LLVMBuildSExt(builder, cond, int_vec_type, "");
      }
      else {
         unsigned i;

         res = LLVMGetUndef(int_vec_type);

         if (gallivm_debug & GALLIVM_DEBUG_PERF) {
            debug_printf("%s: using slow element-wise int"
                         " vector comparison\n", __FUNCTION__);
         }

         for(i = 0; i < type.length; ++i) {
            LLVMValueRef index = lp_build_const_int32(gallivm, i);
            cond = LLVMBuildICmp(builder, op,
                                 LLVMBuildExtractElement(builder, a, index, ""),
                                 LLVMBuildExtractElement(builder, b, index, ""),
                                 "");
            cond = LLVMBuildSelect(builder, cond,
                                   LLVMConstExtractElement(ones, index),
                                   LLVMConstExtractElement(zeros, index),
                                   "");
            res = LLVMBuildInsertElement(builder, res, cond, index, "");
         }
      }
#endif
   }

   return res;
}
Пример #25
0
/**
 * Gather one element from scatter positions in memory.
 * Nearly the same as above, however the individual elements
 * may be vectors themselves, and fetches may be float type.
 * Can also do pad vector instead of ZExt.
 *
 * @sa lp_build_gather()
 */
static LLVMValueRef
lp_build_gather_elem_vec(struct gallivm_state *gallivm,
                         unsigned length,
                         unsigned src_width,
                         LLVMTypeRef src_type,
                         struct lp_type dst_type,
                         boolean aligned,
                         LLVMValueRef base_ptr,
                         LLVMValueRef offsets,
                         unsigned i,
                         boolean vector_justify)
{
   LLVMValueRef ptr, res;
   LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0);
   assert(LLVMTypeOf(base_ptr) == LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0));

   ptr = lp_build_gather_elem_ptr(gallivm, length, base_ptr, offsets, i);
   ptr = LLVMBuildBitCast(gallivm->builder, ptr, src_ptr_type, "");
   res = LLVMBuildLoad(gallivm->builder, ptr, "");

   /* XXX
    * On some archs we probably really want to avoid having to deal
    * with alignments lower than 4 bytes (if fetch size is a power of
    * two >= 32). On x86 it doesn't matter, however.
    * We should be able to guarantee full alignment for any kind of texture
    * fetch (except ARB_texture_buffer_range, oops), but not vertex fetch
    * (there's PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY and friends
    * but I don't think that's quite what we wanted).
    * For ARB_texture_buffer_range, PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT
    * looks like a good fit, but it seems this cap bit (and OpenGL) aren't
    * enforcing what we want (which is what d3d10 does, the offset needs to
    * be aligned to element size, but GL has bytes regardless of element
    * size which would only leave us with minimum alignment restriction of 16
    * which doesn't make much sense if the type isn't 4x32bit). Due to
    * translation of offsets to first_elem in sampler_views it actually seems
    * gallium could not do anything else except 16 no matter what...
    */
   if (!aligned) {
      LLVMSetAlignment(res, 1);
   } else if (!util_is_power_of_two(src_width)) {
      /*
       * Full alignment is impossible, assume the caller really meant
       * the individual elements were aligned (e.g. 3x32bit format).
       * And yes the generated code may otherwise crash, llvm will
       * really assume 128bit alignment with a 96bit fetch (I suppose
       * that makes sense as it can just assume the upper 32bit to be
       * whatever).
       * Maybe the caller should be able to explicitly set this, but
       * this should cover all the 3-channel formats.
       */
      if (((src_width / 24) * 24 == src_width) &&
           util_is_power_of_two(src_width / 24)) {
          LLVMSetAlignment(res, src_width / 24);
      } else {
         LLVMSetAlignment(res, 1);
      }
   }

   assert(src_width <= dst_type.width * dst_type.length);
   if (src_width < dst_type.width * dst_type.length) {
      if (dst_type.length > 1) {
         res = lp_build_pad_vector(gallivm, res, dst_type.length);
         /*
          * vector_justify hopefully a non-issue since we only deal
          * with src_width >= 32 here?
          */
      } else {
         LLVMTypeRef dst_elem_type = lp_build_vec_type(gallivm, dst_type);

         /*
          * Only valid if src_ptr_type is int type...
          */
         res = LLVMBuildZExt(gallivm->builder, res, dst_elem_type, "");

#ifdef PIPE_ARCH_BIG_ENDIAN
         if (vector_justify) {
         res = LLVMBuildShl(gallivm->builder, res,
                            LLVMConstInt(dst_elem_type,
                                         dst_type.width - src_width, 0), "");
         }
         if (src_width == 48) {
            /* Load 3x16 bit vector.
             * The sequence of loads on big-endian hardware proceeds as follows.
             * 16-bit fields are denoted by X, Y, Z, and 0.  In memory, the sequence
             * of three fields appears in the order X, Y, Z.
             *
             * Load 32-bit word: 0.0.X.Y
             * Load 16-bit halfword: 0.0.0.Z
             * Rotate left: 0.X.Y.0
             * Bitwise OR: 0.X.Y.Z
             *
             * The order in which we need the fields in the result is 0.Z.Y.X,
             * the same as on little-endian; permute 16-bit fields accordingly
             * within 64-bit register:
             */
            LLVMValueRef shuffles[4] = {
               lp_build_const_int32(gallivm, 2),
               lp_build_const_int32(gallivm, 1),
               lp_build_const_int32(gallivm, 0),
               lp_build_const_int32(gallivm, 3),
            };
            res = LLVMBuildBitCast(gallivm->builder, res,
                                   lp_build_vec_type(gallivm, lp_type_uint_vec(16, 4*16)), "");
            res = LLVMBuildShuffleVector(gallivm->builder, res, res, LLVMConstVector(shuffles, 4), "");
            res = LLVMBuildBitCast(gallivm->builder, res, dst_elem_type, "");
         }
#endif
      }
   }
   return res;
}