static void txf_fetch_args(
	struct lp_build_tgsi_context * bld_base,
	struct lp_build_emit_data * emit_data)
{
	const struct tgsi_full_instruction * inst = emit_data->inst;
	struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
	const struct tgsi_texture_offset * off = inst->TexOffsets;
	LLVMTypeRef offset_type = bld_base->int_bld.elem_type;

	/* fetch tex coords */
	tex_fetch_args(bld_base, emit_data);

	/* fetch tex offsets */
	if (inst->Texture.NumOffsets) {
		assert(inst->Texture.NumOffsets == 1);

		emit_data->args[1] = LLVMConstBitCast(
			bld->immediates[off->Index][off->SwizzleX],
			offset_type);
		emit_data->args[2] = LLVMConstBitCast(
			bld->immediates[off->Index][off->SwizzleY],
			offset_type);
		emit_data->args[3] = LLVMConstBitCast(
			bld->immediates[off->Index][off->SwizzleZ],
			offset_type);
	} else {
		emit_data->args[1] = bld_base->int_bld.zero;
		emit_data->args[2] = bld_base->int_bld.zero;
		emit_data->args[3] = bld_base->int_bld.zero;
	}

	emit_data->arg_count = 4;
}
Exemplo n.º 2
0
static LLVMValueRef
emit_fetch_immediate(
	struct lp_build_tgsi_context *bld_base,
	const struct tgsi_full_src_register *reg,
	enum tgsi_opcode_type type,
	unsigned swizzle)
{
	LLVMTypeRef ctype;
	LLVMContextRef ctx = bld_base->base.gallivm->context;

	switch (type) {
	case TGSI_TYPE_UNSIGNED:
	case TGSI_TYPE_SIGNED:
		ctype = LLVMInt32TypeInContext(ctx);
		break;
	case TGSI_TYPE_UNTYPED:
	case TGSI_TYPE_FLOAT:
		ctype = LLVMFloatTypeInContext(ctx);
		break;
	default:
		ctype = 0;
		break;
	}

	struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
	return LLVMConstBitCast(bld->immediates[reg->Register.Index][swizzle], ctype);
}
Exemplo n.º 3
0
static LLVMValueRef make_desc_ptr(LLVMValueRef func, LLVMTypeRef type)
{
  if(func == NULL)
    return LLVMConstNull(type);

  return LLVMConstBitCast(func, type);
}
Exemplo n.º 4
0
LLVMValueRef gencall_create(compile_t* c, reach_type_t* t)
{
  LLVMValueRef args[2];
  args[0] = codegen_ctx(c);
  args[1] = LLVMConstBitCast(t->desc, c->descriptor_ptr);

  LLVMValueRef result = gencall_runtime(c, "pony_create", args, 2, "");
  return LLVMBuildBitCast(c->builder, result, t->use_type, "");
}
Exemplo n.º 5
0
static LLVMValueRef
emit_fetch(
	struct lp_build_tgsi_context *bld_base,
	const struct tgsi_full_src_register *reg,
	enum tgsi_opcode_type type,
	unsigned swizzle)
{
	struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
	struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
	LLVMValueRef result, ptr;

	if (swizzle == ~0) {
		LLVMValueRef values[TGSI_NUM_CHANNELS];
		unsigned chan;
		for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
			values[chan] = emit_fetch(bld_base, reg, type, chan);
		}
		return lp_build_gather_values(bld_base->base.gallivm, values,
					      TGSI_NUM_CHANNELS);
	}

	if (reg->Register.Indirect) {
		struct tgsi_declaration_range range = get_array_range(bld_base,
			reg->Register.File, &reg->Indirect);
		return LLVMBuildExtractElement(builder,
			emit_array_fetch(bld_base, reg->Register.File, type, range, swizzle),
			emit_array_index(bld, &reg->Indirect, reg->Register.Index - range.First),
			"");
	}

	switch(reg->Register.File) {
	case TGSI_FILE_IMMEDIATE: {
		LLVMTypeRef ctype = tgsi2llvmtype(bld_base, type);
		return LLVMConstBitCast(bld->immediates[reg->Register.Index][swizzle], ctype);
	}

	case TGSI_FILE_INPUT:
		result = ctx->inputs[radeon_llvm_reg_index_soa(reg->Register.Index, swizzle)];
		break;

	case TGSI_FILE_TEMPORARY:
		ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle);
		result = LLVMBuildLoad(builder, ptr, "");
		break;

	case TGSI_FILE_OUTPUT:
		ptr = lp_get_output_ptr(bld, reg->Register.Index, swizzle);
		result = LLVMBuildLoad(builder, ptr, "");
		break;

	default:
		return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
	}

	return bitcast(bld_base, type, result);
}
Exemplo n.º 6
0
static LLVMValueRef make_function_ptr(compile_t* c, const char* name,
  LLVMTypeRef type)
{
  LLVMValueRef fun = LLVMGetNamedFunction(c->module, name);

  if(fun == NULL)
    return LLVMConstNull(type);

  return LLVMConstBitCast(fun, type);
}
Exemplo n.º 7
0
static LLVMValueRef make_field_list(compile_t* c, gentype_t* g)
{
  // The list is an array of field descriptors.
  int count;

  if(g->underlying == TK_TUPLETYPE)
    count = g->field_count;
  else
    count = 0;

  LLVMTypeRef type = LLVMArrayType(c->field_descriptor, count);

  // If we aren't a tuple, return a null pointer to a list.
  if(count == 0)
    return LLVMConstNull(LLVMPointerType(type, 0));

  // Create a constant array of field descriptors.
  size_t buf_size = count *sizeof(LLVMValueRef);
  LLVMValueRef* list = (LLVMValueRef*)pool_alloc_size(buf_size);

  for(int i = 0; i < count; i++)
  {
    gentype_t fg;

    if(!gentype(c, g->fields[i], &fg))
      return NULL;

    LLVMValueRef fdesc[2];
    fdesc[0] = LLVMConstInt(c->i32,
      LLVMOffsetOfElement(c->target_data, g->primitive, i), false);

    if(fg.desc != NULL)
    {
      // We are a concrete type.
      fdesc[1] = LLVMConstBitCast(fg.desc, c->descriptor_ptr);
    } else {
      // We aren't a concrete type.
      fdesc[1] = LLVMConstNull(c->descriptor_ptr);
    }

    list[i] = LLVMConstStructInContext(c->context, fdesc, 2, false);
  }

  LLVMValueRef field_array = LLVMConstArray(c->field_descriptor, list, count);

  // Create a global to hold the array.
  const char* name = genname_fieldlist(g->type_name);
  LLVMValueRef global = LLVMAddGlobal(c->module, type, name);
  LLVMSetGlobalConstant(global, true);
  LLVMSetLinkage(global, LLVMInternalLinkage);
  LLVMSetInitializer(global, field_array);

  pool_free_size(buf_size, list);
  return global;
}
Exemplo n.º 8
0
LLVMValueRef gencall_create(compile_t* c, gentype_t* g)
{
  // Disable debug anchor
  dwarf_location(&c->dwarf, NULL);

  LLVMValueRef args[2];
  args[0] = codegen_ctx(c);
  args[1] = LLVMConstBitCast(g->desc, c->descriptor_ptr);

  LLVMValueRef result = gencall_runtime(c, "pony_create", args, 2, "");
  return LLVMBuildBitCast(c->builder, result, g->use_type, "");
}
Exemplo n.º 9
0
static LLVMValueRef create_main(compile_t* c, gentype_t* g, LLVMValueRef ctx)
{
  // Create the main actor and become it.
  LLVMValueRef args[2];
  args[0] = ctx;
  args[1] = LLVMConstBitCast(g->desc, c->descriptor_ptr);
  LLVMValueRef actor = gencall_runtime(c, "pony_create", args, 2, "");

  args[0] = ctx;
  args[1] = actor;
  gencall_runtime(c, "pony_become", args, 2, "");

  return actor;
}
Exemplo n.º 10
0
static LLVMValueRef make_unbox_function(compile_t* c, gentype_t* g,
  const char* name)
{
  LLVMValueRef fun = LLVMGetNamedFunction(c->module, name);

  if(fun == NULL)
    return LLVMConstNull(c->void_ptr);

  // Create a new unboxing function that forwards to the real function.
  LLVMTypeRef f_type = LLVMGetElementType(LLVMTypeOf(fun));
  int count = LLVMCountParamTypes(f_type);

  // If it takes no arguments, it's a special number constructor. Don't put it
  // in the vtable.
  if(count == 0)
    return LLVMConstNull(c->void_ptr);

  size_t buf_size = count *sizeof(LLVMTypeRef);
  LLVMTypeRef* params = (LLVMTypeRef*)pool_alloc_size(buf_size);
  LLVMGetParamTypes(f_type, params);
  LLVMTypeRef ret_type = LLVMGetReturnType(f_type);

  // It's the same type, but it takes the boxed type instead of the primitive
  // type as the receiver.
  params[0] = g->structure_ptr;

  const char* unbox_name = genname_unbox(name);
  LLVMTypeRef unbox_type = LLVMFunctionType(ret_type, params, count, false);
  LLVMValueRef unbox_fun = codegen_addfun(c, unbox_name, unbox_type);
  codegen_startfun(c, unbox_fun, false);

  // Extract the primitive type from element 1 and call the real function.
  LLVMValueRef this_ptr = LLVMGetParam(unbox_fun, 0);
  LLVMValueRef primitive_ptr = LLVMBuildStructGEP(c->builder, this_ptr, 1, "");
  LLVMValueRef primitive = LLVMBuildLoad(c->builder, primitive_ptr, "");

  LLVMValueRef* args = (LLVMValueRef*)pool_alloc_size(buf_size);
  args[0] = primitive;

  for(int i = 1; i < count; i++)
    args[i] = LLVMGetParam(unbox_fun, i);

  LLVMValueRef result = codegen_call(c, fun, args, count);
  LLVMBuildRet(c->builder, result);
  codegen_finishfun(c);

  pool_free_size(buf_size, params);
  pool_free_size(buf_size, args);
  return LLVMConstBitCast(unbox_fun, c->void_ptr);
}
Exemplo n.º 11
0
static LLVMValueRef
emit_fetch_immediate(
	struct lp_build_tgsi_context *bld_base,
	const struct tgsi_full_src_register *reg,
	enum tgsi_opcode_type type,
	unsigned swizzle)
{
	LLVMTypeRef ctype;
	LLVMContextRef ctx = bld_base->base.gallivm->context;

	switch (type) {
	case TGSI_TYPE_UNSIGNED:
	case TGSI_TYPE_SIGNED:
		ctype = LLVMInt32TypeInContext(ctx);
		break;
	case TGSI_TYPE_UNTYPED:
	case TGSI_TYPE_FLOAT:
		ctype = LLVMFloatTypeInContext(ctx);
		break;
	default:
		ctype = 0;
		break;
	}

	struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
	if (swizzle == ~0) {
		LLVMValueRef values[TGSI_NUM_CHANNELS] = {};
		unsigned chan;
		for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
                   values[chan] = LLVMConstBitCast(bld->immediates[reg->Register.Index][chan], ctype);
		}
		return lp_build_gather_values(bld_base->base.gallivm, values,
						TGSI_NUM_CHANNELS);
	} else {
		return LLVMConstBitCast(bld->immediates[reg->Register.Index][swizzle], ctype);
	}
}
Exemplo n.º 12
0
LLVMValueRef gencall_allocstruct(compile_t* c, gentype_t* g)
{
  // Disable debug anchor
  dwarf_location(&c->dwarf, NULL);

  // We explicitly want a boxed version.
  // Get the size of the structure.
  size_t size = (size_t)LLVMABISizeOfType(c->target_data, g->structure);

  // Get the finaliser, if there is one.
  const char* final = genname_finalise(g->type_name);
  LLVMValueRef final_fun = LLVMGetNamedFunction(c->module, final);

  // Allocate the object.
  LLVMValueRef args[3];
  args[0] = codegen_ctx(c);

  LLVMValueRef result;

  if(final_fun == NULL)
  {
    if(size <= HEAP_MAX)
    {
      uint32_t index = ponyint_heap_index(size);
      args[1] = LLVMConstInt(c->i32, index, false);
      result = gencall_runtime(c, "pony_alloc_small", args, 2, "");
    } else {
      args[1] = LLVMConstInt(c->intptr, size, false);
      result = gencall_runtime(c, "pony_alloc_large", args, 2, "");
    }
  } else {
    args[1] = LLVMConstInt(c->intptr, size, false);
    args[2] = LLVMConstBitCast(final_fun, c->final_fn);
    result = gencall_runtime(c, "pony_alloc_final", args, 3, "");
  }

  result = LLVMBuildBitCast(c->builder, result, g->structure_ptr, "");

  // Set the descriptor.
  if(g->underlying != TK_STRUCT)
  {
    LLVMValueRef desc_ptr = LLVMBuildStructGEP(c->builder, result, 0, "");
    LLVMBuildStore(c->builder, g->desc, desc_ptr);
  }

  return result;
}
Exemplo n.º 13
0
/**
 * Convert float[] to int[] with floor().
 */
LLVMValueRef
lp_build_ifloor(struct lp_build_context *bld,
                LLVMValueRef a)
{
   const struct lp_type type = bld->type;
   LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
   LLVMValueRef res;

   assert(type.floating);
   assert(lp_check_value(type, a));

   if(util_cpu_caps.has_sse4_1) {
      res = lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_FLOOR);
   }
   else {
      /* Take the sign bit and add it to 1 constant */
      LLVMTypeRef vec_type = lp_build_vec_type(type);
      unsigned mantissa = lp_mantissa(type);
      LLVMValueRef mask = lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1));
      LLVMValueRef sign;
      LLVMValueRef offset;

      /* sign = a < 0 ? ~0 : 0 */
      sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
      sign = LLVMBuildAnd(bld->builder, sign, mask, "");
      sign = LLVMBuildAShr(bld->builder, sign, lp_build_int_const_scalar(type, type.width - 1), "");
      lp_build_name(sign, "floor.sign");

      /* offset = -0.99999(9)f */
      offset = lp_build_const_scalar(type, -(double)(((unsigned long long)1 << mantissa) - 1)/((unsigned long long)1 << mantissa));
      offset = LLVMConstBitCast(offset, int_vec_type);

      /* offset = a < 0 ? -0.99999(9)f : 0.0f */
      offset = LLVMBuildAnd(bld->builder, offset, sign, "");
      offset = LLVMBuildBitCast(bld->builder, offset, vec_type, "");
      lp_build_name(offset, "floor.offset");

      res = LLVMBuildAdd(bld->builder, a, offset, "");
      lp_build_name(res, "floor.res");
   }

   res = LLVMBuildFPToSI(bld->builder, res, int_vec_type, "");
   lp_build_name(res, "floor");

   return res;
}
Exemplo n.º 14
0
LLVMValueRef
lp_build_sgn(struct lp_build_context *bld,
             LLVMValueRef a)
{
   const struct lp_type type = bld->type;
   LLVMTypeRef vec_type = lp_build_vec_type(type);
   LLVMValueRef cond;
   LLVMValueRef res;

   /* Handle non-zero case */
   if(!type.sign) {
      /* if not zero then sign must be positive */
      res = bld->one;
   }
   else if(type.floating) {
      /* Take the sign bit and add it to 1 constant */
      LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
      LLVMValueRef mask = lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1));
      LLVMValueRef sign;
      LLVMValueRef one;
      sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
      sign = LLVMBuildAnd(bld->builder, sign, mask, "");
      one = LLVMConstBitCast(bld->one, int_vec_type);
      res = LLVMBuildOr(bld->builder, sign, one, "");
      res = LLVMBuildBitCast(bld->builder, res, vec_type, "");
   }
   else
   {
      LLVMValueRef minus_one = lp_build_const_scalar(type, -1.0);
      cond = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, bld->zero);
      res = lp_build_select(bld, cond, bld->one, minus_one);
   }

   /* Handle zero */
   cond = lp_build_cmp(bld, PIPE_FUNC_EQUAL, a, bld->zero);
   res = lp_build_select(bld, cond, bld->zero, bld->one);

   return res;
}
Exemplo n.º 15
0
/**
 * See http://www.devmaster.net/forums/showthread.php?p=43580
 */
void
lp_build_log2_approx(struct lp_build_context *bld,
                     LLVMValueRef x,
                     LLVMValueRef *p_exp,
                     LLVMValueRef *p_floor_log2,
                     LLVMValueRef *p_log2)
{
   const struct lp_type type = bld->type;
   LLVMTypeRef vec_type = lp_build_vec_type(type);
   LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);

   LLVMValueRef expmask = lp_build_int_const_scalar(type, 0x7f800000);
   LLVMValueRef mantmask = lp_build_int_const_scalar(type, 0x007fffff);
   LLVMValueRef one = LLVMConstBitCast(bld->one, int_vec_type);

   LLVMValueRef i = NULL;
   LLVMValueRef exp = NULL;
   LLVMValueRef mant = NULL;
   LLVMValueRef logexp = NULL;
   LLVMValueRef logmant = NULL;
   LLVMValueRef res = NULL;

   if(p_exp || p_floor_log2 || p_log2) {
      /* TODO: optimize the constant case */
      if(LLVMIsConstant(x))
         debug_printf("%s: inefficient/imprecise constant arithmetic\n",
                      __FUNCTION__);

      assert(type.floating && type.width == 32);

      i = LLVMBuildBitCast(bld->builder, x, int_vec_type, "");

      /* exp = (float) exponent(x) */
      exp = LLVMBuildAnd(bld->builder, i, expmask, "");
   }

   if(p_floor_log2 || p_log2) {
      logexp = LLVMBuildLShr(bld->builder, exp, lp_build_int_const_scalar(type, 23), "");
      logexp = LLVMBuildSub(bld->builder, logexp, lp_build_int_const_scalar(type, 127), "");
      logexp = LLVMBuildSIToFP(bld->builder, logexp, vec_type, "");
   }

   if(p_log2) {
      /* mant = (float) mantissa(x) */
      mant = LLVMBuildAnd(bld->builder, i, mantmask, "");
      mant = LLVMBuildOr(bld->builder, mant, one, "");
      mant = LLVMBuildBitCast(bld->builder, mant, vec_type, "");

      logmant = lp_build_polynomial(bld, mant, lp_build_log2_polynomial,
                                    Elements(lp_build_log2_polynomial));

      /* This effectively increases the polynomial degree by one, but ensures that log2(1) == 0*/
      logmant = LLVMBuildMul(bld->builder, logmant, LLVMBuildSub(bld->builder, mant, bld->one, ""), "");

      res = LLVMBuildAdd(bld->builder, logmant, logexp, "");
   }

   if(p_exp)
      *p_exp = exp;

   if(p_floor_log2)
      *p_floor_log2 = logexp;

   if(p_log2)
      *p_log2 = res;
}
Exemplo n.º 16
0
static LLVMValueRef make_unbox_function(compile_t* c, reach_type_t* t,
  reach_method_t* m)
{
  // Create a new unboxing function that forwards to the real function.
  LLVMTypeRef f_type = LLVMGetElementType(LLVMTypeOf(m->func));
  int count = LLVMCountParamTypes(f_type);

  // Leave space for a receiver if it's a constructor vtable entry.
  size_t buf_size = (count + 1) * sizeof(LLVMTypeRef);
  LLVMTypeRef* params = (LLVMTypeRef*)ponyint_pool_alloc_size(buf_size);
  LLVMGetParamTypes(f_type, params);
  LLVMTypeRef ret_type = LLVMGetReturnType(f_type);

  const char* unbox_name = genname_unbox(m->full_name);

  if(ast_id(m->r_fun) != TK_NEW)
  {
    // It's the same type, but it takes the boxed type instead of the primitive
    // type as the receiver.
    params[0] = t->structure_ptr;
  } else {
    // For a constructor, the unbox_fun has a receiver, even though the real
    // method does not.
    memmove(&params[1], &params[0], count * sizeof(LLVMTypeRef*));
    params[0] = t->structure_ptr;
    count++;
  }

  LLVMTypeRef unbox_type = LLVMFunctionType(ret_type, params, count, false);
  LLVMValueRef unbox_fun = codegen_addfun(c, unbox_name, unbox_type);
  codegen_startfun(c, unbox_fun, NULL, NULL);

  // Extract the primitive type from element 1 and call the real function.
  LLVMValueRef this_ptr = LLVMGetParam(unbox_fun, 0);
  LLVMValueRef primitive_ptr = LLVMBuildStructGEP(c->builder, this_ptr, 1, "");
  LLVMValueRef primitive = LLVMBuildLoad(c->builder, primitive_ptr, "");

  LLVMValueRef* args = (LLVMValueRef*)ponyint_pool_alloc_size(buf_size);

  if(ast_id(m->r_fun) != TK_NEW)
  {
    // If it's not a constructor, pass the extracted primitive as the receiver.
    args[0] = primitive;

    for(int i = 1; i < count; i++)
      args[i] = LLVMGetParam(unbox_fun, i);
  } else {
    count--;

    for(int i = 0; i < count; i++)
      args[i] = LLVMGetParam(unbox_fun, i + 1);
  }

  LLVMValueRef result = codegen_call(c, m->func, args, count);
  LLVMBuildRet(c->builder, result);
  codegen_finishfun(c);

  ponyint_pool_free_size(buf_size, params);
  ponyint_pool_free_size(buf_size, args);
  return LLVMConstBitCast(unbox_fun, c->void_ptr);
}
Exemplo n.º 17
0
static LLVMValueRef
lp_build_gather_avx2(struct gallivm_state *gallivm,
                     unsigned length,
                     unsigned src_width,
                     struct lp_type dst_type,
                     LLVMValueRef base_ptr,
                     LLVMValueRef offsets)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMTypeRef src_type, src_vec_type;
   LLVMValueRef res;
   struct lp_type res_type = dst_type;
   res_type.length *= length;

   if (dst_type.floating) {
      src_type = src_width == 64 ? LLVMDoubleTypeInContext(gallivm->context) :
                                   LLVMFloatTypeInContext(gallivm->context);
   } else {
      src_type = LLVMIntTypeInContext(gallivm->context, src_width);
   }
   src_vec_type = LLVMVectorType(src_type, length);

   /* XXX should allow hw scaling (can handle i8, i16, i32, i64 for x86) */
   assert(LLVMTypeOf(base_ptr) == LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0));

   if (0) {
      /*
       * XXX: This will cause LLVM pre 3.7 to hang; it works on LLVM 3.8 but
       * will not use the AVX2 gather instrinsics (even with llvm 4.0), at
       * least with Haswell. See
       * http://lists.llvm.org/pipermail/llvm-dev/2016-January/094448.html
       * And the generated code doing the emulation is quite a bit worse
       * than what we get by doing it ourselves too.
       */
      LLVMTypeRef i32_type = LLVMIntTypeInContext(gallivm->context, 32);
      LLVMTypeRef i32_vec_type = LLVMVectorType(i32_type, length);
      LLVMTypeRef i1_type = LLVMIntTypeInContext(gallivm->context, 1);
      LLVMTypeRef i1_vec_type = LLVMVectorType(i1_type, length);
      LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0);
      LLVMValueRef src_ptr;

      base_ptr = LLVMBuildBitCast(builder, base_ptr, src_ptr_type, "");

      /* Rescale offsets from bytes to elements */
      LLVMValueRef scale = LLVMConstInt(i32_type, src_width/8, 0);
      scale = lp_build_broadcast(gallivm, i32_vec_type, scale);
      assert(LLVMTypeOf(offsets) == i32_vec_type);
      offsets = LLVMBuildSDiv(builder, offsets, scale, "");

      src_ptr = LLVMBuildGEP(builder, base_ptr, &offsets, 1, "vector-gep");

      char intrinsic[64];
      util_snprintf(intrinsic, sizeof intrinsic, "llvm.masked.gather.v%u%s%u",
                    length, dst_type.floating ? "f" : "i", src_width);
      LLVMValueRef alignment = LLVMConstInt(i32_type, src_width/8, 0);
      LLVMValueRef mask = LLVMConstAllOnes(i1_vec_type);
      LLVMValueRef passthru = LLVMGetUndef(src_vec_type);

      LLVMValueRef args[] = { src_ptr, alignment, mask, passthru };

      res = lp_build_intrinsic(builder, intrinsic, src_vec_type, args, 4, 0);
   } else {
      LLVMTypeRef i8_type = LLVMIntTypeInContext(gallivm->context, 8);
      const char *intrinsic = NULL;
      unsigned l_idx = 0;

      assert(src_width == 32 || src_width == 64);
      if (src_width == 32) {
         assert(length == 4 || length == 8);
      } else {
         assert(length == 2 || length == 4);
      }

      static const char *intrinsics[2][2][2] = {

         {{"llvm.x86.avx2.gather.d.d",
           "llvm.x86.avx2.gather.d.d.256"},
          {"llvm.x86.avx2.gather.d.q",
           "llvm.x86.avx2.gather.d.q.256"}},

         {{"llvm.x86.avx2.gather.d.ps",
           "llvm.x86.avx2.gather.d.ps.256"},
          {"llvm.x86.avx2.gather.d.pd",
           "llvm.x86.avx2.gather.d.pd.256"}},
      };

      if ((src_width == 32 && length == 8) ||
          (src_width == 64 && length == 4)) {
         l_idx = 1;
      }
      intrinsic = intrinsics[dst_type.floating][src_width == 64][l_idx];

      LLVMValueRef passthru = LLVMGetUndef(src_vec_type);
      LLVMValueRef mask = LLVMConstAllOnes(src_vec_type);
      mask = LLVMConstBitCast(mask, src_vec_type);
      LLVMValueRef scale = LLVMConstInt(i8_type, 1, 0);

      LLVMValueRef args[] = { passthru, base_ptr, offsets, mask, scale };

      res = lp_build_intrinsic(builder, intrinsic, src_vec_type, args, 5, 0);
   }
   res = LLVMBuildBitCast(builder, res, lp_build_vec_type(gallivm, res_type), "");

   return res;
}