Beispiel #1
0
/**
 * Gather one element from scatter positions in memory.
 *
 * Loads a src_width-bit integer from the address that
 * lp_build_gather_elem_ptr() derives from base_ptr, offsets and i, and
 * returns it as a dst_width-bit integer.
 *
 * @param gallivm         supplies the LLVM context and builder
 * @param length          forwarded unchanged to lp_build_gather_elem_ptr()
 * @param src_width       width in bits of the integer read from memory
 * @param dst_width       width in bits of the returned integer; expected to
 *                        be >= src_width (asserted)
 * @param aligned         if false, the load is marked as byte aligned; if
 *                        true, the load keeps LLVM's default alignment when
 *                        src_width is a power of two, and per-channel
 *                        alignment otherwise (see below)
 * @param base_ptr        must be an i8 pointer (asserted)
 * @param offsets         forwarded unchanged to lp_build_gather_elem_ptr()
 * @param i               forwarded unchanged to lp_build_gather_elem_ptr()
 * @param vector_justify  only has an effect on PIPE_ARCH_BIG_ENDIAN builds:
 *                        a zero-extended value is shifted left by
 *                        (dst_width - src_width) bits
 *
 * @return the loaded value, zero-extended (or truncated) to dst_width bits
 *
 * @sa lp_build_gather()
 */
LLVMValueRef
lp_build_gather_elem(struct gallivm_state *gallivm,
                     unsigned length,
                     unsigned src_width,
                     unsigned dst_width,
                     boolean aligned,
                     LLVMValueRef base_ptr,
                     LLVMValueRef offsets,
                     unsigned i,
                     boolean vector_justify)
{
   LLVMTypeRef src_type = LLVMIntTypeInContext(gallivm->context, src_width);
   LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0);
   LLVMTypeRef dst_elem_type = LLVMIntTypeInContext(gallivm->context, dst_width);
   LLVMValueRef ptr;
   LLVMValueRef res;

   assert(LLVMTypeOf(base_ptr) == LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0));

   ptr = lp_build_gather_elem_ptr(gallivm, length, base_ptr, offsets, i);
   ptr = LLVMBuildBitCast(gallivm->builder, ptr, src_ptr_type, "");
   res = LLVMBuildLoad(gallivm->builder, ptr, "");

   /* XXX
    * On some archs we probably really want to avoid having to deal
    * with alignments lower than 4 bytes (if fetch size is a power of
    * two >= 32). On x86 it doesn't matter, however.
    * We should be able to guarantee full alignment for any kind of texture
    * fetch (except ARB_texture_buffer_range, oops), but not vertex fetch
    * (there's PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY and friends
    * but I don't think that's quite what we wanted).
    * For ARB_texture_buffer_range, PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT
    * looks like a good fit, but it seems this cap bit (and OpenGL) aren't
    * enforcing what we want (which is what d3d10 does, the offset needs to
    * be aligned to element size, but GL has bytes regardless of element
    * size which would only leave us with minimum alignment restriction of 16
    * which doesn't make much sense if the type isn't 4x32bit). Due to
    * translation of offsets to first_elem in sampler_views it actually seems
    * gallium could not do anything else except 16 no matter what...
    */
   if (!aligned) {
      LLVMSetAlignment(res, 1);
   } else if ((src_width & (src_width - 1)) != 0) {
      /*
       * src_width is not a power of two, so full alignment is impossible.
       * Left alone, LLVM would assume the ABI alignment of the integer
       * type, which is larger than anything the caller can guarantee for
       * such a fetch. Assume the caller really meant that the individual
       * channels are aligned (3-channel formats: 24, 48 or 96 bits), and
       * fall back to byte alignment for anything else.
       */
      unsigned channel_bytes = src_width / 24;

      if (src_width % 24 == 0 &&
          (channel_bytes & (channel_bytes - 1)) == 0) {
         LLVMSetAlignment(res, channel_bytes);
      } else {
         LLVMSetAlignment(res, 1);
      }
   }

   assert(src_width <= dst_width);
   if (src_width > dst_width) {
      /* Only reachable when asserts are compiled out. */
      res = LLVMBuildTrunc(gallivm->builder, res, dst_elem_type, "");
   } else if (src_width < dst_width) {
      res = LLVMBuildZExt(gallivm->builder, res, dst_elem_type, "");
      if (vector_justify) {
#ifdef PIPE_ARCH_BIG_ENDIAN
         /* Move the loaded bits into the upper part of the wider integer. */
         res = LLVMBuildShl(gallivm->builder, res,
                            LLVMConstInt(dst_elem_type, dst_width - src_width, 0), "");
#endif
      }
   }

   return res;
}
/**
 * @brief lp_build_fetch_rgba_aos_array
 *
 * Loads one vector of the format's native channel type from
 * base_ptr + offset, narrows 64-bit floats to 32-bit, pads the vector to
 * the destination length, converts it to the destination type and applies
 * the format swizzle.
 *
 * \param gallivm       supplies the LLVM builder
 * \param format_desc   describes format of the image we're fetching from
 * \param dst_type      output type
 * \param base_ptr      address of the pixel block (or the texel if uncompressed)
 * \param offset        ptr offset
 *
 * \return the swizzled vector; for pure integer formats it holds integers,
 *         bitcast to the dst_type vector type when dst_type is floating
 */
LLVMValueRef
lp_build_fetch_rgba_aos_array(struct gallivm_state *gallivm,
                              const struct util_format_description *format_desc,
                              struct lp_type dst_type,
                              LLVMValueRef base_ptr,
                              LLVMValueRef offset)
{
   const boolean is_pure_int = format_desc->channel[0].pure_integer;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context swizzle_bld;
   struct lp_type src_type;
   struct lp_type conv_type = dst_type;
   LLVMTypeRef mem_vec_type;
   LLVMValueRef addr;
   LLVMValueRef texel;

   lp_type_from_format_desc(&src_type, format_desc);

   assert(src_type.length <= dst_type.length);

   mem_vec_type = lp_build_vec_type(gallivm, src_type);

   /*
    * Fetch the whole source vector in one load. Only per-element alignment
    * is promised to LLVM, not alignment of the full vector.
    */
   addr = LLVMBuildGEP(builder, base_ptr, &offset, 1, "");
   addr = LLVMBuildPointerCast(builder, addr,
                               LLVMPointerType(mem_vec_type, 0), "");
   texel = LLVMBuildLoad(builder, addr, "");
   LLVMSetAlignment(texel, src_type.width / 8);

   /* Doubles are narrowed to floats before any further processing. */
   if (src_type.floating && src_type.width == 64) {
      src_type.width = 32;
      texel = LLVMBuildFPTrunc(builder, texel,
                               lp_build_vec_type(gallivm, src_type), "");
   }

   /* Grow the vector up to the destination length. */
   if (src_type.length < dst_type.length) {
      texel = lp_build_pad_vector(gallivm, texel, dst_type.length);
      src_type.length = dst_type.length;
   }

   /*
    * Pure integer formats are converted to an integer type carrying the
    * source signedness: some callers expect (fake) floats, others real ints.
    */
   if (is_pure_int) {
      conv_type.floating = 0;
      conv_type.sign = src_type.sign;
   }

   /* Convert to the working type, in place. */
   lp_build_conv(gallivm, src_type, conv_type, &texel, 1, &texel, 1);

   /* Reorder the channels according to the format. */
   lp_build_context_init(&swizzle_bld, gallivm, conv_type);
   texel = lp_build_format_swizzle_aos(format_desc, &swizzle_bld, texel);

   /* Pure integers are handed back as a float-typed vector on request. */
   if (is_pure_int && dst_type.floating) {
      texel = LLVMBuildBitCast(builder, texel,
                               lp_build_vec_type(gallivm, dst_type), "");
   }

   return texel;
}
Beispiel #3
0
/**
 * Gather one element from scatter positions in memory.
 * Nearly the same as lp_build_gather_elem(), however the individual
 * elements may be vectors themselves, and fetches may be float type.
 * Can also do pad vector instead of ZExt.
 *
 * @param gallivm         supplies the LLVM context and builder
 * @param length          forwarded unchanged to lp_build_gather_elem_ptr()
 * @param src_width       total width in bits of the value read from memory
 * @param src_type        LLVM type of the value read from memory
 * @param dst_type        type of the returned value; its total width
 *                        (width * length) is expected to be >= src_width
 * @param aligned         if false, the load is marked as byte aligned
 * @param base_ptr        must be an i8 pointer (asserted)
 * @param offsets         forwarded unchanged to lp_build_gather_elem_ptr()
 * @param i               forwarded unchanged to lp_build_gather_elem_ptr()
 * @param vector_justify  only has an effect on PIPE_ARCH_BIG_ENDIAN builds,
 *                        and only on the scalar (dst_type.length == 1) path
 *
 * @return the loaded value, padded or zero-extended to dst_type if it was
 *         narrower
 *
 * @sa lp_build_gather()
 */
static LLVMValueRef
lp_build_gather_elem_vec(struct gallivm_state *gallivm,
                         unsigned length,
                         unsigned src_width,
                         LLVMTypeRef src_type,
                         struct lp_type dst_type,
                         boolean aligned,
                         LLVMValueRef base_ptr,
                         LLVMValueRef offsets,
                         unsigned i,
                         boolean vector_justify)
{
   const unsigned dst_bits = dst_type.width * dst_type.length;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMTypeRef fetch_ptr_type = LLVMPointerType(src_type, 0);
   LLVMTypeRef dst_elem_type;
   LLVMValueRef addr;
   LLVMValueRef elem;

   assert(LLVMTypeOf(base_ptr) == LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0));

   addr = lp_build_gather_elem_ptr(gallivm, length, base_ptr, offsets, i);
   addr = LLVMBuildBitCast(builder, addr, fetch_ptr_type, "");
   elem = LLVMBuildLoad(builder, addr, "");

   /* XXX
    * On some archs we probably really want to avoid having to deal
    * with alignments lower than 4 bytes (if fetch size is a power of
    * two >= 32). On x86 it doesn't matter, however.
    * We should be able to guarantee full alignment for any kind of texture
    * fetch (except ARB_texture_buffer_range, oops), but not vertex fetch
    * (there's PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY and friends
    * but I don't think that's quite what we wanted).
    * For ARB_texture_buffer_range, PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT
    * looks like a good fit, but it seems this cap bit (and OpenGL) aren't
    * enforcing what we want (which is what d3d10 does, the offset needs to
    * be aligned to element size, but GL has bytes regardless of element
    * size which would only leave us with minimum alignment restriction of 16
    * which doesn't make much sense if the type isn't 4x32bit). Due to
    * translation of offsets to first_elem in sampler_views it actually seems
    * gallium could not do anything else except 16 no matter what...
    */
   if (!aligned) {
      LLVMSetAlignment(elem, 1);
   } else if (!util_is_power_of_two(src_width)) {
      /*
       * Full alignment is impossible, assume the caller really meant
       * the individual elements were aligned (e.g. 3x32bit format).
       * And yes the generated code may otherwise crash, llvm will
       * really assume 128bit alignment with a 96bit fetch (I suppose
       * that makes sense as it can just assume the upper 32bit to be
       * whatever).
       * Maybe the caller should be able to explicitly set this, but
       * this should cover all the 3-channel formats.
       */
      const unsigned channel_bytes = src_width / 24;
      unsigned align = 1;

      if (src_width % 24 == 0 && util_is_power_of_two(channel_bytes)) {
         align = channel_bytes;
      }
      LLVMSetAlignment(elem, align);
   }

   assert(src_width <= dst_type.width * dst_type.length);

   /* Nothing to widen. */
   if (src_width >= dst_bits) {
      return elem;
   }

   if (dst_type.length > 1) {
      /*
       * vector_justify hopefully a non-issue since we only deal
       * with src_width >= 32 here?
       */
      return lp_build_pad_vector(gallivm, elem, dst_type.length);
   }

   /*
    * Scalar destination: widen by zero extension.
    * Only valid if the fetched type is an int type...
    */
   dst_elem_type = lp_build_vec_type(gallivm, dst_type);
   elem = LLVMBuildZExt(builder, elem, dst_elem_type, "");

#ifdef PIPE_ARCH_BIG_ENDIAN
   if (vector_justify) {
      elem = LLVMBuildShl(builder, elem,
                          LLVMConstInt(dst_elem_type,
                                       dst_type.width - src_width, 0), "");
   }
   if (src_width == 48) {
      /* Load 3x16 bit vector.
       * The sequence of loads on big-endian hardware proceeds as follows.
       * 16-bit fields are denoted by X, Y, Z, and 0.  In memory, the sequence
       * of three fields appears in the order X, Y, Z.
       *
       * Load 32-bit word: 0.0.X.Y
       * Load 16-bit halfword: 0.0.0.Z
       * Rotate left: 0.X.Y.0
       * Bitwise OR: 0.X.Y.Z
       *
       * The order in which we need the fields in the result is 0.Z.Y.X,
       * the same as on little-endian; permute 16-bit fields accordingly
       * within 64-bit register:
       */
      static const int lane_order[4] = { 2, 1, 0, 3 };
      LLVMValueRef shuffles[4];
      unsigned lane;

      for (lane = 0; lane < 4; lane++) {
         shuffles[lane] = lp_build_const_int32(gallivm, lane_order[lane]);
      }

      elem = LLVMBuildBitCast(builder, elem,
                              lp_build_vec_type(gallivm, lp_type_uint_vec(16, 4*16)), "");
      elem = LLVMBuildShuffleVector(builder, elem, elem,
                                    LLVMConstVector(shuffles, 4), "");
      elem = LLVMBuildBitCast(builder, elem, dst_elem_type, "");
   }
#endif

   return elem;
}
Beispiel #4
0
// Generate the identity comparison of two object pointers that may both be
// boxed values. The emitted control flow is:
//   - equal addresses: result is true;
//   - different descriptors (or, when BOXED_SUBTYPES_UNBOXED is possible, bit
//     0 of the left type id is set): result is false;
//   - otherwise, for numerics the bytes following the first element of each
//     object are compared with memcmp, using a size read from
//     c->numeric_sizes at the byte offset given by the left type id; for
//     tuples the type's `__is` method is called through the vtable.
// possible_boxes is a mask of BOXED_SUBTYPES_* flags selecting which of these
// paths are generated. Returns an i1 phi that merges the results.
static LLVMValueRef box_is_box(compile_t* c, ast_t* left_type,
  LLVMValueRef l_value, LLVMValueRef r_value, int possible_boxes)
{
  pony_assert(LLVMGetTypeKind(LLVMTypeOf(l_value)) == LLVMPointerTypeKind);
  pony_assert(LLVMGetTypeKind(LLVMTypeOf(r_value)) == LLVMPointerTypeKind);

  const bool may_be_unboxed = (possible_boxes & BOXED_SUBTYPES_UNBOXED) != 0;
  const bool may_be_numeric = (possible_boxes & BOXED_SUBTYPES_NUMERIC) != 0;
  const bool may_be_tuple = (possible_boxes & BOXED_SUBTYPES_TUPLE) != 0;
  const bool may_be_either =
    (possible_boxes & BOXED_SUBTYPES_BOXED) == BOXED_SUBTYPES_BOXED;

  // Blocks are created up front, in a fixed order.
  LLVMBasicBlockRef entry_block = LLVMGetInsertBlock(c->builder);
  LLVMBasicBlockRef checkbox_block = codegen_block(c, "is_checkbox");
  LLVMBasicBlockRef box_block = codegen_block(c, "is_box");
  LLVMBasicBlockRef num_block =
    may_be_numeric ? codegen_block(c, "is_num") : NULL;
  LLVMBasicBlockRef tuple_block =
    may_be_tuple ? codegen_block(c, "is_tuple") : NULL;
  LLVMBasicBlockRef post_block = codegen_block(c, "is_post");

  // Same address means same object: nothing more to check.
  LLVMValueRef same_addr = LLVMBuildICmp(c->builder, LLVMIntEQ, l_value,
    r_value, "");
  LLVMBuildCondBr(c->builder, same_addr, post_block, checkbox_block);

  // Check whether we have two boxed objects of the same type.
  LLVMPositionBuilderAtEnd(c->builder, checkbox_block);
  LLVMValueRef l_desc = gendesc_fetch(c, l_value);
  LLVMValueRef r_desc = gendesc_fetch(c, r_value);
  LLVMValueRef enter_box = LLVMBuildICmp(c->builder, LLVMIntEQ, l_desc,
    r_desc, "");
  LLVMValueRef l_typeid = NULL;

  if(may_be_unboxed)
  {
    // Additionally require bit 0 of the left type id to be clear.
    l_typeid = gendesc_typeid(c, l_value);
    LLVMValueRef low_bit = LLVMBuildAnd(c->builder, l_typeid,
      LLVMConstInt(c->i32, 1, false), "");
    LLVMValueRef left_boxed = LLVMBuildICmp(c->builder, LLVMIntEQ, low_bit,
      LLVMConstInt(c->i32, 0, false), "");
    enter_box = LLVMBuildAnd(c->builder, enter_box, left_boxed, "");
  }

  LLVMBuildCondBr(c->builder, enter_box, box_block, post_block);

  // Check whether it's a numeric primitive or a tuple.
  LLVMPositionBuilderAtEnd(c->builder, box_block);

  if(may_be_either)
  {
    // Both kinds are possible: dispatch on bit 1 of the left type id.
    if(l_typeid == NULL)
      l_typeid = gendesc_typeid(c, l_value);

    LLVMValueRef num_bit = LLVMBuildAnd(c->builder, l_typeid,
      LLVMConstInt(c->i32, 2, false), "");
    LLVMValueRef boxed_num = LLVMBuildICmp(c->builder, LLVMIntEQ, num_bit,
      LLVMConstInt(c->i32, 0, false), "");
    LLVMBuildCondBr(c->builder, boxed_num, num_block, tuple_block);
  } else if(may_be_numeric) {
    LLVMBuildBr(c->builder, num_block);
  } else {
    pony_assert((possible_boxes & BOXED_SUBTYPES_TUPLE) != 0);
    LLVMBuildBr(c->builder, tuple_block);
  }

  LLVMValueRef is_num = NULL;

  if(num_block != NULL)
  {
    // Get the machine word size and memcmp without unboxing.
    LLVMPositionBuilderAtEnd(c->builder, num_block);

    if(l_typeid == NULL)
      l_typeid = gendesc_typeid(c, l_value);

    // Look up the size: the type id is used as a byte offset into the table.
    LLVMValueRef size_table = LLVMBuildBitCast(c->builder, c->numeric_sizes,
      c->void_ptr, "");
    LLVMValueRef size_index = LLVMBuildZExt(c->builder, l_typeid, c->intptr,
      "");
    LLVMValueRef size_ptr = LLVMBuildInBoundsGEP(c->builder, size_table,
      &size_index, 1, "");
    size_ptr = LLVMBuildBitCast(c->builder, size_ptr,
      LLVMPointerType(c->i32, 0), "");
    LLVMValueRef size = LLVMBuildLoad(c->builder, size_ptr, "");
    LLVMSetAlignment(size, 4);

    // Compare what follows the first element of each object.
    LLVMValueRef index_one = LLVMConstInt(c->i32, 1, false);
    LLVMValueRef memcmp_args[3];
    memcmp_args[0] = LLVMBuildInBoundsGEP(c->builder, l_value, &index_one, 1,
      "");
    memcmp_args[0] = LLVMBuildBitCast(c->builder, memcmp_args[0], c->void_ptr,
      "");
    memcmp_args[1] = LLVMBuildInBoundsGEP(c->builder, r_value, &index_one, 1,
      "");
    memcmp_args[1] = LLVMBuildBitCast(c->builder, memcmp_args[1], c->void_ptr,
      "");
    memcmp_args[2] = LLVMBuildZExt(c->builder, size, c->intptr, "");

    LLVMValueRef cmp_result = gencall_runtime(c, "memcmp", memcmp_args, 3, "");
    is_num = LLVMBuildICmp(c->builder, LLVMIntEQ, cmp_result,
      LLVMConstInt(c->i32, 0, false), "");
    LLVMBuildBr(c->builder, post_block);
  }

  LLVMValueRef is_tuple = NULL;

  if(tuple_block != NULL)
  {
    // Call the type-specific __is function, which will unbox the tuples.
    LLVMPositionBuilderAtEnd(c->builder, tuple_block);
    reach_type_t* r_left = reach_type(c->reach, left_type);
    reach_method_t* is_fn = reach_method(r_left, TK_BOX, stringtab("__is"),
      NULL);
    pony_assert(is_fn != NULL);

    LLVMValueRef func = gendesc_vtable(c, l_value, is_fn->vtable_index);
    LLVMTypeRef param_types[2] = { c->object_ptr, c->object_ptr };
    LLVMTypeRef fn_type = LLVMFunctionType(c->i1, param_types, 2, false);
    func = LLVMBuildBitCast(c->builder, func, LLVMPointerType(fn_type, 0), "");

    LLVMValueRef call_args[2] = { l_value, r_value };
    is_tuple = codegen_call(c, func, call_args, 2);
    LLVMBuildBr(c->builder, post_block);
  }

  // Merge the results of every path that reaches the exit block.
  LLVMPositionBuilderAtEnd(c->builder, post_block);
  LLVMValueRef phi = LLVMBuildPhi(c->builder, c->i1, "");

  LLVMValueRef in_values[4];
  LLVMBasicBlockRef in_blocks[4];
  unsigned in_count = 0;

  // Addresses were equal.
  in_values[in_count] = LLVMConstInt(c->i1, 1, false);
  in_blocks[in_count] = entry_block;
  in_count++;

  if(is_num != NULL)
  {
    in_values[in_count] = is_num;
    in_blocks[in_count] = num_block;
    in_count++;
  }

  if(is_tuple != NULL)
  {
    in_values[in_count] = is_tuple;
    in_blocks[in_count] = tuple_block;
    in_count++;
  }

  // The descriptor check failed.
  in_values[in_count] = LLVMConstInt(c->i1, 0, false);
  in_blocks[in_count] = checkbox_block;
  in_count++;

  LLVMAddIncoming(phi, in_values, in_blocks, in_count);
  return phi;
}