Esempio n. 1
0
/**
 * Expand 16-bit half-float values (held as int16) to float32.
 *
 * With F16C available and an input of 4 or 8 elements the conversion is a
 * single vcvtph2ps instruction (llvm.x86.vcvtph2ps.128 / .256, the
 * _mm_cvtph_ps family).  Every other case goes through the generic
 * small-float expansion.
 *
 * @param gallivm       gallivm state supplying the IR builder
 * @param src           value to convert (scalar or vector)
 *
 * @return the value built with the float32 type matching src's element count
 */
LLVMValueRef
lp_build_half_to_float(struct gallivm_state *gallivm,
                       LLVMValueRef src)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMTypeRef in_type = LLVMTypeOf(src);
   unsigned count = 1;
   struct lp_type f32_type;
   struct lp_type i32_type;
   LLVMTypeRef widened_type;
   LLVMValueRef widened;

   /* A non-vector input is handled as a single element. */
   if (LLVMGetTypeKind(in_type) == LLVMVectorTypeKind) {
      count = LLVMGetVectorSize(in_type);
   }

   f32_type = lp_type_float_vec(32, 32 * count);
   i32_type = lp_type_int_vec(32, 32 * count);
   widened_type = lp_build_vec_type(gallivm, i32_type);

   if (util_cpu_caps.has_f16c && HAVE_LLVM >= 0x0301) {
      if (count == 8) {
         return lp_build_intrinsic_unary(builder, "llvm.x86.vcvtph2ps.256",
                                         lp_build_vec_type(gallivm, f32_type),
                                         src);
      }
      if (count == 4) {
         /* The 128-bit intrinsic is fed an 8-element vector, so pad first. */
         LLVMValueRef padded = lp_build_pad_vector(gallivm, src, 8);
         return lp_build_intrinsic_unary(builder, "llvm.x86.vcvtph2ps.128",
                                         lp_build_vec_type(gallivm, f32_type),
                                         padded);
      }
   }

   /*
    * Generic path: zero-extend the int16 lanes to int32 (may not generate
    * great code) and expand them as a small float with 10 mantissa bits and
    * 5 exponent bits.
    */
   widened = LLVMBuildZExt(builder, src, widened_type, "");
   return lp_build_smallfloat_to_float(gallivm, f32_type, widened,
                                       10, 5, 0, true);
}
Esempio n. 2
0
/**
 * Fetch a texel whose channels are laid out as a plain array and return it
 * as an AoS vector of dst_type.
 *
 * The source type is derived from channel 0 of the format description only,
 * so every channel is treated as having that channel's type, size and
 * normalization.
 *
 * \param gallivm       gallivm state supplying the IR builder
 * \param format_desc   describes format of the image we're fetching from
 * \param dst_type      output type; must have at least as many elements as
 *                      the format has channels (asserted)
 * \param base_ptr      address of the pixel block (or the texel if uncompressed)
 * \param offset        ptr offset
 *
 * \return the fetched texel, converted to dst_type and swizzled by
 *         lp_build_format_swizzle_aos()
 */
LLVMValueRef
lp_build_fetch_rgba_aos_array(struct gallivm_state *gallivm,
                              const struct util_format_description *format_desc,
                              struct lp_type dst_type,
                              LLVMValueRef base_ptr,
                              LLVMValueRef offset)
{
    struct lp_build_context bld;
    LLVMBuilderRef builder = gallivm->builder;
    LLVMTypeRef src_vec_type;
    LLVMValueRef ptr, res;
    struct lp_type src_type;

    memset(&src_type, 0, sizeof src_type);
    src_type.floating = format_desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT;
    src_type.fixed    = format_desc->channel[0].type == UTIL_FORMAT_TYPE_FIXED;
    src_type.sign     = format_desc->channel[0].type != UTIL_FORMAT_TYPE_UNSIGNED;
    src_type.norm     = format_desc->channel[0].normalized;
    src_type.width    = format_desc->channel[0].size;
    src_type.length   = format_desc->nr_channels;

    assert(src_type.length <= dst_type.length);

    src_vec_type = lp_build_vec_type(gallivm, src_type);

    /*
     * Read whole vector from memory, unaligned: the load is only marked as
     * aligned to a single channel (width / 8 bytes), not to the full vector.
     */
    ptr = LLVMBuildGEP(builder, base_ptr, &offset, 1, "");
    ptr = LLVMBuildPointerCast(builder, ptr, LLVMPointerType(src_vec_type, 0), "");
    res = LLVMBuildLoad(builder, ptr, "");
    lp_set_load_alignment(res, src_type.width / 8);

    /* Truncate doubles to float */
    if (src_type.floating && src_type.width == 64) {
        src_type.width = 32;
        src_vec_type = lp_build_vec_type(gallivm, src_type);

        res = LLVMBuildFPTrunc(builder, res, src_vec_type, "");
    }

    /* Expand to correct length */
    if (src_type.length < dst_type.length) {
        res = lp_build_pad_vector(gallivm, res, src_type, dst_type.length);
        src_type.length = dst_type.length;
    }

    /* Convert to correct format (in place: res is both source and result) */
    lp_build_conv(gallivm, src_type, dst_type, &res, 1, &res, 1);

    /* Swizzle it */
    lp_build_context_init(&bld, gallivm, dst_type);
    return lp_build_format_swizzle_aos(format_desc, &bld, res);
}
Esempio n. 3
0
/**
 * Fetch a texel stored as a plain array of channels and return it as an AoS
 * vector.
 *
 * For pure-integer formats the conversion and swizzle are done on an integer
 * type; if the caller asked for a floating dst_type the integer bits are
 * then bitcast (not converted) to that type.
 *
 * \param gallivm       gallivm state supplying the IR builder
 * \param format_desc   describes format of the image we're fetching from
 * \param dst_type      output type; must have at least as many elements as
 *                      the source type derived from the format (asserted)
 * \param base_ptr      address of the pixel block (or the texel if uncompressed)
 * \param offset        ptr offset
 *
 * \return the fetched, converted and swizzled texel
 */
LLVMValueRef
lp_build_fetch_rgba_aos_array(struct gallivm_state *gallivm,
                              const struct util_format_description *format_desc,
                              struct lp_type dst_type,
                              LLVMValueRef base_ptr,
                              LLVMValueRef offset)
{
   LLVMBuilderRef builder = gallivm->builder;
   const boolean is_pure_int = format_desc->channel[0].pure_integer;
   struct lp_build_context swizzle_bld;
   struct lp_type mem_type;
   struct lp_type conv_type;
   LLVMTypeRef mem_vec_type;
   LLVMValueRef addr;
   LLVMValueRef texel;

   lp_type_from_format_desc(&mem_type, format_desc);

   assert(mem_type.length <= dst_type.length);

   mem_vec_type = lp_build_vec_type(gallivm, mem_type);

   /*
    * Load all channels with one vector load.  Only per-channel alignment
    * (width / 8 bytes) is claimed for it.
    */
   addr = LLVMBuildGEP(builder, base_ptr, &offset, 1, "");
   addr = LLVMBuildPointerCast(builder, addr,
                               LLVMPointerType(mem_vec_type, 0), "");
   texel = LLVMBuildLoad(builder, addr, "");
   LLVMSetAlignment(texel, mem_type.width / 8);

   /* 64-bit floats are narrowed to 32-bit before anything else happens. */
   if (mem_type.floating && mem_type.width == 64) {
      mem_type.width = 32;
      mem_vec_type = lp_build_vec_type(gallivm, mem_type);

      texel = LLVMBuildFPTrunc(builder, texel, mem_vec_type, "");
   }

   /* Widen the vector to the element count of the destination. */
   if (mem_type.length < dst_type.length) {
      texel = lp_build_pad_vector(gallivm, texel, dst_type.length);
      mem_type.length = dst_type.length;
   }

   /*
    * Pure-integer data must stay integer through conversion and swizzle,
    * keeping the signedness of the source, even when the caller wants a
    * (fake) float result.
    */
   conv_type = dst_type;
   if (is_pure_int) {
      conv_type.floating = 0;
      conv_type.sign = mem_type.sign;
   }

   /* Convert in place: texel is both the source and the result. */
   lp_build_conv(gallivm, mem_type, conv_type, &texel, 1, &texel, 1);

   lp_build_context_init(&swizzle_bld, gallivm, conv_type);
   texel = lp_build_format_swizzle_aos(format_desc, &swizzle_bld, texel);

   if (!is_pure_int || !dst_type.floating) {
      return texel;
   }

   /* Hand the integer bits back in the float type the caller requested. */
   return LLVMBuildBitCast(builder, texel,
                           lp_build_vec_type(gallivm, dst_type), "");
}
Esempio n. 4
0
/**
 * Gather one element from scatter positions in memory.
 * Nearly the same as above, however the individual elements
 * may be vectors themselves, and fetches may be float type.
 * Can also do pad vector instead of ZExt.
 *
 * The element is loaded as src_type. If it is narrower than dst_type it is
 * widened afterwards: with lp_build_pad_vector() when dst_type has more than
 * one element, with a zero extension otherwise.
 *
 * @param gallivm         gallivm state (builder and context)
 * @param length          forwarded to lp_build_gather_elem_ptr()
 * @param src_width       size of the fetched value in bits; must not exceed
 *                        dst_type.width * dst_type.length (asserted)
 * @param src_type        LLVM type the value is loaded as
 * @param dst_type        type the loaded value is widened to
 * @param aligned         if false, the load is marked as byte aligned
 * @param base_ptr        base address; must be an i8 pointer (asserted)
 * @param offsets         forwarded to lp_build_gather_elem_ptr()
 * @param i               forwarded to lp_build_gather_elem_ptr()
 * @param vector_justify  only read when PIPE_ARCH_BIG_ENDIAN is defined: if
 *                        set, the zero-extended scalar is shifted left by
 *                        dst_type.width - src_width bits
 *
 * @return the loaded, and if necessary widened, value
 *
 * @sa lp_build_gather()
 */
static LLVMValueRef
lp_build_gather_elem_vec(struct gallivm_state *gallivm,
                         unsigned length,
                         unsigned src_width,
                         LLVMTypeRef src_type,
                         struct lp_type dst_type,
                         boolean aligned,
                         LLVMValueRef base_ptr,
                         LLVMValueRef offsets,
                         unsigned i,
                         boolean vector_justify)
{
   LLVMValueRef ptr, res;
   LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0);
   assert(LLVMTypeOf(base_ptr) == LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0));

   /* Compute the element's address, then reinterpret it as a src_type pointer. */
   ptr = lp_build_gather_elem_ptr(gallivm, length, base_ptr, offsets, i);
   ptr = LLVMBuildBitCast(gallivm->builder, ptr, src_ptr_type, "");
   res = LLVMBuildLoad(gallivm->builder, ptr, "");

   /* XXX
    * On some archs we probably really want to avoid having to deal
    * with alignments lower than 4 bytes (if fetch size is a power of
    * two >= 32). On x86 it doesn't matter, however.
    * We should be able to guarantee full alignment for any kind of texture
    * fetch (except ARB_texture_buffer_range, oops), but not vertex fetch
    * (there's PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY and friends
    * but I don't think that's quite what we wanted).
    * For ARB_texture_buffer_range, PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT
    * looks like a good fit, but it seems this cap bit (and OpenGL) aren't
    * enforcing what we want (which is what d3d10 does, the offset needs to
    * be aligned to element size, but GL has bytes regardless of element
    * size which would only leave us with minimum alignment restriction of 16
    * which doesn't make much sense if the type isn't 4x32bit). Due to
    * translation of offsets to first_elem in sampler_views it actually seems
    * gallium could not do anything else except 16 no matter what...
    */
   /*
    * Note: when aligned is set and src_width is a power of two, no explicit
    * alignment is set here, so the load keeps the alignment LLVMBuildLoad
    * gave it.
    */
   if (!aligned) {
      LLVMSetAlignment(res, 1);
   } else if (!util_is_power_of_two(src_width)) {
      /*
       * Full alignment is impossible, assume the caller really meant
       * the individual elements were aligned (e.g. 3x32bit format).
       * And yes the generated code may otherwise crash, llvm will
       * really assume 128bit alignment with a 96bit fetch (I suppose
       * that makes sense as it can just assume the upper 32bit to be
       * whatever).
       * Maybe the caller should be able to explicitly set this, but
       * this should cover all the 3-channel formats.
       */
      /*
       * src_width == 24 * 2^k bits means three channels of 8 * 2^k bits;
       * src_width / 24 is then the size of one channel in bytes, which is
       * used as the alignment. Any other width falls back to byte alignment.
       */
      if (((src_width / 24) * 24 == src_width) &&
           util_is_power_of_two(src_width / 24)) {
          LLVMSetAlignment(res, src_width / 24);
      } else {
         LLVMSetAlignment(res, 1);
      }
   }

   assert(src_width <= dst_type.width * dst_type.length);
   /* Widen only if the fetched value is narrower than the destination. */
   if (src_width < dst_type.width * dst_type.length) {
      if (dst_type.length > 1) {
         res = lp_build_pad_vector(gallivm, res, dst_type.length);
         /*
          * vector_justify hopefully a non-issue since we only deal
          * with src_width >= 32 here?
          */
      } else {
         LLVMTypeRef dst_elem_type = lp_build_vec_type(gallivm, dst_type);

         /*
          * Only valid if src_ptr_type is int type...
          */
         res = LLVMBuildZExt(gallivm->builder, res, dst_elem_type, "");

#ifdef PIPE_ARCH_BIG_ENDIAN
         /*
          * Shifting left by the width difference moves the fetched bits to
          * the most significant end of the destination.
          */
         if (vector_justify) {
         res = LLVMBuildShl(gallivm->builder, res,
                            LLVMConstInt(dst_elem_type,
                                         dst_type.width - src_width, 0), "");
         }
         if (src_width == 48) {
            /* Load 3x16 bit vector.
             * The sequence of loads on big-endian hardware proceeds as follows.
             * 16-bit fields are denoted by X, Y, Z, and 0.  In memory, the sequence
             * of three fields appears in the order X, Y, Z.
             *
             * Load 32-bit word: 0.0.X.Y
             * Load 16-bit halfword: 0.0.0.Z
             * Rotate left: 0.X.Y.0
             * Bitwise OR: 0.X.Y.Z
             *
             * The order in which we need the fields in the result is 0.Z.Y.X,
             * the same as on little-endian; permute 16-bit fields accordingly
             * within 64-bit register:
             */
            LLVMValueRef shuffles[4] = {
               lp_build_const_int32(gallivm, 2),
               lp_build_const_int32(gallivm, 1),
               lp_build_const_int32(gallivm, 0),
               lp_build_const_int32(gallivm, 3),
            };
            /* View the value as 4 x 16 bit, permute the lanes, then view it as dst again. */
            res = LLVMBuildBitCast(gallivm->builder, res,
                                   lp_build_vec_type(gallivm, lp_type_uint_vec(16, 4*16)), "");
            res = LLVMBuildShuffleVector(gallivm->builder, res, res, LLVMConstVector(shuffles, 4), "");
            res = LLVMBuildBitCast(gallivm->builder, res, dst_elem_type, "");
         }
#endif
      }
   }
   return res;
}