コード例 #1
0
ファイル: lp_bld_format_soa.c プロジェクト: CSRedRat/mesa
/**
 * Convert a vector of rgba8 values into 32bit wide SoA vectors.
 *
 * \param dst_type  The desired return type. For pure integer formats
 *                  this should be a 32bit wide int or uint vector type,
 *                  otherwise a float vector type.
 *
 * \param packed    The rgba8 values to pack.
 *
 * \param rgba      The 4 SoA return vectors.
 */
void
lp_build_rgba8_to_fi32_soa(struct gallivm_state *gallivm,
                           struct lp_type dst_type,
                           LLVMValueRef packed,
                           LLVMValueRef *rgba)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef mask = lp_build_const_int_vec(gallivm, dst_type, 0xff);
   unsigned chan;

   /* XXX technically shouldn't use that for uint dst_type */
   packed = LLVMBuildBitCast(builder, packed,
                             lp_build_int_vec_type(gallivm, dst_type), "");

   /* Decode the input vector components */
   for (chan = 0; chan < 4; ++chan) {
      unsigned start = chan*8;
      unsigned stop = start + 8;
      LLVMValueRef input;

      input = packed;

      if (start)
         input = LLVMBuildLShr(builder, input,
                               lp_build_const_int_vec(gallivm, dst_type, start), "");

      if (stop < 32)
         input = LLVMBuildAnd(builder, input, mask, "");

      if (dst_type.floating)
         input = lp_build_unsigned_norm_to_float(gallivm, 8, dst_type, input);

      rgba[chan] = input;
   }
}
コード例 #2
0
ファイル: lp_bld_depth.c プロジェクト: blckshrk/Mesa
/**
 * Do the one or two-sided stencil test op/update.
 */
static LLVMValueRef
lp_build_stencil_op(struct lp_build_context *bld,
                    const struct pipe_stencil_state stencil[2],
                    enum stencil_op op,
                    LLVMValueRef stencilRefs[2],
                    LLVMValueRef stencilVals,
                    LLVMValueRef mask,
                    LLVMValueRef front_facing)

{
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef res;

   assert(stencil[0].enabled);

   /* do front face op */
   res = lp_build_stencil_op_single(bld, &stencil[0], op,
                                     stencilRefs[0], stencilVals);

   if (stencil[1].enabled && front_facing != NULL) {
      /* do back face op */
      LLVMValueRef back_res;

      back_res = lp_build_stencil_op_single(bld, &stencil[1], op,
                                            stencilRefs[1], stencilVals);

      res = lp_build_select(bld, front_facing, res, back_res);
   }

   if (stencil[0].writemask != 0xff ||
       (stencil[1].enabled && front_facing != NULL && stencil[1].writemask != 0xff)) {
      /* mask &= stencil[0].writemask */
      LLVMValueRef writemask = lp_build_const_int_vec(bld->gallivm, bld->type,
                                                      stencil[0].writemask);
      if (stencil[1].enabled && stencil[1].writemask != stencil[0].writemask && front_facing != NULL) {
         LLVMValueRef back_writemask = lp_build_const_int_vec(bld->gallivm, bld->type,
                                                         stencil[1].writemask);
         writemask = lp_build_select(bld, front_facing, writemask, back_writemask);
      }

      mask = LLVMBuildAnd(builder, mask, writemask, "");
      /* res = (res & mask) | (stencilVals & ~mask) */
      res = lp_build_select_bitwise(bld, mask, res, stencilVals);
   }
   else {
      /* res = mask ? res : stencilVals */
      res = lp_build_select(bld, mask, res, stencilVals);
   }

   return res;
}
コード例 #3
0
/**
 * Shift right with immediate.
 */
LLVMValueRef
lp_build_shr_imm(struct lp_build_context *bld, LLVMValueRef a, unsigned imm)
{
   LLVMValueRef b = lp_build_const_int_vec(bld->gallivm, bld->type, imm);
   assert(imm <= bld->type.width);
   return lp_build_shr(bld, a, b);
}
コード例 #4
0
/**
 * Do the stencil test comparison (compare FB stencil values against ref value).
 * This will be used twice when generating two-sided stencil code.
 * \param stencil  the front/back stencil state
 * \param stencilRef  the stencil reference value, replicated as a vector
 * \param stencilVals  vector of stencil values from framebuffer
 * \return vector mask of pass/fail values (~0 or 0)
 */
static LLVMValueRef
lp_build_stencil_test_single(struct lp_build_context *bld,
                             const struct pipe_stencil_state *stencil,
                             LLVMValueRef stencilRef,
                             LLVMValueRef stencilVals)
{
   const unsigned stencilMax = 255; /* XXX fix */
   struct lp_type type = bld->type;
   LLVMValueRef res;

   assert(type.sign);

   assert(stencil->enabled);

   if (stencil->valuemask != stencilMax) {
      /* compute stencilRef = stencilRef & valuemask */
      LLVMValueRef valuemask = lp_build_const_int_vec(type, stencil->valuemask);
      stencilRef = LLVMBuildAnd(bld->builder, stencilRef, valuemask, "");
      /* compute stencilVals = stencilVals & valuemask */
      stencilVals = LLVMBuildAnd(bld->builder, stencilVals, valuemask, "");
   }

   res = lp_build_cmp(bld, stencil->func, stencilRef, stencilVals);

   return res;
}
コード例 #5
0
ファイル: lp_bld_sample.c プロジェクト: mslusarz/mesa
/**
 * Compute the partial offset of a pixel block along an arbitrary axis.
 *
 * @param coord   coordinate in pixels
 * @param stride  number of bytes between rows of successive pixel blocks
 * @param block_length  number of pixels in a pixels block along the coordinate
 *                      axis
 * @param out_offset    resulting relative offset of the pixel block in bytes
 * @param out_subcoord  resulting sub-block pixel coordinate
 */
void
lp_build_sample_partial_offset(struct lp_build_context *bld,
                               unsigned block_length,
                               LLVMValueRef coord,
                               LLVMValueRef stride,
                               LLVMValueRef *out_offset,
                               LLVMValueRef *out_subcoord)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef offset;
   LLVMValueRef subcoord;

   if (block_length == 1) {
      subcoord = bld->zero;
   }
   else {
      /*
       * Pixel blocks have power of two dimensions. LLVM should convert the
       * rem/div to bit arithmetic.
       * TODO: Verify this.
       * It does indeed BUT it does transform it to scalar (and back) when doing so
       * (using roughly extract, shift/and, mov, unpack) (llvm 2.7).
       * The generated code looks seriously unfunny and is quite expensive.
       */
#if 0
      LLVMValueRef block_width = lp_build_const_int_vec(bld->type, block_length);
      subcoord = LLVMBuildURem(builder, coord, block_width, "");
      coord    = LLVMBuildUDiv(builder, coord, block_width, "");
#else
      unsigned logbase2 = util_logbase2(block_length);
      LLVMValueRef block_shift = lp_build_const_int_vec(bld->gallivm, bld->type, logbase2);
      LLVMValueRef block_mask = lp_build_const_int_vec(bld->gallivm, bld->type, block_length - 1);
      subcoord = LLVMBuildAnd(builder, coord, block_mask, "");
      coord = LLVMBuildLShr(builder, coord, block_shift, "");
#endif
   }

   offset = lp_build_mul(bld, coord, stride);

   assert(out_offset);
   assert(out_subcoord);

   *out_offset = offset;
   *out_subcoord = subcoord;
}
コード例 #6
0
/**
 * Extract Y, U, V channels from packed YUYV.
 * @param packed  is a <n x i32> vector with the packed YUYV blocks
 * @param i  is a <n x i32> vector with the x pixel coordinate (0 or 1)
 */
static void
yuyv_to_yuv_soa(struct gallivm_state *gallivm,
                unsigned n,
                LLVMValueRef packed,
                LLVMValueRef i,
                LLVMValueRef *y,
                LLVMValueRef *u,
                LLVMValueRef *v)
{
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_type type;
   LLVMValueRef mask;

   memset(&type, 0, sizeof type);
   type.width = 32;
   type.length = n;

   assert(lp_check_value(type, packed));
   assert(lp_check_value(type, i));

   /*
    * y = (yuyv >> 16*i) & 0xff
    * u = (yuyv >> 8   ) & 0xff
    * v = (yuyv >> 24  ) & 0xff
    */

#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
   /*
    * Avoid shift with per-element count.
    * No support on x86, gets translated to roughly 5 instructions
    * per element. Didn't measure performance but cuts shader size
    * by quite a bit (less difference if cpu has no sse4.1 support).
    */
   if (util_cpu_caps.has_sse2 && n == 4) {
      LLVMValueRef sel, tmp;
      struct lp_build_context bld32;

      lp_build_context_init(&bld32, gallivm, type);

      tmp = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 16), "");
      sel = lp_build_compare(gallivm, type, PIPE_FUNC_EQUAL, i, lp_build_const_int_vec(gallivm, type, 0));
       *y = lp_build_select(&bld32, sel, packed, tmp);
   } else
#endif
   {
      LLVMValueRef shift;
      shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, 16), "");
      *y = LLVMBuildLShr(builder, packed, shift, "");
   }

   *u = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 8), "");
   *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 24), "");

   mask = lp_build_const_int_vec(gallivm, type, 0xff);

   *y = LLVMBuildAnd(builder, *y, mask, "y");
   *u = LLVMBuildAnd(builder, *u, mask, "u");
   *v = LLVMBuildAnd(builder, *v, mask, "v");
}
コード例 #7
0
static LLVMValueRef
rgb_to_rgba_aos(struct gallivm_state *gallivm,
                unsigned n,
                LLVMValueRef r, LLVMValueRef g, LLVMValueRef b)
{
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_type type;
   LLVMValueRef a;
   LLVMValueRef rgba;

   memset(&type, 0, sizeof type);
   type.sign = TRUE;
   type.width = 32;
   type.length = n;

   assert(lp_check_value(type, r));
   assert(lp_check_value(type, g));
   assert(lp_check_value(type, b));

   /*
    * Make a 4 x unorm8 vector
    */

   r = r;
   g = LLVMBuildShl(builder, g, lp_build_const_int_vec(gallivm, type, 8), "");
   b = LLVMBuildShl(builder, b, lp_build_const_int_vec(gallivm, type, 16), "");
   a = lp_build_const_int_vec(gallivm, type, 0xff000000);

   rgba = r;
   rgba = LLVMBuildOr(builder, rgba, g, "");
   rgba = LLVMBuildOr(builder, rgba, b, "");
   rgba = LLVMBuildOr(builder, rgba, a, "");

   rgba = LLVMBuildBitCast(builder, rgba,
                           LLVMVectorType(LLVMInt8TypeInContext(gallivm->context), 4*n), "");

   return rgba;
}
コード例 #8
0
/**
 * Extract Y, U, V channels from packed UYVY.
 * @param packed  is a <n x i32> vector with the packed UYVY blocks
 * @param i  is a <n x i32> vector with the x pixel coordinate (0 or 1)
 */
static void
uyvy_to_yuv_soa(LLVMBuilderRef builder,
                unsigned n,
                LLVMValueRef packed,
                LLVMValueRef i,
                LLVMValueRef *y,
                LLVMValueRef *u,
                LLVMValueRef *v)
{
   struct lp_type type;
   LLVMValueRef shift, mask;

   memset(&type, 0, sizeof type);
   type.width = 32;
   type.length = n;

   assert(lp_check_value(type, packed));
   assert(lp_check_value(type, i));

   /*
    * y = (uyvy >> 16*i) & 0xff
    * u = (uyvy        ) & 0xff
    * v = (uyvy >> 16  ) & 0xff
    */

   shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(type, 16), "");
   shift = LLVMBuildAdd(builder, shift, lp_build_const_int_vec(type, 8), "");
   *y = LLVMBuildLShr(builder, packed, shift, "");
   *u = packed;
   *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(type, 16), "");

   mask = lp_build_const_int_vec(type, 0xff);

   *y = LLVMBuildAnd(builder, *y, mask, "y");
   *u = LLVMBuildAnd(builder, *u, mask, "u");
   *v = LLVMBuildAnd(builder, *v, mask, "v");
}
コード例 #9
0
/**
 * Inverse of lp_build_clamped_float_to_unsigned_norm above.
 * Ex: src = { i32, i32, i32, i32 } with values in range [0, 2^src_width-1]
 * return {float, float, float, float} with values in range [0, 1].
 */
LLVMValueRef
lp_build_unsigned_norm_to_float(LLVMBuilderRef builder,
                                unsigned src_width,
                                struct lp_type dst_type,
                                LLVMValueRef src)
{
   LLVMTypeRef vec_type = lp_build_vec_type(dst_type);
   LLVMTypeRef int_vec_type = lp_build_int_vec_type(dst_type);
   LLVMValueRef bias_;
   LLVMValueRef res;
   unsigned mantissa;
   unsigned n;
   unsigned long long ubound;
   unsigned long long mask;
   double scale;
   double bias;

   assert(dst_type.floating);

   mantissa = lp_mantissa(dst_type);

   n = MIN2(mantissa, src_width);

   ubound = ((unsigned long long)1 << n);
   mask = ubound - 1;
   scale = (double)ubound/mask;
   bias = (double)((unsigned long long)1 << (mantissa - n));

   res = src;

   if(src_width > mantissa) {
      int shift = src_width - mantissa;
      res = LLVMBuildLShr(builder, res, lp_build_const_int_vec(dst_type, shift), "");
   }

   bias_ = lp_build_const_vec(dst_type, bias);

   res = LLVMBuildOr(builder,
                     res,
                     LLVMBuildBitCast(builder, bias_, int_vec_type, ""), "");

   res = LLVMBuildBitCast(builder, res, vec_type, "");

   res = LLVMBuildFSub(builder, res, bias_, "");
   res = LLVMBuildFMul(builder, res, lp_build_const_vec(dst_type, scale), "");

   return res;
}
コード例 #10
0
LLVMValueRef
lp_build_one(struct lp_type type)
{
   LLVMTypeRef elem_type;
   LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
   unsigned i;

   assert(type.length <= LP_MAX_VECTOR_LENGTH);

   elem_type = lp_build_elem_type(type);

   if(type.floating)
      elems[0] = LLVMConstReal(elem_type, 1.0);
   else if(type.fixed)
      elems[0] = LLVMConstInt(elem_type, 1LL << (type.width/2), 0);
   else if(!type.norm)
      elems[0] = LLVMConstInt(elem_type, 1, 0);
   else if(type.sign)
      elems[0] = LLVMConstInt(elem_type, (1LL << (type.width - 1)) - 1, 0);
   else {
      /* special case' -- 1.0 for normalized types is more easily attained if
       * we start with a vector consisting of all bits set */
      LLVMTypeRef vec_type = LLVMVectorType(elem_type, type.length);
      LLVMValueRef vec = LLVMConstAllOnes(vec_type);

#if 0
      if(type.sign)
         /* TODO: Unfortunately this caused "Tried to create a shift operation
          * on a non-integer type!" */
         vec = LLVMConstLShr(vec, lp_build_const_int_vec(type, 1));
#endif

      return vec;
   }

   for(i = 1; i < type.length; ++i)
      elems[i] = elems[0];

   if (type.length == 1)
      return elems[0];
   else
      return LLVMConstVector(elems, type.length);
}
コード例 #11
0
/**
 * Convert linear float soa values to packed srgb AoS values.
 * This only handles packed formats which are 4x8bit in size
 * (rgba and rgbx plus swizzles), and 16bit 565-style formats
 * with no alpha. (In the latter case the return values won't be
 * fully packed, it will look like r5g6b5x16r5g6b5x16...)
 *
 * @param src   float SoA (vector) values to convert.
 */
LLVMValueRef
lp_build_float_to_srgb_packed(struct gallivm_state *gallivm,
                              const struct util_format_description *dst_fmt,
                              struct lp_type src_type,
                              LLVMValueRef *src)
{
   LLVMBuilderRef builder = gallivm->builder;
   unsigned chan;
   struct lp_build_context f32_bld;
   struct lp_type int32_type = lp_int_type(src_type);
   LLVMValueRef tmpsrgb[4], alpha, dst;

   lp_build_context_init(&f32_bld, gallivm, src_type);

   /* rgb is subject to linear->srgb conversion, alpha is not */
   for (chan = 0; chan < 3; chan++) {
      unsigned chan_bits = dst_fmt->channel[dst_fmt->swizzle[chan]].size;
      tmpsrgb[chan] = lp_build_linear_to_srgb(gallivm, src_type, chan_bits, src[chan]);
   }
   /*
    * can't use lp_build_conv since we want to keep values as 32bit
    * here so we can interleave with rgb to go from SoA->AoS.
    */
   alpha = lp_build_clamp_zero_one_nanzero(&f32_bld, src[3]);
   alpha = lp_build_mul(&f32_bld, alpha,
                        lp_build_const_vec(gallivm, src_type, 255.0f));
   tmpsrgb[3] = lp_build_iround(&f32_bld, alpha);

   dst = lp_build_zero(gallivm, int32_type);
   for (chan = 0; chan < dst_fmt->nr_channels; chan++) {
      if (dst_fmt->swizzle[chan] <= PIPE_SWIZZLE_W) {
         unsigned ls;
         LLVMValueRef shifted, shift_val;
         ls = dst_fmt->channel[dst_fmt->swizzle[chan]].shift;
         shift_val = lp_build_const_int_vec(gallivm, int32_type, ls);
         shifted = LLVMBuildShl(builder, tmpsrgb[chan], shift_val, "");
         dst = LLVMBuildOr(builder, dst, shifted, "");
      }
   }
   return dst;
}
コード例 #12
0
ファイル: lp_bld_pack.c プロジェクト: ChillyWillyGuru/RSXGL
/**
 * Double the bit width.
 *
 * This will only change the number of bits the values are represented, not the
 * values themselves.
 */
void
lp_build_unpack2(struct gallivm_state *gallivm,
                 struct lp_type src_type,
                 struct lp_type dst_type,
                 LLVMValueRef src,
                 LLVMValueRef *dst_lo,
                 LLVMValueRef *dst_hi)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef msb;
   LLVMTypeRef dst_vec_type;

   assert(!src_type.floating);
   assert(!dst_type.floating);
   assert(dst_type.width == src_type.width * 2);
   assert(dst_type.length * 2 == src_type.length);

   if(dst_type.sign && src_type.sign) {
      /* Replicate the sign bit in the most significant bits */
      msb = LLVMBuildAShr(builder, src, lp_build_const_int_vec(gallivm, src_type, src_type.width - 1), "");
   }
   else
      /* Most significant bits always zero */
      msb = lp_build_zero(gallivm, src_type);

   /* Interleave bits */
#ifdef PIPE_ARCH_LITTLE_ENDIAN
   *dst_lo = lp_build_interleave2(gallivm, src_type, src, msb, 0);
   *dst_hi = lp_build_interleave2(gallivm, src_type, src, msb, 1);
#else
   *dst_lo = lp_build_interleave2(gallivm, src_type, msb, src, 0);
   *dst_hi = lp_build_interleave2(gallivm, src_type, msb, src, 1);
#endif

   /* Cast the result into the new type (twice as wide) */

   dst_vec_type = lp_build_vec_type(gallivm, dst_type);

   *dst_lo = LLVMBuildBitCast(builder, *dst_lo, dst_vec_type, "");
   *dst_hi = LLVMBuildBitCast(builder, *dst_hi, dst_vec_type, "");
}
コード例 #13
0
ファイル: lp_bld_conv.c プロジェクト: RobinWuDev/Qt
/**
 * Converts int16 half-float to float32
 * Note this can be performed in 1 instruction if vcvtph2ps exists (sse5 i think?)
 * [llvm.x86.vcvtph2ps / _mm_cvtph_ps]
 *
 * @param src_type      <vector> type of int16
 * @param src           value to convert
 *
 * ref http://fgiesen.wordpress.com/2012/03/28/half-to-float-done-quic/
 */
LLVMValueRef
lp_build_half_to_float(struct gallivm_state *gallivm,
                       struct lp_type src_type,
                       LLVMValueRef src)
{
    struct lp_type f32_type = lp_type_float_vec(32, 32 * src_type.length);
    struct lp_type i32_type = lp_type_int_vec(32, 32 * src_type.length);

    LLVMBuilderRef builder = gallivm->builder;
    LLVMTypeRef int_vec_type = lp_build_vec_type(gallivm, i32_type);
    LLVMTypeRef float_vec_type = lp_build_vec_type(gallivm, f32_type);

    /* Constants */
    LLVMValueRef i32_13          = lp_build_const_int_vec(gallivm, i32_type, 13);
    LLVMValueRef i32_16          = lp_build_const_int_vec(gallivm, i32_type, 16);
    LLVMValueRef i32_mask_nosign = lp_build_const_int_vec(gallivm, i32_type, 0x7fff);
    LLVMValueRef i32_was_infnan  = lp_build_const_int_vec(gallivm, i32_type, 0x7bff);
    LLVMValueRef i32_exp_infnan  = lp_build_const_int_vec(gallivm, i32_type, 0xff << 23);
    LLVMValueRef f32_magic       = LLVMBuildBitCast(builder,
                                   lp_build_const_int_vec(gallivm, i32_type, (254 - 15) << 23),
                                   float_vec_type, "");

    /* Convert int16 vector to int32 vector by zero ext */
    LLVMValueRef h             = LLVMBuildZExt(builder, src, int_vec_type, "");

    /* Exponent / mantissa bits */
    LLVMValueRef expmant       = LLVMBuildAnd(builder, i32_mask_nosign, h, "");
    LLVMValueRef shifted       = LLVMBuildBitCast(builder, LLVMBuildShl(builder, expmant, i32_13, ""), float_vec_type, "");

    /* Exponent adjust */
    LLVMValueRef scaled        = LLVMBuildBitCast(builder, LLVMBuildFMul(builder, shifted, f32_magic, ""), int_vec_type, "");

    /* Make sure Inf/NaN survive */
    LLVMValueRef b_wasinfnan   = lp_build_compare(gallivm, i32_type, PIPE_FUNC_GREATER, expmant, i32_was_infnan);
    LLVMValueRef infnanexp     = LLVMBuildAnd(builder, b_wasinfnan, i32_exp_infnan, "");

    /* Sign bit */
    LLVMValueRef justsign      = LLVMBuildXor(builder, h, expmant, "");
    LLVMValueRef sign          = LLVMBuildShl(builder, justsign, i32_16, "");

    /* Combine result */
    LLVMValueRef sign_inf      = LLVMBuildOr(builder, sign, infnanexp, "");
    LLVMValueRef final         = LLVMBuildOr(builder, scaled, sign_inf, "");

    /* Cast from int32 vector to float32 vector */
    return LLVMBuildBitCast(builder, final, float_vec_type, "");
}
コード例 #14
0
/**
 * Non-interleaved pack and saturate.
 *
 * Same as lp_build_pack2 but will saturate values so that they fit into the
 * destination type.
 */
LLVMValueRef
lp_build_packs2(struct gallivm_state *gallivm,
                struct lp_type src_type,
                struct lp_type dst_type,
                LLVMValueRef lo,
                LLVMValueRef hi)
{
   boolean clamp;

   assert(!src_type.floating);
   assert(!dst_type.floating);
   assert(src_type.sign == dst_type.sign);
   assert(src_type.width == dst_type.width * 2);
   assert(src_type.length * 2 == dst_type.length);

   clamp = TRUE;

   /* All X86 SSE non-interleaved pack instructions take signed inputs and
    * saturate them, so no need to clamp for those cases. */
   if(util_cpu_caps.has_sse2 &&
      src_type.width * src_type.length >= 128 &&
      src_type.sign &&
      (src_type.width == 32 || src_type.width == 16))
      clamp = FALSE;

   if(clamp) {
      struct lp_build_context bld;
      unsigned dst_bits = dst_type.sign ? dst_type.width - 1 : dst_type.width;
      LLVMValueRef dst_max = lp_build_const_int_vec(gallivm, src_type, ((unsigned long long)1 << dst_bits) - 1);
      lp_build_context_init(&bld, gallivm, src_type);
      lo = lp_build_min(&bld, lo, dst_max);
      hi = lp_build_min(&bld, hi, dst_max);
      /* FIXME: What about lower bound? */
   }

   return lp_build_pack2(gallivm, src_type, dst_type, lo, hi);
}
コード例 #15
0
/**
 * Perform the occlusion test and increase the counter.
 * Test the depth mask. Add the number of channel which has none zero mask
 * into the occlusion counter. e.g. maskvalue is {-1, -1, -1, -1}.
 * The counter will add 4.
 *
 * \param type holds element type of the mask vector.
 * \param maskvalue is the depth test mask.
 * \param counter is a pointer of the uint32 counter.
 */
static void
lp_build_occlusion_count(LLVMBuilderRef builder,
                         struct lp_type type,
                         LLVMValueRef maskvalue,
                         LLVMValueRef counter)
{
   LLVMValueRef countmask = lp_build_const_int_vec(type, 1);
   LLVMValueRef countv = LLVMBuildAnd(builder, maskvalue, countmask, "countv");
   LLVMTypeRef i8v16 = LLVMVectorType(LLVMInt8Type(), 16);
   LLVMValueRef counti = LLVMBuildBitCast(builder, countv, i8v16, "counti");
   LLVMValueRef maskarray[4] = {
      LLVMConstInt(LLVMInt32Type(), 0, 0),
      LLVMConstInt(LLVMInt32Type(), 4, 0),
      LLVMConstInt(LLVMInt32Type(), 8, 0),
      LLVMConstInt(LLVMInt32Type(), 12, 0),
   };
   LLVMValueRef shufflemask = LLVMConstVector(maskarray, 4);
   LLVMValueRef shufflev =  LLVMBuildShuffleVector(builder, counti, LLVMGetUndef(i8v16), shufflemask, "shufflev");
   LLVMValueRef shuffle = LLVMBuildBitCast(builder, shufflev, LLVMInt32Type(), "shuffle");
   LLVMValueRef count = lp_build_intrinsic_unary(builder, "llvm.ctpop.i32", LLVMInt32Type(), shuffle);
   LLVMValueRef orig = LLVMBuildLoad(builder, counter, "orig");
   LLVMValueRef incr = LLVMBuildAdd(builder, orig, count, "incr");
   LLVMBuildStore(builder, incr, counter);
}
コード例 #16
0
ファイル: lp_bld_depth.c プロジェクト: blckshrk/Mesa
/**
 * Do the stencil test comparison (compare FB stencil values against ref value).
 * This will be used twice when generating two-sided stencil code.
 * \param stencil  the front/back stencil state
 * \param stencilRef  the stencil reference value, replicated as a vector
 * \param stencilVals  vector of stencil values from framebuffer
 * \return vector mask of pass/fail values (~0 or 0)
 */
static LLVMValueRef
lp_build_stencil_test_single(struct lp_build_context *bld,
                             const struct pipe_stencil_state *stencil,
                             LLVMValueRef stencilRef,
                             LLVMValueRef stencilVals)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   const unsigned stencilMax = 255; /* XXX fix */
   struct lp_type type = bld->type;
   LLVMValueRef res;

   /*
    * SSE2 has intrinsics for signed comparisons, but not unsigned ones. Values
    * are between 0..255 so ensure we generate the fastest comparisons for
    * wider elements.
    */
   if (type.width <= 8) {
      assert(!type.sign);
   } else {
      assert(type.sign);
   }

   assert(stencil->enabled);

   if (stencil->valuemask != stencilMax) {
      /* compute stencilRef = stencilRef & valuemask */
      LLVMValueRef valuemask = lp_build_const_int_vec(bld->gallivm, type, stencil->valuemask);
      stencilRef = LLVMBuildAnd(builder, stencilRef, valuemask, "");
      /* compute stencilVals = stencilVals & valuemask */
      stencilVals = LLVMBuildAnd(builder, stencilVals, valuemask, "");
   }

   res = lp_build_cmp(bld, stencil->func, stencilRef, stencilVals);

   return res;
}
コード例 #17
0
ファイル: lp_bld_conv.c プロジェクト: RobinWuDev/Qt
/**
 * Generic type conversion.
 *
 * TODO: Take a precision argument, or even better, add a new precision member
 * to the lp_type union.
 */
void
lp_build_conv(struct gallivm_state *gallivm,
              struct lp_type src_type,
              struct lp_type dst_type,
              const LLVMValueRef *src, unsigned num_srcs,
              LLVMValueRef *dst, unsigned num_dsts)
{
    LLVMBuilderRef builder = gallivm->builder;
    struct lp_type tmp_type;
    LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH];
    unsigned num_tmps;
    unsigned i;

    /* We must not loose or gain channels. Only precision */
    assert(src_type.length * num_srcs == dst_type.length * num_dsts);

    assert(src_type.length <= LP_MAX_VECTOR_LENGTH);
    assert(dst_type.length <= LP_MAX_VECTOR_LENGTH);
    assert(num_srcs <= LP_MAX_VECTOR_LENGTH);
    assert(num_dsts <= LP_MAX_VECTOR_LENGTH);

    tmp_type = src_type;
    for(i = 0; i < num_srcs; ++i) {
        assert(lp_check_value(src_type, src[i]));
        tmp[i] = src[i];
    }
    num_tmps = num_srcs;


    /* Special case 4x4f --> 1x16ub
     */
    if (src_type.floating == 1 &&
            src_type.fixed    == 0 &&
            src_type.sign     == 1 &&
            src_type.norm     == 0 &&
            src_type.width    == 32 &&
            src_type.length   == 4 &&

            dst_type.floating == 0 &&
            dst_type.fixed    == 0 &&
            dst_type.sign     == 0 &&
            dst_type.norm     == 1 &&
            dst_type.width    == 8 &&
            dst_type.length   == 16 &&

            4 * num_dsts      == num_srcs &&

            util_cpu_caps.has_sse2)
    {
        struct lp_build_context bld;
        struct lp_type int16_type = dst_type;
        struct lp_type int32_type = dst_type;
        LLVMValueRef const_255f;
        unsigned i, j;

        lp_build_context_init(&bld, gallivm, src_type);

        int16_type.width *= 2;
        int16_type.length /= 2;
        int16_type.sign = 1;

        int32_type.width *= 4;
        int32_type.length /= 4;
        int32_type.sign = 1;

        const_255f = lp_build_const_vec(gallivm, src_type, 255.0f);

        for (i = 0; i < num_dsts; ++i, src += 4) {
            LLVMValueRef lo, hi;

            for (j = 0; j < 4; ++j) {
                tmp[j] = LLVMBuildFMul(builder, src[j], const_255f, "");
                tmp[j] = lp_build_iround(&bld, tmp[j]);
            }

            /* relying on clamping behavior of sse2 intrinsics here */
            lo = lp_build_pack2(gallivm, int32_type, int16_type, tmp[0], tmp[1]);
            hi = lp_build_pack2(gallivm, int32_type, int16_type, tmp[2], tmp[3]);
            dst[i] = lp_build_pack2(gallivm, int16_type, dst_type, lo, hi);
        }

        return;
    }

    /* Special case 2x8f --> 1x16ub
     */
    else if (src_type.floating == 1 &&
             src_type.fixed    == 0 &&
             src_type.sign     == 1 &&
             src_type.norm     == 0 &&
             src_type.width    == 32 &&
             src_type.length   == 8 &&

             dst_type.floating == 0 &&
             dst_type.fixed    == 0 &&
             dst_type.sign     == 0 &&
             dst_type.norm     == 1 &&
             dst_type.width    == 8 &&
             dst_type.length   == 16 &&

             2 * num_dsts      == num_srcs &&

             util_cpu_caps.has_avx) {

        struct lp_build_context bld;
        struct lp_type int16_type = dst_type;
        struct lp_type int32_type = dst_type;
        LLVMValueRef const_255f;
        unsigned i;

        lp_build_context_init(&bld, gallivm, src_type);

        int16_type.width *= 2;
        int16_type.length /= 2;
        int16_type.sign = 1;

        int32_type.width *= 4;
        int32_type.length /= 4;
        int32_type.sign = 1;

        const_255f = lp_build_const_vec(gallivm, src_type, 255.0f);

        for (i = 0; i < num_dsts; ++i, src += 2) {
            LLVMValueRef lo, hi, a, b;

            a = LLVMBuildFMul(builder, src[0], const_255f, "");
            b = LLVMBuildFMul(builder, src[1], const_255f, "");

            a = lp_build_iround(&bld, a);
            b = lp_build_iround(&bld, b);

            tmp[0] = lp_build_extract_range(gallivm, a, 0, 4);
            tmp[1] = lp_build_extract_range(gallivm, a, 4, 4);
            tmp[2] = lp_build_extract_range(gallivm, b, 0, 4);
            tmp[3] = lp_build_extract_range(gallivm, b, 4, 4);

            /* relying on clamping behavior of sse2 intrinsics here */
            lo = lp_build_pack2(gallivm, int32_type, int16_type, tmp[0], tmp[1]);
            hi = lp_build_pack2(gallivm, int32_type, int16_type, tmp[2], tmp[3]);
            dst[i] = lp_build_pack2(gallivm, int16_type, dst_type, lo, hi);
        }
        return;
    }

    /* Pre convert half-floats to floats
     */
    else if (src_type.floating && src_type.width == 16)
    {
        for(i = 0; i < num_tmps; ++i)
            tmp[i] = lp_build_half_to_float(gallivm, src_type, tmp[i]);

        tmp_type.width = 32;
    }

    /*
     * Clamp if necessary
     */

    if(memcmp(&src_type, &dst_type, sizeof src_type) != 0) {
        struct lp_build_context bld;
        double src_min = lp_const_min(src_type);
        double dst_min = lp_const_min(dst_type);
        double src_max = lp_const_max(src_type);
        double dst_max = lp_const_max(dst_type);
        LLVMValueRef thres;

        lp_build_context_init(&bld, gallivm, tmp_type);

        if(src_min < dst_min) {
            if(dst_min == 0.0)
                thres = bld.zero;
            else
                thres = lp_build_const_vec(gallivm, src_type, dst_min);
            for(i = 0; i < num_tmps; ++i)
                tmp[i] = lp_build_max(&bld, tmp[i], thres);
        }

        if(src_max > dst_max) {
            if(dst_max == 1.0)
                thres = bld.one;
            else
                thres = lp_build_const_vec(gallivm, src_type, dst_max);
            for(i = 0; i < num_tmps; ++i)
                tmp[i] = lp_build_min(&bld, tmp[i], thres);
        }
    }

    /*
     * Scale to the narrowest range
     */

    if(dst_type.floating) {
        /* Nothing to do */
    }
    else if(tmp_type.floating) {
        if(!dst_type.fixed && !dst_type.sign && dst_type.norm) {
            for(i = 0; i < num_tmps; ++i) {
                tmp[i] = lp_build_clamped_float_to_unsigned_norm(gallivm,
                         tmp_type,
                         dst_type.width,
                         tmp[i]);
            }
            tmp_type.floating = FALSE;
        }
        else {
            double dst_scale = lp_const_scale(dst_type);
            LLVMTypeRef tmp_vec_type;

            if (dst_scale != 1.0) {
                LLVMValueRef scale = lp_build_const_vec(gallivm, tmp_type, dst_scale);
                for(i = 0; i < num_tmps; ++i)
                    tmp[i] = LLVMBuildFMul(builder, tmp[i], scale, "");
            }

            /* Use an equally sized integer for intermediate computations */
            tmp_type.floating = FALSE;
            tmp_vec_type = lp_build_vec_type(gallivm, tmp_type);
            for(i = 0; i < num_tmps; ++i) {
#if 0
                if(dst_type.sign)
                    tmp[i] = LLVMBuildFPToSI(builder, tmp[i], tmp_vec_type, "");
                else
                    tmp[i] = LLVMBuildFPToUI(builder, tmp[i], tmp_vec_type, "");
#else
                /* FIXME: there is no SSE counterpart for LLVMBuildFPToUI */
                tmp[i] = LLVMBuildFPToSI(builder, tmp[i], tmp_vec_type, "");
#endif
            }
        }
    }
    else {
        unsigned src_shift = lp_const_shift(src_type);
        unsigned dst_shift = lp_const_shift(dst_type);
        unsigned src_offset = lp_const_offset(src_type);
        unsigned dst_offset = lp_const_offset(dst_type);

        /* Compensate for different offsets */
        if (dst_offset > src_offset && src_type.width > dst_type.width) {
            for (i = 0; i < num_tmps; ++i) {
                LLVMValueRef shifted;
                LLVMValueRef shift = lp_build_const_int_vec(gallivm, tmp_type, src_shift - 1);
                if(src_type.sign)
                    shifted = LLVMBuildAShr(builder, tmp[i], shift, "");
                else
                    shifted = LLVMBuildLShr(builder, tmp[i], shift, "");

                tmp[i] = LLVMBuildSub(builder, tmp[i], shifted, "");
            }
        }

        if(src_shift > dst_shift) {
            LLVMValueRef shift = lp_build_const_int_vec(gallivm, tmp_type,
                                 src_shift - dst_shift);
            for(i = 0; i < num_tmps; ++i)
                if(src_type.sign)
                    tmp[i] = LLVMBuildAShr(builder, tmp[i], shift, "");
                else
                    tmp[i] = LLVMBuildLShr(builder, tmp[i], shift, "");
        }
    }

    /*
     * Truncate or expand bit width
     *
     * No data conversion should happen here, although the sign bits are
     * crucial to avoid bad clamping.
     */

    {
        struct lp_type new_type;

        new_type = tmp_type;
        new_type.sign   = dst_type.sign;
        new_type.width  = dst_type.width;
        new_type.length = dst_type.length;

        lp_build_resize(gallivm, tmp_type, new_type, tmp, num_srcs, tmp, num_dsts);

        tmp_type = new_type;
        num_tmps = num_dsts;
    }

    /*
     * Scale to the widest range
     */

    if(src_type.floating) {
        /* Nothing to do */
    }
    else if(!src_type.floating && dst_type.floating) {
        if(!src_type.fixed && !src_type.sign && src_type.norm) {
            for(i = 0; i < num_tmps; ++i) {
                tmp[i] = lp_build_unsigned_norm_to_float(gallivm,
                         src_type.width,
                         dst_type,
                         tmp[i]);
            }
            tmp_type.floating = TRUE;
        }
        else {
            double src_scale = lp_const_scale(src_type);
            LLVMTypeRef tmp_vec_type;

            /* Use an equally sized integer for intermediate computations */
            tmp_type.floating = TRUE;
            tmp_type.sign = TRUE;
            tmp_vec_type = lp_build_vec_type(gallivm, tmp_type);
            for(i = 0; i < num_tmps; ++i) {
#if 0
                if(dst_type.sign)
                    tmp[i] = LLVMBuildSIToFP(builder, tmp[i], tmp_vec_type, "");
                else
                    tmp[i] = LLVMBuildUIToFP(builder, tmp[i], tmp_vec_type, "");
#else
                /* FIXME: there is no SSE counterpart for LLVMBuildUIToFP */
                tmp[i] = LLVMBuildSIToFP(builder, tmp[i], tmp_vec_type, "");
#endif
            }

            if (src_scale != 1.0) {
                LLVMValueRef scale = lp_build_const_vec(gallivm, tmp_type, 1.0/src_scale);
                for(i = 0; i < num_tmps; ++i)
                    tmp[i] = LLVMBuildFMul(builder, tmp[i], scale, "");
            }
        }
    }
    else {
        unsigned src_shift = lp_const_shift(src_type);
        unsigned dst_shift = lp_const_shift(dst_type);
        unsigned src_offset = lp_const_offset(src_type);
        unsigned dst_offset = lp_const_offset(dst_type);

        if (src_shift < dst_shift) {
            LLVMValueRef pre_shift[LP_MAX_VECTOR_LENGTH];
            LLVMValueRef shift = lp_build_const_int_vec(gallivm, tmp_type, dst_shift - src_shift);

            for (i = 0; i < num_tmps; ++i) {
                pre_shift[i] = tmp[i];
                tmp[i] = LLVMBuildShl(builder, tmp[i], shift, "");
            }

            /* Compensate for different offsets */
            if (dst_offset > src_offset) {
                for (i = 0; i < num_tmps; ++i) {
                    tmp[i] = LLVMBuildSub(builder, tmp[i], pre_shift[i], "");
                }
            }
        }
    }

    for(i = 0; i < num_dsts; ++i) {
        dst[i] = tmp[i];
        assert(lp_check_value(dst_type, dst[i]));
    }
}
コード例 #18
0
ファイル: lp_bld_conv.c プロジェクト: RobinWuDev/Qt
/**
 * Inverse of lp_build_clamped_float_to_unsigned_norm above.
 * Ex: src = { i32, i32, i32, i32 } with values in range [0, 2^src_width-1]
 * return {float, float, float, float} with values in range [0, 1].
 */
LLVMValueRef
lp_build_unsigned_norm_to_float(struct gallivm_state *gallivm,
                                unsigned src_width,
                                struct lp_type dst_type,
                                LLVMValueRef src)
{
    LLVMBuilderRef builder = gallivm->builder;
    LLVMTypeRef vec_type = lp_build_vec_type(gallivm, dst_type);
    LLVMTypeRef int_vec_type = lp_build_int_vec_type(gallivm, dst_type);
    LLVMValueRef bias_;
    LLVMValueRef res;
    unsigned mantissa;
    unsigned n;
    unsigned long long ubound;
    unsigned long long mask;
    double scale;
    double bias;

    assert(dst_type.floating);

    mantissa = lp_mantissa(dst_type);

    if (src_width <= (mantissa + 1)) {
        /*
         * The source width matches fits what can be represented in floating
         * point (i.e., mantissa + 1 bits). So do a straight multiplication
         * followed by casting. No further rounding is necessary.
         */

        scale = 1.0/(double)((1ULL << src_width) - 1);
        res = LLVMBuildSIToFP(builder, src, vec_type, "");
        res = LLVMBuildFMul(builder, res,
                            lp_build_const_vec(gallivm, dst_type, scale), "");
        return res;
    }
    else {
        /*
         * The source width exceeds what can be represented in floating
         * point. So truncate the incoming values.
         */

        n = MIN2(mantissa, src_width);

        ubound = ((unsigned long long)1 << n);
        mask = ubound - 1;
        scale = (double)ubound/mask;
        bias = (double)((unsigned long long)1 << (mantissa - n));

        res = src;

        if (src_width > mantissa) {
            int shift = src_width - mantissa;
            res = LLVMBuildLShr(builder, res,
                                lp_build_const_int_vec(gallivm, dst_type, shift), "");
        }

        bias_ = lp_build_const_vec(gallivm, dst_type, bias);

        res = LLVMBuildOr(builder,
                          res,
                          LLVMBuildBitCast(builder, bias_, int_vec_type, ""), "");

        res = LLVMBuildBitCast(builder, res, vec_type, "");

        res = LLVMBuildFSub(builder, res, bias_, "");
        res = LLVMBuildFMul(builder, res, lp_build_const_vec(gallivm, dst_type, scale), "");
    }

    return res;
}
コード例 #19
0
ファイル: lp_bld_conv.c プロジェクト: RobinWuDev/Qt
/**
 * Special case for converting clamped IEEE-754 floats to unsigned norms.
 *
 * The mathematical voodoo below may seem excessive but it is actually
 * paramount we do it this way for several reasons. First, there is no single
 * precision FP to unsigned integer conversion Intel SSE instruction. Second,
 * secondly, even if there was, since the FP's mantissa takes only a fraction
 * of register bits the typically scale and cast approach would require double
 * precision for accurate results, and therefore half the throughput
 *
 * Although the result values can be scaled to an arbitrary bit width specified
 * by dst_width, the actual result type will have the same width.
 *
 * Ex: src = { float, float, float, float }
 * return { i32, i32, i32, i32 } where each value is in [0, 2^dst_width-1].
 */
LLVMValueRef
lp_build_clamped_float_to_unsigned_norm(struct gallivm_state *gallivm,
                                        struct lp_type src_type,
                                        unsigned dst_width,
                                        LLVMValueRef src)
{
    LLVMBuilderRef builder = gallivm->builder;
    LLVMTypeRef int_vec_type = lp_build_int_vec_type(gallivm, src_type);
    LLVMValueRef res;
    unsigned mantissa;

    assert(src_type.floating);
    assert(dst_width <= src_type.width);
    src_type.sign = FALSE;

    mantissa = lp_mantissa(src_type);

    if (dst_width <= mantissa) {
        /*
         * Apply magic coefficients that will make the desired result to appear
         * in the lowest significant bits of the mantissa, with correct rounding.
         *
         * This only works if the destination width fits in the mantissa.
         */

        unsigned long long ubound;
        unsigned long long mask;
        double scale;
        double bias;

        ubound = (1ULL << dst_width);
        mask = ubound - 1;
        scale = (double)mask/ubound;
        bias = (double)(1ULL << (mantissa - dst_width));

        res = LLVMBuildFMul(builder, src, lp_build_const_vec(gallivm, src_type, scale), "");
        res = LLVMBuildFAdd(builder, res, lp_build_const_vec(gallivm, src_type, bias), "");
        res = LLVMBuildBitCast(builder, res, int_vec_type, "");
        res = LLVMBuildAnd(builder, res,
                           lp_build_const_int_vec(gallivm, src_type, mask), "");
    }
    else if (dst_width == (mantissa + 1)) {
        /*
         * The destination width matches exactly what can be represented in
         * floating point (i.e., mantissa + 1 bits). So do a straight
         * multiplication followed by casting. No further rounding is necessary.
         */

        double scale;

        scale = (double)((1ULL << dst_width) - 1);

        res = LLVMBuildFMul(builder, src,
                            lp_build_const_vec(gallivm, src_type, scale), "");
        res = LLVMBuildFPToSI(builder, res, int_vec_type, "");
    }
    else {
        /*
         * The destination exceeds what can be represented in the floating point.
         * So multiply by the largest power two we get away with, and when
         * subtract the most significant bit to rescale to normalized values.
         *
         * The largest power of two factor we can get away is
         * (1 << (src_type.width - 1)), because we need to use signed . In theory it
         * should be (1 << (src_type.width - 2)), but IEEE 754 rules states
         * INT_MIN should be returned in FPToSI, which is the correct result for
         * values near 1.0!
         *
         * This means we get (src_type.width - 1) correct bits for values near 0.0,
         * and (mantissa + 1) correct bits for values near 1.0. Equally or more
         * important, we also get exact results for 0.0 and 1.0.
         */

        unsigned n = MIN2(src_type.width - 1, dst_width);

        double scale = (double)(1ULL << n);
        unsigned lshift = dst_width - n;
        unsigned rshift = n;
        LLVMValueRef lshifted;
        LLVMValueRef rshifted;

        res = LLVMBuildFMul(builder, src,
                            lp_build_const_vec(gallivm, src_type, scale), "");
        res = LLVMBuildFPToSI(builder, res, int_vec_type, "");

        /*
         * Align the most significant bit to its final place.
         *
         * This will cause 1.0 to overflow to 0, but the later adjustment will
         * get it right.
         */
        if (lshift) {
            lshifted = LLVMBuildShl(builder, res,
                                    lp_build_const_int_vec(gallivm, src_type,
                                            lshift), "");
        } else {
            lshifted = res;
        }

        /*
         * Align the most significant bit to the right.
         */
        rshifted =  LLVMBuildLShr(builder, res,
                                  lp_build_const_int_vec(gallivm, src_type, rshift),
                                  "");

        /*
         * Subtract the MSB to the LSB, therefore re-scaling from
         * (1 << dst_width) to ((1 << dst_width) - 1).
         */

        res = LLVMBuildSub(builder, lshifted, rshifted, "");
    }

    return res;
}
コード例 #20
0
/**
 * Build code to compare two values 'a' and 'b' of 'type' using the given func.
 * \param func  one of PIPE_FUNC_x
 * The result values will be 0 for false or ~0 for true.
 */
LLVMValueRef
lp_build_compare(struct gallivm_state *gallivm,
                 const struct lp_type type,
                 unsigned func,
                 LLVMValueRef a,
                 LLVMValueRef b)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMTypeRef int_vec_type = lp_build_int_vec_type(gallivm, type);
   LLVMValueRef zeros = LLVMConstNull(int_vec_type);
   LLVMValueRef ones = LLVMConstAllOnes(int_vec_type);
   LLVMValueRef cond;
   LLVMValueRef res;

   assert(func >= PIPE_FUNC_NEVER);
   assert(func <= PIPE_FUNC_ALWAYS);
   assert(lp_check_value(type, a));
   assert(lp_check_value(type, b));

   if(func == PIPE_FUNC_NEVER)
      return zeros;
   if(func == PIPE_FUNC_ALWAYS)
      return ones;

#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
   /*
    * There are no unsigned integer comparison instructions in SSE.
    */

   if (!type.floating && !type.sign &&
       type.width * type.length == 128 &&
       util_cpu_caps.has_sse2 &&
       (func == PIPE_FUNC_LESS ||
        func == PIPE_FUNC_LEQUAL ||
        func == PIPE_FUNC_GREATER ||
        func == PIPE_FUNC_GEQUAL) &&
       (gallivm_debug & GALLIVM_DEBUG_PERF)) {
         debug_printf("%s: inefficient <%u x i%u> unsigned comparison\n",
                      __FUNCTION__, type.length, type.width);
   }
#endif

#if HAVE_LLVM < 0x0207
#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
   if(type.width * type.length == 128) {
      if(type.floating && util_cpu_caps.has_sse) {
         /* float[4] comparison */
         LLVMTypeRef vec_type = lp_build_vec_type(gallivm, type);
         LLVMValueRef args[3];
         unsigned cc;
         boolean swap;

         swap = FALSE;
         switch(func) {
         case PIPE_FUNC_EQUAL:
            cc = 0;
            break;
         case PIPE_FUNC_NOTEQUAL:
            cc = 4;
            break;
         case PIPE_FUNC_LESS:
            cc = 1;
            break;
         case PIPE_FUNC_LEQUAL:
            cc = 2;
            break;
         case PIPE_FUNC_GREATER:
            cc = 1;
            swap = TRUE;
            break;
         case PIPE_FUNC_GEQUAL:
            cc = 2;
            swap = TRUE;
            break;
         default:
            assert(0);
            return lp_build_undef(gallivm, type);
         }

         if(swap) {
            args[0] = b;
            args[1] = a;
         }
         else {
            args[0] = a;
            args[1] = b;
         }

         args[2] = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), cc, 0);
         res = lp_build_intrinsic(builder,
                                  "llvm.x86.sse.cmp.ps",
                                  vec_type,
                                  args, 3);
         res = LLVMBuildBitCast(builder, res, int_vec_type, "");
         return res;
      }
      else if(util_cpu_caps.has_sse2) {
         /* int[4] comparison */
         static const struct {
            unsigned swap:1;
            unsigned eq:1;
            unsigned gt:1;
            unsigned not:1;
         } table[] = {
            {0, 0, 0, 1}, /* PIPE_FUNC_NEVER */
            {1, 0, 1, 0}, /* PIPE_FUNC_LESS */
            {0, 1, 0, 0}, /* PIPE_FUNC_EQUAL */
            {0, 0, 1, 1}, /* PIPE_FUNC_LEQUAL */
            {0, 0, 1, 0}, /* PIPE_FUNC_GREATER */
            {0, 1, 0, 1}, /* PIPE_FUNC_NOTEQUAL */
            {1, 0, 1, 1}, /* PIPE_FUNC_GEQUAL */
            {0, 0, 0, 0}  /* PIPE_FUNC_ALWAYS */
         };
         const char *pcmpeq;
         const char *pcmpgt;
         LLVMValueRef args[2];
         LLVMValueRef res;
         LLVMTypeRef vec_type = lp_build_vec_type(gallivm, type);

         switch (type.width) {
         case 8:
            pcmpeq = "llvm.x86.sse2.pcmpeq.b";
            pcmpgt = "llvm.x86.sse2.pcmpgt.b";
            break;
         case 16:
            pcmpeq = "llvm.x86.sse2.pcmpeq.w";
            pcmpgt = "llvm.x86.sse2.pcmpgt.w";
            break;
         case 32:
            pcmpeq = "llvm.x86.sse2.pcmpeq.d";
            pcmpgt = "llvm.x86.sse2.pcmpgt.d";
            break;
         default:
            assert(0);
            return lp_build_undef(gallivm, type);
         }

         /* There are no unsigned comparison instructions. So flip the sign bit
          * so that the results match.
          */
         if (table[func].gt && !type.sign) {
            LLVMValueRef msb = lp_build_const_int_vec(gallivm, type, (unsigned long long)1 << (type.width - 1));
            a = LLVMBuildXor(builder, a, msb, "");
            b = LLVMBuildXor(builder, b, msb, "");
         }

         if(table[func].swap) {
            args[0] = b;
            args[1] = a;
         }
         else {
            args[0] = a;
            args[1] = b;
         }

         if(table[func].eq)
            res = lp_build_intrinsic(builder, pcmpeq, vec_type, args, 2);
         else if (table[func].gt)
            res = lp_build_intrinsic(builder, pcmpgt, vec_type, args, 2);
         else
            res = LLVMConstNull(vec_type);

         if(table[func].not)
            res = LLVMBuildNot(builder, res, "");

         return res;
      }
   } /* if (type.width * type.length == 128) */
#endif
#endif /* HAVE_LLVM < 0x0207 */

   /* XXX: It is not clear if we should use the ordered or unordered operators */

   if(type.floating) {
      LLVMRealPredicate op;
      switch(func) {
      case PIPE_FUNC_NEVER:
         op = LLVMRealPredicateFalse;
         break;
      case PIPE_FUNC_ALWAYS:
         op = LLVMRealPredicateTrue;
         break;
      case PIPE_FUNC_EQUAL:
         op = LLVMRealUEQ;
         break;
      case PIPE_FUNC_NOTEQUAL:
         op = LLVMRealUNE;
         break;
      case PIPE_FUNC_LESS:
         op = LLVMRealULT;
         break;
      case PIPE_FUNC_LEQUAL:
         op = LLVMRealULE;
         break;
      case PIPE_FUNC_GREATER:
         op = LLVMRealUGT;
         break;
      case PIPE_FUNC_GEQUAL:
         op = LLVMRealUGE;
         break;
      default:
         assert(0);
         return lp_build_undef(gallivm, type);
      }

#if HAVE_LLVM >= 0x0207
      cond = LLVMBuildFCmp(builder, op, a, b, "");
      res = LLVMBuildSExt(builder, cond, int_vec_type, "");
#else
      if (type.length == 1) {
         cond = LLVMBuildFCmp(builder, op, a, b, "");
         res = LLVMBuildSExt(builder, cond, int_vec_type, "");
      }
      else {
         unsigned i;

         res = LLVMGetUndef(int_vec_type);

         debug_printf("%s: warning: using slow element-wise float"
                      " vector comparison\n", __FUNCTION__);
         for (i = 0; i < type.length; ++i) {
            LLVMValueRef index = lp_build_const_int32(gallivm, i);
            cond = LLVMBuildFCmp(builder, op,
                                 LLVMBuildExtractElement(builder, a, index, ""),
                                 LLVMBuildExtractElement(builder, b, index, ""),
                                 "");
            cond = LLVMBuildSelect(builder, cond,
                                   LLVMConstExtractElement(ones, index),
                                   LLVMConstExtractElement(zeros, index),
                                   "");
            res = LLVMBuildInsertElement(builder, res, cond, index, "");
         }
      }
#endif
   }
   else {
      LLVMIntPredicate op;
      switch(func) {
      case PIPE_FUNC_EQUAL:
         op = LLVMIntEQ;
         break;
      case PIPE_FUNC_NOTEQUAL:
         op = LLVMIntNE;
         break;
      case PIPE_FUNC_LESS:
         op = type.sign ? LLVMIntSLT : LLVMIntULT;
         break;
      case PIPE_FUNC_LEQUAL:
         op = type.sign ? LLVMIntSLE : LLVMIntULE;
         break;
      case PIPE_FUNC_GREATER:
         op = type.sign ? LLVMIntSGT : LLVMIntUGT;
         break;
      case PIPE_FUNC_GEQUAL:
         op = type.sign ? LLVMIntSGE : LLVMIntUGE;
         break;
      default:
         assert(0);
         return lp_build_undef(gallivm, type);
      }

#if HAVE_LLVM >= 0x0207
      cond = LLVMBuildICmp(builder, op, a, b, "");
      res = LLVMBuildSExt(builder, cond, int_vec_type, "");
#else
      if (type.length == 1) {
         cond = LLVMBuildICmp(builder, op, a, b, "");
         res = LLVMBuildSExt(builder, cond, int_vec_type, "");
      }
      else {
         unsigned i;

         res = LLVMGetUndef(int_vec_type);

         if (gallivm_debug & GALLIVM_DEBUG_PERF) {
            debug_printf("%s: using slow element-wise int"
                         " vector comparison\n", __FUNCTION__);
         }

         for(i = 0; i < type.length; ++i) {
            LLVMValueRef index = lp_build_const_int32(gallivm, i);
            cond = LLVMBuildICmp(builder, op,
                                 LLVMBuildExtractElement(builder, a, index, ""),
                                 LLVMBuildExtractElement(builder, b, index, ""),
                                 "");
            cond = LLVMBuildSelect(builder, cond,
                                   LLVMConstExtractElement(ones, index),
                                   LLVMConstExtractElement(zeros, index),
                                   "");
            res = LLVMBuildInsertElement(builder, res, cond, index, "");
         }
      }
#endif
   }

   return res;
}
コード例 #21
0
ファイル: lp_bld_format_soa.c プロジェクト: CSRedRat/mesa-1
/**
 * Fetch a texels from a texture, returning them in SoA layout.
 *
 * \param type  the desired return type for 'rgba'.  The vector length
 *              is the number of texels to fetch
 *
 * \param base_ptr  points to the base of the texture mip tree.
 * \param offset    offset to start of the texture image block.  For non-
 *                  compressed formats, this simply is an offset to the texel.
 *                  For compressed formats, it is an offset to the start of the
 *                  compressed data block.
 *
 * \param i, j  the sub-block pixel coordinates.  For non-compressed formats
 *              these will always be (0,0).  For compressed formats, i will
 *              be in [0, block_width-1] and j will be in [0, block_height-1].
 */
void
lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
                        const struct util_format_description *format_desc,
                        struct lp_type type,
                        LLVMValueRef base_ptr,
                        LLVMValueRef offset,
                        LLVMValueRef i,
                        LLVMValueRef j,
                        LLVMValueRef rgba_out[4])
{
   LLVMBuilderRef builder = gallivm->builder;

   if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
       (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
        format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB ||
        format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) &&
       format_desc->block.width == 1 &&
       format_desc->block.height == 1 &&
       format_desc->block.bits <= type.width &&
       (format_desc->channel[0].type != UTIL_FORMAT_TYPE_FLOAT ||
        format_desc->channel[0].size == 32))
   {
      /*
       * The packed pixel fits into an element of the destination format. Put
       * the packed pixels into a vector and extract each component for all
       * vector elements in parallel.
       */

      LLVMValueRef packed;

      /*
       * gather the texels from the texture
       * Ex: packed = {XYZW, XYZW, XYZW, XYZW}
       */
      assert(format_desc->block.bits <= type.width);
      packed = lp_build_gather(gallivm,
                               type.length,
                               format_desc->block.bits,
                               type.width,
                               base_ptr, offset, FALSE);

      /*
       * convert texels to float rgba
       */
      lp_build_unpack_rgba_soa(gallivm,
                               format_desc,
                               type,
                               packed, rgba_out);
      return;
   }

   if (format_desc->format == PIPE_FORMAT_R11G11B10_FLOAT ||
       format_desc->format == PIPE_FORMAT_R9G9B9E5_FLOAT) {
      /*
       * similar conceptually to above but requiring special
       * AoS packed -> SoA float conversion code.
       */
      LLVMValueRef packed;

      assert(type.floating);
      assert(type.width == 32);

      packed = lp_build_gather(gallivm, type.length,
                               format_desc->block.bits,
                               type.width, base_ptr, offset,
                               FALSE);
      if (format_desc->format == PIPE_FORMAT_R11G11B10_FLOAT) {
         lp_build_r11g11b10_to_float(gallivm, packed, rgba_out);
      }
      else {
         lp_build_rgb9e5_to_float(gallivm, packed, rgba_out);
      }
      return;
   }

   if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS &&
       format_desc->block.bits == 64) {
      /*
       * special case the format is 64 bits but we only require
       * 32bit (or 8bit) from each block.
       */
      LLVMValueRef packed;

      if (format_desc->format == PIPE_FORMAT_X32_S8X24_UINT) {
         /*
          * for stencil simply fix up offsets - could in fact change
          * base_ptr instead even outside the shader.
          */
         unsigned mask = (1 << 8) - 1;
         LLVMValueRef s_offset = lp_build_const_int_vec(gallivm, type, 4);
         offset = LLVMBuildAdd(builder, offset, s_offset, "");
         packed = lp_build_gather(gallivm, type.length,
                                  32, type.width, base_ptr, offset, FALSE);
         packed = LLVMBuildAnd(builder, packed,
                               lp_build_const_int_vec(gallivm, type, mask), "");
      }
      else {
         assert (format_desc->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT);
         packed = lp_build_gather(gallivm, type.length,
                                  32, type.width, base_ptr, offset, TRUE);
         packed = LLVMBuildBitCast(builder, packed,
                                   lp_build_vec_type(gallivm, type), "");
      }
      /* for consistency with lp_build_unpack_rgba_soa() return sss1 or zzz1 */
      rgba_out[0] = rgba_out[1] = rgba_out[2] = packed;
      rgba_out[3] = lp_build_const_vec(gallivm, type, 1.0f);
      return;
   }

   /*
    * Try calling lp_build_fetch_rgba_aos for all pixels.
    */

   if (util_format_fits_8unorm(format_desc) &&
       type.floating && type.width == 32 &&
       (type.length == 1 || (type.length % 4 == 0))) {
      struct lp_type tmp_type;
      LLVMValueRef tmp;

      memset(&tmp_type, 0, sizeof tmp_type);
      tmp_type.width = 8;
      tmp_type.length = type.length * 4;
      tmp_type.norm = TRUE;

      tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type,
                                    base_ptr, offset, i, j);

      lp_build_rgba8_to_fi32_soa(gallivm,
                                type,
                                tmp,
                                rgba_out);

      return;
   }

   /*
    * Fallback to calling lp_build_fetch_rgba_aos for each pixel.
    *
    * This is not the most efficient way of fetching pixels, as we
    * miss some opportunities to do vectorization, but this is
    * convenient for formats or scenarios for which there was no
    * opportunity or incentive to optimize.
    */

   {
      unsigned k, chan;
      struct lp_type tmp_type;

      if (gallivm_debug & GALLIVM_DEBUG_PERF) {
         debug_printf("%s: scalar unpacking of %s\n",
                      __FUNCTION__, format_desc->short_name);
      }

      tmp_type = type;
      tmp_type.length = 4;

      for (chan = 0; chan < 4; ++chan) {
         rgba_out[chan] = lp_build_undef(gallivm, type);
      }

      /* loop over number of pixels */
      for(k = 0; k < type.length; ++k) {
         LLVMValueRef index = lp_build_const_int32(gallivm, k);
         LLVMValueRef offset_elem;
         LLVMValueRef i_elem, j_elem;
         LLVMValueRef tmp;

         offset_elem = LLVMBuildExtractElement(builder, offset,
                                               index, "");

         i_elem = LLVMBuildExtractElement(builder, i, index, "");
         j_elem = LLVMBuildExtractElement(builder, j, index, "");

         /* Get a single float[4]={R,G,B,A} pixel */
         tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type,
                                       base_ptr, offset_elem,
                                       i_elem, j_elem);

         /*
          * Insert the AoS tmp value channels into the SoA result vectors at
          * position = 'index'.
          */
         for (chan = 0; chan < 4; ++chan) {
            LLVMValueRef chan_val = lp_build_const_int32(gallivm, chan),
            tmp_chan = LLVMBuildExtractElement(builder, tmp, chan_val, "");
            rgba_out[chan] = LLVMBuildInsertElement(builder, rgba_out[chan],
                                                    tmp_chan, index, "");
         }
      }
   }
}
コード例 #22
0
ファイル: lp_bld_depth.c プロジェクト: blckshrk/Mesa
/**
 * Apply the stencil operator (add/sub/keep/etc) to the given vector
 * of stencil values.
 * \return  new stencil values vector
 */
static LLVMValueRef
lp_build_stencil_op_single(struct lp_build_context *bld,
                           const struct pipe_stencil_state *stencil,
                           enum stencil_op op,
                           LLVMValueRef stencilRef,
                           LLVMValueRef stencilVals)

{
   LLVMBuilderRef builder = bld->gallivm->builder;
   struct lp_type type = bld->type;
   LLVMValueRef res;
   LLVMValueRef max = lp_build_const_int_vec(bld->gallivm, type, 0xff);
   unsigned stencil_op;

   assert(type.sign);

   switch (op) {
   case S_FAIL_OP:
      stencil_op = stencil->fail_op;
      break;
   case Z_FAIL_OP:
      stencil_op = stencil->zfail_op;
      break;
   case Z_PASS_OP:
      stencil_op = stencil->zpass_op;
      break;
   default:
      assert(0 && "Invalid stencil_op mode");
      stencil_op = PIPE_STENCIL_OP_KEEP;
   }

   switch (stencil_op) {
   case PIPE_STENCIL_OP_KEEP:
      res = stencilVals;
      /* we can return early for this case */
      return res;
   case PIPE_STENCIL_OP_ZERO:
      res = bld->zero;
      break;
   case PIPE_STENCIL_OP_REPLACE:
      res = stencilRef;
      break;
   case PIPE_STENCIL_OP_INCR:
      res = lp_build_add(bld, stencilVals, bld->one);
      res = lp_build_min(bld, res, max);
      break;
   case PIPE_STENCIL_OP_DECR:
      res = lp_build_sub(bld, stencilVals, bld->one);
      res = lp_build_max(bld, res, bld->zero);
      break;
   case PIPE_STENCIL_OP_INCR_WRAP:
      res = lp_build_add(bld, stencilVals, bld->one);
      res = LLVMBuildAnd(builder, res, max, "");
      break;
   case PIPE_STENCIL_OP_DECR_WRAP:
      res = lp_build_sub(bld, stencilVals, bld->one);
      res = LLVMBuildAnd(builder, res, max, "");
      break;
   case PIPE_STENCIL_OP_INVERT:
      res = LLVMBuildNot(builder, stencilVals, "");
      res = LLVMBuildAnd(builder, res, max, "");
      break;
   default:
      assert(0 && "bad stencil op mode");
      res = bld->undef;
   }

   return res;
}
コード例 #23
0
ファイル: lp_bld_swizzle.c プロジェクト: BNieuwenhuizen/mesa
LLVMValueRef
lp_build_swizzle_aos(struct lp_build_context *bld,
                     LLVMValueRef a,
                     const unsigned char swizzles[4])
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   const struct lp_type type = bld->type;
   const unsigned n = type.length;
   unsigned i, j;

   if (swizzles[0] == PIPE_SWIZZLE_X &&
       swizzles[1] == PIPE_SWIZZLE_Y &&
       swizzles[2] == PIPE_SWIZZLE_Z &&
       swizzles[3] == PIPE_SWIZZLE_W) {
      return a;
   }

   if (swizzles[0] == swizzles[1] &&
       swizzles[1] == swizzles[2] &&
       swizzles[2] == swizzles[3]) {
      switch (swizzles[0]) {
      case PIPE_SWIZZLE_X:
      case PIPE_SWIZZLE_Y:
      case PIPE_SWIZZLE_Z:
      case PIPE_SWIZZLE_W:
         return lp_build_swizzle_scalar_aos(bld, a, swizzles[0], 4);
      case PIPE_SWIZZLE_0:
         return bld->zero;
      case PIPE_SWIZZLE_1:
         return bld->one;
      case LP_BLD_SWIZZLE_DONTCARE:
         return bld->undef;
      default:
         assert(0);
         return bld->undef;
      }
   }

   if (LLVMIsConstant(a) ||
       type.width >= 16) {
      /*
       * Shuffle.
       */
      LLVMValueRef undef = LLVMGetUndef(lp_build_elem_type(bld->gallivm, type));
      LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
      LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
      LLVMValueRef aux[LP_MAX_VECTOR_LENGTH];

      memset(aux, 0, sizeof aux);

      for(j = 0; j < n; j += 4) {
         for(i = 0; i < 4; ++i) {
            unsigned shuffle;
            switch (swizzles[i]) {
            default:
               assert(0);
               /* fall through */
            case PIPE_SWIZZLE_X:
            case PIPE_SWIZZLE_Y:
            case PIPE_SWIZZLE_Z:
            case PIPE_SWIZZLE_W:
               shuffle = j + swizzles[i];
               shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0);
               break;
            case PIPE_SWIZZLE_0:
               shuffle = type.length + 0;
               shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0);
               if (!aux[0]) {
                  aux[0] = lp_build_const_elem(bld->gallivm, type, 0.0);
               }
               break;
            case PIPE_SWIZZLE_1:
               shuffle = type.length + 1;
               shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0);
               if (!aux[1]) {
                  aux[1] = lp_build_const_elem(bld->gallivm, type, 1.0);
               }
               break;
            case LP_BLD_SWIZZLE_DONTCARE:
               shuffles[j + i] = LLVMGetUndef(i32t);
               break;
            }
         }
      }

      for (i = 0; i < n; ++i) {
         if (!aux[i]) {
            aux[i] = undef;
         }
      }

      return LLVMBuildShuffleVector(builder, a,
                                    LLVMConstVector(aux, n),
                                    LLVMConstVector(shuffles, n), "");
   } else {
      /*
       * Bit mask and shifts.
       *
       * For example, this will convert BGRA to RGBA by doing
       *
       * Little endian:
       *   rgba = (bgra & 0x00ff0000) >> 16
       *        | (bgra & 0xff00ff00)
       *        | (bgra & 0x000000ff) << 16
       *
       * Big endian:A
       *   rgba = (bgra & 0x0000ff00) << 16
       *        | (bgra & 0x00ff00ff)
       *        | (bgra & 0xff000000) >> 16
       *
       * This is necessary not only for faster cause, but because X86 backend
       * will refuse shuffles of <4 x i8> vectors
       */
      LLVMValueRef res;
      struct lp_type type4;
      unsigned cond = 0;
      unsigned chan;
      int shift;

      /*
       * Start with a mixture of 1 and 0.
       */
      for (chan = 0; chan < 4; ++chan) {
         if (swizzles[chan] == PIPE_SWIZZLE_1) {
            cond |= 1 << chan;
         }
      }
      res = lp_build_select_aos(bld, cond, bld->one, bld->zero, 4);

      /*
       * Build a type where each element is an integer that cover the four
       * channels.
       */
      type4 = type;
      type4.floating = FALSE;
      type4.width *= 4;
      type4.length /= 4;

      a = LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type4), "");
      res = LLVMBuildBitCast(builder, res, lp_build_vec_type(bld->gallivm, type4), "");

      /*
       * Mask and shift the channels, trying to group as many channels in the
       * same shift as possible.  The shift amount is positive for shifts left
       * and negative for shifts right.
       */
      for (shift = -3; shift <= 3; ++shift) {
         uint64_t mask = 0;

         assert(type4.width <= sizeof(mask)*8);

         /*
          * Vector element numbers follow the XYZW order, so 0 is always X, etc.
          * After widening 4 times we have:
          *
          *                                3210
          * Little-endian register layout: WZYX
          *
          *                                0123
          * Big-endian register layout:    XYZW
          *
          * For little-endian, higher-numbered channels are obtained by a shift right
          * (negative shift amount) and lower-numbered channels by a shift left
          * (positive shift amount).  The opposite is true for big-endian.
          */
         for (chan = 0; chan < 4; ++chan) {
            if (swizzles[chan] < 4) {
               /* We need to move channel swizzles[chan] into channel chan */
#ifdef PIPE_ARCH_LITTLE_ENDIAN
               if (swizzles[chan] - chan == -shift) {
                  mask |= ((1ULL << type.width) - 1) << (swizzles[chan] * type.width);
               }
#else
               if (swizzles[chan] - chan == shift) {
                  mask |= ((1ULL << type.width) - 1) << (type4.width - type.width) >> (swizzles[chan] * type.width);
               }
#endif
            }
         }

         if (mask) {
            LLVMValueRef masked;
            LLVMValueRef shifted;
            if (0)
               debug_printf("shift = %i, mask = %" PRIx64 "\n", shift, mask);

            masked = LLVMBuildAnd(builder, a,
                                  lp_build_const_int_vec(bld->gallivm, type4, mask), "");
            if (shift > 0) {
               shifted = LLVMBuildShl(builder, masked,
                                      lp_build_const_int_vec(bld->gallivm, type4, shift*type.width), "");
            } else if (shift < 0) {
               shifted = LLVMBuildLShr(builder, masked,
                                       lp_build_const_int_vec(bld->gallivm, type4, -shift*type.width), "");
            } else {
               shifted = masked;
            }

            res = LLVMBuildOr(builder, res, shifted, "");
         }
      }

      return LLVMBuildBitCast(builder, res,
                              lp_build_vec_type(bld->gallivm, type), "");
   }
コード例 #24
0
ファイル: lp_bld_swizzle.c プロジェクト: BNieuwenhuizen/mesa
/**
 * Swizzle one channel into other channels.
 */
LLVMValueRef
lp_build_swizzle_scalar_aos(struct lp_build_context *bld,
                            LLVMValueRef a,
                            unsigned channel,
                            unsigned num_channels)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   const struct lp_type type = bld->type;
   const unsigned n = type.length;
   unsigned i, j;

   if(a == bld->undef || a == bld->zero || a == bld->one || num_channels == 1)
      return a;

   assert(num_channels == 2 || num_channels == 4);

   /* XXX: SSE3 has PSHUFB which should be better than bitmasks, but forcing
    * using shuffles here actually causes worst results. More investigation is
    * needed. */
   if (LLVMIsConstant(a) ||
       type.width >= 16) {
      /*
       * Shuffle.
       */
      LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context);
      LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];

      for(j = 0; j < n; j += num_channels)
         for(i = 0; i < num_channels; ++i)
            shuffles[j + i] = LLVMConstInt(elem_type, j + channel, 0);

      return LLVMBuildShuffleVector(builder, a, bld->undef, LLVMConstVector(shuffles, n), "");
   }
   else if (num_channels == 2) {
      /*
       * Bit mask and shifts
       *
       *   XY XY .... XY  <= input
       *   0Y 0Y .... 0Y
       *   YY YY .... YY
       *   YY YY .... YY  <= output
       */
      struct lp_type type2;
      LLVMValueRef tmp = NULL;
      int shift;

      a = LLVMBuildAnd(builder, a,
                       lp_build_const_mask_aos(bld->gallivm,
                                               type, 1 << channel, num_channels), "");

      type2 = type;
      type2.floating = FALSE;
      type2.width *= 2;
      type2.length /= 2;

      a = LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type2), "");

      /*
       * Vector element 0 is always channel X.
       *
       *                        76 54 32 10 (array numbering)
       * Little endian reg in:  YX YX YX YX
       * Little endian reg out: YY YY YY YY if shift right (shift == -1)
       *                        XX XX XX XX if shift left (shift == 1)
       *
       *                        01 23 45 67 (array numbering)
       * Big endian reg in:     XY XY XY XY
       * Big endian reg out:    YY YY YY YY if shift left (shift == 1)
       *                        XX XX XX XX if shift right (shift == -1)
       *
       */
#ifdef PIPE_ARCH_LITTLE_ENDIAN
      shift = channel == 0 ? 1 : -1;
#else
      shift = channel == 0 ? -1 : 1;
#endif

      if (shift > 0) {
         tmp = LLVMBuildShl(builder, a, lp_build_const_int_vec(bld->gallivm, type2, shift * type.width), "");
      } else if (shift < 0) {
         tmp = LLVMBuildLShr(builder, a, lp_build_const_int_vec(bld->gallivm, type2, -shift * type.width), "");
      }

      assert(tmp);
      if (tmp) {
         a = LLVMBuildOr(builder, a, tmp, "");
      }

      return LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type), "");
   }
   else {
      /*
       * Bit mask and recursive shifts
       *
       * Little-endian registers:
       *
       *   7654 3210
       *   WZYX WZYX .... WZYX  <= input
       *   00Y0 00Y0 .... 00Y0  <= mask
       *   00YY 00YY .... 00YY  <= shift right 1 (shift amount -1)
       *   YYYY YYYY .... YYYY  <= shift left 2 (shift amount 2)
       *
       * Big-endian registers:
       *
       *   0123 4567
       *   XYZW XYZW .... XYZW  <= input
       *   0Y00 0Y00 .... 0Y00  <= mask
       *   YY00 YY00 .... YY00  <= shift left 1 (shift amount 1)
       *   YYYY YYYY .... YYYY  <= shift right 2 (shift amount -2)
       *
       * shifts[] gives little-endian shift amounts; we need to negate for big-endian.
       */
      struct lp_type type4;
      const int shifts[4][2] = {
         { 1,  2},
         {-1,  2},
         { 1, -2},
         {-1, -2}
      };
      unsigned i;

      a = LLVMBuildAnd(builder, a,
                       lp_build_const_mask_aos(bld->gallivm,
                                               type, 1 << channel, 4), "");

      /*
       * Build a type where each element is an integer that cover the four
       * channels.
       */

      type4 = type;
      type4.floating = FALSE;
      type4.width *= 4;
      type4.length /= 4;

      a = LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type4), "");

      for(i = 0; i < 2; ++i) {
         LLVMValueRef tmp = NULL;
         int shift = shifts[channel][i];

         /* See endianness diagram above */
#ifdef PIPE_ARCH_BIG_ENDIAN
         shift = -shift;
#endif

         if(shift > 0)
            tmp = LLVMBuildShl(builder, a, lp_build_const_int_vec(bld->gallivm, type4, shift*type.width), "");
         if(shift < 0)
            tmp = LLVMBuildLShr(builder, a, lp_build_const_int_vec(bld->gallivm, type4, -shift*type.width), "");

         assert(tmp);
         if(tmp)
            a = LLVMBuildOr(builder, a, tmp, "");
      }

      return LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type), "");
   }
}
コード例 #25
0
/*
 * Do a cached lookup.
 *
 * Returns (vectors of) 4x8 rgba aos value
 */
LLVMValueRef
lp_build_fetch_cached_texels(struct gallivm_state *gallivm,
                             const struct util_format_description *format_desc,
                             unsigned n,
                             LLVMValueRef base_ptr,
                             LLVMValueRef offset,
                             LLVMValueRef i,
                             LLVMValueRef j,
                             LLVMValueRef cache)

{
   LLVMBuilderRef builder = gallivm->builder;
   unsigned count, low_bit, log2size;
   LLVMValueRef color, offset_stored, addr, ptr_addrtrunc, tmp;
   LLVMValueRef ij_index, hash_index, hash_mask, block_index;
   LLVMTypeRef i8t = LLVMInt8TypeInContext(gallivm->context);
   LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
   LLVMTypeRef i64t = LLVMInt64TypeInContext(gallivm->context);
   struct lp_type type;
   struct lp_build_context bld32;
   memset(&type, 0, sizeof type);
   type.width = 32;
   type.length = n;

   assert(format_desc->block.width == 4);
   assert(format_desc->block.height == 4);

   lp_build_context_init(&bld32, gallivm, type);

   /*
    * compute hash - we use direct mapped cache, the hash function could
    *                be better but it needs to be simple
    * per-element:
    *    compare offset with offset stored at tag (hash)
    *    if not equal decode/store block, update tag
    *    extract color from cache
    *    assemble result vector
    */

   /* TODO: not ideal with 32bit pointers... */

   low_bit = util_logbase2(format_desc->block.bits / 8);
   log2size = util_logbase2(LP_BUILD_FORMAT_CACHE_SIZE);
   addr = LLVMBuildPtrToInt(builder, base_ptr, i64t, "");
   ptr_addrtrunc = LLVMBuildPtrToInt(builder, base_ptr, i32t, "");
   ptr_addrtrunc = lp_build_broadcast_scalar(&bld32, ptr_addrtrunc);
   /* For the hash function, first mask off the unused lowest bits. Then just
      do some xor with address bits - only use lower 32bits */
   ptr_addrtrunc = LLVMBuildAdd(builder, offset, ptr_addrtrunc, "");
   ptr_addrtrunc = LLVMBuildLShr(builder, ptr_addrtrunc,
                                 lp_build_const_int_vec(gallivm, type, low_bit), "");
   /* This only really makes sense for size 64,128,256 */
   hash_index = ptr_addrtrunc;
   ptr_addrtrunc = LLVMBuildLShr(builder, ptr_addrtrunc,
                                 lp_build_const_int_vec(gallivm, type, 2*log2size), "");
   hash_index = LLVMBuildXor(builder, ptr_addrtrunc, hash_index, "");
   tmp = LLVMBuildLShr(builder, hash_index,
                       lp_build_const_int_vec(gallivm, type, log2size), "");
   hash_index = LLVMBuildXor(builder, hash_index, tmp, "");

   hash_mask = lp_build_const_int_vec(gallivm, type, LP_BUILD_FORMAT_CACHE_SIZE - 1);
   hash_index = LLVMBuildAnd(builder, hash_index, hash_mask, "");
   ij_index = LLVMBuildShl(builder, i, lp_build_const_int_vec(gallivm, type, 2), "");
   ij_index = LLVMBuildAdd(builder, ij_index, j, "");
   block_index = LLVMBuildShl(builder, hash_index,
                              lp_build_const_int_vec(gallivm, type, 4), "");
   block_index = LLVMBuildAdd(builder, ij_index, block_index, "");

   if (n > 1) {
      color = LLVMGetUndef(LLVMVectorType(i32t, n));
      for (count = 0; count < n; count++) {
         LLVMValueRef index, cond, colorx;
         LLVMValueRef block_indexx, hash_indexx, addrx, offsetx, ptr_addrx;
         struct lp_build_if_state if_ctx;

         index = lp_build_const_int32(gallivm, count);
         offsetx = LLVMBuildExtractElement(builder, offset, index, "");
         addrx = LLVMBuildZExt(builder, offsetx, i64t, "");
         addrx = LLVMBuildAdd(builder, addrx, addr, "");
         block_indexx = LLVMBuildExtractElement(builder, block_index, index, "");
         hash_indexx = LLVMBuildLShr(builder, block_indexx,
                                     lp_build_const_int32(gallivm, 4), "");
         offset_stored = lookup_tag_data(gallivm, cache, hash_indexx);
         cond = LLVMBuildICmp(builder, LLVMIntNE, offset_stored, addrx, "");

         lp_build_if(&if_ctx, gallivm, cond);
         {
            ptr_addrx = LLVMBuildIntToPtr(builder, addrx,
                                          LLVMPointerType(i8t, 0), "");
            update_cached_block(gallivm, format_desc, ptr_addrx, hash_indexx, cache);
#if LP_BUILD_FORMAT_CACHE_DEBUG
            update_cache_access(gallivm, cache, 1,
                                LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_MISS);
#endif
         }
         lp_build_endif(&if_ctx);

         colorx = lookup_cached_pixel(gallivm, cache, block_indexx);

         color = LLVMBuildInsertElement(builder, color, colorx,
                                        lp_build_const_int32(gallivm, count), "");
      }
   }
   else {
      LLVMValueRef cond;
      struct lp_build_if_state if_ctx;

      tmp = LLVMBuildZExt(builder, offset, i64t, "");
      addr = LLVMBuildAdd(builder, tmp, addr, "");
      offset_stored = lookup_tag_data(gallivm, cache, hash_index);
      cond = LLVMBuildICmp(builder, LLVMIntNE, offset_stored, addr, "");

      lp_build_if(&if_ctx, gallivm, cond);
      {
         tmp = LLVMBuildIntToPtr(builder, addr, LLVMPointerType(i8t, 0), "");
         update_cached_block(gallivm, format_desc, tmp, hash_index, cache);
#if LP_BUILD_FORMAT_CACHE_DEBUG
         update_cache_access(gallivm, cache, 1,
                             LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_MISS);
#endif
      }
      lp_build_endif(&if_ctx);

      color = lookup_cached_pixel(gallivm, cache, block_index);
   }
#if LP_BUILD_FORMAT_CACHE_DEBUG
   update_cache_access(gallivm, cache, n,
                       LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_TOTAL);
#endif
   return LLVMBuildBitCast(builder, color, LLVMVectorType(i8t, n * 4), "");
}
コード例 #26
0
/**
 * Generate code for performing depth and/or stencil tests.
 * We operate on a vector of values (typically a 2x2 quad).
 *
 * \param depth  the depth test state
 * \param stencil  the front/back stencil state
 * \param type  the data type of the fragment depth/stencil values
 * \param format_desc  description of the depth/stencil surface
 * \param mask  the alive/dead pixel mask for the quad (vector)
 * \param stencil_refs  the front/back stencil ref values (scalar)
 * \param z_src  the incoming depth/stencil values (a 2x2 quad)
 * \param zs_dst_ptr  pointer to depth/stencil values in framebuffer
 * \param facing  contains float value indicating front/back facing polygon
 */
void
lp_build_depth_stencil_test(LLVMBuilderRef builder,
                            const struct pipe_depth_state *depth,
                            const struct pipe_stencil_state stencil[2],
                            struct lp_type type,
                            const struct util_format_description *format_desc,
                            struct lp_build_mask_context *mask,
                            LLVMValueRef stencil_refs[2],
                            LLVMValueRef z_src,
                            LLVMValueRef zs_dst_ptr,
                            LLVMValueRef face,
                            LLVMValueRef counter)
{
   struct lp_build_context bld;
   struct lp_build_context sbld;
   struct lp_type s_type;
   LLVMValueRef zs_dst, z_dst = NULL;
   LLVMValueRef stencil_vals = NULL;
   LLVMValueRef z_bitmask = NULL, stencil_shift = NULL;
   LLVMValueRef z_pass = NULL, s_pass_mask = NULL;
   LLVMValueRef orig_mask = mask->value;

   /* Sanity checking */
   {
      const unsigned z_swizzle = format_desc->swizzle[0];
      const unsigned s_swizzle = format_desc->swizzle[1];

      assert(z_swizzle != UTIL_FORMAT_SWIZZLE_NONE ||
             s_swizzle != UTIL_FORMAT_SWIZZLE_NONE);

      assert(depth->enabled || stencil[0].enabled);

      assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS);
      assert(format_desc->block.width == 1);
      assert(format_desc->block.height == 1);

      if (stencil[0].enabled) {
         assert(format_desc->format == PIPE_FORMAT_Z24_UNORM_S8_USCALED ||
                format_desc->format == PIPE_FORMAT_S8_USCALED_Z24_UNORM);
      }

      assert(z_swizzle < 4);
      assert(format_desc->block.bits == type.width);
      if (type.floating) {
         assert(z_swizzle == 0);
         assert(format_desc->channel[z_swizzle].type ==
                UTIL_FORMAT_TYPE_FLOAT);
         assert(format_desc->channel[z_swizzle].size ==
                format_desc->block.bits);
      }
      else {
         assert(format_desc->channel[z_swizzle].type ==
                UTIL_FORMAT_TYPE_UNSIGNED);
         assert(format_desc->channel[z_swizzle].normalized);
         assert(!type.fixed);
         assert(!type.sign);
         assert(type.norm);
      }
   }


   /* Setup build context for Z vals */
   lp_build_context_init(&bld, builder, type);

   /* Setup build context for stencil vals */
   s_type = lp_type_int_vec(type.width);
   lp_build_context_init(&sbld, builder, s_type);

   /* Load current z/stencil value from z/stencil buffer */
   zs_dst = LLVMBuildLoad(builder, zs_dst_ptr, "");

   lp_build_name(zs_dst, "zsbufval");


   /* Compute and apply the Z/stencil bitmasks and shifts.
    */
   {
      unsigned z_shift, z_mask;
      unsigned s_shift, s_mask;

      if (get_z_shift_and_mask(format_desc, &z_shift, &z_mask)) {
         if (z_shift) {
            LLVMValueRef shift = lp_build_const_int_vec(type, z_shift);
            z_src = LLVMBuildLShr(builder, z_src, shift, "");
         }

         if (z_mask != 0xffffffff) {
            LLVMValueRef mask = lp_build_const_int_vec(type, z_mask);
            z_src = LLVMBuildAnd(builder, z_src, mask, "");
            z_dst = LLVMBuildAnd(builder, zs_dst, mask, "");
            z_bitmask = mask;  /* used below */
         }
         else {
            z_dst = zs_dst;
         }

         lp_build_name(z_dst, "zsbuf.z");
      }

      if (get_s_shift_and_mask(format_desc, &s_shift, &s_mask)) {
         if (s_shift) {
            LLVMValueRef shift = lp_build_const_int_vec(type, s_shift);
            stencil_vals = LLVMBuildLShr(builder, zs_dst, shift, "");
            stencil_shift = shift;  /* used below */
         }
         else {
            stencil_vals = zs_dst;
         }

         if (s_mask != 0xffffffff) {
            LLVMValueRef mask = lp_build_const_int_vec(type, s_mask);
            stencil_vals = LLVMBuildAnd(builder, stencil_vals, mask, "");
         }

         lp_build_name(stencil_vals, "stencil");
      }
   }


   if (stencil[0].enabled) {
      /* convert scalar stencil refs into vectors */
      stencil_refs[0] = lp_build_broadcast_scalar(&bld, stencil_refs[0]);
      stencil_refs[1] = lp_build_broadcast_scalar(&bld, stencil_refs[1]);

      s_pass_mask = lp_build_stencil_test(&sbld, stencil,
                                          stencil_refs, stencil_vals, face);

      /* apply stencil-fail operator */
      {
         LLVMValueRef s_fail_mask = lp_build_andc(&bld, orig_mask, s_pass_mask);
         stencil_vals = lp_build_stencil_op(&sbld, stencil, S_FAIL_OP,
                                            stencil_refs, stencil_vals,
                                            s_fail_mask, face);
      }
   }

   if (depth->enabled) {
      /* compare src Z to dst Z, returning 'pass' mask */
      z_pass = lp_build_cmp(&bld, depth->func, z_src, z_dst);

      if (!stencil[0].enabled) {
         /* We can potentially skip all remaining operations here, but only
          * if stencil is disabled because we still need to update the stencil
          * buffer values.  Don't need to update Z buffer values.
          */
         lp_build_mask_update(mask, z_pass);
      }

      if (depth->writemask) {
         LLVMValueRef zselectmask = mask->value;

         /* mask off bits that failed Z test */
         zselectmask = LLVMBuildAnd(builder, zselectmask, z_pass, "");

         /* mask off bits that failed stencil test */
         if (s_pass_mask) {
            zselectmask = LLVMBuildAnd(builder, zselectmask, s_pass_mask, "");
         }

         /* if combined Z/stencil format, mask off the stencil bits */
         if (z_bitmask) {
            zselectmask = LLVMBuildAnd(builder, zselectmask, z_bitmask, "");
         }

         /* Mix the old and new Z buffer values.
          * z_dst[i] = (zselectmask[i] & z_src[i]) | (~zselectmask[i] & z_dst[i])
          */
         z_dst = lp_build_select_bitwise(&bld, zselectmask, z_src, z_dst);
      }

      if (stencil[0].enabled) {
         /* update stencil buffer values according to z pass/fail result */
         LLVMValueRef z_fail_mask, z_pass_mask;

         /* apply Z-fail operator */
         z_fail_mask = lp_build_andc(&bld, orig_mask, z_pass);
         stencil_vals = lp_build_stencil_op(&sbld, stencil, Z_FAIL_OP,
                                            stencil_refs, stencil_vals,
                                            z_fail_mask, face);

         /* apply Z-pass operator */
         z_pass_mask = LLVMBuildAnd(bld.builder, orig_mask, z_pass, "");
         stencil_vals = lp_build_stencil_op(&sbld, stencil, Z_PASS_OP,
                                            stencil_refs, stencil_vals,
                                            z_pass_mask, face);
      }
   }
   else {
      /* No depth test: apply Z-pass operator to stencil buffer values which
       * passed the stencil test.
       */
      s_pass_mask = LLVMBuildAnd(bld.builder, orig_mask, s_pass_mask, "");
      stencil_vals = lp_build_stencil_op(&sbld, stencil, Z_PASS_OP,
                                         stencil_refs, stencil_vals,
                                         s_pass_mask, face);
   }

   /* The Z bits are already in the right place but we may need to shift the
    * stencil bits before ORing Z with Stencil to make the final pixel value.
    */
   if (stencil_vals && stencil_shift)
      stencil_vals = LLVMBuildShl(bld.builder, stencil_vals,
                                  stencil_shift, "");

   /* Finally, merge/store the z/stencil values */
   if ((depth->enabled && depth->writemask) ||
       (stencil[0].enabled && stencil[0].writemask)) {

      if (z_dst && stencil_vals)
         zs_dst = LLVMBuildOr(bld.builder, z_dst, stencil_vals, "");
      else if (z_dst)
         zs_dst = z_dst;
      else
         zs_dst = stencil_vals;

      LLVMBuildStore(builder, zs_dst, zs_dst_ptr);
   }

   if (s_pass_mask)
      lp_build_mask_update(mask, s_pass_mask);

   if (depth->enabled && stencil[0].enabled)
      lp_build_mask_update(mask, z_pass);

   if (counter)
      lp_build_occlusion_count(builder, type, mask->value, counter);
}
コード例 #27
0
ファイル: lp_bld_format_soa.c プロジェクト: CSRedRat/mesa-1
/**
 * Unpack several pixels in SoA.
 *
 * It takes a vector of packed pixels:
 *
 *   packed = {P0, P1, P2, P3, ..., Pn}
 *
 * And will produce four vectors:
 *
 *   red    = {R0, R1, R2, R3, ..., Rn}
 *   green  = {G0, G1, G2, G3, ..., Gn}
 *   blue   = {B0, B1, B2, B3, ..., Bn}
 *   alpha  = {A0, A1, A2, A3, ..., An}
 *
 * It requires that a packed pixel fits into an element of the output
 * channels. The common case is when converting pixel with a depth of 32 bit or
 * less into floats.
 *
 * \param format_desc  the format of the 'packed' incoming pixel vector
 * \param type  the desired type for rgba_out (type.length = n, above)
 * \param packed  the incoming vector of packed pixels
 * \param rgba_out  returns the SoA R,G,B,A vectors
 */
void
lp_build_unpack_rgba_soa(struct gallivm_state *gallivm,
                         const struct util_format_description *format_desc,
                         struct lp_type type,
                         LLVMValueRef packed,
                         LLVMValueRef rgba_out[4])
{
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context bld;
   LLVMValueRef inputs[4];
   unsigned chan;

   assert(format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
   assert(format_desc->block.width == 1);
   assert(format_desc->block.height == 1);
   assert(format_desc->block.bits <= type.width);
   /* FIXME: Support more output types */
   assert(type.width == 32);

   lp_build_context_init(&bld, gallivm, type);

   /* Decode the input vector components */
   for (chan = 0; chan < format_desc->nr_channels; ++chan) {
      const unsigned width = format_desc->channel[chan].size;
      const unsigned start = format_desc->channel[chan].shift;
      const unsigned stop = start + width;
      LLVMValueRef input;

      input = packed;

      switch(format_desc->channel[chan].type) {
      case UTIL_FORMAT_TYPE_VOID:
         input = lp_build_undef(gallivm, type);
         break;

      case UTIL_FORMAT_TYPE_UNSIGNED:
         /*
          * Align the LSB
          */

         if (start) {
            input = LLVMBuildLShr(builder, input, lp_build_const_int_vec(gallivm, type, start), "");
         }

         /*
          * Zero the MSBs
          */

         if (stop < format_desc->block.bits) {
            unsigned mask = ((unsigned long long)1 << width) - 1;
            input = LLVMBuildAnd(builder, input, lp_build_const_int_vec(gallivm, type, mask), "");
         }

         /*
          * Type conversion
          */

         if (type.floating) {
            if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
               assert(width == 8);
               if (format_desc->swizzle[3] == chan) {
                  input = lp_build_unsigned_norm_to_float(gallivm, width, type, input);
               }
               else {
                  struct lp_type conv_type = lp_uint_type(type);
                  input = lp_build_srgb_to_linear(gallivm, conv_type, input);
               }
            }
            else {
               if(format_desc->channel[chan].normalized)
                  input = lp_build_unsigned_norm_to_float(gallivm, width, type, input);
               else
                  input = LLVMBuildSIToFP(builder, input,
                                          lp_build_vec_type(gallivm, type), "");
            }
         }
         else if (format_desc->channel[chan].pure_integer) {
            /* Nothing to do */
         } else {
             /* FIXME */
             assert(0);
         }

         break;

      case UTIL_FORMAT_TYPE_SIGNED:
         /*
          * Align the sign bit first.
          */

         if (stop < type.width) {
            unsigned bits = type.width - stop;
            LLVMValueRef bits_val = lp_build_const_int_vec(gallivm, type, bits);
            input = LLVMBuildShl(builder, input, bits_val, "");
         }

         /*
          * Align the LSB (with an arithmetic shift to preserve the sign)
          */

         if (format_desc->channel[chan].size < type.width) {
            unsigned bits = type.width - format_desc->channel[chan].size;
            LLVMValueRef bits_val = lp_build_const_int_vec(gallivm, type, bits);
            input = LLVMBuildAShr(builder, input, bits_val, "");
         }

         /*
          * Type conversion
          */

         if (type.floating) {
            input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(gallivm, type), "");
            if (format_desc->channel[chan].normalized) {
               double scale = 1.0 / ((1 << (format_desc->channel[chan].size - 1)) - 1);
               LLVMValueRef scale_val = lp_build_const_vec(gallivm, type, scale);
               input = LLVMBuildFMul(builder, input, scale_val, "");
               /* the formula above will produce value below -1.0 for most negative
                * value but everything seems happy with that hence disable for now */
               if (0)
                  input = lp_build_max(&bld, input,
                                       lp_build_const_vec(gallivm, type, -1.0f));
            }
         }
         else if (format_desc->channel[chan].pure_integer) {
            /* Nothing to do */
         } else {
             /* FIXME */
             assert(0);
         }

         break;

      case UTIL_FORMAT_TYPE_FLOAT:
         if (type.floating) {
            assert(start == 0);
            assert(stop == 32);
            assert(type.width == 32);
            input = LLVMBuildBitCast(builder, input, lp_build_vec_type(gallivm, type), "");
         }
         else {
            /* FIXME */
            assert(0);
            input = lp_build_undef(gallivm, type);
         }
         break;

      case UTIL_FORMAT_TYPE_FIXED:
         if (type.floating) {
            double scale = 1.0 / ((1 << (format_desc->channel[chan].size/2)) - 1);
            LLVMValueRef scale_val = lp_build_const_vec(gallivm, type, scale);
            input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(gallivm, type), "");
            input = LLVMBuildFMul(builder, input, scale_val, "");
         }
         else {
            /* FIXME */
            assert(0);
            input = lp_build_undef(gallivm, type);
         }
         break;

      default:
         assert(0);
         input = lp_build_undef(gallivm, type);
         break;
      }

      inputs[chan] = input;
   }

   lp_build_format_swizzle_soa(format_desc, &bld, inputs, rgba_out);
}
コード例 #28
0
ファイル: lp_bld_depth.c プロジェクト: blckshrk/Mesa
/**
 * Perform the occlusion test and increase the counter.
 * Test the depth mask. Add the number of channel which has none zero mask
 * into the occlusion counter. e.g. maskvalue is {-1, -1, -1, -1}.
 * The counter will add 4.
 *
 * \param type holds element type of the mask vector.
 * \param maskvalue is the depth test mask.
 * \param counter is a pointer of the uint32 counter.
 */
void
lp_build_occlusion_count(struct gallivm_state *gallivm,
                         struct lp_type type,
                         LLVMValueRef maskvalue,
                         LLVMValueRef counter)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMContextRef context = gallivm->context;
   LLVMValueRef countmask = lp_build_const_int_vec(gallivm, type, 1);
   LLVMValueRef count, newcount;

   assert(type.length <= 16);
   assert(type.floating);

   if(util_cpu_caps.has_sse && type.length == 4) {
      const char *movmskintr = "llvm.x86.sse.movmsk.ps";
      const char *popcntintr = "llvm.ctpop.i32";
      LLVMValueRef bits = LLVMBuildBitCast(builder, maskvalue,
                                           lp_build_vec_type(gallivm, type), "");
      bits = lp_build_intrinsic_unary(builder, movmskintr,
                                      LLVMInt32TypeInContext(context), bits);
      count = lp_build_intrinsic_unary(builder, popcntintr,
                                       LLVMInt32TypeInContext(context), bits);
   }
   else if(util_cpu_caps.has_avx && type.length == 8) {
      const char *movmskintr = "llvm.x86.avx.movmsk.ps.256";
      const char *popcntintr = "llvm.ctpop.i32";
      LLVMValueRef bits = LLVMBuildBitCast(builder, maskvalue,
                                           lp_build_vec_type(gallivm, type), "");
      bits = lp_build_intrinsic_unary(builder, movmskintr,
                                      LLVMInt32TypeInContext(context), bits);
      count = lp_build_intrinsic_unary(builder, popcntintr,
                                       LLVMInt32TypeInContext(context), bits);
   }
   else {
      unsigned i;
      LLVMValueRef countv = LLVMBuildAnd(builder, maskvalue, countmask, "countv");
      LLVMTypeRef counttype = LLVMIntTypeInContext(context, type.length * 8);
      LLVMTypeRef i8vntype = LLVMVectorType(LLVMInt8TypeInContext(context), type.length * 4);
      LLVMValueRef shufflev, countd;
      LLVMValueRef shuffles[16];
      const char *popcntintr = NULL;

      countv = LLVMBuildBitCast(builder, countv, i8vntype, "");

       for (i = 0; i < type.length; i++) {
          shuffles[i] = lp_build_const_int32(gallivm, 4*i);
       }

       shufflev = LLVMConstVector(shuffles, type.length);
       countd = LLVMBuildShuffleVector(builder, countv, LLVMGetUndef(i8vntype), shufflev, "");
       countd = LLVMBuildBitCast(builder, countd, counttype, "countd");

       /*
        * XXX FIXME
        * this is bad on cpus without popcount (on x86 supported by intel
        * nehalem, amd barcelona, and up - not tied to sse42).
        * Would be much faster to just sum the 4 elements of the vector with
        * some horizontal add (shuffle/add/shuffle/add after the initial and).
        */
       switch (type.length) {
       case 4:
          popcntintr = "llvm.ctpop.i32";
          break;
       case 8:
          popcntintr = "llvm.ctpop.i64";
          break;
       case 16:
          popcntintr = "llvm.ctpop.i128";
          break;
       default:
          assert(0);
       }
       count = lp_build_intrinsic_unary(builder, popcntintr, counttype, countd);

       if (type.length > 4) {
          count = LLVMBuildTrunc(builder, count, LLVMIntTypeInContext(context, 32), "");
       }
   }
   newcount = LLVMBuildLoad(builder, counter, "origcount");
   newcount = LLVMBuildAdd(builder, newcount, count, "newcount");
   LLVMBuildStore(builder, newcount, counter);
}
コード例 #29
0
ファイル: lp_bld_format_yuv.c プロジェクト: Gnurou/mesa
static INLINE void
yuv_to_rgb_soa(struct gallivm_state *gallivm,
               unsigned n,
               LLVMValueRef y, LLVMValueRef u, LLVMValueRef v,
               LLVMValueRef *r, LLVMValueRef *g, LLVMValueRef *b)
{
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_type type;
   struct lp_build_context bld;

   LLVMValueRef c0;
   LLVMValueRef c8;
   LLVMValueRef c16;
   LLVMValueRef c128;
   LLVMValueRef c255;

   LLVMValueRef cy;
   LLVMValueRef cug;
   LLVMValueRef cub;
   LLVMValueRef cvr;
   LLVMValueRef cvg;

   memset(&type, 0, sizeof type);
   type.sign = TRUE;
   type.width = 32;
   type.length = n;

   lp_build_context_init(&bld, gallivm, type);

   assert(lp_check_value(type, y));
   assert(lp_check_value(type, u));
   assert(lp_check_value(type, v));

   /*
    * Constants
    */

   c0   = lp_build_const_int_vec(gallivm, type,   0);
   c8   = lp_build_const_int_vec(gallivm, type,   8);
   c16  = lp_build_const_int_vec(gallivm, type,  16);
   c128 = lp_build_const_int_vec(gallivm, type, 128);
   c255 = lp_build_const_int_vec(gallivm, type, 255);

   cy  = lp_build_const_int_vec(gallivm, type,  298);
   cug = lp_build_const_int_vec(gallivm, type, -100);
   cub = lp_build_const_int_vec(gallivm, type,  516);
   cvr = lp_build_const_int_vec(gallivm, type,  409);
   cvg = lp_build_const_int_vec(gallivm, type, -208);

   /*
    *  y -= 16;
    *  u -= 128;
    *  v -= 128;
    */

   y = LLVMBuildSub(builder, y, c16, "");
   u = LLVMBuildSub(builder, u, c128, "");
   v = LLVMBuildSub(builder, v, c128, "");

   /*
    * r = 298 * _y            + 409 * _v + 128;
    * g = 298 * _y - 100 * _u - 208 * _v + 128;
    * b = 298 * _y + 516 * _u            + 128;
    */

   y = LLVMBuildMul(builder, y, cy, "");
   y = LLVMBuildAdd(builder, y, c128, "");

   *r = LLVMBuildMul(builder, v, cvr, "");
   *g = LLVMBuildAdd(builder,
                     LLVMBuildMul(builder, u, cug, ""),
                     LLVMBuildMul(builder, v, cvg, ""),
                     "");
   *b = LLVMBuildMul(builder, u, cub, "");

   *r = LLVMBuildAdd(builder, *r, y, "");
   *g = LLVMBuildAdd(builder, *g, y, "");
   *b = LLVMBuildAdd(builder, *b, y, "");

   /*
    * r >>= 8;
    * g >>= 8;
    * b >>= 8;
    */

   *r = LLVMBuildAShr(builder, *r, c8, "r");
   *g = LLVMBuildAShr(builder, *g, c8, "g");
   *b = LLVMBuildAShr(builder, *b, c8, "b");

   /*
    * Clamp
    */

   *r = lp_build_clamp(&bld, *r, c0, c255);
   *g = lp_build_clamp(&bld, *g, c0, c255);
   *b = lp_build_clamp(&bld, *b, c0, c255);
}
コード例 #30
0
ファイル: lp_bld_depth.c プロジェクト: blckshrk/Mesa
/**
 * Generate code for performing depth and/or stencil tests.
 * We operate on a vector of values (typically n 2x2 quads).
 *
 * \param depth  the depth test state
 * \param stencil  the front/back stencil state
 * \param type  the data type of the fragment depth/stencil values
 * \param format_desc  description of the depth/stencil surface
 * \param mask  the alive/dead pixel mask for the quad (vector)
 * \param stencil_refs  the front/back stencil ref values (scalar)
 * \param z_src  the incoming depth/stencil values (n 2x2 quad values, float32)
 * \param zs_dst  the depth/stencil values in framebuffer
 * \param face  contains boolean value indicating front/back facing polygon
 */
void
lp_build_depth_stencil_test(struct gallivm_state *gallivm,
                            const struct pipe_depth_state *depth,
                            const struct pipe_stencil_state stencil[2],
                            struct lp_type z_src_type,
                            const struct util_format_description *format_desc,
                            struct lp_build_mask_context *mask,
                            LLVMValueRef stencil_refs[2],
                            LLVMValueRef z_src,
                            LLVMValueRef z_fb,
                            LLVMValueRef s_fb,
                            LLVMValueRef face,
                            LLVMValueRef *z_value,
                            LLVMValueRef *s_value,
                            boolean do_branch)
{
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_type z_type;
   struct lp_build_context z_bld;
   struct lp_build_context s_bld;
   struct lp_type s_type;
   unsigned z_shift = 0, z_width = 0, z_mask = 0;
   LLVMValueRef z_dst = NULL;
   LLVMValueRef stencil_vals = NULL;
   LLVMValueRef z_bitmask = NULL, stencil_shift = NULL;
   LLVMValueRef z_pass = NULL, s_pass_mask = NULL;
   LLVMValueRef orig_mask = lp_build_mask_value(mask);
   LLVMValueRef front_facing = NULL;
   boolean have_z, have_s;

   /*
    * Depths are expected to be between 0 and 1, even if they are stored in
    * floats. Setting these bits here will ensure that the lp_build_conv() call
    * below won't try to unnecessarily clamp the incoming values.
    */
   if(z_src_type.floating) {
      z_src_type.sign = FALSE;
      z_src_type.norm = TRUE;
   }
   else {
      assert(!z_src_type.sign);
      assert(z_src_type.norm);
   }

   /* Pick the type matching the depth-stencil format. */
   z_type = lp_depth_type(format_desc, z_src_type.length);

   /* Pick the intermediate type for depth operations. */
   z_type.width = z_src_type.width;
   assert(z_type.length == z_src_type.length);

   /* FIXME: for non-float depth/stencil might generate better code
    * if we'd always split it up to use 128bit operations.
    * For stencil we'd almost certainly want to pack to 8xi16 values,
    * for z just run twice.
    */

   /* Sanity checking */
   {
      const unsigned z_swizzle = format_desc->swizzle[0];
      const unsigned s_swizzle = format_desc->swizzle[1];

      assert(z_swizzle != UTIL_FORMAT_SWIZZLE_NONE ||
             s_swizzle != UTIL_FORMAT_SWIZZLE_NONE);

      assert(depth->enabled || stencil[0].enabled);

      assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS);
      assert(format_desc->block.width == 1);
      assert(format_desc->block.height == 1);

      if (stencil[0].enabled) {
         assert(s_swizzle < 4);
         assert(format_desc->channel[s_swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED);
         assert(format_desc->channel[s_swizzle].pure_integer);
         assert(!format_desc->channel[s_swizzle].normalized);
         assert(format_desc->channel[s_swizzle].size == 8);
      }

      if (depth->enabled) {
         assert(z_swizzle < 4);
         if (z_type.floating) {
            assert(z_swizzle == 0);
            assert(format_desc->channel[z_swizzle].type ==
                   UTIL_FORMAT_TYPE_FLOAT);
            assert(format_desc->channel[z_swizzle].size == 32);
         }
         else {
            assert(format_desc->channel[z_swizzle].type ==
                   UTIL_FORMAT_TYPE_UNSIGNED);
            assert(format_desc->channel[z_swizzle].normalized);
            assert(!z_type.fixed);
         }
      }
   }


   /* Setup build context for Z vals */
   lp_build_context_init(&z_bld, gallivm, z_type);

   /* Setup build context for stencil vals */
   s_type = lp_int_type(z_type);
   lp_build_context_init(&s_bld, gallivm, s_type);

   /* Compute and apply the Z/stencil bitmasks and shifts.
    */
   {
      unsigned s_shift, s_mask;

      z_dst = z_fb;
      stencil_vals = s_fb;

      have_z = get_z_shift_and_mask(format_desc, &z_shift, &z_width, &z_mask);
      have_s = get_s_shift_and_mask(format_desc, &s_shift, &s_mask);

      if (have_z) {
         if (z_mask != 0xffffffff) {
            z_bitmask = lp_build_const_int_vec(gallivm, z_type, z_mask);
         }

         /*
          * Align the framebuffer Z 's LSB to the right.
          */
         if (z_shift) {
            LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_type, z_shift);
            z_dst = LLVMBuildLShr(builder, z_dst, shift, "z_dst");
         } else if (z_bitmask) {
            z_dst = LLVMBuildAnd(builder, z_dst, z_bitmask, "z_dst");
         } else {
            lp_build_name(z_dst, "z_dst");
         }
      }

      if (have_s) {
         if (s_shift) {
            LLVMValueRef shift = lp_build_const_int_vec(gallivm, s_type, s_shift);
            stencil_vals = LLVMBuildLShr(builder, stencil_vals, shift, "");
            stencil_shift = shift;  /* used below */
         }

         if (s_mask != 0xffffffff) {
            LLVMValueRef mask = lp_build_const_int_vec(gallivm, s_type, s_mask);
            stencil_vals = LLVMBuildAnd(builder, stencil_vals, mask, "");
         }

         lp_build_name(stencil_vals, "s_dst");
      }
   }

   if (stencil[0].enabled) {

      if (face) {
         LLVMValueRef zero = lp_build_const_int32(gallivm, 0);

         /* front_facing = face != 0 ? ~0 : 0 */
         front_facing = LLVMBuildICmp(builder, LLVMIntNE, face, zero, "");
         front_facing = LLVMBuildSExt(builder, front_facing,
                                      LLVMIntTypeInContext(gallivm->context,
                                             s_bld.type.length*s_bld.type.width),
                                      "");
         front_facing = LLVMBuildBitCast(builder, front_facing,
                                         s_bld.int_vec_type, "");
      }

      /* convert scalar stencil refs into vectors */
      stencil_refs[0] = lp_build_broadcast_scalar(&s_bld, stencil_refs[0]);
      stencil_refs[1] = lp_build_broadcast_scalar(&s_bld, stencil_refs[1]);

      s_pass_mask = lp_build_stencil_test(&s_bld, stencil,
                                          stencil_refs, stencil_vals,
                                          front_facing);

      /* apply stencil-fail operator */
      {
         LLVMValueRef s_fail_mask = lp_build_andnot(&s_bld, orig_mask, s_pass_mask);
         stencil_vals = lp_build_stencil_op(&s_bld, stencil, S_FAIL_OP,
                                            stencil_refs, stencil_vals,
                                            s_fail_mask, front_facing);
      }
   }

   if (depth->enabled) {
      /*
       * Convert fragment Z to the desired type, aligning the LSB to the right.
       */

      assert(z_type.width == z_src_type.width);
      assert(z_type.length == z_src_type.length);
      assert(lp_check_value(z_src_type, z_src));
      if (z_src_type.floating) {
         /*
          * Convert from floating point values
          */

         if (!z_type.floating) {
            z_src = lp_build_clamped_float_to_unsigned_norm(gallivm,
                                                            z_src_type,
                                                            z_width,
                                                            z_src);
         }
      } else {
         /*
          * Convert from unsigned normalized values.
          */

         assert(!z_src_type.sign);
         assert(!z_src_type.fixed);
         assert(z_src_type.norm);
         assert(!z_type.floating);
         if (z_src_type.width > z_width) {
            LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_src_type,
                                                        z_src_type.width - z_width);
            z_src = LLVMBuildLShr(builder, z_src, shift, "");
         }
      }
      assert(lp_check_value(z_type, z_src));

      lp_build_name(z_src, "z_src");

      /* compare src Z to dst Z, returning 'pass' mask */
      z_pass = lp_build_cmp(&z_bld, depth->func, z_src, z_dst);

      if (!stencil[0].enabled) {
         /* We can potentially skip all remaining operations here, but only
          * if stencil is disabled because we still need to update the stencil
          * buffer values.  Don't need to update Z buffer values.
          */
         lp_build_mask_update(mask, z_pass);

         if (do_branch) {
            lp_build_mask_check(mask);
            do_branch = FALSE;
         }
      }

      if (depth->writemask) {
         LLVMValueRef zselectmask;

         /* mask off bits that failed Z test */
         zselectmask = LLVMBuildAnd(builder, orig_mask, z_pass, "");

         /* mask off bits that failed stencil test */
         if (s_pass_mask) {
            zselectmask = LLVMBuildAnd(builder, zselectmask, s_pass_mask, "");
         }

         /* Mix the old and new Z buffer values.
          * z_dst[i] = zselectmask[i] ? z_src[i] : z_dst[i]
          */
         z_dst = lp_build_select(&z_bld, zselectmask, z_src, z_dst);
      }

      if (stencil[0].enabled) {
         /* update stencil buffer values according to z pass/fail result */
         LLVMValueRef z_fail_mask, z_pass_mask;

         /* apply Z-fail operator */
         z_fail_mask = lp_build_andnot(&s_bld, orig_mask, z_pass);
         stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_FAIL_OP,
                                            stencil_refs, stencil_vals,
                                            z_fail_mask, front_facing);

         /* apply Z-pass operator */
         z_pass_mask = LLVMBuildAnd(builder, orig_mask, z_pass, "");
         stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_PASS_OP,
                                            stencil_refs, stencil_vals,
                                            z_pass_mask, front_facing);
      }
   }
   else {
      /* No depth test: apply Z-pass operator to stencil buffer values which
       * passed the stencil test.
       */
      s_pass_mask = LLVMBuildAnd(builder, orig_mask, s_pass_mask, "");
      stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_PASS_OP,
                                         stencil_refs, stencil_vals,
                                         s_pass_mask, front_facing);
   }

   /* Put Z and stencil bits in the right place */
   if (have_z && z_shift) {
      LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_type, z_shift);
      z_dst = LLVMBuildShl(builder, z_dst, shift, "");
   }
   if (stencil_vals && stencil_shift)
      stencil_vals = LLVMBuildShl(builder, stencil_vals,
                                  stencil_shift, "");

   /* Finally, merge the z/stencil values */
   if (format_desc->block.bits <= 32) {
      if (have_z && have_s)
         *z_value = LLVMBuildOr(builder, z_dst, stencil_vals, "");
      else if (have_z)
         *z_value = z_dst;
      else
         *z_value = stencil_vals;
      *s_value = *z_value;
   }
   else {
      *z_value = z_dst;
      *s_value = stencil_vals;
   }

   if (s_pass_mask)
      lp_build_mask_update(mask, s_pass_mask);

   if (depth->enabled && stencil[0].enabled)
      lp_build_mask_update(mask, z_pass);
}