Example 1
/**
 * Set the sign of float vector 'a' according to 'sign'.
 * If sign==0, return abs(a).
 * If sign==1, return -abs(a).
 * Other values for sign produce undefined results.
 */
LLVMValueRef
lp_build_set_sign(struct lp_build_context *bld,
                  LLVMValueRef a, LLVMValueRef sign)
{
   const struct lp_type type = bld->type;
   LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
   LLVMTypeRef vec_type = lp_build_vec_type(type);
   LLVMValueRef shift = lp_build_int_const_scalar(type, type.width - 1);
   LLVMValueRef mask = lp_build_int_const_scalar(type,
                             ~((unsigned long long) 1 << (type.width - 1)));
   LLVMValueRef val, res;

   assert(type.floating);

   /* val = reinterpret_cast<int>(a) */
   val = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
   /* val = val & mask */
   val = LLVMBuildAnd(bld->builder, val, mask, "");
   /* sign = sign << shift */
   sign = LLVMBuildShl(bld->builder, sign, shift, "");
   /* res = val | sign */
   res = LLVMBuildOr(bld->builder, val, sign, "");
   /* res = reinterpret_cast<float>(res) */
   res = LLVMBuildBitCast(bld->builder, res, vec_type, "");

   return res;
}
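
A hypothetical scalar model of the same bit manipulation, for a single 32-bit float (a sketch, not part of the library):

#include <assert.h>
#include <stdint.h>
#include <string.h>

/* Scalar sketch of lp_build_set_sign for one 32-bit float. */
static float set_sign(float a, uint32_t sign)
{
   uint32_t val;
   memcpy(&val, &a, sizeof val);   /* val = reinterpret_cast<int>(a) */
   val &= ~(1u << 31);             /* val = val & mask (abs) */
   val |= sign << 31;              /* res = val | (sign << shift) */
   memcpy(&a, &val, sizeof a);     /* res = reinterpret_cast<float>(res) */
   return a;
}

int main(void)
{
   assert(set_sign(-2.5f, 0) == 2.5f);
   assert(set_sign( 2.5f, 1) == -2.5f);
   return 0;
}
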
Example 2
/**
 * Normalized 8bit multiplication.
 *
 * - alpha plus one
 *
 *     makes the following approximation to the division (Sree)
 *    
 *       a*b/255 ~= (a*(b + 1)) >> 8
 *    
 *     which is the fastest method that satisfies the following OpenGL criteria
 *    
 *       0*0 = 0 and 255*255 = 255
 *
 * - geometric series
 *
 *     takes the geometric series approximation to the division
 *
 *       t/255 = (t >> 8) + (t >> 16) + (t >> 24) ..
 *
 *     in this case just the first two terms to fit in 16bit arithmetic
 *
 *       t/255 ~= (t + (t >> 8)) >> 8
 *
 *     note that by itself it doesn't satisfy the OpenGL criteria, as
 *     255*255 = 254, so either the special case b = 255 must be handled
 *     separately or rounding must be used
 *
 * - geometric series plus rounding
 *
 *     when using a geometric series division instead of truncating the result
 *     use roundoff in the approximation (Jim Blinn)
 *
 *       t/255 ~= (t + (t >> 8) + 0x80) >> 8
 *
 *     achieving the exact results
 *
 * @sa Alvy Ray Smith, Image Compositing Fundamentals, Tech Memo 4, Aug 15, 1995, 
 *     ftp://ftp.alvyray.com/Acrobat/4_Comp.pdf
 * @sa Michael Herf, The "double blend trick", May 2000, 
 *     http://www.stereopsis.com/doubleblend.html
 */
static LLVMValueRef
lp_build_mul_u8n(LLVMBuilderRef builder,
                 struct lp_type i16_type,
                 LLVMValueRef a, LLVMValueRef b)
{
   LLVMValueRef c8;
   LLVMValueRef ab;

   c8 = lp_build_int_const_scalar(i16_type, 8);
   
#if 0
   
   /* a*b/255 ~= (a*(b + 1)) >> 8 */
   b = LLVMBuildAdd(builder, b, lp_build_int_const_scalar(i16_type, 1), "");
   ab = LLVMBuildMul(builder, a, b, "");

#else
   
   /* ab/255 ~= (ab + (ab >> 8) + 0x80) >> 8 */
   ab = LLVMBuildMul(builder, a, b, "");
   ab = LLVMBuildAdd(builder, ab, LLVMBuildLShr(builder, ab, c8, ""), "");
   ab = LLVMBuildAdd(builder, ab, lp_build_int_const_scalar(i16_type, 0x80), "");

#endif
   
   ab = LLVMBuildLShr(builder, ab, c8, "");

   return ab;
}
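
A scalar sanity check of both approximations (a hypothetical standalone test, not library code). Note it uses Blinn's canonical ordering for the rounded form, adding 0x80 before deriving the correction term; the vector code above folds the same terms in a slightly different order:

#include <assert.h>
#include <stdint.h>

/* "alpha plus one": only guaranteed exact at the 0*0 and 255*255 endpoints */
static uint8_t mul_u8n_fast(unsigned a, unsigned b)
{
   return (uint8_t)((a * (b + 1)) >> 8);
}

/* geometric series plus rounding (Blinn): exact for all 8-bit inputs */
static uint8_t mul_u8n_round(unsigned a, unsigned b)
{
   unsigned t = a * b + 0x80;
   return (uint8_t)((t + (t >> 8)) >> 8);
}

int main(void)
{
   unsigned a, b;
   assert(mul_u8n_fast(0, 0) == 0 && mul_u8n_fast(255, 255) == 255);
   for(a = 0; a <= 255; ++a)
      for(b = 0; b <= 255; ++b)
         assert(mul_u8n_round(a, b) == (a * b + 127) / 255);
   return 0;
}
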
Example 3
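/**
 * Compute a fast approximation of 2^x: split x into integer and fractional
 * parts, build 2^ipart directly in the IEEE-754 exponent field, and refine
 * with a polynomial in the fractional part.
 */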
void
lp_build_exp2_approx(struct lp_build_context *bld,
                     LLVMValueRef x,
                     LLVMValueRef *p_exp2_int_part,
                     LLVMValueRef *p_frac_part,
                     LLVMValueRef *p_exp2)
{
   const struct lp_type type = bld->type;
   LLVMTypeRef vec_type = lp_build_vec_type(type);
   LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
   LLVMValueRef ipart = NULL;
   LLVMValueRef fpart = NULL;
   LLVMValueRef expipart = NULL;
   LLVMValueRef expfpart = NULL;
   LLVMValueRef res = NULL;

   if(p_exp2_int_part || p_frac_part || p_exp2) {
      /* TODO: optimize the constant case */
      if(LLVMIsConstant(x))
         debug_printf("%s: inefficient/imprecise constant arithmetic\n",
                      __FUNCTION__);

      assert(type.floating && type.width == 32);

      x = lp_build_min(bld, x, lp_build_const_scalar(type,  129.0));
      x = lp_build_max(bld, x, lp_build_const_scalar(type, -126.99999));

      /* ipart = int(x - 0.5) */
      ipart = LLVMBuildSub(bld->builder, x, lp_build_const_scalar(type, 0.5f), "");
      ipart = LLVMBuildFPToSI(bld->builder, ipart, int_vec_type, "");

      /* fpart = x - ipart */
      fpart = LLVMBuildSIToFP(bld->builder, ipart, vec_type, "");
      fpart = LLVMBuildSub(bld->builder, x, fpart, "");
   }

   if(p_exp2_int_part || p_exp2) {
      /* expipart = (float) (1 << ipart) */
      expipart = LLVMBuildAdd(bld->builder, ipart, lp_build_int_const_scalar(type, 127), "");
      expipart = LLVMBuildShl(bld->builder, expipart, lp_build_int_const_scalar(type, 23), "");
      expipart = LLVMBuildBitCast(bld->builder, expipart, vec_type, "");
   }

   if(p_exp2) {
      expfpart = lp_build_polynomial(bld, fpart, lp_build_exp2_polynomial,
                                     Elements(lp_build_exp2_polynomial));

      res = LLVMBuildMul(bld->builder, expipart, expfpart, "");
   }

   if(p_exp2_int_part)
      *p_exp2_int_part = expipart;

   if(p_frac_part)
      *p_frac_part = fpart;

   if(p_exp2)
      *p_exp2 = res;
}
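
The integer part is turned into 2^ipart purely with integer bit operations. A hypothetical scalar sketch of that step:

#include <assert.h>
#include <stdint.h>
#include <string.h>

/* Scalar sketch of the integer-part step: build 2^i for i in [-126, 128]
 * by writing (i + 127) straight into the IEEE-754 exponent field. */
static float exp2_int(int i)
{
   uint32_t bits = (uint32_t)(i + 127) << 23;
   float f;
   memcpy(&f, &bits, sizeof f);
   return f;
}

int main(void)
{
   assert(exp2_int(0)  == 1.0f);
   assert(exp2_int(5)  == 32.0f);
   assert(exp2_int(-3) == 0.125f);
   return 0;
}
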
Example 4
/**
 * Generate abs(a)
 */
LLVMValueRef
lp_build_abs(struct lp_build_context *bld,
             LLVMValueRef a)
{
   const struct lp_type type = bld->type;
   LLVMTypeRef vec_type = lp_build_vec_type(type);

   if(!type.sign)
      return a;

   if(type.floating) {
      /* Mask out the sign bit */
      LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
      unsigned long long absMask = ~(1ULL << (type.width - 1));
      LLVMValueRef mask = lp_build_int_const_scalar(type, absMask);
      a = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
      a = LLVMBuildAnd(bld->builder, a, mask, "");
      a = LLVMBuildBitCast(bld->builder, a, vec_type, "");
      return a;
   }

   if(type.width*type.length == 128 && util_cpu_caps.has_ssse3) {
      switch(type.width) {
      case 8:
         return lp_build_intrinsic_unary(bld->builder, "llvm.x86.ssse3.pabs.b.128", vec_type, a);
      case 16:
         return lp_build_intrinsic_unary(bld->builder, "llvm.x86.ssse3.pabs.w.128", vec_type, a);
      case 32:
         return lp_build_intrinsic_unary(bld->builder, "llvm.x86.ssse3.pabs.d.128", vec_type, a);
      }
   }

   return lp_build_max(bld, a, LLVMBuildNeg(bld->builder, a, ""));
}
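
A scalar model of the floating-point branch (hypothetical sketch):

#include <assert.h>
#include <stdint.h>
#include <string.h>

/* Scalar sketch: abs() by clearing the sign bit, the same absMask as
 * above for type.width == 32. */
static float abs_bits(float a)
{
   uint32_t bits;
   memcpy(&bits, &a, sizeof bits);
   bits &= ~(1u << 31);
   memcpy(&a, &bits, sizeof a);
   return a;
}

int main(void)
{
   assert(abs_bits(-1.5f) == 1.5f);
   assert(abs_bits( 1.5f) == 1.5f);
   return 0;
}
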
Example 5
/**
 * Convert float[] to int[] with floor().
 */
LLVMValueRef
lp_build_ifloor(struct lp_build_context *bld,
                LLVMValueRef a)
{
   const struct lp_type type = bld->type;
   LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
   LLVMValueRef res;

   assert(type.floating);
   assert(lp_check_value(type, a));

   if(util_cpu_caps.has_sse4_1) {
      res = lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_FLOOR);
   }
   else {
      /* Build a small negative offset and select it with the sign bit, so
       * that the truncating conversion below yields floor(a) */
      LLVMTypeRef vec_type = lp_build_vec_type(type);
      unsigned mantissa = lp_mantissa(type);
      LLVMValueRef mask = lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1));
      LLVMValueRef sign;
      LLVMValueRef offset;

      /* sign = a < 0 ? ~0 : 0 */
      sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
      sign = LLVMBuildAnd(bld->builder, sign, mask, "");
      sign = LLVMBuildAShr(bld->builder, sign, lp_build_int_const_scalar(type, type.width - 1), "");
      lp_build_name(sign, "floor.sign");

      /* offset = -0.99999(9)f */
      offset = lp_build_const_scalar(type, -(double)(((unsigned long long)1 << mantissa) - 1)/((unsigned long long)1 << mantissa));
      offset = LLVMConstBitCast(offset, int_vec_type);

      /* offset = a < 0 ? -0.99999(9)f : 0.0f */
      offset = LLVMBuildAnd(bld->builder, offset, sign, "");
      offset = LLVMBuildBitCast(bld->builder, offset, vec_type, "");
      lp_build_name(offset, "floor.offset");

      res = LLVMBuildAdd(bld->builder, a, offset, "");
      lp_build_name(res, "floor.res");
   }

   res = LLVMBuildFPToSI(bld->builder, res, int_vec_type, "");
   lp_build_name(res, "floor");

   return res;
}
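
The offset trick may be clearer in scalar form. This hypothetical sketch assumes lp_mantissa(type) == 23 for 32-bit floats:

#include <assert.h>

/* Scalar sketch of the non-SSE4.1 path: the float-to-int conversion
 * truncates toward zero, so negative inputs are first nudged down by
 * -(2^23 - 1)/2^23, just under 1 in magnitude.
 * Caveat: like the vector code, the float add can hit a rounding tie on
 * exact negative integers and floor one too far (e.g. -2.0f -> -3). */
static int ifloor(float a)
{
   float offset = (a < 0.0f) ? -0.99999988f : 0.0f;
   return (int)(a + offset);
}

int main(void)
{
   assert(ifloor( 2.5f) ==  2);
   assert(ifloor(-2.5f) == -3);
   assert(ifloor(-0.5f) == -1);
   return 0;
}
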
Example 6
/**
 * Special case for converting clamped IEEE-754 floats to unsigned norms.
 *
 * The mathematical voodoo below may seem excessive but it is actually
 * paramount we do it this way for several reasons. First, there is no
 * single-precision FP to unsigned integer conversion instruction in SSE.
 * Second, even if there were, since the FP mantissa takes only a fraction
 * of the register bits, the typical scale-and-cast approach would require
 * double precision for accurate results, and therefore half the throughput.
 *
 * Although the result values can be scaled to an arbitrary bit width specified
 * by dst_width, the actual result type will have the same width as src_type.
 */
LLVMValueRef
lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder,
                                        struct lp_type src_type,
                                        unsigned dst_width,
                                        LLVMValueRef src)
{
   LLVMTypeRef int_vec_type = lp_build_int_vec_type(src_type);
   LLVMValueRef res;
   unsigned mantissa;
   unsigned n;
   unsigned long long ubound;
   unsigned long long mask;
   double scale;
   double bias;

   assert(src_type.floating);

   mantissa = lp_mantissa(src_type);

   /* We cannot carry more bits than the mantissa */
   n = MIN2(mantissa, dst_width);

   /* These magic coefficients make the desired result appear in the least
    * significant bits of the mantissa.
    */
   ubound = ((unsigned long long)1 << n);
   mask = ubound - 1;
   scale = (double)mask/ubound;
   bias = (double)((unsigned long long)1 << (mantissa - n));

   res = LLVMBuildMul(builder, src, lp_build_const_scalar(src_type, scale), "");
   res = LLVMBuildAdd(builder, res, lp_build_const_scalar(src_type, bias), "");
   res = LLVMBuildBitCast(builder, res, int_vec_type, "");

   if(dst_width > n) {
      int shift = dst_width - n;
      res = LLVMBuildShl(builder, res, lp_build_int_const_scalar(src_type, shift), "");

      /* TODO: Fill in the empty lower bits for additional precision? */
#if 0
      {
         LLVMValueRef msb;
         msb = LLVMBuildLShr(builder, res, lp_build_int_const_scalar(src_type, dst_width - 1), "");
         msb = LLVMBuildShl(builder, msb, lp_build_int_const_scalar(src_type, shift), "");
         msb = LLVMBuildSub(builder, msb, lp_build_int_const_scalar(src_type, 1), "");
         res = LLVMBuildOr(builder, res, msb, "");
      }
#elif 0
      while(shift > 0) {
         res = LLVMBuildOr(builder, res, LLVMBuildLShr(builder, res, lp_build_int_const_scalar(src_type, n), ""), "");
         shift -= n;
         n *= 2;
      }
#endif
   }
   else
      res = LLVMBuildAnd(builder, res, lp_build_int_const_scalar(src_type, mask), "");

   return res;
}
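
A hypothetical scalar rendering of the same voodoo for one float lane with dst_width = n = 8 and a 23-bit mantissa:

#include <assert.h>
#include <stdint.h>
#include <string.h>

/* Scale so the result lands in the low mantissa bits, add a bias of
 * 2^(23-8) to pin the exponent, then read the bits back and mask. */
static unsigned float_to_unorm8(float x)   /* x must be in [0, 1] */
{
   const unsigned n = 8;
   const uint64_t ubound = 1ull << n;              /* 256 */
   const uint64_t mask = ubound - 1;               /* 255 */
   const double scale = (double)mask / ubound;     /* 255/256 */
   const double bias = (double)(1ull << (23 - n)); /* 2^15 */
   float f = x * (float)scale + (float)bias;
   uint32_t bits;
   memcpy(&bits, &f, sizeof bits);
   return bits & mask;
}

int main(void)
{
   assert(float_to_unorm8(0.0f) == 0);
   assert(float_to_unorm8(1.0f) == 255);
   return 0;
}
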
Example 7
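/**
 * Build a constant 4-element vector of 32-bit signed integers, with all
 * elements set to 'value'.
 */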
static LLVMValueRef
build_int32_vec_const(int value)
{
   struct lp_type i32_type;

   memset(&i32_type, 0, sizeof i32_type);
   i32_type.floating = FALSE; /* values are integers */
   i32_type.sign = TRUE;      /* values are signed */
   i32_type.norm = FALSE;     /* values are not normalized */
   i32_type.width = 32;       /* 32-bit int values */
   i32_type.length = 4;       /* 4 elements per vector */
   return lp_build_int_const_scalar(i32_type, value);
}
Example 8
/**
 * Optimized multiplication of a vector by a small immediate integer constant.
 */
LLVMValueRef
lp_build_mul_imm(struct lp_build_context *bld,
                 LLVMValueRef a,
                 int b)
{
   LLVMValueRef factor;

   if(b == 0)
      return bld->zero;

   if(b == 1)
      return a;

   if(b == -1)
      return LLVMBuildNeg(bld->builder, a, "");

   if(b == 2 && bld->type.floating)
      return lp_build_add(bld, a, a);

   if(util_is_pot(b)) {
      unsigned shift = ffs(b) - 1;

      if(bld->type.floating) {
#if 0
         /*
          * Power of two multiplication by directly manipulating the mantissa.
          *
          * XXX: This might not always be faster; it will introduce a small error
          * for multiplication by zero, and it will produce wrong results
          * for Inf and NaN.
          */
         unsigned mantissa = lp_mantissa(bld->type);
         factor = lp_build_int_const_scalar(bld->type, (unsigned long long)shift << mantissa);
         a = LLVMBuildBitCast(bld->builder, a, lp_build_int_vec_type(bld->type), "");
         a = LLVMBuildAdd(bld->builder, a, factor, "");
         a = LLVMBuildBitCast(bld->builder, a, lp_build_vec_type(bld->type), "");
         return a;
#endif
      }
      else {
         factor = lp_build_const_scalar(bld->type, shift);
         return LLVMBuildShl(bld->builder, a, factor, "");
      }
   }

   factor = lp_build_const_scalar(bld->type, (double)b);
   return lp_build_mul(bld, a, factor);
}
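
The disabled branch manipulates the exponent field directly. A hypothetical scalar sketch of that trick, with the same caveats for 0, Inf and NaN:

#include <assert.h>
#include <stdint.h>
#include <string.h>

/* Multiply a float by 2^shift by adding shift straight to the IEEE-754
 * exponent field (23 = number of stored mantissa bits). Wrong for 0,
 * Inf and NaN inputs, as the comment above warns. */
static float mul_pot(float a, unsigned shift)
{
   uint32_t bits;
   memcpy(&bits, &a, sizeof bits);
   bits += shift << 23;
   memcpy(&a, &bits, sizeof a);
   return a;
}

int main(void)
{
   assert(mul_pot( 1.5f, 3) == 12.0f);
   assert(mul_pot(-2.0f, 1) == -4.0f);
   return 0;
}
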
Example 9
/**
 * Inverse of lp_build_clamped_float_to_unsigned_norm above.
 */
LLVMValueRef
lp_build_unsigned_norm_to_float(LLVMBuilderRef builder,
                                unsigned src_width,
                                struct lp_type dst_type,
                                LLVMValueRef src)
{
   LLVMTypeRef vec_type = lp_build_vec_type(dst_type);
   LLVMTypeRef int_vec_type = lp_build_int_vec_type(dst_type);
   LLVMValueRef bias_;
   LLVMValueRef res;
   unsigned mantissa;
   unsigned n;
   unsigned long long ubound;
   unsigned long long mask;
   double scale;
   double bias;

   mantissa = lp_mantissa(dst_type);

   n = MIN2(mantissa, src_width);

   ubound = ((unsigned long long)1 << n);
   mask = ubound - 1;
   scale = (double)ubound/mask;
   bias = (double)((unsigned long long)1 << (mantissa - n));

   res = src;

   if(src_width > mantissa) {
      int shift = src_width - mantissa;
      res = LLVMBuildLShr(builder, res, lp_build_int_const_scalar(dst_type, shift), "");
   }

   bias_ = lp_build_const_scalar(dst_type, bias);

   res = LLVMBuildOr(builder,
                     res,
                     LLVMBuildBitCast(builder, bias_, int_vec_type, ""), "");

   res = LLVMBuildBitCast(builder, res, vec_type, "");

   res = LLVMBuildSub(builder, res, bias_, "");
   res = LLVMBuildMul(builder, res, lp_build_const_scalar(dst_type, scale), "");

   return res;
}
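
A hypothetical scalar rendering of the inverse trick for src_width = n = 8 and a 23-bit mantissa:

#include <assert.h>
#include <stdint.h>
#include <string.h>

/* Drop the integer into the low mantissa bits of the bias 2^(23-8),
 * subtract the bias back out, and rescale [0, 255/256] to [0, 1]. */
static float unorm8_to_float(unsigned u)   /* u in [0, 255] */
{
   const double bias = (double)(1ull << (23 - 8));   /* 2^15 */
   const double scale = 256.0 / 255.0;               /* ubound/mask */
   uint32_t bits;
   float f = (float)bias;
   memcpy(&bits, &f, sizeof bits);
   bits |= u;                         /* u occupies the low mantissa bits */
   memcpy(&f, &bits, sizeof f);
   return (f - (float)bias) * (float)scale;
}

int main(void)
{
   assert(unorm8_to_float(0) == 0.0f);
   assert(unorm8_to_float(255) == 1.0f);
   return 0;
}
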
Example 10
LLVMValueRef
lp_build_sgn(struct lp_build_context *bld,
             LLVMValueRef a)
{
   const struct lp_type type = bld->type;
   LLVMTypeRef vec_type = lp_build_vec_type(type);
   LLVMValueRef cond;
   LLVMValueRef res;

   /* Handle non-zero case */
   if(!type.sign) {
      /* if not zero then sign must be positive */
      res = bld->one;
   }
   else if(type.floating) {
      /* OR the sign bit of a into the constant 1.0 */
      LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
      LLVMValueRef mask = lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1));
      LLVMValueRef sign;
      LLVMValueRef one;
      sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
      sign = LLVMBuildAnd(bld->builder, sign, mask, "");
      one = LLVMConstBitCast(bld->one, int_vec_type);
      res = LLVMBuildOr(bld->builder, sign, one, "");
      res = LLVMBuildBitCast(bld->builder, res, vec_type, "");
   }
   else
   {
      LLVMValueRef minus_one = lp_build_const_scalar(type, -1.0);
      cond = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, bld->zero);
      res = lp_build_select(bld, cond, bld->one, minus_one);
   }

   /* Handle zero */
   cond = lp_build_cmp(bld, PIPE_FUNC_EQUAL, a, bld->zero);
   res = lp_build_select(bld, cond, bld->zero, res);

   return res;
}
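
The floating-point branch in scalar form (hypothetical sketch; zero is handled with an explicit test instead of a select):

#include <assert.h>
#include <stdint.h>
#include <string.h>

/* OR the sign bit of 'a' into the bits of 1.0f, giving +1.0 or -1.0. */
static float sgn(float a)
{
   uint32_t abits, one = 0x3f800000;   /* bits of 1.0f */
   if(a == 0.0f)
      return 0.0f;
   memcpy(&abits, &a, sizeof abits);
   one |= abits & 0x80000000;          /* copy the sign bit */
   memcpy(&a, &one, sizeof a);
   return a;
}

int main(void)
{
   assert(sgn(-3.0f) == -1.0f);
   assert(sgn( 2.0f) ==  1.0f);
   assert(sgn( 0.0f) ==  0.0f);
   return 0;
}
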
Example 11
LLVMValueRef
lp_build_iround(struct lp_build_context *bld,
                LLVMValueRef a)
{
   const struct lp_type type = bld->type;
   LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
   LLVMValueRef res;

   assert(type.floating);
   assert(lp_check_value(type, a));

   if(util_cpu_caps.has_sse4_1) {
      res = lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_NEAREST);
   }
   else {
      LLVMTypeRef vec_type = lp_build_vec_type(type);
      LLVMValueRef mask = lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1));
      LLVMValueRef sign;
      LLVMValueRef half;

      /* get sign bit */
      sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
      sign = LLVMBuildAnd(bld->builder, sign, mask, "");

      /* half = 0.5 with the sign bit of a */
      half = lp_build_const_scalar(type, 0.5);
      half = LLVMBuildBitCast(bld->builder, half, int_vec_type, "");
      half = LLVMBuildOr(bld->builder, sign, half, "");
      half = LLVMBuildBitCast(bld->builder, half, vec_type, "");

      res = LLVMBuildAdd(bld->builder, a, half, "");
   }

   res = LLVMBuildFPToSI(bld->builder, res, int_vec_type, "");

   return res;
}
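
A scalar model of the fallback path, assuming the C library's copysignf (hypothetical sketch):

#include <assert.h>
#include <math.h>

/* Give 0.5 the sign of 'a' and let the truncating float-to-int
 * conversion finish the round-to-nearest. */
static int iround(float a)
{
   return (int)(a + copysignf(0.5f, a));
}

int main(void)
{
   assert(iround( 2.3f) ==  2);
   assert(iround( 2.7f) ==  3);
   assert(iround(-2.7f) == -3);
   return 0;
}
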
Example 12
/**
 * Linear interpolation.
 *
 * This also works for integer values with a few caveats.
 *
 * @sa http://www.stereopsis.com/doubleblend.html
 */
LLVMValueRef
lp_build_lerp(struct lp_build_context *bld,
              LLVMValueRef x,
              LLVMValueRef v0,
              LLVMValueRef v1)
{
   LLVMValueRef delta;
   LLVMValueRef res;

   delta = lp_build_sub(bld, v1, v0);

   res = lp_build_mul(bld, x, delta);

   res = lp_build_add(bld, v0, res);

   if(bld->type.fixed)
      /* XXX: This step is necessary for lerping 8-bit colors stored in
       * 16 bits, but it will be wrong for other uses. Basically we need a
       * more powerful lp_type, capable of further distinguishing how the
       * values are interpreted from how they are stored. */
      res = LLVMBuildAnd(bld->builder, res, lp_build_int_const_scalar(bld->type, (1 << bld->type.width/2) - 1), "");

   return res;
}
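
For floating-point types this is the usual v0 + x*(v1 - v0); a minimal scalar sketch:

#include <assert.h>

/* With x in [0, 1] the result stays between v0 and v1. */
static float lerp(float x, float v0, float v1)
{
   return v0 + x * (v1 - v0);
}

int main(void)
{
   assert(lerp(0.0f, 10.0f, 20.0f) == 10.0f);
   assert(lerp(1.0f, 10.0f, 20.0f) == 20.0f);
   assert(lerp(0.5f, 10.0f, 20.0f) == 15.0f);
   return 0;
}
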
Example 13
/**
 * Generate the code to do inside/outside triangle testing for the
 * four pixels in a 2x2 quad.  This will set the four elements of the
 * quad mask vector to 0 or ~0.
 * \param i  which quad of the quad group to test, in [0,3]
 */
static void
generate_tri_edge_mask(LLVMBuilderRef builder,
                       unsigned i,
                       LLVMValueRef *mask,      /* ivec4, out */
                       LLVMValueRef c0,         /* int32 */
                       LLVMValueRef c1,         /* int32 */
                       LLVMValueRef c2,         /* int32 */
                       LLVMValueRef step0_ptr,  /* ivec4 */
                       LLVMValueRef step1_ptr,  /* ivec4 */
                       LLVMValueRef step2_ptr)  /* ivec4 */
{
#define OPTIMIZE_IN_OUT_TEST 0
#if OPTIMIZE_IN_OUT_TEST
   struct lp_build_if_state ifctx;
   LLVMValueRef not_draw_all;
#endif
   struct lp_build_flow_context *flow;
   struct lp_type i32_type;
   LLVMTypeRef i32vec4_type, mask_type;
   LLVMValueRef c0_vec, c1_vec, c2_vec;
   LLVMValueRef in_out_mask;

   assert(i < 4);
   
   /* int32 vector type */
   memset(&i32_type, 0, sizeof i32_type);
   i32_type.floating = FALSE; /* values are integers */
   i32_type.sign = TRUE;      /* values are signed */
   i32_type.norm = FALSE;     /* values are not normalized */
   i32_type.width = 32;       /* 32-bit int values */
   i32_type.length = 4;       /* 4 elements per vector */

   i32vec4_type = lp_build_int32_vec4_type();

   mask_type = LLVMIntType(32 * 4);

   /*
    * Use a conditional here to do detailed pixel in/out testing.
    * We only have to do this if c0 != INT_MIN.
    */
   flow = lp_build_flow_create(builder);
   lp_build_flow_scope_begin(flow);

   {
#if OPTIMIZE_IN_OUT_TEST
      /* not_draw_all = (c0 != INT_MIN) */
      not_draw_all = LLVMBuildICmp(builder,
                                   LLVMIntNE,
                                   c0,
                                   LLVMConstInt(LLVMInt32Type(), INT_MIN, 0),
                                   "");

      in_out_mask = lp_build_int_const_scalar(i32_type, ~0);


      lp_build_flow_scope_declare(flow, &in_out_mask);

      /* if (not_draw_all) {... */
      lp_build_if(&ifctx, flow, builder, not_draw_all);
#endif
      {
         LLVMValueRef step0_vec, step1_vec, step2_vec;
         LLVMValueRef m0_vec, m1_vec, m2_vec;
         LLVMValueRef index, m;

         /* c0_vec = {c0, c0, c0, c0}
          * Note that we emit this code four times but LLVM optimizes away
          * three instances of it.
          */
         c0_vec = lp_build_broadcast(builder, i32vec4_type, c0);
         c1_vec = lp_build_broadcast(builder, i32vec4_type, c1);
         c2_vec = lp_build_broadcast(builder, i32vec4_type, c2);
         lp_build_name(c0_vec, "edgeconst0vec");
         lp_build_name(c1_vec, "edgeconst1vec");
         lp_build_name(c2_vec, "edgeconst2vec");

         /* load step0_vec, step1_vec, step2_vec from memory */
         index = LLVMConstInt(LLVMInt32Type(), i, 0);
         step0_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step0_ptr, &index, 1, ""), "");
         step1_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step1_ptr, &index, 1, ""), "");
         step2_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step2_ptr, &index, 1, ""), "");
         lp_build_name(step0_vec, "step0vec");
         lp_build_name(step1_vec, "step1vec");
         lp_build_name(step2_vec, "step2vec");

         /* m0_vec = step0_ptr[i] > c0_vec */
         m0_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step0_vec, c0_vec);
         m1_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step1_vec, c1_vec);
         m2_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step2_vec, c2_vec);

         /* in_out_mask = m0_vec & m1_vec & m2_vec */
         m = LLVMBuildAnd(builder, m0_vec, m1_vec, "");
         in_out_mask = LLVMBuildAnd(builder, m, m2_vec, "");
         lp_build_name(in_out_mask, "inoutmaskvec");
      }
#if OPTIMIZE_IN_OUT_TEST
      lp_build_endif(&ifctx);
#endif

   }
   lp_build_flow_scope_end(flow);
   lp_build_flow_destroy(flow);

   /* This is the initial alive/dead pixel mask for a quad of four pixels.
    * It's an int[4] vector with each word set to 0 or ~0.
    * Words will get cleared when pixels fail the Z test, etc.
    */
   *mask = in_out_mask;
}
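
In scalar terms, the test performed for each pixel of the quad amounts to this hypothetical sketch:

#include <assert.h>
#include <stdint.h>

/* A pixel is inside the triangle when the step value of each of the
 * three edge functions exceeds the corresponding edge constant; the
 * quad mask stores 0 or ~0 per pixel. */
static int32_t pixel_in_out(int32_t step0, int32_t step1, int32_t step2,
                            int32_t c0, int32_t c1, int32_t c2)
{
   return (step0 > c0 && step1 > c1 && step2 > c2) ? ~0 : 0;
}

int main(void)
{
   assert(pixel_in_out(5,  5, 5, 0, 0, 0) == ~0);  /* inside all edges */
   assert(pixel_in_out(5, -5, 5, 0, 0, 0) == 0);   /* outside one edge */
   return 0;
}
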
Example 14
/**
 * Generic type conversion.
 *
 * TODO: Take a precision argument, or even better, add a new precision member
 * to the lp_type structure.
 */
void
lp_build_conv(LLVMBuilderRef builder,
              struct lp_type src_type,
              struct lp_type dst_type,
              const LLVMValueRef *src, unsigned num_srcs,
              LLVMValueRef *dst, unsigned num_dsts)
{
   struct lp_type tmp_type;
   LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH];
   unsigned num_tmps;
   unsigned i;

   /* Register width must remain constant */
   assert(src_type.width * src_type.length == dst_type.width * dst_type.length);

   /* We must not lose or gain channels. Only precision may change. */
   assert(src_type.length * num_srcs == dst_type.length * num_dsts);

   assert(src_type.length <= LP_MAX_VECTOR_LENGTH);
   assert(dst_type.length <= LP_MAX_VECTOR_LENGTH);

   tmp_type = src_type;
   for(i = 0; i < num_srcs; ++i)
      tmp[i] = src[i];
   num_tmps = num_srcs;

   /*
    * Clamp if necessary
    */

   if(memcmp(&src_type, &dst_type, sizeof src_type) != 0) {
      struct lp_build_context bld;
      double src_min = lp_const_min(src_type);
      double dst_min = lp_const_min(dst_type);
      double src_max = lp_const_max(src_type);
      double dst_max = lp_const_max(dst_type);
      LLVMValueRef thres;

      lp_build_context_init(&bld, builder, tmp_type);

      if(src_min < dst_min) {
         if(dst_min == 0.0)
            thres = bld.zero;
         else
            thres = lp_build_const_scalar(src_type, dst_min);
         for(i = 0; i < num_tmps; ++i)
            tmp[i] = lp_build_max(&bld, tmp[i], thres);
      }

      if(src_max > dst_max) {
         if(dst_max == 1.0)
            thres = bld.one;
         else
            thres = lp_build_const_scalar(src_type, dst_max);
         for(i = 0; i < num_tmps; ++i)
            tmp[i] = lp_build_min(&bld, tmp[i], thres);
      }
   }

   /*
    * Scale to the narrowest range
    */

   if(dst_type.floating) {
      /* Nothing to do */
   }
   else if(tmp_type.floating) {
      if(!dst_type.fixed && !dst_type.sign && dst_type.norm) {
         for(i = 0; i < num_tmps; ++i) {
            tmp[i] = lp_build_clamped_float_to_unsigned_norm(builder,
                                                             tmp_type,
                                                             dst_type.width,
                                                             tmp[i]);
         }
         tmp_type.floating = FALSE;
      }
      else {
         double dst_scale = lp_const_scale(dst_type);
         LLVMTypeRef tmp_vec_type;

         if (dst_scale != 1.0) {
            LLVMValueRef scale = lp_build_const_scalar(tmp_type, dst_scale);
            for(i = 0; i < num_tmps; ++i)
               tmp[i] = LLVMBuildMul(builder, tmp[i], scale, "");
         }

         /* Use an equally sized integer for intermediate computations */
         tmp_type.floating = FALSE;
         tmp_vec_type = lp_build_vec_type(tmp_type);
         for(i = 0; i < num_tmps; ++i) {
#if 0
            if(dst_type.sign)
               tmp[i] = LLVMBuildFPToSI(builder, tmp[i], tmp_vec_type, "");
            else
               tmp[i] = LLVMBuildFPToUI(builder, tmp[i], tmp_vec_type, "");
#else
            /* FIXME: there is no SSE counterpart for LLVMBuildFPToUI */
            tmp[i] = LLVMBuildFPToSI(builder, tmp[i], tmp_vec_type, "");
#endif
         }
      }
   }
   else {
      unsigned src_shift = lp_const_shift(src_type);
      unsigned dst_shift = lp_const_shift(dst_type);

      /* FIXME: compensate different offsets too */
      if(src_shift > dst_shift) {
         LLVMValueRef shift = lp_build_int_const_scalar(tmp_type, src_shift - dst_shift);
         for(i = 0; i < num_tmps; ++i)
            if(src_type.sign)
               tmp[i] = LLVMBuildAShr(builder, tmp[i], shift, "");
            else
               tmp[i] = LLVMBuildLShr(builder, tmp[i], shift, "");
      }
   }

   /*
    * Truncate or expand bit width
    */

   assert(!tmp_type.floating || tmp_type.width == dst_type.width);

   if(tmp_type.width > dst_type.width) {
      assert(num_dsts == 1);
      tmp[0] = lp_build_pack(builder, tmp_type, dst_type, TRUE, tmp, num_tmps);
      tmp_type.width = dst_type.width;
      tmp_type.length = dst_type.length;
      num_tmps = 1;
   }

   if(tmp_type.width < dst_type.width) {
      assert(num_tmps == 1);
      lp_build_unpack(builder, tmp_type, dst_type, tmp[0], tmp, num_dsts);
      tmp_type.width = dst_type.width;
      tmp_type.length = dst_type.length;
      num_tmps = num_dsts;
   }

   assert(tmp_type.width == dst_type.width);
   assert(tmp_type.length == dst_type.length);
   assert(num_tmps == num_dsts);

   /*
    * Scale to the widest range
    */

   if(src_type.floating) {
      /* Nothing to do */
   }
   else if(!src_type.floating && dst_type.floating) {
      if(!src_type.fixed && !src_type.sign && src_type.norm) {
         for(i = 0; i < num_tmps; ++i) {
            tmp[i] = lp_build_unsigned_norm_to_float(builder,
                                                     src_type.width,
                                                     dst_type,
                                                     tmp[i]);
         }
         tmp_type.floating = TRUE;
      }
      else {
         double src_scale = lp_const_scale(src_type);
         LLVMTypeRef tmp_vec_type;

         /* Use an equally sized integer for intermediate computations */
         tmp_type.floating = TRUE;
         tmp_type.sign = TRUE;
         tmp_vec_type = lp_build_vec_type(tmp_type);
         for(i = 0; i < num_tmps; ++i) {
#if 0
            if(dst_type.sign)
               tmp[i] = LLVMBuildSIToFP(builder, tmp[i], tmp_vec_type, "");
            else
               tmp[i] = LLVMBuildUIToFP(builder, tmp[i], tmp_vec_type, "");
#else
            /* FIXME: there is no SSE counterpart for LLVMBuildUIToFP */
            tmp[i] = LLVMBuildSIToFP(builder, tmp[i], tmp_vec_type, "");
#endif
         }

         if (src_scale != 1.0) {
            LLVMValueRef scale = lp_build_const_scalar(tmp_type, 1.0/src_scale);
            for(i = 0; i < num_tmps; ++i)
               tmp[i] = LLVMBuildMul(builder, tmp[i], scale, "");
         }
      }
   }
   else {
      unsigned src_shift = lp_const_shift(src_type);
      unsigned dst_shift = lp_const_shift(dst_type);

      /* FIXME: compensate different offsets too */
      if(src_shift < dst_shift) {
         LLVMValueRef shift = lp_build_int_const_scalar(tmp_type, dst_shift - src_shift);
         for(i = 0; i < num_tmps; ++i)
            tmp[i] = LLVMBuildShl(builder, tmp[i], shift, "");
      }
   }

   for(i = 0; i < num_dsts; ++i)
      dst[i] = tmp[i];
}
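
As an illustration, one common path through this routine (32-bit float to 8-bit unsigned norm, ignoring the vector packing and the special cases above) behaves like this hypothetical scalar sketch:

#include <assert.h>

/* Clamp to the destination range, scale by the destination constant 255,
 * then truncate like FPToSI. (The vector code prefers the clamped-float
 * special case above when it applies.) */
static unsigned char conv_float_to_un8(float x)
{
   if(x < 0.0f) x = 0.0f;
   if(x > 1.0f) x = 1.0f;
   return (unsigned char)(x * 255.0f);
}

int main(void)
{
   assert(conv_float_to_un8(-0.5f) == 0);
   assert(conv_float_to_un8( 1.5f) == 255);
   assert(conv_float_to_un8( 1.0f) == 255);
   return 0;
}
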
Example 15
/**
 * Generate a * b
 */
LLVMValueRef
lp_build_mul(struct lp_build_context *bld,
             LLVMValueRef a,
             LLVMValueRef b)
{
   const struct lp_type type = bld->type;
   LLVMValueRef shift;
   LLVMValueRef res;

   if(a == bld->zero)
      return bld->zero;
   if(a == bld->one)
      return b;
   if(b == bld->zero)
      return bld->zero;
   if(b == bld->one)
      return a;
   if(a == bld->undef || b == bld->undef)
      return bld->undef;

   if(!type.floating && !type.fixed && type.norm) {
      if(type.width == 8) {
         struct lp_type i16_type = lp_wider_type(type);
         LLVMValueRef al, ah, bl, bh, abl, abh, ab;

         lp_build_unpack2(bld->builder, type, i16_type, a, &al, &ah);
         lp_build_unpack2(bld->builder, type, i16_type, b, &bl, &bh);

         /* PMULLW, PSRLW, PADDW */
         abl = lp_build_mul_u8n(bld->builder, i16_type, al, bl);
         abh = lp_build_mul_u8n(bld->builder, i16_type, ah, bh);

         ab = lp_build_pack2(bld->builder, i16_type, type, abl, abh);
         
         return ab;
      }

      /* FIXME */
      assert(0);
   }

   if(type.fixed)
      shift = lp_build_int_const_scalar(type, type.width/2);
   else
      shift = NULL;

   if(LLVMIsConstant(a) && LLVMIsConstant(b)) {
      res = LLVMConstMul(a, b);
      if(shift) {
         if(type.sign)
            res = LLVMConstAShr(res, shift);
         else
            res = LLVMConstLShr(res, shift);
      }
   }
   else {
      res = LLVMBuildMul(bld->builder, a, b, "");
      if(shift) {
         if(type.sign)
            res = LLVMBuildAShr(bld->builder, res, shift, "");
         else
            res = LLVMBuildLShr(bld->builder, res, shift, "");
      }
   }

   return res;
}
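
The fixed-point branch divides the extra scale back out after the multiply. A hypothetical scalar sketch for a 16.16 format (width = 32, so shift = width/2 = 16); a 64-bit intermediate is used here to sidestep the overflow the truncating vector multiply tolerates:

#include <assert.h>
#include <stdint.h>

static int32_t mul_fixed(int32_t a, int32_t b)
{
   return (int32_t)(((int64_t)a * b) >> 16);
}

int main(void)
{
   int32_t one = 1 << 16;       /* 1.0 in 16.16 fixed point */
   int32_t half = 1 << 15;      /* 0.5 */
   assert(mul_fixed(one, half) == half);           /* 1.0 * 0.5 == 0.5 */
   assert(mul_fixed(3 * one, half) == one + half); /* 3.0 * 0.5 == 1.5 */
   return 0;
}
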
Example 16
/**
 * See http://www.devmaster.net/forums/showthread.php?p=43580
 */
void
lp_build_log2_approx(struct lp_build_context *bld,
                     LLVMValueRef x,
                     LLVMValueRef *p_exp,
                     LLVMValueRef *p_floor_log2,
                     LLVMValueRef *p_log2)
{
   const struct lp_type type = bld->type;
   LLVMTypeRef vec_type = lp_build_vec_type(type);
   LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);

   LLVMValueRef expmask = lp_build_int_const_scalar(type, 0x7f800000);
   LLVMValueRef mantmask = lp_build_int_const_scalar(type, 0x007fffff);
   LLVMValueRef one = LLVMConstBitCast(bld->one, int_vec_type);

   LLVMValueRef i = NULL;
   LLVMValueRef exp = NULL;
   LLVMValueRef mant = NULL;
   LLVMValueRef logexp = NULL;
   LLVMValueRef logmant = NULL;
   LLVMValueRef res = NULL;

   if(p_exp || p_floor_log2 || p_log2) {
      /* TODO: optimize the constant case */
      if(LLVMIsConstant(x))
         debug_printf("%s: inefficient/imprecise constant arithmetic\n",
                      __FUNCTION__);

      assert(type.floating && type.width == 32);

      i = LLVMBuildBitCast(bld->builder, x, int_vec_type, "");

      /* exp = the exponent bits of x */
      exp = LLVMBuildAnd(bld->builder, i, expmask, "");
   }

   if(p_floor_log2 || p_log2) {
      logexp = LLVMBuildLShr(bld->builder, exp, lp_build_int_const_scalar(type, 23), "");
      logexp = LLVMBuildSub(bld->builder, logexp, lp_build_int_const_scalar(type, 127), "");
      logexp = LLVMBuildSIToFP(bld->builder, logexp, vec_type, "");
   }

   if(p_log2) {
      /* mant = (float) mantissa(x) */
      mant = LLVMBuildAnd(bld->builder, i, mantmask, "");
      mant = LLVMBuildOr(bld->builder, mant, one, "");
      mant = LLVMBuildBitCast(bld->builder, mant, vec_type, "");

      logmant = lp_build_polynomial(bld, mant, lp_build_log2_polynomial,
                                    Elements(lp_build_log2_polynomial));

      /* This effectively increases the polynomial degree by one, but
       * ensures that log2(1) == 0 */
      logmant = LLVMBuildMul(bld->builder, logmant, LLVMBuildSub(bld->builder, mant, bld->one, ""), "");

      res = LLVMBuildAdd(bld->builder, logmant, logexp, "");
   }

   if(p_exp)
      *p_exp = exp;

   if(p_floor_log2)
      *p_floor_log2 = logexp;

   if(p_log2)
      *p_log2 = res;
}
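
The exponent/mantissa split is the heart of the routine. A hypothetical scalar sketch of that step (the polynomial refinement is omitted):

#include <assert.h>
#include <stdint.h>
#include <string.h>

/* floor(log2(x)) comes straight from the biased exponent field; the
 * mantissa, with its exponent forced to 0, lands in [1, 2) ready for
 * the polynomial. */
static void log2_split(float x, int *floor_log2, float *mant)
{
   uint32_t i;
   memcpy(&i, &x, sizeof i);
   *floor_log2 = (int)((i & 0x7f800000) >> 23) - 127;
   i = (i & 0x007fffff) | 0x3f800000;  /* mantissa bits | bits of 1.0f */
   memcpy(mant, &i, sizeof *mant);
}

int main(void)
{
   int e;
   float m;
   log2_split(8.0f, &e, &m);
   assert(e == 3 && m == 1.0f);
   log2_split(12.0f, &e, &m);
   assert(e == 3 && m == 1.5f);
   return 0;
}
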