Exemplo n.º 1
0
/**
 * Special case for converting clamped IEEE-754 floats to unsigned norms.
 *
 * The arithmetic trick used here may look excessive, but it is deliberate.
 * First, there is no single precision FP to unsigned integer conversion
 * instruction in Intel SSE. Second, even if there were one, the mantissa
 * only covers a fraction of the register bits, so the usual scale-and-cast
 * approach would need double precision to be accurate, and would therefore
 * run at half the throughput.
 *
 * Instead the input is scaled by (2^n - 1)/2^n and a power-of-two bias is
 * added, which makes the wanted integer appear in the least significant
 * bits of the mantissa. The float is then reinterpreted as an integer.
 * Here n is the smaller of the mantissa width and dst_width.
 *
 * Although the result values can be scaled to an arbitrary bit width
 * specified by dst_width, the returned value always has the integer vector
 * type given by lp_build_int_vec_type(src_type).
 *
 * @param builder    LLVM instruction builder used to emit the code
 * @param src_type   type of src; must be floating point
 * @param dst_width  number of bits the normalized result should span
 * @param src        value to convert; per the function name it is expected
 *                   to be clamped already
 * @return the converted integer vector
 */
LLVMValueRef
lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder,
                                        struct lp_type src_type,
                                        unsigned dst_width,
                                        LLVMValueRef src)
{
   LLVMTypeRef int_vec_type = lp_build_int_vec_type(src_type);
   LLVMValueRef scale_const;
   LLVMValueRef bias_const;
   LLVMValueRef res;
   unsigned mantissa_bits;
   unsigned bits;
   unsigned long long range;
   unsigned long long max_value;
   int shift;

   assert(src_type.floating);

   mantissa_bits = lp_mantissa(src_type);

   /* The mantissa limits how many result bits can be carried. */
   bits = MIN2(mantissa_bits, dst_width);

   range = (unsigned long long)1 << bits;
   max_value = range - 1;

   /*
    * These coefficients place the desired result in the least significant
    * bits of the mantissa.
    */
   scale_const = lp_build_const_scalar(src_type, (double)max_value/range);
   res = LLVMBuildMul(builder, src, scale_const, "");

   bias_const = lp_build_const_scalar(src_type,
                                      (double)((unsigned long long)1 << (mantissa_bits - bits)));
   res = LLVMBuildAdd(builder, res, bias_const, "");

   res = LLVMBuildBitCast(builder, res, int_vec_type, "");

   if(dst_width <= bits) {
      /* Everything fits in the mantissa: keep only the result bits. */
      return LLVMBuildAnd(builder, res, lp_build_int_const_scalar(src_type, max_value), "");
   }

   /*
    * More bits were requested than the mantissa holds: move what we have
    * up to the most significant end of the destination range.
    *
    * TODO: Fill in the empty lower bits for additional precision? Two
    * approaches were sketched but never enabled: OR-ing in a mask derived
    * from the result's top bit, and repeatedly OR-ing the result with a
    * right-shifted copy of itself (doubling the shift each round).
    */
   shift = dst_width - bits;
   return LLVMBuildShl(builder, res, lp_build_int_const_scalar(src_type, shift), "");
}
Exemplo n.º 2
0
/**
 * Convert unsigned normalized integers to floats; the inverse of
 * lp_build_clamped_float_to_unsigned_norm above.
 *
 * Ex: src = { i32, i32, i32, i32 } with values in range [0, 2^src_width-1]
 * return {float, float, float, float} with values in range [0, 1].
 *
 * The integer (shifted right first when it is wider than the mantissa) is
 * OR'ed into the low mantissa bits of a power-of-two float constant. The
 * result is reinterpreted as a float and the constant is subtracted again,
 * which leaves value / 2^n. A final multiplication by 2^n / (2^n - 1)
 * rescales that to value / (2^n - 1), where n is the smaller of src_width
 * and the mantissa width.
 *
 * @param builder    LLVM instruction builder used to emit the code
 * @param src_width  number of significant bits in each source element
 * @param dst_type   type of the result; must be floating point
 * @param src        integer vector to convert
 * @return the converted floating point vector
 */
LLVMValueRef
lp_build_unsigned_norm_to_float(LLVMBuilderRef builder,
                                unsigned src_width,
                                struct lp_type dst_type,
                                LLVMValueRef src)
{
   LLVMTypeRef vec_type = lp_build_vec_type(dst_type);
   LLVMTypeRef int_vec_type = lp_build_int_vec_type(dst_type);
   LLVMValueRef bias_vec;
   LLVMValueRef bias_bits;
   LLVMValueRef scale_vec;
   LLVMValueRef res = src;
   unsigned mantissa;
   unsigned bits;
   unsigned long long range;
   double scale;
   double bias;

   assert(dst_type.floating);

   mantissa = lp_mantissa(dst_type);

   /* Number of source bits that can be placed in the mantissa. */
   bits = MIN2(mantissa, src_width);

   range = (unsigned long long)1 << bits;
   scale = (double)range/(range - 1);
   bias = (double)((unsigned long long)1 << (mantissa - bits));

   if(src_width > mantissa) {
      /* Drop the low bits that the mantissa has no room for. */
      int shift = src_width - mantissa;
      res = LLVMBuildLShr(builder, res, lp_build_const_int_vec(dst_type, shift), "");
   }

   bias_vec = lp_build_const_vec(dst_type, bias);
   bias_bits = LLVMBuildBitCast(builder, bias_vec, int_vec_type, "");

   /* Splice the integer into the mantissa of the bias constant. */
   res = LLVMBuildOr(builder, res, bias_bits, "");
   res = LLVMBuildBitCast(builder, res, vec_type, "");

   /* Remove the bias again, then rescale from 2^n to 2^n - 1. */
   res = LLVMBuildFSub(builder, res, bias_vec, "");

   scale_vec = lp_build_const_vec(dst_type, scale);
   return LLVMBuildFMul(builder, res, scale_vec, "");
}
Exemplo n.º 3
0
/**
 * Multiply a value by a small integer constant known at code generation
 * time, using a cheaper instruction sequence than a generic multiply where
 * one is available.
 *
 * @param bld  build context; supplies the builder, the value type and the
 *             zero constant
 * @param a    value to multiply
 * @param b    immediate integer factor
 * @return value representing a * b
 */
LLVMValueRef
lp_build_mul_imm(struct lp_build_context *bld,
                 LLVMValueRef a,
                 int b)
{
   LLVMValueRef factor;

   /* Factors that need no multiplication at all. */
   switch(b) {
   case 0:
      return bld->zero;
   case 1:
      return a;
   case -1:
      return LLVMBuildNeg(bld->builder, a, "");
   default:
      break;
   }

   /* Doubling a float is a single addition. */
   if(b == 2 && bld->type.floating)
      return lp_build_add(bld, a, a);

   /*
    * Power-of-two factors on non-float types become a left shift.
    *
    * Floats deliberately fall through to the generic multiply below. An
    * experiment that added (shift << mantissa) directly to the float's bit
    * pattern was left disabled: it is not always faster, it introduces a
    * small error when multiplying zero, and it gives wrong results for Inf
    * and NaN.
    */
   if(util_is_pot(b) && !bld->type.floating) {
      unsigned shift = ffs(b) - 1;

      factor = lp_build_const_scalar(bld->type, shift);
      return LLVMBuildShl(bld->builder, a, factor, "");
   }

   /* General case: multiply by the constant. */
   factor = lp_build_const_scalar(bld->type, (double)b);
   return lp_build_mul(bld, a, factor);
}
Exemplo n.º 4
0
/**
 * Convert float[] to int[] with floor().
 *
 * With SSE4.1 the value is rounded down by lp_build_round_sse41 and then
 * converted. Otherwise negative inputs get an offset of just under -1 added
 * before the float-to-int conversion, so that the conversion ends up one
 * integer lower for them.
 *
 * NOTE(review): on the fallback path the offset is also added to negative
 * inputs that are already integral. For magnitudes of 2 and above the sum
 * appears to round to the next lower integer in single precision (e.g.
 * -2.0 would yield -3). Confirm whether callers can hit this.
 *
 * @param bld  build context; bld->type must be a floating point type
 * @param a    floating point value to convert
 * @return integer vector of type lp_build_int_vec_type(bld->type)
 */
LLVMValueRef
lp_build_ifloor(struct lp_build_context *bld,
                LLVMValueRef a)
{
   const struct lp_type type = bld->type;
   LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
   LLVMValueRef res;

   assert(type.floating);
   assert(lp_check_value(type, a));

   if(!util_cpu_caps.has_sse4_1) {
      LLVMTypeRef vec_type = lp_build_vec_type(type);
      unsigned mantissa = lp_mantissa(type);
      LLVMValueRef sign_bit = lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1));
      LLVMValueRef sign_shift;
      LLVMValueRef negative;
      LLVMValueRef almost_minus_one;
      LLVMValueRef adjust;
      unsigned long long denom;

      /*
       * negative = a < 0 ? ~0 : 0
       *
       * Isolate the sign bit and smear it across the whole element with an
       * arithmetic shift.
       */
      negative = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
      negative = LLVMBuildAnd(bld->builder, negative, sign_bit, "");
      sign_shift = lp_build_int_const_scalar(type, type.width - 1);
      negative = LLVMBuildAShr(bld->builder, negative, sign_shift, "");
      lp_build_name(negative, "floor.sign");

      /* almost_minus_one = -(2^mantissa - 1)/2^mantissa, i.e. -0.99999(9)f */
      denom = (unsigned long long)1 << mantissa;
      almost_minus_one = lp_build_const_scalar(type, -(double)(denom - 1)/denom);
      almost_minus_one = LLVMConstBitCast(almost_minus_one, int_vec_type);

      /* adjust = a < 0 ? -0.99999(9)f : 0.0f */
      adjust = LLVMBuildAnd(bld->builder, almost_minus_one, negative, "");
      adjust = LLVMBuildBitCast(bld->builder, adjust, vec_type, "");
      lp_build_name(adjust, "floor.offset");

      res = LLVMBuildAdd(bld->builder, a, adjust, "");
      lp_build_name(res, "floor.res");
   }
   else {
      res = lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_FLOOR);
   }

   res = LLVMBuildFPToSI(bld->builder, res, int_vec_type, "");
   lp_build_name(res, "floor");

   return res;
}
Exemplo n.º 5
0
/**
 * Inverse of lp_build_clamped_float_to_unsigned_norm above.
 * Ex: src = { i32, i32, i32, i32 } with values in range [0, 2^src_width-1]
 * return {float, float, float, float} with values in range [0, 1].
 *
 * Two strategies are used, depending on whether every source value can be
 * represented exactly in the destination floating point type:
 *  - src_width <= mantissa + 1: convert to float and scale by
 *    1 / (2^src_width - 1).
 *  - otherwise: drop the low bits that do not fit, splice the remaining
 *    mantissa-wide integer into the mantissa of the constant 1.0, subtract
 *    1.0 again and rescale by 2^mantissa / (2^mantissa - 1).
 *
 * @param gallivm    code generation state; supplies the LLVM builder
 * @param src_width  number of significant bits in each source element
 * @param dst_type   type of the result; must be floating point
 * @param src        integer vector to convert
 * @return the converted floating point vector
 */
LLVMValueRef
lp_build_unsigned_norm_to_float(struct gallivm_state *gallivm,
                                unsigned src_width,
                                struct lp_type dst_type,
                                LLVMValueRef src)
{
    LLVMBuilderRef builder = gallivm->builder;
    LLVMTypeRef vec_type = lp_build_vec_type(gallivm, dst_type);
    LLVMTypeRef int_vec_type = lp_build_int_vec_type(gallivm, dst_type);
    LLVMValueRef bias_;
    LLVMValueRef res;
    unsigned mantissa;
    unsigned long long ubound;
    unsigned long long mask;
    double scale;
    int shift;

    assert(dst_type.floating);

    mantissa = lp_mantissa(dst_type);

    if (src_width <= (mantissa + 1)) {
        /*
         * The source width fits what can be represented in floating point
         * (i.e., mantissa + 1 bits). So do a straight cast followed by a
         * multiplication. No further rounding is necessary.
         */

        scale = 1.0/(double)((1ULL << src_width) - 1);
        res = LLVMBuildSIToFP(builder, src, vec_type, "");
        return LLVMBuildFMul(builder, res,
                             lp_build_const_vec(gallivm, dst_type, scale), "");
    }

    /*
     * The source width exceeds what can be represented in floating point,
     * so truncate the incoming values to the mantissa width.
     *
     * From here on src_width > mantissa + 1 holds, so exactly `mantissa`
     * bits are kept, the right shift is always needed and the bias is
     * always 1.0.
     */

    ubound = 1ULL << mantissa;
    mask = ubound - 1;
    scale = (double)ubound/mask;

    shift = src_width - mantissa;
    res = LLVMBuildLShr(builder, src,
                        lp_build_const_int_vec(gallivm, dst_type, shift), "");

    bias_ = lp_build_const_vec(gallivm, dst_type, 1.0);

    /* Splice the truncated integer into the mantissa of 1.0. */
    res = LLVMBuildOr(builder,
                      res,
                      LLVMBuildBitCast(builder, bias_, int_vec_type, ""), "");

    res = LLVMBuildBitCast(builder, res, vec_type, "");

    /* Remove the bias, then rescale from 2^mantissa to 2^mantissa - 1. */
    res = LLVMBuildFSub(builder, res, bias_, "");
    res = LLVMBuildFMul(builder, res, lp_build_const_vec(gallivm, dst_type, scale), "");

    return res;
}
Exemplo n.º 6
0
/**
 * Special case for converting clamped IEEE-754 floats to unsigned norms.
 *
 * The arithmetic trick used here may look excessive, but it is deliberate.
 * First, there is no single precision FP to unsigned integer conversion
 * instruction in Intel SSE. Second, even if there were one, the mantissa
 * only covers a fraction of the register bits, so the usual scale-and-cast
 * approach would need double precision to be accurate, and would therefore
 * run at half the throughput.
 *
 * Although the result values can be scaled to an arbitrary bit width
 * specified by dst_width, the returned value always has the integer vector
 * type given by lp_build_int_vec_type(gallivm, src_type).
 *
 * Ex: src = { float, float, float, float }
 * return { i32, i32, i32, i32 } where each value is in [0, 2^dst_width-1].
 *
 * @param gallivm    code generation state; supplies the LLVM builder
 * @param src_type   type of src; must be floating point
 * @param dst_width  number of bits of the normalized result; must not
 *                   exceed src_type.width
 * @param src        value to convert; per the function name it is expected
 *                   to be clamped already
 * @return the converted integer vector
 */
LLVMValueRef
lp_build_clamped_float_to_unsigned_norm(struct gallivm_state *gallivm,
                                        struct lp_type src_type,
                                        unsigned dst_width,
                                        LLVMValueRef src)
{
    LLVMBuilderRef builder = gallivm->builder;
    LLVMTypeRef int_vec_type = lp_build_int_vec_type(gallivm, src_type);
    LLVMValueRef factor;
    LLVMValueRef high;
    LLVMValueRef low;
    LLVMValueRef res;
    unsigned mantissa;
    unsigned bits;
    unsigned lshift;

    assert(src_type.floating);
    assert(dst_width <= src_type.width);
    src_type.sign = FALSE;

    mantissa = lp_mantissa(src_type);

    if (dst_width <= mantissa) {
        /*
         * The destination fits in the mantissa: scale and bias the input so
         * that the result shows up, correctly rounded, in the least
         * significant mantissa bits, then mask everything else away.
         */
        const unsigned long long range = 1ULL << dst_width;
        const unsigned long long max_value = range - 1;
        const double bias = (double)(1ULL << (mantissa - dst_width));

        factor = lp_build_const_vec(gallivm, src_type, (double)max_value/range);
        res = LLVMBuildFMul(builder, src, factor, "");
        res = LLVMBuildFAdd(builder, res,
                            lp_build_const_vec(gallivm, src_type, bias), "");
        res = LLVMBuildBitCast(builder, res, int_vec_type, "");
        return LLVMBuildAnd(builder, res,
                            lp_build_const_int_vec(gallivm, src_type, max_value), "");
    }

    if (dst_width == (mantissa + 1)) {
        /*
         * The destination width is exactly what floating point can
         * represent (mantissa + 1 bits): multiply and cast, no further
         * rounding is necessary.
         */
        factor = lp_build_const_vec(gallivm, src_type,
                                    (double)((1ULL << dst_width) - 1));
        res = LLVMBuildFMul(builder, src, factor, "");
        return LLVMBuildFPToSI(builder, res, int_vec_type, "");
    }

    /*
     * The destination exceeds what floating point can represent. Multiply
     * by the largest power of two we can get away with, then subtract the
     * most significant bit to rescale to normalized values.
     *
     * The factor is capped at (1 << (src_type.width - 1)) because the
     * conversion below is a signed one. In theory the cap should be
     * (1 << (src_type.width - 2)), but the conversion is expected to return
     * INT_MIN on overflow, which is the right bit pattern for values near
     * 1.0.
     * NOTE(review): LLVM itself does not define the result of an
     * out-of-range fptosi; this relies on the target's behaviour. Confirm.
     *
     * The outcome is (src_type.width - 1) correct bits for values near 0.0
     * and (mantissa + 1) correct bits for values near 1.0. Just as
     * important, 0.0 and 1.0 convert exactly.
     */
    bits = MIN2(src_type.width - 1, dst_width);
    lshift = dst_width - bits;

    factor = lp_build_const_vec(gallivm, src_type, (double)(1ULL << bits));
    res = LLVMBuildFMul(builder, src, factor, "");
    res = LLVMBuildFPToSI(builder, res, int_vec_type, "");

    /*
     * Move the most significant bit to its final position. This makes 1.0
     * overflow to 0, which the subtraction below corrects.
     */
    high = res;
    if (lshift) {
        high = LLVMBuildShl(builder, res,
                            lp_build_const_int_vec(gallivm, src_type, lshift), "");
    }

    /* Bring the most significant bit down to the least significant one. */
    low = LLVMBuildLShr(builder, res,
                        lp_build_const_int_vec(gallivm, src_type, bits), "");

    /*
     * Subtracting the MSB from the LSB rescales the value from
     * (1 << dst_width) to ((1 << dst_width) - 1).
     */
    return LLVMBuildSub(builder, high, low, "");
}