Example #1
0
void
lp_build_exp2_approx(struct lp_build_context *bld,
                     LLVMValueRef x,
                     LLVMValueRef *p_exp2_int_part,
                     LLVMValueRef *p_frac_part,
                     LLVMValueRef *p_exp2)
{
   const struct lp_type type = bld->type;
   LLVMTypeRef vec_type = lp_build_vec_type(type);
   LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
   LLVMValueRef ipart = NULL;
   LLVMValueRef fpart = NULL;
   LLVMValueRef expipart = NULL;
   LLVMValueRef expfpart = NULL;
   LLVMValueRef res = NULL;

   if(p_exp2_int_part || p_frac_part || p_exp2) {
      /* TODO: optimize the constant case */
      if(LLVMIsConstant(x))
         debug_printf("%s: inefficient/imprecise constant arithmetic\n",
                      __FUNCTION__);

      assert(type.floating && type.width == 32);

      x = lp_build_min(bld, x, lp_build_const_scalar(type,  129.0));
      x = lp_build_max(bld, x, lp_build_const_scalar(type, -126.99999));

      /* ipart = int(x - 0.5) */
      ipart = LLVMBuildSub(bld->builder, x, lp_build_const_scalar(type, 0.5f), "");
      ipart = LLVMBuildFPToSI(bld->builder, ipart, int_vec_type, "");

      /* fpart = x - ipart */
      fpart = LLVMBuildSIToFP(bld->builder, ipart, vec_type, "");
      fpart = LLVMBuildSub(bld->builder, x, fpart, "");
   }

   if(p_exp2_int_part || p_exp2) {
      /* expipart = (float) (1 << ipart) */
      expipart = LLVMBuildAdd(bld->builder, ipart, lp_build_int_const_scalar(type, 127), "");
      expipart = LLVMBuildShl(bld->builder, expipart, lp_build_int_const_scalar(type, 23), "");
      expipart = LLVMBuildBitCast(bld->builder, expipart, vec_type, "");
   }

   if(p_exp2) {
      expfpart = lp_build_polynomial(bld, fpart, lp_build_exp2_polynomial,
                                     Elements(lp_build_exp2_polynomial));

      res = LLVMBuildMul(bld->builder, expipart, expfpart, "");
   }

   if(p_exp2_int_part)
      *p_exp2_int_part = expipart;

   if(p_frac_part)
      *p_frac_part = fpart;

   if(p_exp2)
      *p_exp2 = res;
}
/**
 * Convert srgb int values to linear float values.
 * Several possibilities how to do this, e.g.
 * - table
 * - doing the pow() with int-to-float and float-to-int tricks
 *   (http://stackoverflow.com/questions/6475373/optimizations-for-pow-with-const-non-integer-exponent)
 * - just using standard polynomial approximation
 *   (3rd order polynomial is required for crappy but just sufficient accuracy)
 *
 * @param src   integer (vector) value(s) to convert
 *              (chan_bits bit values unpacked to 32 bit already).
 */
LLVMValueRef
lp_build_srgb_to_linear(struct gallivm_state *gallivm,
                        struct lp_type src_type,
                        unsigned chan_bits,
                        LLVMValueRef src)
{
   struct lp_type f32_type = lp_type_float_vec(32, src_type.length * 32);
   struct lp_build_context f32_bld;
   LLVMValueRef srcf, part_lin, part_pow, is_linear, lin_const, lin_thresh;
   double coeffs[4] = {0.0023f,
                       0.0030f / 255.0f,
                       0.6935f / (255.0f * 255.0f),
                       0.3012f / (255.0f * 255.0f * 255.0f)
   };

   assert(src_type.width == 32);
   /* Technically this would work with more bits too but would be inaccurate. */
   assert(chan_bits <= 8);

   lp_build_context_init(&f32_bld, gallivm, f32_type);

   /*
    * using polynomial: (src * (src * (src * 0.3012 + 0.6935) + 0.0030) + 0.0023)
    * ( poly =  0.3012*x^3 + 0.6935*x^2 + 0.0030*x + 0.0023)
    * (found with octave polyfit and some magic as I couldn't get the error
    * function right). Using the above mentioned error function, the values stay
    * within +-0.35, except for the lowest values - hence tweaking linear segment
    * to cover the first 16 instead of the first 11 values (the error stays
    * just about acceptable there too).
    * Hence: lin = src > 15 ? poly : src / 12.6
    * This function really only makes sense for vectors, should use LUT otherwise.
    * All in all (including float conversion) 11 instructions (with sse4.1),
    * 6 constants (polynomial could be done with 1 instruction less at the cost
    * of slightly worse dependency chain, fma should also help).
    */
   /* doing the 1/255 mul as part of the approximation */
   srcf = lp_build_int_to_float(&f32_bld, src);
   if (chan_bits != 8) {
      /* could adjust all the constants instead */
      LLVMValueRef rescale_const = lp_build_const_vec(gallivm, f32_type,
                                                      255.0f / ((1 << chan_bits) - 1));
      srcf = lp_build_mul(&f32_bld, srcf, rescale_const);
   }
   lin_const = lp_build_const_vec(gallivm, f32_type, 1.0f / (12.6f * 255.0f));
   part_lin = lp_build_mul(&f32_bld, srcf, lin_const);

   part_pow = lp_build_polynomial(&f32_bld, srcf, coeffs, 4);

   lin_thresh = lp_build_const_vec(gallivm, f32_type, 15.0f);
   is_linear = lp_build_compare(gallivm, f32_type, PIPE_FUNC_LEQUAL, srcf, lin_thresh);
   return lp_build_select(&f32_bld, is_linear, part_lin, part_pow);
}
Example #3
0
/**
 * See http://www.devmaster.net/forums/showthread.php?p=43580
 */
void
lp_build_log2_approx(struct lp_build_context *bld,
                     LLVMValueRef x,
                     LLVMValueRef *p_exp,
                     LLVMValueRef *p_floor_log2,
                     LLVMValueRef *p_log2)
{
   const struct lp_type type = bld->type;
   LLVMTypeRef vec_type = lp_build_vec_type(type);
   LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);

   LLVMValueRef expmask = lp_build_int_const_scalar(type, 0x7f800000);
   LLVMValueRef mantmask = lp_build_int_const_scalar(type, 0x007fffff);
   LLVMValueRef one = LLVMConstBitCast(bld->one, int_vec_type);

   LLVMValueRef i = NULL;
   LLVMValueRef exp = NULL;
   LLVMValueRef mant = NULL;
   LLVMValueRef logexp = NULL;
   LLVMValueRef logmant = NULL;
   LLVMValueRef res = NULL;

   if(p_exp || p_floor_log2 || p_log2) {
      /* TODO: optimize the constant case */
      if(LLVMIsConstant(x))
         debug_printf("%s: inefficient/imprecise constant arithmetic\n",
                      __FUNCTION__);

      assert(type.floating && type.width == 32);

      i = LLVMBuildBitCast(bld->builder, x, int_vec_type, "");

      /* exp = (float) exponent(x) */
      exp = LLVMBuildAnd(bld->builder, i, expmask, "");
   }

   if(p_floor_log2 || p_log2) {
      logexp = LLVMBuildLShr(bld->builder, exp, lp_build_int_const_scalar(type, 23), "");
      logexp = LLVMBuildSub(bld->builder, logexp, lp_build_int_const_scalar(type, 127), "");
      logexp = LLVMBuildSIToFP(bld->builder, logexp, vec_type, "");
   }

   if(p_log2) {
      /* mant = (float) mantissa(x) */
      mant = LLVMBuildAnd(bld->builder, i, mantmask, "");
      mant = LLVMBuildOr(bld->builder, mant, one, "");
      mant = LLVMBuildBitCast(bld->builder, mant, vec_type, "");

      logmant = lp_build_polynomial(bld, mant, lp_build_log2_polynomial,
                                    Elements(lp_build_log2_polynomial));

      /* This effectively increases the polynomial degree by one, but ensures that log2(1) == 0*/
      logmant = LLVMBuildMul(bld->builder, logmant, LLVMBuildSub(bld->builder, mant, bld->one, ""), "");

      res = LLVMBuildAdd(bld->builder, logmant, logexp, "");
   }

   if(p_exp)
      *p_exp = exp;

   if(p_floor_log2)
      *p_floor_log2 = logexp;

   if(p_log2)
      *p_log2 = res;
}