Пример #1
0
/**
 * Emit the alpha test and merge its result into the given mask.
 *
 * @param gallivm           code generation state handed to the helpers
 * @param func              comparison function forwarded to lp_build_cmp()
 * @param type              type of the incoming alpha and ref values
 * @param cbuf_format_desc  format description of the color buffer
 * @param mask              mask context updated with the test result
 * @param alpha             alpha value(s) to test
 * @param ref               reference value(s) to compare against
 * @param do_branch         when set, lp_build_mask_check() is called once
 *                          the mask has been updated
 */
void
lp_build_alpha_test(struct gallivm_state *gallivm,
                    unsigned func,
                    struct lp_type type,
                    const struct util_format_description *cbuf_format_desc,
                    struct lp_build_mask_context *mask,
                    LLVMValueRef alpha,
                    LLVMValueRef ref,
                    boolean do_branch)
{
    struct lp_build_context cmp_bld;
    LLVMValueRef passed;

    lp_build_context_init(&cmp_bld, gallivm, type);

    /*
     * The comparison has to be carried out in the precision of the color
     * buffer.
     *
     * TODO: Rather than duplicating the color conversion here, the test
     * would ideally run on the already converted output colors.  That is
     * awkward because the alpha test must precede depth testing; hopefully
     * LLVM spots and removes the duplicated expression.
     *
     * FIXME: Only rgba8 variants are handled; generalize to other formats.
     */
    if (type.floating && util_format_is_rgba8_variant(cbuf_format_desc)) {
        /* bit width requested from the float -> unsigned norm conversion */
        const unsigned unorm_bits = 8;
        LLVMValueRef alpha_sat;
        LLVMValueRef ref_sat;

        /* the conversion below expects already clamped inputs */
        alpha_sat = lp_build_clamp(&cmp_bld, alpha, cmp_bld.zero, cmp_bld.one);
        ref_sat   = lp_build_clamp(&cmp_bld, ref,   cmp_bld.zero, cmp_bld.one);

        alpha = lp_build_clamped_float_to_unsigned_norm(gallivm, type,
                                                        unorm_bits, alpha_sat);
        ref   = lp_build_clamped_float_to_unsigned_norm(gallivm, type,
                                                        unorm_bits, ref_sat);

        /* operands are no longer floats: rebuild the context to match */
        type.floating = 0;
        lp_build_context_init(&cmp_bld, gallivm, type);
    }

    passed = lp_build_cmp(&cmp_bld, func, alpha, ref);
    lp_build_name(passed, "alpha_mask");
    lp_build_mask_update(mask, passed);

    if (do_branch) {
        lp_build_mask_check(mask);
    }
}
Пример #2
0
/**
 * For PIPE_TEX_MIPFILTER_NEAREST, turn the integer part of the LOD into a
 * mipmap level index that lies within the texture's level range.
 * Note (from the original author): this is all scalar code.
 * \param bld  sampling context; its int_bld and dynamic_state are used
 * \param unit  unit index passed to the dynamic state level queries
 * \param lod_ipart  integer part of the level of detail (it is combined
 *                   with first_level through the integer build context)
 * \param level_out  returns first_level + lod_ipart, clamped to
 *                   [first_level, last_level]
 */
void
lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
                           unsigned unit,
                           LLVMValueRef lod_ipart,
                           LLVMValueRef *level_out)
{
   struct lp_build_context *ibld = &bld->int_bld;

   /* level range of the texture bound to this unit */
   LLVMValueRef level_min =
      bld->dynamic_state->first_level(bld->dynamic_state, bld->gallivm, unit);
   LLVMValueRef level_max =
      bld->dynamic_state->last_level(bld->dynamic_state, bld->gallivm, unit);

   /* the lod is relative to the first level, so offset it */
   LLVMValueRef biased = lp_build_add(ibld, lod_ipart, level_min);

   /* keep the result inside the legal range of levels */
   *level_out = lp_build_clamp(ibld, biased, level_min, level_max);
}
Пример #3
0
/**
 * Convert linear float SoA values to packed sRGB AoS values.
 * Only packed formats which are 4x8bit in size are handled
 * (rgba and rgbx plus swizzles).
 *
 * @param gallivm   code generation state (its builder emits the packing)
 * @param dst_fmt   destination format description; nr_channels, swizzle
 *                  and the per-channel shift decide where each value lands
 * @param src_type  type of the vectors in src
 * @param src       float SoA (vector) values to convert; entries 0..2 go
 *                  through lp_build_linear_to_srgb(), entry 3 is clamped
 *                  to [0, 1], scaled by 255 and rounded
 * @return the value obtained by shifting every selected channel into
 *         place and OR-ing the results together
 */
LLVMValueRef
lp_build_float_to_srgb_packed(struct gallivm_state *gallivm,
                              const struct util_format_description *dst_fmt,
                              struct lp_type src_type,
                              LLVMValueRef *src)
{
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_type int32_type = lp_int_type(src_type);
   struct lp_build_context fbld;
   LLVMValueRef converted[4];
   LLVMValueRef scale_255, a, packed;
   unsigned chan;

   lp_build_context_init(&fbld, gallivm, src_type);

   /* only the color channels get the linear->srgb curve applied */
   for (chan = 0; chan < 3; chan++) {
      converted[chan] = lp_build_linear_to_srgb(gallivm, src_type, src[chan]);
   }

   /*
    * Alpha stays linear.  lp_build_conv is not an option because the
    * values must remain 32bit wide to be interleaved with rgb when
    * going from SoA to AoS.
    */
   a = lp_build_clamp(&fbld, src[3], fbld.zero, fbld.one);
   scale_255 = lp_build_const_vec(gallivm, src_type, 255.0f);
   a = lp_build_mul(&fbld, a, scale_255);
   converted[3] = lp_build_iround(&fbld, a);

   /* shift each channel to its position and merge it into the result */
   packed = lp_build_zero(gallivm, int32_type);
   for (chan = 0; chan < dst_fmt->nr_channels; chan++) {
      unsigned shift_bits;
      LLVMValueRef shift_vec, positioned;

      /* swizzles past W do not name a source channel: nothing to pack */
      if (dst_fmt->swizzle[chan] > UTIL_FORMAT_SWIZZLE_W) {
         continue;
      }

      shift_bits = dst_fmt->channel[dst_fmt->swizzle[chan]].shift;
      shift_vec = lp_build_const_int_vec(gallivm, int32_type, shift_bits);
      positioned = LLVMBuildShl(builder, converted[chan], shift_vec, "");
      packed = LLVMBuildOr(builder, packed, positioned, "");
   }

   return packed;
}
Пример #4
0
/**
 * Convert YUV to RGB in SoA form using 32-bit signed integer arithmetic
 * with 8 fractional bits.
 *
 * @param gallivm  code generation state (its builder emits the math)
 * @param n        vector length used for the 32-bit signed working type
 * @param y        luma input; asserted to match the working type
 * @param u        first chroma input; asserted to match the working type
 * @param v        second chroma input; asserted to match the working type
 * @param r        receives the red result, clamped to [0, 255]
 * @param g        receives the green result, clamped to [0, 255]
 * @param b        receives the blue result, clamped to [0, 255]
 */
static INLINE void
yuv_to_rgb_soa(struct gallivm_state *gallivm,
               unsigned n,
               LLVMValueRef y, LLVMValueRef u, LLVMValueRef v,
               LLVMValueRef *r, LLVMValueRef *g, LLVMValueRef *b)
{
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_type i32_type;
   struct lp_build_context i32_bld;

   /* plain integer constants */
   LLVMValueRef k0, k8, k16, k128, k255;
   /* conversion coefficients, pre-scaled by 256 */
   LLVMValueRef coef_y, coef_ug, coef_ub, coef_vr, coef_vg;
   /* intermediate results */
   LLVMValueRef luma, red, green, blue;

   /* working type: signed 32-bit integers, n elements */
   memset(&i32_type, 0, sizeof i32_type);
   i32_type.sign = TRUE;
   i32_type.width = 32;
   i32_type.length = n;

   lp_build_context_init(&i32_bld, gallivm, i32_type);

   assert(lp_check_value(i32_type, y));
   assert(lp_check_value(i32_type, u));
   assert(lp_check_value(i32_type, v));

   k0   = lp_build_const_int_vec(gallivm, i32_type,   0);
   k8   = lp_build_const_int_vec(gallivm, i32_type,   8);
   k16  = lp_build_const_int_vec(gallivm, i32_type,  16);
   k128 = lp_build_const_int_vec(gallivm, i32_type, 128);
   k255 = lp_build_const_int_vec(gallivm, i32_type, 255);

   coef_y  = lp_build_const_int_vec(gallivm, i32_type,  298);
   coef_ug = lp_build_const_int_vec(gallivm, i32_type, -100);
   coef_ub = lp_build_const_int_vec(gallivm, i32_type,  516);
   coef_vr = lp_build_const_int_vec(gallivm, i32_type,  409);
   coef_vg = lp_build_const_int_vec(gallivm, i32_type, -208);

   /* remove the offsets: y -= 16, u -= 128, v -= 128 */
   y = LLVMBuildSub(builder, y, k16, "");
   u = LLVMBuildSub(builder, u, k128, "");
   v = LLVMBuildSub(builder, v, k128, "");

   /*
    * r = 298 * y           + 409 * v + 128
    * g = 298 * y - 100 * u - 208 * v + 128
    * b = 298 * y + 516 * u           + 128
    *
    * The "298 * y + 128" term is shared by all three channels, so it is
    * computed once.
    */
   luma = LLVMBuildMul(builder, y, coef_y, "");
   luma = LLVMBuildAdd(builder, luma, k128, "");

   red   = LLVMBuildMul(builder, v, coef_vr, "");
   green = LLVMBuildAdd(builder,
                        LLVMBuildMul(builder, u, coef_ug, ""),
                        LLVMBuildMul(builder, v, coef_vg, ""),
                        "");
   blue  = LLVMBuildMul(builder, u, coef_ub, "");

   red   = LLVMBuildAdd(builder, red, luma, "");
   green = LLVMBuildAdd(builder, green, luma, "");
   blue  = LLVMBuildAdd(builder, blue, luma, "");

   /* drop the 8 fractional bits (arithmetic shift keeps the sign) */
   red   = LLVMBuildAShr(builder, red, k8, "r");
   green = LLVMBuildAShr(builder, green, k8, "g");
   blue  = LLVMBuildAShr(builder, blue, k8, "b");

   /* clamp to [0, 255] and hand the results back */
   *r = lp_build_clamp(&i32_bld, red, k0, k255);
   *g = lp_build_clamp(&i32_bld, green, k0, k255);
   *b = lp_build_clamp(&i32_bld, blue, k0, k255);
}
Пример #5
0
/**
 * Convert linear float values to srgb int values.
 * Several possibilities how to do this, e.g.
 * - use table (based on exponent/highest order mantissa bits) and do
 *   linear interpolation (https://gist.github.com/rygorous/2203834)
 * - Chebyshev polynomial
 * - Approximation using reciprocals
 * - using int-to-float and float-to-int tricks for pow()
 *   (http://stackoverflow.com/questions/6475373/optimizations-for-pow-with-const-non-integer-exponent)
 *
 * The input is first clamped to [0, 1]. Two candidate results are then
 * built, both already scaled by 255: a linear segment (src * 12.92) and a
 * power-curve approximation. The linear one is chosen where
 * src <= 0.0031308, and the selected value is rounded to integer.
 *
 * Two implementations of the power curve are present; the first one is
 * compiled out with "if (0)", so only the "rational polynomial" variant
 * is actually emitted.
 *
 * @param gallivm   code generation state (provides the LLVM builder).
 * @param src_type  type of src; also used for all float constants.
 * @param src   float (vector) value(s) to convert.
 * @return the result of lp_build_iround() on the selected value.
 */
static LLVMValueRef
lp_build_linear_to_srgb(struct gallivm_state *gallivm,
                        struct lp_type src_type,
                        LLVMValueRef src)
{
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context f32_bld;
   LLVMValueRef lin_thresh, lin, lin_const, is_linear, tmp, pow_final;

   lp_build_context_init(&f32_bld, gallivm, src_type);

   /* everything below assumes the input lies in [0, 1] */
   src = lp_build_clamp(&f32_bld, src, f32_bld.zero, f32_bld.one);

   /* disabled alternative, kept for reference; never emitted */
   if (0) {
      /*
       * using int-to-float and float-to-int trick for pow().
       * This is much more accurate than necessary thanks to the correction,
       * but it most certainly makes no sense without rsqrt available.
       * Bonus points if you understand how this works...
       * All in all (including min/max clamp, conversion) 19 instructions.
       */

      float exp_f = 2.0f / 3.0f;
      /* some compilers can't do exp2f, so this is exp2f(127.0f/exp_f - 127.0f) */
      float exp2f_c = 1.30438178253e+19f;
      float coeff_f = 0.62996f;
      LLVMValueRef pow_approx, coeff, x2, exponent, pow_1, pow_2;
      struct lp_type int_type = lp_int_type(src_type);

      /*
       * First calculate approx x^8/12
       */
      exponent = lp_build_const_vec(gallivm, src_type, exp_f);
      coeff = lp_build_const_vec(gallivm, src_type,
                                 exp2f_c * powf(coeff_f, 1.0f / exp_f));

      /* premultiply src */
      tmp = lp_build_mul(&f32_bld, coeff, src);
      /* "log2": reinterpret the float bits as an integer, then convert
       * that integer back to float */
      tmp = LLVMBuildBitCast(builder, tmp, lp_build_vec_type(gallivm, int_type), "");
      tmp = lp_build_int_to_float(&f32_bld, tmp);
      /* multiply for pow */
      tmp = lp_build_mul(&f32_bld, tmp, exponent);
      /* "exp2": truncate to integer and reinterpret those bits as float */
      pow_approx = lp_build_itrunc(&f32_bld, tmp);
      pow_approx = LLVMBuildBitCast(builder, pow_approx,
                                    lp_build_vec_type(gallivm, src_type), "");

      /*
       * Since that pow was inaccurate (like 3 bits, though each sqrt step would
       * give another bit), compensate the error (which is why we chose another
       * exponent in the first place).
       */
      /* x * x^(8/12) = x^(20/12) */
      pow_1 = lp_build_mul(&f32_bld, pow_approx, src);

      /* x * x * x^(-4/12) = x^(20/12) */
      /* Should avoid using rsqrt if it's not available, but
       * using x * x^(4/12) * x^(4/12) instead will change error weight */
      tmp = lp_build_fast_rsqrt(&f32_bld, pow_approx);
      x2 = lp_build_mul(&f32_bld, src, src);
      pow_2 = lp_build_mul(&f32_bld, x2, tmp);

      /* average the values so the errors cancel out, compensate bias,
       * we also squeeze the 1.055 mul of the srgb conversion plus the 255.0 mul
       * for conversion to int in here */
      tmp = lp_build_add(&f32_bld, pow_1, pow_2);
      coeff = lp_build_const_vec(gallivm, src_type,
                                 1.0f / (3.0f * coeff_f) * 0.999852f *
                                 powf(1.055f * 255.0f, 4.0f));
      pow_final = lp_build_mul(&f32_bld, tmp, coeff);

      /* x^(5/12) = rsqrt(rsqrt(x^20/12)) */
      if (lp_build_fast_rsqrt_available(src_type)) {
         pow_final = lp_build_fast_rsqrt(&f32_bld,
                        lp_build_fast_rsqrt(&f32_bld, pow_final));
      }
      else {
         pow_final = lp_build_sqrt(&f32_bld, lp_build_sqrt(&f32_bld, pow_final));
      }
      /* the constant offset of the srgb curve, scaled by 255 as well */
      pow_final = lp_build_add(&f32_bld, pow_final,
                               lp_build_const_vec(gallivm, src_type, -0.055f * 255.0f));
   }

   else {
      /*
       * using "rational polynomial" approximation here.
       * Essentially y = a*x^0.375 + b*x^0.5 + c, with also
       * factoring in the 255.0 mul and the scaling mul.
       * (a is closer to actual value so has higher weight than b.)
       * Note: the constants are magic values. They were found empirically,
       * possibly could be improved but good enough (be VERY careful with
       * error metric if you'd want to tweak them, they also MUST fit with
       * the crappy polynomial above for srgb->linear since it is required
       * that each srgb value maps back to the same value).
       * This function has an error of max +-0.17 (and we'd only require +-0.6),
       * for the approximated srgb->linear values the error is naturally larger
       * (+-0.42) but still accurate enough (required +-0.5 essentially).
       * All in all (including min/max clamp, conversion) 15 instructions.
       * FMA would help (minus 2 instructions).
       */

      LLVMValueRef x05, x0375, a_const, b_const, c_const, tmp2;

      /* x05 = x^0.5, computed as x * rsqrt(x) when fast rsqrt exists */
      if (lp_build_fast_rsqrt_available(src_type)) {
         tmp = lp_build_fast_rsqrt(&f32_bld, src);
         x05 = lp_build_mul(&f32_bld, src, tmp);
      }
      else {
         /*
          * I don't really expect this to be practical without rsqrt
          * but there's no reason for triple punishment so at least
          * save the otherwise resulting division and unnecessary mul...
          */
         x05 = lp_build_sqrt(&f32_bld, src);
      }

      /* x^0.5 * x = x^1.5; applying a (reciprocal) square root twice
       * raises it to the power 1/4, i.e. x^0.375 */
      tmp = lp_build_mul(&f32_bld, x05, src);
      if (lp_build_fast_rsqrt_available(src_type)) {
         x0375 = lp_build_fast_rsqrt(&f32_bld, lp_build_fast_rsqrt(&f32_bld, tmp));
      }
      else {
         x0375 = lp_build_sqrt(&f32_bld, lp_build_sqrt(&f32_bld, tmp));
      }

      /* NOTE: 1.0622 has no 'f' suffix, so the a/b products are evaluated
       * in double by the C compiler before being passed on */
      a_const = lp_build_const_vec(gallivm, src_type, 0.675f * 1.0622 * 255.0f);
      b_const = lp_build_const_vec(gallivm, src_type, 0.325f * 1.0622 * 255.0f);
      c_const = lp_build_const_vec(gallivm, src_type, -0.0620f * 255.0f);

      /* pow_final = a * x^0.375 + (b * x^0.5 + c) */
      tmp = lp_build_mul(&f32_bld, a_const, x0375);
      tmp2 = lp_build_mul(&f32_bld, b_const, x05);
      tmp2 = lp_build_add(&f32_bld, tmp2, c_const);
      pow_final = lp_build_add(&f32_bld, tmp, tmp2);
   }

   /* linear part is easy */
   lin_const = lp_build_const_vec(gallivm, src_type, 12.92f * 255.0f);
   lin = lp_build_mul(&f32_bld, src, lin_const);

   /* choose the linear segment where src <= threshold, the curve elsewhere */
   lin_thresh = lp_build_const_vec(gallivm, src_type, 0.0031308f);
   is_linear = lp_build_compare(gallivm, src_type, PIPE_FUNC_LEQUAL, src, lin_thresh);
   tmp = lp_build_select(&f32_bld, is_linear, lin, pow_final);

   /* mark the context type as unsigned before rounding
    * NOTE(review): presumably lets lp_build_iround skip sign handling,
    * which would be safe as the value is non-negative - confirm */
   f32_bld.type.sign = 0;
   return lp_build_iround(&f32_bld, tmp);
}