/**
 * Generate blend code in SOA mode.
 *
 * Each of the four channels is processed on its own:
 *  - channels masked off by the render target's colormask pass dst through;
 *  - with logic ops enabled, non-floating types get the logic op applied and
 *    floating point types pass dst through;
 *  - with blending enabled the result is
 *    func(src * src_factor, dst * dst_factor), reusing values already built
 *    for an earlier channel whenever the operands are the very same LLVM
 *    values;
 *  - otherwise src is passed through.
 *
 * \param gallivm  gallivm state providing the LLVM builder
 * \param blend  the blend state
 * \param type  type of the src/dst/con/res values
 * \param rt  render target index (to index the blend / colormask state)
 * \param src  src/fragment color
 * \param dst  dst/framebuffer color
 * \param con  constant blend color
 * \param res  the result/output
 */
void
lp_build_blend_soa(struct gallivm_state *gallivm,
                   const struct pipe_blend_state *blend,
                   struct lp_type type,
                   unsigned rt,
                   LLVMValueRef src[4],
                   LLVMValueRef dst[4],
                   LLVMValueRef con[4],
                   LLVMValueRef res[4])
{
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_blend_soa_context bld;
   unsigned i, j, k;

   assert(rt < PIPE_MAX_COLOR_BUFS);

   /* Setup build context */
   memset(&bld, 0, sizeof bld);
   lp_build_context_init(&bld.base, gallivm, type);
   for (i = 0; i < 4; ++i) {
      bld.src[i] = src[i];
      bld.dst[i] = dst[i];
      bld.con[i] = con[i];
   }

   for (i = 0; i < 4; ++i) {
      /* only compute blending for the color channels enabled for writing */
      if (blend->rt[rt].colormask & (1 << i)) {
         if (blend->logicop_enable) {
            if(!type.floating) {
               res[i] = lp_build_logicop(builder, blend->logicop_func, src[i], dst[i]);
            }
            else
               res[i] = dst[i];
         }
         else if (blend->rt[rt].blend_enable) {
            /* Channels 0..2 use the rgb state, channel 3 the alpha state */
            unsigned src_factor = i < 3 ? blend->rt[rt].rgb_src_factor : blend->rt[rt].alpha_src_factor;
            unsigned dst_factor = i < 3 ? blend->rt[rt].rgb_dst_factor : blend->rt[rt].alpha_dst_factor;
            unsigned func = i < 3 ? blend->rt[rt].rgb_func : blend->rt[rt].alpha_func;
            boolean func_commutative = lp_build_blend_func_commutative(func);

            /*
             * Compute src/dst factors.
             *
             * factor[0] holds the two multiplicands of the src term,
             * factor[1] those of the dst term.
             */

            bld.factor[0][0][i] = src[i];
            bld.factor[0][1][i] = lp_build_blend_soa_factor(&bld, src_factor, i);
            bld.factor[1][0][i] = dst[i];
            bld.factor[1][1][i] = lp_build_blend_soa_factor(&bld, dst_factor, i);

            /*
             * Check if lp_build_blend can perform any optimisations.
             * A non-NULL return is the final value for this channel.  Note
             * that bld.term[][i] stays NULL in that case, which is what the
             * reuse checks of later channels rely on.
             */
            res[i] = lp_build_blend(&bld.base,
                                    func,
                                    src_factor,
                                    dst_factor,
                                    bld.factor[0][0][i],
                                    bld.factor[1][0][i],
                                    bld.factor[0][1][i],
                                    bld.factor[1][1][i],
                                    true,
                                    true);

            if (res[i]) {
               continue;
            }

            /*
             * Compute src/dst terms
             */

            for(k = 0; k < 2; ++k) {
               /* See if this multiplication has been previously computed */
               for(j = 0; j < i; ++j) {
                  if((bld.factor[k][0][j] == bld.factor[k][0][i] &&
                      bld.factor[k][1][j] == bld.factor[k][1][i]) ||
                     (bld.factor[k][0][j] == bld.factor[k][1][i] &&
                      bld.factor[k][1][j] == bld.factor[k][0][i]))
                     break;
               }

               /* Only reuse when the earlier channel actually built a term */
               if(j < i && bld.term[k][j])
                  bld.term[k][i] = bld.term[k][j];
               else
                  bld.term[k][i] = lp_build_mul(&bld.base, bld.factor[k][0][i], bld.factor[k][1][i]);

               if (src_factor == PIPE_BLENDFACTOR_ZERO &&
                   (dst_factor == PIPE_BLENDFACTOR_DST_ALPHA ||
                    dst_factor == PIPE_BLENDFACTOR_INV_DST_ALPHA)) {
                  /* XXX special case these combos to work around an apparent
                   * bug in LLVM.
                   * This hack disables the check for multiplication by zero
                   * in lp_bld_mul().  When we optimize away the
                   * multiplication, something goes wrong during code
                   * generation and we segfault at runtime.
                   *
                   * The term assigned just above is replaced here.
                   */
                  LLVMValueRef zeroSave = bld.base.zero;
                  bld.base.zero = NULL;
                  bld.term[k][i] = lp_build_mul(&bld.base, bld.factor[k][0][i],
                                                bld.factor[k][1][i]);
                  bld.base.zero = zeroSave;
               }
            }

            /*
             * Combine terms
             */

            /* See if this function has been previously applied */
            for(j = 0; j < i; ++j) {
               unsigned prev_func = j < 3 ? blend->rt[rt].rgb_func : blend->rt[rt].alpha_func;
               unsigned func_reverse = lp_build_blend_func_reverse(func, prev_func);

               /*
                * res[j] was built with prev_func, so it can only stand in
                * for this channel when both channels use the same function
                * or functions that are each other's reverse.  Matching
                * terms alone are not enough (e.g. rgb ADD vs alpha MIN).
                */
               if (func != prev_func && !func_reverse)
                  continue;

               if((!func_reverse &&
                   bld.term[0][j] == bld.term[0][i] &&
                   bld.term[1][j] == bld.term[1][i]) ||
                  ((func_commutative || func_reverse) &&
                   bld.term[0][j] == bld.term[1][i] &&
                   bld.term[1][j] == bld.term[0][i]))
                  break;
            }

            if(j < i)
               res[i] = res[j];
            else
               res[i] = lp_build_blend_func(&bld.base, func, bld.term[0][i], bld.term[1][i]);
         }
         else {
            /* Neither logic op nor blending: the fragment color is written */
            res[i] = src[i];
         }
      }
      else {
         /* Channel is not writable: keep the framebuffer value */
         res[i] = dst[i];
      }
   }
}
/**
 * Generate blend code in SOA mode.
 *
 * Each of the four channels is processed on its own:
 *  - channels masked off by the colormask pass dst through;
 *  - with logic ops enabled, non-floating types get the logic op applied and
 *    floating point types pass dst through;
 *  - with blending enabled the result is
 *    func(src * src_factor, dst * dst_factor), reusing values already built
 *    for an earlier channel whenever the operands are the very same LLVM
 *    values;
 *  - otherwise src is passed through.
 *
 * \param builder  the LLVM builder
 * \param blend  the blend state
 * \param type  type of the src/dst/con/res values
 * \param src  src/fragment color
 * \param dst  dst/framebuffer color
 * \param con  constant blend color
 * \param res  the result/output
 */
void
lp_build_blend_soa(LLVMBuilderRef builder,
                   const struct pipe_blend_state *blend,
                   struct lp_type type,
                   LLVMValueRef src[4],
                   LLVMValueRef dst[4],
                   LLVMValueRef con[4],
                   LLVMValueRef res[4])
{
   struct lp_build_blend_soa_context bld;
   unsigned i, j, k;

   /* Setup build context */
   memset(&bld, 0, sizeof bld);
   lp_build_context_init(&bld.base, builder, type);
   for (i = 0; i < 4; ++i) {
      bld.src[i] = src[i];
      bld.dst[i] = dst[i];
      bld.con[i] = con[i];
   }

   for (i = 0; i < 4; ++i) {
      /* only compute blending for the color channels enabled for writing */
      if (blend->colormask & (1 << i)) {
         if (blend->logicop_enable) {
            if(!type.floating) {
               res[i] = lp_build_logicop(builder, blend->logicop_func, src[i], dst[i]);
            }
            else
               res[i] = dst[i];
         }
         else if (blend->blend_enable) {
            /* Channels 0..2 use the rgb state, channel 3 the alpha state */
            unsigned src_factor = i < 3 ? blend->rgb_src_factor : blend->alpha_src_factor;
            unsigned dst_factor = i < 3 ? blend->rgb_dst_factor : blend->alpha_dst_factor;
            unsigned func = i < 3 ? blend->rgb_func : blend->alpha_func;
            boolean func_commutative = lp_build_blend_func_commutative(func);

            /* It makes no sense to blend unless values are normalized */
            assert(type.norm);

            /*
             * Compute src/dst factors.
             *
             * factor[0] holds the two multiplicands of the src term,
             * factor[1] those of the dst term.
             */

            bld.factor[0][0][i] = src[i];
            bld.factor[0][1][i] = lp_build_blend_soa_factor(&bld, src_factor, i);
            bld.factor[1][0][i] = dst[i];
            bld.factor[1][1][i] = lp_build_blend_soa_factor(&bld, dst_factor, i);

            /*
             * Compute src/dst terms
             */

            for(k = 0; k < 2; ++k) {
               /* See if this multiplication has been previously computed */
               for(j = 0; j < i; ++j) {
                  if((bld.factor[k][0][j] == bld.factor[k][0][i] &&
                      bld.factor[k][1][j] == bld.factor[k][1][i]) ||
                     (bld.factor[k][0][j] == bld.factor[k][1][i] &&
                      bld.factor[k][1][j] == bld.factor[k][0][i]))
                     break;
               }

               if(j < i)
                  bld.term[k][i] = bld.term[k][j];
               else
                  bld.term[k][i] = lp_build_mul(&bld.base, bld.factor[k][0][i], bld.factor[k][1][i]);
            }

            /*
             * Combine terms
             */

            /* See if this function has been previously applied */
            for(j = 0; j < i; ++j) {
               unsigned prev_func = j < 3 ? blend->rgb_func : blend->alpha_func;
               unsigned func_reverse = lp_build_blend_func_reverse(func, prev_func);

               /*
                * res[j] was built with prev_func, so it can only stand in
                * for this channel when both channels use the same function
                * or functions that are each other's reverse.  Matching
                * terms alone are not enough (e.g. rgb ADD vs alpha MIN).
                */
               if (func != prev_func && !func_reverse)
                  continue;

               if((!func_reverse &&
                   bld.term[0][j] == bld.term[0][i] &&
                   bld.term[1][j] == bld.term[1][i]) ||
                  ((func_commutative || func_reverse) &&
                   bld.term[0][j] == bld.term[1][i] &&
                   bld.term[1][j] == bld.term[0][i]))
                  break;
            }

            if(j < i)
               res[i] = res[j];
            else
               res[i] = lp_build_blend_func(&bld.base, func, bld.term[0][i], bld.term[1][i]);
         }
         else {
            /* Neither logic op nor blending: the fragment color is written */
            res[i] = src[i];
         }
      }
      else {
         /* Channel is not writable: keep the framebuffer value */
         res[i] = dst[i];
      }
   }
}
Exemple #3
0
/**
 * Blend a src/dst pair, first trying the shortcuts that hold for both the
 * SoA and the AoS code paths.
 *
 * @param func                   the blend function (PIPE_BLEND_xxx)
 * @param factor_src             PIPE_BLENDFACTOR_xxx of the source
 * @param factor_dst             PIPE_BLENDFACTOR_xxx of the destination
 * @param src                    source rgba
 * @param dst                    dest rgba
 * @param src_factor             computed value of the source factor
 * @param dst_factor             computed value of the destination factor
 * @param not_alpha_dependent    same factors across all channels of src/dst
 * @param optimise_only          if set and no shortcut applies, return NULL
 *                               instead of emitting the generic blend
 *
 * @return the blended value, or NULL when optimise_only is set and none of
 *         the shortcuts was applicable
 *
 * not_alpha_dependent should be:
 *  SoA: always true as it is only one channel at a time
 *  AoS: rgb_src_factor == alpha_src_factor && rgb_dst_factor == alpha_dst_factor
 *
 * Most of the shortcuts are restricted to floating point types: unorm
 * values never exceed 1.0, so factoring an expression can change its result.
 * e.g. (0.9 * 0.9) + (0.9 * 0.9) != 0.9 * (0.9 + 0.9) as the result of + is
 * always <= 1.
 */
LLVMValueRef
lp_build_blend(struct lp_build_context *bld,
               unsigned func,
               unsigned factor_src,
               unsigned factor_dst,
               LLVMValueRef src,
               LLVMValueRef dst,
               LLVMValueRef src_factor,
               LLVMValueRef dst_factor,
               boolean not_alpha_dependent,
               boolean optimise_only)
{
   LLVMValueRef src_term;
   LLVMValueRef dst_term;

   /* The factors may only be rearranged when they do not depend on alpha */
   if (not_alpha_dependent) {
      const boolean is_float = bld->type.floating;

      if (lp_build_blend_factor_complementary(factor_src, factor_dst)) {
         /*
          * The two factors sum to one, so a single one of them is enough;
          * the one with the smaller enum value is used as the weight.
          */
         const boolean weight_is_src = factor_src < factor_dst;
         LLVMValueRef weight = weight_is_src ? src_factor : dst_factor;

         if (func == PIPE_BLEND_ADD) {
            return weight_is_src ? lp_build_lerp(bld, weight, dst, src, 0)
                                 : lp_build_lerp(bld, weight, src, dst, 0);
         }

         if (is_float && func == PIPE_BLEND_SUBTRACT) {
            LLVMValueRef sum = lp_build_add(bld, src, dst);
            LLVMValueRef scaled = lp_build_mul(bld, sum, weight);

            return weight_is_src ? lp_build_sub(bld, scaled, dst)
                                 : lp_build_sub(bld, src, scaled);
         }

         if (is_float && func == PIPE_BLEND_REVERSE_SUBTRACT) {
            LLVMValueRef sum = lp_build_add(bld, src, dst);
            LLVMValueRef scaled = lp_build_mul(bld, sum, weight);

            return weight_is_src ? lp_build_sub(bld, dst, scaled)
                                 : lp_build_sub(bld, scaled, src);
         }
      }

      /* Identical factors: apply the function first, then scale once */
      if (is_float && factor_src == factor_dst &&
          (func == PIPE_BLEND_ADD ||
           func == PIPE_BLEND_SUBTRACT ||
           func == PIPE_BLEND_REVERSE_SUBTRACT)) {
         LLVMValueRef combined = lp_build_blend_func(bld, func, src, dst);

         return lp_build_mul(bld, combined, src_factor);
      }
   }

   /* Caller only wanted the shortcuts */
   if (optimise_only) {
      return NULL;
   }

   /* Generic path: func(src * src_factor, dst * dst_factor) */
   src_term = lp_build_mul(bld, src, src_factor);
   dst_term = lp_build_mul(bld, dst, dst_factor);
   return lp_build_blend_func(bld, func, src_term, dst_term);
}