/**
 * Generate blend code in SOA mode.
 * \param src  src/fragment color
 * \param dst  dst/framebuffer color
 * \param con  constant blend color
 * \param res  the result/output
 */
void
lp_build_blend_soa(LLVMBuilderRef builder,
                   const struct pipe_blend_state *blend,
                   struct lp_type type,
                   LLVMValueRef src[4],
                   LLVMValueRef dst[4],
                   LLVMValueRef con[4],
                   LLVMValueRef res[4])
{
   struct lp_build_blend_soa_context bld;
   unsigned i, j, k;

   /* Setup build context */
   memset(&bld, 0, sizeof bld);
   lp_build_context_init(&bld.base, builder, type);
   for (i = 0; i < 4; ++i) {
      bld.src[i] = src[i];
      bld.dst[i] = dst[i];
      bld.con[i] = con[i];
   }

   for (i = 0; i < 4; ++i) {
      if (blend->colormask & (1 << i)) {
         if (blend->logicop_enable) {
            if(!type.floating) {
               res[i] = lp_build_logicop(builder, blend->logicop_func, src[i], dst[i]);
            }
            else
               res[i] = dst[i];
         }
         else if (blend->blend_enable) {
            unsigned src_factor = i < 3 ? blend->rgb_src_factor : blend->alpha_src_factor;
            unsigned dst_factor = i < 3 ? blend->rgb_dst_factor : blend->alpha_dst_factor;
            unsigned func = i < 3 ? blend->rgb_func : blend->alpha_func;
            boolean func_commutative = lp_build_blend_func_commutative(func);

            /* It makes no sense to blend unless values are normalized */
            assert(type.norm);

            /*
             * Compute src/dst factors.
             */

            bld.factor[0][0][i] = src[i];
            bld.factor[0][1][i] = lp_build_blend_soa_factor(&bld, src_factor, i);
            bld.factor[1][0][i] = dst[i];
            bld.factor[1][1][i] = lp_build_blend_soa_factor(&bld, dst_factor, i);

            /*
             * Compute src/dst terms
             */

            for(k = 0; k < 2; ++k) {
               /* See if this multiplication has been previously computed */
               for(j = 0; j < i; ++j) {
                  if((bld.factor[k][0][j] == bld.factor[k][0][i] &&
                      bld.factor[k][1][j] == bld.factor[k][1][i]) ||
                     (bld.factor[k][0][j] == bld.factor[k][1][i] &&
                      bld.factor[k][1][j] == bld.factor[k][0][i]))
                     break;
               }

               if(j < i)
                  bld.term[k][i] = bld.term[k][j];
               else
                  bld.term[k][i] = lp_build_mul(&bld.base, bld.factor[k][0][i], bld.factor[k][1][i]);
            }

            /*
             * Combine terms
             */

            /* See if this function has been previously applied */
            for(j = 0; j < i; ++j) {
               unsigned prev_func = j < 3 ? blend->rgb_func : blend->alpha_func;
               unsigned func_reverse = lp_build_blend_func_reverse(func, prev_func);

               if((!func_reverse &&
                   bld.term[0][j] == bld.term[0][i] &&
                   bld.term[1][j] == bld.term[1][i]) ||
                  ((func_commutative || func_reverse) &&
                   bld.term[0][j] == bld.term[1][i] &&
                   bld.term[1][j] == bld.term[0][i]))
                  break;
            }

            if(j < i)
               res[i] = res[j];
            else
               res[i] = lp_build_blend_func(&bld.base, func, bld.term[0][i], bld.term[1][i]);
         }
         else {
            res[i] = src[i];
         }
      }
      else {
         res[i] = dst[i];
      }
   }
}
/**
 * Generate blend code in SOA mode.
 * \param rt  render target index (to index the blend / colormask state)
 * \param src  src/fragment color
 * \param dst  dst/framebuffer color
 * \param con  constant blend color
 * \param res  the result/output
 */
void
lp_build_blend_soa(struct gallivm_state *gallivm,
                   const struct pipe_blend_state *blend,
                   struct lp_type type,
                   unsigned rt,
                   LLVMValueRef src[4],
                   LLVMValueRef dst[4],
                   LLVMValueRef con[4],
                   LLVMValueRef res[4])
{
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_blend_soa_context bld;
   unsigned i, j, k;

   assert(rt < PIPE_MAX_COLOR_BUFS);

   /* Setup build context */
   memset(&bld, 0, sizeof bld);
   lp_build_context_init(&bld.base, gallivm, type);
   for (i = 0; i < 4; ++i) {
      bld.src[i] = src[i];
      bld.dst[i] = dst[i];
      bld.con[i] = con[i];
   }

   for (i = 0; i < 4; ++i) {
      /* only compute blending for the color channels enabled for writing */
      if (blend->rt[rt].colormask & (1 << i)) {
         if (blend->logicop_enable) {
            if(!type.floating) {
               res[i] = lp_build_logicop(builder, blend->logicop_func, src[i], dst[i]);
            }
            else
               res[i] = dst[i];
         }
         else if (blend->rt[rt].blend_enable) {
            unsigned src_factor = i < 3 ? blend->rt[rt].rgb_src_factor : blend->rt[rt].alpha_src_factor;
            unsigned dst_factor = i < 3 ? blend->rt[rt].rgb_dst_factor : blend->rt[rt].alpha_dst_factor;
            unsigned func = i < 3 ? blend->rt[rt].rgb_func : blend->rt[rt].alpha_func;
            boolean func_commutative = lp_build_blend_func_commutative(func);

            /*
             * Compute src/dst factors.
             */

            bld.factor[0][0][i] = src[i];
            bld.factor[0][1][i] = lp_build_blend_soa_factor(&bld, src_factor, i);
            bld.factor[1][0][i] = dst[i];
            bld.factor[1][1][i] = lp_build_blend_soa_factor(&bld, dst_factor, i);

            /*
             * Check if lp_build_blend can perform any optimisations
             */
            res[i] = lp_build_blend(&bld.base,
                                    func,
                                    src_factor,
                                    dst_factor,
                                    bld.factor[0][0][i],
                                    bld.factor[1][0][i],
                                    bld.factor[0][1][i],
                                    bld.factor[1][1][i],
                                    true,
                                    true);

            if (res[i]) {
               continue;
            }

            /*
             * Compute src/dst terms
             */

            for(k = 0; k < 2; ++k) {
               /* See if this multiplication has been previously computed */
               for(j = 0; j < i; ++j) {
                  if((bld.factor[k][0][j] == bld.factor[k][0][i] &&
                      bld.factor[k][1][j] == bld.factor[k][1][i]) ||
                     (bld.factor[k][0][j] == bld.factor[k][1][i] &&
                      bld.factor[k][1][j] == bld.factor[k][0][i]))
                     break;
               }

               if(j < i && bld.term[k][j])
                  bld.term[k][i] = bld.term[k][j];
               else
                  bld.term[k][i] = lp_build_mul(&bld.base, bld.factor[k][0][i], bld.factor[k][1][i]);

               if (src_factor == PIPE_BLENDFACTOR_ZERO &&
                   (dst_factor == PIPE_BLENDFACTOR_DST_ALPHA ||
                    dst_factor == PIPE_BLENDFACTOR_INV_DST_ALPHA)) {
                  /* XXX special case these combos to work around an apparent
                   * bug in LLVM.
                   * This hack disables the check for multiplication by zero
                   * in lp_bld_mul().  When we optimize away the
                   * multiplication, something goes wrong during code
                   * generation and we segfault at runtime.
                   */
                  LLVMValueRef zeroSave = bld.base.zero;
                  bld.base.zero = NULL;
                  bld.term[k][i] = lp_build_mul(&bld.base, bld.factor[k][0][i],
                                                bld.factor[k][1][i]);
                  bld.base.zero = zeroSave;
               }
            }

            /*
             * Combine terms
             */

            /* See if this function has been previously applied */
            for(j = 0; j < i; ++j) {
               unsigned prev_func = j < 3 ? blend->rt[rt].rgb_func : blend->rt[rt].alpha_func;
               unsigned func_reverse = lp_build_blend_func_reverse(func, prev_func);

               if((!func_reverse &&
                   bld.term[0][j] == bld.term[0][i] &&
                   bld.term[1][j] == bld.term[1][i]) ||
                  ((func_commutative || func_reverse) &&
                   bld.term[0][j] == bld.term[1][i] &&
                   bld.term[1][j] == bld.term[0][i]))
                  break;
            }

            if(j < i)
               res[i] = res[j];
            else
               res[i] = lp_build_blend_func(&bld.base, func, bld.term[0][i], bld.term[1][i]);
         }
         else {
            res[i] = src[i];
         }
      }
      else {
         res[i] = dst[i];
      }
   }
}