/**
 * Generate blend code in SOA mode.
 * \param rt   render target index (to index the blend / colormask state)
 * \param src  src/fragment color
 * \param dst  dst/framebuffer color
 * \param con  constant blend color
 * \param res  the result/output
 */
void
lp_build_blend_soa(struct gallivm_state *gallivm,
                   const struct pipe_blend_state *blend,
                   struct lp_type type,
                   unsigned rt,
                   LLVMValueRef src[4],
                   LLVMValueRef dst[4],
                   LLVMValueRef con[4],
                   LLVMValueRef res[4])
{
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_blend_soa_context bld;
   unsigned i, j, k;

   assert(rt < PIPE_MAX_COLOR_BUFS);

   /* Setup build context */
   memset(&bld, 0, sizeof bld);
   lp_build_context_init(&bld.base, gallivm, type);
   for (i = 0; i < 4; ++i) {
      bld.src[i] = src[i];
      bld.dst[i] = dst[i];
      bld.con[i] = con[i];
   }

   for (i = 0; i < 4; ++i) {
      /* only compute blending for the color channels enabled for writing */
      if (blend->rt[rt].colormask & (1 << i)) {
         if (blend->logicop_enable) {
            if (!type.floating) {
               res[i] = lp_build_logicop(builder, blend->logicop_func, src[i], dst[i]);
            }
            else
               res[i] = dst[i];
         }
         else if (blend->rt[rt].blend_enable) {
            unsigned src_factor = i < 3 ? blend->rt[rt].rgb_src_factor : blend->rt[rt].alpha_src_factor;
            unsigned dst_factor = i < 3 ? blend->rt[rt].rgb_dst_factor : blend->rt[rt].alpha_dst_factor;
            unsigned func = i < 3 ? blend->rt[rt].rgb_func : blend->rt[rt].alpha_func;
            boolean func_commutative = lp_build_blend_func_commutative(func);

            /*
             * Compute src/dst factors.
             */

            bld.factor[0][0][i] = src[i];
            bld.factor[0][1][i] = lp_build_blend_soa_factor(&bld, src_factor, i);
            bld.factor[1][0][i] = dst[i];
            bld.factor[1][1][i] = lp_build_blend_soa_factor(&bld, dst_factor, i);

            /*
             * Check if lp_build_blend can perform any optimisations
             */
            res[i] = lp_build_blend(&bld.base,
                                    func,
                                    src_factor,
                                    dst_factor,
                                    bld.factor[0][0][i],
                                    bld.factor[1][0][i],
                                    bld.factor[0][1][i],
                                    bld.factor[1][1][i],
                                    true,
                                    true);

            if (res[i]) {
               continue;
            }

            /*
             * Compute src/dst terms
             */

            for (k = 0; k < 2; ++k) {
               /* See if this multiplication has been previously computed */
               for (j = 0; j < i; ++j) {
                  if ((bld.factor[k][0][j] == bld.factor[k][0][i] &&
                       bld.factor[k][1][j] == bld.factor[k][1][i]) ||
                      (bld.factor[k][0][j] == bld.factor[k][1][i] &&
                       bld.factor[k][1][j] == bld.factor[k][0][i]))
                     break;
               }

               if (j < i && bld.term[k][j])
                  bld.term[k][i] = bld.term[k][j];
               else
                  bld.term[k][i] = lp_build_mul(&bld.base, bld.factor[k][0][i], bld.factor[k][1][i]);

               if (src_factor == PIPE_BLENDFACTOR_ZERO &&
                   (dst_factor == PIPE_BLENDFACTOR_DST_ALPHA ||
                    dst_factor == PIPE_BLENDFACTOR_INV_DST_ALPHA)) {
                  /* XXX special case these combos to work around an apparent
                   * bug in LLVM.
                   * This hack disables the check for multiplication by zero
                   * in lp_bld_mul().  When we optimize away the
                   * multiplication, something goes wrong during code
                   * generation and we segfault at runtime.
                   */
                  LLVMValueRef zeroSave = bld.base.zero;
                  bld.base.zero = NULL;
                  bld.term[k][i] = lp_build_mul(&bld.base, bld.factor[k][0][i],
                                                bld.factor[k][1][i]);
                  bld.base.zero = zeroSave;
               }
            }

            /*
             * Combine terms
             */

            /* See if this function has been previously applied */
            for (j = 0; j < i; ++j) {
               unsigned prev_func = j < 3 ? blend->rt[rt].rgb_func : blend->rt[rt].alpha_func;
               unsigned func_reverse = lp_build_blend_func_reverse(func, prev_func);

               if ((!func_reverse &&
                    bld.term[0][j] == bld.term[0][i] &&
                    bld.term[1][j] == bld.term[1][i]) ||
                   ((func_commutative || func_reverse) &&
                    bld.term[0][j] == bld.term[1][i] &&
                    bld.term[1][j] == bld.term[0][i]))
                  break;
            }

            if (j < i)
               res[i] = res[j];
            else
               res[i] = lp_build_blend_func(&bld.base, func, bld.term[0][i], bld.term[1][i]);
         }
         else {
            res[i] = src[i];
         }
      }
      else {
         res[i] = dst[i];
      }
   }
}
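/*
 * Illustrative usage sketch (editorial addition, not part of the original
 * source): how a caller might drive lp_build_blend_soa() above for classic
 * non-premultiplied alpha blending on render target 0.  The helper name
 * example_emit_alpha_blend is hypothetical; the gallivm context, the lp_type
 * describing the vector layout, the src/dst/con/res value arrays, and the
 * file's existing includes are assumed to be provided by the surrounding
 * fragment pipeline generator.  The pipe_blend_state fields and the
 * PIPE_BLEND*/PIPE_MASK* enums are the standard Gallium definitions.
 */
static void
example_emit_alpha_blend(struct gallivm_state *gallivm,
                         struct lp_type type,
                         LLVMValueRef src[4],
                         LLVMValueRef dst[4],
                         LLVMValueRef con[4],
                         LLVMValueRef res[4])
{
   struct pipe_blend_state blend;

   memset(&blend, 0, sizeof blend);
   blend.rt[0].blend_enable = 1;
   blend.rt[0].colormask = PIPE_MASK_RGBA;
   blend.rt[0].rgb_func = PIPE_BLEND_ADD;
   blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_SRC_ALPHA;
   blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_INV_SRC_ALPHA;
   blend.rt[0].alpha_func = PIPE_BLEND_ADD;
   blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE;
   blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_INV_SRC_ALPHA;

   /* res[] receives one blended vector per channel (R, G, B, A) */
   lp_build_blend_soa(gallivm, &blend, type, 0 /* rt */, src, dst, con, res);
}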
/**
 * Generate blend code in SOA mode.
 * \param src  src/fragment color
 * \param dst  dst/framebuffer color
 * \param con  constant blend color
 * \param res  the result/output
 */
void
lp_build_blend_soa(LLVMBuilderRef builder,
                   const struct pipe_blend_state *blend,
                   struct lp_type type,
                   LLVMValueRef src[4],
                   LLVMValueRef dst[4],
                   LLVMValueRef con[4],
                   LLVMValueRef res[4])
{
   struct lp_build_blend_soa_context bld;
   unsigned i, j, k;

   /* Setup build context */
   memset(&bld, 0, sizeof bld);
   lp_build_context_init(&bld.base, builder, type);
   for (i = 0; i < 4; ++i) {
      bld.src[i] = src[i];
      bld.dst[i] = dst[i];
      bld.con[i] = con[i];
   }

   for (i = 0; i < 4; ++i) {
      if (blend->colormask & (1 << i)) {
         if (blend->logicop_enable) {
            if (!type.floating) {
               res[i] = lp_build_logicop(builder, blend->logicop_func, src[i], dst[i]);
            }
            else
               res[i] = dst[i];
         }
         else if (blend->blend_enable) {
            unsigned src_factor = i < 3 ? blend->rgb_src_factor : blend->alpha_src_factor;
            unsigned dst_factor = i < 3 ? blend->rgb_dst_factor : blend->alpha_dst_factor;
            unsigned func = i < 3 ? blend->rgb_func : blend->alpha_func;
            boolean func_commutative = lp_build_blend_func_commutative(func);

            /* It makes no sense to blend unless values are normalized */
            assert(type.norm);

            /*
             * Compute src/dst factors.
             */

            bld.factor[0][0][i] = src[i];
            bld.factor[0][1][i] = lp_build_blend_soa_factor(&bld, src_factor, i);
            bld.factor[1][0][i] = dst[i];
            bld.factor[1][1][i] = lp_build_blend_soa_factor(&bld, dst_factor, i);

            /*
             * Compute src/dst terms
             */

            for (k = 0; k < 2; ++k) {
               /* See if this multiplication has been previously computed */
               for (j = 0; j < i; ++j) {
                  if ((bld.factor[k][0][j] == bld.factor[k][0][i] &&
                       bld.factor[k][1][j] == bld.factor[k][1][i]) ||
                      (bld.factor[k][0][j] == bld.factor[k][1][i] &&
                       bld.factor[k][1][j] == bld.factor[k][0][i]))
                     break;
               }

               if (j < i)
                  bld.term[k][i] = bld.term[k][j];
               else
                  bld.term[k][i] = lp_build_mul(&bld.base, bld.factor[k][0][i], bld.factor[k][1][i]);
            }

            /*
             * Combine terms
             */

            /* See if this function has been previously applied */
            for (j = 0; j < i; ++j) {
               unsigned prev_func = j < 3 ? blend->rgb_func : blend->alpha_func;
               unsigned func_reverse = lp_build_blend_func_reverse(func, prev_func);

               if ((!func_reverse &&
                    bld.term[0][j] == bld.term[0][i] &&
                    bld.term[1][j] == bld.term[1][i]) ||
                   ((func_commutative || func_reverse) &&
                    bld.term[0][j] == bld.term[1][i] &&
                    bld.term[1][j] == bld.term[0][i]))
                  break;
            }

            if (j < i)
               res[i] = res[j];
            else
               res[i] = lp_build_blend_func(&bld.base, func, bld.term[0][i], bld.term[1][i]);
         }
         else {
            res[i] = src[i];
         }
      }
      else {
         res[i] = dst[i];
      }
   }
}
/**
 * Performs optimisations and blending independent of SoA/AoS
 *
 * @param func the blend function
 * @param factor_src PIPE_BLENDFACTOR_xxx
 * @param factor_dst PIPE_BLENDFACTOR_xxx
 * @param src source rgba
 * @param dst dest rgba
 * @param src_factor src factor computed value
 * @param dst_factor dst factor computed value
 * @param not_alpha_dependent same factors across all channels of src/dst
 *
 * not_alpha_dependent should be:
 *  SoA: always true as it is only one channel at a time
 *  AoS: rgb_src_factor == alpha_src_factor && rgb_dst_factor == alpha_dst_factor
 *
 * Note that pretty much every possible optimisation can only be done on non-unorm targets
 * due to unorm values not going above 1.0, meaning factorisation can change results.
 * e.g. (0.9 * 0.9) + (0.9 * 0.9) != 0.9 * (0.9 + 0.9) as the result of + is always <= 1.
 */
LLVMValueRef
lp_build_blend(struct lp_build_context *bld,
               unsigned func,
               unsigned factor_src,
               unsigned factor_dst,
               LLVMValueRef src,
               LLVMValueRef dst,
               LLVMValueRef src_factor,
               LLVMValueRef dst_factor,
               boolean not_alpha_dependent,
               boolean optimise_only)
{
   LLVMValueRef result, src_term, dst_term;

   /* If we are not alpha dependent we can mess with the src/dst factors */
   if (not_alpha_dependent) {
      if (lp_build_blend_factor_complementary(factor_src, factor_dst)) {
         if (func == PIPE_BLEND_ADD) {
            if (factor_src < factor_dst) {
               return lp_build_lerp(bld, src_factor, dst, src, 0);
            } else {
               return lp_build_lerp(bld, dst_factor, src, dst, 0);
            }
         } else if (bld->type.floating && func == PIPE_BLEND_SUBTRACT) {
            result = lp_build_add(bld, src, dst);

            if (factor_src < factor_dst) {
               result = lp_build_mul(bld, result, src_factor);
               return lp_build_sub(bld, result, dst);
            } else {
               result = lp_build_mul(bld, result, dst_factor);
               return lp_build_sub(bld, src, result);
            }
         } else if (bld->type.floating && func == PIPE_BLEND_REVERSE_SUBTRACT) {
            result = lp_build_add(bld, src, dst);

            if (factor_src < factor_dst) {
               result = lp_build_mul(bld, result, src_factor);
               return lp_build_sub(bld, dst, result);
            } else {
               result = lp_build_mul(bld, result, dst_factor);
               return lp_build_sub(bld, result, src);
            }
         }
      }

      if (bld->type.floating && factor_src == factor_dst) {
         if (func == PIPE_BLEND_ADD ||
             func == PIPE_BLEND_SUBTRACT ||
             func == PIPE_BLEND_REVERSE_SUBTRACT) {
            LLVMValueRef result;

            result = lp_build_blend_func(bld, func, src, dst);
            return lp_build_mul(bld, result, src_factor);
         }
      }
   }

   if (optimise_only)
      return NULL;

   src_term = lp_build_mul(bld, src, src_factor);
   dst_term = lp_build_mul(bld, dst, dst_factor);
   return lp_build_blend_func(bld, func, src_term, dst_term);
}
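/*
 * Worked example (editorial note, not from the original source) of the
 * complementary-factor rewrite used in the PIPE_BLEND_ADD path above: when
 * factor_dst is the complement of factor_src (e.g. SRC_ALPHA / INV_SRC_ALPHA),
 * the blend collapses into a single linear interpolation,
 *
 *    src * A + dst * (1 - A)  ==  dst + A * (src - dst)
 *
 * so two multiplies and an add become one multiply-add.  Checking with
 * A = 0.25, src = 0.8, dst = 0.4:
 *
 *    0.8 * 0.25 + 0.4 * 0.75 = 0.5    and    0.4 + 0.25 * (0.8 - 0.4) = 0.5
 */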