/**
 * Do the one or two-sided stencil test op/update.
 *
 * \param stencil       front [0] / back [1] stencil state; stencil[0] must be enabled
 * \param op            which stencil op set to apply (fail / z-fail / z-pass)
 * \param stencilRefs   front/back reference values, already broadcast to vectors
 * \param stencilVals   current stencil buffer values (vector)
 * \param mask          execution mask: lanes where the update applies
 * \param front_facing  per-lane ~0/0 front-facing mask, or NULL for front-only
 * \return the updated stencil values
 */
static LLVMValueRef
lp_build_stencil_op(struct lp_build_context *bld,
                    const struct pipe_stencil_state stencil[2],
                    enum stencil_op op,
                    LLVMValueRef stencilRefs[2],
                    LLVMValueRef stencilVals,
                    LLVMValueRef mask,
                    LLVMValueRef front_facing)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef res;

   assert(stencil[0].enabled);

   /* do front face op */
   res = lp_build_stencil_op_single(bld, &stencil[0], op,
                                    stencilRefs[0], stencilVals);

   if (stencil[1].enabled && front_facing != NULL) {
      /* do back face op */
      LLVMValueRef back_res;

      back_res = lp_build_stencil_op_single(bld, &stencil[1], op,
                                            stencilRefs[1], stencilVals);

      /* per-lane choice between the front and back result */
      res = lp_build_select(bld, front_facing, res, back_res);
   }

   if (stencil[0].writemask != 0xff ||
       (stencil[1].enabled && front_facing != NULL &&
        stencil[1].writemask != 0xff)) {
      /* mask &= stencil[0].writemask */
      LLVMValueRef writemask = lp_build_const_int_vec(bld->gallivm, bld->type,
                                                      stencil[0].writemask);
      if (stencil[1].enabled &&
          stencil[1].writemask != stencil[0].writemask &&
          front_facing != NULL) {
         /* two-sided with differing writemasks: pick per lane */
         LLVMValueRef back_writemask =
            lp_build_const_int_vec(bld->gallivm, bld->type,
                                   stencil[1].writemask);
         writemask = lp_build_select(bld, front_facing,
                                     writemask, back_writemask);
      }

      mask = LLVMBuildAnd(builder, mask, writemask, "");
      /* res = (res & mask) | (stencilVals & ~mask)
       * bitwise merge so only writemask-enabled bits are replaced */
      res = lp_build_select_bitwise(bld, mask, res, stencilVals);
   }
   else {
      /* full writemask: res = mask ? res : stencilVals */
      res = lp_build_select(bld, mask, res, stencilVals);
   }

   return res;
}
/**
 * Generate color blending and color output.
 *
 * Loads the constant blend color and the destination colors, blends them
 * with the shader outputs, then stores the result back to the color buffer
 * for the channels enabled in the per-rt colormask.  Dead lanes (per the
 * execution mask) keep their previous destination value.
 *
 * \param rt the render target index (to index blend, colormask state)
 * \param type the pixel color type
 * \param context_ptr pointer to the runtime JIT context
 * \param mask execution mask (active fragment/pixel mask)
 * \param src colors from the fragment shader
 * \param dst_ptr the destination color buffer pointer
 */
static void
generate_blend(const struct pipe_blend_state *blend,
               unsigned rt,
               LLVMBuilderRef builder,
               struct lp_type type,
               LLVMValueRef context_ptr,
               LLVMValueRef mask,
               LLVMValueRef *src,
               LLVMValueRef dst_ptr)
{
   struct lp_build_context bld;
   struct lp_build_flow_context *flow;
   struct lp_build_mask_context mask_ctx;
   LLVMTypeRef vec_type;
   LLVMValueRef const_ptr;
   LLVMValueRef con[4];
   LLVMValueRef dst[4];
   LLVMValueRef res[4];
   unsigned chan;

   lp_build_context_init(&bld, builder, type);

   flow = lp_build_flow_create(builder);

   /* we'll use this mask context to skip blending if all pixels are dead */
   lp_build_mask_begin(&mask_ctx, flow, type, mask);

   vec_type = lp_build_vec_type(type);

   const_ptr = lp_jit_context_blend_color(builder, context_ptr);
   const_ptr = LLVMBuildBitCast(builder, const_ptr,
                                LLVMPointerType(vec_type, 0), "");

   /* load constant blend color and colors from the dest color buffer */
   for(chan = 0; chan < 4; ++chan) {
      LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), chan, 0);
      con[chan] = LLVMBuildLoad(builder,
                                LLVMBuildGEP(builder, const_ptr,
                                             &index, 1, ""), "");

      dst[chan] = LLVMBuildLoad(builder,
                                LLVMBuildGEP(builder, dst_ptr,
                                             &index, 1, ""), "");

      lp_build_name(con[chan], "con.%c", "rgba"[chan]);
      lp_build_name(dst[chan], "dst.%c", "rgba"[chan]);
   }

   /* do blend */
   lp_build_blend_soa(builder, blend, type, rt, src, dst, con, res);

   /* store results to color buffer */
   for(chan = 0; chan < 4; ++chan) {
      if(blend->rt[rt].colormask & (1 << chan)) {
         LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), chan, 0);
         lp_build_name(res[chan], "res.%c", "rgba"[chan]);
         /* only live lanes get the blended value; dead lanes keep dst */
         res[chan] = lp_build_select(&bld, mask, res[chan], dst[chan]);
         LLVMBuildStore(builder, res[chan],
                        LLVMBuildGEP(builder, dst_ptr, &index, 1, ""));
      }
   }

   lp_build_mask_end(&mask_ctx);
   lp_build_flow_destroy(flow);
}
/**
 * Do the one or two-sided stencil test comparison.
 * \sa lp_build_stencil_test_single
 * \param front_facing an integer vector mask, indicating front (~0) or back
 *                     (0) facing polygon. If NULL, assume front-facing.
 * \return per-lane pass mask for the stencil comparison
 */
static LLVMValueRef
lp_build_stencil_test(struct lp_build_context *bld,
                      const struct pipe_stencil_state stencil[2],
                      LLVMValueRef stencilRefs[2],
                      LLVMValueRef stencilVals,
                      LLVMValueRef front_facing)
{
   LLVMValueRef passed;

   assert(stencil[0].enabled);

   /* front-face comparison first */
   passed = lp_build_stencil_test_single(bld, &stencil[0],
                                         stencilRefs[0], stencilVals);

   /* two-sided: also run the back-face comparison and merge per lane */
   if (stencil[1].enabled && front_facing != NULL) {
      LLVMValueRef back_passed =
         lp_build_stencil_test_single(bld, &stencil[1],
                                      stencilRefs[1], stencilVals);
      passed = lp_build_select(bld, front_facing, passed, back_passed);
   }

   return passed;
}
/**
 * Extract Y, U, V channels from packed YUYV.
 *
 * A YUYV block packs two pixels per 32-bit word as bytes [Y0 U Y1 V];
 * U and V are shared between the pair, Y is selected by the x coordinate.
 *
 * @param packed is a <n x i32> vector with the packed YUYV blocks
 * @param i is a <n x i32> vector with the x pixel coordinate (0 or 1)
 */
static void
yuyv_to_yuv_soa(struct gallivm_state *gallivm,
                unsigned n,
                LLVMValueRef packed,
                LLVMValueRef i,
                LLVMValueRef *y, LLVMValueRef *u, LLVMValueRef *v)
{
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_type type;
   LLVMValueRef mask;

   memset(&type, 0, sizeof type);
   type.width = 32;
   type.length = n;

   assert(lp_check_value(type, packed));
   assert(lp_check_value(type, i));

   /*
    * y = (yuyv >> 16*i) & 0xff
    * u = (yuyv >> 8   ) & 0xff
    * v = (yuyv >> 24  ) & 0xff
    */

#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
   /*
    * Avoid shift with per-element count.
    * No support on x86, gets translated to roughly 5 instructions
    * per element. Didn't measure performance but cuts shader size
    * by quite a bit (less difference if cpu has no sse4.1 support).
    */
   if (util_cpu_caps.has_sse2 && n == 4) {
      LLVMValueRef sel, tmp;
      struct lp_build_context bld32;

      lp_build_context_init(&bld32, gallivm, type);

      /* precompute yuyv >> 16 and select it only where i != 0 */
      tmp = LLVMBuildLShr(builder, packed,
                          lp_build_const_int_vec(gallivm, type, 16), "");
      sel = lp_build_compare(gallivm, type, PIPE_FUNC_EQUAL, i,
                             lp_build_const_int_vec(gallivm, type, 0));
      *y = lp_build_select(&bld32, sel, packed, tmp);
   } else
#endif
   {
      /* generic path: variable per-element shift amount 16*i */
      LLVMValueRef shift;
      shift = LLVMBuildMul(builder, i,
                           lp_build_const_int_vec(gallivm, type, 16), "");
      *y = LLVMBuildLShr(builder, packed, shift, "");
   }

   *u = LLVMBuildLShr(builder, packed,
                      lp_build_const_int_vec(gallivm, type, 8), "");
   *v = LLVMBuildLShr(builder, packed,
                      lp_build_const_int_vec(gallivm, type, 24), "");

   /* keep only the low byte of each extracted channel */
   mask = lp_build_const_int_vec(gallivm, type, 0xff);

   *y = LLVMBuildAnd(builder, *y, mask, "y");
   *u = LLVMBuildAnd(builder, *u, mask, "u");
   *v = LLVMBuildAnd(builder, *v, mask, "v");
}
/**
 * Generate color blending and color output.
 *
 * Loads the constant blend color and destination colors, blends them with
 * the shader outputs, and stores the result back for channels enabled in
 * the colormask.  Lanes dead in the execution mask keep their old value.
 *
 * \param blend        the blend state (factors/funcs/colormask)
 * \param builder      LLVM IR builder
 * \param type         the pixel color type
 * \param context_ptr  pointer to the runtime JIT context
 * \param mask         execution mask (active fragment/pixel mask)
 * \param src          colors from the fragment shader
 * \param dst_ptr      the destination color buffer pointer
 */
static void
generate_blend(const struct pipe_blend_state *blend,
               LLVMBuilderRef builder,
               struct lp_type type,
               LLVMValueRef context_ptr,
               LLVMValueRef mask,
               LLVMValueRef *src,
               LLVMValueRef dst_ptr)
{
   struct lp_build_context bld;
   struct lp_build_flow_context *flow;
   struct lp_build_mask_context mask_ctx;
   LLVMTypeRef vec_type;
   LLVMValueRef const_ptr;
   LLVMValueRef con[4];
   LLVMValueRef dst[4];
   LLVMValueRef res[4];
   unsigned chan;

   lp_build_context_init(&bld, builder, type);

   flow = lp_build_flow_create(builder);

   /* we'll use this mask context to skip blending if all pixels are dead */
   lp_build_mask_begin(&mask_ctx, flow, type, mask);

   vec_type = lp_build_vec_type(type);
   /* NOTE: removed unused int_vec_type local (was computed but never read) */

   const_ptr = lp_jit_context_blend_color(builder, context_ptr);
   const_ptr = LLVMBuildBitCast(builder, const_ptr,
                                LLVMPointerType(vec_type, 0), "");

   /* load constant blend color and colors from the dest color buffer */
   for(chan = 0; chan < 4; ++chan) {
      LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), chan, 0);
      con[chan] = LLVMBuildLoad(builder,
                                LLVMBuildGEP(builder, const_ptr,
                                             &index, 1, ""), "");

      dst[chan] = LLVMBuildLoad(builder,
                                LLVMBuildGEP(builder, dst_ptr,
                                             &index, 1, ""), "");

      lp_build_name(con[chan], "con.%c", "rgba"[chan]);
      lp_build_name(dst[chan], "dst.%c", "rgba"[chan]);
   }

   /* do blend */
   lp_build_blend_soa(builder, blend, type, src, dst, con, res);

   /* store results to color buffer */
   for(chan = 0; chan < 4; ++chan) {
      if(blend->colormask & (1 << chan)) {
         LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), chan, 0);
         lp_build_name(res[chan], "res.%c", "rgba"[chan]);
         /* only live lanes get the blended value; dead lanes keep dst */
         res[chan] = lp_build_select(&bld, mask, res[chan], dst[chan]);
         LLVMBuildStore(builder, res[chan],
                        LLVMBuildGEP(builder, dst_ptr, &index, 1, ""));
      }
   }

   lp_build_mask_end(&mask_ctx);
   lp_build_flow_destroy(flow);
}
/**
 * Convert srgb int values to linear float values.
 * Several possibilities how to do this, e.g.
 * - table
 * - doing the pow() with int-to-float and float-to-int tricks
 *   (http://stackoverflow.com/questions/6475373/optimizations-for-pow-with-const-non-integer-exponent)
 * - just using standard polynomial approximation
 *   (3rd order polynomial is required for crappy but just sufficient accuracy)
 *
 * @param src integer (vector) value(s) to convert
 *        (chan_bits bit values unpacked to 32 bit already).
 * @return float vector with values in [0, 1]
 */
LLVMValueRef
lp_build_srgb_to_linear(struct gallivm_state *gallivm,
                        struct lp_type src_type,
                        unsigned chan_bits,
                        LLVMValueRef src)
{
   struct lp_type f32_type = lp_type_float_vec(32, src_type.length * 32);
   struct lp_build_context f32_bld;
   LLVMValueRef srcf, part_lin, part_pow, is_linear, lin_const, lin_thresh;
   /* polynomial coefficients, lowest order first; the 1/255 normalization
    * is folded into the coefficients themselves */
   double coeffs[4] = {0.0023f,
                       0.0030f / 255.0f,
                       0.6935f / (255.0f * 255.0f),
                       0.3012f / (255.0f * 255.0f * 255.0f)
                      };

   assert(src_type.width == 32);
   /* Technically this would work with more bits too but would be inaccurate. */
   assert(chan_bits <= 8);

   lp_build_context_init(&f32_bld, gallivm, f32_type);

   /*
    * using polynomial: (src * (src * (src * 0.3012 + 0.6935) + 0.0030) + 0.0023)
    * ( poly =  0.3012*x^3 + 0.6935*x^2 + 0.0030*x + 0.0023)
    * (found with octave polyfit and some magic as I couldn't get the error
    * function right). Using the above mentioned error function, the values stay
    * within +-0.35, except for the lowest values - hence tweaking linear segment
    * to cover the first 16 instead of the first 11 values (the error stays
    * just about acceptable there too).
    * Hence: lin = src > 15 ? poly : src / 12.6
    * This function really only makes sense for vectors, should use LUT otherwise.
    * All in all (including float conversion) 11 instructions (with sse4.1),
    * 6 constants (polynomial could be done with 1 instruction less at the cost
    * of slightly worse dependency chain, fma should also help).
    */

   /* doing the 1/255 mul as part of the approximation */
   srcf = lp_build_int_to_float(&f32_bld, src);
   if (chan_bits != 8) {
      /* could adjust all the constants instead; rescale to 8-bit range */
      LLVMValueRef rescale_const =
         lp_build_const_vec(gallivm, f32_type,
                            255.0f / ((1 << chan_bits) - 1));
      srcf = lp_build_mul(&f32_bld, srcf, rescale_const);
   }
   /* linear segment: src / (12.6 * 255) */
   lin_const = lp_build_const_vec(gallivm, f32_type, 1.0f / (12.6f * 255.0f));
   part_lin = lp_build_mul(&f32_bld, srcf, lin_const);

   /* polynomial segment */
   part_pow = lp_build_polynomial(&f32_bld, srcf, coeffs, 4);

   /* pick linear segment for the first 16 values, polynomial above */
   lin_thresh = lp_build_const_vec(gallivm, f32_type, 15.0f);
   is_linear = lp_build_compare(gallivm, f32_type, PIPE_FUNC_LEQUAL,
                                srcf, lin_thresh);
   return lp_build_select(&f32_bld, is_linear, part_lin, part_pow);
}
/**
 * Return mask ? a : b;
 *
 * mask is a TGSI_WRITEMASK_xxx: one bit per channel, repeated across the
 * whole AoS vector (num_channels channels per pixel).
 */
LLVMValueRef
lp_build_select_aos(struct lp_build_context *bld,
                    unsigned mask,
                    LLVMValueRef a,
                    LLVMValueRef b,
                    unsigned num_channels)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   const struct lp_type type = bld->type;
   const unsigned n = type.length;

   assert((mask & ~0xf) == 0);
   assert(lp_check_value(type, a));
   assert(lp_check_value(type, b));

   /* Degenerate cases need no IR at all. */
   if (a == b)
      return a;
   if ((mask & 0xf) == 0xf)
      return a;
   if ((mask & 0xf) == 0x0)
      return b;
   if (a == bld->undef || b == bld->undef)
      return bld->undef;

   /*
    * There are two major ways of accomplishing this:
    * - with a shuffle
    * - with a select
    *
    * The flip between these is empirical and might need to be adjusted.
    */
   if (n <= 4) {
      /*
       * Shuffle: element k of the result comes from a (index k) when the
       * channel bit is set, from b (index n + k) otherwise.
       */
      LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context);
      LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
      unsigned base, chan;

      for (base = 0; base < n; base += num_channels) {
         for (chan = 0; chan < num_channels; ++chan) {
            unsigned src = (mask & (1 << chan)) ? 0 : n;
            shuffles[base + chan] =
               LLVMConstInt(elem_type, src + base + chan, 0);
         }
      }

      return LLVMBuildShuffleVector(builder, a, b,
                                    LLVMConstVector(shuffles, n), "");
   }
   else {
      /* Select with a constant per-channel mask vector. */
      LLVMValueRef mask_vec = lp_build_const_mask_aos(bld->gallivm,
                                                      type, mask,
                                                      num_channels);
      return lp_build_select(bld, mask_vec, a, b);
   }
}
/**
 * Generate sign(a): returns -1, 0 or +1 with the same type as a.
 *
 * For unsigned types the result is 1 (or 0).  For floats the sign bit is
 * OR'ed into the constant 1.0; for signed integers a compare/select pair
 * is used.  Finally zero inputs are forced to zero.
 */
LLVMValueRef
lp_build_sgn(struct lp_build_context *bld,
             LLVMValueRef a)
{
   const struct lp_type type = bld->type;
   LLVMTypeRef vec_type = lp_build_vec_type(type);
   LLVMValueRef cond;
   LLVMValueRef res;

   /* Handle non-zero case */
   if(!type.sign) {
      /* if not zero then sign must be positive */
      res = bld->one;
   }
   else if(type.floating) {
      /* Take the sign bit and add it to 1 constant */
      LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
      LLVMValueRef mask = lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1));
      LLVMValueRef sign;
      LLVMValueRef one;
      sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
      sign = LLVMBuildAnd(bld->builder, sign, mask, "");
      one = LLVMConstBitCast(bld->one, int_vec_type);
      res = LLVMBuildOr(bld->builder, sign, one, "");
      res = LLVMBuildBitCast(bld->builder, res, vec_type, "");
   }
   else
   {
      /* signed int: a > 0 ? 1 : -1 */
      LLVMValueRef minus_one = lp_build_const_scalar(type, -1.0);
      cond = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, bld->zero);
      res = lp_build_select(bld, cond, bld->one, minus_one);
   }

   /* Handle zero: where a == 0, force the result to 0; elsewhere keep the
    * sign computed above.  (Bug fix: the previous code selected bld->one
    * here, discarding the computed sign so every non-zero input - even a
    * negative one - yielded +1.) */
   cond = lp_build_cmp(bld, PIPE_FUNC_EQUAL, a, bld->zero);
   res = lp_build_select(bld, cond, bld->zero, res);

   return res;
}
/**
 * Generate min(a, b)
 * No checks for special case values of a or b = 1 or 0 are done.
 *
 * Uses an SSE/SSE2/SSE4.1 packed-min intrinsic when the vector is exactly
 * 128 bits wide and the CPU supports it; otherwise falls back to a
 * compare + select sequence.
 */
static LLVMValueRef
lp_build_min_simple(struct lp_build_context *bld,
                    LLVMValueRef a,
                    LLVMValueRef b)
{
   const struct lp_type type = bld->type;
   const char *intrinsic = NULL;
   LLVMValueRef cond;

   /* TODO: optimize the constant case */

   if (type.width * type.length == 128) {
      if (type.floating) {
         if (type.width == 32 && util_cpu_caps.has_sse)
            intrinsic = "llvm.x86.sse.min.ps";
         if (type.width == 64 && util_cpu_caps.has_sse2)
            intrinsic = "llvm.x86.sse2.min.pd";
      }
      else {
         switch (type.width) {
         case 8:
            /* unsigned byte min is SSE2; signed needs SSE4.1 */
            if (!type.sign && util_cpu_caps.has_sse2)
               intrinsic = "llvm.x86.sse2.pminu.b";
            else if (type.sign && util_cpu_caps.has_sse4_1)
               intrinsic = "llvm.x86.sse41.pminsb";
            break;
         case 16:
            /* signed word min is SSE2; unsigned needs SSE4.1 */
            if (!type.sign && util_cpu_caps.has_sse4_1)
               intrinsic = "llvm.x86.sse41.pminuw";
            else if (type.sign && util_cpu_caps.has_sse2)
               intrinsic = "llvm.x86.sse2.pmins.w";
            break;
         case 32:
            /* both dword variants need SSE4.1 */
            if (util_cpu_caps.has_sse4_1)
               intrinsic = type.sign ? "llvm.x86.sse41.pminsd"
                                     : "llvm.x86.sse41.pminud";
            break;
         default:
            break;
         }
      }
   }

   if (intrinsic)
      return lp_build_intrinsic_binary(bld->builder, intrinsic,
                                       lp_build_vec_type(bld->type), a, b);

   /* generic fallback: min(a,b) = a < b ? a : b */
   cond = lp_build_cmp(bld, PIPE_FUNC_LESS, a, b);
   return lp_build_select(bld, cond, a, b);
}
/**
 * Register store.
 *
 * Stores \p value into the destination register of \p inst, after applying
 * the instruction's saturation mode, predicate, and writemask.  When any
 * mask applies, the previous register contents are loaded and merged so
 * unselected channels are preserved.
 */
void
lp_emit_store_aos(
   struct lp_build_tgsi_aos_context *bld,
   const struct tgsi_full_instruction *inst,
   unsigned index,
   LLVMValueRef value)
{
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   const struct tgsi_full_dst_register *reg = &inst->Dst[index];
   LLVMValueRef mask = NULL;
   LLVMValueRef ptr;

   /*
    * Saturate the value
    */
   switch (inst->Instruction.Saturate) {
   case TGSI_SAT_NONE:
      break;

   case TGSI_SAT_ZERO_ONE:
      /* clamp to [0, 1] */
      value = lp_build_max(&bld->bld_base.base, value,
                           bld->bld_base.base.zero);
      value = lp_build_min(&bld->bld_base.base, value,
                           bld->bld_base.base.one);
      break;

   case TGSI_SAT_MINUS_PLUS_ONE:
      /* clamp to [-1, 1] */
      value = lp_build_max(&bld->bld_base.base, value,
                           lp_build_const_vec(bld->bld_base.base.gallivm,
                                              bld->bld_base.base.type, -1.0));
      value = lp_build_min(&bld->bld_base.base, value,
                           bld->bld_base.base.one);
      break;

   default:
      assert(0);
   }

   /*
    * Translate the register file
    */
   assert(!reg->Register.Indirect);
   switch (reg->Register.File) {
   case TGSI_FILE_OUTPUT:
      ptr = bld->outputs[reg->Register.Index];
      break;
   case TGSI_FILE_TEMPORARY:
      ptr = bld->temps[reg->Register.Index];
      break;
   case TGSI_FILE_ADDRESS:
      ptr = bld->addr[reg->Indirect.Index];
      break;
   case TGSI_FILE_PREDICATE:
      ptr = bld->preds[reg->Register.Index];
      break;
   default:
      assert(0);
      return;
   }

   if (!ptr)
      return;

   /*
    * Predicate
    */
   if (inst->Instruction.Predicate) {
      LLVMValueRef pred;

      assert(inst->Predicate.Index < LP_MAX_TGSI_PREDS);

      pred = LLVMBuildLoad(builder,
                           bld->preds[inst->Predicate.Index], "");

      /*
       * Convert the value to an integer mask.
       */
      pred = lp_build_compare(bld->bld_base.base.gallivm,
                              bld->bld_base.base.type,
                              PIPE_FUNC_NOTEQUAL,
                              pred,
                              bld->bld_base.base.zero);

      if (inst->Predicate.Negate) {
         pred = LLVMBuildNot(builder, pred, "");
      }

      /* apply the predicate's per-channel swizzle */
      pred = bld->bld_base.emit_swizzle(&bld->bld_base, pred,
                                        inst->Predicate.SwizzleX,
                                        inst->Predicate.SwizzleY,
                                        inst->Predicate.SwizzleZ,
                                        inst->Predicate.SwizzleW);

      if (mask) {
         mask = LLVMBuildAnd(builder, mask, pred, "");
      } else {
         mask = pred;
      }
   }

   /*
    * Writemask
    */
   if (reg->Register.WriteMask != TGSI_WRITEMASK_XYZW) {
      LLVMValueRef writemask;

      writemask =
         lp_build_const_mask_aos_swizzled(bld->bld_base.base.gallivm,
                                          bld->bld_base.base.type,
                                          reg->Register.WriteMask,
                                          TGSI_NUM_CHANNELS,
                                          bld->swizzles);

      if (mask) {
         mask = LLVMBuildAnd(builder, mask, writemask, "");
      } else {
         mask = writemask;
      }
   }

   if (mask) {
      /* merge with the old contents so masked-off channels survive */
      LLVMValueRef orig_value;

      orig_value = LLVMBuildLoad(builder, ptr, "");
      value = lp_build_select(&bld->bld_base.base,
                              mask, value, orig_value);
   }

   LLVMBuildStore(builder, value, ptr);
}
/**
 * Performs blending of src and dst pixels
 *
 * @param blend  the blend state of the shader variant
 * @param cbuf_format format of the colour buffer
 * @param type  data type of the pixel vector
 * @param rt  render target index
 * @param src  blend src
 * @param src_alpha  blend src alpha (if not included in src)
 * @param src1  second blend src (for dual source blend)
 * @param src1_alpha  second blend src alpha (if not included in src1)
 * @param dst  blend dst
 * @param mask  optional mask to apply to the blending result
 * @param const_  const blend color
 * @param const_alpha  const blend color alpha (if not included in const_)
 * @param swizzle  swizzle values for RGBA
 *
 * @return the result of blending src and dst
 */
LLVMValueRef
lp_build_blend_aos(struct gallivm_state *gallivm,
                   const struct pipe_blend_state *blend,
                   enum pipe_format cbuf_format,
                   struct lp_type type,
                   unsigned rt,
                   LLVMValueRef src,
                   LLVMValueRef src_alpha,
                   LLVMValueRef src1,
                   LLVMValueRef src1_alpha,
                   LLVMValueRef dst,
                   LLVMValueRef mask,
                   LLVMValueRef const_,
                   LLVMValueRef const_alpha,
                   const unsigned char swizzle[4],
                   int nr_channels)
{
   const struct pipe_rt_blend_state * state = &blend->rt[rt];
   const struct util_format_description * desc;
   struct lp_build_blend_aos_context bld;
   LLVMValueRef src_factor, dst_factor;
   LLVMValueRef result;
   unsigned alpha_swizzle = UTIL_FORMAT_SWIZZLE_NONE;
   unsigned i;

   desc = util_format_description(cbuf_format);

   /* Setup build context */
   memset(&bld, 0, sizeof bld);
   lp_build_context_init(&bld.base, gallivm, type);
   bld.src = src;
   bld.src1 = src1;
   bld.dst = dst;
   bld.const_ = const_;
   bld.src_alpha = src_alpha;
   bld.src1_alpha = src1_alpha;
   bld.const_alpha = const_alpha;

   /* Find the alpha channel if not provided separately */
   if (!src_alpha) {
      for (i = 0; i < 4; ++i) {
         if (swizzle[i] == 3) {
            alpha_swizzle = i;
         }
      }
   }

   if (blend->logicop_enable) {
      /* logicop replaces blending entirely (integer formats only) */
      if(!type.floating) {
         result = lp_build_logicop(gallivm->builder, blend->logicop_func,
                                   src, dst);
      }
      else {
         result = src;
      }
   } else if (!state->blend_enable) {
      result = src;
   } else {
      /* whether rgb and alpha use the same factors, so alpha could
       * share the rgb blend computation */
      boolean rgb_alpha_same = (state->rgb_src_factor == state->rgb_dst_factor &&
                                state->alpha_src_factor == state->alpha_dst_factor) ||
                               nr_channels == 1;

      src_factor = lp_build_blend_factor(&bld, state->rgb_src_factor,
                                         state->alpha_src_factor,
                                         alpha_swizzle,
                                         nr_channels);
      dst_factor = lp_build_blend_factor(&bld, state->rgb_dst_factor,
                                         state->alpha_dst_factor,
                                         alpha_swizzle,
                                         nr_channels);

      result = lp_build_blend(&bld.base,
                              state->rgb_func,
                              state->rgb_src_factor,
                              state->rgb_dst_factor,
                              src,
                              dst,
                              src_factor,
                              dst_factor,
                              rgb_alpha_same,
                              false);

      if(state->rgb_func != state->alpha_func && nr_channels > 1 &&
         alpha_swizzle != UTIL_FORMAT_SWIZZLE_NONE) {
         /* alpha uses a different blend func: compute it separately
          * and re-insert it at the alpha channel position */
         LLVMValueRef alpha;

         alpha = lp_build_blend(&bld.base,
                                state->alpha_func,
                                state->alpha_src_factor,
                                state->alpha_dst_factor,
                                src,
                                dst,
                                src_factor,
                                dst_factor,
                                rgb_alpha_same,
                                false);

         result = lp_build_blend_swizzle(&bld,
                                         result,
                                         alpha,
                                         LP_BUILD_BLEND_SWIZZLE_RGBA,
                                         alpha_swizzle,
                                         nr_channels);
      }
   }

   /* Check if color mask is necessary */
   if (!util_format_colormask_full(desc, state->colormask)) {
      LLVMValueRef color_mask;

      color_mask = lp_build_const_mask_aos_swizzled(gallivm, bld.base.type,
                                                    state->colormask,
                                                    nr_channels, swizzle);
      lp_build_name(color_mask, "color_mask");

      /* Combine with input mask if necessary */
      if (mask) {
         /* We can be blending floating values but masks are always integer... */
         unsigned floating = bld.base.type.floating;
         bld.base.type.floating = 0;

         mask = lp_build_and(&bld.base, color_mask, mask);

         bld.base.type.floating = floating;
      } else {
         mask = color_mask;
      }
   }

   /* Apply mask, if one exists */
   if (mask) {
      result = lp_build_select(&bld.base, mask, result, dst);
   }

   return result;
}
/**
 * Generate code for performing depth and/or stencil tests.
 * We operate on a vector of values (typically n 2x2 quads).
 *
 * \param depth the depth test state
 * \param stencil the front/back stencil state
 * \param type the data type of the fragment depth/stencil values
 * \param format_desc description of the depth/stencil surface
 * \param mask the alive/dead pixel mask for the quad (vector)
 * \param stencil_refs the front/back stencil ref values (scalar)
 * \param z_src the incoming depth/stencil values (n 2x2 quad values, float32)
 * \param zs_dst the depth/stencil values in framebuffer
 * \param face contains boolean value indicating front/back facing polygon
 */
void
lp_build_depth_stencil_test(struct gallivm_state *gallivm,
                            const struct pipe_depth_state *depth,
                            const struct pipe_stencil_state stencil[2],
                            struct lp_type z_src_type,
                            const struct util_format_description *format_desc,
                            struct lp_build_mask_context *mask,
                            LLVMValueRef stencil_refs[2],
                            LLVMValueRef z_src,
                            LLVMValueRef z_fb,
                            LLVMValueRef s_fb,
                            LLVMValueRef face,
                            LLVMValueRef *z_value,
                            LLVMValueRef *s_value,
                            boolean do_branch)
{
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_type z_type;
   struct lp_build_context z_bld;
   struct lp_build_context s_bld;
   struct lp_type s_type;
   unsigned z_shift = 0, z_width = 0, z_mask = 0;
   LLVMValueRef z_dst = NULL;
   LLVMValueRef stencil_vals = NULL;
   LLVMValueRef z_bitmask = NULL, stencil_shift = NULL;
   LLVMValueRef z_pass = NULL, s_pass_mask = NULL;
   LLVMValueRef orig_mask = lp_build_mask_value(mask);
   LLVMValueRef front_facing = NULL;
   boolean have_z, have_s;

   /*
    * Depths are expected to be between 0 and 1, even if they are stored in
    * floats. Setting these bits here will ensure that the lp_build_conv() call
    * below won't try to unnecessarily clamp the incoming values.
    */
   if(z_src_type.floating) {
      z_src_type.sign = FALSE;
      z_src_type.norm = TRUE;
   }
   else {
      assert(!z_src_type.sign);
      assert(z_src_type.norm);
   }

   /* Pick the type matching the depth-stencil format. */
   z_type = lp_depth_type(format_desc, z_src_type.length);

   /* Pick the intermediate type for depth operations. */
   z_type.width = z_src_type.width;
   assert(z_type.length == z_src_type.length);

   /* FIXME: for non-float depth/stencil might generate better code
    * if we'd always split it up to use 128bit operations.
    * For stencil we'd almost certainly want to pack to 8xi16 values,
    * for z just run twice.
    */

   /* Sanity checking */
   {
      const unsigned z_swizzle = format_desc->swizzle[0];
      const unsigned s_swizzle = format_desc->swizzle[1];

      assert(z_swizzle != UTIL_FORMAT_SWIZZLE_NONE ||
             s_swizzle != UTIL_FORMAT_SWIZZLE_NONE);

      assert(depth->enabled || stencil[0].enabled);

      assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS);
      assert(format_desc->block.width == 1);
      assert(format_desc->block.height == 1);

      if (stencil[0].enabled) {
         assert(s_swizzle < 4);
         assert(format_desc->channel[s_swizzle].type ==
                UTIL_FORMAT_TYPE_UNSIGNED);
         assert(format_desc->channel[s_swizzle].pure_integer);
         assert(!format_desc->channel[s_swizzle].normalized);
         assert(format_desc->channel[s_swizzle].size == 8);
      }

      if (depth->enabled) {
         assert(z_swizzle < 4);
         if (z_type.floating) {
            assert(z_swizzle == 0);
            assert(format_desc->channel[z_swizzle].type ==
                   UTIL_FORMAT_TYPE_FLOAT);
            assert(format_desc->channel[z_swizzle].size == 32);
         }
         else {
            assert(format_desc->channel[z_swizzle].type ==
                   UTIL_FORMAT_TYPE_UNSIGNED);
            assert(format_desc->channel[z_swizzle].normalized);
            assert(!z_type.fixed);
         }
      }
   }

   /* Setup build context for Z vals */
   lp_build_context_init(&z_bld, gallivm, z_type);

   /* Setup build context for stencil vals */
   s_type = lp_int_type(z_type);
   lp_build_context_init(&s_bld, gallivm, s_type);

   /* Compute and apply the Z/stencil bitmasks and shifts. */
   {
      unsigned s_shift, s_mask;

      z_dst = z_fb;
      stencil_vals = s_fb;

      have_z = get_z_shift_and_mask(format_desc, &z_shift, &z_width, &z_mask);
      have_s = get_s_shift_and_mask(format_desc, &s_shift, &s_mask);

      if (have_z) {
         if (z_mask != 0xffffffff) {
            z_bitmask = lp_build_const_int_vec(gallivm, z_type, z_mask);
         }

         /*
          * Align the framebuffer Z 's LSB to the right.
          */
         if (z_shift) {
            LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_type, z_shift);
            z_dst = LLVMBuildLShr(builder, z_dst, shift, "z_dst");
         } else if (z_bitmask) {
            z_dst = LLVMBuildAnd(builder, z_dst, z_bitmask, "z_dst");
         } else {
            lp_build_name(z_dst, "z_dst");
         }
      }

      if (have_s) {
         if (s_shift) {
            LLVMValueRef shift = lp_build_const_int_vec(gallivm, s_type, s_shift);
            stencil_vals = LLVMBuildLShr(builder, stencil_vals, shift, "");
            stencil_shift = shift;  /* used below */
         }

         if (s_mask != 0xffffffff) {
            LLVMValueRef mask = lp_build_const_int_vec(gallivm, s_type, s_mask);
            stencil_vals = LLVMBuildAnd(builder, stencil_vals, mask, "");
         }

         lp_build_name(stencil_vals, "s_dst");
      }
   }

   if (stencil[0].enabled) {

      if (face) {
         LLVMValueRef zero = lp_build_const_int32(gallivm, 0);

         /* front_facing = face != 0 ? ~0 : 0 */
         front_facing = LLVMBuildICmp(builder, LLVMIntNE, face, zero, "");
         front_facing = LLVMBuildSExt(builder, front_facing,
                                      LLVMIntTypeInContext(gallivm->context,
                                             s_bld.type.length*s_bld.type.width),
                                      "");
         front_facing = LLVMBuildBitCast(builder, front_facing,
                                         s_bld.int_vec_type, "");
      }

      /* convert scalar stencil refs into vectors */
      stencil_refs[0] = lp_build_broadcast_scalar(&s_bld, stencil_refs[0]);
      stencil_refs[1] = lp_build_broadcast_scalar(&s_bld, stencil_refs[1]);

      s_pass_mask = lp_build_stencil_test(&s_bld, stencil,
                                          stencil_refs, stencil_vals,
                                          front_facing);

      /* apply stencil-fail operator */
      {
         LLVMValueRef s_fail_mask = lp_build_andnot(&s_bld, orig_mask, s_pass_mask);
         stencil_vals = lp_build_stencil_op(&s_bld, stencil, S_FAIL_OP,
                                            stencil_refs, stencil_vals,
                                            s_fail_mask, front_facing);
      }
   }

   if (depth->enabled) {
      /*
       * Convert fragment Z to the desired type, aligning the LSB to the right.
       */

      assert(z_type.width == z_src_type.width);
      assert(z_type.length == z_src_type.length);
      assert(lp_check_value(z_src_type, z_src));
      if (z_src_type.floating) {
         /*
          * Convert from floating point values
          */

         if (!z_type.floating) {
            z_src = lp_build_clamped_float_to_unsigned_norm(gallivm,
                                                            z_src_type,
                                                            z_width,
                                                            z_src);
         }
      } else {
         /*
          * Convert from unsigned normalized values.
          */

         assert(!z_src_type.sign);
         assert(!z_src_type.fixed);
         assert(z_src_type.norm);
         assert(!z_type.floating);
         if (z_src_type.width > z_width) {
            LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_src_type,
                                                        z_src_type.width - z_width);
            z_src = LLVMBuildLShr(builder, z_src, shift, "");
         }
      }
      assert(lp_check_value(z_type, z_src));

      lp_build_name(z_src, "z_src");

      /* compare src Z to dst Z, returning 'pass' mask */
      z_pass = lp_build_cmp(&z_bld, depth->func, z_src, z_dst);

      if (!stencil[0].enabled) {
         /* We can potentially skip all remaining operations here, but only
          * if stencil is disabled because we still need to update the stencil
          * buffer values.  Don't need to update Z buffer values.
          */
         lp_build_mask_update(mask, z_pass);

         if (do_branch) {
            lp_build_mask_check(mask);
            do_branch = FALSE;
         }
      }

      if (depth->writemask) {
         LLVMValueRef zselectmask;

         /* mask off bits that failed Z test */
         zselectmask = LLVMBuildAnd(builder, orig_mask, z_pass, "");

         /* mask off bits that failed stencil test */
         if (s_pass_mask) {
            zselectmask = LLVMBuildAnd(builder, zselectmask, s_pass_mask, "");
         }

         /* Mix the old and new Z buffer values.
          * z_dst[i] = zselectmask[i] ? z_src[i] : z_dst[i]
          */
         z_dst = lp_build_select(&z_bld, zselectmask, z_src, z_dst);
      }

      if (stencil[0].enabled) {
         /* update stencil buffer values according to z pass/fail result */
         LLVMValueRef z_fail_mask, z_pass_mask;

         /* apply Z-fail operator */
         z_fail_mask = lp_build_andnot(&s_bld, orig_mask, z_pass);
         stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_FAIL_OP,
                                            stencil_refs, stencil_vals,
                                            z_fail_mask, front_facing);

         /* apply Z-pass operator */
         z_pass_mask = LLVMBuildAnd(builder, orig_mask, z_pass, "");
         stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_PASS_OP,
                                            stencil_refs, stencil_vals,
                                            z_pass_mask, front_facing);
      }
   }
   else {
      /* No depth test: apply Z-pass operator to stencil buffer values which
       * passed the stencil test.
       */
      s_pass_mask = LLVMBuildAnd(builder, orig_mask, s_pass_mask, "");
      stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_PASS_OP,
                                         stencil_refs, stencil_vals,
                                         s_pass_mask, front_facing);
   }

   /* Put Z and stencil bits in the right place */
   if (have_z && z_shift) {
      LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_type, z_shift);
      z_dst = LLVMBuildShl(builder, z_dst, shift, "");
   }
   if (stencil_vals && stencil_shift)
      stencil_vals = LLVMBuildShl(builder, stencil_vals,
                                  stencil_shift, "");

   /* Finally, merge the z/stencil values */
   if (format_desc->block.bits <= 32) {
      if (have_z && have_s)
         *z_value = LLVMBuildOr(builder, z_dst, stencil_vals, "");
      else if (have_z)
         *z_value = z_dst;
      else
         *z_value = stencil_vals;
      *s_value = *z_value;
   }
   else {
      *z_value = z_dst;
      *s_value = stencil_vals;
   }

   if (s_pass_mask)
      lp_build_mask_update(mask, s_pass_mask);

   if (depth->enabled && stencil[0].enabled)
      lp_build_mask_update(mask, z_pass);
}
/**
 * Store depth/stencil values.
 * Incoming values are swizzled (typically n 2x2 quads), stored linear.
 * If there's a mask it will do select/store otherwise just store.
 *
 * \param type the data type of the fragment depth/stencil values
 * \param format_desc description of the depth/stencil surface
 * \param mask the alive/dead pixel mask for the quad (vector)
 * \param z_fb z values read from fb (with padding)
 * \param s_fb s values read from fb (with padding)
 * \param loop_counter the current loop iteration
 * \param depth_ptr pointer to the depth/stencil values of this 4x4 block
 * \param depth_stride stride of the depth/stencil buffer
 * \param z_value the depth values to store (with padding)
 * \param s_value the stencil values to store (with padding)
 */
void
lp_build_depth_stencil_write_swizzled(struct gallivm_state *gallivm,
                                      struct lp_type z_src_type,
                                      const struct util_format_description *format_desc,
                                      struct lp_build_mask_context *mask,
                                      LLVMValueRef z_fb,
                                      LLVMValueRef s_fb,
                                      LLVMValueRef loop_counter,
                                      LLVMValueRef depth_ptr,
                                      LLVMValueRef depth_stride,
                                      LLVMValueRef z_value,
                                      LLVMValueRef s_value)
{
   struct lp_build_context z_bld;
   LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH / 4];
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef mask_value = NULL;
   LLVMValueRef zs_dst1, zs_dst2;
   LLVMValueRef zs_dst_ptr1, zs_dst_ptr2;
   LLVMValueRef depth_offset1, depth_offset2;
   LLVMTypeRef load_ptr_type;
   unsigned depth_bytes = format_desc->block.bits / 8;
   struct lp_type zs_type = lp_depth_type(format_desc, z_src_type.length);
   struct lp_type z_type = zs_type;
   struct lp_type zs_load_type = zs_type;

   /* each store writes half the vector to each of two rows */
   zs_load_type.length = zs_load_type.length / 2;
   load_ptr_type = LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0);

   z_type.width = z_src_type.width;

   lp_build_context_init(&z_bld, gallivm, z_type);

   /*
    * This is far from ideal, at least for late depth write we should do this
    * outside the fs loop to avoid all the swizzle stuff.
    */
   if (z_src_type.length == 4) {
      /* compute the two destination row offsets from the loop counter bits */
      LLVMValueRef looplsb = LLVMBuildAnd(builder, loop_counter,
                                          lp_build_const_int32(gallivm, 1), "");
      LLVMValueRef loopmsb = LLVMBuildAnd(builder, loop_counter,
                                          lp_build_const_int32(gallivm, 2), "");
      LLVMValueRef offset2 = LLVMBuildMul(builder, loopmsb,
                                          depth_stride, "");
      depth_offset1 = LLVMBuildMul(builder, looplsb,
                                   lp_build_const_int32(gallivm, depth_bytes * 2), "");
      depth_offset1 = LLVMBuildAdd(builder, depth_offset1, offset2, "");
   }
   else {
      unsigned i;
      LLVMValueRef loopx2 = LLVMBuildShl(builder, loop_counter,
                                         lp_build_const_int32(gallivm, 1), "");
      assert(z_src_type.length == 8);
      depth_offset1 = LLVMBuildMul(builder, loopx2, depth_stride, "");
      /*
       * We load 2x4 values, and need to swizzle them (order
       * 0,1,4,5,2,3,6,7) - not so hot with avx unfortunately.
       */
      for (i = 0; i < 8; i++) {
         shuffles[i] = lp_build_const_int32(gallivm,
                                            (i&1) + (i&2) * 2 + (i&4) / 2);
      }
   }

   depth_offset2 = LLVMBuildAdd(builder, depth_offset1, depth_stride, "");

   zs_dst_ptr1 = LLVMBuildGEP(builder, depth_ptr, &depth_offset1, 1, "");
   zs_dst_ptr1 = LLVMBuildBitCast(builder, zs_dst_ptr1, load_ptr_type, "");
   zs_dst_ptr2 = LLVMBuildGEP(builder, depth_ptr, &depth_offset2, 1, "");
   zs_dst_ptr2 = LLVMBuildBitCast(builder, zs_dst_ptr2, load_ptr_type, "");

   if (format_desc->block.bits > 32) {
      s_value = LLVMBuildBitCast(builder, s_value, z_bld.vec_type, "");
   }

   if (mask) {
      /* merge with framebuffer values where the pixel is dead */
      mask_value = lp_build_mask_value(mask);
      z_value = lp_build_select(&z_bld, mask_value, z_value, z_fb);
      if (format_desc->block.bits > 32) {
         s_fb = LLVMBuildBitCast(builder, s_fb, z_bld.vec_type, "");
         s_value = lp_build_select(&z_bld, mask_value, s_value, s_fb);
      }
   }

   if (zs_type.width < z_src_type.width) {
      /* Truncate ZS values (e.g., when writing to Z16_UNORM) */
      z_value = LLVMBuildTrunc(builder, z_value,
                               lp_build_int_vec_type(gallivm, zs_type), "");
   }

   if (format_desc->block.bits <= 32) {
      if (z_src_type.length == 4) {
         zs_dst1 = lp_build_extract_range(gallivm, z_value, 0, 2);
         zs_dst2 = lp_build_extract_range(gallivm, z_value, 2, 2);
      }
      else {
         assert(z_src_type.length == 8);
         zs_dst1 = LLVMBuildShuffleVector(builder, z_value, z_value,
                                          LLVMConstVector(&shuffles[0],
                                                          zs_load_type.length), "");
         zs_dst2 = LLVMBuildShuffleVector(builder, z_value, z_value,
                                          LLVMConstVector(&shuffles[4],
                                                          zs_load_type.length), "");
      }
   }
   else {
      /* > 32bpp: interleave separate z and s vectors into z/s pairs */
      if (z_src_type.length == 4) {
         zs_dst1 = lp_build_interleave2(gallivm, z_type,
                                        z_value, s_value, 0);
         zs_dst2 = lp_build_interleave2(gallivm, z_type,
                                        z_value, s_value, 1);
      }
      else {
         unsigned i;
         LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH / 2];
         assert(z_src_type.length == 8);
         for (i = 0; i < 8; i++) {
            shuffles[i*2] = lp_build_const_int32(gallivm,
                                                 (i&1) + (i&2) * 2 + (i&4) / 2);
            shuffles[i*2+1] = lp_build_const_int32(gallivm,
                                                   (i&1) + (i&2) * 2 + (i&4) / 2 +
                                                   z_src_type.length);
         }
         zs_dst1 = LLVMBuildShuffleVector(builder, z_value, s_value,
                                          LLVMConstVector(&shuffles[0],
                                                          z_src_type.length), "");
         zs_dst2 = LLVMBuildShuffleVector(builder, z_value, s_value,
                                          LLVMConstVector(&shuffles[8],
                                                          z_src_type.length), "");
      }
      zs_dst1 = LLVMBuildBitCast(builder, zs_dst1,
                                 lp_build_vec_type(gallivm, zs_load_type), "");
      zs_dst2 = LLVMBuildBitCast(builder, zs_dst2,
                                 lp_build_vec_type(gallivm, zs_load_type), "");
   }

   LLVMBuildStore(builder, zs_dst1, zs_dst_ptr1);
   LLVMBuildStore(builder, zs_dst2, zs_dst_ptr2);
}
/**
 * Convert linear float values to srgb int values.
 * Several possibilities how to do this, e.g.
 * - use table (based on exponent/highest order mantissa bits) and do
 *   linear interpolation (https://gist.github.com/rygorous/2203834)
 * - Chebyshev polynomial
 * - Approximation using reciprocals
 * - using int-to-float and float-to-int tricks for pow()
 *   (http://stackoverflow.com/questions/6475373/optimizations-for-pow-with-const-non-integer-exponent)
 *
 * The result already includes the *255.0 scaling for conversion to int.
 *
 * @param src   float (vector) value(s) to convert.
 */
static LLVMValueRef
lp_build_linear_to_srgb(struct gallivm_state *gallivm,
                        struct lp_type src_type,
                        LLVMValueRef src)
{
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context f32_bld;
   LLVMValueRef lin_thresh, lin, lin_const, is_linear, tmp, pow_final;

   lp_build_context_init(&f32_bld, gallivm, src_type);

   /* clamp input to [0,1] first, the approximations rely on it */
   src = lp_build_clamp(&f32_bld, src, f32_bld.zero, f32_bld.one);

   if (0) {
      /*
       * using int-to-float and float-to-int trick for pow().
       * This is much more accurate than necessary thanks to the correction,
       * but it most certainly makes no sense without rsqrt available.
       * Bonus points if you understand how this works...
       * All in all (including min/max clamp, conversion) 19 instructions.
       */
      float exp_f = 2.0f / 3.0f;
      /* some compilers can't do exp2f, so this is exp2f(127.0f/exp_f - 127.0f) */
      float exp2f_c = 1.30438178253e+19f;
      float coeff_f = 0.62996f;
      LLVMValueRef pow_approx, coeff, x2, exponent, pow_1, pow_2;
      struct lp_type int_type = lp_int_type(src_type);

      /*
       * First calculate approx x^8/12
       */
      exponent = lp_build_const_vec(gallivm, src_type, exp_f);
      coeff = lp_build_const_vec(gallivm, src_type,
                                 exp2f_c * powf(coeff_f, 1.0f / exp_f));

      /* premultiply src */
      tmp = lp_build_mul(&f32_bld, coeff, src);
      /* "log2" (reinterpret float bits as int) */
      tmp = LLVMBuildBitCast(builder, tmp,
                             lp_build_vec_type(gallivm, int_type), "");
      tmp = lp_build_int_to_float(&f32_bld, tmp);
      /* multiply for pow */
      tmp = lp_build_mul(&f32_bld, tmp, exponent);
      /* "exp2" (reinterpret int bits as float) */
      pow_approx = lp_build_itrunc(&f32_bld, tmp);
      pow_approx = LLVMBuildBitCast(builder, pow_approx,
                                    lp_build_vec_type(gallivm, src_type), "");

      /*
       * Since that pow was inaccurate (like 3 bits, though each sqrt step would
       * give another bit), compensate the error (which is why we chose another
       * exponent in the first place).
       */
      /* x * x^(8/12) = x^(20/12) */
      pow_1 = lp_build_mul(&f32_bld, pow_approx, src);

      /* x * x * x^(-4/12) = x^(20/12) */
      /* Should avoid using rsqrt if it's not available, but
       * using x * x^(4/12) * x^(4/12) instead will change error weight */
      tmp = lp_build_fast_rsqrt(&f32_bld, pow_approx);
      x2 = lp_build_mul(&f32_bld, src, src);
      pow_2 = lp_build_mul(&f32_bld, x2, tmp);

      /* average the values so the errors cancel out, compensate bias,
       * we also squeeze the 1.055 mul of the srgb conversion plus the 255.0 mul
       * for conversion to int in here */
      tmp = lp_build_add(&f32_bld, pow_1, pow_2);
      coeff = lp_build_const_vec(gallivm, src_type,
                                 1.0f / (3.0f * coeff_f) * 0.999852f *
                                 powf(1.055f * 255.0f, 4.0f));
      pow_final = lp_build_mul(&f32_bld, tmp, coeff);

      /* x^(5/12) = rsqrt(rsqrt(x^20/12)) */
      if (lp_build_fast_rsqrt_available(src_type)) {
         pow_final = lp_build_fast_rsqrt(&f32_bld,
                        lp_build_fast_rsqrt(&f32_bld, pow_final));
      }
      else {
         pow_final = lp_build_sqrt(&f32_bld, lp_build_sqrt(&f32_bld, pow_final));
      }
      pow_final = lp_build_add(&f32_bld, pow_final,
                               lp_build_const_vec(gallivm, src_type, -0.055f * 255.0f));
   }

   else {
      /*
       * using "rational polynomial" approximation here.
       * Essentially y = a*x^0.375 + b*x^0.5 + c, with also
       * factoring in the 255.0 mul and the scaling mul.
       * (a is closer to actual value so has higher weight than b.)
       * Note: the constants are magic values. They were found empirically,
       * possibly could be improved but good enough (be VERY careful with
       * error metric if you'd want to tweak them, they also MUST fit with
       * the crappy polynomial above for srgb->linear since it is required
       * that each srgb value maps back to the same value).
       * This function has an error of max +-0.17 (and we'd only require +-0.6),
       * for the approximated srgb->linear values the error is naturally larger
       * (+-0.42) but still accurate enough (required +-0.5 essentially).
       * All in all (including min/max clamp, conversion) 15 instructions.
       * FMA would help (minus 2 instructions).
       */
      LLVMValueRef x05, x0375, a_const, b_const, c_const, tmp2;

      if (lp_build_fast_rsqrt_available(src_type)) {
         /* x^0.5 = x * rsqrt(x) -- saves a sqrt when rsqrt is cheap */
         tmp = lp_build_fast_rsqrt(&f32_bld, src);
         x05 = lp_build_mul(&f32_bld, src, tmp);
      }
      else {
         /*
          * I don't really expect this to be practical without rsqrt
          * but there's no reason for triple punishment so at least
          * save the otherwise resulting division and unnecessary mul...
          */
         x05 = lp_build_sqrt(&f32_bld, src);
      }

      /* x^0.375 = rsqrt(rsqrt(x^1.5)) */
      tmp = lp_build_mul(&f32_bld, x05, src);
      if (lp_build_fast_rsqrt_available(src_type)) {
         x0375 = lp_build_fast_rsqrt(&f32_bld, lp_build_fast_rsqrt(&f32_bld, tmp));
      }
      else {
         x0375 = lp_build_sqrt(&f32_bld, lp_build_sqrt(&f32_bld, tmp));
      }

      a_const = lp_build_const_vec(gallivm, src_type, 0.675f * 1.0622 * 255.0f);
      b_const = lp_build_const_vec(gallivm, src_type, 0.325f * 1.0622 * 255.0f);
      c_const = lp_build_const_vec(gallivm, src_type, -0.0620f * 255.0f);

      /* pow_final = a*x^0.375 + b*x^0.5 + c */
      tmp = lp_build_mul(&f32_bld, a_const, x0375);
      tmp2 = lp_build_mul(&f32_bld, b_const, x05);
      tmp2 = lp_build_add(&f32_bld, tmp2, c_const);
      pow_final = lp_build_add(&f32_bld, tmp, tmp2);
   }

   /* linear part is easy */
   lin_const = lp_build_const_vec(gallivm, src_type, 12.92f * 255.0f);
   lin = lp_build_mul(&f32_bld, src, lin_const);

   /* select linear vs pow segment per the standard sRGB threshold */
   lin_thresh = lp_build_const_vec(gallivm, src_type, 0.0031308f);
   is_linear = lp_build_compare(gallivm, src_type, PIPE_FUNC_LEQUAL, src, lin_thresh);
   tmp = lp_build_select(&f32_bld, is_linear, lin, pow_final);

   /* result is known non-negative, so round as unsigned */
   f32_bld.type.sign = 0;
   return lp_build_iround(&f32_bld, tmp);
}
/**
 * Apply the stencil operator (add/sub/keep/etc) to the given vector
 * of stencil values.
 *
 * \param bld         int vector build context (type must be signed)
 * \param stencil     per-face stencil state (selects the actual op and
 *                    supplies the write mask)
 * \param op          which op slot to apply (stencil-fail / z-fail / z-pass)
 * \param stencilRef  vector of reference values (used by REPLACE)
 * \param stencilVals vector of current stencil buffer values
 * \param mask        alive/dead pixel mask; lanes with a zero mask keep
 *                    their original stencil value
 * \return new stencil values vector
 */
static LLVMValueRef
lp_build_stencil_op_single(struct lp_build_context *bld,
                           const struct pipe_stencil_state *stencil,
                           enum stencil_op op,
                           LLVMValueRef stencilRef,
                           LLVMValueRef stencilVals,
                           LLVMValueRef mask)
{
   const unsigned stencilMax = 255; /* XXX fix */
   struct lp_type type = bld->type;
   LLVMValueRef res;
   LLVMValueRef max = lp_build_const_int_vec(type, stencilMax);
   unsigned stencil_op;

   assert(type.sign);

   /* Pick the concrete PIPE_STENCIL_OP_x for the requested slot. */
   switch (op) {
   case S_FAIL_OP:
      stencil_op = stencil->fail_op;
      break;
   case Z_FAIL_OP:
      stencil_op = stencil->zfail_op;
      break;
   case Z_PASS_OP:
      stencil_op = stencil->zpass_op;
      break;
   default:
      assert(0 && "Invalid stencil_op mode");
      stencil_op = PIPE_STENCIL_OP_KEEP;
   }

   switch (stencil_op) {
   case PIPE_STENCIL_OP_KEEP:
      res = stencilVals;
      /* we can return early for this case */
      return res;
   case PIPE_STENCIL_OP_ZERO:
      res = bld->zero;
      break;
   case PIPE_STENCIL_OP_REPLACE:
      res = stencilRef;
      break;
   case PIPE_STENCIL_OP_INCR:
      /* saturating increment */
      res = lp_build_add(bld, stencilVals, bld->one);
      res = lp_build_min(bld, res, max);
      break;
   case PIPE_STENCIL_OP_DECR:
      /* saturating decrement */
      res = lp_build_sub(bld, stencilVals, bld->one);
      res = lp_build_max(bld, res, bld->zero);
      break;
   case PIPE_STENCIL_OP_INCR_WRAP:
      res = lp_build_add(bld, stencilVals, bld->one);
      res = LLVMBuildAnd(bld->builder, res, max, "");
      break;
   case PIPE_STENCIL_OP_DECR_WRAP:
      res = lp_build_sub(bld, stencilVals, bld->one);
      res = LLVMBuildAnd(bld->builder, res, max, "");
      break;
   case PIPE_STENCIL_OP_INVERT:
      res = LLVMBuildNot(bld->builder, stencilVals, "");
      res = LLVMBuildAnd(bld->builder, res, max, "");
      break;
   default:
      assert(0 && "bad stencil op mode");
      res = NULL;
   }

   if (stencil->writemask != stencilMax) {
      /* mask &= stencil->writemask */
      LLVMValueRef writemask = lp_build_const_int_vec(type, stencil->writemask);
      mask = LLVMBuildAnd(bld->builder, mask, writemask, "");
      /* res = (res & mask) | (stencilVals & ~mask) */
      /* Bug fix: select with the combined pixel+write mask, not just the
       * writemask constant -- otherwise dead pixels would get written. */
      res = lp_build_select_bitwise(bld, mask, res, stencilVals);
   }
   else {
      /* res = mask ? res : stencilVals */
      res = lp_build_select(bld, mask, res, stencilVals);
   }

   return res;
}
/** * Emit LLVM for one TGSI instruction. * \param return TRUE for success, FALSE otherwise */ boolean lp_emit_instruction_aos( struct lp_build_tgsi_aos_context *bld, const struct tgsi_full_instruction *inst, const struct tgsi_opcode_info *info, int *pc) { LLVMValueRef src0, src1, src2; LLVMValueRef tmp0, tmp1; LLVMValueRef dst0 = NULL; /* * Stores and write masks are handled in a general fashion after the long * instruction opcode switch statement. * * Although not stricitly necessary, we avoid generating instructions for * channels which won't be stored, in cases where's that easy. For some * complex instructions, like texture sampling, it is more convenient to * assume a full writemask and then let LLVM optimization passes eliminate * redundant code. */ (*pc)++; assert(info->num_dst <= 1); if (info->num_dst) { dst0 = bld->bld_base.base.undef; } switch (inst->Instruction.Opcode) { case TGSI_OPCODE_ARL: src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); dst0 = lp_build_floor(&bld->bld_base.base, src0); break; case TGSI_OPCODE_MOV: dst0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); break; case TGSI_OPCODE_LIT: return FALSE; case TGSI_OPCODE_RCP: /* TGSI_OPCODE_RECIP */ src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); dst0 = lp_build_rcp(&bld->bld_base.base, src0); break; case TGSI_OPCODE_RSQ: /* TGSI_OPCODE_RECIPSQRT */ src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); tmp0 = lp_build_emit_llvm_unary(&bld->bld_base, TGSI_OPCODE_ABS, src0); dst0 = lp_build_rsqrt(&bld->bld_base.base, tmp0); break; case TGSI_OPCODE_EXP: return FALSE; case TGSI_OPCODE_LOG: return FALSE; case TGSI_OPCODE_MUL: src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); dst0 = lp_build_mul(&bld->bld_base.base, src0, src1); break; case TGSI_OPCODE_ADD: src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); src1 = 
lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); dst0 = lp_build_add(&bld->bld_base.base, src0, src1); break; case TGSI_OPCODE_DP3: /* TGSI_OPCODE_DOT3 */ return FALSE; case TGSI_OPCODE_DP4: /* TGSI_OPCODE_DOT4 */ return FALSE; case TGSI_OPCODE_DST: return FALSE; case TGSI_OPCODE_MIN: src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); dst0 = lp_build_max(&bld->bld_base.base, src0, src1); break; case TGSI_OPCODE_MAX: src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); dst0 = lp_build_max(&bld->bld_base.base, src0, src1); break; case TGSI_OPCODE_SLT: /* TGSI_OPCODE_SETLT */ src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LESS, src0, src1); dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero); break; case TGSI_OPCODE_SGE: /* TGSI_OPCODE_SETGE */ src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, src0, src1); dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero); break; case TGSI_OPCODE_MAD: /* TGSI_OPCODE_MADD */ src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL); tmp0 = lp_build_mul(&bld->bld_base.base, src0, src1); dst0 = lp_build_add(&bld->bld_base.base, tmp0, src2); break; case TGSI_OPCODE_SUB: src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); dst0 = lp_build_sub(&bld->bld_base.base, src0, src1); break; case 
TGSI_OPCODE_LRP: src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL); tmp0 = lp_build_sub(&bld->bld_base.base, src1, src2); tmp0 = lp_build_mul(&bld->bld_base.base, src0, tmp0); dst0 = lp_build_add(&bld->bld_base.base, tmp0, src2); break; case TGSI_OPCODE_CND: src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL); tmp1 = lp_build_const_vec(bld->bld_base.base.gallivm, bld->bld_base.base.type, 0.5); tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GREATER, src2, tmp1); dst0 = lp_build_select(&bld->bld_base.base, tmp0, src0, src1); break; case TGSI_OPCODE_DP2A: return FALSE; case TGSI_OPCODE_FRC: src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); tmp0 = lp_build_floor(&bld->bld_base.base, src0); dst0 = lp_build_sub(&bld->bld_base.base, src0, tmp0); break; case TGSI_OPCODE_CLAMP: src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL); tmp0 = lp_build_max(&bld->bld_base.base, src0, src1); dst0 = lp_build_min(&bld->bld_base.base, tmp0, src2); break; case TGSI_OPCODE_FLR: src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); dst0 = lp_build_floor(&bld->bld_base.base, src0); break; case TGSI_OPCODE_ROUND: src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); dst0 = lp_build_round(&bld->bld_base.base, src0); break; case TGSI_OPCODE_EX2: src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); tmp0 = lp_build_swizzle_scalar_aos(&bld->bld_base.base, src0, TGSI_SWIZZLE_X, TGSI_NUM_CHANNELS); dst0 = lp_build_exp2(&bld->bld_base.base, tmp0); break; case TGSI_OPCODE_LG2: src0 = 
lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X); dst0 = lp_build_log2(&bld->bld_base.base, tmp0); break; case TGSI_OPCODE_POW: src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); src0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X); src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); src1 = swizzle_scalar_aos(bld, src1, TGSI_SWIZZLE_X); dst0 = lp_build_pow(&bld->bld_base.base, src0, src1); break; case TGSI_OPCODE_XPD: return FALSE; case TGSI_OPCODE_RCC: /* deprecated? */ assert(0); return FALSE; case TGSI_OPCODE_DPH: return FALSE; case TGSI_OPCODE_COS: src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X); dst0 = lp_build_cos(&bld->bld_base.base, tmp0); break; case TGSI_OPCODE_DDX: return FALSE; case TGSI_OPCODE_DDY: return FALSE; case TGSI_OPCODE_KILP: /* predicated kill */ return FALSE; case TGSI_OPCODE_KIL: /* conditional kill */ return FALSE; case TGSI_OPCODE_PK2H: return FALSE; break; case TGSI_OPCODE_PK2US: return FALSE; break; case TGSI_OPCODE_PK4B: return FALSE; break; case TGSI_OPCODE_PK4UB: return FALSE; case TGSI_OPCODE_RFL: return FALSE; case TGSI_OPCODE_SEQ: src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_EQUAL, src0, src1); dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero); break; case TGSI_OPCODE_SFL: dst0 = bld->bld_base.base.zero; break; case TGSI_OPCODE_SGT: src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GREATER, src0, src1); dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero); break; case TGSI_OPCODE_SIN: src0 = 
lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X); dst0 = lp_build_sin(&bld->bld_base.base, tmp0); break; case TGSI_OPCODE_SLE: src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LEQUAL, src0, src1); dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero); break; case TGSI_OPCODE_SNE: src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_NOTEQUAL, src0, src1); dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero); break; case TGSI_OPCODE_STR: dst0 = bld->bld_base.base.one; break; case TGSI_OPCODE_TEX: dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_NONE); break; case TGSI_OPCODE_TXD: dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV); break; case TGSI_OPCODE_UP2H: /* deprecated */ assert (0); return FALSE; break; case TGSI_OPCODE_UP2US: /* deprecated */ assert(0); return FALSE; break; case TGSI_OPCODE_UP4B: /* deprecated */ assert(0); return FALSE; break; case TGSI_OPCODE_UP4UB: /* deprecated */ assert(0); return FALSE; break; case TGSI_OPCODE_X2D: /* deprecated? 
*/ assert(0); return FALSE; break; case TGSI_OPCODE_ARA: /* deprecated */ assert(0); return FALSE; break; case TGSI_OPCODE_ARR: src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); dst0 = lp_build_round(&bld->bld_base.base, src0); break; case TGSI_OPCODE_BRA: /* deprecated */ assert(0); return FALSE; break; case TGSI_OPCODE_CAL: return FALSE; case TGSI_OPCODE_RET: return FALSE; case TGSI_OPCODE_END: *pc = -1; break; case TGSI_OPCODE_SSG: /* TGSI_OPCODE_SGN */ tmp0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); dst0 = lp_build_sgn(&bld->bld_base.base, tmp0); break; case TGSI_OPCODE_CMP: src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL); tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LESS, src0, bld->bld_base.base.zero); dst0 = lp_build_select(&bld->bld_base.base, tmp0, src1, src2); break; case TGSI_OPCODE_SCS: return FALSE; case TGSI_OPCODE_TXB: dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS); break; case TGSI_OPCODE_NRM: /* fall-through */ case TGSI_OPCODE_NRM4: return FALSE; case TGSI_OPCODE_DIV: /* deprecated */ assert(0); return FALSE; break; case TGSI_OPCODE_DP2: return FALSE; case TGSI_OPCODE_TXL: dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD); break; case TGSI_OPCODE_TXP: dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_PROJECTED); break; case TGSI_OPCODE_BRK: return FALSE; case TGSI_OPCODE_IF: return FALSE; case TGSI_OPCODE_BGNLOOP: return FALSE; case TGSI_OPCODE_BGNSUB: return FALSE; case TGSI_OPCODE_ELSE: return FALSE; case TGSI_OPCODE_ENDIF: return FALSE; case TGSI_OPCODE_ENDLOOP: return FALSE; case TGSI_OPCODE_ENDSUB: return FALSE; case TGSI_OPCODE_PUSHA: /* deprecated? */ assert(0); return FALSE; break; case TGSI_OPCODE_POPA: /* deprecated? 
*/ assert(0); return FALSE; break; case TGSI_OPCODE_CEIL: src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); dst0 = lp_build_ceil(&bld->bld_base.base, src0); break; case TGSI_OPCODE_I2F: /* deprecated? */ assert(0); return FALSE; break; case TGSI_OPCODE_NOT: /* deprecated? */ assert(0); return FALSE; break; case TGSI_OPCODE_TRUNC: src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); dst0 = lp_build_trunc(&bld->bld_base.base, src0); break; case TGSI_OPCODE_SHL: /* deprecated? */ assert(0); return FALSE; break; case TGSI_OPCODE_ISHR: /* deprecated? */ assert(0); return FALSE; break; case TGSI_OPCODE_AND: /* deprecated? */ assert(0); return FALSE; break; case TGSI_OPCODE_OR: /* deprecated? */ assert(0); return FALSE; break; case TGSI_OPCODE_MOD: /* deprecated? */ assert(0); return FALSE; break; case TGSI_OPCODE_XOR: /* deprecated? */ assert(0); return FALSE; break; case TGSI_OPCODE_SAD: /* deprecated? */ assert(0); return FALSE; break; case TGSI_OPCODE_TXF: /* deprecated? */ assert(0); return FALSE; break; case TGSI_OPCODE_TXQ: /* deprecated? */ assert(0); return FALSE; break; case TGSI_OPCODE_CONT: return FALSE; case TGSI_OPCODE_EMIT: return FALSE; break; case TGSI_OPCODE_ENDPRIM: return FALSE; break; case TGSI_OPCODE_NOP: break; default: return FALSE; } if (info->num_dst) { lp_emit_store_aos(bld, inst, 0, dst0); } return TRUE; }
/**
 * Build LLVM code for texture coord wrapping, for linear filtering,
 * for scaled integer texcoords.
 * \param block_length  is the length of the pixel block along the
 *                      coordinate axis
 * \param coord0  the incoming texcoord (s,t,r or q) scaled to the texture size
 * \param length  the texture size along one dimension
 * \param stride  pixel stride along the coordinate axis (in bytes)
 * \param is_pot  if TRUE, length is a power of two
 * \param wrap_mode  one of PIPE_TEX_WRAP_x
 * \param offset0  resulting relative offset for coord0
 * \param offset1  resulting relative offset for coord0 + 1
 * \param i0  resulting sub-block pixel coordinate for coord0
 * \param i1  resulting sub-block pixel coordinate for coord0 + 1
 */
static void
lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld,
                                unsigned block_length,
                                LLVMValueRef coord0,
                                LLVMValueRef length,
                                LLVMValueRef stride,
                                boolean is_pot,
                                unsigned wrap_mode,
                                LLVMValueRef *offset0,
                                LLVMValueRef *offset1,
                                LLVMValueRef *i0,
                                LLVMValueRef *i1)
{
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef length_minus_one;
   LLVMValueRef lmask, umask, mask;

   if (block_length != 1) {
      /*
       * If the pixel block covers more than one pixel then there is no easy
       * way to calculate offset1 relative to offset0. Instead, compute them
       * independently.
       */
      LLVMValueRef coord1;
      lp_build_sample_wrap_nearest_int(bld,
                                       block_length,
                                       coord0,
                                       length,
                                       stride,
                                       is_pot,
                                       wrap_mode,
                                       offset0, i0);
      coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
      lp_build_sample_wrap_nearest_int(bld,
                                       block_length,
                                       coord1,
                                       length,
                                       stride,
                                       is_pot,
                                       wrap_mode,
                                       offset1, i1);
      return;
   }

   /*
    * Scalar pixels -- try to compute offset0 and offset1 with a single stride
    * multiplication.
    */

   /* block_length == 1, so the sub-block coords are always 0 */
   *i0 = int_coord_bld->zero;
   *i1 = int_coord_bld->zero;

   length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);

   switch(wrap_mode) {
   case PIPE_TEX_WRAP_REPEAT:
      if (is_pot) {
         /* power-of-two wrap: coord0 mod length via bitwise and */
         coord0 = LLVMBuildAnd(builder, coord0, length_minus_one, "");
      }
      else {
         /* Add a bias to the texcoord to handle negative coords */
         LLVMValueRef bias = lp_build_mul_imm(int_coord_bld, length, 1024);
         coord0 = LLVMBuildAdd(builder, coord0, bias, "");
         coord0 = LLVMBuildURem(builder, coord0, length, "");
      }

      /* mask is all-ones unless coord0 is the last texel; in that case
       * coord0+1 wraps to texel 0, so offset1 is forced to 0 by the AND */
      mask = lp_build_compare(bld->gallivm, int_coord_bld->type,
                              PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);

      *offset0 = lp_build_mul(int_coord_bld, coord0, stride);
      *offset1 = LLVMBuildAnd(builder,
                              lp_build_add(int_coord_bld, *offset0, stride),
                              mask, "");
      break;

   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
      /* lmask/umask: coord0 within [0, length-1); clamp coord0 to range */
      lmask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
                               PIPE_FUNC_GEQUAL, coord0, int_coord_bld->zero);
      umask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
                               PIPE_FUNC_LESS, coord0, length_minus_one);

      coord0 = lp_build_select(int_coord_bld, lmask, coord0, int_coord_bld->zero);
      coord0 = lp_build_select(int_coord_bld, umask, coord0, length_minus_one);

      /* offset1 = offset0 + stride, but only when coord0+1 stays in range;
       * otherwise both sample the same (clamped) texel */
      mask = LLVMBuildAnd(builder, lmask, umask, "");

      *offset0 = lp_build_mul(int_coord_bld, coord0, stride);
      *offset1 = lp_build_add(int_coord_bld,
                              *offset0,
                              LLVMBuildAnd(builder, stride, mask, ""));
      break;

   case PIPE_TEX_WRAP_CLAMP:
   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
   case PIPE_TEX_WRAP_MIRROR_REPEAT:
   case PIPE_TEX_WRAP_MIRROR_CLAMP:
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
   default:
      /* remaining wrap modes not implemented for this fast path */
      assert(0);
      *offset0 = int_coord_bld->zero;
      *offset1 = int_coord_bld->zero;
      break;
   }
}
/**
 * Performs blending of src and dst pixels
 *
 * @param blend         the blend state of the shader variant
 * @param cbuf_format   format of the colour buffer
 * @param type          data type of the pixel vector
 * @param rt            rt number
 * @param src           blend src
 * @param dst           blend dst
 * @param mask          optional mask to apply to the blending result
 * @param const_        const blend color
 * @param swizzle       swizzle values for RGBA
 *
 * @return the result of blending src and dst
 */
LLVMValueRef
lp_build_blend_aos(struct gallivm_state *gallivm,
                   const struct pipe_blend_state *blend,
                   const enum pipe_format *cbuf_format,
                   struct lp_type type,
                   unsigned rt,
                   LLVMValueRef src,
                   LLVMValueRef dst,
                   LLVMValueRef mask,
                   LLVMValueRef const_,
                   const unsigned char swizzle[4])
{
   const struct pipe_rt_blend_state *rt_state = &blend->rt[rt];
   struct lp_build_blend_aos_context bld;
   LLVMValueRef result;
   unsigned alpha_swizzle = swizzle[3];
   boolean fullcolormask;

   /* Prepare the per-call build context used by the blend helpers. */
   memset(&bld, 0, sizeof bld);
   lp_build_context_init(&bld.base, gallivm, type);
   bld.src = src;
   bld.dst = dst;
   bld.const_ = const_;

   /* Alpha channel unusable (out of range, or aliased onto red):
    * treat it as absent for factor computation. */
   if (swizzle[3] > UTIL_FORMAT_SWIZZLE_W || swizzle[3] == swizzle[0])
      alpha_swizzle = UTIL_FORMAT_SWIZZLE_NONE;

   if (rt_state->blend_enable) {
      LLVMValueRef factor_src, factor_dst;
      boolean rgb_alpha_same =
         rt_state->rgb_src_factor == rt_state->rgb_dst_factor &&
         rt_state->alpha_src_factor == rt_state->alpha_dst_factor;

      assert(rgb_alpha_same || alpha_swizzle != UTIL_FORMAT_SWIZZLE_NONE);

      factor_src = lp_build_blend_factor(&bld, rt_state->rgb_src_factor,
                                         rt_state->alpha_src_factor,
                                         alpha_swizzle);
      factor_dst = lp_build_blend_factor(&bld, rt_state->rgb_dst_factor,
                                         rt_state->alpha_dst_factor,
                                         alpha_swizzle);

      result = lp_build_blend(&bld.base,
                              rt_state->rgb_func,
                              rt_state->rgb_src_factor,
                              rt_state->rgb_dst_factor,
                              src, dst,
                              factor_src, factor_dst,
                              rgb_alpha_same,
                              false);

      if (rt_state->rgb_func != rt_state->alpha_func &&
          alpha_swizzle != UTIL_FORMAT_SWIZZLE_NONE) {
         /* Alpha uses its own blend equation: compute it separately and
          * splice it back into the rgb result. */
         LLVMValueRef alpha = lp_build_blend(&bld.base,
                                             rt_state->alpha_func,
                                             rt_state->alpha_src_factor,
                                             rt_state->alpha_dst_factor,
                                             src, dst,
                                             factor_src, factor_dst,
                                             rgb_alpha_same,
                                             false);
         result = lp_build_blend_swizzle(&bld,
                                         result,
                                         alpha,
                                         LP_BUILD_BLEND_SWIZZLE_RGBA,
                                         alpha_swizzle);
      }
   }
   else {
      /* Blending disabled: pass the shader output straight through. */
      result = src;
   }

   /* Fold the per-rt colormask (if partial) into the coverage mask. */
   fullcolormask =
      util_format_colormask_full(util_format_description(cbuf_format[rt]),
                                 rt_state->colormask);
   if (!fullcolormask) {
      LLVMValueRef color_mask =
         lp_build_const_mask_aos_swizzled(gallivm, bld.base.type,
                                          rt_state->colormask, swizzle);
      lp_build_name(color_mask, "color_mask");
      mask = mask ? lp_build_and(&bld.base, color_mask, mask) : color_mask;
   }

   /* With any mask present, select between blended result and dst. */
   if (mask) {
      result = lp_build_select(&bld.base, mask, result, dst);
   }

   return result;
}
/**
 * Register store.
 */
void
lp_emit_store_aos(
   struct lp_build_tgsi_aos_context *bld,
   const struct tgsi_full_instruction *inst,
   unsigned index,
   LLVMValueRef value)
{
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   const struct tgsi_full_dst_register *reg = &inst->Dst[index];
   LLVMValueRef store_mask = NULL;
   LLVMValueRef dst_ptr;

   /* Clamp the value to [0,1] when the instruction requests saturation. */
   if (inst->Instruction.Saturate) {
      value = lp_build_max(&bld->bld_base.base, value,
                           bld->bld_base.base.zero);
      value = lp_build_min(&bld->bld_base.base, value,
                           bld->bld_base.base.one);
   }

   /* Map the TGSI destination register file to its storage pointer. */
   assert(!reg->Register.Indirect);
   switch (reg->Register.File) {
   case TGSI_FILE_OUTPUT:
      dst_ptr = bld->outputs[reg->Register.Index];
      break;

   case TGSI_FILE_TEMPORARY:
      dst_ptr = bld->temps[reg->Register.Index];
      break;

   case TGSI_FILE_ADDRESS:
      dst_ptr = bld->addr[reg->Indirect.Index];
      break;

   default:
      assert(0);
      return;
   }

   if (!dst_ptr)
      return;

   /* Partial writemask: build a per-channel select mask. */
   if (reg->Register.WriteMask != TGSI_WRITEMASK_XYZW) {
      store_mask =
         lp_build_const_mask_aos_swizzled(bld->bld_base.base.gallivm,
                                          bld->bld_base.base.type,
                                          reg->Register.WriteMask,
                                          TGSI_NUM_CHANNELS,
                                          bld->swizzles);
   }

   /* Merge with the previous contents for the unwritten channels. */
   if (store_mask) {
      LLVMValueRef prev_value = LLVMBuildLoad(builder, dst_ptr, "");
      value = lp_build_select(&bld->bld_base.base,
                              store_mask, value, prev_value);
   }

   LLVMBuildStore(builder, value, dst_ptr);
}