void
lp_build_alpha_test(struct gallivm_state *gallivm,
                    unsigned func,
                    struct lp_type type,
                    const struct util_format_description *cbuf_format_desc,
                    struct lp_build_mask_context *mask,
                    LLVMValueRef alpha,
                    LLVMValueRef ref,
                    boolean do_branch)
{
   struct lp_build_context bld;
   LLVMValueRef test;

   lp_build_context_init(&bld, gallivm, type);

   /*
    * Alpha testing needs to be done in the color buffer precision.
    *
    * TODO: Ideally, instead of duplicating the color conversion code, we would do
    * alpha testing after converting the output colors, but that's not very
    * convenient, because it needs to be done before depth testing.  Hopefully
    * LLVM will detect and remove the duplicate expression.
    *
    * FIXME: This should be generalized to formats other than rgba8 variants.
    */
   if (type.floating && util_format_is_rgba8_variant(cbuf_format_desc)) {
      const unsigned dst_width = 8;

      alpha = lp_build_clamp(&bld, alpha, bld.zero, bld.one);
      ref = lp_build_clamp(&bld, ref, bld.zero, bld.one);

      alpha = lp_build_clamped_float_to_unsigned_norm(gallivm, type, dst_width, alpha);
      ref = lp_build_clamped_float_to_unsigned_norm(gallivm, type, dst_width, ref);

      type.floating = 0;
      lp_build_context_init(&bld, gallivm, type);
   }

   test = lp_build_cmp(&bld, func, alpha, ref);

   lp_build_name(test, "alpha_mask");

   lp_build_mask_update(mask, test);

   if (do_branch)
      lp_build_mask_check(mask);
}
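For illustration, a minimal scalar sketch of the rgba8 path above (helper names are hypothetical, not from the library): both alpha and ref are quantized to 8-bit unorm before the comparison, so the test matches the precision actually stored in the framebuffer.

/* Scalar sketch of what the generated code does per lane for an
 * rgba8 color buffer.  Hypothetical helpers for illustration only. */
static unsigned
float_to_unorm8(float x)
{
   x = x < 0.0f ? 0.0f : (x > 1.0f ? 1.0f : x);   /* clamp to [0,1] */
   return (unsigned)(x * 255.0f + 0.5f);          /* scale and round */
}

static int
alpha_test_lane(float alpha, float ref)
{
   /* e.g. alpha = 0.5f and ref = 0.5f both map to 128, so GREATER fails */
   return float_to_unorm8(alpha) > float_to_unorm8(ref);  /* PIPE_FUNC_GREATER */
}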
/**
 * Generate color blending and color output.
 * \param rt  the render target index (to index blend, colormask state)
 * \param type  the pixel color type
 * \param context_ptr  pointer to the runtime JIT context
 * \param mask  execution mask (active fragment/pixel mask)
 * \param src  colors from the fragment shader
 * \param dst_ptr  the destination color buffer pointer
 */
static void
generate_blend(const struct pipe_blend_state *blend,
               unsigned rt,
               LLVMBuilderRef builder,
               struct lp_type type,
               LLVMValueRef context_ptr,
               LLVMValueRef mask,
               LLVMValueRef *src,
               LLVMValueRef dst_ptr)
{
   struct lp_build_context bld;
   struct lp_build_flow_context *flow;
   struct lp_build_mask_context mask_ctx;
   LLVMTypeRef vec_type;
   LLVMValueRef const_ptr;
   LLVMValueRef con[4];
   LLVMValueRef dst[4];
   LLVMValueRef res[4];
   unsigned chan;

   lp_build_context_init(&bld, builder, type);

   flow = lp_build_flow_create(builder);

   /* we'll use this mask context to skip blending if all pixels are dead */
   lp_build_mask_begin(&mask_ctx, flow, type, mask);

   vec_type = lp_build_vec_type(type);

   const_ptr = lp_jit_context_blend_color(builder, context_ptr);
   const_ptr = LLVMBuildBitCast(builder, const_ptr,
                                LLVMPointerType(vec_type, 0), "");

   /* load constant blend color and colors from the dest color buffer */
   for(chan = 0; chan < 4; ++chan) {
      LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), chan, 0);
      con[chan] = LLVMBuildLoad(builder,
                                LLVMBuildGEP(builder, const_ptr, &index, 1, ""), "");

      dst[chan] = LLVMBuildLoad(builder,
                                LLVMBuildGEP(builder, dst_ptr, &index, 1, ""), "");

      lp_build_name(con[chan], "con.%c", "rgba"[chan]);
      lp_build_name(dst[chan], "dst.%c", "rgba"[chan]);
   }

   /* do blend */
   lp_build_blend_soa(builder, blend, type, rt, src, dst, con, res);

   /* store results to color buffer */
   for(chan = 0; chan < 4; ++chan) {
      if(blend->rt[rt].colormask & (1 << chan)) {
         LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), chan, 0);
         lp_build_name(res[chan], "res.%c", "rgba"[chan]);
         res[chan] = lp_build_select(&bld, mask, res[chan], dst[chan]);
         LLVMBuildStore(builder, res[chan],
                        LLVMBuildGEP(builder, dst_ptr, &index, 1, ""));
      }
   }

   lp_build_mask_end(&mask_ctx);

   lp_build_flow_destroy(flow);
}
/**
 * Extract Y, U, V channels from packed YUYV.
 * @param packed  is a <n x i32> vector with the packed YUYV blocks
 * @param i  is a <n x i32> vector with the x pixel coordinate (0 or 1)
 */
static void
yuyv_to_yuv_soa(struct gallivm_state *gallivm,
                unsigned n,
                LLVMValueRef packed,
                LLVMValueRef i,
                LLVMValueRef *y,
                LLVMValueRef *u,
                LLVMValueRef *v)
{
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_type type;
   LLVMValueRef mask;

   memset(&type, 0, sizeof type);
   type.width = 32;
   type.length = n;

   assert(lp_check_value(type, packed));
   assert(lp_check_value(type, i));

   /*
    * y = (yuyv >> 16*i) & 0xff
    * u = (yuyv >> 8   ) & 0xff
    * v = (yuyv >> 24  ) & 0xff
    */

#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
   /*
    * Avoid a shift with a per-element count: x86 has no support for it,
    * so it gets translated to roughly 5 instructions per element.
    * Didn't measure performance, but this cuts shader size by quite a
    * bit (less difference if the cpu has no sse4.1 support).
    */
   if (util_cpu_caps.has_sse2 && n == 4) {
      LLVMValueRef sel, tmp;
      struct lp_build_context bld32;

      lp_build_context_init(&bld32, gallivm, type);

      tmp = LLVMBuildLShr(builder, packed,
                          lp_build_const_int_vec(gallivm, type, 16), "");
      sel = lp_build_compare(gallivm, type, PIPE_FUNC_EQUAL, i,
                             lp_build_const_int_vec(gallivm, type, 0));
      *y = lp_build_select(&bld32, sel, packed, tmp);
   } else
#endif
   {
      LLVMValueRef shift;
      shift = LLVMBuildMul(builder, i,
                           lp_build_const_int_vec(gallivm, type, 16), "");
      *y = LLVMBuildLShr(builder, packed, shift, "");
   }

   *u = LLVMBuildLShr(builder, packed,
                      lp_build_const_int_vec(gallivm, type, 8), "");
   *v = LLVMBuildLShr(builder, packed,
                      lp_build_const_int_vec(gallivm, type, 24), "");

   mask = lp_build_const_int_vec(gallivm, type, 0xff);

   *y = LLVMBuildAnd(builder, *y, mask, "y");
   *u = LLVMBuildAnd(builder, *u, mask, "u");
   *v = LLVMBuildAnd(builder, *v, mask, "v");
}
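As a reference for the bit layout, here is a plain scalar equivalent of the shifts and masks above (the helper is hypothetical, written for a little-endian YUYV word):

/* Scalar equivalent of the SoA extraction above: a 32-bit YUYV word
 * holds two pixels; i selects which luma byte (0 or 1) belongs to
 * this pixel, while U and V are shared by the pair. */
static void
yuyv_to_yuv_scalar(uint32_t yuyv, unsigned i,
                   uint8_t *y, uint8_t *u, uint8_t *v)
{
   *y = (yuyv >> (16 * i)) & 0xff;
   *u = (yuyv >> 8) & 0xff;
   *v = (yuyv >> 24) & 0xff;
}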
/**
 * Generate color blending and color output.
 */
static void
generate_blend(const struct pipe_blend_state *blend,
               LLVMBuilderRef builder,
               struct lp_type type,
               LLVMValueRef context_ptr,
               LLVMValueRef mask,
               LLVMValueRef *src,
               LLVMValueRef dst_ptr)
{
   struct lp_build_context bld;
   struct lp_build_flow_context *flow;
   struct lp_build_mask_context mask_ctx;
   LLVMTypeRef vec_type;
   LLVMTypeRef int_vec_type;
   LLVMValueRef const_ptr;
   LLVMValueRef con[4];
   LLVMValueRef dst[4];
   LLVMValueRef res[4];
   unsigned chan;

   lp_build_context_init(&bld, builder, type);

   flow = lp_build_flow_create(builder);

   lp_build_mask_begin(&mask_ctx, flow, type, mask);

   vec_type = lp_build_vec_type(type);
   int_vec_type = lp_build_int_vec_type(type);

   const_ptr = lp_jit_context_blend_color(builder, context_ptr);
   const_ptr = LLVMBuildBitCast(builder, const_ptr,
                                LLVMPointerType(vec_type, 0), "");

   for(chan = 0; chan < 4; ++chan) {
      LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), chan, 0);
      con[chan] = LLVMBuildLoad(builder,
                                LLVMBuildGEP(builder, const_ptr, &index, 1, ""), "");

      dst[chan] = LLVMBuildLoad(builder,
                                LLVMBuildGEP(builder, dst_ptr, &index, 1, ""), "");

      lp_build_name(con[chan], "con.%c", "rgba"[chan]);
      lp_build_name(dst[chan], "dst.%c", "rgba"[chan]);
   }

   lp_build_blend_soa(builder, blend, type, src, dst, con, res);

   for(chan = 0; chan < 4; ++chan) {
      if(blend->colormask & (1 << chan)) {
         LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), chan, 0);
         lp_build_name(res[chan], "res.%c", "rgba"[chan]);
         res[chan] = lp_build_select(&bld, mask, res[chan], dst[chan]);
         LLVMBuildStore(builder, res[chan],
                        LLVMBuildGEP(builder, dst_ptr, &index, 1, ""));
      }
   }

   lp_build_mask_end(&mask_ctx);

   lp_build_flow_destroy(flow);
}
/**
 * @brief lp_build_fetch_rgba_aos_array
 *
 * \param format_desc  describes format of the image we're fetching from
 * \param dst_type  output type
 * \param base_ptr  address of the pixel block (or the texel if uncompressed)
 * \param offset  ptr offset
 */
LLVMValueRef
lp_build_fetch_rgba_aos_array(struct gallivm_state *gallivm,
                              const struct util_format_description *format_desc,
                              struct lp_type dst_type,
                              LLVMValueRef base_ptr,
                              LLVMValueRef offset)
{
   struct lp_build_context bld;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMTypeRef src_elem_type, src_vec_type;
   LLVMValueRef ptr, res = NULL;
   struct lp_type src_type;

   memset(&src_type, 0, sizeof src_type);
   src_type.floating = format_desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT;
   src_type.fixed = format_desc->channel[0].type == UTIL_FORMAT_TYPE_FIXED;
   src_type.sign = format_desc->channel[0].type != UTIL_FORMAT_TYPE_UNSIGNED;
   src_type.norm = format_desc->channel[0].normalized;
   src_type.width = format_desc->channel[0].size;
   src_type.length = format_desc->nr_channels;

   assert(src_type.length <= dst_type.length);

   src_elem_type = lp_build_elem_type(gallivm, src_type);
   src_vec_type  = lp_build_vec_type(gallivm, src_type);

   /* Read whole vector from memory, unaligned */
   if (!res) {
      ptr = LLVMBuildGEP(builder, base_ptr, &offset, 1, "");
      ptr = LLVMBuildPointerCast(builder, ptr,
                                 LLVMPointerType(src_vec_type, 0), "");
      res = LLVMBuildLoad(builder, ptr, "");
      lp_set_load_alignment(res, src_type.width / 8);
   }

   /* Truncate doubles to float */
   if (src_type.floating && src_type.width == 64) {
      src_type.width = 32;
      src_vec_type  = lp_build_vec_type(gallivm, src_type);
      res = LLVMBuildFPTrunc(builder, res, src_vec_type, "");
   }

   /* Expand to correct length */
   if (src_type.length < dst_type.length) {
      res = lp_build_pad_vector(gallivm, res, src_type, dst_type.length);
      src_type.length = dst_type.length;
   }

   /* Convert to correct format */
   lp_build_conv(gallivm, src_type, dst_type, &res, 1, &res, 1);

   /* Swizzle it */
   lp_build_context_init(&bld, gallivm, dst_type);
   return lp_build_format_swizzle_aos(format_desc, &bld, res);
}
/**
 * Convert srgb int values to linear float values.
 * There are several possible ways to do this, e.g.
 * - table
 * - doing the pow() with int-to-float and float-to-int tricks
 *   (http://stackoverflow.com/questions/6475373/optimizations-for-pow-with-const-non-integer-exponent)
 * - just using a standard polynomial approximation
 *   (a 3rd order polynomial is required for crappy but just sufficient accuracy)
 *
 * @param src   integer (vector) value(s) to convert
 *              (chan_bits bit values unpacked to 32 bit already).
 */
LLVMValueRef
lp_build_srgb_to_linear(struct gallivm_state *gallivm,
                        struct lp_type src_type,
                        unsigned chan_bits,
                        LLVMValueRef src)
{
   struct lp_type f32_type = lp_type_float_vec(32, src_type.length * 32);
   struct lp_build_context f32_bld;
   LLVMValueRef srcf, part_lin, part_pow, is_linear, lin_const, lin_thresh;
   double coeffs[4] = {0.0023f,
                       0.0030f / 255.0f,
                       0.6935f / (255.0f * 255.0f),
                       0.3012f / (255.0f * 255.0f * 255.0f)
   };

   assert(src_type.width == 32);
   /* Technically this would work with more bits too but would be inaccurate. */
   assert(chan_bits <= 8);

   lp_build_context_init(&f32_bld, gallivm, f32_type);

   /*
    * using polynomial: (src * (src * (src * 0.3012 + 0.6935) + 0.0030) + 0.0023)
    * (poly = 0.3012*x^3 + 0.6935*x^2 + 0.0030*x + 0.0023)
    * (found with octave polyfit and some magic as I couldn't get the error
    * function right).  Using the above mentioned error function, the values
    * stay within +-0.35, except for the lowest values - hence tweaking the
    * linear segment to cover the first 16 instead of the first 11 values
    * (the error stays just about acceptable there too).
    * Hence:  lin = src > 15 ? poly : src / 12.6
    * This function really only makes sense for vectors; a LUT should be used
    * otherwise.
    * All in all (including float conversion) 11 instructions (with sse4.1),
    * 6 constants (the polynomial could be done with 1 instruction less at
    * the cost of a slightly worse dependency chain; fma should also help).
    */

   /* doing the 1/255 mul as part of the approximation */
   srcf = lp_build_int_to_float(&f32_bld, src);
   if (chan_bits != 8) {
      /* could adjust all the constants instead */
      LLVMValueRef rescale_const =
         lp_build_const_vec(gallivm, f32_type,
                            255.0f / ((1 << chan_bits) - 1));
      srcf = lp_build_mul(&f32_bld, srcf, rescale_const);
   }
   lin_const = lp_build_const_vec(gallivm, f32_type, 1.0f / (12.6f * 255.0f));
   part_lin = lp_build_mul(&f32_bld, srcf, lin_const);

   part_pow = lp_build_polynomial(&f32_bld, srcf, coeffs, 4);

   lin_thresh = lp_build_const_vec(gallivm, f32_type, 15.0f);
   is_linear = lp_build_compare(gallivm, f32_type, PIPE_FUNC_LEQUAL,
                                srcf, lin_thresh);
   return lp_build_select(&f32_bld, is_linear, part_lin, part_pow);
}
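A scalar restatement of the approximation, useful for sanity-checking the constants (the helper is hypothetical; the 1/255 normalization is folded into the coefficients exactly as in the code above):

/* Scalar sketch of the srgb->linear approximation for an 8-bit input:
 * values <= 15 take the linear segment, everything else the cubic. */
static float
srgb8_to_linear_scalar(unsigned x)   /* x in [0, 255] */
{
   if (x <= 15)
      return x / (12.6f * 255.0f);                      /* linear segment */
   /* poly = 0.3012*t^3 + 0.6935*t^2 + 0.0030*t + 0.0023, with t = x/255,
    * which is what the pre-divided coeffs[] evaluate on the raw value */
   float t = x / 255.0f;
   return t * (t * (t * 0.3012f + 0.6935f) + 0.0030f) + 0.0023f;
}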
/**
 * Convert linear float soa values to packed srgb AoS values.
 * This only handles packed formats which are 4x8bit in size
 * (rgba and rgbx plus swizzles), and 16bit 565-style formats
 * with no alpha.  (In the latter case the return values won't be
 * fully packed; it will look like r5g6b5x16r5g6b5x16...)
 *
 * @param src   float SoA (vector) values to convert.
 */
LLVMValueRef
lp_build_float_to_srgb_packed(struct gallivm_state *gallivm,
                              const struct util_format_description *dst_fmt,
                              struct lp_type src_type,
                              LLVMValueRef *src)
{
   LLVMBuilderRef builder = gallivm->builder;
   unsigned chan;
   struct lp_build_context f32_bld;
   struct lp_type int32_type = lp_int_type(src_type);
   LLVMValueRef tmpsrgb[4], alpha, dst;

   lp_build_context_init(&f32_bld, gallivm, src_type);

   /* rgb is subject to linear->srgb conversion, alpha is not */
   for (chan = 0; chan < 3; chan++) {
      unsigned chan_bits = dst_fmt->channel[dst_fmt->swizzle[chan]].size;
      tmpsrgb[chan] = lp_build_linear_to_srgb(gallivm, src_type, chan_bits,
                                              src[chan]);
   }

   /*
    * can't use lp_build_conv since we want to keep values as 32bit
    * here so we can interleave with rgb to go from SoA->AoS.
    */
   alpha = lp_build_clamp_zero_one_nanzero(&f32_bld, src[3]);
   alpha = lp_build_mul(&f32_bld, alpha,
                        lp_build_const_vec(gallivm, src_type, 255.0f));
   tmpsrgb[3] = lp_build_iround(&f32_bld, alpha);

   dst = lp_build_zero(gallivm, int32_type);
   for (chan = 0; chan < dst_fmt->nr_channels; chan++) {
      if (dst_fmt->swizzle[chan] <= PIPE_SWIZZLE_W) {
         unsigned ls;
         LLVMValueRef shifted, shift_val;
         ls = dst_fmt->channel[dst_fmt->swizzle[chan]].shift;
         shift_val = lp_build_const_int_vec(gallivm, int32_type, ls);
         shifted = LLVMBuildShl(builder, tmpsrgb[chan], shift_val, "");
         dst = LLVMBuildOr(builder, dst, shifted, "");
      }
   }
   return dst;
}
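The final loop is plain shift-and-or packing; per pixel it reduces to something like this scalar sketch (hypothetical helper, shown for a straight rgba8 layout where the channel shifts are 0/8/16/24):

/* Scalar sketch of the packing loop above: each converted channel is
 * shifted to its position in the destination word and OR'd in. */
static uint32_t
pack_rgba8(uint8_t r, uint8_t g, uint8_t b, uint8_t a)
{
   return (uint32_t)r | ((uint32_t)g << 8) |
          ((uint32_t)b << 16) | ((uint32_t)a << 24);
}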
/**
 * Unpack several pixels in SoA.
 *
 * It takes a vector of packed pixels:
 *
 *   packed = {P0, P1, P2, P3, ..., Pn}
 *
 * And will produce four vectors:
 *
 *   red    = {R0, R1, R2, R3, ..., Rn}
 *   green  = {G0, G1, G2, G3, ..., Gn}
 *   blue   = {B0, B1, B2, B3, ..., Bn}
 *   alpha  = {A0, A1, A2, A3, ..., An}
 *
 * It requires that a packed pixel fits into an element of the output
 * channels.  The common case is converting pixels with a depth of 32 bits
 * or less into floats.
 *
 * \param format_desc  the format of the 'packed' incoming pixel vector
 * \param type  the desired type for rgba_out (type.length = n, above)
 * \param packed  the incoming vector of packed pixels
 * \param rgba_out  returns the SoA R,G,B,A vectors
 */
void
lp_build_unpack_rgba_soa(struct gallivm_state *gallivm,
                         const struct util_format_description *format_desc,
                         struct lp_type type,
                         LLVMValueRef packed,
                         LLVMValueRef rgba_out[4])
{
   struct lp_build_context bld;
   LLVMValueRef inputs[4];
   unsigned chan;

   assert(format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
   assert(format_desc->block.width == 1);
   assert(format_desc->block.height == 1);
   assert(format_desc->block.bits <= type.width);
   /* FIXME: Support more output types */
   assert(type.width == 32);

   lp_build_context_init(&bld, gallivm, type);

   /* Decode the input vector components */
   for (chan = 0; chan < format_desc->nr_channels; ++chan) {
      struct util_format_channel_description chan_desc = format_desc->channel[chan];
      boolean srgb_chan = FALSE;

      if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB &&
          format_desc->swizzle[3] != chan) {
         srgb_chan = TRUE;
      }

      inputs[chan] = lp_build_extract_soa_chan(&bld,
                                               format_desc->block.bits,
                                               srgb_chan,
                                               chan_desc,
                                               packed);
   }

   lp_build_format_swizzle_soa(format_desc, &bld, inputs, rgba_out);
}
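For the common unsigned-normalized case, the per-channel extraction that lp_build_extract_soa_chan generates boils down to a shift and mask followed by normalization; a scalar sketch (the helper is hypothetical, with shift/size taken from the channel description):

/* Scalar sketch of extracting one unorm channel from a packed pixel. */
static float
extract_unorm_chan(uint32_t packed, unsigned shift, unsigned size)
{
   uint32_t mask = (size < 32) ? ((1u << size) - 1) : ~0u;
   uint32_t bits = (packed >> shift) & mask;
   return (float)bits / (float)mask;   /* normalize to [0,1] */
}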
/*
 * Build LLVM function that exercises the unary operator builder.
 */
static LLVMValueRef
build_unary_test_func(struct gallivm_state *gallivm,
                      const struct unary_test_t *test)
{
   struct lp_type type = lp_type_float_vec(32, lp_native_vector_width);
   LLVMContextRef context = gallivm->context;
   LLVMModuleRef module = gallivm->module;
   LLVMTypeRef vf32t = lp_build_vec_type(gallivm, type);
   LLVMTypeRef args[2] = { LLVMPointerType(vf32t, 0), LLVMPointerType(vf32t, 0) };
   LLVMValueRef func = LLVMAddFunction(module, test->name,
                                       LLVMFunctionType(LLVMVoidTypeInContext(context),
                                                        args, Elements(args), 0));
   LLVMValueRef arg0 = LLVMGetParam(func, 0);
   LLVMValueRef arg1 = LLVMGetParam(func, 1);
   LLVMBuilderRef builder = gallivm->builder;
   LLVMBasicBlockRef block = LLVMAppendBasicBlockInContext(context, func, "entry");
   LLVMValueRef ret;

   struct lp_build_context bld;

   lp_build_context_init(&bld, gallivm, type);

   LLVMSetFunctionCallConv(func, LLVMCCallConv);

   LLVMPositionBuilderAtEnd(builder, block);

   arg1 = LLVMBuildLoad(builder, arg1, "");

   ret = test->builder(&bld, arg1);

   LLVMBuildStore(builder, ret, arg0);

   LLVMBuildRetVoid(builder);

   gallivm_verify_function(gallivm, func);

   return func;
}
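Once the module has been JIT-compiled and the symbol resolved, the generated function takes two pointers to float vectors of lp_native_vector_width/32 elements; a hypothetical usage sketch (the typedef and runner are illustrations, not part of the test harness):

/* Hypothetical caller: out[i] = op(in[i]) for each lane. */
typedef void (*unary_func_t)(float *out, const float *in);

static void
run_unary_test(unary_func_t f, unsigned length)
{
   float in[16], out[16];   /* assumed large enough for the vector width */
   unsigned i;
   for (i = 0; i < length; i++)
      in[i] = (float)i / length;
   f(out, in);
}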
/**
 * Non-interleaved pack and saturate.
 *
 * Same as lp_build_pack2 but will saturate values so that they fit into the
 * destination type.
 */
LLVMValueRef
lp_build_packs2(struct gallivm_state *gallivm,
                struct lp_type src_type,
                struct lp_type dst_type,
                LLVMValueRef lo,
                LLVMValueRef hi)
{
   boolean clamp;

   assert(!src_type.floating);
   assert(!dst_type.floating);
   assert(src_type.sign == dst_type.sign);
   assert(src_type.width == dst_type.width * 2);
   assert(src_type.length * 2 == dst_type.length);

   clamp = TRUE;

   /* All X86 SSE non-interleaved pack instructions take signed inputs and
    * saturate them, so no need to clamp for those cases. */
   if(util_cpu_caps.has_sse2 &&
      src_type.width * src_type.length >= 128 &&
      src_type.sign &&
      (src_type.width == 32 || src_type.width == 16))
      clamp = FALSE;

   if(clamp) {
      struct lp_build_context bld;
      unsigned dst_bits = dst_type.sign ? dst_type.width - 1 : dst_type.width;
      LLVMValueRef dst_max =
         lp_build_const_int_vec(gallivm, src_type,
                                ((unsigned long long)1 << dst_bits) - 1);
      lp_build_context_init(&bld, gallivm, src_type);
      lo = lp_build_min(&bld, lo, dst_max);
      hi = lp_build_min(&bld, hi, dst_max);
      /* FIXME: What about the lower bound? */
   }

   return lp_build_pack2(gallivm, src_type, dst_type, lo, hi);
}
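For example, packing signed 32-bit into signed 16-bit gives dst_bits = 15, so dst_max = (1 << 15) - 1 = 32767; a scalar sketch of the clamp-then-truncate (hypothetical helper):

/* Scalar sketch of the saturating pack for i32 -> i16: clamp to the
 * destination maximum, then truncate.  (As the FIXME above notes, the
 * lower bound is not clamped here.) */
static int16_t
pack_sat_i32_to_i16(int32_t x)
{
   const int32_t dst_max = (1 << 15) - 1;   /* 32767 */
   if (x > dst_max)
      x = dst_max;
   return (int16_t)x;
}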
void
lp_build_alpha_to_coverage(struct gallivm_state *gallivm,
                           struct lp_type type,
                           struct lp_build_mask_context *mask,
                           LLVMValueRef alpha,
                           boolean do_branch)
{
   struct lp_build_context bld;
   LLVMValueRef test;
   LLVMValueRef alpha_ref_value;

   lp_build_context_init(&bld, gallivm, type);

   alpha_ref_value = lp_build_const_vec(gallivm, type, 0.5);

   test = lp_build_cmp(&bld, PIPE_FUNC_GREATER, alpha, alpha_ref_value);

   lp_build_name(test, "alpha_to_coverage");

   lp_build_mask_update(mask, test);

   if (do_branch)
      lp_build_mask_check(mask);
}
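The per-lane effect of the mask update is a simple threshold; a scalar sketch (hypothetical helper):

/* A fragment survives alpha-to-coverage only if its alpha exceeds 0.5
 * and it was live in the incoming execution mask. */
static int
alpha_to_coverage_lane(float alpha, int live)
{
   return live && (alpha > 0.5f);
}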
void
lp_build_tgsi_aos(struct gallivm_state *gallivm,
                  const struct tgsi_token *tokens,
                  struct lp_type type,
                  const unsigned char swizzles[4],
                  LLVMValueRef consts_ptr,
                  const LLVMValueRef *inputs,
                  LLVMValueRef *outputs,
                  struct lp_build_sampler_aos *sampler,
                  const struct tgsi_shader_info *info)
{
   struct lp_build_tgsi_aos_context bld;
   struct tgsi_parse_context parse;
   uint num_immediates = 0;
   unsigned chan;
   int pc = 0;

   /* Setup build context */
   memset(&bld, 0, sizeof bld);
   lp_build_context_init(&bld.bld_base.base, gallivm, type);
   lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
   lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
   lp_build_context_init(&bld.int_bld, gallivm, lp_int_type(type));

   for (chan = 0; chan < 4; ++chan) {
      bld.swizzles[chan] = swizzles[chan];
      bld.inv_swizzles[swizzles[chan]] = chan;
   }

   bld.inputs = inputs;
   bld.outputs = outputs;
   bld.consts_ptr = consts_ptr;
   bld.sampler = sampler;
   bld.indirect_files = info->indirect_files;
   bld.bld_base.emit_swizzle = swizzle_aos;
   bld.bld_base.info = info;

   bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;

   /* Set opcode actions */
   lp_set_default_actions_cpu(&bld.bld_base);

   if (!lp_bld_tgsi_list_init(&bld.bld_base)) {
      return;
   }

   tgsi_parse_init(&parse, tokens);

   while (!tgsi_parse_end_of_tokens(&parse)) {
      tgsi_parse_token(&parse);

      switch(parse.FullToken.Token.Type) {
      case TGSI_TOKEN_TYPE_DECLARATION:
         /* Inputs already interpolated */
         lp_emit_declaration_aos(&bld, &parse.FullToken.FullDeclaration);
         break;

      case TGSI_TOKEN_TYPE_INSTRUCTION:
         /* save expanded instruction */
         lp_bld_tgsi_add_instruction(&bld.bld_base,
                                     &parse.FullToken.FullInstruction);
         break;

      case TGSI_TOKEN_TYPE_IMMEDIATE:
         /* simply copy the immediate values into the next immediates[] slot */
         {
            const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
            float imm[4];
            assert(size <= 4);
            assert(num_immediates < LP_MAX_TGSI_IMMEDIATES);
            for (chan = 0; chan < 4; ++chan) {
               imm[chan] = 0.0f;
            }
            for (chan = 0; chan < size; ++chan) {
               unsigned swizzle = bld.swizzles[chan];
               imm[swizzle] = parse.FullToken.FullImmediate.u[chan].Float;
            }
            bld.immediates[num_immediates] =
               lp_build_const_aos(gallivm, type,
                                  imm[0], imm[1], imm[2], imm[3],
                                  NULL);
            num_immediates++;
         }
         break;

      case TGSI_TOKEN_TYPE_PROPERTY:
         break;

      default:
         assert(0);
      }
   }

   while (pc != -1) {
      struct tgsi_full_instruction *instr = bld.bld_base.instructions + pc;
      const struct tgsi_opcode_info *opcode_info =
         tgsi_get_opcode_info(instr->Instruction.Opcode);
      if (!lp_emit_instruction_aos(&bld, instr, opcode_info, &pc))
         _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
                       opcode_info->mnemonic);
   }

   if (0) {
      LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
      LLVMValueRef function = LLVMGetBasicBlockParent(block);
      debug_printf("11111111111111111111111111111 \n");
      tgsi_dump(tokens, 0);
      lp_debug_dump_value(function);
      debug_printf("2222222222222222222222222222 \n");
   }
   tgsi_parse_free(&parse);

   FREE(bld.bld_base.instructions);

   if (0) {
      LLVMModuleRef module = LLVMGetGlobalParent(
         LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
      LLVMDumpModule(module);
   }
}
/**
 * Initialize fragment shader input attribute info.
 */
void
lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
                         struct gallivm_state *gallivm,
                         unsigned num_inputs,
                         const struct lp_shader_input *inputs,
                         boolean pixel_center_integer,
                         LLVMBuilderRef builder,
                         struct lp_type type,
                         LLVMValueRef a0_ptr,
                         LLVMValueRef dadx_ptr,
                         LLVMValueRef dady_ptr,
                         LLVMValueRef x0,
                         LLVMValueRef y0)
{
   struct lp_type coeff_type;
   struct lp_type setup_type;
   unsigned attrib;
   unsigned chan;

   memset(bld, 0, sizeof *bld);

   memset(&coeff_type, 0, sizeof coeff_type);
   coeff_type.floating = TRUE;
   coeff_type.sign = TRUE;
   coeff_type.width = 32;
   coeff_type.length = type.length;

   memset(&setup_type, 0, sizeof setup_type);
   setup_type.floating = TRUE;
   setup_type.sign = TRUE;
   setup_type.width = 32;
   setup_type.length = TGSI_NUM_CHANNELS;

   /* XXX: we don't support interpolating into any other types */
   assert(memcmp(&coeff_type, &type, sizeof coeff_type) == 0);

   lp_build_context_init(&bld->coeff_bld, gallivm, coeff_type);
   lp_build_context_init(&bld->setup_bld, gallivm, setup_type);

   /* For convenience */
   bld->pos = bld->attribs[0];
   bld->inputs = (const LLVMValueRef (*)[TGSI_NUM_CHANNELS]) bld->attribs[1];

   /* Position */
   bld->mask[0] = TGSI_WRITEMASK_XYZW;
   bld->interp[0] = LP_INTERP_LINEAR;

   /* Inputs */
   for (attrib = 0; attrib < num_inputs; ++attrib) {
      bld->mask[1 + attrib] = inputs[attrib].usage_mask;
      bld->interp[1 + attrib] = inputs[attrib].interp;
   }
   bld->num_attribs = 1 + num_inputs;

   /* Ensure all masked out input channels have a valid value */
   for (attrib = 0; attrib < bld->num_attribs; ++attrib) {
      for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
         bld->attribs[attrib][chan] = bld->coeff_bld.undef;
      }
   }

   if (pixel_center_integer) {
      bld->pos_offset = 0.0;
   }
   else {
      bld->pos_offset = 0.5;
   }

   pos_init(bld, x0, y0);

   /*
    * The simple method (single-step interpolation) may be slower if the
    * vector length is just 4, but the results are different (generally
    * less accurate) with the other method, so always use the more
    * accurate version.
    */
   if (1) {
      bld->simple_interp = TRUE;
      {
         /* XXX this should use a global static table */
         unsigned i;
         unsigned num_loops = 16 / type.length;
         LLVMValueRef pixoffx, pixoffy, index;
         LLVMValueRef ptr;

         bld->xoffset_store =
            lp_build_array_alloca(gallivm,
                                  lp_build_vec_type(gallivm, type),
                                  lp_build_const_int32(gallivm, num_loops),
                                  "");
         bld->yoffset_store =
            lp_build_array_alloca(gallivm,
                                  lp_build_vec_type(gallivm, type),
                                  lp_build_const_int32(gallivm, num_loops),
                                  "");
         for (i = 0; i < num_loops; i++) {
            index = lp_build_const_int32(gallivm, i);
            calc_offsets(&bld->coeff_bld, i*type.length/4, &pixoffx, &pixoffy);
            ptr = LLVMBuildGEP(builder, bld->xoffset_store, &index, 1, "");
            LLVMBuildStore(builder, pixoffx, ptr);
            ptr = LLVMBuildGEP(builder, bld->yoffset_store, &index, 1, "");
            LLVMBuildStore(builder, pixoffy, ptr);
         }
      }
      coeffs_init_simple(bld, a0_ptr, dadx_ptr, dady_ptr);
   }
   else {
      bld->simple_interp = FALSE;
      coeffs_init(bld, a0_ptr, dadx_ptr, dady_ptr);
   }
}
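The a0/dadx/dady coefficients set up here drive the usual plane-equation evaluation; per attribute channel the interpolated value reduces to this scalar sketch (the helper is hypothetical, with pos_offset applied as the pixel-center adjustment described above):

/* Scalar sketch of linear attribute interpolation: each channel is a
 * plane a0 + dadx*x + dady*y evaluated at the (offset) pixel position. */
static float
interp_attrib(float a0, float dadx, float dady,
              float x, float y, float pos_offset)
{
   return a0 + dadx * (x + pos_offset) + dady * (y + pos_offset);
}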
/**
 * Generate blend code in SOA mode.
 * \param src  src/fragment color
 * \param dst  dst/framebuffer color
 * \param con  constant blend color
 * \param res  the result/output
 */
void
lp_build_blend_soa(LLVMBuilderRef builder,
                   const struct pipe_blend_state *blend,
                   struct lp_type type,
                   LLVMValueRef src[4],
                   LLVMValueRef dst[4],
                   LLVMValueRef con[4],
                   LLVMValueRef res[4])
{
   struct lp_build_blend_soa_context bld;
   unsigned i, j, k;

   /* Setup build context */
   memset(&bld, 0, sizeof bld);
   lp_build_context_init(&bld.base, builder, type);
   for (i = 0; i < 4; ++i) {
      bld.src[i] = src[i];
      bld.dst[i] = dst[i];
      bld.con[i] = con[i];
   }

   for (i = 0; i < 4; ++i) {
      if (blend->colormask & (1 << i)) {
         if (blend->logicop_enable) {
            if(!type.floating) {
               res[i] = lp_build_logicop(builder, blend->logicop_func,
                                         src[i], dst[i]);
            }
            else
               res[i] = dst[i];
         }
         else if (blend->blend_enable) {
            unsigned src_factor = i < 3 ? blend->rgb_src_factor
                                        : blend->alpha_src_factor;
            unsigned dst_factor = i < 3 ? blend->rgb_dst_factor
                                        : blend->alpha_dst_factor;
            unsigned func = i < 3 ? blend->rgb_func : blend->alpha_func;
            boolean func_commutative = lp_build_blend_func_commutative(func);

            /* It makes no sense to blend unless values are normalized */
            assert(type.norm);

            /*
             * Compute src/dst factors.
             */
            bld.factor[0][0][i] = src[i];
            bld.factor[0][1][i] = lp_build_blend_soa_factor(&bld, src_factor, i);
            bld.factor[1][0][i] = dst[i];
            bld.factor[1][1][i] = lp_build_blend_soa_factor(&bld, dst_factor, i);

            /*
             * Compute src/dst terms
             */
            for(k = 0; k < 2; ++k) {
               /* See if this multiplication has been previously computed */
               for(j = 0; j < i; ++j) {
                  if((bld.factor[k][0][j] == bld.factor[k][0][i] &&
                      bld.factor[k][1][j] == bld.factor[k][1][i]) ||
                     (bld.factor[k][0][j] == bld.factor[k][1][i] &&
                      bld.factor[k][1][j] == bld.factor[k][0][i]))
                     break;
               }

               if(j < i)
                  bld.term[k][i] = bld.term[k][j];
               else
                  bld.term[k][i] = lp_build_mul(&bld.base,
                                                bld.factor[k][0][i],
                                                bld.factor[k][1][i]);
            }

            /*
             * Combine terms
             */

            /* See if this function has been previously applied */
            for(j = 0; j < i; ++j) {
               unsigned prev_func = j < 3 ? blend->rgb_func : blend->alpha_func;
               unsigned func_reverse = lp_build_blend_func_reverse(func, prev_func);

               if((!func_reverse &&
                   bld.term[0][j] == bld.term[0][i] &&
                   bld.term[1][j] == bld.term[1][i]) ||
                  ((func_commutative || func_reverse) &&
                   bld.term[0][j] == bld.term[1][i] &&
                   bld.term[1][j] == bld.term[0][i]))
                  break;
            }

            if(j < i)
               res[i] = res[j];
            else
               res[i] = lp_build_blend_func(&bld.base, func,
                                            bld.term[0][i], bld.term[1][i]);
         }
         else {
            res[i] = src[i];
         }
      }
      else {
         res[i] = dst[i];
      }
   }
}
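The terms this function caches and reuses are the two halves of the standard blend equation; per channel it reduces to the following scalar sketch (hypothetical helper, shown for the common additive case with normalized [0,1] values):

/* Scalar sketch of the blend equation whose terms the code above
 * tracks: result = func(src * src_factor, dst * dst_factor),
 * shown for the ADD blend function. */
static float
blend_add_chan(float src, float src_factor, float dst, float dst_factor)
{
   float term0 = src * src_factor;   /* corresponds to bld.term[0][i] */
   float term1 = dst * dst_factor;   /* corresponds to bld.term[1][i] */
   return term0 + term1;
}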
/**
 * @brief lp_build_fetch_rgba_aos_array
 *
 * \param format_desc  describes format of the image we're fetching from
 * \param dst_type  output type
 * \param base_ptr  address of the pixel block (or the texel if uncompressed)
 * \param offset  ptr offset
 */
LLVMValueRef
lp_build_fetch_rgba_aos_array(struct gallivm_state *gallivm,
                              const struct util_format_description *format_desc,
                              struct lp_type dst_type,
                              LLVMValueRef base_ptr,
                              LLVMValueRef offset)
{
   struct lp_build_context bld;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMTypeRef src_vec_type;
   LLVMValueRef ptr, res = NULL;
   struct lp_type src_type;
   boolean pure_integer = format_desc->channel[0].pure_integer;
   struct lp_type tmp_type;

   lp_type_from_format_desc(&src_type, format_desc);

   assert(src_type.length <= dst_type.length);

   src_vec_type = lp_build_vec_type(gallivm, src_type);

   /* Read whole vector from memory, unaligned */
   ptr = LLVMBuildGEP(builder, base_ptr, &offset, 1, "");
   ptr = LLVMBuildPointerCast(builder, ptr,
                              LLVMPointerType(src_vec_type, 0), "");
   res = LLVMBuildLoad(builder, ptr, "");
   LLVMSetAlignment(res, src_type.width / 8);

   /* Truncate doubles to float */
   if (src_type.floating && src_type.width == 64) {
      src_type.width = 32;
      src_vec_type = lp_build_vec_type(gallivm, src_type);
      res = LLVMBuildFPTrunc(builder, res, src_vec_type, "");
   }

   /* Expand to correct length */
   if (src_type.length < dst_type.length) {
      res = lp_build_pad_vector(gallivm, res, dst_type.length);
      src_type.length = dst_type.length;
   }

   tmp_type = dst_type;
   if (pure_integer) {
      /* some callers expect (fake) floats, others real ints. */
      tmp_type.floating = 0;
      tmp_type.sign = src_type.sign;
   }

   /* Convert to correct format */
   lp_build_conv(gallivm, src_type, tmp_type, &res, 1, &res, 1);

   /* Swizzle it */
   lp_build_context_init(&bld, gallivm, tmp_type);
   res = lp_build_format_swizzle_aos(format_desc, &bld, res);

   /* Bitcast to floats (for pure integers) when requested */
   if (pure_integer && dst_type.floating) {
      res = LLVMBuildBitCast(builder, res,
                             lp_build_vec_type(gallivm, dst_type), "");
   }

   return res;
}
/**
 * Generate blend code in SOA mode.
 * \param rt  render target index (to index the blend / colormask state)
 * \param src  src/fragment color
 * \param dst  dst/framebuffer color
 * \param con  constant blend color
 * \param res  the result/output
 */
void
lp_build_blend_soa(struct gallivm_state *gallivm,
                   const struct pipe_blend_state *blend,
                   struct lp_type type,
                   unsigned rt,
                   LLVMValueRef src[4],
                   LLVMValueRef dst[4],
                   LLVMValueRef con[4],
                   LLVMValueRef res[4])
{
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_blend_soa_context bld;
   unsigned i, j, k;

   assert(rt < PIPE_MAX_COLOR_BUFS);

   /* Setup build context */
   memset(&bld, 0, sizeof bld);
   lp_build_context_init(&bld.base, gallivm, type);
   for (i = 0; i < 4; ++i) {
      bld.src[i] = src[i];
      bld.dst[i] = dst[i];
      bld.con[i] = con[i];
   }

   for (i = 0; i < 4; ++i) {
      /* only compute blending for the color channels enabled for writing */
      if (blend->rt[rt].colormask & (1 << i)) {
         if (blend->logicop_enable) {
            if(!type.floating) {
               res[i] = lp_build_logicop(builder, blend->logicop_func,
                                         src[i], dst[i]);
            }
            else
               res[i] = dst[i];
         }
         else if (blend->rt[rt].blend_enable) {
            unsigned src_factor = i < 3 ? blend->rt[rt].rgb_src_factor
                                        : blend->rt[rt].alpha_src_factor;
            unsigned dst_factor = i < 3 ? blend->rt[rt].rgb_dst_factor
                                        : blend->rt[rt].alpha_dst_factor;
            unsigned func = i < 3 ? blend->rt[rt].rgb_func
                                  : blend->rt[rt].alpha_func;
            boolean func_commutative = lp_build_blend_func_commutative(func);

            /*
             * Compute src/dst factors.
             */
            bld.factor[0][0][i] = src[i];
            bld.factor[0][1][i] = lp_build_blend_soa_factor(&bld, src_factor, i);
            bld.factor[1][0][i] = dst[i];
            bld.factor[1][1][i] = lp_build_blend_soa_factor(&bld, dst_factor, i);

            /*
             * Check if lp_build_blend can perform any optimisations
             */
            res[i] = lp_build_blend(&bld.base,
                                    func,
                                    src_factor,
                                    dst_factor,
                                    bld.factor[0][0][i],
                                    bld.factor[1][0][i],
                                    bld.factor[0][1][i],
                                    bld.factor[1][1][i],
                                    true,
                                    true);

            if (res[i]) {
               continue;
            }

            /*
             * Compute src/dst terms
             */
            for(k = 0; k < 2; ++k) {
               /* See if this multiplication has been previously computed */
               for(j = 0; j < i; ++j) {
                  if((bld.factor[k][0][j] == bld.factor[k][0][i] &&
                      bld.factor[k][1][j] == bld.factor[k][1][i]) ||
                     (bld.factor[k][0][j] == bld.factor[k][1][i] &&
                      bld.factor[k][1][j] == bld.factor[k][0][i]))
                     break;
               }

               if(j < i && bld.term[k][j])
                  bld.term[k][i] = bld.term[k][j];
               else
                  bld.term[k][i] = lp_build_mul(&bld.base,
                                                bld.factor[k][0][i],
                                                bld.factor[k][1][i]);

               if (src_factor == PIPE_BLENDFACTOR_ZERO &&
                   (dst_factor == PIPE_BLENDFACTOR_DST_ALPHA ||
                    dst_factor == PIPE_BLENDFACTOR_INV_DST_ALPHA)) {
                  /* XXX special case these combos to work around an apparent
                   * bug in LLVM.
                   * This hack disables the check for multiplication by zero
                   * in lp_bld_mul().  When we optimize away the
                   * multiplication, something goes wrong during code
                   * generation and we segfault at runtime.
                   */
                  LLVMValueRef zeroSave = bld.base.zero;
                  bld.base.zero = NULL;
                  bld.term[k][i] = lp_build_mul(&bld.base,
                                                bld.factor[k][0][i],
                                                bld.factor[k][1][i]);
                  bld.base.zero = zeroSave;
               }
            }

            /*
             * Combine terms
             */

            /* See if this function has been previously applied */
            for(j = 0; j < i; ++j) {
               unsigned prev_func = j < 3 ? blend->rt[rt].rgb_func
                                          : blend->rt[rt].alpha_func;
               unsigned func_reverse = lp_build_blend_func_reverse(func, prev_func);

               if((!func_reverse &&
                   bld.term[0][j] == bld.term[0][i] &&
                   bld.term[1][j] == bld.term[1][i]) ||
                  ((func_commutative || func_reverse) &&
                   bld.term[0][j] == bld.term[1][i] &&
                   bld.term[1][j] == bld.term[0][i]))
                  break;
            }

            if(j < i)
               res[i] = res[j];
            else
               res[i] = lp_build_blend_func(&bld.base, func,
                                            bld.term[0][i], bld.term[1][i]);
         }
         else {
            res[i] = src[i];
         }
      }
      else {
         res[i] = dst[i];
      }
   }
}
/**
 * Performs blending of src and dst pixels
 *
 * @param blend  the blend state of the shader variant
 * @param cbuf_format  format of the colour buffer
 * @param type  data type of the pixel vector
 * @param rt  render target index
 * @param src  blend src
 * @param src_alpha  blend src alpha (if not included in src)
 * @param src1  second blend src (for dual source blend)
 * @param src1_alpha  second blend src alpha (if not included in src1)
 * @param dst  blend dst
 * @param mask  optional mask to apply to the blending result
 * @param const_  const blend color
 * @param const_alpha  const blend color alpha (if not included in const_)
 * @param swizzle  swizzle values for RGBA
 *
 * @return the result of blending src and dst
 */
LLVMValueRef
lp_build_blend_aos(struct gallivm_state *gallivm,
                   const struct pipe_blend_state *blend,
                   enum pipe_format cbuf_format,
                   struct lp_type type,
                   unsigned rt,
                   LLVMValueRef src,
                   LLVMValueRef src_alpha,
                   LLVMValueRef src1,
                   LLVMValueRef src1_alpha,
                   LLVMValueRef dst,
                   LLVMValueRef mask,
                   LLVMValueRef const_,
                   LLVMValueRef const_alpha,
                   const unsigned char swizzle[4],
                   int nr_channels)
{
   const struct pipe_rt_blend_state * state = &blend->rt[rt];
   const struct util_format_description * desc;
   struct lp_build_blend_aos_context bld;
   LLVMValueRef src_factor, dst_factor;
   LLVMValueRef result;
   unsigned alpha_swizzle = UTIL_FORMAT_SWIZZLE_NONE;
   unsigned i;

   desc = util_format_description(cbuf_format);

   /* Setup build context */
   memset(&bld, 0, sizeof bld);
   lp_build_context_init(&bld.base, gallivm, type);
   bld.src = src;
   bld.src1 = src1;
   bld.dst = dst;
   bld.const_ = const_;
   bld.src_alpha = src_alpha;
   bld.src1_alpha = src1_alpha;
   bld.const_alpha = const_alpha;

   /* Find the alpha channel if not provided separately */
   if (!src_alpha) {
      for (i = 0; i < 4; ++i) {
         if (swizzle[i] == 3) {
            alpha_swizzle = i;
         }
      }
   }

   if (blend->logicop_enable) {
      if(!type.floating) {
         result = lp_build_logicop(gallivm->builder, blend->logicop_func,
                                   src, dst);
      }
      else {
         result = src;
      }
   } else if (!state->blend_enable) {
      result = src;
   } else {
      boolean rgb_alpha_same = (state->rgb_src_factor == state->rgb_dst_factor &&
                                state->alpha_src_factor == state->alpha_dst_factor) ||
                               nr_channels == 1;

      src_factor = lp_build_blend_factor(&bld, state->rgb_src_factor,
                                         state->alpha_src_factor,
                                         alpha_swizzle,
                                         nr_channels);

      dst_factor = lp_build_blend_factor(&bld, state->rgb_dst_factor,
                                         state->alpha_dst_factor,
                                         alpha_swizzle,
                                         nr_channels);

      result = lp_build_blend(&bld.base,
                              state->rgb_func,
                              state->rgb_src_factor,
                              state->rgb_dst_factor,
                              src,
                              dst,
                              src_factor,
                              dst_factor,
                              rgb_alpha_same,
                              false);

      if(state->rgb_func != state->alpha_func && nr_channels > 1 &&
         alpha_swizzle != UTIL_FORMAT_SWIZZLE_NONE) {

         LLVMValueRef alpha;

         alpha = lp_build_blend(&bld.base,
                                state->alpha_func,
                                state->alpha_src_factor,
                                state->alpha_dst_factor,
                                src,
                                dst,
                                src_factor,
                                dst_factor,
                                rgb_alpha_same,
                                false);

         result = lp_build_blend_swizzle(&bld,
                                         result,
                                         alpha,
                                         LP_BUILD_BLEND_SWIZZLE_RGBA,
                                         alpha_swizzle,
                                         nr_channels);
      }
   }

   /* Check if a color mask is necessary */
   if (!util_format_colormask_full(desc, state->colormask)) {
      LLVMValueRef color_mask;

      color_mask = lp_build_const_mask_aos_swizzled(gallivm, bld.base.type,
                                                    state->colormask,
                                                    nr_channels, swizzle);
      lp_build_name(color_mask, "color_mask");

      /* Combine with input mask if necessary */
      if (mask) {
         /* We can be blending floating values but masks are always integer... */
         unsigned floating = bld.base.type.floating;
         bld.base.type.floating = 0;

         mask = lp_build_and(&bld.base, color_mask, mask);

         bld.base.type.floating = floating;
      } else {
         mask = color_mask;
      }
   }

   /* Apply mask, if one exists */
   if (mask) {
      result = lp_build_select(&bld.base, mask, result, dst);
   }

   return result;
}
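Per lane, the final masked write-back amounts to a bitwise select; a scalar sketch of lp_build_select's effect here (hypothetical helper; each lane's mask is assumed all-ones or all-zeros):

/* Scalar sketch of the masked result: select between the blend result
 * and the existing dst with a bitwise blend. */
static uint32_t
select_lane(uint32_t mask, uint32_t result, uint32_t dst)
{
   return (mask & result) | (~mask & dst);
}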
/*
 * Do a cached lookup.
 *
 * Returns (vectors of) 4x8 rgba aos value
 */
LLVMValueRef
lp_build_fetch_cached_texels(struct gallivm_state *gallivm,
                             const struct util_format_description *format_desc,
                             unsigned n,
                             LLVMValueRef base_ptr,
                             LLVMValueRef offset,
                             LLVMValueRef i,
                             LLVMValueRef j,
                             LLVMValueRef cache)
{
   LLVMBuilderRef builder = gallivm->builder;
   unsigned count, low_bit, log2size;
   LLVMValueRef color, offset_stored, addr, ptr_addrtrunc, tmp;
   LLVMValueRef ij_index, hash_index, hash_mask, block_index;
   LLVMTypeRef i8t = LLVMInt8TypeInContext(gallivm->context);
   LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
   LLVMTypeRef i64t = LLVMInt64TypeInContext(gallivm->context);
   struct lp_type type;
   struct lp_build_context bld32;
   memset(&type, 0, sizeof type);
   type.width = 32;
   type.length = n;

   assert(format_desc->block.width == 4);
   assert(format_desc->block.height == 4);

   lp_build_context_init(&bld32, gallivm, type);

   /*
    * compute hash - we use a direct-mapped cache; the hash function could
    * be better but it needs to be simple
    * per-element:
    *    compare offset with the offset stored at the tag (hash)
    *    if not equal decode/store block, update tag
    *    extract color from cache
    *    assemble result vector
    */

   /* TODO: not ideal with 32bit pointers... */

   low_bit = util_logbase2(format_desc->block.bits / 8);
   log2size = util_logbase2(LP_BUILD_FORMAT_CACHE_SIZE);
   addr = LLVMBuildPtrToInt(builder, base_ptr, i64t, "");
   ptr_addrtrunc = LLVMBuildPtrToInt(builder, base_ptr, i32t, "");
   ptr_addrtrunc = lp_build_broadcast_scalar(&bld32, ptr_addrtrunc);
   /* For the hash function, first mask off the unused lowest bits.
    * Then just do some xor with address bits - only use the lower 32 bits. */
   ptr_addrtrunc = LLVMBuildAdd(builder, offset, ptr_addrtrunc, "");
   ptr_addrtrunc = LLVMBuildLShr(builder, ptr_addrtrunc,
                                 lp_build_const_int_vec(gallivm, type, low_bit), "");
   /* This only really makes sense for size 64,128,256 */
   hash_index = ptr_addrtrunc;
   ptr_addrtrunc = LLVMBuildLShr(builder, ptr_addrtrunc,
                                 lp_build_const_int_vec(gallivm, type, 2*log2size), "");
   hash_index = LLVMBuildXor(builder, ptr_addrtrunc, hash_index, "");
   tmp = LLVMBuildLShr(builder, hash_index,
                       lp_build_const_int_vec(gallivm, type, log2size), "");
   hash_index = LLVMBuildXor(builder, hash_index, tmp, "");

   hash_mask = lp_build_const_int_vec(gallivm, type,
                                      LP_BUILD_FORMAT_CACHE_SIZE - 1);
   hash_index = LLVMBuildAnd(builder, hash_index, hash_mask, "");
   ij_index = LLVMBuildShl(builder, i,
                           lp_build_const_int_vec(gallivm, type, 2), "");
   ij_index = LLVMBuildAdd(builder, ij_index, j, "");
   block_index = LLVMBuildShl(builder, hash_index,
                              lp_build_const_int_vec(gallivm, type, 4), "");
   block_index = LLVMBuildAdd(builder, ij_index, block_index, "");

   if (n > 1) {
      color = LLVMGetUndef(LLVMVectorType(i32t, n));
      for (count = 0; count < n; count++) {
         LLVMValueRef index, cond, colorx;
         LLVMValueRef block_indexx, hash_indexx, addrx, offsetx, ptr_addrx;
         struct lp_build_if_state if_ctx;

         index = lp_build_const_int32(gallivm, count);
         offsetx = LLVMBuildExtractElement(builder, offset, index, "");
         addrx = LLVMBuildZExt(builder, offsetx, i64t, "");
         addrx = LLVMBuildAdd(builder, addrx, addr, "");
         block_indexx = LLVMBuildExtractElement(builder, block_index, index, "");
         hash_indexx = LLVMBuildLShr(builder, block_indexx,
                                     lp_build_const_int32(gallivm, 4), "");
         offset_stored = lookup_tag_data(gallivm, cache, hash_indexx);
         cond = LLVMBuildICmp(builder, LLVMIntNE, offset_stored, addrx, "");

         lp_build_if(&if_ctx, gallivm, cond);
         {
            ptr_addrx = LLVMBuildIntToPtr(builder, addrx,
                                          LLVMPointerType(i8t, 0), "");
            update_cached_block(gallivm, format_desc, ptr_addrx,
                                hash_indexx, cache);
#if LP_BUILD_FORMAT_CACHE_DEBUG
            update_cache_access(gallivm, cache, 1,
                                LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_MISS);
#endif
         }
         lp_build_endif(&if_ctx);

         colorx = lookup_cached_pixel(gallivm, cache, block_indexx);

         color = LLVMBuildInsertElement(builder, color, colorx,
                                        lp_build_const_int32(gallivm, count), "");
      }
   }
   else {
      LLVMValueRef cond;
      struct lp_build_if_state if_ctx;

      tmp = LLVMBuildZExt(builder, offset, i64t, "");
      addr = LLVMBuildAdd(builder, tmp, addr, "");
      offset_stored = lookup_tag_data(gallivm, cache, hash_index);
      cond = LLVMBuildICmp(builder, LLVMIntNE, offset_stored, addr, "");

      lp_build_if(&if_ctx, gallivm, cond);
      {
         tmp = LLVMBuildIntToPtr(builder, addr, LLVMPointerType(i8t, 0), "");
         update_cached_block(gallivm, format_desc, tmp, hash_index, cache);
#if LP_BUILD_FORMAT_CACHE_DEBUG
         update_cache_access(gallivm, cache, 1,
                             LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_MISS);
#endif
      }
      lp_build_endif(&if_ctx);

      color = lookup_cached_pixel(gallivm, cache, block_index);
   }
#if LP_BUILD_FORMAT_CACHE_DEBUG
   update_cache_access(gallivm, cache, n,
                       LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_TOTAL);
#endif
   return LLVMBuildBitCast(builder, color, LLVMVectorType(i8t, n * 4), "");
}
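A scalar restatement of the tag hash above (hypothetical helper; low_bit strips the within-block address bits, and LP_BUILD_FORMAT_CACHE_SIZE is assumed to be a power of two so that log2size is exact):

/* Scalar sketch of the direct-mapped cache hash: drop the bits that
 * address within a block, fold higher address bits down with xor,
 * then mask to the cache size. */
static uint32_t
texel_cache_hash(uint32_t addr, unsigned low_bit, unsigned log2size)
{
   uint32_t h = addr >> low_bit;
   h ^= h >> (2 * log2size);
   h ^= h >> log2size;
   return h & ((1u << log2size) - 1);   /* LP_BUILD_FORMAT_CACHE_SIZE - 1 */
}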
/**
 * Sample a single texture image with (bi-)(tri-)linear sampling.
 * Return filtered color as two vectors of 16-bit fixed point values.
 */
static void
lp_build_sample_image_linear(struct lp_build_sample_context *bld,
                             LLVMValueRef int_size,
                             LLVMValueRef row_stride_vec,
                             LLVMValueRef img_stride_vec,
                             LLVMValueRef data_ptr,
                             LLVMValueRef s,
                             LLVMValueRef t,
                             LLVMValueRef r,
                             LLVMValueRef *colors_lo,
                             LLVMValueRef *colors_hi)
{
   const unsigned dims = bld->dims;
   LLVMBuilderRef builder = bld->gallivm->builder;
   struct lp_build_context i32, h16, u8n;
   LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type;
   LLVMValueRef i32_c8, i32_c128, i32_c255;
   LLVMValueRef width_vec, height_vec, depth_vec;
   LLVMValueRef s_ipart, s_fpart, s_fpart_lo, s_fpart_hi;
   LLVMValueRef t_ipart = NULL, t_fpart = NULL, t_fpart_lo = NULL, t_fpart_hi = NULL;
   LLVMValueRef r_ipart = NULL, r_fpart = NULL, r_fpart_lo = NULL, r_fpart_hi = NULL;
   LLVMValueRef x_stride, y_stride, z_stride;
   LLVMValueRef x_offset0, x_offset1;
   LLVMValueRef y_offset0, y_offset1;
   LLVMValueRef z_offset0, z_offset1;
   LLVMValueRef offset[2][2][2]; /* [z][y][x] */
   LLVMValueRef x_subcoord[2], y_subcoord[2], z_subcoord[2];
   LLVMValueRef neighbors_lo[2][2][2]; /* [z][y][x] */
   LLVMValueRef neighbors_hi[2][2][2]; /* [z][y][x] */
   LLVMValueRef packed_lo, packed_hi;
   unsigned x, y, z;
   unsigned i, j, k;
   unsigned numj, numk;

   lp_build_context_init(&i32, bld->gallivm, lp_type_int_vec(32));
   lp_build_context_init(&h16, bld->gallivm, lp_type_ufixed(16));
   lp_build_context_init(&u8n, bld->gallivm, lp_type_unorm(8));

   i32_vec_type = lp_build_vec_type(bld->gallivm, i32.type);
   h16_vec_type = lp_build_vec_type(bld->gallivm, h16.type);
   u8n_vec_type = lp_build_vec_type(bld->gallivm, u8n.type);

   lp_build_extract_image_sizes(bld, bld->int_size_type,
                                bld->int_coord_type,
                                int_size,
                                &width_vec, &height_vec, &depth_vec);

   if (bld->static_state->normalized_coords) {
      LLVMValueRef scaled_size;
      LLVMValueRef flt_size;

      /* scale size by 256 (8 fractional bits) */
      scaled_size = lp_build_shl_imm(&bld->int_size_bld, int_size, 8);

      flt_size = lp_build_int_to_float(&bld->float_size_bld, scaled_size);

      lp_build_unnormalized_coords(bld, flt_size, &s, &t, &r);
   }
   else {
      /* scale coords by 256 (8 fractional bits) */
      s = lp_build_mul_imm(&bld->coord_bld, s, 256);
      if (dims >= 2)
         t = lp_build_mul_imm(&bld->coord_bld, t, 256);
      if (dims >= 3)
         r = lp_build_mul_imm(&bld->coord_bld, r, 256);
   }

   /* convert float to int */
   s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
   if (dims >= 2)
      t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");
   if (dims >= 3)
      r = LLVMBuildFPToSI(builder, r, i32_vec_type, "");

   /* subtract 0.5 (add -128) */
   i32_c128 = lp_build_const_int_vec(bld->gallivm, i32.type, -128);
   s = LLVMBuildAdd(builder, s, i32_c128, "");
   if (dims >= 2) {
      t = LLVMBuildAdd(builder, t, i32_c128, "");
   }
   if (dims >= 3) {
      r = LLVMBuildAdd(builder, r, i32_c128, "");
   }

   /* compute floor (shift right 8) */
   i32_c8 = lp_build_const_int_vec(bld->gallivm, i32.type, 8);
   s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
   if (dims >= 2)
      t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
   if (dims >= 3)
      r_ipart = LLVMBuildAShr(builder, r, i32_c8, "");

   /* compute fractional part (AND with 0xff) */
   i32_c255 = lp_build_const_int_vec(bld->gallivm, i32.type, 255);
   s_fpart = LLVMBuildAnd(builder, s, i32_c255, "");
   if (dims >= 2)
      t_fpart = LLVMBuildAnd(builder, t, i32_c255, "");
   if (dims >= 3)
      r_fpart = LLVMBuildAnd(builder, r, i32_c255, "");

   /* get pixel, row and image strides */
   x_stride = lp_build_const_vec(bld->gallivm, bld->int_coord_bld.type,
                                 bld->format_desc->block.bits/8);
   y_stride = row_stride_vec;
   z_stride = img_stride_vec;

   /* do texcoord wrapping and compute texel offsets */
   lp_build_sample_wrap_linear_int(bld,
                                   bld->format_desc->block.width,
                                   s_ipart, width_vec, x_stride,
                                   bld->static_state->pot_width,
                                   bld->static_state->wrap_s,
                                   &x_offset0, &x_offset1,
                                   &x_subcoord[0], &x_subcoord[1]);
   for (z = 0; z < 2; z++) {
      for (y = 0; y < 2; y++) {
         offset[z][y][0] = x_offset0;
         offset[z][y][1] = x_offset1;
      }
   }

   if (dims >= 2) {
      lp_build_sample_wrap_linear_int(bld,
                                      bld->format_desc->block.height,
                                      t_ipart, height_vec, y_stride,
                                      bld->static_state->pot_height,
                                      bld->static_state->wrap_t,
                                      &y_offset0, &y_offset1,
                                      &y_subcoord[0], &y_subcoord[1]);

      for (z = 0; z < 2; z++) {
         for (x = 0; x < 2; x++) {
            offset[z][0][x] = lp_build_add(&bld->int_coord_bld,
                                           offset[z][0][x], y_offset0);
            offset[z][1][x] = lp_build_add(&bld->int_coord_bld,
                                           offset[z][1][x], y_offset1);
         }
      }
   }

   if (dims >= 3) {
      lp_build_sample_wrap_linear_int(bld,
                                      bld->format_desc->block.height,
                                      r_ipart, depth_vec, z_stride,
                                      bld->static_state->pot_depth,
                                      bld->static_state->wrap_r,
                                      &z_offset0, &z_offset1,
                                      &z_subcoord[0], &z_subcoord[1]);
      for (y = 0; y < 2; y++) {
         for (x = 0; x < 2; x++) {
            offset[0][y][x] = lp_build_add(&bld->int_coord_bld,
                                           offset[0][y][x], z_offset0);
            offset[1][y][x] = lp_build_add(&bld->int_coord_bld,
                                           offset[1][y][x], z_offset1);
         }
      }
   }
   else if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
      LLVMValueRef z_offset;
      z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec);
      for (y = 0; y < 2; y++) {
         for (x = 0; x < 2; x++) {
            /* The r coord is the cube face in [0,5] */
            offset[0][y][x] = lp_build_add(&bld->int_coord_bld,
                                           offset[0][y][x], z_offset);
         }
      }
   }

   /*
    * Transform 4 x i32 in
    *
    *   s_fpart = {s0, s1, s2, s3}
    *
    * into 8 x i16
    *
    *   s_fpart = {00, s0, 00, s1, 00, s2, 00, s3}
    *
    * into two 8 x i16
    *
    *   s_fpart_lo = {s0, s0, s0, s0, s1, s1, s1, s1}
    *   s_fpart_hi = {s2, s2, s2, s2, s3, s3, s3, s3}
    *
    * and likewise for t_fpart.  There is no risk of losing precision here
    * since the fractional parts only use the lower 8 bits.
    */
   s_fpart = LLVMBuildBitCast(builder, s_fpart, h16_vec_type, "");
   if (dims >= 2)
      t_fpart = LLVMBuildBitCast(builder, t_fpart, h16_vec_type, "");
   if (dims >= 3)
      r_fpart = LLVMBuildBitCast(builder, r_fpart, h16_vec_type, "");

   {
      LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context);
      LLVMValueRef shuffles_lo[LP_MAX_VECTOR_LENGTH];
      LLVMValueRef shuffles_hi[LP_MAX_VECTOR_LENGTH];
      LLVMValueRef shuffle_lo;
      LLVMValueRef shuffle_hi;

      for (j = 0; j < h16.type.length; j += 4) {
#ifdef PIPE_ARCH_LITTLE_ENDIAN
         unsigned subindex = 0;
#else
         unsigned subindex = 1;
#endif
         LLVMValueRef index;

         index = LLVMConstInt(elem_type, j/2 + subindex, 0);
         for (i = 0; i < 4; ++i)
            shuffles_lo[j + i] = index;

         index = LLVMConstInt(elem_type, h16.type.length/2 + j/2 + subindex, 0);
         for (i = 0; i < 4; ++i)
            shuffles_hi[j + i] = index;
      }

      shuffle_lo = LLVMConstVector(shuffles_lo, h16.type.length);
      shuffle_hi = LLVMConstVector(shuffles_hi, h16.type.length);

      s_fpart_lo = LLVMBuildShuffleVector(builder, s_fpart, h16.undef,
                                          shuffle_lo, "");
      s_fpart_hi = LLVMBuildShuffleVector(builder, s_fpart, h16.undef,
                                          shuffle_hi, "");
      if (dims >= 2) {
         t_fpart_lo = LLVMBuildShuffleVector(builder, t_fpart, h16.undef,
                                             shuffle_lo, "");
         t_fpart_hi = LLVMBuildShuffleVector(builder, t_fpart, h16.undef,
                                             shuffle_hi, "");
      }
      if (dims >= 3) {
         r_fpart_lo = LLVMBuildShuffleVector(builder, r_fpart, h16.undef,
                                             shuffle_lo, "");
         r_fpart_hi = LLVMBuildShuffleVector(builder, r_fpart, h16.undef,
                                             shuffle_hi, "");
      }
   }

   /*
    * Fetch the pixels as 4 x 32bit (rgba order might differ):
    *
    *   rgba0 rgba1 rgba2 rgba3
    *
    * bit cast them into 16 x u8
    *
    *   r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
    *
    * unpack them into two 8 x i16:
    *
    *   r0 g0 b0 a0 r1 g1 b1 a1
    *   r2 g2 b2 a2 r3 g3 b3 a3
    *
    * The higher 8 bits of the resulting elements will be zero.
    */
   numj = 1 + (dims >= 2);
   numk = 1 + (dims >= 3);

   for (k = 0; k < numk; k++) {
      for (j = 0; j < numj; j++) {
         for (i = 0; i < 2; i++) {
            LLVMValueRef rgba8;

            if (util_format_is_rgba8_variant(bld->format_desc)) {
               /*
                * Given the format is a rgba8, just read the pixels as is,
                * without any swizzling.  Swizzling will be done later.
                */
               rgba8 = lp_build_gather(bld->gallivm,
                                       bld->texel_type.length,
                                       bld->format_desc->block.bits,
                                       bld->texel_type.width,
                                       data_ptr, offset[k][j][i]);

               rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
            }
            else {
               rgba8 = lp_build_fetch_rgba_aos(bld->gallivm,
                                               bld->format_desc,
                                               u8n.type,
                                               data_ptr, offset[k][j][i],
                                               x_subcoord[i],
                                               y_subcoord[j]);
            }

            /* Expand one 4*rgba8 to two 2*rgba16 */
            lp_build_unpack2(bld->gallivm, u8n.type, h16.type,
                             rgba8,
                             &neighbors_lo[k][j][i], &neighbors_hi[k][j][i]);
         }
      }
   }

   /*
    * Linear interpolation with 8.8 fixed point.
    */
   if (dims == 1) {
      /* 1-D lerp */
      packed_lo = lp_build_lerp(&h16,
                                s_fpart_lo,
                                neighbors_lo[0][0][0],
                                neighbors_lo[0][0][1]);

      packed_hi = lp_build_lerp(&h16,
                                s_fpart_hi,
                                neighbors_hi[0][0][0],
                                neighbors_hi[0][0][1]);
   }
   else {
      /* 2-D lerp */
      packed_lo = lp_build_lerp_2d(&h16,
                                   s_fpart_lo, t_fpart_lo,
                                   neighbors_lo[0][0][0],
                                   neighbors_lo[0][0][1],
                                   neighbors_lo[0][1][0],
                                   neighbors_lo[0][1][1]);

      packed_hi = lp_build_lerp_2d(&h16,
                                   s_fpart_hi, t_fpart_hi,
                                   neighbors_hi[0][0][0],
                                   neighbors_hi[0][0][1],
                                   neighbors_hi[0][1][0],
                                   neighbors_hi[0][1][1]);

      if (dims >= 3) {
         LLVMValueRef packed_lo2, packed_hi2;

         /* lerp in the second z slice */
         packed_lo2 = lp_build_lerp_2d(&h16,
                                       s_fpart_lo, t_fpart_lo,
                                       neighbors_lo[1][0][0],
                                       neighbors_lo[1][0][1],
                                       neighbors_lo[1][1][0],
                                       neighbors_lo[1][1][1]);

         packed_hi2 = lp_build_lerp_2d(&h16,
                                       s_fpart_hi, t_fpart_hi,
                                       neighbors_hi[1][0][0],
                                       neighbors_hi[1][0][1],
                                       neighbors_hi[1][1][0],
                                       neighbors_hi[1][1][1]);

         /* interp between two z slices */
         packed_lo = lp_build_lerp(&h16, r_fpart_lo,
                                   packed_lo, packed_lo2);
         packed_hi = lp_build_lerp(&h16, r_fpart_hi,
                                   packed_hi, packed_hi2);
      }
   }

   *colors_lo = packed_lo;
   *colors_hi = packed_hi;
}
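The 8.8 fixed-point lerp used throughout reduces per channel to the usual integer formulation; a scalar sketch (hypothetical helper; the exact rounding in lp_build_lerp may differ):

/* 8.8 fixed-point lerp: f is the fractional weight in [0,255], a and b
 * are 8-bit channel values widened to 16 bits.
 * lerp(a, b, f) = a + ((f * (b - a)) >> 8) */
static uint16_t
lerp_8_8(uint16_t a, uint16_t b, uint16_t f)
{
   return (uint16_t)(a + (((int32_t)f * ((int32_t)b - (int32_t)a)) >> 8));
}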
/**
 * Sample the texture/mipmap using given image filter and mip filter.
 * data0_ptr and data1_ptr point to the two mipmap levels to sample
 * from.  width0/1_vec, height0/1_vec, depth0/1_vec indicate their sizes.
 * If we're using nearest miplevel sampling the '1' values will be null/unused.
 */
static void
lp_build_sample_mipmap(struct lp_build_sample_context *bld,
                       unsigned img_filter,
                       unsigned mip_filter,
                       LLVMValueRef s,
                       LLVMValueRef t,
                       LLVMValueRef r,
                       LLVMValueRef ilevel0,
                       LLVMValueRef ilevel1,
                       LLVMValueRef lod_fpart,
                       LLVMValueRef colors_lo_var,
                       LLVMValueRef colors_hi_var)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef size0;
   LLVMValueRef size1;
   LLVMValueRef row_stride0_vec;
   LLVMValueRef row_stride1_vec;
   LLVMValueRef img_stride0_vec;
   LLVMValueRef img_stride1_vec;
   LLVMValueRef data_ptr0;
   LLVMValueRef data_ptr1;
   LLVMValueRef colors0_lo, colors0_hi;
   LLVMValueRef colors1_lo, colors1_hi;

   /* sample the first mipmap level */
   lp_build_mipmap_level_sizes(bld, ilevel0,
                               &size0,
                               &row_stride0_vec, &img_stride0_vec);
   data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
   if (img_filter == PIPE_TEX_FILTER_NEAREST) {
      lp_build_sample_image_nearest(bld,
                                    size0,
                                    row_stride0_vec, img_stride0_vec,
                                    data_ptr0, s, t, r,
                                    &colors0_lo, &colors0_hi);
   }
   else {
      assert(img_filter == PIPE_TEX_FILTER_LINEAR);
      lp_build_sample_image_linear(bld,
                                   size0,
                                   row_stride0_vec, img_stride0_vec,
                                   data_ptr0, s, t, r,
                                   &colors0_lo, &colors0_hi);
   }

   /* Store the first level's colors in the output variables */
   LLVMBuildStore(builder, colors0_lo, colors_lo_var);
   LLVMBuildStore(builder, colors0_hi, colors_hi_var);

   if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
      LLVMValueRef h16_scale = lp_build_const_float(bld->gallivm, 256.0);
      LLVMTypeRef i32_type = LLVMIntTypeInContext(bld->gallivm->context, 32);
      struct lp_build_if_state if_ctx;
      LLVMValueRef need_lerp;

      lod_fpart = LLVMBuildFMul(builder, lod_fpart, h16_scale, "");
      lod_fpart = LLVMBuildFPToSI(builder, lod_fpart, i32_type,
                                  "lod_fpart.fixed16");

      /* need_lerp = lod_fpart > 0 */
      need_lerp = LLVMBuildICmp(builder, LLVMIntSGT,
                                lod_fpart, LLVMConstNull(i32_type),
                                "need_lerp");

      lp_build_if(&if_ctx, bld->gallivm, need_lerp);
      {
         struct lp_build_context h16_bld;

         lp_build_context_init(&h16_bld, bld->gallivm, lp_type_ufixed(16));

         /* sample the second mipmap level */
         lp_build_mipmap_level_sizes(bld, ilevel1,
                                     &size1,
                                     &row_stride1_vec, &img_stride1_vec);
         data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
         if (img_filter == PIPE_TEX_FILTER_NEAREST) {
            lp_build_sample_image_nearest(bld,
                                          size1,
                                          row_stride1_vec, img_stride1_vec,
                                          data_ptr1, s, t, r,
                                          &colors1_lo, &colors1_hi);
         }
         else {
            lp_build_sample_image_linear(bld,
                                         size1,
                                         row_stride1_vec, img_stride1_vec,
                                         data_ptr1, s, t, r,
                                         &colors1_lo, &colors1_hi);
         }

         /* interpolate samples from the two mipmap levels */

         lod_fpart = LLVMBuildTrunc(builder, lod_fpart, h16_bld.elem_type, "");
         lod_fpart = lp_build_broadcast_scalar(&h16_bld, lod_fpart);

#if HAVE_LLVM == 0x208
         /* This is a work-around for a bug in LLVM 2.8.
          * Evidently, something goes wrong in the construction of the
          * lod_fpart short[8] vector.  Adding this no-effect shuffle seems
          * to force the vector to be properly constructed.
          * Tested with mesa-demos/src/tests/mipmap_limits.c (press t, f).
          */
         {
            LLVMValueRef shuffles[8], shuffle;
            int i;
            assert(h16_bld.type.length <= Elements(shuffles));
            for (i = 0; i < h16_bld.type.length; i++)
               shuffles[i] = lp_build_const_int32(bld->gallivm, 2 * (i & 1));
            shuffle = LLVMConstVector(shuffles, h16_bld.type.length);
            lod_fpart = LLVMBuildShuffleVector(builder,
                                               lod_fpart, lod_fpart,
                                               shuffle, "");
         }
#endif

         colors0_lo = lp_build_lerp(&h16_bld, lod_fpart,
                                    colors0_lo, colors1_lo);
         colors0_hi = lp_build_lerp(&h16_bld, lod_fpart,
                                    colors0_hi, colors1_hi);

         LLVMBuildStore(builder, colors0_lo, colors_lo_var);
         LLVMBuildStore(builder, colors0_hi, colors_hi_var);
      }
      lp_build_endif(&if_ctx);
   }
}
/** * Fetch a pixel into a 4 float AoS. * * \param format_desc describes format of the image we're fetching from * \param ptr address of the pixel block (or the texel if uncompressed) * \param i, j the sub-block pixel coordinates. For non-compressed formats * these will always be (0, 0). * \return a 4 element vector with the pixel's RGBA values. */ LLVMValueRef lp_build_fetch_rgba_aos(struct gallivm_state *gallivm, const struct util_format_description *format_desc, struct lp_type type, LLVMValueRef base_ptr, LLVMValueRef offset, LLVMValueRef i, LLVMValueRef j) { LLVMBuilderRef builder = gallivm->builder; unsigned num_pixels = type.length / 4; struct lp_build_context bld; assert(type.length <= LP_MAX_VECTOR_LENGTH); assert(type.length % 4 == 0); lp_build_context_init(&bld, gallivm, type); /* * Trivial case * * The format matches the type (apart of a swizzle) so no need for * scaling or converting. */ if (format_matches_type(format_desc, type) && format_desc->block.bits <= type.width * 4 && util_is_power_of_two(format_desc->block.bits)) { LLVMValueRef packed; /* * The format matches the type (apart of a swizzle) so no need for * scaling or converting. */ packed = lp_build_gather(gallivm, type.length/4, format_desc->block.bits, type.width*4, base_ptr, offset); assert(format_desc->block.bits <= type.width * type.length); packed = LLVMBuildBitCast(gallivm->builder, packed, lp_build_vec_type(gallivm, type), ""); return lp_build_format_swizzle_aos(format_desc, &bld, packed); } /* * Bit arithmetic */ if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN && (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB || format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) && format_desc->block.width == 1 && format_desc->block.height == 1 && util_is_power_of_two(format_desc->block.bits) && format_desc->block.bits <= 32 && format_desc->is_bitmask && !format_desc->is_mixed && (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED || format_desc->channel[1].type == UTIL_FORMAT_TYPE_UNSIGNED)) { LLVMValueRef tmps[LP_MAX_VECTOR_LENGTH/4]; LLVMValueRef res; unsigned k; /* * Unpack a pixel at a time into a <4 x float> RGBA vector */ for (k = 0; k < num_pixels; ++k) { LLVMValueRef packed; packed = lp_build_gather_elem(gallivm, num_pixels, format_desc->block.bits, 32, base_ptr, offset, k); tmps[k] = lp_build_unpack_arith_rgba_aos(gallivm, format_desc, packed); } /* * Type conversion. * * TODO: We could avoid floating conversion for integer to * integer conversions. */ if (gallivm_debug & GALLIVM_DEBUG_PERF && !type.floating) { debug_printf("%s: unpacking %s with floating point\n", __FUNCTION__, format_desc->short_name); } lp_build_conv(gallivm, lp_float32_vec4_type(), type, tmps, num_pixels, &res, 1); return lp_build_format_swizzle_aos(format_desc, &bld, res); } /* * YUV / subsampled formats */ if (format_desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) { struct lp_type tmp_type; LLVMValueRef tmp; memset(&tmp_type, 0, sizeof tmp_type); tmp_type.width = 8; tmp_type.length = num_pixels * 4; tmp_type.norm = TRUE; tmp = lp_build_fetch_subsampled_rgba_aos(gallivm, format_desc, num_pixels, base_ptr, offset, i, j); lp_build_conv(gallivm, tmp_type, type, &tmp, 1, &tmp, 1); return tmp; } /* * Fallback to util_format_description::fetch_rgba_8unorm(). */ if (format_desc->fetch_rgba_8unorm && !type.floating && type.width == 8 && !type.sign && type.norm) { /* * Fallback to calling util_format_description::fetch_rgba_8unorm. 
* * This is definitely not the most efficient way of fetching pixels, as * we miss the opportunity to do vectorization, but it is * convenient for formats or scenarios for which there was no opportunity * or incentive to optimize. */ LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder))); char name[256]; LLVMTypeRef i8t = LLVMInt8TypeInContext(gallivm->context); LLVMTypeRef pi8t = LLVMPointerType(i8t, 0); LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context); LLVMValueRef function; LLVMValueRef tmp_ptr; LLVMValueRef tmp; LLVMValueRef res; LLVMValueRef callee; unsigned k; util_snprintf(name, sizeof name, "util_format_%s_fetch_rgba_8unorm", format_desc->short_name); if (gallivm_debug & GALLIVM_DEBUG_PERF) { debug_printf("%s: falling back to %s\n", __FUNCTION__, name); } /* * Declare and bind format_desc->fetch_rgba_8unorm(). */ function = LLVMGetNamedFunction(module, name); if (!function) { /* * Function to call looks like: * fetch(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j) */ LLVMTypeRef ret_type; LLVMTypeRef arg_types[4]; LLVMTypeRef function_type; ret_type = LLVMVoidTypeInContext(gallivm->context); arg_types[0] = pi8t; arg_types[1] = pi8t; arg_types[2] = i32t; arg_types[3] = i32t; function_type = LLVMFunctionType(ret_type, arg_types, Elements(arg_types), 0); function = LLVMAddFunction(module, name, function_type); LLVMSetFunctionCallConv(function, LLVMCCallConv); LLVMSetLinkage(function, LLVMExternalLinkage); assert(LLVMIsDeclaration(function)); } /* make a const pointer to the C fetch_rgba_8unorm function */ callee = lp_build_const_int_pointer(gallivm, func_to_pointer((func_pointer) format_desc->fetch_rgba_8unorm)); /* cast the callee pointer to the function's type */ function = LLVMBuildBitCast(builder, callee, LLVMTypeOf(function), "cast callee"); tmp_ptr = lp_build_alloca(gallivm, i32t, ""); res = LLVMGetUndef(LLVMVectorType(i32t, num_pixels)); /* * Invoke format_desc->fetch_rgba_8unorm() for each pixel and insert the result * in the SoA vectors. */ for (k = 0; k < num_pixels; ++k) { LLVMValueRef index = lp_build_const_int32(gallivm, k); LLVMValueRef args[4]; args[0] = LLVMBuildBitCast(builder, tmp_ptr, pi8t, ""); args[1] = lp_build_gather_elem_ptr(gallivm, num_pixels, base_ptr, offset, k); if (num_pixels == 1) { args[2] = i; args[3] = j; } else { args[2] = LLVMBuildExtractElement(builder, i, index, ""); args[3] = LLVMBuildExtractElement(builder, j, index, ""); } LLVMBuildCall(builder, function, args, Elements(args), ""); tmp = LLVMBuildLoad(builder, tmp_ptr, ""); if (num_pixels == 1) { res = tmp; } else { res = LLVMBuildInsertElement(builder, res, tmp, index, ""); } } /* Bitcast from <n x i32> to <4n x i8> */ res = LLVMBuildBitCast(builder, res, bld.vec_type, ""); return res; } /* * Fallback to util_format_description::fetch_rgba_float(). */ if (format_desc->fetch_rgba_float) { /* * Fallback to calling util_format_description::fetch_rgba_float. * * This is definitely not the most efficient way of fetching pixels, as * we miss the opportunity to do vectorization, but it is * convenient for formats or scenarios for which there was no opportunity * or incentive to optimize. 
*/ LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder))); char name[256]; LLVMTypeRef f32t = LLVMFloatTypeInContext(gallivm->context); LLVMTypeRef f32x4t = LLVMVectorType(f32t, 4); LLVMTypeRef pf32t = LLVMPointerType(f32t, 0); LLVMTypeRef pi8t = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0); LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context); LLVMValueRef function; LLVMValueRef tmp_ptr; LLVMValueRef tmps[LP_MAX_VECTOR_LENGTH/4]; LLVMValueRef res; LLVMValueRef callee; unsigned k; util_snprintf(name, sizeof name, "util_format_%s_fetch_rgba_float", format_desc->short_name); if (gallivm_debug & GALLIVM_DEBUG_PERF) { debug_printf("%s: falling back to %s\n", __FUNCTION__, name); } /* * Declare and bind format_desc->fetch_rgba_float(). */ function = LLVMGetNamedFunction(module, name); if (!function) { /* * Function to call looks like: * fetch(float *dst, const uint8_t *src, unsigned i, unsigned j) */ LLVMTypeRef ret_type; LLVMTypeRef arg_types[4]; LLVMTypeRef function_type; ret_type = LLVMVoidTypeInContext(gallivm->context); arg_types[0] = pf32t; arg_types[1] = pi8t; arg_types[2] = i32t; arg_types[3] = i32t; function_type = LLVMFunctionType(ret_type, arg_types, Elements(arg_types), 0); function = LLVMAddFunction(module, name, function_type); LLVMSetFunctionCallConv(function, LLVMCCallConv); LLVMSetLinkage(function, LLVMExternalLinkage); assert(LLVMIsDeclaration(function)); } /* Note: we're using this casting here instead of LLVMAddGlobalMapping() * to work around a bug in LLVM 2.6. */ /* make const pointer for the C fetch_rgba_float function */ callee = lp_build_const_int_pointer(gallivm, func_to_pointer((func_pointer) format_desc->fetch_rgba_float)); /* cast the callee pointer to the function's type */ function = LLVMBuildBitCast(builder, callee, LLVMTypeOf(function), "cast callee"); tmp_ptr = lp_build_alloca(gallivm, f32x4t, ""); /* * Invoke format_desc->fetch_rgba_float() for each pixel and insert the result * in the SoA vectors. */ for (k = 0; k < num_pixels; ++k) { LLVMValueRef args[4]; args[0] = LLVMBuildBitCast(builder, tmp_ptr, pf32t, ""); args[1] = lp_build_gather_elem_ptr(gallivm, num_pixels, base_ptr, offset, k); if (num_pixels == 1) { args[2] = i; args[3] = j; } else { LLVMValueRef index = lp_build_const_int32(gallivm, k); args[2] = LLVMBuildExtractElement(builder, i, index, ""); args[3] = LLVMBuildExtractElement(builder, j, index, ""); } LLVMBuildCall(builder, function, args, Elements(args), ""); tmps[k] = LLVMBuildLoad(builder, tmp_ptr, ""); } lp_build_conv(gallivm, lp_float32_vec4_type(), type, tmps, num_pixels, &res, 1); return res; } assert(0); return lp_build_undef(gallivm, type); }
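/*
 * For reference, the "bit arithmetic" path of lp_build_fetch_rgba_aos()
 * reduces to the following per-pixel computation for a packed 8888 UNORM
 * pixel (scalar sketch; in the real code the shift and width of each
 * channel come from format_desc->channel[chan]):
 */
static void
unpack_rgba8888_unorm_ref(unsigned packed, float rgba[4])
{
   unsigned chan;
   for (chan = 0; chan < 4; ++chan) {
      unsigned bits = (packed >> (8 * chan)) & 0xff; /* align LSB, mask off MSBs */
      rgba[chan] = (float)bits * (1.0f / 255.0f);    /* unsigned norm -> float */
   }
}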
/** * Texture sampling in AoS format. Used when sampling common 32-bit/texel * formats. 1D/2D/3D/cube textures are supported. All mipmap sampling modes * are supported, but only a limited set of texture coord wrap modes. */ void lp_build_sample_aos(struct lp_build_sample_context *bld, unsigned unit, LLVMValueRef s, LLVMValueRef t, LLVMValueRef r, const LLVMValueRef *ddx, const LLVMValueRef *ddy, LLVMValueRef lod_bias, /* optional */ LLVMValueRef explicit_lod, /* optional */ LLVMValueRef texel_out[4]) { struct lp_build_context *int_bld = &bld->int_bld; LLVMBuilderRef builder = bld->gallivm->builder; const unsigned mip_filter = bld->static_state->min_mip_filter; const unsigned min_filter = bld->static_state->min_img_filter; const unsigned mag_filter = bld->static_state->mag_img_filter; const unsigned dims = bld->dims; LLVMValueRef lod_ipart = NULL, lod_fpart = NULL; LLVMValueRef ilevel0, ilevel1 = NULL; LLVMValueRef packed, packed_lo, packed_hi; LLVMValueRef unswizzled[4]; LLVMValueRef face_ddx[4], face_ddy[4]; struct lp_build_context h16_bld; LLVMValueRef first_level; LLVMValueRef i32t_zero = lp_build_const_int32(bld->gallivm, 0); /* we only support the common/simple wrap modes at this time */ assert(lp_is_simple_wrap_mode(bld->static_state->wrap_s)); if (dims >= 2) assert(lp_is_simple_wrap_mode(bld->static_state->wrap_t)); if (dims >= 3) assert(lp_is_simple_wrap_mode(bld->static_state->wrap_r)); /* make 16-bit fixed-pt builder context */ lp_build_context_init(&h16_bld, bld->gallivm, lp_type_ufixed(16)); /* cube face selection, compute pre-face coords, etc. */ if (bld->static_state->target == PIPE_TEXTURE_CUBE) { LLVMValueRef face, face_s, face_t; lp_build_cube_lookup(bld, s, t, r, &face, &face_s, &face_t); s = face_s; /* vec */ t = face_t; /* vec */ /* use 'r' to indicate cube face */ r = lp_build_broadcast_scalar(&bld->int_coord_bld, face); /* vec */ /* recompute ddx, ddy using the new (s,t) face texcoords */ face_ddx[0] = lp_build_scalar_ddx(&bld->coord_bld, s); face_ddx[1] = lp_build_scalar_ddx(&bld->coord_bld, t); face_ddx[2] = NULL; face_ddx[3] = NULL; face_ddy[0] = lp_build_scalar_ddy(&bld->coord_bld, s); face_ddy[1] = lp_build_scalar_ddy(&bld->coord_bld, t); face_ddy[2] = NULL; face_ddy[3] = NULL; ddx = face_ddx; ddy = face_ddy; } /* * Compute the level of detail (float). */ if (min_filter != mag_filter || mip_filter != PIPE_TEX_MIPFILTER_NONE) { /* Need to compute lod either to choose mipmap levels or to * distinguish between minification/magnification with one mipmap level. */ lp_build_lod_selector(bld, unit, ddx, ddy, lod_bias, explicit_lod, mip_filter, &lod_ipart, &lod_fpart); } else { lod_ipart = i32t_zero; } /* * Compute integer mipmap level(s) to fetch texels from: ilevel0, ilevel1 */ switch (mip_filter) { default: assert(0 && "bad mip_filter value in lp_build_sample_aos()"); /* fall-through */ case PIPE_TEX_MIPFILTER_NONE: /* always use mip level 0 */ if (bld->static_state->target == PIPE_TEXTURE_CUBE) { /* XXX this is a work-around for an apparent bug in LLVM 2.7. * We should be able to set ilevel0 = const(0) but that causes * bad x86 code to be emitted. 
*/ assert(lod_ipart); lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0); } else { first_level = bld->dynamic_state->first_level(bld->dynamic_state, bld->gallivm, unit); ilevel0 = first_level; } break; case PIPE_TEX_MIPFILTER_NEAREST: assert(lod_ipart); lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0); break; case PIPE_TEX_MIPFILTER_LINEAR: assert(lod_ipart); assert(lod_fpart); lp_build_linear_mip_levels(bld, unit, lod_ipart, &lod_fpart, &ilevel0, &ilevel1); break; } /* * Get/interpolate texture colors. */ packed_lo = lp_build_alloca(bld->gallivm, h16_bld.vec_type, "packed_lo"); packed_hi = lp_build_alloca(bld->gallivm, h16_bld.vec_type, "packed_hi"); if (min_filter == mag_filter) { /* no need to distinguish between minification and magnification */ lp_build_sample_mipmap(bld, min_filter, mip_filter, s, t, r, ilevel0, ilevel1, lod_fpart, packed_lo, packed_hi); } else { /* Emit conditional to choose min image filter or mag image filter * depending on the lod being >= 0 or < 0, respectively. */ struct lp_build_if_state if_ctx; LLVMValueRef minify; /* minify = lod >= 0.0 */ minify = LLVMBuildICmp(builder, LLVMIntSGE, lod_ipart, int_bld->zero, ""); lp_build_if(&if_ctx, bld->gallivm, minify); { /* Use the minification filter */ lp_build_sample_mipmap(bld, min_filter, mip_filter, s, t, r, ilevel0, ilevel1, lod_fpart, packed_lo, packed_hi); } lp_build_else(&if_ctx); { /* Use the magnification filter */ lp_build_sample_mipmap(bld, mag_filter, PIPE_TEX_MIPFILTER_NONE, s, t, r, ilevel0, NULL, NULL, packed_lo, packed_hi); } lp_build_endif(&if_ctx); } /* * combine the values stored in 'packed_lo' and 'packed_hi' variables * into 'packed' */ packed = lp_build_pack2(bld->gallivm, h16_bld.type, lp_type_unorm(8), LLVMBuildLoad(builder, packed_lo, ""), LLVMBuildLoad(builder, packed_hi, "")); /* * Convert to SoA and swizzle. */ lp_build_rgba8_to_f32_soa(bld->gallivm, bld->texel_type, packed, unswizzled); if (util_format_is_rgba8_variant(bld->format_desc)) { lp_build_format_swizzle_soa(bld->format_desc, &bld->texel_bld, unswizzled, texel_out); } else { texel_out[0] = unswizzled[0]; texel_out[1] = unswizzled[1]; texel_out[2] = unswizzled[2]; texel_out[3] = unswizzled[3]; } }
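/*
 * The minify/magnify branch above mirrors this scalar decision, where
 * lod_ipart is the integer part of the computed level of detail (sketch
 * only; the generated code makes this choice with an IR conditional):
 */
static unsigned
choose_img_filter_ref(int lod_ipart, unsigned min_filter, unsigned mag_filter)
{
   /* lod >= 0 means the texture is being minified, lod < 0 magnified */
   return lod_ipart >= 0 ? min_filter : mag_filter;
}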
/** * Performs blending of src and dst pixels * * @param blend the blend state of the shader variant * @param cbuf_format format of the color buffer * @param type data type of the pixel vector * @param rt the render target index * @param src blend src * @param dst blend dst * @param mask optional mask to apply to the blending result * @param const_ const blend color * @param swizzle swizzle values for RGBA * * @return the result of blending src and dst */ LLVMValueRef lp_build_blend_aos(struct gallivm_state *gallivm, const struct pipe_blend_state *blend, const enum pipe_format *cbuf_format, struct lp_type type, unsigned rt, LLVMValueRef src, LLVMValueRef dst, LLVMValueRef mask, LLVMValueRef const_, const unsigned char swizzle[4]) { const struct pipe_rt_blend_state * state = &blend->rt[rt]; struct lp_build_blend_aos_context bld; LLVMValueRef src_factor, dst_factor; LLVMValueRef result; unsigned alpha_swizzle = swizzle[3]; boolean fullcolormask; /* Setup build context */ memset(&bld, 0, sizeof bld); lp_build_context_init(&bld.base, gallivm, type); bld.src = src; bld.dst = dst; bld.const_ = const_; if (swizzle[3] > UTIL_FORMAT_SWIZZLE_W || swizzle[3] == swizzle[0]) alpha_swizzle = UTIL_FORMAT_SWIZZLE_NONE; if (!state->blend_enable) { result = src; } else { boolean rgb_alpha_same = state->rgb_src_factor == state->rgb_dst_factor && state->alpha_src_factor == state->alpha_dst_factor; assert(rgb_alpha_same || alpha_swizzle != UTIL_FORMAT_SWIZZLE_NONE); src_factor = lp_build_blend_factor(&bld, state->rgb_src_factor, state->alpha_src_factor, alpha_swizzle); dst_factor = lp_build_blend_factor(&bld, state->rgb_dst_factor, state->alpha_dst_factor, alpha_swizzle); result = lp_build_blend(&bld.base, state->rgb_func, state->rgb_src_factor, state->rgb_dst_factor, src, dst, src_factor, dst_factor, rgb_alpha_same, false); if(state->rgb_func != state->alpha_func && alpha_swizzle != UTIL_FORMAT_SWIZZLE_NONE) { LLVMValueRef alpha; alpha = lp_build_blend(&bld.base, state->alpha_func, state->alpha_src_factor, state->alpha_dst_factor, src, dst, src_factor, dst_factor, rgb_alpha_same, false); result = lp_build_blend_swizzle(&bld, result, alpha, LP_BUILD_BLEND_SWIZZLE_RGBA, alpha_swizzle); } } /* Check if color mask is necessary */ fullcolormask = util_format_colormask_full(util_format_description(cbuf_format[rt]), state->colormask); if (!fullcolormask) { LLVMValueRef color_mask; color_mask = lp_build_const_mask_aos_swizzled(gallivm, bld.base.type, state->colormask, swizzle); lp_build_name(color_mask, "color_mask"); /* Combine with input mask if necessary */ if (mask) { mask = lp_build_and(&bld.base, color_mask, mask); } else { mask = color_mask; } } /* Apply mask, if one exists */ if (mask) { result = lp_build_select(&bld.base, mask, result, dst); } return result; }
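/*
 * For reference, with PIPE_BLEND_ADD as the blend func the code above
 * computes, per channel and per pixel (scalar float sketch of what the
 * AoS vector ops implement):
 */
static float
blend_add_ref(float src, float dst, float src_factor, float dst_factor,
              int writable /* colormask/coverage select */)
{
   float result = src * src_factor + dst * dst_factor;
   return writable ? result : dst; /* the final lp_build_select against dst */
}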
/** * Generic type conversion. * * TODO: Take a precision argument, or even better, add a new precision member * to the lp_type union. */ void lp_build_conv(LLVMBuilderRef builder, struct lp_type src_type, struct lp_type dst_type, const LLVMValueRef *src, unsigned num_srcs, LLVMValueRef *dst, unsigned num_dsts) { struct lp_type tmp_type; LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH]; unsigned num_tmps; unsigned i; /* We must not lose or gain channels. Only precision may change. */ assert(src_type.length * num_srcs == dst_type.length * num_dsts); assert(src_type.length <= LP_MAX_VECTOR_LENGTH); assert(dst_type.length <= LP_MAX_VECTOR_LENGTH); assert(num_srcs <= LP_MAX_VECTOR_LENGTH); assert(num_dsts <= LP_MAX_VECTOR_LENGTH); tmp_type = src_type; for(i = 0; i < num_srcs; ++i) { assert(lp_check_value(src_type, src[i])); tmp[i] = src[i]; } num_tmps = num_srcs; /* * Clamp if necessary */ if(memcmp(&src_type, &dst_type, sizeof src_type) != 0) { struct lp_build_context bld; double src_min = lp_const_min(src_type); double dst_min = lp_const_min(dst_type); double src_max = lp_const_max(src_type); double dst_max = lp_const_max(dst_type); LLVMValueRef thres; lp_build_context_init(&bld, builder, tmp_type); if(src_min < dst_min) { if(dst_min == 0.0) thres = bld.zero; else thres = lp_build_const_vec(src_type, dst_min); for(i = 0; i < num_tmps; ++i) tmp[i] = lp_build_max(&bld, tmp[i], thres); } if(src_max > dst_max) { if(dst_max == 1.0) thres = bld.one; else thres = lp_build_const_vec(src_type, dst_max); for(i = 0; i < num_tmps; ++i) tmp[i] = lp_build_min(&bld, tmp[i], thres); } } /* * Scale to the narrowest range */ if(dst_type.floating) { /* Nothing to do */ } else if(tmp_type.floating) { if(!dst_type.fixed && !dst_type.sign && dst_type.norm) { for(i = 0; i < num_tmps; ++i) { tmp[i] = lp_build_clamped_float_to_unsigned_norm(builder, tmp_type, dst_type.width, tmp[i]); } tmp_type.floating = FALSE; } else { double dst_scale = lp_const_scale(dst_type); LLVMTypeRef tmp_vec_type; if (dst_scale != 1.0) { LLVMValueRef scale = lp_build_const_vec(tmp_type, dst_scale); for(i = 0; i < num_tmps; ++i) tmp[i] = LLVMBuildFMul(builder, tmp[i], scale, ""); } /* Use an equally sized integer for intermediate computations */ tmp_type.floating = FALSE; tmp_vec_type = lp_build_vec_type(tmp_type); for(i = 0; i < num_tmps; ++i) { #if 0 if(dst_type.sign) tmp[i] = LLVMBuildFPToSI(builder, tmp[i], tmp_vec_type, ""); else tmp[i] = LLVMBuildFPToUI(builder, tmp[i], tmp_vec_type, ""); #else /* FIXME: there is no SSE counterpart for LLVMBuildFPToUI */ tmp[i] = LLVMBuildFPToSI(builder, tmp[i], tmp_vec_type, ""); #endif } } } else { unsigned src_shift = lp_const_shift(src_type); unsigned dst_shift = lp_const_shift(dst_type); /* FIXME: compensate different offsets too */ if(src_shift > dst_shift) { LLVMValueRef shift = lp_build_const_int_vec(tmp_type, src_shift - dst_shift); for(i = 0; i < num_tmps; ++i) if(src_type.sign) tmp[i] = LLVMBuildAShr(builder, tmp[i], shift, ""); else tmp[i] = LLVMBuildLShr(builder, tmp[i], shift, ""); } } /* * Truncate or expand bit width * * No data conversion should happen here, although the sign bits are * crucial to avoid bad clamping. 
*/ { struct lp_type new_type; new_type = tmp_type; new_type.sign = dst_type.sign; new_type.width = dst_type.width; new_type.length = dst_type.length; lp_build_resize(builder, tmp_type, new_type, tmp, num_srcs, tmp, num_dsts); tmp_type = new_type; num_tmps = num_dsts; } /* * Scale to the widest range */ if(src_type.floating) { /* Nothing to do */ } else if(!src_type.floating && dst_type.floating) { if(!src_type.fixed && !src_type.sign && src_type.norm) { for(i = 0; i < num_tmps; ++i) { tmp[i] = lp_build_unsigned_norm_to_float(builder, src_type.width, dst_type, tmp[i]); } tmp_type.floating = TRUE; } else { double src_scale = lp_const_scale(src_type); LLVMTypeRef tmp_vec_type; /* Use an equally sized integer for intermediate computations */ tmp_type.floating = TRUE; tmp_type.sign = TRUE; tmp_vec_type = lp_build_vec_type(tmp_type); for(i = 0; i < num_tmps; ++i) { #if 0 if(dst_type.sign) tmp[i] = LLVMBuildSIToFP(builder, tmp[i], tmp_vec_type, ""); else tmp[i] = LLVMBuildUIToFP(builder, tmp[i], tmp_vec_type, ""); #else /* FIXME: there is no SSE counterpart for LLVMBuildUIToFP */ tmp[i] = LLVMBuildSIToFP(builder, tmp[i], tmp_vec_type, ""); #endif } if (src_scale != 1.0) { LLVMValueRef scale = lp_build_const_vec(tmp_type, 1.0/src_scale); for(i = 0; i < num_tmps; ++i) tmp[i] = LLVMBuildFMul(builder, tmp[i], scale, ""); } } } else { unsigned src_shift = lp_const_shift(src_type); unsigned dst_shift = lp_const_shift(dst_type); /* FIXME: compensate different offsets too */ if(src_shift < dst_shift) { LLVMValueRef shift = lp_build_const_int_vec(tmp_type, dst_shift - src_shift); for(i = 0; i < num_tmps; ++i) tmp[i] = LLVMBuildShl(builder, tmp[i], shift, ""); } } for(i = 0; i < num_dsts; ++i) { dst[i] = tmp[i]; assert(lp_check_value(dst_type, dst[i])); } }
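/*
 * For reference, routing a float32 source to an unorm8 destination through
 * the steps above (clamp, scale to the narrowest range, truncate width) is
 * equivalent to this scalar computation (a sketch; rounding here uses the
 * +0.5 trick on the already-clamped value):
 */
static unsigned char
float_to_unorm8_ref(float f)
{
   if (f < 0.0f) f = 0.0f; /* clamp to the destination range */
   if (f > 1.0f) f = 1.0f;
   return (unsigned char)(f * 255.0f + 0.5f); /* scale and round */
}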
/** * Generic type conversion. * * TODO: Take a precision argument, or even better, add a new precision member * to the lp_type union. */ void lp_build_conv(struct gallivm_state *gallivm, struct lp_type src_type, struct lp_type dst_type, const LLVMValueRef *src, unsigned num_srcs, LLVMValueRef *dst, unsigned num_dsts) { LLVMBuilderRef builder = gallivm->builder; struct lp_type tmp_type; LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH]; unsigned num_tmps; unsigned i; /* We must not lose or gain channels. Only precision may change. */ assert(src_type.length * num_srcs == dst_type.length * num_dsts); assert(src_type.length <= LP_MAX_VECTOR_LENGTH); assert(dst_type.length <= LP_MAX_VECTOR_LENGTH); assert(num_srcs <= LP_MAX_VECTOR_LENGTH); assert(num_dsts <= LP_MAX_VECTOR_LENGTH); tmp_type = src_type; for(i = 0; i < num_srcs; ++i) { assert(lp_check_value(src_type, src[i])); tmp[i] = src[i]; } num_tmps = num_srcs; /* Special case 4x4f --> 1x16ub */ if (src_type.floating == 1 && src_type.fixed == 0 && src_type.sign == 1 && src_type.norm == 0 && src_type.width == 32 && src_type.length == 4 && dst_type.floating == 0 && dst_type.fixed == 0 && dst_type.sign == 0 && dst_type.norm == 1 && dst_type.width == 8 && dst_type.length == 16 && 4 * num_dsts == num_srcs && util_cpu_caps.has_sse2) { struct lp_build_context bld; struct lp_type int16_type = dst_type; struct lp_type int32_type = dst_type; LLVMValueRef const_255f; unsigned i, j; lp_build_context_init(&bld, gallivm, src_type); int16_type.width *= 2; int16_type.length /= 2; int16_type.sign = 1; int32_type.width *= 4; int32_type.length /= 4; int32_type.sign = 1; const_255f = lp_build_const_vec(gallivm, src_type, 255.0f); for (i = 0; i < num_dsts; ++i, src += 4) { LLVMValueRef lo, hi; for (j = 0; j < 4; ++j) { tmp[j] = LLVMBuildFMul(builder, src[j], const_255f, ""); tmp[j] = lp_build_iround(&bld, tmp[j]); } /* relying on clamping behavior of sse2 intrinsics here */ lo = lp_build_pack2(gallivm, int32_type, int16_type, tmp[0], tmp[1]); hi = lp_build_pack2(gallivm, int32_type, int16_type, tmp[2], tmp[3]); dst[i] = lp_build_pack2(gallivm, int16_type, dst_type, lo, hi); } return; } /* Special case 2x8f --> 1x16ub */ else if (src_type.floating == 1 && src_type.fixed == 0 && src_type.sign == 1 && src_type.norm == 0 && src_type.width == 32 && src_type.length == 8 && dst_type.floating == 0 && dst_type.fixed == 0 && dst_type.sign == 0 && dst_type.norm == 1 && dst_type.width == 8 && dst_type.length == 16 && 2 * num_dsts == num_srcs && util_cpu_caps.has_avx) { struct lp_build_context bld; struct lp_type int16_type = dst_type; struct lp_type int32_type = dst_type; LLVMValueRef const_255f; unsigned i; lp_build_context_init(&bld, gallivm, src_type); int16_type.width *= 2; int16_type.length /= 2; int16_type.sign = 1; int32_type.width *= 4; int32_type.length /= 4; int32_type.sign = 1; const_255f = lp_build_const_vec(gallivm, src_type, 255.0f); for (i = 0; i < num_dsts; ++i, src += 2) { LLVMValueRef lo, hi, a, b; a = LLVMBuildFMul(builder, src[0], const_255f, ""); b = LLVMBuildFMul(builder, src[1], const_255f, ""); a = lp_build_iround(&bld, a); b = lp_build_iround(&bld, b); tmp[0] = lp_build_extract_range(gallivm, a, 0, 4); tmp[1] = lp_build_extract_range(gallivm, a, 4, 4); tmp[2] = lp_build_extract_range(gallivm, b, 0, 4); tmp[3] = lp_build_extract_range(gallivm, b, 4, 4); /* relying on clamping behavior of sse2 intrinsics here */ lo = lp_build_pack2(gallivm, int32_type, int16_type, tmp[0], tmp[1]); hi = lp_build_pack2(gallivm, int32_type, int16_type, tmp[2], tmp[3]); dst[i] = 
lp_build_pack2(gallivm, int16_type, dst_type, lo, hi); } return; } /* Pre-convert half-floats to floats */ else if (src_type.floating && src_type.width == 16) { for(i = 0; i < num_tmps; ++i) tmp[i] = lp_build_half_to_float(gallivm, src_type, tmp[i]); tmp_type.width = 32; } /* * Clamp if necessary */ if(memcmp(&src_type, &dst_type, sizeof src_type) != 0) { struct lp_build_context bld; double src_min = lp_const_min(src_type); double dst_min = lp_const_min(dst_type); double src_max = lp_const_max(src_type); double dst_max = lp_const_max(dst_type); LLVMValueRef thres; lp_build_context_init(&bld, gallivm, tmp_type); if(src_min < dst_min) { if(dst_min == 0.0) thres = bld.zero; else thres = lp_build_const_vec(gallivm, src_type, dst_min); for(i = 0; i < num_tmps; ++i) tmp[i] = lp_build_max(&bld, tmp[i], thres); } if(src_max > dst_max) { if(dst_max == 1.0) thres = bld.one; else thres = lp_build_const_vec(gallivm, src_type, dst_max); for(i = 0; i < num_tmps; ++i) tmp[i] = lp_build_min(&bld, tmp[i], thres); } } /* * Scale to the narrowest range */ if(dst_type.floating) { /* Nothing to do */ } else if(tmp_type.floating) { if(!dst_type.fixed && !dst_type.sign && dst_type.norm) { for(i = 0; i < num_tmps; ++i) { tmp[i] = lp_build_clamped_float_to_unsigned_norm(gallivm, tmp_type, dst_type.width, tmp[i]); } tmp_type.floating = FALSE; } else { double dst_scale = lp_const_scale(dst_type); LLVMTypeRef tmp_vec_type; if (dst_scale != 1.0) { LLVMValueRef scale = lp_build_const_vec(gallivm, tmp_type, dst_scale); for(i = 0; i < num_tmps; ++i) tmp[i] = LLVMBuildFMul(builder, tmp[i], scale, ""); } /* Use an equally sized integer for intermediate computations */ tmp_type.floating = FALSE; tmp_vec_type = lp_build_vec_type(gallivm, tmp_type); for(i = 0; i < num_tmps; ++i) { #if 0 if(dst_type.sign) tmp[i] = LLVMBuildFPToSI(builder, tmp[i], tmp_vec_type, ""); else tmp[i] = LLVMBuildFPToUI(builder, tmp[i], tmp_vec_type, ""); #else /* FIXME: there is no SSE counterpart for LLVMBuildFPToUI */ tmp[i] = LLVMBuildFPToSI(builder, tmp[i], tmp_vec_type, ""); #endif } } } else { unsigned src_shift = lp_const_shift(src_type); unsigned dst_shift = lp_const_shift(dst_type); unsigned src_offset = lp_const_offset(src_type); unsigned dst_offset = lp_const_offset(dst_type); /* Compensate for different offsets */ if (dst_offset > src_offset && src_type.width > dst_type.width) { for (i = 0; i < num_tmps; ++i) { LLVMValueRef shifted; LLVMValueRef shift = lp_build_const_int_vec(gallivm, tmp_type, src_shift - 1); if(src_type.sign) shifted = LLVMBuildAShr(builder, tmp[i], shift, ""); else shifted = LLVMBuildLShr(builder, tmp[i], shift, ""); tmp[i] = LLVMBuildSub(builder, tmp[i], shifted, ""); } } if(src_shift > dst_shift) { LLVMValueRef shift = lp_build_const_int_vec(gallivm, tmp_type, src_shift - dst_shift); for(i = 0; i < num_tmps; ++i) if(src_type.sign) tmp[i] = LLVMBuildAShr(builder, tmp[i], shift, ""); else tmp[i] = LLVMBuildLShr(builder, tmp[i], shift, ""); } } /* * Truncate or expand bit width * * No data conversion should happen here, although the sign bits are * crucial to avoid bad clamping. 
*/ { struct lp_type new_type; new_type = tmp_type; new_type.sign = dst_type.sign; new_type.width = dst_type.width; new_type.length = dst_type.length; lp_build_resize(gallivm, tmp_type, new_type, tmp, num_srcs, tmp, num_dsts); tmp_type = new_type; num_tmps = num_dsts; } /* * Scale to the widest range */ if(src_type.floating) { /* Nothing to do */ } else if(!src_type.floating && dst_type.floating) { if(!src_type.fixed && !src_type.sign && src_type.norm) { for(i = 0; i < num_tmps; ++i) { tmp[i] = lp_build_unsigned_norm_to_float(gallivm, src_type.width, dst_type, tmp[i]); } tmp_type.floating = TRUE; } else { double src_scale = lp_const_scale(src_type); LLVMTypeRef tmp_vec_type; /* Use an equally sized integer for intermediate computations */ tmp_type.floating = TRUE; tmp_type.sign = TRUE; tmp_vec_type = lp_build_vec_type(gallivm, tmp_type); for(i = 0; i < num_tmps; ++i) { #if 0 if(dst_type.sign) tmp[i] = LLVMBuildSIToFP(builder, tmp[i], tmp_vec_type, ""); else tmp[i] = LLVMBuildUIToFP(builder, tmp[i], tmp_vec_type, ""); #else /* FIXME: there is no SSE counterpart for LLVMBuildUIToFP */ tmp[i] = LLVMBuildSIToFP(builder, tmp[i], tmp_vec_type, ""); #endif } if (src_scale != 1.0) { LLVMValueRef scale = lp_build_const_vec(gallivm, tmp_type, 1.0/src_scale); for(i = 0; i < num_tmps; ++i) tmp[i] = LLVMBuildFMul(builder, tmp[i], scale, ""); } } } else { unsigned src_shift = lp_const_shift(src_type); unsigned dst_shift = lp_const_shift(dst_type); unsigned src_offset = lp_const_offset(src_type); unsigned dst_offset = lp_const_offset(dst_type); if (src_shift < dst_shift) { LLVMValueRef pre_shift[LP_MAX_VECTOR_LENGTH]; LLVMValueRef shift = lp_build_const_int_vec(gallivm, tmp_type, dst_shift - src_shift); for (i = 0; i < num_tmps; ++i) { pre_shift[i] = tmp[i]; tmp[i] = LLVMBuildShl(builder, tmp[i], shift, ""); } /* Compensate for different offsets */ if (dst_offset > src_offset) { for (i = 0; i < num_tmps; ++i) { tmp[i] = LLVMBuildSub(builder, tmp[i], pre_shift[i], ""); } } } } for(i = 0; i < num_dsts; ++i) { dst[i] = tmp[i]; assert(lp_check_value(dst_type, dst[i])); } }
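/*
 * The 4x4f --> 1x16ub special case above deliberately skips the generic
 * clamping and relies on the saturating SSE2 pack instructions instead.
 * A scalar model of one element (sketch; lp_build_iround rounds to
 * nearest rather than using the +/-0.5 trick shown here):
 */
static unsigned char
scale_pack_sat_ref(float f)
{
   int i = (int)(f * 255.0f + (f >= 0.0f ? 0.5f : -0.5f));
   if (i < 0) i = 0;     /* packssdw/packuswb saturate out-of-range values */
   if (i > 255) i = 255;
   return (unsigned char)i;
}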
/** * Generate code for performing depth and/or stencil tests. * We operate on a vector of values (typically a 2x2 quad). * * \param depth the depth test state * \param stencil the front/back stencil state * \param type the data type of the fragment depth/stencil values * \param format_desc description of the depth/stencil surface * \param mask the alive/dead pixel mask for the quad (vector) * \param stencil_refs the front/back stencil ref values (scalar) * \param z_src the incoming depth/stencil values (a 2x2 quad) * \param zs_dst_ptr pointer to depth/stencil values in framebuffer * \param face contains float value indicating front/back facing polygon */ void lp_build_depth_stencil_test(LLVMBuilderRef builder, const struct pipe_depth_state *depth, const struct pipe_stencil_state stencil[2], struct lp_type type, const struct util_format_description *format_desc, struct lp_build_mask_context *mask, LLVMValueRef stencil_refs[2], LLVMValueRef z_src, LLVMValueRef zs_dst_ptr, LLVMValueRef face, LLVMValueRef counter) { struct lp_build_context bld; struct lp_build_context sbld; struct lp_type s_type; LLVMValueRef zs_dst, z_dst = NULL; LLVMValueRef stencil_vals = NULL; LLVMValueRef z_bitmask = NULL, stencil_shift = NULL; LLVMValueRef z_pass = NULL, s_pass_mask = NULL; LLVMValueRef orig_mask = mask->value; /* Sanity checking */ { const unsigned z_swizzle = format_desc->swizzle[0]; const unsigned s_swizzle = format_desc->swizzle[1]; assert(z_swizzle != UTIL_FORMAT_SWIZZLE_NONE || s_swizzle != UTIL_FORMAT_SWIZZLE_NONE); assert(depth->enabled || stencil[0].enabled); assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS); assert(format_desc->block.width == 1); assert(format_desc->block.height == 1); if (stencil[0].enabled) { assert(format_desc->format == PIPE_FORMAT_Z24_UNORM_S8_USCALED || format_desc->format == PIPE_FORMAT_S8_USCALED_Z24_UNORM); } assert(z_swizzle < 4); assert(format_desc->block.bits == type.width); if (type.floating) { assert(z_swizzle == 0); assert(format_desc->channel[z_swizzle].type == UTIL_FORMAT_TYPE_FLOAT); assert(format_desc->channel[z_swizzle].size == format_desc->block.bits); } else { assert(format_desc->channel[z_swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED); assert(format_desc->channel[z_swizzle].normalized); assert(!type.fixed); assert(!type.sign); assert(type.norm); } } /* Setup build context for Z vals */ lp_build_context_init(&bld, builder, type); /* Setup build context for stencil vals */ s_type = lp_type_int_vec(type.width); lp_build_context_init(&sbld, builder, s_type); /* Load current z/stencil value from z/stencil buffer */ zs_dst = LLVMBuildLoad(builder, zs_dst_ptr, ""); lp_build_name(zs_dst, "zsbufval"); /* Compute and apply the Z/stencil bitmasks and shifts. 
*/ { unsigned z_shift, z_mask; unsigned s_shift, s_mask; if (get_z_shift_and_mask(format_desc, &z_shift, &z_mask)) { if (z_shift) { LLVMValueRef shift = lp_build_const_int_vec(type, z_shift); z_src = LLVMBuildLShr(builder, z_src, shift, ""); } if (z_mask != 0xffffffff) { LLVMValueRef mask = lp_build_const_int_vec(type, z_mask); z_src = LLVMBuildAnd(builder, z_src, mask, ""); z_dst = LLVMBuildAnd(builder, zs_dst, mask, ""); z_bitmask = mask; /* used below */ } else { z_dst = zs_dst; } lp_build_name(z_dst, "zsbuf.z"); } if (get_s_shift_and_mask(format_desc, &s_shift, &s_mask)) { if (s_shift) { LLVMValueRef shift = lp_build_const_int_vec(type, s_shift); stencil_vals = LLVMBuildLShr(builder, zs_dst, shift, ""); stencil_shift = shift; /* used below */ } else { stencil_vals = zs_dst; } if (s_mask != 0xffffffff) { LLVMValueRef mask = lp_build_const_int_vec(type, s_mask); stencil_vals = LLVMBuildAnd(builder, stencil_vals, mask, ""); } lp_build_name(stencil_vals, "stencil"); } } if (stencil[0].enabled) { /* convert scalar stencil refs into vectors */ stencil_refs[0] = lp_build_broadcast_scalar(&bld, stencil_refs[0]); stencil_refs[1] = lp_build_broadcast_scalar(&bld, stencil_refs[1]); s_pass_mask = lp_build_stencil_test(&sbld, stencil, stencil_refs, stencil_vals, face); /* apply stencil-fail operator */ { LLVMValueRef s_fail_mask = lp_build_andc(&bld, orig_mask, s_pass_mask); stencil_vals = lp_build_stencil_op(&sbld, stencil, S_FAIL_OP, stencil_refs, stencil_vals, s_fail_mask, face); } } if (depth->enabled) { /* compare src Z to dst Z, returning 'pass' mask */ z_pass = lp_build_cmp(&bld, depth->func, z_src, z_dst); if (!stencil[0].enabled) { /* We can potentially skip all remaining operations here, but only * if stencil is disabled because we still need to update the stencil * buffer values. Don't need to update Z buffer values. */ lp_build_mask_update(mask, z_pass); } if (depth->writemask) { LLVMValueRef zselectmask = mask->value; /* mask off bits that failed Z test */ zselectmask = LLVMBuildAnd(builder, zselectmask, z_pass, ""); /* mask off bits that failed stencil test */ if (s_pass_mask) { zselectmask = LLVMBuildAnd(builder, zselectmask, s_pass_mask, ""); } /* if combined Z/stencil format, mask off the stencil bits */ if (z_bitmask) { zselectmask = LLVMBuildAnd(builder, zselectmask, z_bitmask, ""); } /* Mix the old and new Z buffer values. * z_dst[i] = (zselectmask[i] & z_src[i]) | (~zselectmask[i] & z_dst[i]) */ z_dst = lp_build_select_bitwise(&bld, zselectmask, z_src, z_dst); } if (stencil[0].enabled) { /* update stencil buffer values according to z pass/fail result */ LLVMValueRef z_fail_mask, z_pass_mask; /* apply Z-fail operator */ z_fail_mask = lp_build_andc(&bld, orig_mask, z_pass); stencil_vals = lp_build_stencil_op(&sbld, stencil, Z_FAIL_OP, stencil_refs, stencil_vals, z_fail_mask, face); /* apply Z-pass operator */ z_pass_mask = LLVMBuildAnd(bld.builder, orig_mask, z_pass, ""); stencil_vals = lp_build_stencil_op(&sbld, stencil, Z_PASS_OP, stencil_refs, stencil_vals, z_pass_mask, face); } } else { /* No depth test: apply Z-pass operator to stencil buffer values which * passed the stencil test. */ s_pass_mask = LLVMBuildAnd(bld.builder, orig_mask, s_pass_mask, ""); stencil_vals = lp_build_stencil_op(&sbld, stencil, Z_PASS_OP, stencil_refs, stencil_vals, s_pass_mask, face); } /* The Z bits are already in the right place but we may need to shift the * stencil bits before ORing Z with Stencil to make the final pixel value. 
*/ if (stencil_vals && stencil_shift) stencil_vals = LLVMBuildShl(bld.builder, stencil_vals, stencil_shift, ""); /* Finally, merge/store the z/stencil values */ if ((depth->enabled && depth->writemask) || (stencil[0].enabled && stencil[0].writemask)) { if (z_dst && stencil_vals) zs_dst = LLVMBuildOr(bld.builder, z_dst, stencil_vals, ""); else if (z_dst) zs_dst = z_dst; else zs_dst = stencil_vals; LLVMBuildStore(builder, zs_dst, zs_dst_ptr); } if (s_pass_mask) lp_build_mask_update(mask, s_pass_mask); if (depth->enabled && stencil[0].enabled) lp_build_mask_update(mask, z_pass); if (counter) lp_build_occlusion_count(builder, type, mask->value, counter); }
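/*
 * The depth write above mixes old and new Z values with a bitwise select,
 * exactly as the inline comment states. Scalar model for one 32-bit
 * element, where each mask lane is all-ones or all-zeros:
 */
static unsigned
select_bitwise_ref(unsigned mask, unsigned src, unsigned dst)
{
   return (mask & src) | (~mask & dst);
}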
/** * Unpack several pixels in SoA. * * It takes a vector of packed pixels: * * packed = {P0, P1, P2, P3, ..., Pn} * * And will produce four vectors: * * red = {R0, R1, R2, R3, ..., Rn} * green = {G0, G1, G2, G3, ..., Gn} * blue = {B0, B1, B2, B3, ..., Bn} * alpha = {A0, A1, A2, A3, ..., An} * * It requires that a packed pixel fits into an element of the output * channels. The common case is when converting pixels with a depth of 32 bits or * less into floats. * * \param format_desc the format of the 'packed' incoming pixel vector * \param type the desired type for rgba_out (type.length = n, above) * \param packed the incoming vector of packed pixels * \param rgba_out returns the SoA R,G,B,A vectors */ void lp_build_unpack_rgba_soa(struct gallivm_state *gallivm, const struct util_format_description *format_desc, struct lp_type type, LLVMValueRef packed, LLVMValueRef rgba_out[4]) { LLVMBuilderRef builder = gallivm->builder; struct lp_build_context bld; LLVMValueRef inputs[4]; unsigned chan; assert(format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN); assert(format_desc->block.width == 1); assert(format_desc->block.height == 1); assert(format_desc->block.bits <= type.width); /* FIXME: Support more output types */ assert(type.width == 32); lp_build_context_init(&bld, gallivm, type); /* Decode the input vector components */ for (chan = 0; chan < format_desc->nr_channels; ++chan) { const unsigned width = format_desc->channel[chan].size; const unsigned start = format_desc->channel[chan].shift; const unsigned stop = start + width; LLVMValueRef input; input = packed; switch(format_desc->channel[chan].type) { case UTIL_FORMAT_TYPE_VOID: input = lp_build_undef(gallivm, type); break; case UTIL_FORMAT_TYPE_UNSIGNED: /* * Align the LSB */ if (start) { input = LLVMBuildLShr(builder, input, lp_build_const_int_vec(gallivm, type, start), ""); } /* * Zero the MSBs */ if (stop < format_desc->block.bits) { unsigned mask = ((unsigned long long)1 << width) - 1; input = LLVMBuildAnd(builder, input, lp_build_const_int_vec(gallivm, type, mask), ""); } /* * Type conversion */ if (type.floating) { if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) { assert(width == 8); if (format_desc->swizzle[3] == chan) { input = lp_build_unsigned_norm_to_float(gallivm, width, type, input); } else { struct lp_type conv_type = lp_uint_type(type); input = lp_build_srgb_to_linear(gallivm, conv_type, input); } } else { if(format_desc->channel[chan].normalized) input = lp_build_unsigned_norm_to_float(gallivm, width, type, input); else input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(gallivm, type), ""); } } else if (format_desc->channel[chan].pure_integer) { /* Nothing to do */ } else { /* FIXME */ assert(0); } break; case UTIL_FORMAT_TYPE_SIGNED: /* * Align the sign bit first. 
*/ if (stop < type.width) { unsigned bits = type.width - stop; LLVMValueRef bits_val = lp_build_const_int_vec(gallivm, type, bits); input = LLVMBuildShl(builder, input, bits_val, ""); } /* * Align the LSB (with an arithmetic shift to preserve the sign) */ if (format_desc->channel[chan].size < type.width) { unsigned bits = type.width - format_desc->channel[chan].size; LLVMValueRef bits_val = lp_build_const_int_vec(gallivm, type, bits); input = LLVMBuildAShr(builder, input, bits_val, ""); } /* * Type conversion */ if (type.floating) { input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(gallivm, type), ""); if (format_desc->channel[chan].normalized) { double scale = 1.0 / ((1 << (format_desc->channel[chan].size - 1)) - 1); LLVMValueRef scale_val = lp_build_const_vec(gallivm, type, scale); input = LLVMBuildFMul(builder, input, scale_val, ""); /* The formula above will produce a value below -1.0 for the most * negative input value, but everything seems happy with that, hence * the clamp is disabled for now. */ if (0) input = lp_build_max(&bld, input, lp_build_const_vec(gallivm, type, -1.0f)); } } else if (format_desc->channel[chan].pure_integer) { /* Nothing to do */ } else { /* FIXME */ assert(0); } break; case UTIL_FORMAT_TYPE_FLOAT: if (type.floating) { assert(start == 0); assert(stop == 32); assert(type.width == 32); input = LLVMBuildBitCast(builder, input, lp_build_vec_type(gallivm, type), ""); } else { /* FIXME */ assert(0); input = lp_build_undef(gallivm, type); } break; case UTIL_FORMAT_TYPE_FIXED: if (type.floating) { double scale = 1.0 / ((1 << (format_desc->channel[chan].size/2)) - 1); LLVMValueRef scale_val = lp_build_const_vec(gallivm, type, scale); input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(gallivm, type), ""); input = LLVMBuildFMul(builder, input, scale_val, ""); } else { /* FIXME */ assert(0); input = lp_build_undef(gallivm, type); } break; default: assert(0); input = lp_build_undef(gallivm, type); break; } inputs[chan] = input; } lp_build_format_swizzle_soa(format_desc, &bld, inputs, rgba_out); }
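/*
 * For reference, the UTIL_FORMAT_TYPE_SIGNED branch above decodes an
 * n-bit signed normalized channel like this (scalar sketch, assuming a
 * 32-bit int and an arithmetic right shift of negative values):
 */
static float
snorm_decode_ref(unsigned packed, unsigned start, unsigned width)
{
   int v = (int)(packed << (32 - start - width)); /* align the sign bit */
   v >>= 32 - width;                              /* align the LSB, keeping the sign */
   return (float)v / (float)((1 << (width - 1)) - 1);
}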
/** * Store depth/stencil values. * Incoming values are swizzled (typically n 2x2 quads), stored linearly. * If there's a mask it will do select/store otherwise just store. * * \param type the data type of the fragment depth/stencil values * \param format_desc description of the depth/stencil surface * \param mask the alive/dead pixel mask for the quad (vector) * \param z_fb z values read from fb (with padding) * \param s_fb s values read from fb (with padding) * \param loop_counter the current loop iteration * \param depth_ptr pointer to the depth/stencil values of this 4x4 block * \param depth_stride stride of the depth/stencil buffer * \param z_value the depth values to store (with padding) * \param s_value the stencil values to store (with padding) */ void lp_build_depth_stencil_write_swizzled(struct gallivm_state *gallivm, struct lp_type z_src_type, const struct util_format_description *format_desc, struct lp_build_mask_context *mask, LLVMValueRef z_fb, LLVMValueRef s_fb, LLVMValueRef loop_counter, LLVMValueRef depth_ptr, LLVMValueRef depth_stride, LLVMValueRef z_value, LLVMValueRef s_value) { struct lp_build_context z_bld; LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH / 4]; LLVMBuilderRef builder = gallivm->builder; LLVMValueRef mask_value = NULL; LLVMValueRef zs_dst1, zs_dst2; LLVMValueRef zs_dst_ptr1, zs_dst_ptr2; LLVMValueRef depth_offset1, depth_offset2; LLVMTypeRef load_ptr_type; unsigned depth_bytes = format_desc->block.bits / 8; struct lp_type zs_type = lp_depth_type(format_desc, z_src_type.length); struct lp_type z_type = zs_type; struct lp_type zs_load_type = zs_type; zs_load_type.length = zs_load_type.length / 2; load_ptr_type = LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0); z_type.width = z_src_type.width; lp_build_context_init(&z_bld, gallivm, z_type); /* * This is far from ideal; at least for late depth write we should do this * outside the fs loop to avoid all the swizzle stuff. */ if (z_src_type.length == 4) { LLVMValueRef looplsb = LLVMBuildAnd(builder, loop_counter, lp_build_const_int32(gallivm, 1), ""); LLVMValueRef loopmsb = LLVMBuildAnd(builder, loop_counter, lp_build_const_int32(gallivm, 2), ""); LLVMValueRef offset2 = LLVMBuildMul(builder, loopmsb, depth_stride, ""); depth_offset1 = LLVMBuildMul(builder, looplsb, lp_build_const_int32(gallivm, depth_bytes * 2), ""); depth_offset1 = LLVMBuildAdd(builder, depth_offset1, offset2, ""); } else { unsigned i; LLVMValueRef loopx2 = LLVMBuildShl(builder, loop_counter, lp_build_const_int32(gallivm, 1), ""); assert(z_src_type.length == 8); depth_offset1 = LLVMBuildMul(builder, loopx2, depth_stride, ""); /* * We load 2x4 values, and need to swizzle them (order * 0,1,4,5,2,3,6,7) - not so hot with avx unfortunately. 
*/ for (i = 0; i < 8; i++) { shuffles[i] = lp_build_const_int32(gallivm, (i&1) + (i&2) * 2 + (i&4) / 2); } } depth_offset2 = LLVMBuildAdd(builder, depth_offset1, depth_stride, ""); zs_dst_ptr1 = LLVMBuildGEP(builder, depth_ptr, &depth_offset1, 1, ""); zs_dst_ptr1 = LLVMBuildBitCast(builder, zs_dst_ptr1, load_ptr_type, ""); zs_dst_ptr2 = LLVMBuildGEP(builder, depth_ptr, &depth_offset2, 1, ""); zs_dst_ptr2 = LLVMBuildBitCast(builder, zs_dst_ptr2, load_ptr_type, ""); if (format_desc->block.bits > 32) { s_value = LLVMBuildBitCast(builder, s_value, z_bld.vec_type, ""); } if (mask) { mask_value = lp_build_mask_value(mask); z_value = lp_build_select(&z_bld, mask_value, z_value, z_fb); if (format_desc->block.bits > 32) { s_fb = LLVMBuildBitCast(builder, s_fb, z_bld.vec_type, ""); s_value = lp_build_select(&z_bld, mask_value, s_value, s_fb); } } if (zs_type.width < z_src_type.width) { /* Truncate ZS values (e.g., when writing to Z16_UNORM) */ z_value = LLVMBuildTrunc(builder, z_value, lp_build_int_vec_type(gallivm, zs_type), ""); } if (format_desc->block.bits <= 32) { if (z_src_type.length == 4) { zs_dst1 = lp_build_extract_range(gallivm, z_value, 0, 2); zs_dst2 = lp_build_extract_range(gallivm, z_value, 2, 2); } else { assert(z_src_type.length == 8); zs_dst1 = LLVMBuildShuffleVector(builder, z_value, z_value, LLVMConstVector(&shuffles[0], zs_load_type.length), ""); zs_dst2 = LLVMBuildShuffleVector(builder, z_value, z_value, LLVMConstVector(&shuffles[4], zs_load_type.length), ""); } } else { if (z_src_type.length == 4) { zs_dst1 = lp_build_interleave2(gallivm, z_type, z_value, s_value, 0); zs_dst2 = lp_build_interleave2(gallivm, z_type, z_value, s_value, 1); } else { unsigned i; LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH / 2]; assert(z_src_type.length == 8); for (i = 0; i < 8; i++) { shuffles[i*2] = lp_build_const_int32(gallivm, (i&1) + (i&2) * 2 + (i&4) / 2); shuffles[i*2+1] = lp_build_const_int32(gallivm, (i&1) + (i&2) * 2 + (i&4) / 2 + z_src_type.length); } zs_dst1 = LLVMBuildShuffleVector(builder, z_value, s_value, LLVMConstVector(&shuffles[0], z_src_type.length), ""); zs_dst2 = LLVMBuildShuffleVector(builder, z_value, s_value, LLVMConstVector(&shuffles[8], z_src_type.length), ""); } zs_dst1 = LLVMBuildBitCast(builder, zs_dst1, lp_build_vec_type(gallivm, zs_load_type), ""); zs_dst2 = LLVMBuildBitCast(builder, zs_dst2, lp_build_vec_type(gallivm, zs_load_type), ""); } LLVMBuildStore(builder, zs_dst1, zs_dst_ptr1); LLVMBuildStore(builder, zs_dst2, zs_dst_ptr2); }
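/*
 * The 8-wide shuffles above reorder the loaded 2x4 values into swizzled
 * quad order 0,1,4,5,2,3,6,7. The index expression can be sanity-checked
 * in scalar C:
 */
static unsigned
zs_swizzle_index_ref(unsigned i) /* i = 0..7 */
{
   return (i & 1) + (i & 2) * 2 + (i & 4) / 2; /* yields 0,1,4,5,2,3,6,7 */
}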
static INLINE void yuv_to_rgb_soa(struct gallivm_state *gallivm, unsigned n, LLVMValueRef y, LLVMValueRef u, LLVMValueRef v, LLVMValueRef *r, LLVMValueRef *g, LLVMValueRef *b) { LLVMBuilderRef builder = gallivm->builder; struct lp_type type; struct lp_build_context bld; LLVMValueRef c0; LLVMValueRef c8; LLVMValueRef c16; LLVMValueRef c128; LLVMValueRef c255; LLVMValueRef cy; LLVMValueRef cug; LLVMValueRef cub; LLVMValueRef cvr; LLVMValueRef cvg; memset(&type, 0, sizeof type); type.sign = TRUE; type.width = 32; type.length = n; lp_build_context_init(&bld, gallivm, type); assert(lp_check_value(type, y)); assert(lp_check_value(type, u)); assert(lp_check_value(type, v)); /* * Constants */ c0 = lp_build_const_int_vec(gallivm, type, 0); c8 = lp_build_const_int_vec(gallivm, type, 8); c16 = lp_build_const_int_vec(gallivm, type, 16); c128 = lp_build_const_int_vec(gallivm, type, 128); c255 = lp_build_const_int_vec(gallivm, type, 255); cy = lp_build_const_int_vec(gallivm, type, 298); cug = lp_build_const_int_vec(gallivm, type, -100); cub = lp_build_const_int_vec(gallivm, type, 516); cvr = lp_build_const_int_vec(gallivm, type, 409); cvg = lp_build_const_int_vec(gallivm, type, -208); /* * y -= 16; * u -= 128; * v -= 128; */ y = LLVMBuildSub(builder, y, c16, ""); u = LLVMBuildSub(builder, u, c128, ""); v = LLVMBuildSub(builder, v, c128, ""); /* * r = 298 * _y + 409 * _v + 128; * g = 298 * _y - 100 * _u - 208 * _v + 128; * b = 298 * _y + 516 * _u + 128; */ y = LLVMBuildMul(builder, y, cy, ""); y = LLVMBuildAdd(builder, y, c128, ""); *r = LLVMBuildMul(builder, v, cvr, ""); *g = LLVMBuildAdd(builder, LLVMBuildMul(builder, u, cug, ""), LLVMBuildMul(builder, v, cvg, ""), ""); *b = LLVMBuildMul(builder, u, cub, ""); *r = LLVMBuildAdd(builder, *r, y, ""); *g = LLVMBuildAdd(builder, *g, y, ""); *b = LLVMBuildAdd(builder, *b, y, ""); /* * r >>= 8; * g >>= 8; * b >>= 8; */ *r = LLVMBuildAShr(builder, *r, c8, "r"); *g = LLVMBuildAShr(builder, *g, c8, "g"); *b = LLVMBuildAShr(builder, *b, c8, "b"); /* * Clamp */ *r = lp_build_clamp(&bld, *r, c0, c255); *g = lp_build_clamp(&bld, *g, c0, c255); *b = lp_build_clamp(&bld, *b, c0, c255); }
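/*
 * Scalar reference of the fixed-point YCbCr -> RGB conversion implemented
 * above, using the same constants and 8 fractional bits (plain C sketch of
 * what the generated IR computes per element):
 */
static void
yuv_to_rgb_ref(int y, int u, int v, int *r, int *g, int *b)
{
   int _y = y - 16;
   int _u = u - 128;
   int _v = v - 128;
   int ybase = 298 * _y + 128; /* the +128 rounds the subsequent >> 8 */
   *r = (ybase + 409 * _v) >> 8;
   *g = (ybase - 100 * _u - 208 * _v) >> 8;
   *b = (ybase + 516 * _u) >> 8;
   if (*r < 0) *r = 0; else if (*r > 255) *r = 255; /* clamp to [0,255] */
   if (*g < 0) *g = 0; else if (*g > 255) *g = 255;
   if (*b < 0) *b = 0; else if (*b > 255) *b = 255;
}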
/** * Generate code for performing depth and/or stencil tests. * We operate on a vector of values (typically n 2x2 quads). * * \param depth the depth test state * \param stencil the front/back stencil state * \param type the data type of the fragment depth/stencil values * \param format_desc description of the depth/stencil surface * \param mask the alive/dead pixel mask for the quad (vector) * \param stencil_refs the front/back stencil ref values (scalar) * \param z_src the incoming depth/stencil values (n 2x2 quad values, float32) * \param z_fb, s_fb the depth/stencil values in the framebuffer * \param face contains boolean value indicating front/back facing polygon */ void lp_build_depth_stencil_test(struct gallivm_state *gallivm, const struct pipe_depth_state *depth, const struct pipe_stencil_state stencil[2], struct lp_type z_src_type, const struct util_format_description *format_desc, struct lp_build_mask_context *mask, LLVMValueRef stencil_refs[2], LLVMValueRef z_src, LLVMValueRef z_fb, LLVMValueRef s_fb, LLVMValueRef face, LLVMValueRef *z_value, LLVMValueRef *s_value, boolean do_branch) { LLVMBuilderRef builder = gallivm->builder; struct lp_type z_type; struct lp_build_context z_bld; struct lp_build_context s_bld; struct lp_type s_type; unsigned z_shift = 0, z_width = 0, z_mask = 0; LLVMValueRef z_dst = NULL; LLVMValueRef stencil_vals = NULL; LLVMValueRef z_bitmask = NULL, stencil_shift = NULL; LLVMValueRef z_pass = NULL, s_pass_mask = NULL; LLVMValueRef orig_mask = lp_build_mask_value(mask); LLVMValueRef front_facing = NULL; boolean have_z, have_s; /* * Depths are expected to be between 0 and 1, even if they are stored in * floats. Setting these bits here will ensure that the lp_build_conv() call * below won't try to unnecessarily clamp the incoming values. */ if(z_src_type.floating) { z_src_type.sign = FALSE; z_src_type.norm = TRUE; } else { assert(!z_src_type.sign); assert(z_src_type.norm); } /* Pick the type matching the depth-stencil format. */ z_type = lp_depth_type(format_desc, z_src_type.length); /* Pick the intermediate type for depth operations. */ z_type.width = z_src_type.width; assert(z_type.length == z_src_type.length); /* FIXME: for non-float depth/stencil we might generate better code * if we'd always split it up to use 128bit operations. * For stencil we'd almost certainly want to pack to 8xi16 values, * for z just run twice. 
*/ /* Sanity checking */ { const unsigned z_swizzle = format_desc->swizzle[0]; const unsigned s_swizzle = format_desc->swizzle[1]; assert(z_swizzle != UTIL_FORMAT_SWIZZLE_NONE || s_swizzle != UTIL_FORMAT_SWIZZLE_NONE); assert(depth->enabled || stencil[0].enabled); assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS); assert(format_desc->block.width == 1); assert(format_desc->block.height == 1); if (stencil[0].enabled) { assert(s_swizzle < 4); assert(format_desc->channel[s_swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED); assert(format_desc->channel[s_swizzle].pure_integer); assert(!format_desc->channel[s_swizzle].normalized); assert(format_desc->channel[s_swizzle].size == 8); } if (depth->enabled) { assert(z_swizzle < 4); if (z_type.floating) { assert(z_swizzle == 0); assert(format_desc->channel[z_swizzle].type == UTIL_FORMAT_TYPE_FLOAT); assert(format_desc->channel[z_swizzle].size == 32); } else { assert(format_desc->channel[z_swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED); assert(format_desc->channel[z_swizzle].normalized); assert(!z_type.fixed); } } } /* Setup build context for Z vals */ lp_build_context_init(&z_bld, gallivm, z_type); /* Setup build context for stencil vals */ s_type = lp_int_type(z_type); lp_build_context_init(&s_bld, gallivm, s_type); /* Compute and apply the Z/stencil bitmasks and shifts. */ { unsigned s_shift, s_mask; z_dst = z_fb; stencil_vals = s_fb; have_z = get_z_shift_and_mask(format_desc, &z_shift, &z_width, &z_mask); have_s = get_s_shift_and_mask(format_desc, &s_shift, &s_mask); if (have_z) { if (z_mask != 0xffffffff) { z_bitmask = lp_build_const_int_vec(gallivm, z_type, z_mask); } /* * Align the framebuffer Z's LSB to the right. */ if (z_shift) { LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_type, z_shift); z_dst = LLVMBuildLShr(builder, z_dst, shift, "z_dst"); } else if (z_bitmask) { z_dst = LLVMBuildAnd(builder, z_dst, z_bitmask, "z_dst"); } else { lp_build_name(z_dst, "z_dst"); } } if (have_s) { if (s_shift) { LLVMValueRef shift = lp_build_const_int_vec(gallivm, s_type, s_shift); stencil_vals = LLVMBuildLShr(builder, stencil_vals, shift, ""); stencil_shift = shift; /* used below */ } if (s_mask != 0xffffffff) { LLVMValueRef mask = lp_build_const_int_vec(gallivm, s_type, s_mask); stencil_vals = LLVMBuildAnd(builder, stencil_vals, mask, ""); } lp_build_name(stencil_vals, "s_dst"); } } if (stencil[0].enabled) { if (face) { LLVMValueRef zero = lp_build_const_int32(gallivm, 0); /* front_facing = face != 0 ? ~0 : 0 */ front_facing = LLVMBuildICmp(builder, LLVMIntNE, face, zero, ""); front_facing = LLVMBuildSExt(builder, front_facing, LLVMIntTypeInContext(gallivm->context, s_bld.type.length*s_bld.type.width), ""); front_facing = LLVMBuildBitCast(builder, front_facing, s_bld.int_vec_type, ""); } /* convert scalar stencil refs into vectors */ stencil_refs[0] = lp_build_broadcast_scalar(&s_bld, stencil_refs[0]); stencil_refs[1] = lp_build_broadcast_scalar(&s_bld, stencil_refs[1]); s_pass_mask = lp_build_stencil_test(&s_bld, stencil, stencil_refs, stencil_vals, front_facing); /* apply stencil-fail operator */ { LLVMValueRef s_fail_mask = lp_build_andnot(&s_bld, orig_mask, s_pass_mask); stencil_vals = lp_build_stencil_op(&s_bld, stencil, S_FAIL_OP, stencil_refs, stencil_vals, s_fail_mask, front_facing); } } if (depth->enabled) { /* * Convert fragment Z to the desired type, aligning the LSB to the right. 
*/ assert(z_type.width == z_src_type.width); assert(z_type.length == z_src_type.length); assert(lp_check_value(z_src_type, z_src)); if (z_src_type.floating) { /* * Convert from floating point values */ if (!z_type.floating) { z_src = lp_build_clamped_float_to_unsigned_norm(gallivm, z_src_type, z_width, z_src); } } else { /* * Convert from unsigned normalized values. */ assert(!z_src_type.sign); assert(!z_src_type.fixed); assert(z_src_type.norm); assert(!z_type.floating); if (z_src_type.width > z_width) { LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_src_type, z_src_type.width - z_width); z_src = LLVMBuildLShr(builder, z_src, shift, ""); } } assert(lp_check_value(z_type, z_src)); lp_build_name(z_src, "z_src"); /* compare src Z to dst Z, returning 'pass' mask */ z_pass = lp_build_cmp(&z_bld, depth->func, z_src, z_dst); if (!stencil[0].enabled) { /* We can potentially skip all remaining operations here, but only * if stencil is disabled because we still need to update the stencil * buffer values. Don't need to update Z buffer values. */ lp_build_mask_update(mask, z_pass); if (do_branch) { lp_build_mask_check(mask); do_branch = FALSE; } } if (depth->writemask) { LLVMValueRef zselectmask; /* mask off bits that failed Z test */ zselectmask = LLVMBuildAnd(builder, orig_mask, z_pass, ""); /* mask off bits that failed stencil test */ if (s_pass_mask) { zselectmask = LLVMBuildAnd(builder, zselectmask, s_pass_mask, ""); } /* Mix the old and new Z buffer values. * z_dst[i] = zselectmask[i] ? z_src[i] : z_dst[i] */ z_dst = lp_build_select(&z_bld, zselectmask, z_src, z_dst); } if (stencil[0].enabled) { /* update stencil buffer values according to z pass/fail result */ LLVMValueRef z_fail_mask, z_pass_mask; /* apply Z-fail operator */ z_fail_mask = lp_build_andnot(&s_bld, orig_mask, z_pass); stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_FAIL_OP, stencil_refs, stencil_vals, z_fail_mask, front_facing); /* apply Z-pass operator */ z_pass_mask = LLVMBuildAnd(builder, orig_mask, z_pass, ""); stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_PASS_OP, stencil_refs, stencil_vals, z_pass_mask, front_facing); } } else { /* No depth test: apply Z-pass operator to stencil buffer values which * passed the stencil test. */ s_pass_mask = LLVMBuildAnd(builder, orig_mask, s_pass_mask, ""); stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_PASS_OP, stencil_refs, stencil_vals, s_pass_mask, front_facing); } /* Put Z and stencil bits in the right place */ if (have_z && z_shift) { LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_type, z_shift); z_dst = LLVMBuildShl(builder, z_dst, shift, ""); } if (stencil_vals && stencil_shift) stencil_vals = LLVMBuildShl(builder, stencil_vals, stencil_shift, ""); /* Finally, merge the z/stencil values */ if (format_desc->block.bits <= 32) { if (have_z && have_s) *z_value = LLVMBuildOr(builder, z_dst, stencil_vals, ""); else if (have_z) *z_value = z_dst; else *z_value = stencil_vals; *s_value = *z_value; } else { *z_value = z_dst; *s_value = stencil_vals; } if (s_pass_mask) lp_build_mask_update(mask, s_pass_mask); if (depth->enabled && stencil[0].enabled) lp_build_mask_update(mask, z_pass); }
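/*
 * The stencil updates above apply three operators in a fixed order. In
 * scalar terms each fragment's stencil value is routed like this (sketch;
 * the real code computes the results on whole vectors and selects lanes
 * with masks):
 */
static unsigned
stencil_route_ref(int s_pass, int z_pass, unsigned s_fail_result,
                  unsigned z_fail_result, unsigned z_pass_result)
{
   if (!s_pass) return s_fail_result; /* S_FAIL_OP */
   if (!z_pass) return z_fail_result; /* Z_FAIL_OP */
   return z_pass_result;              /* Z_PASS_OP */
}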