/**
 * Emit the stencil comparison for one face (front or back).
 * Called once per face when two-sided stencil code is generated.
 *
 * \param stencil      stencil state of the face being tested
 * \param stencilRef   reference value, replicated across the vector
 * \param stencilVals  stencil values read from the framebuffer
 * \return vector mask of pass/fail values (~0 or 0)
 */
static LLVMValueRef
lp_build_stencil_test_single(struct lp_build_context *bld,
                             const struct pipe_stencil_state *stencil,
                             LLVMValueRef stencilRef,
                             LLVMValueRef stencilVals)
{
   const unsigned all_bits = 255; /* XXX fix */
   const struct lp_type vec_type = bld->type;

   assert(vec_type.sign);
   assert(stencil->enabled);

   /* A value mask with every bit set is a no-op, so skip the ANDs. */
   if (stencil->valuemask != all_bits) {
      LLVMValueRef vmask = lp_build_const_int_vec(vec_type, stencil->valuemask);

      /* both sides of the comparison are masked the same way */
      stencilRef = LLVMBuildAnd(bld->builder, stencilRef, vmask, "");
      stencilVals = LLVMBuildAnd(bld->builder, stencilVals, vmask, "");
   }

   return lp_build_cmp(bld, stencil->func, stencilRef, stencilVals);
}
/**
 * Build the "primitive survives culling" predicate (i1 true = keep).
 *
 * \param pos                   Vertex positions, 3x vec4.
 * \param initially_accepted    AND'ed into the result. Some computations can
 *                              be skipped if this is false.
 * \param vp_scale              Viewport scale XY.
 *                              For MSAA, multiply by the number of samples.
 * \param vp_translate          Viewport translation XY.
 *                              For MSAA, multiply by the number of samples.
 * \param small_prim_precision  Precision of small primitive culling. Should
 *                              be the same as or greater than the precision
 *                              of the rasterizer: num_samples / 2^subpixel_bits,
 *                              where subpixel_bits comes from the quantization
 *                              mode.
 * \param options               See ac_cull_options.
 */
LLVMValueRef ac_cull_triangle(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4],
                              LLVMValueRef initially_accepted, LLVMValueRef vp_scale[2],
                              LLVMValueRef vp_translate[2], LLVMValueRef small_prim_precision,
                              struct ac_cull_options *options)
{
	struct ac_position_w_info w_info;
	LLVMValueRef keep;
	LLVMValueRef face_ok;

	ac_analyze_position_w(ctx, pos, &w_info);

	/* W culling: start from the W verdict only when it is requested. */
	if (options->cull_w)
		keep = w_info.w_accepted;
	else
		keep = ctx->i1true;
	keep = LLVMBuildAnd(ctx->builder, keep, initially_accepted, "");

	/* Face culling. */
	face_ok = ac_cull_face(ctx, pos, &w_info, options->cull_front,
	                       options->cull_back, options->cull_zero_area);
	keep = LLVMBuildAnd(ctx->builder, keep, face_ok, "");

	/* View culling and small primitive elimination produce the final value. */
	return cull_bbox(ctx, pos, keep, &w_info, vp_scale, vp_translate,
	                 small_prim_precision, options->cull_view_xy,
	                 options->cull_view_near_z, options->cull_view_far_z,
	                 options->cull_small_prims, options->use_halfz_clip_space);
}
/**
 * Split packed YUYV blocks into separate Y, U and V vectors.
 *
 * @param packed  <n x i32> vector holding the packed YUYV blocks
 * @param i       <n x i32> vector with the x pixel coordinate (0 or 1)
 * @param y,u,v   receive the extracted channels, each masked to 8 bits
 *
 * Per element:
 *    y = (yuyv >> 16*i) & 0xff
 *    u = (yuyv >> 8   ) & 0xff
 *    v = (yuyv >> 24  ) & 0xff
 */
static void
yuyv_to_yuv_soa(struct gallivm_state *gallivm,
                unsigned n,
                LLVMValueRef packed,
                LLVMValueRef i,
                LLVMValueRef *y,
                LLVMValueRef *u,
                LLVMValueRef *v)
{
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_type type;
   LLVMValueRef byte_mask;

   memset(&type, 0, sizeof type);
   type.width = 32;
   type.length = n;

   assert(lp_check_value(type, packed));
   assert(lp_check_value(type, i));

#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
   /*
    * Avoid a shift with per-element count: x86 has no support for it and
    * it gets translated to roughly 5 instructions per element. Performance
    * was not measured, but this cuts shader size by quite a bit (less
    * difference if the cpu has no sse4.1 support).
    */
   if (util_cpu_caps.has_sse2 && n == 4) {
      struct lp_build_context bld32;
      LLVMValueRef c16, c0;
      LLVMValueRef upper, is_first;

      lp_build_context_init(&bld32, gallivm, type);

      /* Pick between the unshifted and the 16-bit shifted value. */
      c16 = lp_build_const_int_vec(gallivm, type, 16);
      upper = LLVMBuildLShr(builder, packed, c16, "");
      c0 = lp_build_const_int_vec(gallivm, type, 0);
      is_first = lp_build_compare(gallivm, type, PIPE_FUNC_EQUAL, i, c0);
      *y = lp_build_select(&bld32, is_first, packed, upper);
   } else
#endif
   {
      LLVMValueRef c16 = lp_build_const_int_vec(gallivm, type, 16);
      LLVMValueRef count = LLVMBuildMul(builder, i, c16, "");

      *y = LLVMBuildLShr(builder, packed, count, "");
   }

   {
      LLVMValueRef c8 = lp_build_const_int_vec(gallivm, type, 8);
      LLVMValueRef c24 = lp_build_const_int_vec(gallivm, type, 24);

      *u = LLVMBuildLShr(builder, packed, c8, "");
      *v = LLVMBuildLShr(builder, packed, c24, "");
   }

   byte_mask = lp_build_const_int_vec(gallivm, type, 0xff);

   *y = LLVMBuildAnd(builder, *y, byte_mask, "y");
   *u = LLVMBuildAnd(builder, *u, byte_mask, "u");
   *v = LLVMBuildAnd(builder, *v, byte_mask, "v");
}
/**
 * Compute the byte offset of a pixel.
 *
 * x, y and y_stride are vectors. For depth/stencil formats the offset is
 * assembled from a low part (bit 0 of x and y) and a high part (the
 * remaining bits), each with its own strides; every other format uses
 * x * x_stride + y * y_stride.
 *
 * data_ptr is accepted but not used here.
 */
LLVMValueRef
lp_build_sample_offset(struct lp_build_context *bld,
                       const struct util_format_description *format_desc,
                       LLVMValueRef x,
                       LLVMValueRef y,
                       LLVMValueRef y_stride,
                       LLVMValueRef data_ptr)
{
   LLVMValueRef x_stride =
      lp_build_const_scalar(bld->type, format_desc->block.bits/8);

   /* Simple case first: plain row-major addressing. */
   if (format_desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) {
      LLVMValueRef off_x = lp_build_mul(bld, x, x_stride);
      LLVMValueRef off_y = lp_build_mul(bld, y, y_stride);

      return lp_build_add(bld, off_x, off_y);
   }

   {
      LLVMValueRef xl, yl, xh, yh;
      LLVMValueRef stride_yl, stride_xh, stride_yh;
      LLVMValueRef low, high;
      LLVMValueRef part_x, part_y;

      /* split each coordinate into bit 0 and the remaining bits */
      xl = LLVMBuildAnd(bld->builder, x, bld->one, "");
      yl = LLVMBuildAnd(bld->builder, y, bld->one, "");
      xh = LLVMBuildLShr(bld->builder, x, bld->one, "");
      yh = LLVMBuildLShr(bld->builder, y, bld->one, "");

      stride_yl = lp_build_const_scalar(bld->type, 2*format_desc->block.bits/8);
      stride_xh = lp_build_const_scalar(bld->type, 4*format_desc->block.bits/8);
      stride_yh = LLVMBuildShl(bld->builder, y_stride, bld->one, "");

      /* offset contributed by the low bits */
      part_x = lp_build_mul(bld, xl, x_stride);
      part_y = lp_build_mul(bld, yl, stride_yl);
      low = lp_build_add(bld, part_x, part_y);

      /* offset contributed by the high bits */
      part_x = lp_build_mul(bld, xh, stride_xh);
      part_y = lp_build_mul(bld, yh, stride_yh);
      high = lp_build_add(bld, part_x, part_y);

      return lp_build_add(bld, high, low);
   }
}
/**
 * Force the sign of float vector 'a' to the value given in 'sign'.
 *
 *   sign == 0  ->  abs(a)
 *   sign == 1  -> -abs(a)
 *
 * Any other value of sign produces undefined results.
 */
LLVMValueRef
lp_build_set_sign(struct lp_build_context *bld,
                  LLVMValueRef a, LLVMValueRef sign)
{
   const struct lp_type type = bld->type;
   LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
   LLVMTypeRef vec_type = lp_build_vec_type(type);
   /* position of the sign bit, and a mask with every bit except that one */
   LLVMValueRef sign_pos = lp_build_int_const_scalar(type, type.width - 1);
   LLVMValueRef keep_magnitude =
      lp_build_int_const_scalar(type,
                                ~((unsigned long long) 1 << (type.width - 1)));
   LLVMValueRef bits;

   assert(type.floating);

   /* reinterpret the floats as integers and clear the sign bit */
   bits = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
   bits = LLVMBuildAnd(bld->builder, bits, keep_magnitude, "");

   /* move the requested sign into the sign bit position and merge it in */
   sign = LLVMBuildShl(bld->builder, sign, sign_pos, "");
   bits = LLVMBuildOr(bld->builder, bits, sign, "");

   /* reinterpret the result as floats again */
   return LLVMBuildBitCast(bld->builder, bits, vec_type, "");
}
/**
 * Generate abs(a).
 *
 * Unsigned types are returned untouched. Floats get their sign bit masked
 * off. 128-bit signed integer vectors use the SSSE3 pabs intrinsics when
 * the CPU has them; everything else falls back to max(a, -a).
 */
LLVMValueRef
lp_build_abs(struct lp_build_context *bld,
             LLVMValueRef a)
{
   const struct lp_type type = bld->type;
   LLVMTypeRef vec_type = lp_build_vec_type(type);

   if (!type.sign) {
      return a;
   }

   if (type.floating) {
      /* Clear the sign bit through an integer view of the value. */
      LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
      unsigned long long absMask = ~(1ULL << (type.width - 1));
      LLVMValueRef no_sign =
         lp_build_int_const_scalar(type, ((unsigned long long) absMask));
      LLVMValueRef bits;

      bits = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
      bits = LLVMBuildAnd(bld->builder, bits, no_sign, "");
      return LLVMBuildBitCast(bld->builder, bits, vec_type, "");
   }

   if (type.width*type.length == 128 && util_cpu_caps.has_ssse3) {
      if (type.width == 8) {
         return lp_build_intrinsic_unary(bld->builder,
                                         "llvm.x86.ssse3.pabs.b.128",
                                         vec_type, a);
      }
      else if (type.width == 16) {
         return lp_build_intrinsic_unary(bld->builder,
                                         "llvm.x86.ssse3.pabs.w.128",
                                         vec_type, a);
      }
      else if (type.width == 32) {
         return lp_build_intrinsic_unary(bld->builder,
                                         "llvm.x86.ssse3.pabs.d.128",
                                         vec_type, a);
      }
      /* other element widths use the generic path below */
   }

   {
      LLVMValueRef negated = LLVMBuildNeg(bld->builder, a, "");

      return lp_build_max(bld, a, negated);
   }
}
/**
 * Emit a bitwise AND of the two operands of an AST node.
 *
 * The operands are generated in source order: ast->one first, then
 * ast->two.
 *
 * \param ast  node whose 'one' and 'two' children are the operands
 * \return the value of the AND instruction
 */
LLVMValueRef gen_and(struct node *ast)
{
  /*
   * Generate the operands in separate statements. C does not specify the
   * order in which function arguments are evaluated, so calling codegen()
   * twice inside the LLVMBuildAnd() argument list would let the compiler
   * choose whether the left or the right operand's code is emitted first.
   */
  LLVMValueRef lhs = codegen(ast->one);
  LLVMValueRef rhs = codegen(ast->two);

  return LLVMBuildAnd(builder, lhs, rhs, "");
}
/**
 * Special case for converting clamped IEEE-754 floats to unsigned norms.
 *
 * The mathematical voodoo below may seem excessive but it is actually
 * paramount we do it this way for several reasons. First, there is no single
 * precision FP to unsigned integer conversion Intel SSE instruction. Second,
 * even if there was, since the FP's mantissa takes only a fraction of register
 * bits the typical scale and cast approach would require double precision for
 * accurate results, and therefore half the throughput.
 *
 * Although the result values can be scaled to an arbitrary bit width specified
 * by dst_width, the actual result type will have the same width as the
 * integer view of src_type.
 *
 * \param builder    IR builder the instructions are appended to
 * \param src_type   type of src; must be a floating point type
 * \param dst_width  number of significant bits wanted in the result
 * \param src        value to convert
 *                   NOTE(review): presumably already clamped to [0, 1] by the
 *                   caller, as the function name suggests -- confirm.
 * \return integer vector with the same element width as src_type
 */
LLVMValueRef
lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder,
                                        struct lp_type src_type,
                                        unsigned dst_width,
                                        LLVMValueRef src)
{
   LLVMTypeRef int_vec_type = lp_build_int_vec_type(src_type);
   LLVMValueRef res;
   unsigned mantissa;
   unsigned n;
   unsigned long long ubound;
   unsigned long long mask;
   double scale;
   double bias;

   assert(src_type.floating);

   mantissa = lp_mantissa(src_type);

   /* We cannot carry more bits than the mantissa */
   n = MIN2(mantissa, dst_width);

   /* These magic coefficients will make the desired result appear in the
    * least significant bits of the mantissa:
    *   scale = (2^n - 1) / 2^n
    *   bias  = 2^(mantissa - n)
    */
   ubound = ((unsigned long long)1 << n);
   mask = ubound - 1;
   scale = (double)mask/ubound;
   bias = (double)((unsigned long long)1 << (mantissa - n));

   res = LLVMBuildMul(builder, src, lp_build_const_scalar(src_type, scale), "");
   res = LLVMBuildAdd(builder, res, lp_build_const_scalar(src_type, bias), "");
   /* from here on the float bit pattern is handled as an integer */
   res = LLVMBuildBitCast(builder, res, int_vec_type, "");

   if(dst_width > n) {
      /* More bits requested than the mantissa provides: move the n result
       * bits to the top of the dst_width-bit field. The shift also pushes
       * the bits above them further up, so no masking is done here. */
      int shift = dst_width - n;
      res = LLVMBuildShl(builder, res, lp_build_int_const_scalar(src_type, shift), "");

      /* TODO: Fill in the empty lower bits for additional precision? */
      /* Both candidate fill strategies below are compiled out. */
#if 0
      {
         LLVMValueRef msb;
         msb = LLVMBuildLShr(builder, res, lp_build_int_const_scalar(src_type, dst_width - 1), "");
         msb = LLVMBuildShl(builder, msb, lp_build_int_const_scalar(src_type, shift), "");
         msb = LLVMBuildSub(builder, msb, lp_build_int_const_scalar(src_type, 1), "");
         res = LLVMBuildOr(builder, res, msb, "");
      }
#elif 0
      while(shift > 0) {
         res = LLVMBuildOr(builder, res, LLVMBuildLShr(builder, res, lp_build_int_const_scalar(src_type, n), ""), "");
         shift -= n;
         n *= 2;
      }
#endif
   }
   else
      /* keep only the n result bits; this drops the remaining float bits */
      res = LLVMBuildAnd(builder, res, lp_build_int_const_scalar(src_type, mask), "");

   return res;
}
/**
 * Build a & ~b.
 *
 * Floating-point operands are reinterpreted as integers for the bitwise
 * operations and the result is cast back to the float vector type.
 */
LLVMValueRef
lp_build_andnot(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   const struct lp_type type = bld->type;
   LLVMValueRef not_b;
   LLVMValueRef masked;

   assert(lp_check_value(type, a));
   assert(lp_check_value(type, b));

   /* bitwise ops are not allowed on floating-point values */
   if (type.floating) {
      a = LLVMBuildBitCast(builder, a, bld->int_vec_type, "");
      b = LLVMBuildBitCast(builder, b, bld->int_vec_type, "");
   }

   not_b = LLVMBuildNot(builder, b, "");
   masked = LLVMBuildAnd(builder, a, not_b, "");

   if (!type.floating) {
      return masked;
   }

   return LLVMBuildBitCast(builder, masked, bld->vec_type, "");
}
/*
 * Extract the "t" field of a message tag: the six bits starting at bit 6
 * of mr0, returned as a value of type ctx->i32t.
 */
LLVMValueRef build_t_from_tag(struct llvm_ctx *ctx, LLVMValueRef mr0)
{
  /* shift the field down to bit 0 ... */
  LLVMValueRef shifted = LLVMBuildLShr(ctx->builder, mr0, CONST_WORD(6),
    "tag.t.raw");
  /* ... and drop everything above it */
  LLVMValueRef field = LLVMBuildAnd(ctx->builder, shifted, CONST_WORD(0x3f),
    "tag.t");

  return LLVMBuildTruncOrBitCast(ctx->builder, field, ctx->i32t, "tag.t.int");
}
/**
 * Unpack a vector of rgba8 values into four 32bit wide SoA vectors.
 *
 * \param dst_type  The desired return type. For pure integer formats
 *                  this should be a 32bit wide int or uint vector type,
 *                  otherwise a float vector type.
 *
 * \param packed    The rgba8 values to unpack.
 *
 * \param rgba      The 4 SoA return vectors, one per channel.
 */
void
lp_build_rgba8_to_fi32_soa(struct gallivm_state *gallivm,
                           struct lp_type dst_type,
                           LLVMValueRef packed,
                           LLVMValueRef *rgba)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef byte_mask = lp_build_const_int_vec(gallivm, dst_type, 0xff);
   unsigned chan = 0;

   /* XXX technically shouldn't use that for uint dst_type */
   packed = LLVMBuildBitCast(builder, packed,
                             lp_build_int_vec_type(gallivm, dst_type), "");

   /* Peel off one byte per channel, lowest byte first. */
   while (chan < 4) {
      const unsigned lo_bit = chan * 8;
      LLVMValueRef value = packed;

      /* channel 0 is already at bit 0 */
      if (lo_bit != 0) {
         LLVMValueRef count = lp_build_const_int_vec(gallivm, dst_type, lo_bit);
         value = LLVMBuildLShr(builder, value, count, "");
      }

      /* the top channel has nothing above it left to mask away */
      if (lo_bit + 8 < 32) {
         value = LLVMBuildAnd(builder, value, byte_mask, "");
      }

      if (dst_type.floating) {
         value = lp_build_unsigned_norm_to_float(gallivm, 8, dst_type, value);
      }

      rgba[chan] = value;
      ++chan;
   }
}
/**
 * Convert int16 half-floats to float32.
 *
 * Note this can be performed in 1 instruction if vcvtph2ps exists
 * [llvm.x86.vcvtph2ps / _mm_cvtph_ps].
 *
 * @param src_type  <vector> type of int16
 * @param src       value to convert
 *
 * ref http://fgiesen.wordpress.com/2012/03/28/half-to-float-done-quic/
 */
LLVMValueRef
lp_build_half_to_float(struct gallivm_state *gallivm,
                       struct lp_type src_type,
                       LLVMValueRef src)
{
   struct lp_type f32_type = lp_type_float_vec(32, 32 * src_type.length);
   struct lp_type i32_type = lp_type_int_vec(32, 32 * src_type.length);

   LLVMBuilderRef builder = gallivm->builder;
   LLVMTypeRef int_vec_type = lp_build_vec_type(gallivm, i32_type);
   LLVMTypeRef float_vec_type = lp_build_vec_type(gallivm, f32_type);

   /* Integer constants used below */
   LLVMValueRef c_13 = lp_build_const_int_vec(gallivm, i32_type, 13);
   LLVMValueRef c_16 = lp_build_const_int_vec(gallivm, i32_type, 16);
   LLVMValueRef c_nosign = lp_build_const_int_vec(gallivm, i32_type, 0x7fff);
   LLVMValueRef c_max_finite = lp_build_const_int_vec(gallivm, i32_type, 0x7bff);
   LLVMValueRef c_f32_exp_all = lp_build_const_int_vec(gallivm, i32_type, 0xff << 23);
   LLVMValueRef c_magic_bits = lp_build_const_int_vec(gallivm, i32_type, (254 - 15) << 23);
   /* float whose bit pattern is c_magic_bits; multiplying by it rebiases
    * the exponent */
   LLVMValueRef magic = LLVMBuildBitCast(builder, c_magic_bits, float_vec_type, "");

   LLVMValueRef half, exp_mant, widened, as_float, product, rebased;
   LLVMValueRef is_infnan, infnan_exp;
   LLVMValueRef sign_only, sign32, sign_and_inf, bits;

   /* Zero-extend the int16 vector to int32 */
   half = LLVMBuildZExt(builder, src, int_vec_type, "");

   /* Exponent and mantissa bits, moved to their float32 position */
   exp_mant = LLVMBuildAnd(builder, c_nosign, half, "");
   widened = LLVMBuildShl(builder, exp_mant, c_13, "");
   as_float = LLVMBuildBitCast(builder, widened, float_vec_type, "");

   /* Exponent adjust */
   product = LLVMBuildFMul(builder, as_float, magic, "");
   rebased = LLVMBuildBitCast(builder, product, int_vec_type, "");

   /* Make sure Inf/NaN survive: anything above 0x7bff gets a full exponent */
   is_infnan = lp_build_compare(gallivm, i32_type, PIPE_FUNC_GREATER,
                                exp_mant, c_max_finite);
   infnan_exp = LLVMBuildAnd(builder, is_infnan, c_f32_exp_all, "");

   /* Sign bit: whatever is left of the input once exp/mantissa are removed */
   sign_only = LLVMBuildXor(builder, half, exp_mant, "");
   sign32 = LLVMBuildShl(builder, sign_only, c_16, "");

   /* Combine the pieces */
   sign_and_inf = LLVMBuildOr(builder, sign32, infnan_exp, "");
   bits = LLVMBuildOr(builder, rebased, sign_and_inf, "");

   /* Cast from int32 vector to float32 vector */
   return LLVMBuildBitCast(builder, bits, float_vec_type, "");
}
/*
 * Extract the "u" field of a message tag: the low six bits of mr0,
 * returned as a value of type ctx->i32t.
 */
LLVMValueRef build_u_from_tag(
  struct llvm_ctx *ctx,
  LLVMValueRef mr0)
{
  LLVMValueRef field;

  field = LLVMBuildAnd(ctx->builder, mr0, CONST_WORD(0x3f), "tag.u");
  field = LLVMBuildTruncOrBitCast(ctx->builder, field, ctx->i32t,
    "tag.u.int");
  return field;
}
/**
 * Convert a float vector to an int vector, rounding with floor().
 *
 * With SSE4.1 the value is rounded down by lp_build_round_sse41() first.
 * Otherwise negative inputs get an offset of almost -1 added, so that the
 * final float-to-signed-int conversion lands on the floor value.
 */
LLVMValueRef
lp_build_ifloor(struct lp_build_context *bld,
                LLVMValueRef a)
{
   const struct lp_type type = bld->type;
   LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
   LLVMValueRef rounded;
   LLVMValueRef result;

   assert(type.floating);
   assert(lp_check_value(type, a));

   if (util_cpu_caps.has_sse4_1) {
      rounded = lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_FLOOR);
   }
   else {
      LLVMTypeRef vec_type = lp_build_vec_type(type);
      const unsigned mantissa = lp_mantissa(type);
      const unsigned long long denom = (unsigned long long)1 << mantissa;
      /* largest representable fraction below one: (2^m - 1) / 2^m */
      const double almost_one = (double)(denom - 1)/denom;
      LLVMValueRef sign_bit =
         lp_build_int_const_scalar(type,
                                   (unsigned long long)1 << (type.width - 1));
      LLVMValueRef neg_mask;
      LLVMValueRef adjust;

      /* neg_mask = a < 0 ? ~0 : 0 (sign bit smeared over the whole element) */
      neg_mask = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
      neg_mask = LLVMBuildAnd(bld->builder, neg_mask, sign_bit, "");
      neg_mask = LLVMBuildAShr(bld->builder, neg_mask,
                               lp_build_int_const_scalar(type, type.width - 1),
                               "");
      lp_build_name(neg_mask, "floor.sign");

      /* adjust = -0.99999(9)f */
      adjust = lp_build_const_scalar(type, -almost_one);
      adjust = LLVMConstBitCast(adjust, int_vec_type);

      /* adjust = a < 0 ? -0.99999(9)f : 0.0f */
      adjust = LLVMBuildAnd(bld->builder, adjust, neg_mask, "");
      adjust = LLVMBuildBitCast(bld->builder, adjust, vec_type, "");
      lp_build_name(adjust, "floor.offset");

      rounded = LLVMBuildAdd(bld->builder, a, adjust, "");
      lp_build_name(rounded, "floor.res");
   }

   result = LLVMBuildFPToSI(bld->builder, rounded, int_vec_type, "");
   lp_build_name(result, "floor");

   return result;
}
static void emit_and(const struct lp_build_tgsi_action *action, struct lp_build_tgsi_context *bld_base, struct lp_build_emit_data *emit_data) { LLVMBuilderRef builder = bld_base->base.gallivm->builder; emit_data->output[emit_data->chan] = LLVMBuildAnd(builder, emit_data->args[0], emit_data->args[1], ""); }
/**
 * Build a manual selection sequence for cube face sc/tc coordinates and
 * major axis vector (multiplied by 2 for consistency) for the given
 * vec3 \p coords, for the face implied by \p selcoords.
 *
 * The face is taken from selcoords->id (id >= 4 selects a Z-major face,
 * 2 <= id < 4 a Y-major face, anything lower an X-major face) and the
 * direction from the sign of selcoords->ma. The sc/tc selection follows the
 * OpenGL cube map face table:
 *
 *    major axis    sc     tc
 *       +x         -z     -y
 *       -x         +z     -y
 *       +y         +x     +z
 *       -y         +x     -z
 *       +z         +x     -y
 *       -z         -x     -y
 *
 * For the major axis, we always adjust the sign to be in the direction of
 * selcoords.ma; i.e., a positive out_ma means that coords is pointed towards
 * the selcoords major axis.
 *
 * \param builder    IR builder the instructions are appended to
 * \param selcoords  supplies the face id and major axis that pick the face
 * \param coords     the three coordinates (x, y, z) to project
 * \param out_st     receives sc in [0] and tc in [1]
 * \param out_ma     receives the signed major axis value times 2
 */
static void
build_cube_select(LLVMBuilderRef builder,
		  const struct cube_selection_coords *selcoords,
		  const LLVMValueRef *coords,
		  LLVMValueRef *out_st,
		  LLVMValueRef *out_ma)
{
	LLVMTypeRef f32 = LLVMTypeOf(coords[0]);
	LLVMValueRef is_ma_positive;
	LLVMValueRef sgn_ma;
	LLVMValueRef is_ma_z, is_not_ma_z;
	LLVMValueRef is_ma_y;
	LLVMValueRef is_ma_x;
	LLVMValueRef sgn;
	LLVMValueRef tmp;

	/* sgn_ma = +1.0 when the major axis value is non-negative, else -1.0 */
	is_ma_positive = LLVMBuildFCmp(builder, LLVMRealUGE,
		selcoords->ma, LLVMConstReal(f32, 0.0), "");
	sgn_ma = LLVMBuildSelect(builder, is_ma_positive,
		LLVMConstReal(f32, 1.0), LLVMConstReal(f32, -1.0), "");

	/* Decode which axis is the major one from the face id. */
	is_ma_z = LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->id,
		LLVMConstReal(f32, 4.0), "");
	is_not_ma_z = LLVMBuildNot(builder, is_ma_z, "");
	is_ma_y = LLVMBuildAnd(builder, is_not_ma_z,
		LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->id,
			LLVMConstReal(f32, 2.0), ""), "");
	is_ma_x = LLVMBuildAnd(builder, is_not_ma_z,
		LLVMBuildNot(builder, is_ma_y, ""), "");

	/* Select sc: z on X-major faces (sign -sgn_ma), x otherwise
	 * (sign +1 on Y-major faces, sgn_ma on Z-major faces). The major
	 * axis component itself must never be picked here.
	 */
	tmp = LLVMBuildSelect(builder, is_ma_x, coords[2], coords[0], "");
	sgn = LLVMBuildSelect(builder, is_ma_y, LLVMConstReal(f32, 1.0),
		LLVMBuildSelect(builder, is_ma_z, sgn_ma,
			LLVMBuildFNeg(builder, sgn_ma, ""), ""), "");
	out_st[0] = LLVMBuildFMul(builder, tmp, sgn, "");

	/* Select tc: z on Y-major faces (sign sgn_ma), -y otherwise. */
	tmp = LLVMBuildSelect(builder, is_ma_y, coords[2], coords[1], "");
	sgn = LLVMBuildSelect(builder, is_ma_y, sgn_ma,
		LLVMConstReal(f32, -1.0), "");
	out_st[1] = LLVMBuildFMul(builder, tmp, sgn, "");

	/* Select ma: the major component, signed towards selcoords->ma, x2. */
	tmp = LLVMBuildSelect(builder, is_ma_z, coords[2],
		LLVMBuildSelect(builder, is_ma_y, coords[1], coords[0], ""), "");
	sgn = LLVMBuildSelect(builder, is_ma_positive,
		LLVMConstReal(f32, 2.0), LLVMConstReal(f32, -2.0), "");
	*out_ma = LLVMBuildFMul(builder, tmp, sgn, "");
}
/**
 * AND the given value into the stored boolean mask.
 * Typically used to update the quad's pixel alive/killed mask
 * after depth testing, alpha testing, TGSI_OPCODE_KILL_IF, etc.
 */
void
lp_build_mask_update(struct lp_build_mask_context *mask,
                     LLVMValueRef value)
{
   /* fetch the current mask, combine, and write the result back */
   LLVMValueRef current = lp_build_mask_value(mask);
   LLVMValueRef combined =
      LLVMBuildAnd(mask->skip.gallivm->builder, current, value, "");

   LLVMBuildStore(mask->skip.gallivm->builder, combined, mask->var);
}
/**
 * Build the scissor mask for a quad: a lane passes when its window position
 * satisfies xmin <= x < xmax and ymin <= y < ymax, with the bounds read from
 * the JIT context.
 *
 * \param context_ptr  JIT context the scissor bounds are fetched from
 * \param interp       interpolation context; pos[0]/pos[1] hold x and y
 * \param type         vector type used for the comparisons
 * \return the AND of the four per-bound comparison masks
 */
static LLVMValueRef
generate_scissor_test(LLVMBuilderRef builder,
                      LLVMValueRef context_ptr,
                      const struct lp_build_interp_soa_context *interp,
                      struct lp_type type)
{
   LLVMTypeRef vec_type = lp_build_vec_type(type);
   LLVMValueRef px = interp->pos[0];
   LLVMValueRef py = interp->pos[1];
   LLVMValueRef left, top, right, bottom;
   LLVMValueRef ge_left, ge_top, lt_right, lt_bottom;
   LLVMValueRef inside;

   /* px, py contain the window coords for the four pixels in the quad */
   assert(px);
   assert(py);

   /* fetch each scalar scissor bound and splat it across a vector */
   left = lp_jit_context_scissor_xmin_value(builder, context_ptr);
   left = lp_build_broadcast(builder, vec_type, left);

   top = lp_jit_context_scissor_ymin_value(builder, context_ptr);
   top = lp_build_broadcast(builder, vec_type, top);

   right = lp_jit_context_scissor_xmax_value(builder, context_ptr);
   right = lp_build_broadcast(builder, vec_type, right);

   bottom = lp_jit_context_scissor_ymax_value(builder, context_ptr);
   bottom = lp_build_broadcast(builder, vec_type, bottom);

   /* lower bounds are inclusive, upper bounds exclusive */
   ge_left = lp_build_compare(builder, type, PIPE_FUNC_GEQUAL, px, left);
   ge_top = lp_build_compare(builder, type, PIPE_FUNC_GEQUAL, py, top);
   lt_right = lp_build_compare(builder, type, PIPE_FUNC_LESS, px, right);
   lt_bottom = lp_build_compare(builder, type, PIPE_FUNC_LESS, py, bottom);

   /* a pixel is inside only if all four tests pass */
   inside = LLVMBuildAnd(builder, ge_left, ge_top, "");
   inside = LLVMBuildAnd(builder, inside, lt_right, "");
   inside = LLVMBuildAnd(builder, inside, lt_bottom, "");

   lp_build_name(inside, "scissormask");

   return inside;
}
/*
 * Generate both operand expressions and emit their bitwise AND.
 * Returns NULL when either operand failed to generate.
 */
LLVMValueRef gen_and(compile_t* c, ast_t* left, ast_t* right)
{
  LLVMValueRef lhs = gen_expr(c, left);
  LLVMValueRef rhs = gen_expr(c, right);

  // Both operands are always generated before either result is checked.
  if(!lhs || !rhs)
    return NULL;

  return LLVMBuildAnd(c->builder, lhs, rhs, "");
}
/*
 * Generate the digest of a pointer-typed value.
 *
 * The type-specific __digestof method is looked up through the value's
 * descriptor and called. When boxed_subtype has SUBTYPE_KIND_UNBOXED set, a
 * runtime test on the low bit of the type id is emitted first: if the bit is
 * clear the __digestof call is used, otherwise the address itself is the
 * digest. The two results are merged with a phi.
 */
static LLVMValueRef gen_digestof_box(compile_t* c, reach_type_t* type,
  LLVMValueRef value, int boxed_subtype)
{
  pony_assert(LLVMGetTypeKind(LLVMTypeOf(value)) == LLVMPointerTypeKind);

  const bool maybe_unboxed = (boxed_subtype & SUBTYPE_KIND_UNBOXED) != 0;

  LLVMBasicBlockRef box_block = NULL;
  LLVMBasicBlockRef nonbox_block = NULL;
  LLVMBasicBlockRef post_block = NULL;

  LLVMValueRef desc = gendesc_fetch(c, value);

  if(maybe_unboxed)
  {
    box_block = codegen_block(c, "digestof_box");
    nonbox_block = codegen_block(c, "digestof_nonbox");
    post_block = codegen_block(c, "digestof_post");

    // Branch on the low bit of the type id: zero means a boxed value.
    LLVMValueRef type_id = gendesc_typeid(c, desc);
    LLVMValueRef low_bit = LLVMBuildAnd(c->builder, type_id,
      LLVMConstInt(c->i32, 1, false), "");
    LLVMValueRef is_boxed = LLVMBuildICmp(c->builder, LLVMIntEQ, low_bit,
      LLVMConstInt(c->i32, 0, false), "");
    LLVMBuildCondBr(c->builder, is_boxed, box_block, nonbox_block);

    // The __digestof call below is emitted into the boxed branch.
    LLVMPositionBuilderAtEnd(c->builder, box_block);
  }

  // Call the type-specific __digestof function, which will unbox the value.
  reach_method_t* digest_fn = reach_method(type, TK_BOX,
    stringtab("__digestof"), NULL);
  pony_assert(digest_fn != NULL);

  LLVMValueRef func = gendesc_vtable(c, desc, digest_fn->vtable_index);
  LLVMTypeRef fn_type = LLVMFunctionType(c->intptr, &c->object_ptr, 1, false);
  func = LLVMBuildBitCast(c->builder, func, LLVMPointerType(fn_type, 0), "");
  LLVMValueRef box_digest = codegen_call(c, func, &value, 1, true);

  if(!maybe_unboxed)
    return box_digest;

  LLVMBuildBr(c->builder, post_block);

  // Otherwise just cast the address.
  LLVMPositionBuilderAtEnd(c->builder, nonbox_block);
  LLVMValueRef nonbox_digest = LLVMBuildPtrToInt(c->builder, value, c->intptr,
    "");
  LLVMBuildBr(c->builder, post_block);

  // Merge the two digests.
  LLVMPositionBuilderAtEnd(c->builder, post_block);
  LLVMValueRef phi = LLVMBuildPhi(c->builder, c->intptr, "");
  LLVMAddIncoming(phi, &box_digest, &box_block, 1);
  LLVMAddIncoming(phi, &nonbox_digest, &nonbox_block, 1);
  return phi;
}
/**
 * Bitwise select: (mask & a) | (~mask & b).
 *
 * Floating-point a/b are reinterpreted as integers for the bit operations
 * and the result is cast back. When a and b are the same value it is
 * returned directly and no code is emitted.
 */
LLVMValueRef
lp_build_select_bitwise(struct lp_build_context *bld,
                        LLVMValueRef mask,
                        LLVMValueRef a,
                        LLVMValueRef b)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   struct lp_type type = bld->type;
   LLVMValueRef from_a, from_b, not_mask, merged;

   assert(lp_check_value(type, a));
   assert(lp_check_value(type, b));

   if (a == b) {
      return a;
   }

   if (type.floating) {
      LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->gallivm, type);
      a = LLVMBuildBitCast(builder, a, int_vec_type, "");
      b = LLVMBuildBitCast(builder, b, int_vec_type, "");
   }

   from_a = LLVMBuildAnd(builder, a, mask, "");

   /* This often gets translated to PANDN, but sometimes the NOT is
    * pre-computed and stored in another constant. The best strategy depends
    * on available registers, so it is not a big deal -- hopefully LLVM makes
    * the right decision given the rest of the program.
    */
   not_mask = LLVMBuildNot(builder, mask, "");
   from_b = LLVMBuildAnd(builder, b, not_mask, "");

   merged = LLVMBuildOr(builder, from_a, from_b, "");

   if (type.floating) {
      LLVMTypeRef vec_type = lp_build_vec_type(bld->gallivm, type);
      merged = LLVMBuildBitCast(builder, merged, vec_type, "");
   }

   return merged;
}
/*
 * SI implements derivatives using the local data store (LDS).
 * All writes to the LDS happen in all executing threads at
 * the same time. TID is the Thread ID for the current
 * thread and is a value between 0 and 63, representing
 * the thread's position in the wavefront.
 *
 * For the pixel shader, threads are grouped into quads of four pixels.
 * The TIDs of the pixels of a quad are:
 *
 *  +------+------+
 *  |4n + 0|4n + 1|
 *  +------+------+
 *  |4n + 2|4n + 3|
 *  +------+------+
 *
 * So, masking the TID with 0xfffffffc yields the TID of the top left pixel
 * of the quad, masking with 0xfffffffd yields the TID of the top pixel of
 * the current pixel's column, and masking with 0xfffffffe yields the TID
 * of the left pixel of the current pixel's row.
 *
 * Adding 1 yields the TID of the pixel to the right of the left pixel, and
 * adding 2 yields the TID of the pixel below the top pixel.
 *
 * The result is val(base + idx) - val(base), where base = TID & mask. The
 * two values are fetched either with ds_bpermute or through a store to and
 * two loads from the lds array.
 */
LLVMValueRef
ac_build_ddxy(struct ac_llvm_context *ctx,
	      bool has_ds_bpermute,
	      uint32_t mask,
	      int idx,
	      LLVMValueRef lds,
	      LLVMValueRef val)
{
	LLVMValueRef thread_id;
	LLVMValueRef lane[2];	/* [0] = base lane, [1] = base lane + idx */
	LLVMValueRef fetched[2];
	LLVMValueRef args[2];

	thread_id = ac_get_thread_id(ctx);

	lane[0] = LLVMBuildAnd(ctx->builder, thread_id,
			       LLVMConstInt(ctx->i32, mask, false), "");
	lane[1] = LLVMBuildAdd(ctx->builder, lane[0],
			       LLVMConstInt(ctx->i32, idx, false), "");

	if (has_ds_bpermute) {
		unsigned k;

		/* The lane index is multiplied by 4 to form the address
		 * operand of ds_bpermute.
		 */
		for (k = 0; k < 2; k++) {
			args[0] = LLVMBuildMul(ctx->builder, lane[k],
					       LLVMConstInt(ctx->i32, 4, false), "");
			args[1] = val;
			fetched[k] = ac_build_intrinsic(ctx,
					"llvm.amdgcn.ds.bpermute", ctx->i32,
					args, 2,
					AC_FUNC_ATTR_READNONE |
					AC_FUNC_ATTR_CONVERGENT);
		}
	} else {
		LLVMValueRef own_slot, base_slot, other_slot;

		own_slot = ac_build_gep0(ctx, lds, thread_id);
		base_slot = ac_build_gep0(ctx, lds, lane[0]);
		other_slot = ac_build_gep0(ctx, lds, lane[1]);

		/* publish our value, then read the two lanes of interest */
		LLVMBuildStore(ctx->builder, val, own_slot);
		fetched[0] = LLVMBuildLoad(ctx->builder, base_slot, "");
		fetched[1] = LLVMBuildLoad(ctx->builder, other_slot, "");
	}

	fetched[0] = LLVMBuildBitCast(ctx->builder, fetched[0], ctx->f32, "");
	fetched[1] = LLVMBuildBitCast(ctx->builder, fetched[1], ctx->f32, "");

	return LLVMBuildFSub(ctx->builder, fetched[1], fetched[0], "");
}
/**
 * Apply the one- or two-sided stencil op and produce the updated stencil
 * values.
 *
 * The back-face state is only used when stencil[1] is enabled and a
 * front_facing value was supplied. If any write mask in use differs from
 * 0xff, the mask is narrowed by the write mask and the update is merged
 * bitwise with the old values; otherwise a plain select on mask is used.
 */
static LLVMValueRef
lp_build_stencil_op(struct lp_build_context *bld,
                    const struct pipe_stencil_state stencil[2],
                    enum stencil_op op,
                    LLVMValueRef stencilRefs[2],
                    LLVMValueRef stencilVals,
                    LLVMValueRef mask,
                    LLVMValueRef front_facing)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   const int two_sided = stencil[1].enabled && front_facing != NULL;
   LLVMValueRef updated;
   LLVMValueRef wmask;

   assert(stencil[0].enabled);

   /* front face op */
   updated = lp_build_stencil_op_single(bld, &stencil[0], op,
                                        stencilRefs[0], stencilVals);

   if (two_sided) {
      /* back face op, then choose per pixel */
      LLVMValueRef back = lp_build_stencil_op_single(bld, &stencil[1], op,
                                                     stencilRefs[1],
                                                     stencilVals);
      updated = lp_build_select(bld, front_facing, updated, back);
   }

   if (stencil[0].writemask == 0xff &&
       !(two_sided && stencil[1].writemask != 0xff)) {
      /* every bit is writable: result = mask ? updated : stencilVals */
      return lp_build_select(bld, mask, updated, stencilVals);
   }

   /* mask &= writemask (of whichever face applies) */
   wmask = lp_build_const_int_vec(bld->gallivm, bld->type,
                                  stencil[0].writemask);
   if (two_sided && stencil[1].writemask != stencil[0].writemask) {
      LLVMValueRef back_wmask = lp_build_const_int_vec(bld->gallivm, bld->type,
                                                       stencil[1].writemask);
      wmask = lp_build_select(bld, front_facing, wmask, back_wmask);
   }
   mask = LLVMBuildAnd(builder, mask, wmask, "");

   /* result = (updated & mask) | (stencilVals & ~mask) */
   return lp_build_select_bitwise(bld, mask, updated, stencilVals);
}
/**
 * Split packed UYVY blocks into separate Y, U and V vectors.
 *
 * @param packed  <n x i32> vector holding the packed UYVY blocks
 * @param i       <n x i32> vector with the x pixel coordinate (0 or 1)
 * @param y,u,v   receive the extracted channels, each masked to 8 bits
 *
 * Per element:
 *    y = (uyvy >> (16*i + 8)) & 0xff
 *    u = (uyvy              ) & 0xff
 *    v = (uyvy >> 16        ) & 0xff
 */
static void
uyvy_to_yuv_soa(LLVMBuilderRef builder,
                unsigned n,
                LLVMValueRef packed,
                LLVMValueRef i,
                LLVMValueRef *y,
                LLVMValueRef *u,
                LLVMValueRef *v)
{
   struct lp_type type;
   LLVMValueRef y_shift;
   LLVMValueRef byte_mask;

   memset(&type, 0, sizeof type);
   type.width = 32;
   type.length = n;

   assert(lp_check_value(type, packed));
   assert(lp_check_value(type, i));

   /* the Y byte of pixel i sits 8 bits above the start of its 16-bit half */
   y_shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(type, 16), "");
   y_shift = LLVMBuildAdd(builder, y_shift, lp_build_const_int_vec(type, 8), "");

   *y = LLVMBuildLShr(builder, packed, y_shift, "");
   /* U is already in the low byte; only the final mask is needed */
   *u = packed;
   *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(type, 16), "");

   byte_mask = lp_build_const_int_vec(type, 0xff);

   *y = LLVMBuildAnd(builder, *y, byte_mask, "y");
   *u = LLVMBuildAnd(builder, *u, byte_mask, "u");
   *v = LLVMBuildAnd(builder, *v, byte_mask, "v");
}
/**
 * Emit the stencil comparison for one face (front or back).
 * Called once per face when two-sided stencil code is generated.
 *
 * \param stencil      stencil state of the face being tested
 * \param stencilRef   reference value, replicated across the vector
 * \param stencilVals  stencil values read from the framebuffer
 * \return vector mask of pass/fail values (~0 or 0)
 */
static LLVMValueRef
lp_build_stencil_test_single(struct lp_build_context *bld,
                             const struct pipe_stencil_state *stencil,
                             LLVMValueRef stencilRef,
                             LLVMValueRef stencilVals)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   const unsigned all_bits = 255; /* XXX fix */
   const struct lp_type vec_type = bld->type;

   /*
    * SSE2 has intrinsics for signed comparisons, but not unsigned ones. Values
    * are between 0..255 so ensure we generate the fastest comparisons for
    * wider elements: elements of up to 8 bits must be unsigned, wider ones
    * signed.
    */
   if (vec_type.width > 8) {
      assert(vec_type.sign);
   } else {
      assert(!vec_type.sign);
   }

   assert(stencil->enabled);

   /* A value mask with every bit set is a no-op, so skip the ANDs. */
   if (stencil->valuemask != all_bits) {
      LLVMValueRef vmask = lp_build_const_int_vec(bld->gallivm, vec_type,
                                                  stencil->valuemask);

      /* both sides of the comparison are masked the same way */
      stencilRef = LLVMBuildAnd(builder, stencilRef, vmask, "");
      stencilVals = LLVMBuildAnd(builder, stencilVals, vmask, "");
   }

   return lp_build_cmp(bld, stencil->func, stencilRef, stencilVals);
}
/*
 * Generate both operand expressions and emit their bitwise AND.
 *
 * Returns NULL when either operand failed to generate. Two constant
 * operands are folded with LLVMConstAnd, and an operand that
 * is_always_false() reports as false yields a constant i1 0 without
 * emitting an instruction.
 */
LLVMValueRef gen_and(compile_t* c, ast_t* left, ast_t* right)
{
  LLVMValueRef lhs = gen_expr(c, left);
  LLVMValueRef rhs = gen_expr(c, right);

  // Both operands are always generated before either result is checked.
  if(!lhs || !rhs)
    return NULL;

  if(LLVMIsConstant(lhs) && LLVMIsConstant(rhs))
  {
    return LLVMConstAnd(lhs, rhs);
  }

  if(is_always_false(c, lhs) || is_always_false(c, rhs))
  {
    return LLVMConstInt(c->i1, 0, false);
  }

  return LLVMBuildAnd(c->builder, lhs, rhs, "");
}
/** * Build LLVM code for texture coord wrapping, for nearest filtering, * for scaled integer texcoords. * \param block_length is the length of the pixel block along the * coordinate axis * \param coord the incoming texcoord (s,t,r or q) scaled to the texture size * \param length the texture size along one dimension * \param stride pixel stride along the coordinate axis (in bytes) * \param is_pot if TRUE, length is a power of two * \param wrap_mode one of PIPE_TEX_WRAP_x * \param out_offset byte offset for the wrapped coordinate * \param out_i resulting sub-block pixel coordinate for coord0 */ static void lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld, unsigned block_length, LLVMValueRef coord, LLVMValueRef length, LLVMValueRef stride, boolean is_pot, unsigned wrap_mode, LLVMValueRef *out_offset, LLVMValueRef *out_i) { struct lp_build_context *int_coord_bld = &bld->int_coord_bld; LLVMBuilderRef builder = bld->gallivm->builder; LLVMValueRef length_minus_one; length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one); switch(wrap_mode) { case PIPE_TEX_WRAP_REPEAT: if(is_pot) coord = LLVMBuildAnd(builder, coord, length_minus_one, ""); else { /* Add a bias to the texcoord to handle negative coords */ LLVMValueRef bias = lp_build_mul_imm(int_coord_bld, length, 1024); coord = LLVMBuildAdd(builder, coord, bias, ""); coord = LLVMBuildURem(builder, coord, length, ""); } break; case PIPE_TEX_WRAP_CLAMP_TO_EDGE: coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero); coord = lp_build_min(int_coord_bld, coord, length_minus_one); break; case PIPE_TEX_WRAP_CLAMP: case PIPE_TEX_WRAP_CLAMP_TO_BORDER: case PIPE_TEX_WRAP_MIRROR_REPEAT: case PIPE_TEX_WRAP_MIRROR_CLAMP: case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: default: assert(0); } lp_build_sample_partial_offset(int_coord_bld, block_length, coord, stride, out_offset, out_i); }
/**
 * Compute the partial offset of a pixel block along an arbitrary axis.
 *
 * @param coord         coordinate in pixels
 * @param stride        number of bytes between rows of successive pixel blocks
 * @param block_length  number of pixels in a pixel block along the coordinate
 *                      axis
 * @param out_offset    resulting relative offset of the pixel block in bytes
 * @param out_subcoord  resulting sub-block pixel coordinate
 */
void
lp_build_sample_partial_offset(struct lp_build_context *bld,
                               unsigned block_length,
                               LLVMValueRef coord,
                               LLVMValueRef stride,
                               LLVMValueRef *out_offset,
                               LLVMValueRef *out_subcoord)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef block_offset;
   LLVMValueRef within_block;

   if (block_length != 1) {
      /*
       * Pixel blocks have power of two dimensions, so the remainder and the
       * division by block_length are written as a mask and a shift.
       *
       * LLVM would convert a plain urem/udiv to bit arithmetic on its own,
       * BUT it transforms the vector to scalars (and back) when doing so
       * (using roughly extract, shift/and, mov, unpack) (llvm 2.7). The
       * generated code looks seriously unfunny and is quite expensive.
       */
      unsigned log2_len = util_logbase2(block_length);
      LLVMValueRef shift_count =
         lp_build_const_int_vec(bld->gallivm, bld->type, log2_len);
      LLVMValueRef low_bits =
         lp_build_const_int_vec(bld->gallivm, bld->type, block_length - 1);

      within_block = LLVMBuildAnd(builder, coord, low_bits, "");
      coord = LLVMBuildLShr(builder, coord, shift_count, "");
   }
   else {
      /* single-pixel blocks: the sub-block coordinate is always zero */
      within_block = bld->zero;
   }

   block_offset = lp_build_mul(bld, coord, stride);

   assert(out_offset);
   assert(out_subcoord);

   *out_offset = block_offset;
   *out_subcoord = within_block;
}
/**
 * Emit the LLVM instruction for an integer binary operator.
 *
 * Logical/arithmetic operators map to the matching LLVMBuild* call (division
 * is signed). Relational operators map to an integer compare, using signed
 * predicates for the ordering comparisons.
 *
 * Instructions are emitted through the `Builder` variable declared outside
 * this function.
 *
 * \param Op       operator kind of the expression node
 * \param ValueE1  left-hand operand
 * \param ValueE2  right-hand operand
 * \return the value of the emitted instruction, or NULL when Op is not one
 *         of the operators handled here
 */
static LLVMValueRef
translateIntBinOp(NodeKind Op, LLVMValueRef ValueE1, LLVMValueRef ValueE2)
{
    LLVMIntPredicate Pred;

    switch (Op) {
    /* Operators that map one-to-one onto an LLVM binary instruction. */
    case OrOp:
        return LLVMBuildOr(Builder, ValueE1, ValueE2, "");
    case AndOp:
        return LLVMBuildAnd(Builder, ValueE1, ValueE2, "");
    case SumOp:
        return LLVMBuildAdd(Builder, ValueE1, ValueE2, "");
    case SubOp:
        return LLVMBuildSub(Builder, ValueE1, ValueE2, "");
    case MultOp:
        return LLVMBuildMul(Builder, ValueE1, ValueE2, "");
    case DivOp:
        return LLVMBuildSDiv(Builder, ValueE1, ValueE2, "");

    /* Comparisons: only pick the predicate here, emit the icmp below. */
    case LtOp:
        Pred = LLVMIntSLT;
        break;
    case LeOp:
        Pred = LLVMIntSLE;
        break;
    case GtOp:
        Pred = LLVMIntSGT;
        break;
    case GeOp:
        Pred = LLVMIntSGE;
        break;
    case EqOp:
        Pred = LLVMIntEQ;
        break;
    case DiffOp:
        Pred = LLVMIntNE;
        break;

    default:
        return NULL;
    }

    return LLVMBuildICmp(Builder, Pred, ValueE1, ValueE2, "");
}
static void emit_bfi(const struct lp_build_tgsi_action *action, struct lp_build_tgsi_context *bld_base, struct lp_build_emit_data *emit_data) { struct gallivm_state *gallivm = bld_base->base.gallivm; LLVMBuilderRef builder = gallivm->builder; LLVMValueRef bfi_args[3]; LLVMValueRef bfi_sm5; LLVMValueRef cond; // Calculate the bitmask: (((1 << src3) - 1) << src2 bfi_args[0] = LLVMBuildShl(builder, LLVMBuildSub(builder, LLVMBuildShl(builder, bld_base->int_bld.one, emit_data->args[3], ""), bld_base->int_bld.one, ""), emit_data->args[2], ""); bfi_args[1] = LLVMBuildShl(builder, emit_data->args[1], emit_data->args[2], ""); bfi_args[2] = emit_data->args[0]; /* Calculate: * (arg0 & arg1) | (~arg0 & arg2) = arg2 ^ (arg0 & (arg1 ^ arg2) * Use the right-hand side, which the LLVM backend can convert to V_BFI. */ bfi_sm5 = LLVMBuildXor(builder, bfi_args[2], LLVMBuildAnd(builder, bfi_args[0], LLVMBuildXor(builder, bfi_args[1], bfi_args[2], ""), ""), ""); /* Since shifts of >= 32 bits are undefined in LLVM IR, the backend * uses the convenient V_BFI lowering for the above, which follows SM5 * and disagrees with GLSL semantics when bits (src3) is 32. */ cond = LLVMBuildICmp(builder, LLVMIntUGE, emit_data->args[3], lp_build_const_int32(gallivm, 32), ""); emit_data->output[emit_data->chan] = LLVMBuildSelect(builder, cond, emit_data->args[1], bfi_sm5, ""); }