/**
 * Compute the size and, where applicable, the row/image strides of one
 * mipmap level.
 *
 * \param bld             sampler build context (supplies dims, the base size
 *                        and the per-level stride arrays)
 * \param ilevel          scalar mipmap level index
 * \param out_size        receives bld->int_size minified by 'ilevel'
 * \param row_stride_vec  written only when the texture has >= 2 dimensions
 * \param img_stride_vec  written only when the texture has >= 2 dimensions
 *                        and is either 3D or a PIPE_TEXTURE_CUBE target
 */
void
lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld,
                            LLVMValueRef ilevel,
                            LLVMValueRef *out_size,
                            LLVMValueRef *row_stride_vec,
                            LLVMValueRef *img_stride_vec)
{
   struct lp_build_context *size_bld = &bld->int_size_bld;
   LLVMValueRef level_splat = lp_build_broadcast_scalar(size_bld, ilevel);

   /* width, height, depth at mipmap level 'ilevel' */
   *out_size = lp_build_minify(size_bld, bld->int_size, level_splat);

   /* 1D textures have neither a row nor an image stride */
   if (bld->dims < 2) {
      return;
   }

   *row_stride_vec = lp_build_get_level_stride_vec(bld,
                                                   bld->row_stride_array,
                                                   ilevel);

   if (bld->dims == 3 || bld->static_state->target == PIPE_TEXTURE_CUBE) {
      *img_stride_vec = lp_build_get_level_stride_vec(bld,
                                                      bld->img_stride_array,
                                                      ilevel);
   }
}
/**
 * Load stride_array[0][level] and broadcast it with the integer coordinate
 * build context.
 *
 * \param bld           sampler build context
 * \param stride_array  pointer to the per-level stride array
 * \param level         scalar mipmap level used as the array index
 * \return the loaded stride, broadcast via bld->int_coord_bld
 */
static LLVMValueRef
lp_build_get_level_stride_vec(struct lp_build_sample_context *bld,
                              LLVMValueRef stride_array,
                              LLVMValueRef level)
{
   struct gallivm_state *gallivm = bld->gallivm;
   LLVMValueRef gep_path[2];
   LLVMValueRef elem_ptr;
   LLVMValueRef scalar_stride;

   /* leading zero steps through the pointer, 'level' selects the element */
   gep_path[0] = lp_build_const_int32(gallivm, 0);
   gep_path[1] = level;

   elem_ptr = LLVMBuildGEP(gallivm->builder, stride_array, gep_path, 2, "");
   scalar_stride = LLVMBuildLoad(gallivm->builder, elem_ptr, "");

   return lp_build_broadcast_scalar(&bld->int_coord_bld, scalar_stride);
}
/**
 * Helper used by lp_build_cube_lookup().
 *
 * Computes  (+/-coord) * ima [* sign] + 0.5
 *
 * \param coord_bld     build context for the coordinate type
 * \param sign          scalar +1 or -1; when NULL the sign multiply is skipped
 * \param negate_coord  +1 to use coord as is, -1 to negate it first
 * \param coord         float vector
 * \param ima           float vector
 */
static LLVMValueRef
lp_build_cube_coord(struct lp_build_context *coord_bld,
                    LLVMValueRef sign, int negate_coord,
                    LLVMValueRef coord, LLVMValueRef ima)
{
   LLVMValueRef bias = lp_build_const_vec(coord_bld->gallivm,
                                          coord_bld->type, 0.5);
   LLVMValueRef scaled;

   assert(negate_coord == +1 || negate_coord == -1);

   if (negate_coord == -1) {
      coord = lp_build_negate(coord_bld, coord);
   }

   scaled = lp_build_mul(coord_bld, coord, ima);

   if (sign) {
      LLVMValueRef sign_vec = lp_build_broadcast_scalar(coord_bld, sign);
      scaled = lp_build_mul(coord_bld, scaled, sign_vec);
   }

   return lp_build_add(coord_bld, scaled, bias);
}
/**
 * Increment the shader input attribute values.
 * This is called when we move from one quad to the next.
 *
 * For every enabled channel of attributes [start, end) the new value is
 * stored in bld->attribs[attrib][chan]:
 *  - constant / facing attributes are reloaded from bld->a[attrib][chan];
 *  - position attributes alias the channels of attribute 0;
 *  - everything else is fetched at element 'loop_iter' from the bld->a
 *    storage, broadcast, and advanced by bld->dadq[attrib][chan].
 */
static void
attribs_update(struct lp_build_interp_soa_context *bld,
               struct gallivm_state *gallivm,
               LLVMValueRef loop_iter,
               int start,
               int end)
{
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *coeff_bld = &bld->coeff_bld;
   LLVMValueRef oow = NULL;   /* 1/w, built lazily and then reused */
   unsigned attrib;

   for (attrib = start; attrib < end; ++attrib) {
      const unsigned mask = bld->mask[attrib];
      const unsigned interp = bld->interp[attrib];
      unsigned chan;

      for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
         LLVMValueRef value;

         if (!(mask & (1 << chan))) {
            continue;
         }

         switch (interp) {
         case LP_INTERP_CONSTANT:
         case LP_INTERP_FACING:
            value = LLVMBuildLoad(builder, bld->a[attrib][chan], "");
            break;

         case LP_INTERP_POSITION:
            assert(attrib > 0);
            value = bld->attribs[0][chan];
            break;

         default: {
            /* the storage holds a vector; address it as an array of floats */
            LLVMTypeRef float_ptr_type =
               LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
            LLVMValueRef base, elem_ptr, dadq;

            /*
             * Broadcast the attribute value for this quad into all elements
             */
            base = LLVMBuildBitCast(builder, bld->a[attrib][chan],
                                    float_ptr_type, "");
            elem_ptr = LLVMBuildGEP(builder, base, &loop_iter, 1, "");
            value = LLVMBuildLoad(builder, elem_ptr, "");
            value = lp_build_broadcast_scalar(coeff_bld, value);

            /*
             * Get the derivatives.
             */
            dadq = bld->dadq[attrib][chan];

#if PERSPECTIVE_DIVIDE_PER_QUAD
            if (interp == LP_INTERP_PERSPECTIVE) {
               LLVMValueRef dwdq = bld->dadq[0][3];

               if (oow == NULL) {
                  assert(bld->oow);
                  /* NOTE(review): 'shuffle' is not declared anywhere in this
                   * function; this path presumably does not build as is --
                   * confirm before enabling PERSPECTIVE_DIVIDE_PER_QUAD. */
                  oow = LLVMBuildShuffleVector(coeff_bld->builder,
                                               bld->oow, coeff_bld->undef,
                                               shuffle, "");
               }

               dadq = lp_build_sub(coeff_bld, dadq,
                                   lp_build_mul(coeff_bld, value, dwdq));
               dadq = lp_build_mul(coeff_bld, dadq, oow);
            }
#endif

            /*
             * Add the derivatives
             */
            value = lp_build_add(coeff_bld, value, dadq);

#if !PERSPECTIVE_DIVIDE_PER_QUAD
            if (interp == LP_INTERP_PERSPECTIVE) {
               if (oow == NULL) {
                  LLVMValueRef w = bld->attribs[0][3];
                  assert(attrib != 0);
                  assert(bld->mask[0] & TGSI_WRITEMASK_W);
                  oow = lp_build_rcp(coeff_bld, w);
               }
               value = lp_build_mul(coeff_bld, value, oow);
            }
#endif

            if (attrib == 0 && chan == 2) {
               /* FIXME: Depth values can exceed 1.0, due to the fact that
                * setup interpolation coefficients refer to (0,0) which causes
                * precision loss. So we must clamp to 1.0 here to avoid
                * artifacts.
                */
               value = lp_build_min(coeff_bld, value, coeff_bld->one);
            }

            attrib_name(value, attrib, chan, "");
            break;
         }
         }

         bld->attribs[attrib][chan] = value;
      }
   }
}
/** * Generate code to compute coordinate gradient (rho). * \param ddx partial derivatives of (s, t, r, q) with respect to X * \param ddy partial derivatives of (s, t, r, q) with respect to Y * * XXX: The resulting rho is scalar, so we ignore all but the first element of * derivatives that are passed by the shader. */ static LLVMValueRef lp_build_rho(struct lp_build_sample_context *bld, unsigned unit, const LLVMValueRef ddx[4], const LLVMValueRef ddy[4]) { struct lp_build_context *int_size_bld = &bld->int_size_bld; struct lp_build_context *float_size_bld = &bld->float_size_bld; struct lp_build_context *float_bld = &bld->float_bld; const unsigned dims = bld->dims; LLVMBuilderRef builder = bld->gallivm->builder; LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context); LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0); LLVMValueRef index1 = LLVMConstInt(i32t, 1, 0); LLVMValueRef index2 = LLVMConstInt(i32t, 2, 0); LLVMValueRef dsdx, dsdy, dtdx, dtdy, drdx, drdy; LLVMValueRef rho_x, rho_y; LLVMValueRef rho_vec; LLVMValueRef int_size, float_size; LLVMValueRef rho; LLVMValueRef first_level, first_level_vec; dsdx = ddx[0]; dsdy = ddy[0]; if (dims <= 1) { rho_x = dsdx; rho_y = dsdy; } else { rho_x = float_size_bld->undef; rho_y = float_size_bld->undef; rho_x = LLVMBuildInsertElement(builder, rho_x, dsdx, index0, ""); rho_y = LLVMBuildInsertElement(builder, rho_y, dsdy, index0, ""); dtdx = ddx[1]; dtdy = ddy[1]; rho_x = LLVMBuildInsertElement(builder, rho_x, dtdx, index1, ""); rho_y = LLVMBuildInsertElement(builder, rho_y, dtdy, index1, ""); if (dims >= 3) { drdx = ddx[2]; drdy = ddy[2]; rho_x = LLVMBuildInsertElement(builder, rho_x, drdx, index2, ""); rho_y = LLVMBuildInsertElement(builder, rho_y, drdy, index2, ""); } } rho_x = lp_build_abs(float_size_bld, rho_x); rho_y = lp_build_abs(float_size_bld, rho_y); rho_vec = lp_build_max(float_size_bld, rho_x, rho_y); first_level = bld->dynamic_state->first_level(bld->dynamic_state, bld->gallivm, unit); first_level_vec = 
lp_build_broadcast_scalar(&bld->int_size_bld, first_level); int_size = lp_build_minify(int_size_bld, bld->int_size, first_level_vec); float_size = lp_build_int_to_float(float_size_bld, int_size); rho_vec = lp_build_mul(float_size_bld, rho_vec, float_size); if (dims <= 1) { rho = rho_vec; } else { if (dims >= 2) { LLVMValueRef rho_s, rho_t, rho_r; rho_s = LLVMBuildExtractElement(builder, rho_vec, index0, ""); rho_t = LLVMBuildExtractElement(builder, rho_vec, index1, ""); rho = lp_build_max(float_bld, rho_s, rho_t); if (dims >= 3) { rho_r = LLVMBuildExtractElement(builder, rho_vec, index2, ""); rho = lp_build_max(float_bld, rho, rho_r); } } } return rho; }
/**
 * Interpolate the shader input attribute values.
 * This is called for each (group of) quad(s).
 *
 * Pixel positions are the offsets stored at element 'loop_iter' of
 * bld->xoffset_store / bld->yoffset_store plus the broadcast bld->x / bld->y.
 * Results for attributes [start, end) go to bld->attribs[attrib][chan].
 */
static void
attribs_update_simple(struct lp_build_interp_soa_context *bld,
                      struct gallivm_state *gallivm,
                      LLVMValueRef loop_iter,
                      int start,
                      int end)
{
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *coeff_bld = &bld->coeff_bld;
   struct lp_build_context *setup_bld = &bld->setup_bld;
   LLVMValueRef inv_w = NULL;   /* 1/w, built lazily and then reused */
   LLVMValueRef pix_x, pix_y;
   LLVMValueRef offset_ptr;
   unsigned attrib;

   /* could do this with code-generated passed in pixel offsets too */
   assert(loop_iter);

   offset_ptr = LLVMBuildGEP(builder, bld->xoffset_store, &loop_iter, 1, "");
   pix_x = LLVMBuildLoad(builder, offset_ptr, "");
   offset_ptr = LLVMBuildGEP(builder, bld->yoffset_store, &loop_iter, 1, "");
   pix_y = LLVMBuildLoad(builder, offset_ptr, "");

   pix_x = LLVMBuildFAdd(builder, pix_x,
                         lp_build_broadcast_scalar(coeff_bld, bld->x), "");
   pix_y = LLVMBuildFAdd(builder, pix_y,
                         lp_build_broadcast_scalar(coeff_bld, bld->y), "");

   for (attrib = start; attrib < end; attrib++) {
      const unsigned mask = bld->mask[attrib];
      const unsigned interp = bld->interp[attrib];
      unsigned chan;

      for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
         LLVMValueRef chan_idx;
         LLVMValueRef ddx, ddy, value;

         if (!(mask & (1 << chan))) {
            continue;
         }

         ddx = coeff_bld->zero;
         ddy = coeff_bld->zero;
         value = coeff_bld->zero;

         chan_idx = lp_build_const_int32(gallivm, chan);

         switch (interp) {
         case LP_INTERP_PERSPECTIVE:
            /* fall-through */

         case LP_INTERP_LINEAR:
            if (attrib == 0 && chan <= 1) {
               /* position x / y: unit slope along its own axis */
               if (chan == 0) {
                  ddx = coeff_bld->one;
               }
               else {
                  ddy = coeff_bld->one;
               }
               if (bld->pos_offset) {
                  value = lp_build_const_vec(gallivm, coeff_bld->type,
                                             bld->pos_offset);
               }
            }
            else {
               ddx = lp_build_extract_broadcast(gallivm, setup_bld->type,
                                                coeff_bld->type,
                                                bld->dadxaos[attrib],
                                                chan_idx);
               ddy = lp_build_extract_broadcast(gallivm, setup_bld->type,
                                                coeff_bld->type,
                                                bld->dadyaos[attrib],
                                                chan_idx);
               value = lp_build_extract_broadcast(gallivm, setup_bld->type,
                                                  coeff_bld->type,
                                                  bld->a0aos[attrib],
                                                  chan_idx);
            }

            /*
             * a = a0 + (x * dadx + y * dady)
             */
            value = lp_build_fmuladd(builder, ddx, pix_x, value);
            value = lp_build_fmuladd(builder, ddy, pix_y, value);

            if (interp == LP_INTERP_PERSPECTIVE) {
               if (inv_w == NULL) {
                  LLVMValueRef w = bld->attribs[0][3];
                  assert(attrib != 0);
                  assert(bld->mask[0] & TGSI_WRITEMASK_W);
                  inv_w = lp_build_rcp(coeff_bld, w);
               }
               value = lp_build_mul(coeff_bld, value, inv_w);
            }
            break;

         case LP_INTERP_CONSTANT:
         case LP_INTERP_FACING:
            value = lp_build_extract_broadcast(gallivm, setup_bld->type,
                                               coeff_bld->type,
                                               bld->a0aos[attrib],
                                               chan_idx);
            break;

         case LP_INTERP_POSITION:
            assert(attrib > 0);
            value = bld->attribs[0][chan];
            break;

         default:
            assert(0);
            break;
         }

         if (attrib == 0 && chan == 2) {
            /* FIXME: Depth values can exceed 1.0, due to the fact that
             * setup interpolation coefficients refer to (0,0) which causes
             * precision loss. So we must clamp to 1.0 here to avoid artifacts
             */
            value = lp_build_min(coeff_bld, value, coeff_bld->one);
         }

         bld->attribs[attrib][chan] = value;
      }
   }
}
/**
 * Initialize the bld->a and bld->dadq fields.  This involves fetching
 * those values from the arrays which are passed into the JIT function.
 *
 * \param bld       interpolation context to fill in
 * \param a0_ptr    pointer to the a0 coefficients
 * \param dadx_ptr  pointer to the d/dx coefficients
 * \param dady_ptr  pointer to the d/dy coefficients
 *
 * Each pointer is indexed with attrib * TGSI_NUM_CHANNELS and the result is
 * loaded as one setup_bld vector.
 */
static void
coeffs_init(struct lp_build_interp_soa_context *bld,
            LLVMValueRef a0_ptr,
            LLVMValueRef dadx_ptr,
            LLVMValueRef dady_ptr)
{
   struct lp_build_context *coeff_bld = &bld->coeff_bld;
   struct lp_build_context *setup_bld = &bld->setup_bld;
   struct gallivm_state *gallivm = coeff_bld->gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef pix_x, pix_y;
   unsigned attrib;
   unsigned lane;

   /* per-lane pixel offsets taken from the quad_offset_x/y tables */
   pix_x = coeff_bld->undef;
   pix_y = coeff_bld->undef;
   for (lane = 0; lane < coeff_bld->type.length; lane++) {
      LLVMValueRef lane_idx = lp_build_const_int32(gallivm, lane);
      LLVMValueRef off_x = lp_build_const_float(gallivm, quad_offset_x[lane]);
      LLVMValueRef off_y = lp_build_const_float(gallivm, quad_offset_y[lane]);
      pix_x = LLVMBuildInsertElement(builder, pix_x, off_x, lane_idx, "");
      pix_y = LLVMBuildInsertElement(builder, pix_y, off_y, lane_idx, "");
   }

   for (attrib = 0; attrib < bld->num_attribs; ++attrib) {
      const unsigned mask = bld->mask[attrib];
      const unsigned interp = bld->interp[attrib];
      const int is_varying = interp == LP_INTERP_PERSPECTIVE ||
                             interp == LP_INTERP_LINEAR;
      const int is_flat = interp == LP_INTERP_CONSTANT ||
                          interp == LP_INTERP_FACING;
      LLVMValueRef first_elem =
         lp_build_const_int32(gallivm, attrib * TGSI_NUM_CHANNELS);
      LLVMValueRef coef_ptr;
      LLVMValueRef dadxaos = setup_bld->zero;
      LLVMValueRef dadyaos = setup_bld->zero;
      LLVMValueRef a0aos = setup_bld->zero;
      unsigned chan;

      if (interp == LP_INTERP_POSITION) {
         /* Nothing to do as the position coeffs are already setup in slot 0 */
         continue;
      }

      /* always fetch all 4 values for performance/simplicity */
      if (is_varying) {
         coef_ptr = LLVMBuildGEP(builder, dadx_ptr, &first_elem, 1, "");
         coef_ptr = LLVMBuildBitCast(builder, coef_ptr,
                                     LLVMPointerType(setup_bld->vec_type, 0),
                                     "");
         dadxaos = LLVMBuildLoad(builder, coef_ptr, "");

         coef_ptr = LLVMBuildGEP(builder, dady_ptr, &first_elem, 1, "");
         coef_ptr = LLVMBuildBitCast(builder, coef_ptr,
                                     LLVMPointerType(setup_bld->vec_type, 0),
                                     "");
         dadyaos = LLVMBuildLoad(builder, coef_ptr, "");

         attrib_name(dadxaos, attrib, 0, ".dadxaos");
         attrib_name(dadyaos, attrib, 0, ".dadyaos");
      }

      if (is_varying || is_flat) {
         coef_ptr = LLVMBuildGEP(builder, a0_ptr, &first_elem, 1, "");
         coef_ptr = LLVMBuildBitCast(builder, coef_ptr,
                                     LLVMPointerType(setup_bld->vec_type, 0),
                                     "");
         a0aos = LLVMBuildLoad(builder, coef_ptr, "");
         attrib_name(a0aos, attrib, 0, ".a0aos");
      }
      else {
         /* unknown interpolation mode */
         assert(0);
      }

      /*
       * a = a0 + (x * dadx + y * dady)
       * a0aos is the attrib value at top left corner of stamp
       */
      if (!is_flat) {
         LLVMValueRef x = lp_build_broadcast_scalar(setup_bld, bld->x);
         LLVMValueRef y = lp_build_broadcast_scalar(setup_bld, bld->y);
         a0aos = lp_build_fmuladd(builder, x, dadxaos, a0aos);
         a0aos = lp_build_fmuladd(builder, y, dadyaos, a0aos);
      }

      /*
       * dadq = dadx * pix_x + dady * pix_y, i.e. the per-lane delta relative
       * to the first pixel.  With the usual quad layout that is
       * {0, dadx, dady, dadx + dady}.
       * This generates a lot of shuffles...
       */
      for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
         LLVMValueRef ddx, ddy;
         LLVMValueRef dadq;
         LLVMValueRef value;
         LLVMValueRef chan_idx;

         if (!(mask & (1 << chan))) {
            continue;
         }

         chan_idx = lp_build_const_int32(gallivm, chan);

         if (attrib == 0 && chan <= 1) {
            /* position x / y: start at bld->x / bld->y with unit slope */
            value = chan == 0 ? bld->x : bld->y;
            if (bld->pos_offset) {
               value = LLVMBuildFAdd(builder, value,
                                     lp_build_const_float(gallivm,
                                                          bld->pos_offset),
                                     "");
            }
            value = lp_build_broadcast_scalar(coeff_bld, value);
            ddx = chan == 0 ? coeff_bld->one : coeff_bld->zero;
            ddy = chan == 0 ? coeff_bld->zero : coeff_bld->one;
         }
         else {
            ddx = lp_build_extract_broadcast(gallivm, setup_bld->type,
                                             coeff_bld->type,
                                             dadxaos, chan_idx);
            ddy = lp_build_extract_broadcast(gallivm, setup_bld->type,
                                             coeff_bld->type,
                                             dadyaos, chan_idx);

            /*
             * value = {a, a, a, a}
             */
            value = lp_build_extract_broadcast(gallivm, setup_bld->type,
                                               coeff_bld->type,
                                               a0aos, chan_idx);
         }

         ddx = LLVMBuildFMul(builder, ddx, pix_x, "");
         ddy = LLVMBuildFMul(builder, ddy, pix_y, "");
         dadq = LLVMBuildFAdd(builder, ddx, ddy, "");

         /*
          * Compute the attrib values on the upper-left corner of each
          * group of quads.
          * Note that if we process 2 quads at once this doesn't really do
          * exactly what we want.
          * We need to access elem 0 and 2 respectively later if we process
          * 2 quads at once.
          */
         if (!is_flat) {
            LLVMValueRef dadq2 = LLVMBuildFAdd(builder, dadq, dadq, "");
            value = LLVMBuildFAdd(builder, value, dadq2, "");
         }

#if PERSPECTIVE_DIVIDE_PER_QUAD
         /*
          * value *= 1 / w
          */

         /*
          * XXX since we're only going to access elements 0,2 out of 8
          * if we have 8-wide vectors we should do the division only 4-wide.
          * value is really a 2-elements in a 4-wide vector disguised as
          * 8-wide in this case.
          */
         if (interp == LP_INTERP_PERSPECTIVE) {
            /* NOTE(review): bld->a[][] is assigned an lp_build_alloca()
             * result below, so 'w' looks like a pointer here rather than a
             * value -- confirm before enabling PERSPECTIVE_DIVIDE_PER_QUAD. */
            LLVMValueRef w = bld->a[0][3];
            assert(attrib != 0);
            assert(bld->mask[0] & TGSI_WRITEMASK_W);
            if (!bld->oow) {
               bld->oow = lp_build_rcp(coeff_bld, w);
               lp_build_name(bld->oow, "oow");
            }
            value = lp_build_mul(coeff_bld, value, bld->oow);
         }
#endif

         attrib_name(value, attrib, chan, ".a");
         attrib_name(dadq, attrib, chan, ".dadq");

         /* the start value lives in memory, the delta stays an SSA value */
         bld->a[attrib][chan] = lp_build_alloca(gallivm,
                                                LLVMTypeOf(value), "");
         LLVMBuildStore(builder, value, bld->a[attrib][chan]);
         bld->dadq[attrib][chan] = dadq;
      }
   }
}
/*
 * Do a cached lookup.
 *
 * gallivm      code generation state
 * format_desc  format description; must use 4x4 blocks (asserted)
 * n            number of texels to fetch
 * base_ptr     base address of the texture data
 * offset       block offset(s) relative to base_ptr (a vector when n > 1)
 * i, j         texel coordinates inside the block; combined as i * 4 + j
 * cache        cache object handed to the tag / pixel lookup helpers
 *
 * Returns (vectors of) 4x8 rgba aos value
 */
LLVMValueRef
lp_build_fetch_cached_texels(struct gallivm_state *gallivm,
                             const struct util_format_description *format_desc,
                             unsigned n,
                             LLVMValueRef base_ptr,
                             LLVMValueRef offset,
                             LLVMValueRef i,
                             LLVMValueRef j,
                             LLVMValueRef cache)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMTypeRef i8t = LLVMInt8TypeInContext(gallivm->context);
   LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
   LLVMTypeRef i64t = LLVMInt64TypeInContext(gallivm->context);
   unsigned low_bit, log2size;
   LLVMValueRef base_addr, hashed, folded;
   LLVMValueRef slot, slot_mask, texel_in_block, texel_slot;
   LLVMValueRef result;
   struct lp_type type;
   struct lp_build_context bld32;

   memset(&type, 0, sizeof type);
   type.width = 32;
   type.length = n;

   assert(format_desc->block.width == 4);
   assert(format_desc->block.height == 4);

   lp_build_context_init(&bld32, gallivm, type);

   /*
    * compute hash - we use direct mapped cache, the hash function could
    *                be better but it needs to be simple
    * per-element:
    *    compare offset with offset stored at tag (hash)
    *    if not equal decode/store block, update tag
    *    extract color from cache
    *    assemble result vector
    */

   /* TODO: not ideal with 32bit pointers... */

   low_bit = util_logbase2(format_desc->block.bits / 8);
   log2size = util_logbase2(LP_BUILD_FORMAT_CACHE_SIZE);
   base_addr = LLVMBuildPtrToInt(builder, base_ptr, i64t, "");
   hashed = LLVMBuildPtrToInt(builder, base_ptr, i32t, "");
   hashed = lp_build_broadcast_scalar(&bld32, hashed);

   /* For the hash function, first mask off the unused lowest bits. Then just
      do some xor with address bits - only use lower 32bits */
   hashed = LLVMBuildAdd(builder, offset, hashed, "");
   hashed = LLVMBuildLShr(builder, hashed,
                          lp_build_const_int_vec(gallivm, type, low_bit), "");

   /* This only really makes sense for size 64,128,256 */
   slot = hashed;
   hashed = LLVMBuildLShr(builder, hashed,
                          lp_build_const_int_vec(gallivm, type, 2*log2size),
                          "");
   slot = LLVMBuildXor(builder, hashed, slot, "");
   folded = LLVMBuildLShr(builder, slot,
                          lp_build_const_int_vec(gallivm, type, log2size), "");
   slot = LLVMBuildXor(builder, slot, folded, "");

   slot_mask = lp_build_const_int_vec(gallivm, type,
                                      LP_BUILD_FORMAT_CACHE_SIZE - 1);
   slot = LLVMBuildAnd(builder, slot, slot_mask, "");

   /* texel position = slot * 16 + i * 4 + j */
   texel_in_block = LLVMBuildShl(builder, i,
                                 lp_build_const_int_vec(gallivm, type, 2), "");
   texel_in_block = LLVMBuildAdd(builder, texel_in_block, j, "");
   texel_slot = LLVMBuildShl(builder, slot,
                             lp_build_const_int_vec(gallivm, type, 4), "");
   texel_slot = LLVMBuildAdd(builder, texel_in_block, texel_slot, "");

   if (n <= 1) {
      /* scalar case: everything is already a single value */
      struct lp_build_if_state if_ctx;
      LLVMValueRef widened, block_addr, tag, miss;

      widened = LLVMBuildZExt(builder, offset, i64t, "");
      block_addr = LLVMBuildAdd(builder, widened, base_addr, "");
      tag = lookup_tag_data(gallivm, cache, slot);
      miss = LLVMBuildICmp(builder, LLVMIntNE, tag, block_addr, "");

      lp_build_if(&if_ctx, gallivm, miss);
      {
         LLVMValueRef block_ptr =
            LLVMBuildIntToPtr(builder, block_addr,
                              LLVMPointerType(i8t, 0), "");
         update_cached_block(gallivm, format_desc, block_ptr, slot, cache);
#if LP_BUILD_FORMAT_CACHE_DEBUG
         update_cache_access(gallivm, cache, 1,
                             LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_MISS);
#endif
      }
      lp_build_endif(&if_ctx);

      result = lookup_cached_pixel(gallivm, cache, texel_slot);
   }
   else {
      unsigned elem;

      result = LLVMGetUndef(LLVMVectorType(i32t, n));

      for (elem = 0; elem < n; elem++) {
         struct lp_build_if_state if_ctx;
         LLVMValueRef elem_idx, elem_offset, block_addr;
         LLVMValueRef elem_texel_slot, elem_slot;
         LLVMValueRef tag, miss, texel;

         elem_idx = lp_build_const_int32(gallivm, elem);
         elem_offset = LLVMBuildExtractElement(builder, offset, elem_idx, "");
         block_addr = LLVMBuildZExt(builder, elem_offset, i64t, "");
         block_addr = LLVMBuildAdd(builder, block_addr, base_addr, "");

         /* recover the cache slot by dropping the 4 texel-in-block bits */
         elem_texel_slot = LLVMBuildExtractElement(builder, texel_slot,
                                                   elem_idx, "");
         elem_slot = LLVMBuildLShr(builder, elem_texel_slot,
                                   lp_build_const_int32(gallivm, 4), "");

         tag = lookup_tag_data(gallivm, cache, elem_slot);
         miss = LLVMBuildICmp(builder, LLVMIntNE, tag, block_addr, "");

         lp_build_if(&if_ctx, gallivm, miss);
         {
            LLVMValueRef block_ptr =
               LLVMBuildIntToPtr(builder, block_addr,
                                 LLVMPointerType(i8t, 0), "");
            update_cached_block(gallivm, format_desc, block_ptr,
                                elem_slot, cache);
#if LP_BUILD_FORMAT_CACHE_DEBUG
            update_cache_access(gallivm, cache, 1,
                                LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_MISS);
#endif
         }
         lp_build_endif(&if_ctx);

         texel = lookup_cached_pixel(gallivm, cache, elem_texel_slot);

         result = LLVMBuildInsertElement(builder, result, texel,
                                         lp_build_const_int32(gallivm, elem),
                                         "");
      }
   }

#if LP_BUILD_FORMAT_CACHE_DEBUG
   update_cache_access(gallivm, cache, n,
                       LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_TOTAL);
#endif

   /* reinterpret each 32-bit texel as four 8-bit channels */
   return LLVMBuildBitCast(builder, result, LLVMVectorType(i8t, n * 4), "");
}
/** * Generate code for performing depth and/or stencil tests. * We operate on a vector of values (typically n 2x2 quads). * * \param depth the depth test state * \param stencil the front/back stencil state * \param type the data type of the fragment depth/stencil values * \param format_desc description of the depth/stencil surface * \param mask the alive/dead pixel mask for the quad (vector) * \param stencil_refs the front/back stencil ref values (scalar) * \param z_src the incoming depth/stencil values (n 2x2 quad values, float32) * \param zs_dst the depth/stencil values in framebuffer * \param face contains boolean value indicating front/back facing polygon */ void lp_build_depth_stencil_test(struct gallivm_state *gallivm, const struct pipe_depth_state *depth, const struct pipe_stencil_state stencil[2], struct lp_type z_src_type, const struct util_format_description *format_desc, struct lp_build_mask_context *mask, LLVMValueRef stencil_refs[2], LLVMValueRef z_src, LLVMValueRef z_fb, LLVMValueRef s_fb, LLVMValueRef face, LLVMValueRef *z_value, LLVMValueRef *s_value, boolean do_branch) { LLVMBuilderRef builder = gallivm->builder; struct lp_type z_type; struct lp_build_context z_bld; struct lp_build_context s_bld; struct lp_type s_type; unsigned z_shift = 0, z_width = 0, z_mask = 0; LLVMValueRef z_dst = NULL; LLVMValueRef stencil_vals = NULL; LLVMValueRef z_bitmask = NULL, stencil_shift = NULL; LLVMValueRef z_pass = NULL, s_pass_mask = NULL; LLVMValueRef orig_mask = lp_build_mask_value(mask); LLVMValueRef front_facing = NULL; boolean have_z, have_s; /* * Depths are expected to be between 0 and 1, even if they are stored in * floats. Setting these bits here will ensure that the lp_build_conv() call * below won't try to unnecessarily clamp the incoming values. */ if(z_src_type.floating) { z_src_type.sign = FALSE; z_src_type.norm = TRUE; } else { assert(!z_src_type.sign); assert(z_src_type.norm); } /* Pick the type matching the depth-stencil format. 
*/ z_type = lp_depth_type(format_desc, z_src_type.length); /* Pick the intermediate type for depth operations. */ z_type.width = z_src_type.width; assert(z_type.length == z_src_type.length); /* FIXME: for non-float depth/stencil might generate better code * if we'd always split it up to use 128bit operations. * For stencil we'd almost certainly want to pack to 8xi16 values, * for z just run twice. */ /* Sanity checking */ { const unsigned z_swizzle = format_desc->swizzle[0]; const unsigned s_swizzle = format_desc->swizzle[1]; assert(z_swizzle != UTIL_FORMAT_SWIZZLE_NONE || s_swizzle != UTIL_FORMAT_SWIZZLE_NONE); assert(depth->enabled || stencil[0].enabled); assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS); assert(format_desc->block.width == 1); assert(format_desc->block.height == 1); if (stencil[0].enabled) { assert(s_swizzle < 4); assert(format_desc->channel[s_swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED); assert(format_desc->channel[s_swizzle].pure_integer); assert(!format_desc->channel[s_swizzle].normalized); assert(format_desc->channel[s_swizzle].size == 8); } if (depth->enabled) { assert(z_swizzle < 4); if (z_type.floating) { assert(z_swizzle == 0); assert(format_desc->channel[z_swizzle].type == UTIL_FORMAT_TYPE_FLOAT); assert(format_desc->channel[z_swizzle].size == 32); } else { assert(format_desc->channel[z_swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED); assert(format_desc->channel[z_swizzle].normalized); assert(!z_type.fixed); } } } /* Setup build context for Z vals */ lp_build_context_init(&z_bld, gallivm, z_type); /* Setup build context for stencil vals */ s_type = lp_int_type(z_type); lp_build_context_init(&s_bld, gallivm, s_type); /* Compute and apply the Z/stencil bitmasks and shifts. 
*/ { unsigned s_shift, s_mask; z_dst = z_fb; stencil_vals = s_fb; have_z = get_z_shift_and_mask(format_desc, &z_shift, &z_width, &z_mask); have_s = get_s_shift_and_mask(format_desc, &s_shift, &s_mask); if (have_z) { if (z_mask != 0xffffffff) { z_bitmask = lp_build_const_int_vec(gallivm, z_type, z_mask); } /* * Align the framebuffer Z 's LSB to the right. */ if (z_shift) { LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_type, z_shift); z_dst = LLVMBuildLShr(builder, z_dst, shift, "z_dst"); } else if (z_bitmask) { z_dst = LLVMBuildAnd(builder, z_dst, z_bitmask, "z_dst"); } else { lp_build_name(z_dst, "z_dst"); } } if (have_s) { if (s_shift) { LLVMValueRef shift = lp_build_const_int_vec(gallivm, s_type, s_shift); stencil_vals = LLVMBuildLShr(builder, stencil_vals, shift, ""); stencil_shift = shift; /* used below */ } if (s_mask != 0xffffffff) { LLVMValueRef mask = lp_build_const_int_vec(gallivm, s_type, s_mask); stencil_vals = LLVMBuildAnd(builder, stencil_vals, mask, ""); } lp_build_name(stencil_vals, "s_dst"); } } if (stencil[0].enabled) { if (face) { LLVMValueRef zero = lp_build_const_int32(gallivm, 0); /* front_facing = face != 0 ? 
~0 : 0 */ front_facing = LLVMBuildICmp(builder, LLVMIntNE, face, zero, ""); front_facing = LLVMBuildSExt(builder, front_facing, LLVMIntTypeInContext(gallivm->context, s_bld.type.length*s_bld.type.width), ""); front_facing = LLVMBuildBitCast(builder, front_facing, s_bld.int_vec_type, ""); } /* convert scalar stencil refs into vectors */ stencil_refs[0] = lp_build_broadcast_scalar(&s_bld, stencil_refs[0]); stencil_refs[1] = lp_build_broadcast_scalar(&s_bld, stencil_refs[1]); s_pass_mask = lp_build_stencil_test(&s_bld, stencil, stencil_refs, stencil_vals, front_facing); /* apply stencil-fail operator */ { LLVMValueRef s_fail_mask = lp_build_andnot(&s_bld, orig_mask, s_pass_mask); stencil_vals = lp_build_stencil_op(&s_bld, stencil, S_FAIL_OP, stencil_refs, stencil_vals, s_fail_mask, front_facing); } } if (depth->enabled) { /* * Convert fragment Z to the desired type, aligning the LSB to the right. */ assert(z_type.width == z_src_type.width); assert(z_type.length == z_src_type.length); assert(lp_check_value(z_src_type, z_src)); if (z_src_type.floating) { /* * Convert from floating point values */ if (!z_type.floating) { z_src = lp_build_clamped_float_to_unsigned_norm(gallivm, z_src_type, z_width, z_src); } } else { /* * Convert from unsigned normalized values. */ assert(!z_src_type.sign); assert(!z_src_type.fixed); assert(z_src_type.norm); assert(!z_type.floating); if (z_src_type.width > z_width) { LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_src_type, z_src_type.width - z_width); z_src = LLVMBuildLShr(builder, z_src, shift, ""); } } assert(lp_check_value(z_type, z_src)); lp_build_name(z_src, "z_src"); /* compare src Z to dst Z, returning 'pass' mask */ z_pass = lp_build_cmp(&z_bld, depth->func, z_src, z_dst); if (!stencil[0].enabled) { /* We can potentially skip all remaining operations here, but only * if stencil is disabled because we still need to update the stencil * buffer values. Don't need to update Z buffer values. 
*/ lp_build_mask_update(mask, z_pass); if (do_branch) { lp_build_mask_check(mask); do_branch = FALSE; } } if (depth->writemask) { LLVMValueRef zselectmask; /* mask off bits that failed Z test */ zselectmask = LLVMBuildAnd(builder, orig_mask, z_pass, ""); /* mask off bits that failed stencil test */ if (s_pass_mask) { zselectmask = LLVMBuildAnd(builder, zselectmask, s_pass_mask, ""); } /* Mix the old and new Z buffer values. * z_dst[i] = zselectmask[i] ? z_src[i] : z_dst[i] */ z_dst = lp_build_select(&z_bld, zselectmask, z_src, z_dst); } if (stencil[0].enabled) { /* update stencil buffer values according to z pass/fail result */ LLVMValueRef z_fail_mask, z_pass_mask; /* apply Z-fail operator */ z_fail_mask = lp_build_andnot(&s_bld, orig_mask, z_pass); stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_FAIL_OP, stencil_refs, stencil_vals, z_fail_mask, front_facing); /* apply Z-pass operator */ z_pass_mask = LLVMBuildAnd(builder, orig_mask, z_pass, ""); stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_PASS_OP, stencil_refs, stencil_vals, z_pass_mask, front_facing); } } else { /* No depth test: apply Z-pass operator to stencil buffer values which * passed the stencil test. 
*/ s_pass_mask = LLVMBuildAnd(builder, orig_mask, s_pass_mask, ""); stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_PASS_OP, stencil_refs, stencil_vals, s_pass_mask, front_facing); } /* Put Z and stencil bits in the right place */ if (have_z && z_shift) { LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_type, z_shift); z_dst = LLVMBuildShl(builder, z_dst, shift, ""); } if (stencil_vals && stencil_shift) stencil_vals = LLVMBuildShl(builder, stencil_vals, stencil_shift, ""); /* Finally, merge the z/stencil values */ if (format_desc->block.bits <= 32) { if (have_z && have_s) *z_value = LLVMBuildOr(builder, z_dst, stencil_vals, ""); else if (have_z) *z_value = z_dst; else *z_value = stencil_vals; *s_value = *z_value; } else { *z_value = z_dst; *s_value = stencil_vals; } if (s_pass_mask) lp_build_mask_update(mask, s_pass_mask); if (depth->enabled && stencil[0].enabled) lp_build_mask_update(mask, z_pass); }
/**
 * Generate code for performing depth and/or stencil tests.
 * We operate on a vector of values (typically a 2x2 quad).
 *
 * \param builder       the LLVM instruction builder
 * \param depth         the depth test state
 * \param stencil       the front/back stencil state
 * \param type          the data type of the fragment depth/stencil values
 * \param format_desc   description of the depth/stencil surface
 * \param mask          the alive/dead pixel mask for the quad (vector)
 * \param stencil_refs  the front/back stencil ref values (scalar on entry;
 *                      overwritten with broadcast vectors when stencil
 *                      testing is enabled)
 * \param z_src         the incoming depth/stencil values (a 2x2 quad)
 * \param zs_dst_ptr    pointer to depth/stencil values in framebuffer; the
 *                      merged result is stored back when depth or stencil
 *                      writes are enabled
 * \param face          front/back facing indicator, forwarded to the
 *                      stencil helpers
 * \param counter       if non-NULL, passed to lp_build_occlusion_count()
 *                      together with the final mask
 */
void
lp_build_depth_stencil_test(LLVMBuilderRef builder,
                            const struct pipe_depth_state *depth,
                            const struct pipe_stencil_state stencil[2],
                            struct lp_type type,
                            const struct util_format_description *format_desc,
                            struct lp_build_mask_context *mask,
                            LLVMValueRef stencil_refs[2],
                            LLVMValueRef z_src,
                            LLVMValueRef zs_dst_ptr,
                            LLVMValueRef face,
                            LLVMValueRef counter)
{
   struct lp_build_context bld;
   struct lp_build_context sbld;
   struct lp_type s_type;
   unsigned z_shift, z_mask;
   unsigned s_shift, s_mask;
   LLVMValueRef zs_dst, z_dst = NULL;
   LLVMValueRef stencil_vals = NULL;
   LLVMValueRef z_bitmask = NULL, stencil_shift = NULL;
   LLVMValueRef z_pass = NULL, s_pass_mask = NULL;
   LLVMValueRef orig_mask = mask->value;

   /* Sanity checking */
   {
      const unsigned z_swizzle = format_desc->swizzle[0];
      const unsigned s_swizzle = format_desc->swizzle[1];

      assert(z_swizzle != UTIL_FORMAT_SWIZZLE_NONE ||
             s_swizzle != UTIL_FORMAT_SWIZZLE_NONE);

      assert(depth->enabled || stencil[0].enabled);

      assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS);
      assert(format_desc->block.width == 1);
      assert(format_desc->block.height == 1);

      if (stencil[0].enabled) {
         assert(format_desc->format == PIPE_FORMAT_Z24_UNORM_S8_USCALED ||
                format_desc->format == PIPE_FORMAT_S8_USCALED_Z24_UNORM);
      }

      assert(z_swizzle < 4);
      assert(format_desc->block.bits == type.width);
      if (type.floating) {
         assert(z_swizzle == 0);
         assert(format_desc->channel[z_swizzle].type ==
                UTIL_FORMAT_TYPE_FLOAT);
         assert(format_desc->channel[z_swizzle].size ==
                format_desc->block.bits);
      }
      else {
         assert(format_desc->channel[z_swizzle].type ==
                UTIL_FORMAT_TYPE_UNSIGNED);
         assert(format_desc->channel[z_swizzle].normalized);
         assert(!type.fixed);
         assert(!type.sign);
         assert(type.norm);
      }
   }

   /* Setup build context for Z vals */
   lp_build_context_init(&bld, builder, type);

   /* Setup build context for stencil vals */
   s_type = lp_type_int_vec(type.width);
   lp_build_context_init(&sbld, builder, s_type);

   /* Load current z/stencil value from z/stencil buffer */
   zs_dst = LLVMBuildLoad(builder, zs_dst_ptr, "");

   lp_build_name(zs_dst, "zsbufval");

   /* Compute and apply the Z/stencil bitmasks and shifts. */
   if (get_z_shift_and_mask(format_desc, &z_shift, &z_mask)) {
      if (z_shift) {
         LLVMValueRef z_shift_vec = lp_build_const_int_vec(type, z_shift);
         z_src = LLVMBuildLShr(builder, z_src, z_shift_vec, "");
      }

      if (z_mask == 0xffffffff) {
         /* Z occupies the whole word: nothing to mask */
         z_dst = zs_dst;
      }
      else {
         /* kept around: also limits the Z write mask further down */
         z_bitmask = lp_build_const_int_vec(type, z_mask);
         z_src = LLVMBuildAnd(builder, z_src, z_bitmask, "");
         z_dst = LLVMBuildAnd(builder, zs_dst, z_bitmask, "");
      }

      lp_build_name(z_dst, "zsbuf.z");
   }

   if (get_s_shift_and_mask(format_desc, &s_shift, &s_mask)) {
      if (s_shift) {
         /* kept around: the same shift moves the result back at the end */
         stencil_shift = lp_build_const_int_vec(type, s_shift);
         stencil_vals = LLVMBuildLShr(builder, zs_dst, stencil_shift, "");
      }
      else {
         stencil_vals = zs_dst;
      }

      if (s_mask != 0xffffffff) {
         LLVMValueRef s_bitmask = lp_build_const_int_vec(type, s_mask);
         stencil_vals = LLVMBuildAnd(builder, stencil_vals, s_bitmask, "");
      }

      lp_build_name(stencil_vals, "stencil");
   }

   if (stencil[0].enabled) {
      LLVMValueRef s_fail_mask;

      /* convert scalar stencil refs into vectors */
      stencil_refs[0] = lp_build_broadcast_scalar(&bld, stencil_refs[0]);
      stencil_refs[1] = lp_build_broadcast_scalar(&bld, stencil_refs[1]);

      s_pass_mask = lp_build_stencil_test(&sbld, stencil,
                                          stencil_refs, stencil_vals, face);

      /* apply stencil-fail operator */
      s_fail_mask = lp_build_andc(&bld, orig_mask, s_pass_mask);
      stencil_vals = lp_build_stencil_op(&sbld, stencil, S_FAIL_OP,
                                         stencil_refs, stencil_vals,
                                         s_fail_mask, face);
   }

   if (!depth->enabled) {
      /* No depth test: apply Z-pass operator to stencil buffer values which
       * passed the stencil test.
       */
      s_pass_mask = LLVMBuildAnd(bld.builder, orig_mask, s_pass_mask, "");
      stencil_vals = lp_build_stencil_op(&sbld, stencil, Z_PASS_OP,
                                         stencil_refs, stencil_vals,
                                         s_pass_mask, face);
   }
   else {
      /* compare src Z to dst Z, returning 'pass' mask */
      z_pass = lp_build_cmp(&bld, depth->func, z_src, z_dst);

      if (!stencil[0].enabled) {
         /* We can potentially skip all remaining operations here, but only
          * if stencil is disabled because we still need to update the stencil
          * buffer values.  Don't need to update Z buffer values.
          */
         lp_build_mask_update(mask, z_pass);
      }

      if (depth->writemask) {
         /* start from the current (possibly just updated) live mask */
         LLVMValueRef z_write_mask = mask->value;

         /* mask off bits that failed Z test */
         z_write_mask = LLVMBuildAnd(builder, z_write_mask, z_pass, "");

         /* mask off bits that failed stencil test */
         if (s_pass_mask) {
            z_write_mask = LLVMBuildAnd(builder, z_write_mask,
                                        s_pass_mask, "");
         }

         /* if combined Z/stencil format, mask off the stencil bits */
         if (z_bitmask) {
            z_write_mask = LLVMBuildAnd(builder, z_write_mask,
                                        z_bitmask, "");
         }

         /* Mix the old and new Z buffer values.
          * z_dst[i] = (z_write_mask[i] & z_src[i]) | (~z_write_mask[i] & z_dst[i])
          */
         z_dst = lp_build_select_bitwise(&bld, z_write_mask, z_src, z_dst);
      }

      if (stencil[0].enabled) {
         /* update stencil buffer values according to z pass/fail result */
         LLVMValueRef z_fail_mask, z_pass_mask;

         /* apply Z-fail operator */
         z_fail_mask = lp_build_andc(&bld, orig_mask, z_pass);
         stencil_vals = lp_build_stencil_op(&sbld, stencil, Z_FAIL_OP,
                                            stencil_refs, stencil_vals,
                                            z_fail_mask, face);

         /* apply Z-pass operator */
         z_pass_mask = LLVMBuildAnd(bld.builder, orig_mask, z_pass, "");
         stencil_vals = lp_build_stencil_op(&sbld, stencil, Z_PASS_OP,
                                            stencil_refs, stencil_vals,
                                            z_pass_mask, face);
      }
   }

   /* The Z bits are already in the right place but we may need to shift the
    * stencil bits before ORing Z with Stencil to make the final pixel value.
    */
   if (stencil_vals && stencil_shift) {
      stencil_vals = LLVMBuildShl(bld.builder, stencil_vals,
                                  stencil_shift, "");
   }

   /* Finally, merge/store the z/stencil values */
   if ((depth->enabled && depth->writemask) ||
       (stencil[0].enabled && stencil[0].writemask)) {

      if (z_dst && stencil_vals) {
         zs_dst = LLVMBuildOr(bld.builder, z_dst, stencil_vals, "");
      }
      else if (z_dst) {
         zs_dst = z_dst;
      }
      else {
         zs_dst = stencil_vals;
      }

      LLVMBuildStore(builder, zs_dst, zs_dst_ptr);
   }

   if (s_pass_mask) {
      lp_build_mask_update(mask, s_pass_mask);
   }

   if (depth->enabled && stencil[0].enabled) {
      lp_build_mask_update(mask, z_pass);
   }

   if (counter) {
      lp_build_occlusion_count(builder, type, mask->value, counter);
   }
}
/**
 * AoS (array-of-structures) texture sampling path, used for the common
 * 32-bit/texel formats.  Handles 1D/2D/3D/cube targets and every mipmap
 * sampling mode, but only the simple texture coordinate wrap modes.
 *
 * \param bld  the sampler code generation context
 * \param unit  the texture unit index
 * \param s, t, r  the texture coordinates
 * \param ddx, ddy  coordinate derivatives handed to the LOD selector
 *                  (recomputed here from the face coords for cube maps)
 * \param lod_bias  optional LOD bias
 * \param explicit_lod  optional explicit LOD
 * \param texel_out  receives the four sampled channels in SoA form
 */
void
lp_build_sample_aos(struct lp_build_sample_context *bld,
                    unsigned unit,
                    LLVMValueRef s,
                    LLVMValueRef t,
                    LLVMValueRef r,
                    const LLVMValueRef *ddx,
                    const LLVMValueRef *ddy,
                    LLVMValueRef lod_bias, /* optional */
                    LLVMValueRef explicit_lod, /* optional */
                    LLVMValueRef texel_out[4])
{
   struct lp_build_context *int_bld = &bld->int_bld;
   LLVMBuilderRef builder = bld->gallivm->builder;
   const unsigned mip_filter = bld->static_state->min_mip_filter;
   const unsigned min_filter = bld->static_state->min_img_filter;
   const unsigned mag_filter = bld->static_state->mag_img_filter;
   const unsigned dims = bld->dims;
   const int is_cube = (bld->static_state->target == PIPE_TEXTURE_CUBE);
   LLVMValueRef lod_ipart = NULL, lod_fpart = NULL;
   LLVMValueRef ilevel0, ilevel1 = NULL;
   LLVMValueRef packed, packed_lo, packed_hi;
   LLVMValueRef unswizzled[4];
   LLVMValueRef face_ddx[4], face_ddy[4];
   struct lp_build_context fixed16_bld;
   LLVMValueRef zero_i32 = lp_build_const_int32(bld->gallivm, 0);
   unsigned chan;

   /* Only the common/simple wrap modes are handled by this path. */
   assert(lp_is_simple_wrap_mode(bld->static_state->wrap_s));
   if (dims >= 2) {
      assert(lp_is_simple_wrap_mode(bld->static_state->wrap_t));
   }
   if (dims >= 3) {
      assert(lp_is_simple_wrap_mode(bld->static_state->wrap_r));
   }

   /* Build context for 16-bit unsigned fixed-point vectors. */
   lp_build_context_init(&fixed16_bld, bld->gallivm, lp_type_ufixed(16));

   /* Cube maps: pick the face and derive the per-face (s,t) coordinates. */
   if (is_cube) {
      LLVMValueRef face, face_s, face_t;

      lp_build_cube_lookup(bld, s, t, r, &face, &face_s, &face_t);
      s = face_s; /* vec */
      t = face_t; /* vec */

      /* 'r' now carries the cube face index */
      r = lp_build_broadcast_scalar(&bld->int_coord_bld, face); /* vec */

      /* Derivatives must be recomputed from the new face coordinates. */
      face_ddx[0] = lp_build_scalar_ddx(&bld->coord_bld, s);
      face_ddx[1] = lp_build_scalar_ddx(&bld->coord_bld, t);
      face_ddx[2] = NULL;
      face_ddx[3] = NULL;
      face_ddy[0] = lp_build_scalar_ddy(&bld->coord_bld, s);
      face_ddy[1] = lp_build_scalar_ddy(&bld->coord_bld, t);
      face_ddy[2] = NULL;
      face_ddy[3] = NULL;

      ddx = face_ddx;
      ddy = face_ddy;
   }

   /*
    * Level of detail.  It is needed either to pick mipmap levels or, with a
    * single level, to tell minification from magnification when the two
    * image filters differ.  Otherwise a constant zero integer part is used.
    */
   if (min_filter != mag_filter ||
       mip_filter != PIPE_TEX_MIPFILTER_NONE) {
      lp_build_lod_selector(bld, unit, ddx, ddy,
                            lod_bias, explicit_lod,
                            mip_filter,
                            &lod_ipart, &lod_fpart);
   }
   else {
      lod_ipart = zero_i32;
   }

   /*
    * Integer mipmap level(s) to fetch texels from: ilevel0 and, for linear
    * mip filtering, ilevel1.
    */
   switch (mip_filter) {
   case PIPE_TEX_MIPFILTER_NEAREST:
      assert(lod_ipart);
      lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0);
      break;
   case PIPE_TEX_MIPFILTER_LINEAR:
      assert(lod_ipart);
      assert(lod_fpart);
      lp_build_linear_mip_levels(bld, unit,
                                 lod_ipart, &lod_fpart,
                                 &ilevel0, &ilevel1);
      break;
   default:
      assert(0 && "bad mip_filter value in lp_build_sample_aos()");
      /* fall-through */
   case PIPE_TEX_MIPFILTER_NONE:
      if (is_cube) {
         /* XXX work-around for an apparent bug in LLVM 2.7: setting
          * ilevel0 = const(0) should be enough, but that causes bad x86
          * code to be emitted, so go through the nearest-level helper.
          */
         assert(lod_ipart);
         lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0);
      }
      else {
         /* No mipmapping: sample from the texture's first level. */
         ilevel0 = bld->dynamic_state->first_level(bld->dynamic_state,
                                                   bld->gallivm, unit);
      }
      break;
   }

   /*
    * Fetch/interpolate the texture colors into two stack variables.
    */
   packed_lo = lp_build_alloca(bld->gallivm, fixed16_bld.vec_type, "packed_lo");
   packed_hi = lp_build_alloca(bld->gallivm, fixed16_bld.vec_type, "packed_hi");

   if (min_filter != mag_filter) {
      /* Emit a run-time branch selecting the min or mag image filter. */
      struct lp_build_if_state branch;
      LLVMValueRef minify;

      /* minify = (lod_ipart >= 0) */
      minify = LLVMBuildICmp(builder, LLVMIntSGE,
                             lod_ipart, int_bld->zero, "");

      lp_build_if(&branch, bld->gallivm, minify);
      {
         /* Minification filter, with the configured mip filter */
         lp_build_sample_mipmap(bld,
                                min_filter, mip_filter,
                                s, t, r,
                                ilevel0, ilevel1, lod_fpart,
                                packed_lo, packed_hi);
      }
      lp_build_else(&branch);
      {
         /* Magnification filter, single level only */
         lp_build_sample_mipmap(bld,
                                mag_filter, PIPE_TEX_MIPFILTER_NONE,
                                s, t, r,
                                ilevel0, NULL, NULL,
                                packed_lo, packed_hi);
      }
      lp_build_endif(&branch);
   }
   else {
      /* Same image filter either way: no min/mag distinction required. */
      lp_build_sample_mipmap(bld,
                             min_filter, mip_filter,
                             s, t, r,
                             ilevel0, ilevel1, lod_fpart,
                             packed_lo, packed_hi);
   }

   /*
    * Pack the values held in the 'packed_lo' and 'packed_hi' variables
    * into a single 8-bit unorm vector.
    */
   packed = lp_build_pack2(bld->gallivm,
                           fixed16_bld.type, lp_type_unorm(8),
                           LLVMBuildLoad(builder, packed_lo, ""),
                           LLVMBuildLoad(builder, packed_hi, ""));

   /*
    * Convert to SoA, then apply the format swizzle for rgba8 variants.
    */
   lp_build_rgba8_to_f32_soa(bld->gallivm,
                             bld->texel_type,
                             packed, unswizzled);

   if (util_format_is_rgba8_variant(bld->format_desc)) {
      lp_build_format_swizzle_soa(bld->format_desc,
                                  &bld->texel_bld,
                                  unswizzled, texel_out);
   }
   else {
      for (chan = 0; chan < 4; chan++) {
         texel_out[chan] = unswizzled[chan];
      }
   }
}
/**
 * Sample the texture/mipmap using given image filter and mip filter.
 *
 * The first level (ilevel0) is always sampled and its colors are stored to
 * the output variables.  With PIPE_TEX_MIPFILTER_LINEAR, and only when the
 * fixed-point LOD fraction is greater than zero at run time, the second
 * level (ilevel1) is sampled as well and the two results are interpolated
 * before being stored again.
 *
 * \param bld  the sampler code generation context
 * \param img_filter  PIPE_TEX_FILTER_NEAREST or PIPE_TEX_FILTER_LINEAR
 * \param mip_filter  the mip filter; only PIPE_TEX_MIPFILTER_LINEAR triggers
 *                    sampling of a second level
 * \param s, t, r  the texture coordinates
 * \param ilevel0  index of the first mipmap level to sample
 * \param ilevel1  index of the second mipmap level; unused (may be NULL)
 *                 unless mip_filter is PIPE_TEX_MIPFILTER_LINEAR
 * \param lod_fpart  fractional LOD used as the interpolation weight; unused
 *                   (may be NULL) unless mip_filter is
 *                   PIPE_TEX_MIPFILTER_LINEAR
 * \param colors_lo_var  pointer the low half of the result is stored to
 * \param colors_hi_var  pointer the high half of the result is stored to
 */
static void
lp_build_sample_mipmap(struct lp_build_sample_context *bld,
                       unsigned img_filter,
                       unsigned mip_filter,
                       LLVMValueRef s,
                       LLVMValueRef t,
                       LLVMValueRef r,
                       LLVMValueRef ilevel0,
                       LLVMValueRef ilevel1,
                       LLVMValueRef lod_fpart,
                       LLVMValueRef colors_lo_var,
                       LLVMValueRef colors_hi_var)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef size0;
   LLVMValueRef size1;
   /* lp_build_mipmap_level_sizes() only fills in the row stride when
    * dims >= 2 and the image stride for 3D/cube textures, so start from
    * NULL instead of passing indeterminate values on to the samplers.
    */
   LLVMValueRef row_stride0_vec = NULL;
   LLVMValueRef row_stride1_vec = NULL;
   LLVMValueRef img_stride0_vec = NULL;
   LLVMValueRef img_stride1_vec = NULL;
   LLVMValueRef data_ptr0;
   LLVMValueRef data_ptr1;
   LLVMValueRef colors0_lo, colors0_hi;
   LLVMValueRef colors1_lo, colors1_hi;

   /* sample the first mipmap level */
   lp_build_mipmap_level_sizes(bld, ilevel0,
                               &size0,
                               &row_stride0_vec, &img_stride0_vec);
   data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
   if (img_filter == PIPE_TEX_FILTER_NEAREST) {
      lp_build_sample_image_nearest(bld,
                                    size0,
                                    row_stride0_vec, img_stride0_vec,
                                    data_ptr0, s, t, r,
                                    &colors0_lo, &colors0_hi);
   }
   else {
      assert(img_filter == PIPE_TEX_FILTER_LINEAR);
      lp_build_sample_image_linear(bld,
                                   size0,
                                   row_stride0_vec, img_stride0_vec,
                                   data_ptr0, s, t, r,
                                   &colors0_lo, &colors0_hi);
   }

   /* Store the first level's colors in the output variables */
   LLVMBuildStore(builder, colors0_lo, colors_lo_var);
   LLVMBuildStore(builder, colors0_hi, colors_hi_var);

   if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
      LLVMValueRef h16_scale = lp_build_const_float(bld->gallivm, 256.0);
      LLVMTypeRef i32_type = LLVMIntTypeInContext(bld->gallivm->context, 32);
      struct lp_build_if_state if_ctx;
      LLVMValueRef need_lerp;

      /* Scale the float LOD fraction by 256 and convert it to an integer */
      lod_fpart = LLVMBuildFMul(builder, lod_fpart, h16_scale, "");
      lod_fpart = LLVMBuildFPToSI(builder, lod_fpart, i32_type,
                                  "lod_fpart.fixed16");

      /* need_lerp = lod_fpart > 0 */
      need_lerp = LLVMBuildICmp(builder, LLVMIntSGT,
                                lod_fpart, LLVMConstNull(i32_type),
                                "need_lerp");

      /* Skip the second fetch entirely when the weight is zero */
      lp_build_if(&if_ctx, bld->gallivm, need_lerp);
      {
         struct lp_build_context h16_bld;

         lp_build_context_init(&h16_bld, bld->gallivm, lp_type_ufixed(16));

         /* sample the second mipmap level */
         lp_build_mipmap_level_sizes(bld, ilevel1,
                                     &size1,
                                     &row_stride1_vec, &img_stride1_vec);
         data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
         if (img_filter == PIPE_TEX_FILTER_NEAREST) {
            lp_build_sample_image_nearest(bld,
                                          size1,
                                          row_stride1_vec, img_stride1_vec,
                                          data_ptr1, s, t, r,
                                          &colors1_lo, &colors1_hi);
         }
         else {
            lp_build_sample_image_linear(bld,
                                         size1,
                                         row_stride1_vec, img_stride1_vec,
                                         data_ptr1, s, t, r,
                                         &colors1_lo, &colors1_hi);
         }

         /* interpolate samples from the two mipmap levels */

         lod_fpart = LLVMBuildTrunc(builder, lod_fpart, h16_bld.elem_type, "");
         lod_fpart = lp_build_broadcast_scalar(&h16_bld, lod_fpart);

#if HAVE_LLVM == 0x208
         /* This is a work-around for a bug in LLVM 2.8.
          * Evidently, something goes wrong in the construction of the
          * lod_fpart short[8] vector.  Adding this no-effect shuffle seems
          * to force the vector to be properly constructed.
          * Tested with mesa-demos/src/tests/mipmap_limits.c (press t, f).
          */
         {
            LLVMValueRef shuffles[8], shuffle;
            int i;
            assert(h16_bld.type.length <= Elements(shuffles));
            for (i = 0; i < h16_bld.type.length; i++)
               shuffles[i] = lp_build_const_int32(bld->gallivm, 2 * (i & 1));
            shuffle = LLVMConstVector(shuffles, h16_bld.type.length);
            lod_fpart = LLVMBuildShuffleVector(builder,
                                               lod_fpart, lod_fpart,
                                               shuffle, "");
         }
#endif

         colors0_lo = lp_build_lerp(&h16_bld, lod_fpart,
                                    colors0_lo, colors1_lo);
         colors0_hi = lp_build_lerp(&h16_bld, lod_fpart,
                                    colors0_hi, colors1_hi);

         /* Overwrite the first level's colors with the blended result */
         LLVMBuildStore(builder, colors0_lo, colors_lo_var);
         LLVMBuildStore(builder, colors0_hi, colors_hi_var);
      }
      lp_build_endif(&if_ctx);
   }
}