/** * Extract and broadcast texture size. * * @param size_type type of the texture size vector (either * bld->int_size_type or bld->float_size_type) * @param coord_type type of the texture size vector (either * bld->int_coord_type or bld->coord_type) * @param int_size vector with the integer texture size (width, height, * depth) */ void lp_build_extract_image_sizes(struct lp_build_sample_context *bld, struct lp_type size_type, struct lp_type coord_type, LLVMValueRef size, LLVMValueRef *out_width, LLVMValueRef *out_height, LLVMValueRef *out_depth) { const unsigned dims = bld->dims; LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context); *out_width = lp_build_extract_broadcast(bld->gallivm, size_type, coord_type, size, LLVMConstInt(i32t, 0, 0)); if (dims >= 2) { *out_height = lp_build_extract_broadcast(bld->gallivm, size_type, coord_type, size, LLVMConstInt(i32t, 1, 0)); if (dims == 3) { *out_depth = lp_build_extract_broadcast(bld->gallivm, size_type, coord_type, size, LLVMConstInt(i32t, 2, 0)); } } }
/** * Unpack and broadcast packed aos values consisting of only the * first value, i.e. x1 x2 _ _ will become x1 x1 x1 x1 x2 x2 x2 x2 */ LLVMValueRef lp_build_unpack_broadcast_aos_scalars(struct gallivm_state *gallivm, struct lp_type src_type, struct lp_type dst_type, const LLVMValueRef src) { LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context); LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; unsigned num_dst = dst_type.length; unsigned num_src = dst_type.length / 4; unsigned i; assert(num_dst / 4 <= src_type.length); for (i = 0; i < num_src; i++) { shuffles[i*4] = LLVMConstInt(i32t, i, 0); shuffles[i*4+1] = LLVMConstInt(i32t, i, 0); shuffles[i*4+2] = LLVMConstInt(i32t, i, 0); shuffles[i*4+3] = LLVMConstInt(i32t, i, 0); } if (num_src == 1) { return lp_build_extract_broadcast(gallivm, src_type, dst_type, src, shuffles[0]); } else { return LLVMBuildShuffleVector(gallivm->builder, src, src, LLVMConstVector(shuffles, num_dst), ""); } }
/** * Initialize the bld->a, dadq fields. This involves fetching * those values from the arrays which are passed into the JIT function. */ static void coeffs_init(struct lp_build_interp_soa_context *bld, LLVMValueRef a0_ptr, LLVMValueRef dadx_ptr, LLVMValueRef dady_ptr) { struct lp_build_context *coeff_bld = &bld->coeff_bld; struct lp_build_context *setup_bld = &bld->setup_bld; struct gallivm_state *gallivm = coeff_bld->gallivm; LLVMBuilderRef builder = gallivm->builder; LLVMValueRef pixoffx, pixoffy; unsigned attrib; unsigned chan; unsigned i; pixoffx = coeff_bld->undef; pixoffy = coeff_bld->undef; for (i = 0; i < coeff_bld->type.length; i++) { LLVMValueRef nr = lp_build_const_int32(gallivm, i); LLVMValueRef pixxf = lp_build_const_float(gallivm, quad_offset_x[i]); LLVMValueRef pixyf = lp_build_const_float(gallivm, quad_offset_y[i]); pixoffx = LLVMBuildInsertElement(builder, pixoffx, pixxf, nr, ""); pixoffy = LLVMBuildInsertElement(builder, pixoffy, pixyf, nr, ""); } for (attrib = 0; attrib < bld->num_attribs; ++attrib) { const unsigned mask = bld->mask[attrib]; const unsigned interp = bld->interp[attrib]; LLVMValueRef index = lp_build_const_int32(gallivm, attrib * TGSI_NUM_CHANNELS); LLVMValueRef ptr; LLVMValueRef dadxaos = setup_bld->zero; LLVMValueRef dadyaos = setup_bld->zero; LLVMValueRef a0aos = setup_bld->zero; /* always fetch all 4 values for performance/simplicity */ switch (interp) { case LP_INTERP_PERSPECTIVE: /* fall-through */ case LP_INTERP_LINEAR: ptr = LLVMBuildGEP(builder, dadx_ptr, &index, 1, ""); ptr = LLVMBuildBitCast(builder, ptr, LLVMPointerType(setup_bld->vec_type, 0), ""); dadxaos = LLVMBuildLoad(builder, ptr, ""); ptr = LLVMBuildGEP(builder, dady_ptr, &index, 1, ""); ptr = LLVMBuildBitCast(builder, ptr, LLVMPointerType(setup_bld->vec_type, 0), ""); dadyaos = LLVMBuildLoad(builder, ptr, ""); attrib_name(dadxaos, attrib, 0, ".dadxaos"); attrib_name(dadyaos, attrib, 0, ".dadyaos"); /* fall-through */ case LP_INTERP_CONSTANT: case LP_INTERP_FACING: ptr = LLVMBuildGEP(builder, a0_ptr, &index, 1, ""); ptr = LLVMBuildBitCast(builder, ptr, LLVMPointerType(setup_bld->vec_type, 0), ""); a0aos = LLVMBuildLoad(builder, ptr, ""); attrib_name(a0aos, attrib, 0, ".a0aos"); break; case LP_INTERP_POSITION: /* Nothing to do as the position coeffs are already setup in slot 0 */ continue; default: assert(0); break; } /* * a = a0 + (x * dadx + y * dady) * a0aos is the attrib value at top left corner of stamp */ if (interp != LP_INTERP_CONSTANT && interp != LP_INTERP_FACING) { LLVMValueRef x = lp_build_broadcast_scalar(setup_bld, bld->x); LLVMValueRef y = lp_build_broadcast_scalar(setup_bld, bld->y); a0aos = lp_build_fmuladd(builder, x, dadxaos, a0aos); a0aos = lp_build_fmuladd(builder, y, dadyaos, a0aos); } /* * dadq = {0, dadx, dady, dadx + dady} * for two quads (side by side) this is: * {0, dadx, dady, dadx+dady, 2*dadx, 2*dadx+dady, 3*dadx+dady} */ for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) { /* this generates a CRAPLOAD of shuffles... */ if (mask & (1 << chan)) { LLVMValueRef dadx, dady; LLVMValueRef dadq, dadq2; LLVMValueRef a; LLVMValueRef chan_index = lp_build_const_int32(gallivm, chan); if (attrib == 0 && chan == 0) { a = bld->x; if (bld->pos_offset) { a = LLVMBuildFAdd(builder, a, lp_build_const_float(gallivm, bld->pos_offset), ""); } a = lp_build_broadcast_scalar(coeff_bld, a); dadx = coeff_bld->one; dady = coeff_bld->zero; } else if (attrib == 0 && chan == 1) { a = bld->y; if (bld->pos_offset) { a = LLVMBuildFAdd(builder, a, lp_build_const_float(gallivm, bld->pos_offset), ""); } a = lp_build_broadcast_scalar(coeff_bld, a); dady = coeff_bld->one; dadx = coeff_bld->zero; } else { dadx = lp_build_extract_broadcast(gallivm, setup_bld->type, coeff_bld->type, dadxaos, chan_index); dady = lp_build_extract_broadcast(gallivm, setup_bld->type, coeff_bld->type, dadyaos, chan_index); /* * a = {a, a, a, a} */ a = lp_build_extract_broadcast(gallivm, setup_bld->type, coeff_bld->type, a0aos, chan_index); } dadx = LLVMBuildFMul(builder, dadx, pixoffx, ""); dady = LLVMBuildFMul(builder, dady, pixoffy, ""); dadq = LLVMBuildFAdd(builder, dadx, dady, ""); /* * Compute the attrib values on the upper-left corner of each * group of quads. * Note that if we process 2 quads at once this doesn't * really exactly to what we want. * We need to access elem 0 and 2 respectively later if we process * 2 quads at once. */ if (interp != LP_INTERP_CONSTANT && interp != LP_INTERP_FACING) { dadq2 = LLVMBuildFAdd(builder, dadq, dadq, ""); a = LLVMBuildFAdd(builder, a, dadq2, ""); } #if PERSPECTIVE_DIVIDE_PER_QUAD /* * a *= 1 / w */ /* * XXX since we're only going to access elements 0,2 out of 8 * if we have 8-wide vectors we should do the division only 4-wide. * a is really a 2-elements in a 4-wide vector disguised as 8-wide * in this case. */ if (interp == LP_INTERP_PERSPECTIVE) { LLVMValueRef w = bld->a[0][3]; assert(attrib != 0); assert(bld->mask[0] & TGSI_WRITEMASK_W); if (!bld->oow) { bld->oow = lp_build_rcp(coeff_bld, w); lp_build_name(bld->oow, "oow"); } a = lp_build_mul(coeff_bld, a, bld->oow); } #endif attrib_name(a, attrib, chan, ".a"); attrib_name(dadq, attrib, chan, ".dadq"); bld->a[attrib][chan] = lp_build_alloca(gallivm, LLVMTypeOf(a), ""); LLVMBuildStore(builder, a, bld->a[attrib][chan]); bld->dadq[attrib][chan] = dadq; } } } }
/** * Interpolate the shader input attribute values. * This is called for each (group of) quad(s). */ static void attribs_update_simple(struct lp_build_interp_soa_context *bld, struct gallivm_state *gallivm, LLVMValueRef loop_iter, int start, int end) { LLVMBuilderRef builder = gallivm->builder; struct lp_build_context *coeff_bld = &bld->coeff_bld; struct lp_build_context *setup_bld = &bld->setup_bld; LLVMValueRef oow = NULL; unsigned attrib; LLVMValueRef pixoffx; LLVMValueRef pixoffy; LLVMValueRef ptr; /* could do this with code-generated passed in pixel offsets too */ assert(loop_iter); ptr = LLVMBuildGEP(builder, bld->xoffset_store, &loop_iter, 1, ""); pixoffx = LLVMBuildLoad(builder, ptr, ""); ptr = LLVMBuildGEP(builder, bld->yoffset_store, &loop_iter, 1, ""); pixoffy = LLVMBuildLoad(builder, ptr, ""); pixoffx = LLVMBuildFAdd(builder, pixoffx, lp_build_broadcast_scalar(coeff_bld, bld->x), ""); pixoffy = LLVMBuildFAdd(builder, pixoffy, lp_build_broadcast_scalar(coeff_bld, bld->y), ""); for (attrib = start; attrib < end; attrib++) { const unsigned mask = bld->mask[attrib]; const unsigned interp = bld->interp[attrib]; unsigned chan; for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { if (mask & (1 << chan)) { LLVMValueRef index; LLVMValueRef dadx = coeff_bld->zero; LLVMValueRef dady = coeff_bld->zero; LLVMValueRef a = coeff_bld->zero; index = lp_build_const_int32(gallivm, chan); switch (interp) { case LP_INTERP_PERSPECTIVE: /* fall-through */ case LP_INTERP_LINEAR: if (attrib == 0 && chan == 0) { dadx = coeff_bld->one; if (bld->pos_offset) { a = lp_build_const_vec(gallivm, coeff_bld->type, bld->pos_offset); } } else if (attrib == 0 && chan == 1) { dady = coeff_bld->one; if (bld->pos_offset) { a = lp_build_const_vec(gallivm, coeff_bld->type, bld->pos_offset); } } else { dadx = lp_build_extract_broadcast(gallivm, setup_bld->type, coeff_bld->type, bld->dadxaos[attrib], index); dady = lp_build_extract_broadcast(gallivm, setup_bld->type, coeff_bld->type, bld->dadyaos[attrib], index); a = lp_build_extract_broadcast(gallivm, setup_bld->type, coeff_bld->type, bld->a0aos[attrib], index); } /* * a = a0 + (x * dadx + y * dady) */ a = lp_build_fmuladd(builder, dadx, pixoffx, a); a = lp_build_fmuladd(builder, dady, pixoffy, a); if (interp == LP_INTERP_PERSPECTIVE) { if (oow == NULL) { LLVMValueRef w = bld->attribs[0][3]; assert(attrib != 0); assert(bld->mask[0] & TGSI_WRITEMASK_W); oow = lp_build_rcp(coeff_bld, w); } a = lp_build_mul(coeff_bld, a, oow); } break; case LP_INTERP_CONSTANT: case LP_INTERP_FACING: a = lp_build_extract_broadcast(gallivm, setup_bld->type, coeff_bld->type, bld->a0aos[attrib], index); break; case LP_INTERP_POSITION: assert(attrib > 0); a = bld->attribs[0][chan]; break; default: assert(0); break; } if ((attrib == 0) && (chan == 2)){ /* FIXME: Depth values can exceed 1.0, due to the fact that * setup interpolation coefficients refer to (0,0) which causes * precision loss. So we must clamp to 1.0 here to avoid artifacts */ a = lp_build_min(coeff_bld, a, coeff_bld->one); } bld->attribs[attrib][chan] = a; } } } }