/* * Helper for building packed ddx/ddy vector for one coord (scalar per quad * values). The vector will look like this (8-wide): * dr1dx _____ -dr1dy _____ dr2dx _____ -dr2dy _____ * This only requires one shuffle instead of two for more straightforward packing. */ LLVMValueRef lp_build_packed_ddx_ddy_onecoord(struct lp_build_context *bld, LLVMValueRef a) { struct gallivm_state *gallivm = bld->gallivm; LLVMBuilderRef builder = gallivm->builder; LLVMValueRef vec1, vec2; /* use aos swizzle helper */ static const unsigned char swizzle1[] = { /* no-op swizzle */ LP_BLD_QUAD_TOP_LEFT, LP_BLD_SWIZZLE_DONTCARE, LP_BLD_QUAD_BOTTOM_LEFT, LP_BLD_SWIZZLE_DONTCARE }; static const unsigned char swizzle2[] = { LP_BLD_QUAD_TOP_RIGHT, LP_BLD_SWIZZLE_DONTCARE, LP_BLD_QUAD_TOP_LEFT, LP_BLD_SWIZZLE_DONTCARE }; vec1 = lp_build_swizzle_aos(bld, a, swizzle1); vec2 = lp_build_swizzle_aos(bld, a, swizzle2); if (bld->type.floating) return LLVMBuildFSub(builder, vec2, vec1, "ddxddy"); else return LLVMBuildSub(builder, vec2, vec1, "ddxddy"); }
/* * Helper for building packed ddx/ddy vector for one coord (scalar per quad * values). The vector will look like this (8-wide): * ds1dx ds1dy dt1dx dt1dy ds2dx ds2dy dt2dx dt2dy * This only needs 2 (v)shufps. */ LLVMValueRef lp_build_packed_ddx_ddy_twocoord(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) { struct gallivm_state *gallivm = bld->gallivm; LLVMBuilderRef builder = gallivm->builder; LLVMValueRef shuffles1[LP_MAX_VECTOR_LENGTH/4]; LLVMValueRef shuffles2[LP_MAX_VECTOR_LENGTH/4]; LLVMValueRef vec1, vec2; unsigned length, num_quads, i; /* XXX: do hsub version */ length = bld->type.length; num_quads = length / 4; for (i = 0; i < num_quads; i++) { unsigned s1 = 4 * i; unsigned s2 = 4 * i + length; shuffles1[4*i + 0] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_LEFT + s1); shuffles1[4*i + 1] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_LEFT + s1); shuffles1[4*i + 2] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_LEFT + s2); shuffles1[4*i + 3] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_LEFT + s2); shuffles2[4*i + 0] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_RIGHT + s1); shuffles2[4*i + 1] = lp_build_const_int32(gallivm, LP_BLD_QUAD_BOTTOM_LEFT + s1); shuffles2[4*i + 2] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_RIGHT + s2); shuffles2[4*i + 3] = lp_build_const_int32(gallivm, LP_BLD_QUAD_BOTTOM_LEFT + s2); } vec1 = LLVMBuildShuffleVector(builder, a, b, LLVMConstVector(shuffles1, length), ""); vec2 = LLVMBuildShuffleVector(builder, a, b, LLVMConstVector(shuffles2, length), ""); if (bld->type.floating) return LLVMBuildFSub(builder, vec2, vec1, "ddxddyddxddy"); else return LLVMBuildSub(builder, vec2, vec1, "ddxddyddxddy"); }
/* * To be able to handle multiple quads at once in texture sampling and * do lod calculations per quad, it is necessary to get the per-quad * derivatives into the lp_build_rho function. * For 8-wide vectors the packed derivative values for 3 coords would * look like this, this scales to a arbitrary (multiple of 4) vector size: * ds1dx ds1dy dt1dx dt1dy ds2dx ds2dy dt2dx dt2dy * dr1dx dr1dy _____ _____ dr2dx dr2dy _____ _____ * The second vector will be unused for 1d and 2d textures. */ LLVMValueRef lp_build_packed_ddx_ddy_onecoord(struct lp_build_context *bld, LLVMValueRef a) { struct gallivm_state *gallivm = bld->gallivm; LLVMBuilderRef builder = gallivm->builder; LLVMValueRef vec1, vec2; /* same packing as _twocoord, but can use aos swizzle helper */ /* * XXX could make swizzle1 a noop swizzle by using right top/bottom * pair for ddy */ static const unsigned char swizzle1[] = { LP_BLD_QUAD_TOP_LEFT, LP_BLD_QUAD_TOP_LEFT, LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE }; static const unsigned char swizzle2[] = { LP_BLD_QUAD_TOP_RIGHT, LP_BLD_QUAD_BOTTOM_LEFT, LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE }; vec1 = lp_build_swizzle_aos(bld, a, swizzle1); vec2 = lp_build_swizzle_aos(bld, a, swizzle2); if (bld->type.floating) return LLVMBuildFSub(builder, vec2, vec1, "ddxddy"); else return LLVMBuildSub(builder, vec2, vec1, "ddxddy"); }
/* Perform front/back face culling and return true if the primitive is accepted. */ static LLVMValueRef ac_cull_face(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4], struct ac_position_w_info *w, bool cull_front, bool cull_back, bool cull_zero_area) { LLVMBuilderRef builder = ctx->builder; if (cull_front && cull_back) return ctx->i1false; if (!cull_front && !cull_back && !cull_zero_area) return ctx->i1true; /* Front/back face culling. Also if the determinant == 0, the triangle * area is 0. */ LLVMValueRef det_t0 = LLVMBuildFSub(builder, pos[2][0], pos[0][0], ""); LLVMValueRef det_t1 = LLVMBuildFSub(builder, pos[1][1], pos[0][1], ""); LLVMValueRef det_t2 = LLVMBuildFSub(builder, pos[0][0], pos[1][0], ""); LLVMValueRef det_t3 = LLVMBuildFSub(builder, pos[0][1], pos[2][1], ""); LLVMValueRef det_p0 = LLVMBuildFMul(builder, det_t0, det_t1, ""); LLVMValueRef det_p1 = LLVMBuildFMul(builder, det_t2, det_t3, ""); LLVMValueRef det = LLVMBuildFSub(builder, det_p0, det_p1, ""); /* Negative W negates the determinant. */ det = LLVMBuildSelect(builder, w->w_reflection, LLVMBuildFNeg(builder, det, ""), det, ""); LLVMValueRef accepted = NULL; if (cull_front) { LLVMRealPredicate cond = cull_zero_area ? LLVMRealOGT : LLVMRealOGE; accepted = LLVMBuildFCmp(builder, cond, det, ctx->f32_0, ""); } else if (cull_back) { LLVMRealPredicate cond = cull_zero_area ? LLVMRealOLT : LLVMRealOLE; accepted = LLVMBuildFCmp(builder, cond, det, ctx->f32_0, ""); } else if (cull_zero_area) { accepted = LLVMBuildFCmp(builder, LLVMRealONE, det, ctx->f32_0, ""); } return accepted; }
/* * SI implements derivatives using the local data store (LDS) * All writes to the LDS happen in all executing threads at * the same time. TID is the Thread ID for the current * thread and is a value between 0 and 63, representing * the thread's position in the wavefront. * * For the pixel shader threads are grouped into quads of four pixels. * The TIDs of the pixels of a quad are: * * +------+------+ * |4n + 0|4n + 1| * +------+------+ * |4n + 2|4n + 3| * +------+------+ * * So, masking the TID with 0xfffffffc yields the TID of the top left pixel * of the quad, masking with 0xfffffffd yields the TID of the top pixel of * the current pixel's column, and masking with 0xfffffffe yields the TID * of the left pixel of the current pixel's row. * * Adding 1 yields the TID of the pixel to the right of the left pixel, and * adding 2 yields the TID of the pixel below the top pixel. */ LLVMValueRef ac_build_ddxy(struct ac_llvm_context *ctx, bool has_ds_bpermute, uint32_t mask, int idx, LLVMValueRef lds, LLVMValueRef val) { LLVMValueRef thread_id, tl, trbl, tl_tid, trbl_tid, args[2]; LLVMValueRef result; thread_id = ac_get_thread_id(ctx); tl_tid = LLVMBuildAnd(ctx->builder, thread_id, LLVMConstInt(ctx->i32, mask, false), ""); trbl_tid = LLVMBuildAdd(ctx->builder, tl_tid, LLVMConstInt(ctx->i32, idx, false), ""); if (has_ds_bpermute) { args[0] = LLVMBuildMul(ctx->builder, tl_tid, LLVMConstInt(ctx->i32, 4, false), ""); args[1] = val; tl = ac_build_intrinsic(ctx, "llvm.amdgcn.ds.bpermute", ctx->i32, args, 2, AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT); args[0] = LLVMBuildMul(ctx->builder, trbl_tid, LLVMConstInt(ctx->i32, 4, false), ""); trbl = ac_build_intrinsic(ctx, "llvm.amdgcn.ds.bpermute", ctx->i32, args, 2, AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT); } else { LLVMValueRef store_ptr, load_ptr0, load_ptr1; store_ptr = ac_build_gep0(ctx, lds, thread_id); load_ptr0 = ac_build_gep0(ctx, lds, tl_tid); load_ptr1 = ac_build_gep0(ctx, lds, trbl_tid); LLVMBuildStore(ctx->builder, val, store_ptr); tl = LLVMBuildLoad(ctx->builder, load_ptr0, ""); trbl = LLVMBuildLoad(ctx->builder, load_ptr1, ""); } tl = LLVMBuildBitCast(ctx->builder, tl, ctx->f32, ""); trbl = LLVMBuildBitCast(ctx->builder, trbl, ctx->f32, ""); result = LLVMBuildFSub(ctx->builder, trbl, tl, ""); return result; }
/** * Inverse of lp_build_clamped_float_to_unsigned_norm above. * Ex: src = { i32, i32, i32, i32 } with values in range [0, 2^src_width-1] * return {float, float, float, float} with values in range [0, 1]. */ LLVMValueRef lp_build_unsigned_norm_to_float(LLVMBuilderRef builder, unsigned src_width, struct lp_type dst_type, LLVMValueRef src) { LLVMTypeRef vec_type = lp_build_vec_type(dst_type); LLVMTypeRef int_vec_type = lp_build_int_vec_type(dst_type); LLVMValueRef bias_; LLVMValueRef res; unsigned mantissa; unsigned n; unsigned long long ubound; unsigned long long mask; double scale; double bias; assert(dst_type.floating); mantissa = lp_mantissa(dst_type); n = MIN2(mantissa, src_width); ubound = ((unsigned long long)1 << n); mask = ubound - 1; scale = (double)ubound/mask; bias = (double)((unsigned long long)1 << (mantissa - n)); res = src; if(src_width > mantissa) { int shift = src_width - mantissa; res = LLVMBuildLShr(builder, res, lp_build_const_int_vec(dst_type, shift), ""); } bias_ = lp_build_const_vec(dst_type, bias); res = LLVMBuildOr(builder, res, LLVMBuildBitCast(builder, bias_, int_vec_type, ""), ""); res = LLVMBuildBitCast(builder, res, vec_type, ""); res = LLVMBuildFSub(builder, res, bias_, ""); res = LLVMBuildFMul(builder, res, lp_build_const_vec(dst_type, scale), ""); return res; }
static LLVMValueRef translateFloatBinOp(NodeKind Op, LLVMValueRef ValueE1, LLVMValueRef ValueE2) { switch (Op) { case SumOp: return LLVMBuildFAdd(Builder, ValueE1, ValueE2, ""); case SubOp: return LLVMBuildFSub(Builder, ValueE1, ValueE2, ""); case MultOp: return LLVMBuildFMul(Builder, ValueE1, ValueE2, ""); case DivOp: return LLVMBuildFDiv(Builder, ValueE1, ValueE2, ""); case LtOp: return LLVMBuildFCmp(Builder, LLVMRealOLT, ValueE1, ValueE2, ""); case LeOp: return LLVMBuildFCmp(Builder, LLVMRealOLE, ValueE1, ValueE2, ""); case GtOp: return LLVMBuildFCmp(Builder, LLVMRealOGT, ValueE1, ValueE2, ""); case GeOp: return LLVMBuildFCmp(Builder, LLVMRealOGE, ValueE1, ValueE2, ""); case EqOp: return LLVMBuildFCmp(Builder, LLVMRealOEQ, ValueE1, ValueE2, ""); case DiffOp: return LLVMBuildFCmp(Builder, LLVMRealONE, ValueE1, ValueE2, ""); default: return NULL; } }
static void emit_frac(const struct lp_build_tgsi_action *action, struct lp_build_tgsi_context *bld_base, struct lp_build_emit_data *emit_data) { LLVMBuilderRef builder = bld_base->base.gallivm->builder; char *intr; if (emit_data->info->opcode == TGSI_OPCODE_FRC) intr = "llvm.floor.f32"; else if (emit_data->info->opcode == TGSI_OPCODE_DFRAC) intr = "llvm.floor.f64"; else { assert(0); return; } LLVMValueRef floor = lp_build_intrinsic(builder, intr, emit_data->dst_type, &emit_data->args[0], 1, LLVMReadNoneAttribute); emit_data->output[emit_data->chan] = LLVMBuildFSub(builder, emit_data->args[0], floor, ""); }
/** * Inverse of lp_build_clamped_float_to_unsigned_norm above. * Ex: src = { i32, i32, i32, i32 } with values in range [0, 2^src_width-1] * return {float, float, float, float} with values in range [0, 1]. */ LLVMValueRef lp_build_unsigned_norm_to_float(struct gallivm_state *gallivm, unsigned src_width, struct lp_type dst_type, LLVMValueRef src) { LLVMBuilderRef builder = gallivm->builder; LLVMTypeRef vec_type = lp_build_vec_type(gallivm, dst_type); LLVMTypeRef int_vec_type = lp_build_int_vec_type(gallivm, dst_type); LLVMValueRef bias_; LLVMValueRef res; unsigned mantissa; unsigned n; unsigned long long ubound; unsigned long long mask; double scale; double bias; assert(dst_type.floating); mantissa = lp_mantissa(dst_type); if (src_width <= (mantissa + 1)) { /* * The source width matches fits what can be represented in floating * point (i.e., mantissa + 1 bits). So do a straight multiplication * followed by casting. No further rounding is necessary. */ scale = 1.0/(double)((1ULL << src_width) - 1); res = LLVMBuildSIToFP(builder, src, vec_type, ""); res = LLVMBuildFMul(builder, res, lp_build_const_vec(gallivm, dst_type, scale), ""); return res; } else { /* * The source width exceeds what can be represented in floating * point. So truncate the incoming values. */ n = MIN2(mantissa, src_width); ubound = ((unsigned long long)1 << n); mask = ubound - 1; scale = (double)ubound/mask; bias = (double)((unsigned long long)1 << (mantissa - n)); res = src; if (src_width > mantissa) { int shift = src_width - mantissa; res = LLVMBuildLShr(builder, res, lp_build_const_int_vec(gallivm, dst_type, shift), ""); } bias_ = lp_build_const_vec(gallivm, dst_type, bias); res = LLVMBuildOr(builder, res, LLVMBuildBitCast(builder, bias_, int_vec_type, ""), ""); res = LLVMBuildBitCast(builder, res, vec_type, ""); res = LLVMBuildFSub(builder, res, bias_, ""); res = LLVMBuildFMul(builder, res, lp_build_const_vec(gallivm, dst_type, scale), ""); } return res; }
/* Perform view culling and small primitive elimination and return true * if the primitive is accepted and initially_accepted == true. */ static LLVMValueRef cull_bbox(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4], LLVMValueRef initially_accepted, struct ac_position_w_info *w, LLVMValueRef vp_scale[2], LLVMValueRef vp_translate[2], LLVMValueRef small_prim_precision, bool cull_view_xy, bool cull_view_near_z, bool cull_view_far_z, bool cull_small_prims, bool use_halfz_clip_space) { LLVMBuilderRef builder = ctx->builder; if (!cull_view_xy && !cull_view_near_z && !cull_view_far_z && !cull_small_prims) return ctx->i1true; /* Skip the culling if the primitive has already been rejected or * if any W is negative. The bounding box culling doesn't work when * W is negative. */ LLVMValueRef cond = LLVMBuildAnd(builder, initially_accepted, w->all_w_positive, ""); LLVMValueRef accepted_var = ac_build_alloca_undef(ctx, ctx->i1, ""); LLVMBuildStore(builder, initially_accepted, accepted_var); ac_build_ifcc(ctx, cond, 10000000 /* does this matter? */); { LLVMValueRef bbox_min[3], bbox_max[3]; LLVMValueRef accepted = initially_accepted; /* Compute the primitive bounding box for easy culling. */ for (unsigned chan = 0; chan < 3; chan++) { bbox_min[chan] = ac_build_fmin(ctx, pos[0][chan], pos[1][chan]); bbox_min[chan] = ac_build_fmin(ctx, bbox_min[chan], pos[2][chan]); bbox_max[chan] = ac_build_fmax(ctx, pos[0][chan], pos[1][chan]); bbox_max[chan] = ac_build_fmax(ctx, bbox_max[chan], pos[2][chan]); } /* View culling. */ if (cull_view_xy || cull_view_near_z || cull_view_far_z) { for (unsigned chan = 0; chan < 3; chan++) { LLVMValueRef visible; if ((cull_view_xy && chan <= 1) || (cull_view_near_z && chan == 2)) { float t = chan == 2 && use_halfz_clip_space ? 0 : -1; visible = LLVMBuildFCmp(builder, LLVMRealOGE, bbox_max[chan], LLVMConstReal(ctx->f32, t), ""); accepted = LLVMBuildAnd(builder, accepted, visible, ""); } if ((cull_view_xy && chan <= 1) || (cull_view_far_z && chan == 2)) { visible = LLVMBuildFCmp(builder, LLVMRealOLE, bbox_min[chan], ctx->f32_1, ""); accepted = LLVMBuildAnd(builder, accepted, visible, ""); } } } /* Small primitive elimination. */ if (cull_small_prims) { /* Assuming a sample position at (0.5, 0.5), if we round * the bounding box min/max extents and the results of * the rounding are equal in either the X or Y direction, * the bounding box does not intersect the sample. * * See these GDC slides for pictures: * https://frostbite-wp-prd.s3.amazonaws.com/wp-content/uploads/2016/03/29204330/GDC_2016_Compute.pdf */ LLVMValueRef min, max, not_equal[2], visible; for (unsigned chan = 0; chan < 2; chan++) { /* Convert the position to screen-space coordinates. */ min = ac_build_fmad(ctx, bbox_min[chan], vp_scale[chan], vp_translate[chan]); max = ac_build_fmad(ctx, bbox_max[chan], vp_scale[chan], vp_translate[chan]); /* Scale the bounding box according to the precision of * the rasterizer and the number of MSAA samples. */ min = LLVMBuildFSub(builder, min, small_prim_precision, ""); max = LLVMBuildFAdd(builder, max, small_prim_precision, ""); /* Determine if the bbox intersects the sample point. * It also works for MSAA, but vp_scale, vp_translate, * and small_prim_precision are computed differently. */ min = ac_build_round(ctx, min); max = ac_build_round(ctx, max); not_equal[chan] = LLVMBuildFCmp(builder, LLVMRealONE, min, max, ""); } visible = LLVMBuildAnd(builder, not_equal[0], not_equal[1], ""); accepted = LLVMBuildAnd(builder, accepted, visible, ""); } LLVMBuildStore(builder, accepted, accepted_var); } ac_build_endif(ctx, 10000000); return LLVMBuildLoad(builder, accepted_var, ""); }
void si_prepare_cube_coords(struct lp_build_tgsi_context *bld_base, struct lp_build_emit_data *emit_data, LLVMValueRef *coords_arg, LLVMValueRef *derivs_arg) { unsigned target = emit_data->inst->Texture.Texture; unsigned opcode = emit_data->inst->Instruction.Opcode; struct gallivm_state *gallivm = bld_base->base.gallivm; LLVMBuilderRef builder = gallivm->builder; LLVMValueRef coords[4]; unsigned i; si_llvm_cube_to_2d_coords(bld_base, coords_arg, coords); if (opcode == TGSI_OPCODE_TXD && derivs_arg) { LLVMValueRef derivs[4]; int axis; /* Convert cube derivatives to 2D derivatives. */ for (axis = 0; axis < 2; axis++) { LLVMValueRef shifted_cube_coords[4], shifted_coords[4]; /* Shift the cube coordinates by the derivatives to get * the cube coordinates of the "neighboring pixel". */ for (i = 0; i < 3; i++) shifted_cube_coords[i] = LLVMBuildFAdd(builder, coords_arg[i], derivs_arg[axis*3+i], ""); shifted_cube_coords[3] = LLVMGetUndef(bld_base->base.elem_type); /* Project the shifted cube coordinates onto the face. */ si_llvm_cube_to_2d_coords(bld_base, shifted_cube_coords, shifted_coords); /* Subtract both sets of 2D coordinates to get 2D derivatives. * This won't work if the shifted coordinates ended up * in a different face. */ for (i = 0; i < 2; i++) derivs[axis * 2 + i] = LLVMBuildFSub(builder, shifted_coords[i], coords[i], ""); } memcpy(derivs_arg, derivs, sizeof(derivs)); } if (target == TGSI_TEXTURE_CUBE_ARRAY || target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) { /* for cube arrays coord.z = coord.w(array_index) * 8 + face */ /* coords_arg.w component - array_index for cube arrays */ coords[2] = lp_build_emit_llvm_ternary(bld_base, TGSI_OPCODE_MAD, coords_arg[3], lp_build_const_float(gallivm, 8.0), coords[2]); } /* Preserve compare/lod/bias. Put it in coords.w. */ if (opcode == TGSI_OPCODE_TEX2 || opcode == TGSI_OPCODE_TXB2 || opcode == TGSI_OPCODE_TXL2) { coords[3] = coords_arg[4]; } else if (opcode == TGSI_OPCODE_TXB || opcode == TGSI_OPCODE_TXL || target == TGSI_TEXTURE_SHADOWCUBE) { coords[3] = coords_arg[3]; } memcpy(coords_arg, coords, sizeof(coords)); }
void ac_prepare_cube_coords(struct ac_llvm_context *ctx, bool is_deriv, bool is_array, LLVMValueRef *coords_arg, LLVMValueRef *derivs_arg) { LLVMBuilderRef builder = ctx->builder; struct cube_selection_coords selcoords; LLVMValueRef coords[3]; LLVMValueRef invma; build_cube_intrinsic(ctx, coords_arg, &selcoords); invma = ac_build_intrinsic(ctx, "llvm.fabs.f32", ctx->f32, &selcoords.ma, 1, AC_FUNC_ATTR_READNONE); invma = ac_build_fdiv(ctx, LLVMConstReal(ctx->f32, 1.0), invma); for (int i = 0; i < 2; ++i) coords[i] = LLVMBuildFMul(builder, selcoords.stc[i], invma, ""); coords[2] = selcoords.id; if (is_deriv && derivs_arg) { LLVMValueRef derivs[4]; int axis; /* Convert cube derivatives to 2D derivatives. */ for (axis = 0; axis < 2; axis++) { LLVMValueRef deriv_st[2]; LLVMValueRef deriv_ma; /* Transform the derivative alongside the texture * coordinate. Mathematically, the correct formula is * as follows. Assume we're projecting onto the +Z face * and denote by dx/dh the derivative of the (original) * X texture coordinate with respect to horizontal * window coordinates. The projection onto the +Z face * plane is: * * f(x,z) = x/z * * Then df/dh = df/dx * dx/dh + df/dz * dz/dh * = 1/z * dx/dh - x/z * 1/z * dz/dh. * * This motivatives the implementation below. * * Whether this actually gives the expected results for * apps that might feed in derivatives obtained via * finite differences is anyone's guess. The OpenGL spec * seems awfully quiet about how textureGrad for cube * maps should be handled. */ build_cube_select(builder, &selcoords, &derivs_arg[axis * 3], deriv_st, &deriv_ma); deriv_ma = LLVMBuildFMul(builder, deriv_ma, invma, ""); for (int i = 0; i < 2; ++i) derivs[axis * 2 + i] = LLVMBuildFSub(builder, LLVMBuildFMul(builder, deriv_st[i], invma, ""), LLVMBuildFMul(builder, deriv_ma, coords[i], ""), ""); } memcpy(derivs_arg, derivs, sizeof(derivs)); } /* Shift the texture coordinate. This must be applied after the * derivative calculation. */ for (int i = 0; i < 2; ++i) coords[i] = LLVMBuildFAdd(builder, coords[i], LLVMConstReal(ctx->f32, 1.5), ""); if (is_array) { /* for cube arrays coord.z = coord.w(array_index) * 8 + face */ /* coords_arg.w component - array_index for cube arrays */ LLVMValueRef tmp = LLVMBuildFMul(ctx->builder, coords_arg[3], LLVMConstReal(ctx->f32, 8.0), ""); coords[2] = LLVMBuildFAdd(ctx->builder, tmp, coords[2], ""); } memcpy(coords_arg, coords, sizeof(coords)); }