/**
 * Emit the blend stage for one render target and write the blended
 * colors back to the color buffer.
 *
 * \param blend        blend state; its per-render-target colormask decides
 *                     which channels are stored
 * \param rt           the render target index (to index blend, colormask state)
 * \param builder      LLVM IR builder the code is emitted into
 * \param type         the pixel color type
 * \param context_ptr  pointer to the runtime JIT context
 * \param mask         execution mask (active fragment/pixel mask)
 * \param src          colors from the fragment shader, one value per channel
 * \param dst_ptr      the destination color buffer pointer
 */
static void
generate_blend(const struct pipe_blend_state *blend,
               unsigned rt,
               LLVMBuilderRef builder,
               struct lp_type type,
               LLVMValueRef context_ptr,
               LLVMValueRef mask,
               LLVMValueRef *src,
               LLVMValueRef dst_ptr)
{
   static const char channel_names[] = "rgba";
   struct lp_build_context select_bld;
   struct lp_build_mask_context live_mask;
   struct lp_build_flow_context *flow_ctx;
   LLVMTypeRef color_vec_type;
   LLVMValueRef blend_color_ptr;
   LLVMValueRef blend_color[4];
   LLVMValueRef framebuffer[4];
   LLVMValueRef blended[4];
   unsigned c;

   lp_build_context_init(&select_bld, builder, type);

   flow_ctx = lp_build_flow_create(builder);

   /* The mask context lets blending be skipped when all pixels are dead. */
   lp_build_mask_begin(&live_mask, flow_ctx, type, mask);

   color_vec_type = lp_build_vec_type(type);

   /* View the JIT context's blend color as an array of color vectors. */
   blend_color_ptr = lp_jit_context_blend_color(builder, context_ptr);
   blend_color_ptr = LLVMBuildBitCast(builder, blend_color_ptr,
                                      LLVMPointerType(color_vec_type, 0), "");

   /* Fetch the constant blend color and the current destination colors. */
   for (c = 0; c < 4; ++c) {
      LLVMValueRef chan_index = LLVMConstInt(LLVMInt32Type(), c, 0);
      LLVMValueRef elem_ptr;

      elem_ptr = LLVMBuildGEP(builder, blend_color_ptr, &chan_index, 1, "");
      blend_color[c] = LLVMBuildLoad(builder, elem_ptr, "");

      elem_ptr = LLVMBuildGEP(builder, dst_ptr, &chan_index, 1, "");
      framebuffer[c] = LLVMBuildLoad(builder, elem_ptr, "");

      lp_build_name(blend_color[c], "con.%c", channel_names[c]);
      lp_build_name(framebuffer[c], "dst.%c", channel_names[c]);
   }

   /* Blend. */
   lp_build_blend_soa(builder, blend, type, rt,
                      src, framebuffer, blend_color, blended);

   /* Write back only the channels enabled in this target's colormask. */
   for (c = 0; c < 4; ++c) {
      LLVMValueRef chan_index;
      LLVMValueRef store_ptr;

      if (!(blend->rt[rt].colormask & (1 << c)))
         continue;

      chan_index = LLVMConstInt(LLVMInt32Type(), c, 0);
      lp_build_name(blended[c], "res.%c", channel_names[c]);

      /* Inactive pixels keep the value that was already in the buffer. */
      blended[c] = lp_build_select(&select_bld, mask, blended[c], framebuffer[c]);

      store_ptr = LLVMBuildGEP(builder, dst_ptr, &chan_index, 1, "");
      LLVMBuildStore(builder, blended[c], store_ptr);
   }

   lp_build_mask_end(&live_mask);
   lp_build_flow_destroy(flow_ctx);
}
/**
 * For PIPE_TEX_MIPFILTER_LINEAR: turn the integer part of the LOD into two
 * adjacent mipmap level indices.  The caller later samples both levels and
 * interpolates between them using the fractional weight.
 *
 * \param bld              sample build context (supplies the int/float
 *                         contexts and the dynamic sampler state)
 * \param unit             sampler unit, used to query the last level and to
 *                         name the resulting values
 * \param lod_ipart        integer part of the LOD
 * \param lod_fpart_inout  fractional LOD weight; replaced by zero whenever
 *                         the LOD is clamped at either end
 * \param level0_out       receives the first mipmap level index
 * \param level1_out       receives the second mipmap level index
 */
void
lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
                           unsigned unit,
                           LLVMValueRef lod_ipart,
                           LLVMValueRef *lod_fpart_inout,
                           LLVMValueRef *level0_out,
                           LLVMValueRef *level1_out)
{
   LLVMBuilderRef b = bld->gallivm->builder;
   struct lp_build_context *ibld = &bld->int_bld;
   struct lp_build_context *fbld = &bld->float_bld;
   LLVMValueRef max_level;
   LLVMValueRef below_first;
   LLVMValueRef at_or_past_last;

   /* Unclamped candidates: lod_ipart and lod_ipart + 1. */
   *level0_out = lod_ipart;
   *level1_out = lp_build_add(ibld, lod_ipart, ibld->one);

   max_level = bld->dynamic_state->last_level(bld->dynamic_state,
                                              bld->gallivm, unit);

   /*
    * Both candidates are clamped to [0, max_level] using only two
    * comparisons on lod_ipart; the weight is zeroed at either extreme.
    */

   /* Case lod_ipart < 0: both levels become 0. */
   below_first = LLVMBuildICmp(b, LLVMIntSLT,
                               lod_ipart, ibld->zero,
                               "clamp_lod_to_zero");

   *level0_out = LLVMBuildSelect(b, below_first,
                                 ibld->zero, *level0_out, "");

   *level1_out = LLVMBuildSelect(b, below_first,
                                 ibld->zero, *level1_out, "");

   *lod_fpart_inout = LLVMBuildSelect(b, below_first,
                                      fbld->zero, *lod_fpart_inout, "");

   /* Case lod_ipart >= max_level: both levels become max_level. */
   at_or_past_last = LLVMBuildICmp(b, LLVMIntSGE,
                                   lod_ipart, max_level,
                                   "clamp_lod_to_last");

   *level0_out = LLVMBuildSelect(b, at_or_past_last,
                                 max_level, *level0_out, "");

   *level1_out = LLVMBuildSelect(b, at_or_past_last,
                                 max_level, *level1_out, "");

   *lod_fpart_inout = LLVMBuildSelect(b, at_or_past_last,
                                      fbld->zero, *lod_fpart_inout, "");

   lp_build_name(*level0_out, "sampler%u_miplevel0", unit);
   lp_build_name(*level1_out, "sampler%u_miplevel1", unit);
   lp_build_name(*lod_fpart_inout, "sampler%u_mipweight", unit);
}
/**
 * Give an LLVM value a readable name derived from the attribute slot.
 *
 * Slot 0 is named "pos.<c><suffix>"; any other slot is named
 * "input<attrib - 1>.<c><suffix>", where <c> is the channel letter picked
 * from "xyzw" by \p chan.
 *
 * \param val     value to name
 * \param attrib  attribute slot
 * \param chan    channel index used to index "xyzw"
 * \param suffix  text appended after the channel letter
 */
static void
attrib_name(LLVMValueRef val, unsigned attrib, unsigned chan, const char *suffix)
{
   const char component = "xyzw"[chan];

   if (attrib != 0) {
      lp_build_name(val, "input%u.%c%s", attrib - 1, component, suffix);
      return;
   }

   lp_build_name(val, "pos.%c%s", component, suffix);
}
/**
 * Generate color blending and color output.
 *
 * Loads the constant blend color from the JIT context and the current
 * destination colors from \p dst_ptr, blends them with the shader colors,
 * and stores back every channel enabled in blend->colormask.
 *
 * \param blend        blend state (also supplies the colormask)
 * \param builder      LLVM IR builder the code is emitted into
 * \param type         the pixel color type
 * \param context_ptr  pointer to the runtime JIT context
 * \param mask         execution mask; pixels not selected by it keep their
 *                     destination color
 * \param src          colors from the fragment shader, one value per channel
 * \param dst_ptr      the destination color buffer pointer
 */
static void
generate_blend(const struct pipe_blend_state *blend,
               LLVMBuilderRef builder,
               struct lp_type type,
               LLVMValueRef context_ptr,
               LLVMValueRef mask,
               LLVMValueRef *src,
               LLVMValueRef dst_ptr)
{
   struct lp_build_context bld;
   struct lp_build_flow_context *flow;
   struct lp_build_mask_context mask_ctx;
   LLVMTypeRef vec_type;
   LLVMValueRef const_ptr;
   LLVMValueRef con[4];
   LLVMValueRef dst[4];
   LLVMValueRef res[4];
   unsigned chan;

   lp_build_context_init(&bld, builder, type);

   flow = lp_build_flow_create(builder);
   lp_build_mask_begin(&mask_ctx, flow, type, mask);

   vec_type = lp_build_vec_type(type);

   /* Reinterpret the context's blend color as an array of color vectors. */
   const_ptr = lp_jit_context_blend_color(builder, context_ptr);
   const_ptr = LLVMBuildBitCast(builder, const_ptr,
                                LLVMPointerType(vec_type, 0), "");

   /* Load the constant blend color and the colors from the dest buffer. */
   for(chan = 0; chan < 4; ++chan) {
      LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), chan, 0);
      con[chan] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, const_ptr, &index, 1, ""), "");
      dst[chan] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dst_ptr, &index, 1, ""), "");
      lp_build_name(con[chan], "con.%c", "rgba"[chan]);
      lp_build_name(dst[chan], "dst.%c", "rgba"[chan]);
   }

   /* Do the blend. */
   lp_build_blend_soa(builder, blend, type, src, dst, con, res);

   /* Store the results of the channels enabled in the colormask. */
   for(chan = 0; chan < 4; ++chan) {
      if(blend->colormask & (1 << chan)) {
         LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), chan, 0);
         lp_build_name(res[chan], "res.%c", "rgba"[chan]);
         /* Pixels outside the mask keep their previous destination value. */
         res[chan] = lp_build_select(&bld, mask, res[chan], dst[chan]);
         LLVMBuildStore(builder, res[chan], LLVMBuildGEP(builder, dst_ptr, &index, 1, ""));
      }
   }

   lp_build_mask_end(&mask_ctx);
   lp_build_flow_destroy(flow);
}
static LLVMValueRef add_blend_test(struct gallivm_state *gallivm, const struct pipe_blend_state *blend, struct lp_type type) { LLVMModuleRef module = gallivm->module; LLVMContextRef context = gallivm->context; LLVMTypeRef vec_type; LLVMTypeRef args[5]; LLVMValueRef func; LLVMValueRef src_ptr; LLVMValueRef src1_ptr; LLVMValueRef dst_ptr; LLVMValueRef const_ptr; LLVMValueRef res_ptr; LLVMBasicBlockRef block; LLVMBuilderRef builder; const enum pipe_format format = PIPE_FORMAT_R8G8B8A8_UNORM; const unsigned rt = 0; const unsigned char swizzle[4] = { 0, 1, 2, 3 }; LLVMValueRef src; LLVMValueRef src1; LLVMValueRef dst; LLVMValueRef con; LLVMValueRef res; vec_type = lp_build_vec_type(gallivm, type); args[4] = args[3] = args[2] = args[1] = args[0] = LLVMPointerType(vec_type, 0); func = LLVMAddFunction(module, "test", LLVMFunctionType(LLVMVoidTypeInContext(context), args, 5, 0)); LLVMSetFunctionCallConv(func, LLVMCCallConv); src_ptr = LLVMGetParam(func, 0); src1_ptr = LLVMGetParam(func, 1); dst_ptr = LLVMGetParam(func, 2); const_ptr = LLVMGetParam(func, 3); res_ptr = LLVMGetParam(func, 4); block = LLVMAppendBasicBlockInContext(context, func, "entry"); builder = gallivm->builder; LLVMPositionBuilderAtEnd(builder, block); src = LLVMBuildLoad(builder, src_ptr, "src"); src1 = LLVMBuildLoad(builder, src1_ptr, "src1"); dst = LLVMBuildLoad(builder, dst_ptr, "dst"); con = LLVMBuildLoad(builder, const_ptr, "const"); res = lp_build_blend_aos(gallivm, blend, format, type, rt, src, NULL, src1, NULL, dst, NULL, con, NULL, swizzle, 4); lp_build_name(res, "res"); LLVMBuildStore(builder, res, res_ptr); LLVMBuildRetVoid(builder);; gallivm_verify_function(gallivm, func); return func; }
/**
 * Convert a float vector to an int vector, rounding towards minus infinity.
 *
 * When SSE4.1 is available the value is rounded with
 * lp_build_round_sse41(); otherwise negative inputs are biased by
 * -0.99999(9) so that the final float-to-int conversion yields the floor.
 *
 * \param bld  build context; its type must be floating point
 * \param a    value to convert
 * \return integer vector of the same width and length
 */
LLVMValueRef
lp_build_ifloor(struct lp_build_context *bld,
                LLVMValueRef a)
{
   const struct lp_type type = bld->type;
   LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
   LLVMValueRef rounded;
   LLVMValueRef result;

   assert(type.floating);
   assert(lp_check_value(type, a));

   if(!util_cpu_caps.has_sse4_1) {
      /* Fallback: derive a bias from the sign bit and add it to the input. */
      LLVMTypeRef vec_type = lp_build_vec_type(type);
      unsigned mantissa = lp_mantissa(type);
      LLVMValueRef sign_bit = lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1));
      LLVMValueRef sign_mask;
      LLVMValueRef bias;
      double almost_minus_one;

      /* sign_mask = a < 0 ? ~0 : 0 */
      sign_mask = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
      sign_mask = LLVMBuildAnd(bld->builder, sign_mask, sign_bit, "");
      sign_mask = LLVMBuildAShr(bld->builder, sign_mask,
                                lp_build_int_const_scalar(type, type.width - 1), "");
      lp_build_name(sign_mask, "floor.sign");

      /* bias = -0.99999(9)f, i.e. -(2^mantissa - 1) / 2^mantissa */
      almost_minus_one = -(double)(((unsigned long long)1 << mantissa) - 1)
                         / ((unsigned long long)1 << mantissa);
      bias = lp_build_const_scalar(type, almost_minus_one);
      bias = LLVMConstBitCast(bias, int_vec_type);

      /* bias = a < 0 ? -0.99999(9)f : 0.0f */
      bias = LLVMBuildAnd(bld->builder, bias, sign_mask, "");
      bias = LLVMBuildBitCast(bld->builder, bias, vec_type, "");
      lp_build_name(bias, "floor.offset");

      /*
       * NOTE(review): `a` is asserted to be floating point, yet this uses
       * the integer-flavoured LLVMBuildAdd; on LLVM versions that split
       * integer and floating-point arithmetic this presumably needs
       * LLVMBuildFAdd -- confirm against the targeted LLVM version.
       */
      rounded = LLVMBuildAdd(bld->builder, a, bias, "");
      lp_build_name(rounded, "floor.res");
   }
   else {
      rounded = lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_FLOOR);
   }

   result = LLVMBuildFPToSI(bld->builder, rounded, int_vec_type, "");
   lp_build_name(result, "floor");

   return result;
}
/**
 * Load one member of a structure through a pointer to that structure.
 *
 * \param gallivm  gallivm state providing the IR builder
 * \param ptr      pointer to a struct (both facts are asserted)
 * \param member   index of the member to load
 * \param name     member name, used only to label the generated values
 * \return the loaded value, named "<ptr name>.<name>"
 */
LLVMValueRef
lp_build_struct_get(struct gallivm_state *gallivm,
                    LLVMValueRef ptr,
                    unsigned member,
                    const char *name)
{
   LLVMValueRef field_ptr;
   LLVMValueRef value;

   assert(LLVMGetTypeKind(LLVMTypeOf(ptr)) == LLVMPointerTypeKind);
   assert(LLVMGetTypeKind(LLVMGetElementType(LLVMTypeOf(ptr))) == LLVMStructTypeKind);

   /* Address the member first, then load through that address. */
   field_ptr = lp_build_struct_get_ptr(gallivm, ptr, member, name);
   value = LLVMBuildLoad(gallivm->builder, field_ptr, "");

   lp_build_name(value, "%s.%s", LLVMGetValueName(ptr), name);

   return value;
}
/**
 * Load the element at \p index from a plain pointer, i.e. ptr[index].
 *
 * \param builder  LLVM IR builder the code is emitted into
 * \param ptr      base pointer (asserted to be of pointer type)
 * \param index    element index
 * \return the loaded value; in DEBUG builds it is named
 *         "<ptr name>[<index name>]"
 */
LLVMValueRef
lp_build_pointer_get(LLVMBuilderRef builder,
                     LLVMValueRef ptr,
                     LLVMValueRef index)
{
   LLVMValueRef addr;
   LLVMValueRef value;

   assert(LLVMGetTypeKind(LLVMTypeOf(ptr)) == LLVMPointerTypeKind);

   addr = LLVMBuildGEP(builder, ptr, &index, 1, "");
   value = LLVMBuildLoad(builder, addr, "");

#ifdef DEBUG
   lp_build_name(value, "%s[%s]", LLVMGetValueName(ptr), LLVMGetValueName(index));
#endif

   return value;
}
/**
 * Emit the alpha test and fold its result into the execution mask.
 *
 * \param gallivm           gallivm state
 * \param func              comparison function passed to lp_build_cmp()
 * \param type              type of \p alpha and \p ref
 * \param cbuf_format_desc  color buffer format description
 * \param mask              mask context updated with the test result
 * \param alpha             fragment alpha value
 * \param ref               alpha reference value
 * \param do_branch         if set, lp_build_mask_check() is called after the
 *                          mask update
 */
void
lp_build_alpha_test(struct gallivm_state *gallivm,
                    unsigned func,
                    struct lp_type type,
                    const struct util_format_description *cbuf_format_desc,
                    struct lp_build_mask_context *mask,
                    LLVMValueRef alpha,
                    LLVMValueRef ref,
                    boolean do_branch)
{
   struct lp_build_context cmp_bld;
   LLVMValueRef passed;

   lp_build_context_init(&cmp_bld, gallivm, type);

   /*
    * The comparison has to happen in the precision of the color buffer.
    *
    * TODO: Ideally the test would run on the already converted output
    * colors instead of repeating the conversion here, but the alpha test
    * must come before depth testing, which makes that inconvenient.
    * Hopefully LLVM detects and removes the duplicated expression.
    *
    * FIXME: Generalize this to formats other than the rgba8 variants.
    */
   if (type.floating && util_format_is_rgba8_variant(cbuf_format_desc)) {
      const unsigned dst_width = 8;

      /* Clamp both operands to [0, 1] before quantizing them. */
      alpha = lp_build_clamp(&cmp_bld, alpha, cmp_bld.zero, cmp_bld.one);
      ref = lp_build_clamp(&cmp_bld, ref, cmp_bld.zero, cmp_bld.one);

      alpha = lp_build_clamped_float_to_unsigned_norm(gallivm, type, dst_width, alpha);
      ref = lp_build_clamped_float_to_unsigned_norm(gallivm, type, dst_width, ref);

      /* From here on the comparison is done on non-float values. */
      type.floating = 0;
      lp_build_context_init(&cmp_bld, gallivm, type);
   }

   passed = lp_build_cmp(&cmp_bld, func, alpha, ref);

   lp_build_name(passed, "alpha_mask");

   lp_build_mask_update(mask, passed);

   if (do_branch) {
      lp_build_mask_check(mask);
   }
}
/**
 * Compute the address of one member of a structure.
 *
 * \param gallivm  gallivm state providing the IR builder
 * \param ptr      pointer to a struct (both facts are asserted)
 * \param member   index of the member to address
 * \param name     member name, used only to label the result
 * \return pointer to the member, named "<ptr name>.<name>_ptr"
 */
LLVMValueRef
lp_build_struct_get_ptr(struct gallivm_state *gallivm,
                        LLVMValueRef ptr,
                        unsigned member,
                        const char *name)
{
   LLVMValueRef gep_path[2];
   LLVMValueRef field_ptr;

   assert(LLVMGetTypeKind(LLVMTypeOf(ptr)) == LLVMPointerTypeKind);
   assert(LLVMGetTypeKind(LLVMGetElementType(LLVMTypeOf(ptr))) == LLVMStructTypeKind);

   /* ptr[0].member */
   gep_path[0] = lp_build_const_int32(gallivm, 0);
   gep_path[1] = lp_build_const_int32(gallivm, member);

   field_ptr = LLVMBuildGEP(gallivm->builder, ptr, gep_path, Elements(gep_path), "");

   lp_build_name(field_ptr, "%s.%s_ptr", LLVMGetValueName(ptr), name);

   return field_ptr;
}
/**
 * Load one element of an array through a pointer to that array.
 *
 * \param gallivm  gallivm state providing the IR builder
 * \param ptr      pointer to an array (both facts are asserted)
 * \param index    element index
 * \return the loaded element; in DEBUG builds it is named
 *         "<ptr name>[<index name>]"
 */
LLVMValueRef
lp_build_array_get(struct gallivm_state *gallivm,
                   LLVMValueRef ptr,
                   LLVMValueRef index)
{
   LLVMValueRef slot_ptr;
   LLVMValueRef value;

   assert(LLVMGetTypeKind(LLVMTypeOf(ptr)) == LLVMPointerTypeKind);
   assert(LLVMGetTypeKind(LLVMGetElementType(LLVMTypeOf(ptr))) == LLVMArrayTypeKind);

   /* Address the element first, then load through that address. */
   slot_ptr = lp_build_array_get_ptr(gallivm, ptr, index);
   value = LLVMBuildLoad(gallivm->builder, slot_ptr, "");

#ifdef DEBUG
   lp_build_name(value, "%s[%s]", LLVMGetValueName(ptr), LLVMGetValueName(index));
#endif

   return value;
}
/**
 * Compute the address of one element of an array.
 *
 * \param gallivm  gallivm state providing the IR builder
 * \param ptr      pointer to an array (both facts are asserted)
 * \param index    element index
 * \return pointer to the element; in DEBUG builds it is named
 *         "&<ptr name>[<index name>]"
 */
LLVMValueRef
lp_build_array_get_ptr(struct gallivm_state *gallivm,
                       LLVMValueRef ptr,
                       LLVMValueRef index)
{
   LLVMValueRef gep_path[2];
   LLVMValueRef slot_ptr;

   assert(LLVMGetTypeKind(LLVMTypeOf(ptr)) == LLVMPointerTypeKind);
   assert(LLVMGetTypeKind(LLVMGetElementType(LLVMTypeOf(ptr))) == LLVMArrayTypeKind);

   /* ptr[0][index] */
   gep_path[0] = lp_build_const_int32(gallivm, 0);
   gep_path[1] = index;

   slot_ptr = LLVMBuildGEP(gallivm->builder, ptr, gep_path, Elements(gep_path), "");

#ifdef DEBUG
   lp_build_name(slot_ptr, "&%s[%s]", LLVMGetValueName(ptr), LLVMGetValueName(index));
#endif

   return slot_ptr;
}
/**
 * Emit the scissor test and return the resulting pixel mask.
 *
 * A pixel passes when xmin <= x < xmax and ymin <= y < ymax, with the
 * bounds read from the JIT context and the positions taken from
 * interp->pos[0] / interp->pos[1].
 *
 * \param builder      LLVM IR builder the code is emitted into
 * \param context_ptr  pointer to the runtime JIT context
 * \param interp       interpolation context holding the pixel positions
 * \param type         type of the position vectors
 * \return the AND of the four comparison masks, named "scissormask"
 */
static LLVMValueRef
generate_scissor_test(LLVMBuilderRef builder,
                      LLVMValueRef context_ptr,
                      const struct lp_build_interp_soa_context *interp,
                      struct lp_type type)
{
   LLVMTypeRef vec_type = lp_build_vec_type(type);
   LLVMValueRef xpos = interp->pos[0];
   LLVMValueRef ypos = interp->pos[1];
   LLVMValueRef left, top, right, bottom;
   LLVMValueRef in_left, in_top, in_right, in_bottom;
   LLVMValueRef inside;

   /* xpos, ypos contain the window coords for the four pixels in the quad */
   assert(xpos);
   assert(ypos);

   /* Fetch each scissor bound and splat it across a vector. */
   left = lp_jit_context_scissor_xmin_value(builder, context_ptr);
   left = lp_build_broadcast(builder, vec_type, left);

   top = lp_jit_context_scissor_ymin_value(builder, context_ptr);
   top = lp_build_broadcast(builder, vec_type, top);

   right = lp_jit_context_scissor_xmax_value(builder, context_ptr);
   right = lp_build_broadcast(builder, vec_type, right);

   bottom = lp_jit_context_scissor_ymax_value(builder, context_ptr);
   bottom = lp_build_broadcast(builder, vec_type, bottom);

   /* Minimum bounds are inclusive, maximum bounds are exclusive. */
   in_left   = lp_build_compare(builder, type, PIPE_FUNC_GEQUAL, xpos, left);
   in_top    = lp_build_compare(builder, type, PIPE_FUNC_GEQUAL, ypos, top);
   in_right  = lp_build_compare(builder, type, PIPE_FUNC_LESS, xpos, right);
   in_bottom = lp_build_compare(builder, type, PIPE_FUNC_LESS, ypos, bottom);

   /* A pixel survives only if it passes all four comparisons. */
   inside = LLVMBuildAnd(builder, in_left, in_top, "");
   inside = LLVMBuildAnd(builder, inside, in_right, "");
   inside = LLVMBuildAnd(builder, inside, in_bottom, "");

   lp_build_name(inside, "scissormask");

   return inside;
}
/**
 * Address, and optionally load, one member of the lp_jit_texture structure
 * that belongs to a given sampler unit.
 *
 * \param base          dynamic sampler state; cast to the llvmpipe state to
 *                      reach the JIT context pointer
 * \param gallivm       gallivm state providing the IR builder
 * \param unit          texture unit (asserted to be < PIPE_MAX_SAMPLERS)
 * \param member_index  index of the member inside the texture structure
 * \param member_name   member name, used only to label the result
 * \param emit_load     if TRUE, emit the LLVM load instruction to actually
 *                      fetch the field's value; otherwise only the GEP that
 *                      addresses the field is emitted
 * \return the loaded value or the member address, named
 *         "context.texture<unit>.<member_name>"
 *
 * @sa http://llvm.org/docs/GetElementPtr.html
 */
static LLVMValueRef
lp_llvm_texture_member(const struct lp_sampler_dynamic_state *base,
                       struct gallivm_state *gallivm,
                       unsigned unit,
                       unsigned member_index,
                       const char *member_name,
                       boolean emit_load)
{
   struct llvmpipe_sampler_dynamic_state *state =
      (struct llvmpipe_sampler_dynamic_state *)base;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef gep_path[4];
   LLVMValueRef member_ptr;
   LLVMValueRef result;

   assert(unit < PIPE_MAX_SAMPLERS);

   /* GEP path: context[0].textures[unit].member */
   gep_path[0] = lp_build_const_int32(gallivm, 0);
   gep_path[1] = lp_build_const_int32(gallivm, LP_JIT_CTX_TEXTURES);
   gep_path[2] = lp_build_const_int32(gallivm, unit);
   gep_path[3] = lp_build_const_int32(gallivm, member_index);

   member_ptr = LLVMBuildGEP(builder, state->context_ptr,
                             gep_path, Elements(gep_path), "");

   result = emit_load ? LLVMBuildLoad(builder, member_ptr, "") : member_ptr;

   lp_build_name(result, "context.texture%u.%s", unit, member_name);

   return result;
}
/**
 * Emit alpha-to-coverage as a simple threshold: a pixel stays alive only
 * if its alpha is greater than 0.5.
 *
 * \param gallivm    gallivm state
 * \param type       type of \p alpha
 * \param mask       mask context updated with the test result
 * \param alpha      fragment alpha value
 * \param do_branch  if set, lp_build_mask_check() is called after the
 *                   mask update
 */
void
lp_build_alpha_to_coverage(struct gallivm_state *gallivm,
                           struct lp_type type,
                           struct lp_build_mask_context *mask,
                           LLVMValueRef alpha,
                           boolean do_branch)
{
   struct lp_build_context cmp_bld;
   LLVMValueRef threshold;
   LLVMValueRef covered;

   lp_build_context_init(&cmp_bld, gallivm, type);

   threshold = lp_build_const_vec(gallivm, type, 0.5);

   covered = lp_build_cmp(&cmp_bld, PIPE_FUNC_GREATER, alpha, threshold);

   lp_build_name(covered, "alpha_to_coverage");

   lp_build_mask_update(mask, covered);

   if (do_branch) {
      lp_build_mask_check(mask);
   }
}
/**
 * Initialize the bld->a and bld->dadq fields.
 *
 * For every attribute the a0/dadx/dady coefficients are fetched from the
 * arrays passed into the JIT function (four channels at a time, starting
 * at element attrib * TGSI_NUM_CHANNELS).  For every channel enabled in
 * the attribute's mask this then stores:
 *  - bld->a[attrib][chan]: a freshly allocated slot (lp_build_alloca)
 *    holding the starting attribute value, and
 *  - bld->dadq[attrib][chan]: the per-pixel delta vector
 *    dadx * pixoffx + dady * pixoffy.
 *
 * Attributes with LP_INTERP_POSITION are skipped entirely.
 *
 * \param bld       interpolation context to fill in
 * \param a0_ptr    pointer to the a0 coefficient array
 * \param dadx_ptr  pointer to the da/dx coefficient array
 * \param dady_ptr  pointer to the da/dy coefficient array
 */
static void
coeffs_init(struct lp_build_interp_soa_context *bld,
            LLVMValueRef a0_ptr,
            LLVMValueRef dadx_ptr,
            LLVMValueRef dady_ptr)
{
   struct lp_build_context *coeff_bld = &bld->coeff_bld;
   struct lp_build_context *setup_bld = &bld->setup_bld;
   struct gallivm_state *gallivm = coeff_bld->gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef pixoffx, pixoffy;
   unsigned attrib;
   unsigned chan;
   unsigned i;

   /*
    * Build the per-lane pixel offset vectors element by element from the
    * quad_offset_x / quad_offset_y tables.
    */
   pixoffx = coeff_bld->undef;
   pixoffy = coeff_bld->undef;
   for (i = 0; i < coeff_bld->type.length; i++) {
      LLVMValueRef nr = lp_build_const_int32(gallivm, i);
      LLVMValueRef pixxf = lp_build_const_float(gallivm, quad_offset_x[i]);
      LLVMValueRef pixyf = lp_build_const_float(gallivm, quad_offset_y[i]);
      pixoffx = LLVMBuildInsertElement(builder, pixoffx, pixxf, nr, "");
      pixoffy = LLVMBuildInsertElement(builder, pixoffy, pixyf, nr, "");
   }

   for (attrib = 0; attrib < bld->num_attribs; ++attrib) {
      const unsigned mask = bld->mask[attrib];
      const unsigned interp = bld->interp[attrib];
      /* Element offset of this attribute's first channel in the arrays. */
      LLVMValueRef index = lp_build_const_int32(gallivm, attrib * TGSI_NUM_CHANNELS);
      LLVMValueRef ptr;
      /* Coefficients default to zero when the mode does not load them. */
      LLVMValueRef dadxaos = setup_bld->zero;
      LLVMValueRef dadyaos = setup_bld->zero;
      LLVMValueRef a0aos = setup_bld->zero;

      /* always fetch all 4 values for performance/simplicity */
      switch (interp) {
      case LP_INTERP_PERSPECTIVE:
         /* fall-through */

      case LP_INTERP_LINEAR:
         /* Load the x and y derivatives as whole setup vectors. */
         ptr = LLVMBuildGEP(builder, dadx_ptr, &index, 1, "");
         ptr = LLVMBuildBitCast(builder, ptr, LLVMPointerType(setup_bld->vec_type, 0), "");
         dadxaos = LLVMBuildLoad(builder, ptr, "");

         ptr = LLVMBuildGEP(builder, dady_ptr, &index, 1, "");
         ptr = LLVMBuildBitCast(builder, ptr, LLVMPointerType(setup_bld->vec_type, 0), "");
         dadyaos = LLVMBuildLoad(builder, ptr, "");

         attrib_name(dadxaos, attrib, 0, ".dadxaos");
         attrib_name(dadyaos, attrib, 0, ".dadyaos");
         /* fall-through: interpolated attributes need a0 as well */

      case LP_INTERP_CONSTANT:
      case LP_INTERP_FACING:
         ptr = LLVMBuildGEP(builder, a0_ptr, &index, 1, "");
         ptr = LLVMBuildBitCast(builder, ptr, LLVMPointerType(setup_bld->vec_type, 0), "");
         a0aos = LLVMBuildLoad(builder, ptr, "");
         attrib_name(a0aos, attrib, 0, ".a0aos");
         break;

      case LP_INTERP_POSITION:
         /* Nothing to do as the position coeffs are already setup in slot 0 */
         continue;

      default:
         assert(0);
         break;
      }

      /*
       * a = a0 + (x * dadx + y * dady)
       * a0aos is the attrib value at top left corner of stamp
       */
      if (interp != LP_INTERP_CONSTANT && interp != LP_INTERP_FACING) {
         LLVMValueRef x = lp_build_broadcast_scalar(setup_bld, bld->x);
         LLVMValueRef y = lp_build_broadcast_scalar(setup_bld, bld->y);
         a0aos = lp_build_fmuladd(builder, x, dadxaos, a0aos);
         a0aos = lp_build_fmuladd(builder, y, dadyaos, a0aos);
      }

      /*
       * dadq = {0, dadx, dady, dadx + dady}
       * for two quads (side by side) this is:
       * {0, dadx, dady, dadx+dady, 2*dadx, 2*dadx+dady, 3*dadx+dady}
       *
       * NOTE(review): the second list has seven entries for eight lanes;
       * presumably one term (3*dadx?) is missing -- confirm against the
       * quad_offset_x / quad_offset_y tables.
       */
      for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
         /* Note: this generates a very large number of shuffles. */
         if (mask & (1 << chan)) {
            LLVMValueRef dadx, dady;
            LLVMValueRef dadq, dadq2;
            LLVMValueRef a;
            LLVMValueRef chan_index = lp_build_const_int32(gallivm, chan);

            if (attrib == 0 && chan == 0) {
               /* Slot 0, channel 0: x itself, changing by 1 per pixel in x. */
               a = bld->x;
               if (bld->pos_offset) {
                  a = LLVMBuildFAdd(builder, a, lp_build_const_float(gallivm, bld->pos_offset), "");
               }
               a = lp_build_broadcast_scalar(coeff_bld, a);
               dadx = coeff_bld->one;
               dady = coeff_bld->zero;
            }
            else if (attrib == 0 && chan == 1) {
               /* Slot 0, channel 1: y itself, changing by 1 per pixel in y. */
               a = bld->y;
               if (bld->pos_offset) {
                  a = LLVMBuildFAdd(builder, a, lp_build_const_float(gallivm, bld->pos_offset), "");
               }
               a = lp_build_broadcast_scalar(coeff_bld, a);
               dady = coeff_bld->one;
               dadx = coeff_bld->zero;
            }
            else {
               /* Splat this channel's coefficients across the coeff vector. */
               dadx = lp_build_extract_broadcast(gallivm, setup_bld->type, coeff_bld->type, dadxaos, chan_index);
               dady = lp_build_extract_broadcast(gallivm, setup_bld->type, coeff_bld->type, dadyaos, chan_index);

               /*
                * a = {a, a, a, a}
                */
               a = lp_build_extract_broadcast(gallivm, setup_bld->type, coeff_bld->type, a0aos, chan_index);
            }

            /* dadq = dadx * pixoffx + dady * pixoffy */
            dadx = LLVMBuildFMul(builder, dadx, pixoffx, "");
            dady = LLVMBuildFMul(builder, dady, pixoffy, "");
            dadq = LLVMBuildFAdd(builder, dadx, dady, "");

            /*
             * Compute the attrib values on the upper-left corner of each
             * group of quads.
             * Note that if we process 2 quads at once this doesn't
             * really do exactly what we want.
             * We need to access elem 0 and 2 respectively later if we process
             * 2 quads at once.
             */
            if (interp != LP_INTERP_CONSTANT && interp != LP_INTERP_FACING) {
               /* a += 2 * dadq */
               dadq2 = LLVMBuildFAdd(builder, dadq, dadq, "");
               a = LLVMBuildFAdd(builder, a, dadq2, "");
            }

#if PERSPECTIVE_DIVIDE_PER_QUAD
            /*
             * a *= 1 / w
             */

            /*
             * XXX since we're only going to access elements 0,2 out of 8
             * if we have 8-wide vectors we should do the division only 4-wide.
             * a is really a 2-elements in a 4-wide vector disguised as 8-wide
             * in this case.
             */
            if (interp == LP_INTERP_PERSPECTIVE) {
               LLVMValueRef w = bld->a[0][3];
               assert(attrib != 0);
               assert(bld->mask[0] & TGSI_WRITEMASK_W);
               /* The reciprocal of w is computed once and cached in bld->oow. */
               if (!bld->oow) {
                  bld->oow = lp_build_rcp(coeff_bld, w);
                  lp_build_name(bld->oow, "oow");
               }
               a = lp_build_mul(coeff_bld, a, bld->oow);
            }
#endif

            attrib_name(a, attrib, chan, ".a");
            attrib_name(dadq, attrib, chan, ".dadq");

            /* `a` lives in an alloca slot; dadq is kept as a plain value. */
            bld->a[attrib][chan] = lp_build_alloca(gallivm, LLVMTypeOf(a), "");
            LLVMBuildStore(builder, a, bld->a[attrib][chan]);
            bld->dadq[attrib][chan] = dadq;
         }
      }
   }
}
/** * Generate the runtime callable function for the whole fragment pipeline. */ static struct lp_fragment_shader_variant * generate_fragment(struct llvmpipe_context *lp, struct lp_fragment_shader *shader, const struct lp_fragment_shader_variant_key *key) { struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen); struct lp_fragment_shader_variant *variant; struct lp_type fs_type; struct lp_type blend_type; LLVMTypeRef fs_elem_type; LLVMTypeRef fs_vec_type; LLVMTypeRef fs_int_vec_type; LLVMTypeRef blend_vec_type; LLVMTypeRef blend_int_vec_type; LLVMTypeRef arg_types[9]; LLVMTypeRef func_type; LLVMValueRef context_ptr; LLVMValueRef x; LLVMValueRef y; LLVMValueRef a0_ptr; LLVMValueRef dadx_ptr; LLVMValueRef dady_ptr; LLVMValueRef mask_ptr; LLVMValueRef color_ptr; LLVMValueRef depth_ptr; LLVMBasicBlockRef block; LLVMBuilderRef builder; LLVMValueRef x0; LLVMValueRef y0; struct lp_build_sampler_soa *sampler; struct lp_build_interp_soa_context interp; LLVMValueRef fs_mask[LP_MAX_VECTOR_LENGTH]; LLVMValueRef fs_out_color[NUM_CHANNELS][LP_MAX_VECTOR_LENGTH]; LLVMValueRef blend_mask; LLVMValueRef blend_in_color[NUM_CHANNELS]; unsigned num_fs; unsigned i; unsigned chan; #ifdef DEBUG tgsi_dump(shader->base.tokens, 0); if(key->depth.enabled) { debug_printf("depth.format = %s\n", pf_name(key->zsbuf_format)); debug_printf("depth.func = %s\n", debug_dump_func(key->depth.func, TRUE)); debug_printf("depth.writemask = %u\n", key->depth.writemask); } if(key->alpha.enabled) { debug_printf("alpha.func = %s\n", debug_dump_func(key->alpha.func, TRUE)); debug_printf("alpha.ref_value = %f\n", key->alpha.ref_value); } if(key->blend.logicop_enable) { debug_printf("blend.logicop_func = %u\n", key->blend.logicop_func); } else if(key->blend.blend_enable) { debug_printf("blend.rgb_func = %s\n", debug_dump_blend_func (key->blend.rgb_func, TRUE)); debug_printf("rgb_src_factor = %s\n", debug_dump_blend_factor(key->blend.rgb_src_factor, TRUE)); debug_printf("rgb_dst_factor = %s\n", 
debug_dump_blend_factor(key->blend.rgb_dst_factor, TRUE)); debug_printf("alpha_func = %s\n", debug_dump_blend_func (key->blend.alpha_func, TRUE)); debug_printf("alpha_src_factor = %s\n", debug_dump_blend_factor(key->blend.alpha_src_factor, TRUE)); debug_printf("alpha_dst_factor = %s\n", debug_dump_blend_factor(key->blend.alpha_dst_factor, TRUE)); } debug_printf("blend.colormask = 0x%x\n", key->blend.colormask); for(i = 0; i < PIPE_MAX_SAMPLERS; ++i) { if(key->sampler[i].format) { debug_printf("sampler[%u] = \n", i); debug_printf(" .format = %s\n", pf_name(key->sampler[i].format)); debug_printf(" .target = %s\n", debug_dump_tex_target(key->sampler[i].target, TRUE)); debug_printf(" .pot = %u %u %u\n", key->sampler[i].pot_width, key->sampler[i].pot_height, key->sampler[i].pot_depth); debug_printf(" .wrap = %s %s %s\n", debug_dump_tex_wrap(key->sampler[i].wrap_s, TRUE), debug_dump_tex_wrap(key->sampler[i].wrap_t, TRUE), debug_dump_tex_wrap(key->sampler[i].wrap_r, TRUE)); debug_printf(" .min_img_filter = %s\n", debug_dump_tex_filter(key->sampler[i].min_img_filter, TRUE)); debug_printf(" .min_mip_filter = %s\n", debug_dump_tex_mipfilter(key->sampler[i].min_mip_filter, TRUE)); debug_printf(" .mag_img_filter = %s\n", debug_dump_tex_filter(key->sampler[i].mag_img_filter, TRUE)); if(key->sampler[i].compare_mode) debug_printf(" .compare_mode = %s\n", debug_dump_func(key->sampler[i].compare_func, TRUE)); debug_printf(" .normalized_coords = %u\n", key->sampler[i].normalized_coords); debug_printf(" .prefilter = %u\n", key->sampler[i].prefilter); } } #endif variant = CALLOC_STRUCT(lp_fragment_shader_variant); if(!variant) return NULL; variant->shader = shader; memcpy(&variant->key, key, sizeof *key); /* TODO: actually pick these based on the fs and color buffer * characteristics. 
*/ memset(&fs_type, 0, sizeof fs_type); fs_type.floating = TRUE; /* floating point values */ fs_type.sign = TRUE; /* values are signed */ fs_type.norm = FALSE; /* values are not limited to [0,1] or [-1,1] */ fs_type.width = 32; /* 32-bit float */ fs_type.length = 4; /* 4 element per vector */ num_fs = 4; memset(&blend_type, 0, sizeof blend_type); blend_type.floating = FALSE; /* values are integers */ blend_type.sign = FALSE; /* values are unsigned */ blend_type.norm = TRUE; /* values are in [0,1] or [-1,1] */ blend_type.width = 8; /* 8-bit ubyte values */ blend_type.length = 16; /* 16 elements per vector */ /* * Generate the function prototype. Any change here must be reflected in * lp_jit.h's lp_jit_frag_func function pointer type, and vice-versa. */ fs_elem_type = lp_build_elem_type(fs_type); fs_vec_type = lp_build_vec_type(fs_type); fs_int_vec_type = lp_build_int_vec_type(fs_type); blend_vec_type = lp_build_vec_type(blend_type); blend_int_vec_type = lp_build_int_vec_type(blend_type); arg_types[0] = screen->context_ptr_type; /* context */ arg_types[1] = LLVMInt32Type(); /* x */ arg_types[2] = LLVMInt32Type(); /* y */ arg_types[3] = LLVMPointerType(fs_elem_type, 0); /* a0 */ arg_types[4] = LLVMPointerType(fs_elem_type, 0); /* dadx */ arg_types[5] = LLVMPointerType(fs_elem_type, 0); /* dady */ arg_types[6] = LLVMPointerType(fs_int_vec_type, 0); /* mask */ arg_types[7] = LLVMPointerType(blend_vec_type, 0); /* color */ arg_types[8] = LLVMPointerType(fs_int_vec_type, 0); /* depth */ func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0); variant->function = LLVMAddFunction(screen->module, "shader", func_type); LLVMSetFunctionCallConv(variant->function, LLVMCCallConv); for(i = 0; i < Elements(arg_types); ++i) if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) LLVMAddAttribute(LLVMGetParam(variant->function, i), LLVMNoAliasAttribute); context_ptr = LLVMGetParam(variant->function, 0); x = LLVMGetParam(variant->function, 1); y = 
LLVMGetParam(variant->function, 2); a0_ptr = LLVMGetParam(variant->function, 3); dadx_ptr = LLVMGetParam(variant->function, 4); dady_ptr = LLVMGetParam(variant->function, 5); mask_ptr = LLVMGetParam(variant->function, 6); color_ptr = LLVMGetParam(variant->function, 7); depth_ptr = LLVMGetParam(variant->function, 8); lp_build_name(context_ptr, "context"); lp_build_name(x, "x"); lp_build_name(y, "y"); lp_build_name(a0_ptr, "a0"); lp_build_name(dadx_ptr, "dadx"); lp_build_name(dady_ptr, "dady"); lp_build_name(mask_ptr, "mask"); lp_build_name(color_ptr, "color"); lp_build_name(depth_ptr, "depth"); /* * Function body */ block = LLVMAppendBasicBlock(variant->function, "entry"); builder = LLVMCreateBuilder(); LLVMPositionBuilderAtEnd(builder, block); generate_pos0(builder, x, y, &x0, &y0); lp_build_interp_soa_init(&interp, shader->base.tokens, builder, fs_type, a0_ptr, dadx_ptr, dady_ptr, x0, y0, 2, 0); #if 0 /* C texture sampling */ sampler = lp_c_sampler_soa_create(context_ptr); #else /* code generated texture sampling */ sampler = lp_llvm_sampler_soa_create(key->sampler, context_ptr); #endif for(i = 0; i < num_fs; ++i) { LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); LLVMValueRef out_color[NUM_CHANNELS]; LLVMValueRef depth_ptr_i; if(i != 0) lp_build_interp_soa_update(&interp); fs_mask[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, mask_ptr, &index, 1, ""), ""); depth_ptr_i = LLVMBuildGEP(builder, depth_ptr, &index, 1, ""); generate_fs(lp, shader, key, builder, fs_type, context_ptr, i, &interp, sampler, &fs_mask[i], out_color, depth_ptr_i); for(chan = 0; chan < NUM_CHANNELS; ++chan) fs_out_color[chan][i] = out_color[chan]; } sampler->destroy(sampler); /* * Convert the fs's output color and mask to fit to the blending type. 
*/ for(chan = 0; chan < NUM_CHANNELS; ++chan) { lp_build_conv(builder, fs_type, blend_type, fs_out_color[chan], num_fs, &blend_in_color[chan], 1); lp_build_name(blend_in_color[chan], "color.%c", "rgba"[chan]); } lp_build_conv_mask(builder, fs_type, blend_type, fs_mask, num_fs, &blend_mask, 1); /* * Blending. */ generate_blend(&key->blend, builder, blend_type, context_ptr, blend_mask, blend_in_color, color_ptr); LLVMBuildRetVoid(builder); LLVMDisposeBuilder(builder); /* * Translate the LLVM IR into machine code. */ if(LLVMVerifyFunction(variant->function, LLVMPrintMessageAction)) { LLVMDumpValue(variant->function); abort(); } LLVMRunFunctionPassManager(screen->pass, variant->function); #ifdef DEBUG LLVMDumpValue(variant->function); debug_printf("\n"); #endif variant->jit_function = (lp_jit_frag_func)LLVMGetPointerToGlobal(screen->engine, variant->function); #ifdef DEBUG lp_disassemble(variant->jit_function); #endif variant->next = shader->variants; shader->variants = variant; return variant; }
static LLVMValueRef emit_fetch_constant( struct lp_build_tgsi_context * bld_base, const struct tgsi_full_src_register * reg, enum tgsi_opcode_type stype, unsigned swizzle) { struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base); LLVMBuilderRef builder = bld_base->base.gallivm->builder; struct lp_type type = bld_base->base.type; LLVMValueRef res; unsigned chan; assert(!reg->Register.Indirect); /* * Get the constants components */ res = bld->bld_base.base.undef; for (chan = 0; chan < 4; ++chan) { LLVMValueRef index; LLVMValueRef scalar_ptr; LLVMValueRef scalar; LLVMValueRef swizzle; index = lp_build_const_int32(bld->bld_base.base.gallivm, reg->Register.Index * 4 + chan); scalar_ptr = LLVMBuildGEP(builder, bld->consts_ptr, &index, 1, ""); scalar = LLVMBuildLoad(builder, scalar_ptr, ""); lp_build_name(scalar, "const[%u].%c", reg->Register.Index, "xyzw"[chan]); /* * NOTE: constants array is always assumed to be RGBA */ swizzle = lp_build_const_int32(bld->bld_base.base.gallivm, bld->swizzles[chan]); res = LLVMBuildInsertElement(builder, res, scalar, swizzle, ""); } /* * Broadcast the first quaternion to all others. * * XXX: could be factored into a reusable function. */ if (type.length > 4) { LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; unsigned i; for (chan = 0; chan < 4; ++chan) { shuffles[chan] = lp_build_const_int32(bld->bld_base.base.gallivm, chan); } for (i = 4; i < type.length; ++i) { shuffles[i] = shuffles[i % 4]; } res = LLVMBuildShuffleVector(builder, res, bld->bld_base.base.undef, LLVMConstVector(shuffles, type.length), ""); } return res; }
static LLVMValueRef add_blend_test(struct gallivm_state *gallivm, const struct pipe_blend_state *blend, enum vector_mode mode, struct lp_type type) { LLVMModuleRef module = gallivm->module; LLVMContextRef context = gallivm->context; LLVMTypeRef vec_type; LLVMTypeRef args[4]; LLVMValueRef func; LLVMValueRef src_ptr; LLVMValueRef dst_ptr; LLVMValueRef const_ptr; LLVMValueRef res_ptr; LLVMBasicBlockRef block; LLVMBuilderRef builder; const enum pipe_format format = PIPE_FORMAT_R8G8B8A8_UNORM; const unsigned rt = 0; const unsigned char swizzle[4] = { 0, 1, 2, 3 }; vec_type = lp_build_vec_type(gallivm, type); args[3] = args[2] = args[1] = args[0] = LLVMPointerType(vec_type, 0); func = LLVMAddFunction(module, "test", LLVMFunctionType(LLVMVoidTypeInContext(context), args, 4, 0)); LLVMSetFunctionCallConv(func, LLVMCCallConv); src_ptr = LLVMGetParam(func, 0); dst_ptr = LLVMGetParam(func, 1); const_ptr = LLVMGetParam(func, 2); res_ptr = LLVMGetParam(func, 3); block = LLVMAppendBasicBlockInContext(context, func, "entry"); builder = gallivm->builder; LLVMPositionBuilderAtEnd(builder, block); if (mode == AoS) { LLVMValueRef src; LLVMValueRef dst; LLVMValueRef con; LLVMValueRef res; src = LLVMBuildLoad(builder, src_ptr, "src"); dst = LLVMBuildLoad(builder, dst_ptr, "dst"); con = LLVMBuildLoad(builder, const_ptr, "const"); res = lp_build_blend_aos(gallivm, blend, &format, type, rt, src, dst, NULL, con, swizzle); lp_build_name(res, "res"); LLVMBuildStore(builder, res, res_ptr); } if (mode == SoA) { LLVMValueRef src[4]; LLVMValueRef dst[4]; LLVMValueRef con[4]; LLVMValueRef res[4]; unsigned i; for(i = 0; i < 4; ++i) { LLVMValueRef index = LLVMConstInt(LLVMInt32TypeInContext(context), i, 0); src[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, src_ptr, &index, 1, ""), ""); dst[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dst_ptr, &index, 1, ""), ""); con[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, const_ptr, &index, 1, ""), ""); lp_build_name(src[i], "src.%c", "rgba"[i]); 
lp_build_name(con[i], "con.%c", "rgba"[i]); lp_build_name(dst[i], "dst.%c", "rgba"[i]); } lp_build_blend_soa(gallivm, blend, type, rt, src, dst, con, res); for(i = 0; i < 4; ++i) { LLVMValueRef index = LLVMConstInt(LLVMInt32TypeInContext(context), i, 0); lp_build_name(res[i], "res.%c", "rgba"[i]); LLVMBuildStore(builder, res[i], LLVMBuildGEP(builder, res_ptr, &index, 1, "")); } } LLVMBuildRetVoid(builder);; return func; }
/**
 * Emit the AoS blend of a source and a destination pixel vector.
 *
 * Depending on the blend state the result is the logic-op of src and dst
 * (integer types only; floating-point types pass src through), src unchanged
 * (blending disabled for this render target), or the full blend equation.
 * Afterwards, if the render target's colormask is not full for the colour
 * buffer format, a colour mask is built (and ANDed with the caller's mask if
 * one was given); any resulting mask selects between the blended value and
 * dst.
 *
 * @param gallivm        gallivm state used for code generation
 * @param blend          the blend state of the shader variant
 * @param cbuf_format    format of the colour buffer
 * @param type           data type of the pixel vector
 * @param rt             render target index
 * @param src            blend src
 * @param src_alpha      blend src alpha (if not included in src)
 * @param src1           second blend src (for dual source blend)
 * @param src1_alpha     second blend src alpha (if not included in src1)
 * @param dst            blend dst
 * @param mask           optional mask to apply to the blending result
 * @param const_         const blend color
 * @param const_alpha    const blend color alpha (if not included in const_)
 * @param swizzle        swizzle values for RGBA
 * @param nr_channels    number of channels, forwarded to the factor, swizzle
 *                       and colour-mask helpers
 *
 * @return the result of blending src and dst
 */
LLVMValueRef
lp_build_blend_aos(struct gallivm_state *gallivm,
                   const struct pipe_blend_state *blend,
                   enum pipe_format cbuf_format,
                   struct lp_type type,
                   unsigned rt,
                   LLVMValueRef src,
                   LLVMValueRef src_alpha,
                   LLVMValueRef src1,
                   LLVMValueRef src1_alpha,
                   LLVMValueRef dst,
                   LLVMValueRef mask,
                   LLVMValueRef const_,
                   LLVMValueRef const_alpha,
                   const unsigned char swizzle[4],
                   int nr_channels)
{
   const struct pipe_rt_blend_state *rt_state = &blend->rt[rt];
   const struct util_format_description *format_desc =
      util_format_description(cbuf_format);
   struct lp_build_blend_aos_context ctx;
   LLVMValueRef blended;
   unsigned alpha_chan = UTIL_FORMAT_SWIZZLE_NONE;
   unsigned chan;

   /* Set up the build context with every operand the factor code may need. */
   memset(&ctx, 0, sizeof ctx);
   lp_build_context_init(&ctx.base, gallivm, type);
   ctx.src = src;
   ctx.src1 = src1;
   ctx.dst = dst;
   ctx.const_ = const_;
   ctx.src_alpha = src_alpha;
   ctx.src1_alpha = src1_alpha;
   ctx.const_alpha = const_alpha;

   /*
    * Without a separate alpha vector, locate the alpha channel through the
    * swizzle (the last position mapping to channel 3 wins).
    */
   for (chan = 0; src_alpha == NULL && chan < 4; ++chan) {
      if (swizzle[chan] == 3) {
         alpha_chan = chan;
      }
   }

   if (blend->logicop_enable) {
      /* Logic ops are only applied to non-floating-point types. */
      blended = type.floating
         ? src
         : lp_build_logicop(gallivm->builder, blend->logicop_func, src, dst);
   }
   else if (!rt_state->blend_enable) {
      blended = src;
   }
   else {
      LLVMValueRef src_factor, dst_factor;
      boolean rgb_alpha_same = (rt_state->rgb_src_factor == rt_state->rgb_dst_factor &&
                                rt_state->alpha_src_factor == rt_state->alpha_dst_factor) ||
                               nr_channels == 1;

      src_factor = lp_build_blend_factor(&ctx, rt_state->rgb_src_factor,
                                         rt_state->alpha_src_factor,
                                         alpha_chan,
                                         nr_channels);

      dst_factor = lp_build_blend_factor(&ctx, rt_state->rgb_dst_factor,
                                         rt_state->alpha_dst_factor,
                                         alpha_chan,
                                         nr_channels);

      blended = lp_build_blend(&ctx.base,
                               rt_state->rgb_func,
                               rt_state->rgb_src_factor,
                               rt_state->rgb_dst_factor,
                               src,
                               dst,
                               src_factor,
                               dst_factor,
                               rgb_alpha_same,
                               false);

      /* A distinct alpha function needs its own blend, merged in by swizzle. */
      if (rt_state->rgb_func != rt_state->alpha_func &&
          nr_channels > 1 &&
          alpha_chan != UTIL_FORMAT_SWIZZLE_NONE) {
         LLVMValueRef alpha_blended;

         alpha_blended = lp_build_blend(&ctx.base,
                                        rt_state->alpha_func,
                                        rt_state->alpha_src_factor,
                                        rt_state->alpha_dst_factor,
                                        src,
                                        dst,
                                        src_factor,
                                        dst_factor,
                                        rgb_alpha_same,
                                        false);

         blended = lp_build_blend_swizzle(&ctx,
                                          blended,
                                          alpha_blended,
                                          LP_BUILD_BLEND_SWIZZLE_RGBA,
                                          alpha_chan,
                                          nr_channels);
      }
   }

   /* A colour mask is only needed when the colormask is not full. */
   if (!util_format_colormask_full(format_desc, rt_state->colormask)) {
      LLVMValueRef color_mask;

      color_mask = lp_build_const_mask_aos_swizzled(gallivm, ctx.base.type,
                                                    rt_state->colormask,
                                                    nr_channels, swizzle);
      lp_build_name(color_mask, "color_mask");

      if (mask == NULL) {
         mask = color_mask;
      }
      else {
         /*
          * The values being blended may be floating point, but masks are
          * always integer: temporarily clear the floating flag for the AND.
          */
         unsigned saved_floating = ctx.base.type.floating;
         ctx.base.type.floating = 0;

         mask = lp_build_and(&ctx.base, color_mask, mask);

         ctx.base.type.floating = saved_floating;
      }
   }

   /* Where a mask exists, keep dst for the masked-out parts. */
   if (mask != NULL) {
      blended = lp_build_select(&ctx.base, mask, blended, dst);
   }

   return blended;
}
/**
 * Emit an LLVM function named "test" into the given module that blends one
 * set of inputs.
 *
 * The generated function takes four pointers to vectors of the given type
 * (src, dst, const, res).  In AoS mode one vector is loaded from each input
 * pointer, blended and stored to res.  In SoA mode four consecutive vectors
 * (one per channel) are loaded from each input pointer, blended and the four
 * results are stored at consecutive slots of res.
 *
 * A private builder is created for the function body and disposed of before
 * returning.
 *
 * \param module  the LLVM module the function is added to
 * \param blend   the blend state under test
 * \param mode    AoS or SoA layout
 * \param type    the vector type of the blend operands
 * \return the generated LLVM function
 */
static LLVMValueRef
add_blend_test(LLVMModuleRef module,
               const struct pipe_blend_state *blend,
               enum vector_mode mode,
               struct lp_type type)
{
   LLVMTypeRef vec_type;
   LLVMTypeRef args[4];
   LLVMValueRef func;
   LLVMValueRef src_ptr;
   LLVMValueRef dst_ptr;
   LLVMValueRef const_ptr;
   LLVMValueRef res_ptr;
   LLVMBasicBlockRef block;
   LLVMBuilderRef builder;

   vec_type = lp_build_vec_type(type);

   /* All four arguments are pointers to the same vector type. */
   args[3] = args[2] = args[1] = args[0] = LLVMPointerType(vec_type, 0);
   func = LLVMAddFunction(module, "test", LLVMFunctionType(LLVMVoidType(), args, 4, 0));
   LLVMSetFunctionCallConv(func, LLVMCCallConv);
   src_ptr = LLVMGetParam(func, 0);
   dst_ptr = LLVMGetParam(func, 1);
   const_ptr = LLVMGetParam(func, 2);
   res_ptr = LLVMGetParam(func, 3);

   block = LLVMAppendBasicBlock(func, "entry");
   builder = LLVMCreateBuilder();
   LLVMPositionBuilderAtEnd(builder, block);

   if (mode == AoS) {
      LLVMValueRef src;
      LLVMValueRef dst;
      LLVMValueRef con;
      LLVMValueRef res;

      src = LLVMBuildLoad(builder, src_ptr, "src");
      dst = LLVMBuildLoad(builder, dst_ptr, "dst");
      con = LLVMBuildLoad(builder, const_ptr, "const");

      res = lp_build_blend_aos(builder, blend, type, src, dst, con, 3);

      lp_build_name(res, "res");

      LLVMBuildStore(builder, res, res_ptr);
   }

   if (mode == SoA) {
      LLVMValueRef src[4];
      LLVMValueRef dst[4];
      LLVMValueRef con[4];
      LLVMValueRef res[4];
      unsigned i;

      /* Load one vector per channel from each of the three inputs. */
      for(i = 0; i < 4; ++i) {
         LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
         src[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, src_ptr, &index, 1, ""), "");
         dst[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dst_ptr, &index, 1, ""), "");
         con[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, const_ptr, &index, 1, ""), "");
         lp_build_name(src[i], "src.%c", "rgba"[i]);
         lp_build_name(con[i], "con.%c", "rgba"[i]);
         lp_build_name(dst[i], "dst.%c", "rgba"[i]);
      }

      lp_build_blend_soa(builder, blend, type, src, dst, con, res);

      /* Write the per-channel results back out. */
      for(i = 0; i < 4; ++i) {
         LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
         lp_build_name(res[i], "res.%c", "rgba"[i]);
         LLVMBuildStore(builder, res[i], LLVMBuildGEP(builder, res_ptr, &index, 1, ""));
      }
   }

   LLVMBuildRetVoid(builder);

   LLVMDisposeBuilder(builder);
   return func;
}
/**
 * Generate the code to do inside/outside triangle testing for the
 * four pixels in a 2x2 quad.  This will set the four elements of the
 * quad mask vector to 0 or ~0.
 *
 * For each of the three edges, the i-th step vector is loaded from the
 * corresponding step pointer and compared (greater-than) against the
 * broadcast edge constant; the three comparison results are ANDed together
 * to form the mask.
 *
 * \param builder    the LLVM builder to emit code with
 * \param i          which quad of the quad group to test, in [0,3]
 * \param mask       receives the resulting in/out mask vector
 * \param c0         edge constant for edge 0
 * \param c1         edge constant for edge 1
 * \param c2         edge constant for edge 2
 * \param step0_ptr  pointer to the step vectors for edge 0
 * \param step1_ptr  pointer to the step vectors for edge 1
 * \param step2_ptr  pointer to the step vectors for edge 2
 */
static void
generate_tri_edge_mask(LLVMBuilderRef builder,
                       unsigned i,
                       LLVMValueRef *mask,      /* ivec4, out */
                       LLVMValueRef c0,         /* int32 */
                       LLVMValueRef c1,         /* int32 */
                       LLVMValueRef c2,         /* int32 */
                       LLVMValueRef step0_ptr,  /* ivec4 */
                       LLVMValueRef step1_ptr,  /* ivec4 */
                       LLVMValueRef step2_ptr)  /* ivec4 */
{
#define OPTIMIZE_IN_OUT_TEST 0
#if OPTIMIZE_IN_OUT_TEST
   struct lp_build_if_state ifctx;
   LLVMValueRef not_draw_all;
#endif
   struct lp_build_flow_context *flow;
   struct lp_type i32_type;
   LLVMTypeRef i32vec4_type;
   LLVMValueRef c0_vec, c1_vec, c2_vec;
   LLVMValueRef in_out_mask;

   assert(i < 4);

   /* int32 vector type */
   memset(&i32_type, 0, sizeof i32_type);
   i32_type.floating = FALSE; /* values are integers */
   i32_type.sign = TRUE;      /* values are signed */
   i32_type.norm = FALSE;     /* values are not normalized */
   i32_type.width = 32;       /* 32-bit int values */
   i32_type.length = 4;       /* 4 elements per vector */

   i32vec4_type = lp_build_int32_vec4_type();

   /*
    * Use a conditional here to do detailed pixel in/out testing.
    * We only have to do this if c0 != INT_MIN.
    * (The conditional is only emitted when OPTIMIZE_IN_OUT_TEST is enabled.)
    */
   flow = lp_build_flow_create(builder);
   lp_build_flow_scope_begin(flow);

   {
#if OPTIMIZE_IN_OUT_TEST
      /* not_draw_all = (c0 != INT_MIN) */
      not_draw_all = LLVMBuildICmp(builder,
                                   LLVMIntNE,
                                   c0,
                                   LLVMConstInt(LLVMInt32Type(), INT_MIN, 0),
                                   "");

      in_out_mask = lp_build_int_const_scalar(i32_type, ~0);

      lp_build_flow_scope_declare(flow, &in_out_mask);

      /* if (not_draw_all) {... */
      lp_build_if(&ifctx, flow, builder, not_draw_all);
#endif
      {
         LLVMValueRef step0_vec, step1_vec, step2_vec;
         LLVMValueRef m0_vec, m1_vec, m2_vec;
         LLVMValueRef index, m;

         /* c0_vec = {c0, c0, c0, c0}
          * Note that we emit this code four times but LLVM optimizes away
          * three instances of it.
          */
         c0_vec = lp_build_broadcast(builder, i32vec4_type, c0);
         c1_vec = lp_build_broadcast(builder, i32vec4_type, c1);
         c2_vec = lp_build_broadcast(builder, i32vec4_type, c2);
         lp_build_name(c0_vec, "edgeconst0vec");
         lp_build_name(c1_vec, "edgeconst1vec");
         lp_build_name(c2_vec, "edgeconst2vec");

         /* load step0vec, step1, step2 vec from memory */
         index = LLVMConstInt(LLVMInt32Type(), i, 0);
         step0_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step0_ptr, &index, 1, ""), "");
         step1_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step1_ptr, &index, 1, ""), "");
         step2_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step2_ptr, &index, 1, ""), "");
         lp_build_name(step0_vec, "step0vec");
         lp_build_name(step1_vec, "step1vec");
         lp_build_name(step2_vec, "step2vec");

         /* m0_vec = step0_ptr[i] > c0_vec */
         m0_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step0_vec, c0_vec);
         m1_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step1_vec, c1_vec);
         m2_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step2_vec, c2_vec);

         /* in_out_mask = m0_vec & m1_vec & m2_vec */
         m = LLVMBuildAnd(builder, m0_vec, m1_vec, "");
         in_out_mask = LLVMBuildAnd(builder, m, m2_vec, "");
         lp_build_name(in_out_mask, "inoutmaskvec");
      }
#if OPTIMIZE_IN_OUT_TEST
      lp_build_endif(&ifctx);
#endif

   }
   lp_build_flow_scope_end(flow);
   lp_build_flow_destroy(flow);

   /* This is the initial alive/dead pixel mask for a quad of four pixels.
    * It's an int[4] vector with each word set to 0 or ~0.
    * Words will get cleared when pixels fail the Z test, etc.
    */
   *mask = in_out_mask;
}
/**
 * Generate code for performing depth and/or stencil tests.
 * We operate on a vector of values (typically n 2x2 quads).
 *
 * The framebuffer Z and stencil values are passed in already loaded
 * (z_fb / s_fb); the updated values are returned through z_value / s_value
 * rather than being stored by this function.
 *
 * \param gallivm       gallivm state used for code generation
 * \param depth         the depth test state
 * \param stencil       the front/back stencil state
 * \param z_src_type    the data type of the incoming fragment depth values
 * \param format_desc   description of the depth/stencil surface
 * \param mask          the alive/dead pixel mask for the quad (vector);
 *                      updated with the stencil and depth pass masks
 * \param stencil_refs  the front/back stencil ref values (scalar on entry;
 *                      overwritten with their broadcast vectors when the
 *                      stencil test is enabled)
 * \param z_src         the incoming depth values
 * \param z_fb          the depth values read from the framebuffer
 * \param s_fb          the stencil values read from the framebuffer
 * \param face          front/back facing indicator; compared against
 *                      integer zero (may be NULL)
 * \param z_value       receives the new depth value; for formats of at most
 *                      32 bits per block this is the merged Z/stencil value
 * \param s_value       receives the new stencil value; for formats of at most
 *                      32 bits per block this is the same value as *z_value
 * \param do_branch     if set, and stencil is disabled, a mask check
 *                      (lp_build_mask_check) is emitted right after the
 *                      depth test
 */
void
lp_build_depth_stencil_test(struct gallivm_state *gallivm,
                            const struct pipe_depth_state *depth,
                            const struct pipe_stencil_state stencil[2],
                            struct lp_type z_src_type,
                            const struct util_format_description *format_desc,
                            struct lp_build_mask_context *mask,
                            LLVMValueRef stencil_refs[2],
                            LLVMValueRef z_src,
                            LLVMValueRef z_fb,
                            LLVMValueRef s_fb,
                            LLVMValueRef face,
                            LLVMValueRef *z_value,
                            LLVMValueRef *s_value,
                            boolean do_branch)
{
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_type z_type;
   struct lp_build_context z_bld;
   struct lp_build_context s_bld;
   struct lp_type s_type;
   unsigned z_shift = 0, z_width = 0, z_mask = 0;
   LLVMValueRef z_dst = NULL;
   LLVMValueRef stencil_vals = NULL;
   LLVMValueRef z_bitmask = NULL, stencil_shift = NULL;
   LLVMValueRef z_pass = NULL, s_pass_mask = NULL;
   /* Snapshot of the live-pixel mask on entry; the fail/pass masks below are
    * all derived from this value, not from the progressively updated mask.
    */
   LLVMValueRef orig_mask = lp_build_mask_value(mask);
   LLVMValueRef front_facing = NULL;
   boolean have_z, have_s;

   /*
    * Depths are expected to be between 0 and 1, even if they are stored in
    * floats. Setting these bits here will ensure that the lp_build_conv() call
    * below won't try to unnecessarily clamp the incoming values.
    */
   if(z_src_type.floating) {
      z_src_type.sign = FALSE;
      z_src_type.norm = TRUE;
   }
   else {
      assert(!z_src_type.sign);
      assert(z_src_type.norm);
   }

   /* Pick the type matching the depth-stencil format. */
   z_type = lp_depth_type(format_desc, z_src_type.length);

   /* Pick the intermediate type for depth operations. */
   z_type.width = z_src_type.width;
   assert(z_type.length == z_src_type.length);

   /* FIXME: for non-float depth/stencil might generate better code
    * if we'd always split it up to use 128bit operations.
    * For stencil we'd almost certainly want to pack to 8xi16 values,
    * for z just run twice.
    */

   /* Sanity checking */
   {
      const unsigned z_swizzle = format_desc->swizzle[0];
      const unsigned s_swizzle = format_desc->swizzle[1];

      assert(z_swizzle != UTIL_FORMAT_SWIZZLE_NONE ||
             s_swizzle != UTIL_FORMAT_SWIZZLE_NONE);

      assert(depth->enabled || stencil[0].enabled);

      assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS);
      assert(format_desc->block.width == 1);
      assert(format_desc->block.height == 1);

      if (stencil[0].enabled) {
         assert(s_swizzle < 4);
         assert(format_desc->channel[s_swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED);
         assert(format_desc->channel[s_swizzle].pure_integer);
         assert(!format_desc->channel[s_swizzle].normalized);
         assert(format_desc->channel[s_swizzle].size == 8);
      }

      if (depth->enabled) {
         assert(z_swizzle < 4);
         if (z_type.floating) {
            assert(z_swizzle == 0);
            assert(format_desc->channel[z_swizzle].type ==
                   UTIL_FORMAT_TYPE_FLOAT);
            assert(format_desc->channel[z_swizzle].size == 32);
         }
         else {
            assert(format_desc->channel[z_swizzle].type ==
                   UTIL_FORMAT_TYPE_UNSIGNED);
            assert(format_desc->channel[z_swizzle].normalized);
            assert(!z_type.fixed);
         }
      }
   }


   /* Setup build context for Z vals */
   lp_build_context_init(&z_bld, gallivm, z_type);

   /* Setup build context for stencil vals */
   s_type = lp_int_type(z_type);
   lp_build_context_init(&s_bld, gallivm, s_type);

   /* Compute and apply the Z/stencil bitmasks and shifts.
    */
   {
      unsigned s_shift, s_mask;

      z_dst = z_fb;
      stencil_vals = s_fb;

      have_z = get_z_shift_and_mask(format_desc, &z_shift, &z_width, &z_mask);
      have_s = get_s_shift_and_mask(format_desc, &s_shift, &s_mask);

      if (have_z) {
         if (z_mask != 0xffffffff) {
            z_bitmask = lp_build_const_int_vec(gallivm, z_type, z_mask);
         }

         /*
          * Align the framebuffer Z's LSB to the right.
          */
         if (z_shift) {
            LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_type, z_shift);
            z_dst = LLVMBuildLShr(builder, z_dst, shift, "z_dst");
         } else if (z_bitmask) {
            z_dst = LLVMBuildAnd(builder, z_dst, z_bitmask, "z_dst");
         } else {
            lp_build_name(z_dst, "z_dst");
         }
      }

      if (have_s) {
         if (s_shift) {
            LLVMValueRef shift = lp_build_const_int_vec(gallivm, s_type, s_shift);
            stencil_vals = LLVMBuildLShr(builder, stencil_vals, shift, "");
            stencil_shift = shift;  /* used below */
         }

         if (s_mask != 0xffffffff) {
            /* NOTE: this local shadows the 'mask' parameter within this block. */
            LLVMValueRef mask = lp_build_const_int_vec(gallivm, s_type, s_mask);
            stencil_vals = LLVMBuildAnd(builder, stencil_vals, mask, "");
         }

         lp_build_name(stencil_vals, "s_dst");
      }
   }

   if (stencil[0].enabled) {

      if (face) {
         LLVMValueRef zero = lp_build_const_int32(gallivm, 0);

         /* front_facing = face != 0 ? ~0 : 0 */
         front_facing = LLVMBuildICmp(builder, LLVMIntNE, face, zero, "");
         front_facing = LLVMBuildSExt(builder, front_facing,
                                      LLVMIntTypeInContext(gallivm->context,
                                             s_bld.type.length*s_bld.type.width),
                                      "");
         front_facing = LLVMBuildBitCast(builder, front_facing,
                                         s_bld.int_vec_type, "");
      }

      /* convert scalar stencil refs into vectors */
      stencil_refs[0] = lp_build_broadcast_scalar(&s_bld, stencil_refs[0]);
      stencil_refs[1] = lp_build_broadcast_scalar(&s_bld, stencil_refs[1]);

      s_pass_mask = lp_build_stencil_test(&s_bld, stencil,
                                          stencil_refs, stencil_vals,
                                          front_facing);

      /* apply stencil-fail operator */
      {
         LLVMValueRef s_fail_mask = lp_build_andnot(&s_bld, orig_mask, s_pass_mask);
         stencil_vals = lp_build_stencil_op(&s_bld, stencil, S_FAIL_OP,
                                            stencil_refs, stencil_vals,
                                            s_fail_mask, front_facing);
      }
   }

   if (depth->enabled) {
      /*
       * Convert fragment Z to the desired type, aligning the LSB to the right.
       */

      assert(z_type.width == z_src_type.width);
      assert(z_type.length == z_src_type.length);
      assert(lp_check_value(z_src_type, z_src));
      if (z_src_type.floating) {
         /*
          * Convert from floating point values
          */

         if (!z_type.floating) {
            z_src = lp_build_clamped_float_to_unsigned_norm(gallivm,
                                                            z_src_type,
                                                            z_width,
                                                            z_src);
         }
      } else {
         /*
          * Convert from unsigned normalized values.
          */

         assert(!z_src_type.sign);
         assert(!z_src_type.fixed);
         assert(z_src_type.norm);
         assert(!z_type.floating);
         if (z_src_type.width > z_width) {
            LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_src_type,
                                                        z_src_type.width - z_width);
            z_src = LLVMBuildLShr(builder, z_src, shift, "");
         }
      }
      assert(lp_check_value(z_type, z_src));

      lp_build_name(z_src, "z_src");

      /* compare src Z to dst Z, returning 'pass' mask */
      z_pass = lp_build_cmp(&z_bld, depth->func, z_src, z_dst);

      if (!stencil[0].enabled) {
         /* We can potentially skip all remaining operations here, but only
          * if stencil is disabled because we still need to update the stencil
          * buffer values.  Don't need to update Z buffer values.
          */
         lp_build_mask_update(mask, z_pass);

         if (do_branch) {
            lp_build_mask_check(mask);
            do_branch = FALSE;
         }
      }

      if (depth->writemask) {
         LLVMValueRef zselectmask;

         /* mask off bits that failed Z test */
         zselectmask = LLVMBuildAnd(builder, orig_mask, z_pass, "");

         /* mask off bits that failed stencil test */
         if (s_pass_mask) {
            zselectmask = LLVMBuildAnd(builder, zselectmask, s_pass_mask, "");
         }

         /* Mix the old and new Z buffer values.
          * z_dst[i] = zselectmask[i] ? z_src[i] : z_dst[i]
          */
         z_dst = lp_build_select(&z_bld, zselectmask, z_src, z_dst);
      }

      if (stencil[0].enabled) {
         /* update stencil buffer values according to z pass/fail result */
         LLVMValueRef z_fail_mask, z_pass_mask;

         /* apply Z-fail operator */
         z_fail_mask = lp_build_andnot(&s_bld, orig_mask, z_pass);
         stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_FAIL_OP,
                                            stencil_refs, stencil_vals,
                                            z_fail_mask, front_facing);

         /* apply Z-pass operator */
         z_pass_mask = LLVMBuildAnd(builder, orig_mask, z_pass, "");
         stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_PASS_OP,
                                            stencil_refs, stencil_vals,
                                            z_pass_mask, front_facing);
      }
   }
   else {
      /* No depth test: apply Z-pass operator to stencil buffer values which
       * passed the stencil test.
       */
      s_pass_mask = LLVMBuildAnd(builder, orig_mask, s_pass_mask, "");
      stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_PASS_OP,
                                         stencil_refs, stencil_vals,
                                         s_pass_mask, front_facing);
   }

   /* Put Z and stencil bits in the right place */
   if (have_z && z_shift) {
      LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_type, z_shift);
      z_dst = LLVMBuildShl(builder, z_dst, shift, "");
   }
   if (stencil_vals && stencil_shift)
      stencil_vals = LLVMBuildShl(builder, stencil_vals,
                                  stencil_shift, "");

   /* Finally, merge the z/stencil values */
   if (format_desc->block.bits <= 32) {
      /* Z and stencil share one value: OR them together if both exist. */
      if (have_z && have_s)
         *z_value = LLVMBuildOr(builder, z_dst, stencil_vals, "");
      else if (have_z)
         *z_value = z_dst;
      else
         *z_value = stencil_vals;
      *s_value = *z_value;
   }
   else {
      /* Wider formats: Z and stencil are returned as separate values. */
      *z_value = z_dst;
      *s_value = stencil_vals;
   }

   /* Fold the test results into the caller's live-pixel mask. */
   if (s_pass_mask)
      lp_build_mask_update(mask, s_pass_mask);

   if (depth->enabled && stencil[0].enabled)
      lp_build_mask_update(mask, z_pass);
}
/** * Load depth/stencil values. * The stored values are linear, swizzle them. * * \param type the data type of the fragment depth/stencil values * \param format_desc description of the depth/stencil surface * \param loop_counter the current loop iteration * \param depth_ptr pointer to the depth/stencil values of this 4x4 block * \param depth_stride stride of the depth/stencil buffer * \param z_fb contains z values loaded from fb (may include padding) * \param s_fb contains s values loaded from fb (may include padding) */ void lp_build_depth_stencil_load_swizzled(struct gallivm_state *gallivm, struct lp_type z_src_type, const struct util_format_description *format_desc, LLVMValueRef depth_ptr, LLVMValueRef depth_stride, LLVMValueRef *z_fb, LLVMValueRef *s_fb, LLVMValueRef loop_counter) { LLVMBuilderRef builder = gallivm->builder; LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH / 4]; LLVMValueRef zs_dst1, zs_dst2; LLVMValueRef zs_dst_ptr; LLVMValueRef depth_offset1, depth_offset2; LLVMTypeRef load_ptr_type; unsigned depth_bytes = format_desc->block.bits / 8; struct lp_type zs_type = lp_depth_type(format_desc, z_src_type.length); struct lp_type zs_load_type = zs_type; zs_load_type.length = zs_load_type.length / 2; load_ptr_type = LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0); if (z_src_type.length == 4) { unsigned i; LLVMValueRef looplsb = LLVMBuildAnd(builder, loop_counter, lp_build_const_int32(gallivm, 1), ""); LLVMValueRef loopmsb = LLVMBuildAnd(builder, loop_counter, lp_build_const_int32(gallivm, 2), ""); LLVMValueRef offset2 = LLVMBuildMul(builder, loopmsb, depth_stride, ""); depth_offset1 = LLVMBuildMul(builder, looplsb, lp_build_const_int32(gallivm, depth_bytes * 2), ""); depth_offset1 = LLVMBuildAdd(builder, depth_offset1, offset2, ""); /* just concatenate the loaded 2x2 values into 4-wide vector */ for (i = 0; i < 4; i++) { shuffles[i] = lp_build_const_int32(gallivm, i); } } else { unsigned i; LLVMValueRef loopx2 = LLVMBuildShl(builder, 
loop_counter, lp_build_const_int32(gallivm, 1), ""); assert(z_src_type.length == 8); depth_offset1 = LLVMBuildMul(builder, loopx2, depth_stride, ""); /* * We load 2x4 values, and need to swizzle them (order * 0,1,4,5,2,3,6,7) - not so hot with avx unfortunately. */ for (i = 0; i < 8; i++) { shuffles[i] = lp_build_const_int32(gallivm, (i&1) + (i&2) * 2 + (i&4) / 2); } } depth_offset2 = LLVMBuildAdd(builder, depth_offset1, depth_stride, ""); /* Load current z/stencil values from z/stencil buffer */ zs_dst_ptr = LLVMBuildGEP(builder, depth_ptr, &depth_offset1, 1, ""); zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr, load_ptr_type, ""); zs_dst1 = LLVMBuildLoad(builder, zs_dst_ptr, ""); zs_dst_ptr = LLVMBuildGEP(builder, depth_ptr, &depth_offset2, 1, ""); zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr, load_ptr_type, ""); zs_dst2 = LLVMBuildLoad(builder, zs_dst_ptr, ""); *z_fb = LLVMBuildShuffleVector(builder, zs_dst1, zs_dst2, LLVMConstVector(shuffles, zs_type.length), ""); *s_fb = *z_fb; if (format_desc->block.bits < z_src_type.width) { /* Extend destination ZS values (e.g., when reading from Z16_UNORM) */ *z_fb = LLVMBuildZExt(builder, *z_fb, lp_build_int_vec_type(gallivm, z_src_type), ""); } else if (format_desc->block.bits > 32) { /* rely on llvm to handle too wide vector we have here nicely */ unsigned i; struct lp_type typex2 = zs_type; struct lp_type s_type = zs_type; LLVMValueRef shuffles1[LP_MAX_VECTOR_LENGTH / 4]; LLVMValueRef shuffles2[LP_MAX_VECTOR_LENGTH / 4]; LLVMValueRef tmp; typex2.width = typex2.width / 2; typex2.length = typex2.length * 2; s_type.width = s_type.width / 2; s_type.floating = 0; tmp = LLVMBuildBitCast(builder, *z_fb, lp_build_vec_type(gallivm, typex2), ""); for (i = 0; i < zs_type.length; i++) { shuffles1[i] = lp_build_const_int32(gallivm, i * 2); shuffles2[i] = lp_build_const_int32(gallivm, i * 2 + 1); } *z_fb = LLVMBuildShuffleVector(builder, tmp, tmp, LLVMConstVector(shuffles1, zs_type.length), ""); *s_fb = 
LLVMBuildShuffleVector(builder, tmp, tmp, LLVMConstVector(shuffles2, zs_type.length), ""); *s_fb = LLVMBuildBitCast(builder, *s_fb, lp_build_vec_type(gallivm, s_type), ""); lp_build_name(*s_fb, "s_dst"); } lp_build_name(*z_fb, "z_dst"); lp_build_name(*s_fb, "s_dst"); lp_build_name(*z_fb, "z_dst"); }
/**
 * Generate code for performing depth and/or stencil tests.
 * We operate on a vector of values (typically a 2x2 quad).
 *
 * The current depth/stencil value is loaded from zs_dst_ptr, tested and,
 * if either depth or stencil writes are enabled, the merged result is
 * stored back to zs_dst_ptr.
 *
 * \param builder       the LLVM builder to emit code with
 * \param depth         the depth test state
 * \param stencil       the front/back stencil state
 * \param type          the data type of the fragment depth/stencil values
 * \param format_desc   description of the depth/stencil surface
 * \param mask          the alive/dead pixel mask for the quad (vector);
 *                      updated with the stencil and depth pass masks
 * \param stencil_refs  the front/back stencil ref values (scalar on entry;
 *                      overwritten with their broadcast vectors when the
 *                      stencil test is enabled)
 * \param z_src         the incoming depth/stencil values (a 2x2 quad)
 * \param zs_dst_ptr    pointer to depth/stencil values in framebuffer
 * \param face          front/back facing indicator, forwarded unchanged to
 *                      the stencil test and stencil op helpers
 *                      (NOTE(review): earlier docs describe it as a float
 *                      value - confirm against the callers)
 * \param counter       if non-NULL, lp_build_occlusion_count() is emitted
 *                      with the final mask value and this counter
 */
void
lp_build_depth_stencil_test(LLVMBuilderRef builder,
                            const struct pipe_depth_state *depth,
                            const struct pipe_stencil_state stencil[2],
                            struct lp_type type,
                            const struct util_format_description *format_desc,
                            struct lp_build_mask_context *mask,
                            LLVMValueRef stencil_refs[2],
                            LLVMValueRef z_src,
                            LLVMValueRef zs_dst_ptr,
                            LLVMValueRef face,
                            LLVMValueRef counter)
{
   struct lp_build_context bld;
   struct lp_build_context sbld;
   struct lp_type s_type;
   LLVMValueRef zs_dst, z_dst = NULL;
   LLVMValueRef stencil_vals = NULL;
   LLVMValueRef z_bitmask = NULL, stencil_shift = NULL;
   LLVMValueRef z_pass = NULL, s_pass_mask = NULL;
   /* Snapshot of the live-pixel mask on entry; the stencil fail/pass masks
    * below are derived from this value.
    */
   LLVMValueRef orig_mask = mask->value;

   /* Sanity checking */
   {
      const unsigned z_swizzle = format_desc->swizzle[0];
      const unsigned s_swizzle = format_desc->swizzle[1];

      assert(z_swizzle != UTIL_FORMAT_SWIZZLE_NONE ||
             s_swizzle != UTIL_FORMAT_SWIZZLE_NONE);

      assert(depth->enabled || stencil[0].enabled);

      assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS);
      assert(format_desc->block.width == 1);
      assert(format_desc->block.height == 1);

      /* Stencil is only supported for the two combined Z24/S8 formats. */
      if (stencil[0].enabled) {
         assert(format_desc->format == PIPE_FORMAT_Z24_UNORM_S8_USCALED ||
                format_desc->format == PIPE_FORMAT_S8_USCALED_Z24_UNORM);
      }

      assert(z_swizzle < 4);
      assert(format_desc->block.bits == type.width);
      if (type.floating) {
         assert(z_swizzle == 0);
         assert(format_desc->channel[z_swizzle].type ==
                UTIL_FORMAT_TYPE_FLOAT);
         assert(format_desc->channel[z_swizzle].size ==
                format_desc->block.bits);
      }
      else {
         assert(format_desc->channel[z_swizzle].type ==
                UTIL_FORMAT_TYPE_UNSIGNED);
         assert(format_desc->channel[z_swizzle].normalized);
         assert(!type.fixed);
         assert(!type.sign);
         assert(type.norm);
      }
   }


   /* Setup build context for Z vals */
   lp_build_context_init(&bld, builder, type);

   /* Setup build context for stencil vals */
   s_type = lp_type_int_vec(type.width);
   lp_build_context_init(&sbld, builder, s_type);

   /* Load current z/stencil value from z/stencil buffer */
   zs_dst = LLVMBuildLoad(builder, zs_dst_ptr, "");

   lp_build_name(zs_dst, "zsbufval");


   /* Compute and apply the Z/stencil bitmasks and shifts.
    */
   {
      unsigned z_shift, z_mask;
      unsigned s_shift, s_mask;

      if (get_z_shift_and_mask(format_desc, &z_shift, &z_mask)) {
         if (z_shift) {
            LLVMValueRef shift = lp_build_const_int_vec(type, z_shift);
            z_src = LLVMBuildLShr(builder, z_src, shift, "");
         }

         if (z_mask != 0xffffffff) {
            /* NOTE: this local shadows the 'mask' parameter within this block. */
            LLVMValueRef mask = lp_build_const_int_vec(type, z_mask);
            z_src = LLVMBuildAnd(builder, z_src, mask, "");
            z_dst = LLVMBuildAnd(builder, zs_dst, mask, "");
            z_bitmask = mask;  /* used below */
         }
         else {
            z_dst = zs_dst;
         }

         lp_build_name(z_dst, "zsbuf.z");
      }

      if (get_s_shift_and_mask(format_desc, &s_shift, &s_mask)) {
         if (s_shift) {
            LLVMValueRef shift = lp_build_const_int_vec(type, s_shift);
            stencil_vals = LLVMBuildLShr(builder, zs_dst, shift, "");
            stencil_shift = shift;  /* used below */
         }
         else {
            stencil_vals = zs_dst;
         }

         if (s_mask != 0xffffffff) {
            /* NOTE: this local shadows the 'mask' parameter within this block. */
            LLVMValueRef mask = lp_build_const_int_vec(type, s_mask);
            stencil_vals = LLVMBuildAnd(builder, stencil_vals, mask, "");
         }

         lp_build_name(stencil_vals, "stencil");
      }
   }


   if (stencil[0].enabled) {
      /* convert scalar stencil refs into vectors */
      stencil_refs[0] = lp_build_broadcast_scalar(&bld, stencil_refs[0]);
      stencil_refs[1] = lp_build_broadcast_scalar(&bld, stencil_refs[1]);

      s_pass_mask = lp_build_stencil_test(&sbld, stencil,
                                          stencil_refs, stencil_vals, face);

      /* apply stencil-fail operator */
      {
         LLVMValueRef s_fail_mask = lp_build_andc(&bld, orig_mask, s_pass_mask);
         stencil_vals = lp_build_stencil_op(&sbld, stencil, S_FAIL_OP,
                                            stencil_refs, stencil_vals,
                                            s_fail_mask, face);
      }
   }

   if (depth->enabled) {
      /* compare src Z to dst Z, returning 'pass' mask */
      z_pass = lp_build_cmp(&bld, depth->func, z_src, z_dst);

      if (!stencil[0].enabled) {
         /* We can potentially skip all remaining operations here, but only
          * if stencil is disabled because we still need to update the stencil
          * buffer values.  Don't need to update Z buffer values.
          */
         lp_build_mask_update(mask, z_pass);
      }

      if (depth->writemask) {
         LLVMValueRef zselectmask = mask->value;

         /* mask off bits that failed Z test */
         zselectmask = LLVMBuildAnd(builder, zselectmask, z_pass, "");

         /* mask off bits that failed stencil test */
         if (s_pass_mask) {
            zselectmask = LLVMBuildAnd(builder, zselectmask, s_pass_mask, "");
         }

         /* if combined Z/stencil format, mask off the stencil bits */
         if (z_bitmask) {
            zselectmask = LLVMBuildAnd(builder, zselectmask, z_bitmask, "");
         }

         /* Mix the old and new Z buffer values.
          * z_dst[i] = (zselectmask[i] & z_src[i]) | (~zselectmask[i] & z_dst[i])
          */
         z_dst = lp_build_select_bitwise(&bld, zselectmask, z_src, z_dst);
      }

      if (stencil[0].enabled) {
         /* update stencil buffer values according to z pass/fail result */
         LLVMValueRef z_fail_mask, z_pass_mask;

         /* apply Z-fail operator */
         z_fail_mask = lp_build_andc(&bld, orig_mask, z_pass);
         stencil_vals = lp_build_stencil_op(&sbld, stencil, Z_FAIL_OP,
                                            stencil_refs, stencil_vals,
                                            z_fail_mask, face);

         /* apply Z-pass operator */
         z_pass_mask = LLVMBuildAnd(bld.builder, orig_mask, z_pass, "");
         stencil_vals = lp_build_stencil_op(&sbld, stencil, Z_PASS_OP,
                                            stencil_refs, stencil_vals,
                                            z_pass_mask, face);
      }
   }
   else {
      /* No depth test: apply Z-pass operator to stencil buffer values which
       * passed the stencil test.
       */
      s_pass_mask = LLVMBuildAnd(bld.builder, orig_mask, s_pass_mask, "");
      stencil_vals = lp_build_stencil_op(&sbld, stencil, Z_PASS_OP,
                                         stencil_refs, stencil_vals,
                                         s_pass_mask, face);
   }

   /* The Z bits are already in the right place but we may need to shift the
    * stencil bits before ORing Z with Stencil to make the final pixel value.
    */
   if (stencil_vals && stencil_shift)
      stencil_vals = LLVMBuildShl(bld.builder, stencil_vals,
                                  stencil_shift, "");

   /* Finally, merge/store the z/stencil values */
   if ((depth->enabled && depth->writemask) ||
       (stencil[0].enabled && stencil[0].writemask)) {

      if (z_dst && stencil_vals)
         zs_dst = LLVMBuildOr(bld.builder, z_dst, stencil_vals, "");
      else if (z_dst)
         zs_dst = z_dst;
      else
         zs_dst = stencil_vals;

      LLVMBuildStore(builder, zs_dst, zs_dst_ptr);
   }

   /* Fold the test results into the caller's live-pixel mask. */
   if (s_pass_mask)
      lp_build_mask_update(mask, s_pass_mask);

   if (depth->enabled && stencil[0].enabled)
      lp_build_mask_update(mask, z_pass);

   /* Occlusion counting is optional and uses the fully updated mask. */
   if (counter)
      lp_build_occlusion_count(builder, type, mask->value, counter);
}
/** * Generate the runtime callable function for the whole fragment pipeline. * Note that the function which we generate operates on a block of 16 * pixels at at time. The block contains 2x2 quads. Each quad contains * 2x2 pixels. */ static void generate_fragment(struct llvmpipe_context *lp, struct lp_fragment_shader *shader, struct lp_fragment_shader_variant *variant, unsigned do_tri_test) { struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen); const struct lp_fragment_shader_variant_key *key = &variant->key; struct lp_type fs_type; struct lp_type blend_type; LLVMTypeRef fs_elem_type; LLVMTypeRef fs_vec_type; LLVMTypeRef fs_int_vec_type; LLVMTypeRef blend_vec_type; LLVMTypeRef blend_int_vec_type; LLVMTypeRef arg_types[14]; LLVMTypeRef func_type; LLVMTypeRef int32_vec4_type = lp_build_int32_vec4_type(); LLVMValueRef context_ptr; LLVMValueRef x; LLVMValueRef y; LLVMValueRef a0_ptr; LLVMValueRef dadx_ptr; LLVMValueRef dady_ptr; LLVMValueRef color_ptr_ptr; LLVMValueRef depth_ptr; LLVMValueRef c0, c1, c2, step0_ptr, step1_ptr, step2_ptr; LLVMBasicBlockRef block; LLVMBuilderRef builder; LLVMValueRef x0; LLVMValueRef y0; struct lp_build_sampler_soa *sampler; struct lp_build_interp_soa_context interp; LLVMValueRef fs_mask[LP_MAX_VECTOR_LENGTH]; LLVMValueRef fs_out_color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS][LP_MAX_VECTOR_LENGTH]; LLVMValueRef blend_mask; LLVMValueRef blend_in_color[NUM_CHANNELS]; LLVMValueRef function; unsigned num_fs; unsigned i; unsigned chan; unsigned cbuf; /* TODO: actually pick these based on the fs and color buffer * characteristics. 
*/ memset(&fs_type, 0, sizeof fs_type); fs_type.floating = TRUE; /* floating point values */ fs_type.sign = TRUE; /* values are signed */ fs_type.norm = FALSE; /* values are not limited to [0,1] or [-1,1] */ fs_type.width = 32; /* 32-bit float */ fs_type.length = 4; /* 4 elements per vector */ num_fs = 4; /* number of quads per block */ memset(&blend_type, 0, sizeof blend_type); blend_type.floating = FALSE; /* values are integers */ blend_type.sign = FALSE; /* values are unsigned */ blend_type.norm = TRUE; /* values are in [0,1] or [-1,1] */ blend_type.width = 8; /* 8-bit ubyte values */ blend_type.length = 16; /* 16 elements per vector */ /* * Generate the function prototype. Any change here must be reflected in * lp_jit.h's lp_jit_frag_func function pointer type, and vice-versa. */ fs_elem_type = lp_build_elem_type(fs_type); fs_vec_type = lp_build_vec_type(fs_type); fs_int_vec_type = lp_build_int_vec_type(fs_type); blend_vec_type = lp_build_vec_type(blend_type); blend_int_vec_type = lp_build_int_vec_type(blend_type); arg_types[0] = screen->context_ptr_type; /* context */ arg_types[1] = LLVMInt32Type(); /* x */ arg_types[2] = LLVMInt32Type(); /* y */ arg_types[3] = LLVMPointerType(fs_elem_type, 0); /* a0 */ arg_types[4] = LLVMPointerType(fs_elem_type, 0); /* dadx */ arg_types[5] = LLVMPointerType(fs_elem_type, 0); /* dady */ arg_types[6] = LLVMPointerType(LLVMPointerType(blend_vec_type, 0), 0); /* color */ arg_types[7] = LLVMPointerType(fs_int_vec_type, 0); /* depth */ arg_types[8] = LLVMInt32Type(); /* c0 */ arg_types[9] = LLVMInt32Type(); /* c1 */ arg_types[10] = LLVMInt32Type(); /* c2 */ /* Note: the step arrays are built as int32[16] but we interpret * them here as int32_vec4[4]. 
*/ arg_types[11] = LLVMPointerType(int32_vec4_type, 0);/* step0 */ arg_types[12] = LLVMPointerType(int32_vec4_type, 0);/* step1 */ arg_types[13] = LLVMPointerType(int32_vec4_type, 0);/* step2 */ func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0); function = LLVMAddFunction(screen->module, "shader", func_type); LLVMSetFunctionCallConv(function, LLVMCCallConv); variant->function[do_tri_test] = function; /* XXX: need to propagate noalias down into color param now we are * passing a pointer-to-pointer? */ for(i = 0; i < Elements(arg_types); ++i) if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) LLVMAddAttribute(LLVMGetParam(function, i), LLVMNoAliasAttribute); context_ptr = LLVMGetParam(function, 0); x = LLVMGetParam(function, 1); y = LLVMGetParam(function, 2); a0_ptr = LLVMGetParam(function, 3); dadx_ptr = LLVMGetParam(function, 4); dady_ptr = LLVMGetParam(function, 5); color_ptr_ptr = LLVMGetParam(function, 6); depth_ptr = LLVMGetParam(function, 7); c0 = LLVMGetParam(function, 8); c1 = LLVMGetParam(function, 9); c2 = LLVMGetParam(function, 10); step0_ptr = LLVMGetParam(function, 11); step1_ptr = LLVMGetParam(function, 12); step2_ptr = LLVMGetParam(function, 13); lp_build_name(context_ptr, "context"); lp_build_name(x, "x"); lp_build_name(y, "y"); lp_build_name(a0_ptr, "a0"); lp_build_name(dadx_ptr, "dadx"); lp_build_name(dady_ptr, "dady"); lp_build_name(color_ptr_ptr, "color_ptr"); lp_build_name(depth_ptr, "depth"); lp_build_name(c0, "c0"); lp_build_name(c1, "c1"); lp_build_name(c2, "c2"); lp_build_name(step0_ptr, "step0"); lp_build_name(step1_ptr, "step1"); lp_build_name(step2_ptr, "step2"); /* * Function body */ block = LLVMAppendBasicBlock(function, "entry"); builder = LLVMCreateBuilder(); LLVMPositionBuilderAtEnd(builder, block); generate_pos0(builder, x, y, &x0, &y0); lp_build_interp_soa_init(&interp, shader->base.tokens, key->flatshade, builder, fs_type, a0_ptr, dadx_ptr, dady_ptr, x0, y0); /* code generated texture sampling 
*/ sampler = lp_llvm_sampler_soa_create(key->sampler, context_ptr); /* loop over quads in the block */ for(i = 0; i < num_fs; ++i) { LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); LLVMValueRef out_color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS]; LLVMValueRef depth_ptr_i; int cbuf; if(i != 0) lp_build_interp_soa_update(&interp, i); depth_ptr_i = LLVMBuildGEP(builder, depth_ptr, &index, 1, ""); generate_fs(lp, shader, key, builder, fs_type, context_ptr, i, &interp, sampler, &fs_mask[i], /* output */ out_color, depth_ptr_i, do_tri_test, c0, c1, c2, step0_ptr, step1_ptr, step2_ptr); for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++) for(chan = 0; chan < NUM_CHANNELS; ++chan) fs_out_color[cbuf][chan][i] = out_color[cbuf][chan]; } sampler->destroy(sampler); /* Loop over color outputs / color buffers to do blending. */ for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++) { LLVMValueRef color_ptr; LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), cbuf, 0); /* * Convert the fs's output color and mask to fit to the blending type. */ for(chan = 0; chan < NUM_CHANNELS; ++chan) { lp_build_conv(builder, fs_type, blend_type, fs_out_color[cbuf][chan], num_fs, &blend_in_color[chan], 1); lp_build_name(blend_in_color[chan], "color%d.%c", cbuf, "rgba"[chan]); } lp_build_conv_mask(builder, fs_type, blend_type, fs_mask, num_fs, &blend_mask, 1); color_ptr = LLVMBuildLoad(builder, LLVMBuildGEP(builder, color_ptr_ptr, &index, 1, ""), ""); lp_build_name(color_ptr, "color_ptr%d", cbuf); /* * Blending. */ generate_blend(&key->blend, builder, blend_type, context_ptr, blend_mask, blend_in_color, color_ptr); } LLVMBuildRetVoid(builder); LLVMDisposeBuilder(builder); /* Verify the LLVM IR. 
If invalid, dump and abort */ #ifdef DEBUG if(LLVMVerifyFunction(function, LLVMPrintMessageAction)) { if (1) LLVMDumpValue(function); abort(); } #endif /* Apply optimizations to LLVM IR */ if (1) LLVMRunFunctionPassManager(screen->pass, function); if (LP_DEBUG & DEBUG_JIT) { /* Print the LLVM IR to stderr */ LLVMDumpValue(function); debug_printf("\n"); } /* * Translate the LLVM IR into machine code. */ variant->jit_function[do_tri_test] = (lp_jit_frag_func)LLVMGetPointerToGlobal(screen->engine, function); if (LP_DEBUG & DEBUG_ASM) lp_disassemble(variant->jit_function[do_tri_test]); }
/**
 * Emit the per-quad part of the fragment pipeline: coverage and scissor
 * masking, the TGSI shader body, the alpha test and the depth test.
 *
 * \param i            which quad in the tile, in range [0,3]
 * \param pmask        receives the quad's execution mask; it holds the
 *                     initial coverage mask first and the final mask value
 *                     on return
 * \param color        receives the shader color outputs, indexed
 *                     [color buffer][channel]
 * \param depth_ptr    forwarded to generate_depth()
 * \param do_tri_test  if 1, do triangle edge in/out testing using
 *                     c0..c2 and step0_ptr..step2_ptr; otherwise the
 *                     initial mask has all bits set
 */
static void
generate_fs(struct llvmpipe_context *lp,
            struct lp_fragment_shader *shader,
            const struct lp_fragment_shader_variant_key *key,
            LLVMBuilderRef builder,
            struct lp_type type,
            LLVMValueRef context_ptr,
            unsigned i,
            const struct lp_build_interp_soa_context *interp,
            struct lp_build_sampler_soa *sampler,
            LLVMValueRef *pmask,
            LLVMValueRef (*color)[4],
            LLVMValueRef depth_ptr,
            unsigned do_tri_test,
            LLVMValueRef c0,
            LLVMValueRef c1,
            LLVMValueRef c2,
            LLVMValueRef step0_ptr,
            LLVMValueRef step1_ptr,
            LLVMValueRef step2_ptr)
{
   LLVMTypeRef elem_type;
   LLVMTypeRef vec_type;
   LLVMTypeRef int_vec_type;
   LLVMValueRef consts_ptr;
   LLVMValueRef shader_out[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS];
   LLVMValueRef frag_z = interp->pos[2];
   struct lp_build_flow_context *flow;
   struct lp_build_mask_context exec_mask;
   boolean depth_before_shader;
   unsigned cb;
   unsigned comp;
   unsigned out_idx;

   assert(i < 4);

   elem_type = lp_build_elem_type(type);
   vec_type = lp_build_vec_type(type);
   int_vec_type = lp_build_int_vec_type(type);

   consts_ptr = lp_jit_context_constants(builder, context_ptr);

   flow = lp_build_flow_create(builder);

   /* a NULL entry later means "the shader did not write this channel" */
   memset(shader_out, 0, sizeof shader_out);

   lp_build_flow_scope_begin(flow);

   /* Register the color and z variables with the flow scope */
   for (cb = 0; cb < key->nr_cbufs; ++cb) {
      for (comp = 0; comp < NUM_CHANNELS; ++comp) {
         color[cb][comp] = LLVMGetUndef(vec_type);
         lp_build_flow_scope_declare(flow, &color[cb][comp]);
      }
   }
   lp_build_flow_scope_declare(flow, &frag_z);

   /* Initial coverage: triangle edge test, or everything alive */
   if (!do_tri_test) {
      *pmask = build_int32_vec_const(~0);
   }
   else {
      generate_tri_edge_mask(builder, i, pmask,
                             c0, c1, c2,
                             step0_ptr, step1_ptr, step2_ptr);
   }

   /* exec_mask controls execution based on the quad's alive/killed pixels */
   lp_build_mask_begin(&exec_mask, flow, type, *pmask);

   if (key->scissor) {
      lp_build_mask_update(&exec_mask,
                           generate_scissor_test(builder, context_ptr,
                                                 interp, type));
   }

   /* Depth can only be tested ahead of the shader when nothing the shader
    * does (alpha test, kill, writing z) can still change the outcome.
    */
   depth_before_shader =
      key->depth.enabled &&
      !(key->alpha.enabled ||
        shader->info.uses_kill ||
        shader->info.writes_z);

   if (depth_before_shader)
      generate_depth(builder, key, type, &exec_mask, frag_z, depth_ptr);

   lp_build_tgsi_soa(builder, shader->base.tokens, type, &exec_mask,
                     consts_ptr, interp->pos, interp->inputs,
                     shader_out, sampler);

   /* Route each written shader output to color[] or to z */
   for (out_idx = 0; out_idx < shader->info.num_outputs; ++out_idx) {
      for (comp = 0; comp < NUM_CHANNELS; ++comp) {
         LLVMValueRef out;
         unsigned semantic;

         if (!shader_out[out_idx][comp])
            continue;

         out = LLVMBuildLoad(builder, shader_out[out_idx][comp], "");
         lp_build_name(out, "output%u.%u.%c", i, out_idx, "xyzw"[comp]);

         semantic = shader->info.output_semantic_name[out_idx];

         if (semantic == TGSI_SEMANTIC_COLOR) {
            const unsigned dst_cbuf =
               shader->info.output_semantic_index[out_idx];

            lp_build_name(out, "color%u.%u.%c", i, out_idx, "rgba"[comp]);

            /* Alpha test */
            /* XXX: should the alpha reference value be passed separately? */
            /* XXX: should only test the final assignment to alpha */
            if (dst_cbuf == 0 && comp == 3) {
               LLVMValueRef alpha_ref;

               alpha_ref = lp_jit_context_alpha_ref_value(builder, context_ptr);
               alpha_ref = lp_build_broadcast(builder, vec_type, alpha_ref);
               lp_build_alpha_test(builder, &key->alpha, type,
                                   &exec_mask, out, alpha_ref);
            }

            color[dst_cbuf][comp] = out;
         }
         else if (semantic == TGSI_SEMANTIC_POSITION) {
            /* only the z component of the position output is consumed */
            if (comp == 2)
               frag_z = out;
         }
      }
   }

   if (!depth_before_shader)
      generate_depth(builder, key, type, &exec_mask, frag_z, depth_ptr);

   lp_build_mask_end(&exec_mask);

   lp_build_flow_scope_end(flow);

   lp_build_flow_destroy(flow);

   *pmask = exec_mask.value;
}
/**
 * Generate code to compute texture level of detail (lambda), split into an
 * integer part and a fractional part.
 *
 * \param bld            sample build context (static/dynamic sampler state,
 *                       int and float build contexts)
 * \param unit           texture unit, passed to the dynamic state getters
 *                       and to lp_build_rho()
 * \param ddx            partial derivatives of (s, t, r, q) with respect to X
 * \param ddy            partial derivatives of (s, t, r, q) with respect to Y
 * \param lod_bias       optional float vector with the shader lod bias
 * \param explicit_lod   optional float vector with the explicit lod
 * \param mip_filter     one of the PIPE_TEX_MIPFILTER_x values
 * \param out_lod_ipart  receives the integer part of the lod
 * \param out_lod_fpart  receives the fractional part of the lod; left at
 *                       the float zero constant unless a linear mip filter
 *                       path writes it
 *
 * XXX: The resulting lod is scalar, so ignore all but the first element of
 * derivatives, lod_bias, etc that are passed by the shader.
 */
void
lp_build_lod_selector(struct lp_build_sample_context *bld,
                      unsigned unit,
                      const LLVMValueRef ddx[4],
                      const LLVMValueRef ddy[4],
                      LLVMValueRef lod_bias, /* optional */
                      LLVMValueRef explicit_lod, /* optional */
                      unsigned mip_filter,
                      LLVMValueRef *out_lod_ipart,
                      LLVMValueRef *out_lod_fpart)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   struct lp_build_context *fbld = &bld->float_bld;
   const unsigned no_brilinear = gallivm_debug & GALLIVM_DEBUG_NO_BRILINEAR;
   LLVMValueRef lod;

   /* Defaults; overwritten below where a path produces a real value. */
   *out_lod_ipart = bld->int_bld.zero;
   *out_lod_fpart = bld->float_bld.zero;

   if (!bld->static_state->min_max_lod_equal) {
      LLVMValueRef sampler_lod_bias =
         bld->dynamic_state->lod_bias(bld->dynamic_state, bld->gallivm, unit);
      LLVMValueRef index0 = lp_build_const_int32(bld->gallivm, 0);

      if (!explicit_lod) {
         LLVMValueRef rho = lp_build_rho(bld, unit, ddx, ddy);

         /*
          * Compute lod = log2(rho)
          */

         if (!(lod_bias ||
               bld->static_state->lod_bias_non_zero ||
               bld->static_state->apply_max_lod ||
               bld->static_state->apply_min_lod)) {
            /*
             * No post-log2 adjustments are needed, so the integer and
             * fractional parts can be derived straight from rho, which
             * saves instructions.
             */
            if (mip_filter == PIPE_TEX_MIPFILTER_NONE ||
                mip_filter == PIPE_TEX_MIPFILTER_NEAREST) {
               *out_lod_ipart = lp_build_ilog2(fbld, rho);
               *out_lod_fpart = bld->float_bld.zero;
               return;
            }
            if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR && !no_brilinear) {
               lp_build_brilinear_rho(fbld, rho, BRILINEAR_FACTOR,
                                      out_lod_ipart, out_lod_fpart);
               return;
            }
         }

         /* Compile-time toggle between the exact and the fast log2. */
         if (0) {
            lod = lp_build_log2(fbld, rho);
         }
         else {
            lod = lp_build_fast_log2(fbld, rho);
         }

         /* add shader lod bias (first element only) */
         if (lod_bias) {
            LLVMValueRef shader_bias =
               LLVMBuildExtractElement(builder, lod_bias, index0, "");
            lod = LLVMBuildFAdd(builder, lod, shader_bias, "shader_lod_bias");
         }
      }
      else {
         /* the shader supplied the lod directly; use its first element */
         lod = LLVMBuildExtractElement(builder, explicit_lod, index0, "");
      }

      /* add sampler lod bias */
      if (bld->static_state->lod_bias_non_zero) {
         lod = LLVMBuildFAdd(builder, lod, sampler_lod_bias,
                             "sampler_lod_bias");
      }

      /* clamp lod: upper bound first, then lower bound */
      if (bld->static_state->apply_max_lod) {
         LLVMValueRef upper =
            bld->dynamic_state->max_lod(bld->dynamic_state, bld->gallivm, unit);
         lod = lp_build_min(fbld, lod, upper);
      }
      if (bld->static_state->apply_min_lod) {
         LLVMValueRef lower =
            bld->dynamic_state->min_lod(bld->dynamic_state, bld->gallivm, unit);
         lod = lp_build_max(fbld, lod, lower);
      }
   }
   else {
      /* User is forcing sampling from a particular mipmap level.
       * This is hit during mipmap generation.
       */
      lod = bld->dynamic_state->min_lod(bld->dynamic_state, bld->gallivm, unit);
   }

   if (mip_filter != PIPE_TEX_MIPFILTER_LINEAR) {
      *out_lod_ipart = lp_build_iround(fbld, lod);
   }
   else {
      if (no_brilinear) {
         lp_build_ifloor_fract(fbld, lod, out_lod_ipart, out_lod_fpart);
      }
      else {
         lp_build_brilinear_lod(fbld, lod, BRILINEAR_FACTOR,
                                out_lod_ipart, out_lod_fpart);
      }

      lp_build_name(*out_lod_fpart, "lod_fpart");
   }

   lp_build_name(*out_lod_ipart, "lod_ipart");
}
/**
 * Build the blend of a source and destination pixel vector, then apply the
 * color mask and the optional caller-supplied mask.
 *
 * @param gallivm       gallivm state used to set up the build context
 * @param blend         the blend state of the shader variant
 * @param cbuf_format   format of the colour buffer, indexed by rt
 * @param type          data type of the pixel vector
 * @param rt            rt number
 * @param src           blend src
 * @param dst           blend dst
 * @param mask          optional mask to apply to the blending result
 * @param const_        const blend color
 * @param swizzle       swizzle values for RGBA
 *
 * @return the result of blending src and dst; where a mask applies, dst is
 *         selected for the masked-off elements
 */
LLVMValueRef
lp_build_blend_aos(struct gallivm_state *gallivm,
                   const struct pipe_blend_state *blend,
                   const enum pipe_format *cbuf_format,
                   struct lp_type type,
                   unsigned rt,
                   LLVMValueRef src,
                   LLVMValueRef dst,
                   LLVMValueRef mask,
                   LLVMValueRef const_,
                   const unsigned char swizzle[4])
{
   const struct pipe_rt_blend_state *rt_state = &blend->rt[rt];
   struct lp_build_blend_aos_context bld;
   LLVMValueRef blended;
   unsigned alpha_swizzle;

   /* Build context setup */
   memset(&bld, 0, sizeof bld);
   lp_build_context_init(&bld.base, gallivm, type);
   bld.src = src;
   bld.dst = dst;
   bld.const_ = const_;

   /* Treat alpha as absent when its swizzle is past W or equals the
    * swizzle of the first channel.
    */
   alpha_swizzle =
      (swizzle[3] > UTIL_FORMAT_SWIZZLE_W || swizzle[3] == swizzle[0])
         ? UTIL_FORMAT_SWIZZLE_NONE
         : swizzle[3];

   if (rt_state->blend_enable) {
      LLVMValueRef src_factor;
      LLVMValueRef dst_factor;
      boolean rgb_alpha_same =
         rt_state->rgb_src_factor == rt_state->rgb_dst_factor &&
         rt_state->alpha_src_factor == rt_state->alpha_dst_factor;

      assert(rgb_alpha_same || alpha_swizzle != UTIL_FORMAT_SWIZZLE_NONE);

      src_factor = lp_build_blend_factor(&bld,
                                         rt_state->rgb_src_factor,
                                         rt_state->alpha_src_factor,
                                         alpha_swizzle);
      dst_factor = lp_build_blend_factor(&bld,
                                         rt_state->rgb_dst_factor,
                                         rt_state->alpha_dst_factor,
                                         alpha_swizzle);

      blended = lp_build_blend(&bld.base,
                               rt_state->rgb_func,
                               rt_state->rgb_src_factor,
                               rt_state->rgb_dst_factor,
                               src, dst,
                               src_factor, dst_factor,
                               rgb_alpha_same,
                               false);

      /* A distinct alpha function needs its own blend, merged back into
       * the alpha channel of the RGB result.
       */
      if (rt_state->rgb_func != rt_state->alpha_func &&
          alpha_swizzle != UTIL_FORMAT_SWIZZLE_NONE) {
         LLVMValueRef alpha_blended =
            lp_build_blend(&bld.base,
                           rt_state->alpha_func,
                           rt_state->alpha_src_factor,
                           rt_state->alpha_dst_factor,
                           src, dst,
                           src_factor, dst_factor,
                           rgb_alpha_same,
                           false);

         blended = lp_build_blend_swizzle(&bld,
                                          blended,
                                          alpha_blended,
                                          LP_BUILD_BLEND_SWIZZLE_RGBA,
                                          alpha_swizzle);
      }
   }
   else {
      /* blending disabled: the source passes through unchanged */
      blended = src;
   }

   /* A color mask is only needed when it does not cover the whole format */
   if (!util_format_colormask_full(util_format_description(cbuf_format[rt]),
                                   rt_state->colormask)) {
      LLVMValueRef color_mask =
         lp_build_const_mask_aos_swizzled(gallivm, bld.base.type,
                                          rt_state->colormask, swizzle);
      lp_build_name(color_mask, "color_mask");

      /* Combine with the input mask when there is one */
      mask = mask ? lp_build_and(&bld.base, color_mask, mask) : color_mask;
   }

   /* Apply mask, if one exists */
   if (mask)
      blended = lp_build_select(&bld.base, mask, blended, dst);

   return blended;
}
/** * Register fetch. */ static LLVMValueRef emit_fetch( struct lp_build_tgsi_aos_context *bld, const struct tgsi_full_instruction *inst, unsigned src_op) { struct lp_type type = bld->base.type; const struct tgsi_full_src_register *reg = &inst->Src[src_op]; LLVMValueRef res; unsigned chan; assert(!reg->Register.Indirect); /* * Fetch the from the register file. */ switch (reg->Register.File) { case TGSI_FILE_CONSTANT: /* * Get the constants components */ res = bld->base.undef; for (chan = 0; chan < 4; ++chan) { LLVMValueRef index; LLVMValueRef scalar_ptr; LLVMValueRef scalar; LLVMValueRef swizzle; index = LLVMConstInt(LLVMInt32Type(), reg->Register.Index*4 + chan, 0); scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, &index, 1, ""); scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, ""); lp_build_name(scalar, "const[%u].%c", reg->Register.Index, "xyzw"[chan]); /* * NOTE: constants array is always assumed to be RGBA */ swizzle = LLVMConstInt(LLVMInt32Type(), chan, 0); res = LLVMBuildInsertElement(bld->base.builder, res, scalar, swizzle, ""); } /* * Broadcast the first quaternion to all others. * * XXX: could be factored into a reusable function. 
*/ if (type.length > 4) { LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; unsigned i; for (chan = 0; chan < 4; ++chan) { shuffles[chan] = LLVMConstInt(LLVMInt32Type(), chan, 0); } for (i = 4; i < type.length; ++i) { shuffles[i] = shuffles[i % 4]; } res = LLVMBuildShuffleVector(bld->base.builder, res, bld->base.undef, LLVMConstVector(shuffles, type.length), ""); } break; case TGSI_FILE_IMMEDIATE: res = bld->immediates[reg->Register.Index]; assert(res); break; case TGSI_FILE_INPUT: res = bld->inputs[reg->Register.Index]; assert(res); break; case TGSI_FILE_TEMPORARY: { LLVMValueRef temp_ptr; temp_ptr = bld->temps[reg->Register.Index]; res = LLVMBuildLoad(bld->base.builder, temp_ptr, ""); if (!res) return bld->base.undef; } break; default: assert(0 && "invalid src register in emit_fetch()"); return bld->base.undef; } /* * Apply sign modifier. */ if (reg->Register.Absolute) { res = lp_build_abs(&bld->base, res); } if(reg->Register.Negate) { res = lp_build_negate(&bld->base, res); } /* * Swizzle the argument */ res = swizzle_aos(bld, res, reg->Register.SwizzleX, reg->Register.SwizzleY, reg->Register.SwizzleZ, reg->Register.SwizzleW); return res; }