/** * Normalized 8-bit multiplication. * * - alpha plus one * * makes the following approximation to the division (Sree) * * a*b/255 ~= (a*(b + 1)) >> 8 * * which is the fastest method that satisfies the following OpenGL criteria * * 0*0 = 0 and 255*255 = 255 * * - geometric series * * takes the geometric series approximation to the division * * t/255 = (t >> 8) + (t >> 16) + (t >> 24) .. * * in this case just the first two terms to fit in 16-bit arithmetic * * t/255 ~= (t + (t >> 8)) >> 8 * * note that by itself this doesn't satisfy the OpenGL criteria, as * 255*255 = 254, so the special case b = 255 must be accounted for, or * rounding must be used * * - geometric series plus rounding * * when using the geometric series for the division, instead of truncating * the result, apply rounding in the approximation (Jim Blinn) * * t/255 ~= (t + (t >> 8) + 0x80) >> 8 * * achieving exact results * * @sa Alvy Ray Smith, Image Compositing Fundamentals, Tech Memo 4, Aug 15, 1995, * ftp://ftp.alvyray.com/Acrobat/4_Comp.pdf * @sa Michael Herf, The "double blend trick", May 2000, * http://www.stereopsis.com/doubleblend.html */ static LLVMValueRef lp_build_mul_u8n(LLVMBuilderRef builder, struct lp_type i16_type, LLVMValueRef a, LLVMValueRef b) { LLVMValueRef c8; LLVMValueRef ab; c8 = lp_build_int_const_scalar(i16_type, 8); #if 0 /* a*b/255 ~= (a*(b + 1)) >> 8 */ b = LLVMBuildAdd(builder, b, lp_build_int_const_scalar(i16_type, 1), ""); ab = LLVMBuildMul(builder, a, b, ""); #else /* ab/255 ~= (ab + (ab >> 8) + 0x80) >> 8 */ ab = LLVMBuildMul(builder, a, b, ""); ab = LLVMBuildAdd(builder, ab, LLVMBuildLShr(builder, ab, c8, ""), ""); ab = LLVMBuildAdd(builder, ab, lp_build_int_const_scalar(i16_type, 0x80), ""); #endif ab = LLVMBuildLShr(builder, ab, c8, ""); return ab; }
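/* A scalar sanity check of the two approximations above, runnable in plain C.
 * This is an illustrative sketch, not part of the original source; it just
 * reports the worst-case error of each form against exact rounding over all
 * 8-bit inputs. */
#include <stdio.h>

int main(void)
{
   unsigned a, b, max_fast = 0, max_blinn = 0;
   for (a = 0; a <= 255; a++) {
      for (b = 0; b <= 255; b++) {
         unsigned t = a * b;
         unsigned exact = (t + 127) / 255;            /* round(t/255); no exact ties occur */
         unsigned fast  = (a * (b + 1)) >> 8;         /* "alpha plus one" */
         unsigned blinn = (t + (t >> 8) + 0x80) >> 8; /* geometric series plus rounding */
         unsigned e1 = fast > exact ? fast - exact : exact - fast;
         unsigned e2 = blinn > exact ? blinn - exact : exact - blinn;
         if (e1 > max_fast)  max_fast = e1;
         if (e2 > max_blinn) max_blinn = e2;
      }
   }
   printf("max error: alpha-plus-one %u, rounded geometric series %u\n",
          max_fast, max_blinn);
   return 0;
}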
/* * SI implements derivatives using the local data store (LDS) * All writes to the LDS happen in all executing threads at * the same time. TID is the Thread ID for the current * thread and is a value between 0 and 63, representing * the thread's position in the wavefront. * * For the pixel shader threads are grouped into quads of four pixels. * The TIDs of the pixels of a quad are: * * +------+------+ * |4n + 0|4n + 1| * +------+------+ * |4n + 2|4n + 3| * +------+------+ * * So, masking the TID with 0xfffffffc yields the TID of the top left pixel * of the quad, masking with 0xfffffffd yields the TID of the top pixel of * the current pixel's column, and masking with 0xfffffffe yields the TID * of the left pixel of the current pixel's row. * * Adding 1 yields the TID of the pixel to the right of the left pixel, and * adding 2 yields the TID of the pixel below the top pixel. */ LLVMValueRef ac_build_ddxy(struct ac_llvm_context *ctx, bool has_ds_bpermute, uint32_t mask, int idx, LLVMValueRef lds, LLVMValueRef val) { LLVMValueRef thread_id, tl, trbl, tl_tid, trbl_tid, args[2]; LLVMValueRef result; thread_id = ac_get_thread_id(ctx); tl_tid = LLVMBuildAnd(ctx->builder, thread_id, LLVMConstInt(ctx->i32, mask, false), ""); trbl_tid = LLVMBuildAdd(ctx->builder, tl_tid, LLVMConstInt(ctx->i32, idx, false), ""); if (has_ds_bpermute) { args[0] = LLVMBuildMul(ctx->builder, tl_tid, LLVMConstInt(ctx->i32, 4, false), ""); args[1] = val; tl = ac_build_intrinsic(ctx, "llvm.amdgcn.ds.bpermute", ctx->i32, args, 2, AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT); args[0] = LLVMBuildMul(ctx->builder, trbl_tid, LLVMConstInt(ctx->i32, 4, false), ""); trbl = ac_build_intrinsic(ctx, "llvm.amdgcn.ds.bpermute", ctx->i32, args, 2, AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT); } else { LLVMValueRef store_ptr, load_ptr0, load_ptr1; store_ptr = ac_build_gep0(ctx, lds, thread_id); load_ptr0 = ac_build_gep0(ctx, lds, tl_tid); load_ptr1 = ac_build_gep0(ctx, lds, trbl_tid); LLVMBuildStore(ctx->builder, val, store_ptr); tl = LLVMBuildLoad(ctx->builder, load_ptr0, ""); trbl = LLVMBuildLoad(ctx->builder, load_ptr1, ""); } tl = LLVMBuildBitCast(ctx->builder, tl, ctx->f32, ""); trbl = LLVMBuildBitCast(ctx->builder, trbl, ctx->f32, ""); result = LLVMBuildFSub(ctx->builder, trbl, tl, ""); return result; }
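/* Scalar model of the quad-index arithmetic described in the comment above,
 * as an illustrative sketch (not taken from a caller of ac_build_ddxy).
 * Pairing mask 0xfffffffe with idx 1 (left pixel, then the pixel to its
 * right) and mask 0xfffffffd with idx 2 (top pixel, then the pixel below it)
 * is my reading of the comment for per-row and per-column differences. */
#include <stdio.h>

static void ddxy_lanes(unsigned tid, unsigned mask, unsigned idx,
                       unsigned *tl, unsigned *trbl)
{
   *tl = tid & mask;   /* base pixel of the row/column/quad */
   *trbl = *tl + idx;  /* pixel the difference is taken against */
}

int main(void)
{
   unsigned tid, tl, trbl;
   for (tid = 0; tid < 8; tid++) {
      ddxy_lanes(tid, 0xfffffffeu, 1, &tl, &trbl);
      printf("tid %u: ddx = lane %u - lane %u", tid, trbl, tl);
      ddxy_lanes(tid, 0xfffffffdu, 2, &tl, &trbl);
      printf(", ddy = lane %u - lane %u\n", trbl, tl);
   }
   return 0;
}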
static void store_cached_block(struct gallivm_state *gallivm, LLVMValueRef *col, LLVMValueRef tag_value, LLVMValueRef hash_index, LLVMValueRef cache) { LLVMBuilderRef builder = gallivm->builder; LLVMValueRef ptr, indices[3]; LLVMTypeRef type_ptr4x32; unsigned count; type_ptr4x32 = LLVMPointerType(LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4), 0); indices[0] = lp_build_const_int32(gallivm, 0); indices[1] = lp_build_const_int32(gallivm, LP_BUILD_FORMAT_CACHE_MEMBER_TAGS); indices[2] = hash_index; ptr = LLVMBuildGEP(builder, cache, indices, ARRAY_SIZE(indices), ""); LLVMBuildStore(builder, tag_value, ptr); indices[1] = lp_build_const_int32(gallivm, LP_BUILD_FORMAT_CACHE_MEMBER_DATA); hash_index = LLVMBuildMul(builder, hash_index, lp_build_const_int32(gallivm, 16), ""); for (count = 0; count < 4; count++) { indices[2] = hash_index; ptr = LLVMBuildGEP(builder, cache, indices, ARRAY_SIZE(indices), ""); ptr = LLVMBuildBitCast(builder, ptr, type_ptr4x32, ""); LLVMBuildStore(builder, col[count], ptr); hash_index = LLVMBuildAdd(builder, hash_index, lp_build_const_int32(gallivm, 4), ""); } }
struct LLVMOpaqueValue *bllvm_compile_rirbop(const struct rir_expression *expr, struct llvm_traversal_ctx *ctx) { LLVMValueRef ret; LLVMValueRef left = bllvm_value_from_rir_value_or_die(expr->binaryop.a, ctx); LLVMValueRef right = bllvm_value_from_rir_value_or_die(expr->binaryop.b, ctx); switch(expr->type) { case RIR_EXPRESSION_ADD: ret = LLVMBuildAdd(ctx->builder, left, right, ""); break; case RIR_EXPRESSION_SUB: ret = LLVMBuildSub(ctx->builder, left, right, ""); break; case RIR_EXPRESSION_MUL: ret = LLVMBuildMul(ctx->builder, left, right, ""); break; case RIR_EXPRESSION_DIV: ret = LLVMBuildUDiv(ctx->builder, left, right, ""); break; default: RF_CRITICAL_FAIL("Should never get anything other than binaryop here"); break; } return ret; }
static void pointer_offset(compile_t* c, reach_type_t* t, reach_type_t* t_elem) { FIND_METHOD("_offset"); LLVMTypeRef params[3]; params[0] = t->use_type; params[1] = c->intptr; start_function(c, m, t->use_type, params, 2); // Set up a constant integer for the element size. size_t size = (size_t)LLVMABISizeOfType(c->target_data, t_elem->use_type); LLVMValueRef l_size = LLVMConstInt(c->intptr, size, false); LLVMValueRef ptr = LLVMGetParam(m->func, 0); LLVMValueRef n = LLVMGetParam(m->func, 1); // Return ptr + (n * sizeof(elem)). LLVMValueRef src = LLVMBuildPtrToInt(c->builder, ptr, c->intptr, ""); LLVMValueRef offset = LLVMBuildMul(c->builder, n, l_size, ""); LLVMValueRef result = LLVMBuildAdd(c->builder, src, offset, ""); result = LLVMBuildIntToPtr(c->builder, result, t->use_type, ""); LLVMBuildRet(c->builder, result); codegen_finishfun(c); BOX_FUNCTION(); }
static void pointer_alloc(compile_t* c, reach_type_t* t, reach_type_t* t_elem) { FIND_METHOD("_alloc"); LLVMTypeRef params[2]; params[0] = t->use_type; params[1] = c->intptr; start_function(c, m, t->use_type, params, 2); // Set up a constant integer for the allocation size. size_t size = (size_t)LLVMABISizeOfType(c->target_data, t_elem->use_type); LLVMValueRef l_size = LLVMConstInt(c->intptr, size, false); LLVMValueRef len = LLVMGetParam(m->func, 1); LLVMValueRef args[2]; args[0] = codegen_ctx(c); args[1] = LLVMBuildMul(c->builder, len, l_size, ""); LLVMValueRef result = gencall_runtime(c, "pony_alloc", args, 2, ""); result = LLVMBuildBitCast(c->builder, result, t->use_type, ""); LLVMBuildRet(c->builder, result); codegen_finishfun(c); }
LLVMValueRef gen_mul(struct node *ast) { return LLVMBuildMul(builder, codegen(ast->one), codegen(ast->two), ""); }
/** * Special case for converting clamped IEEE-754 floats to unsigned norms. * * The mathematical voodoo below may seem excessive but it is actually * paramount we do it this way for several reasons. First, there is no * single-precision FP to unsigned integer conversion Intel SSE instruction. * Second, even if there were, since the FP's mantissa takes only a fraction * of the register bits, the typical scale-and-cast approach would require * double precision for accurate results, and therefore half the throughput. * * Although the result values can be scaled to an arbitrary bit width specified * by dst_width, the actual result type will have the same width as the source * type. */ LLVMValueRef lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder, struct lp_type src_type, unsigned dst_width, LLVMValueRef src) { LLVMTypeRef int_vec_type = lp_build_int_vec_type(src_type); LLVMValueRef res; unsigned mantissa; unsigned n; unsigned long long ubound; unsigned long long mask; double scale; double bias; assert(src_type.floating); mantissa = lp_mantissa(src_type); /* We cannot carry more bits than the mantissa */ n = MIN2(mantissa, dst_width); /* These magic coefficients make the desired result appear in the * least significant bits of the mantissa. */ ubound = ((unsigned long long)1 << n); mask = ubound - 1; scale = (double)mask/ubound; bias = (double)((unsigned long long)1 << (mantissa - n)); res = LLVMBuildMul(builder, src, lp_build_const_scalar(src_type, scale), ""); res = LLVMBuildAdd(builder, res, lp_build_const_scalar(src_type, bias), ""); res = LLVMBuildBitCast(builder, res, int_vec_type, ""); if(dst_width > n) { int shift = dst_width - n; res = LLVMBuildShl(builder, res, lp_build_int_const_scalar(src_type, shift), ""); /* TODO: Fill in the empty lower bits for additional precision? */ #if 0 { LLVMValueRef msb; msb = LLVMBuildLShr(builder, res, lp_build_int_const_scalar(src_type, dst_width - 1), ""); msb = LLVMBuildShl(builder, msb, lp_build_int_const_scalar(src_type, shift), ""); msb = LLVMBuildSub(builder, msb, lp_build_int_const_scalar(src_type, 1), ""); res = LLVMBuildOr(builder, res, msb, ""); } #elif 0 while(shift > 0) { res = LLVMBuildOr(builder, res, LLVMBuildLShr(builder, res, lp_build_int_const_scalar(src_type, n), ""), ""); shift -= n; n *= 2; } #endif } else res = LLVMBuildAnd(builder, res, lp_build_int_const_scalar(src_type, mask), ""); return res; }
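/* Scalar model of the mantissa trick above, as an illustrative sketch.  It
 * assumes a 32-bit IEEE-754 float (23-bit mantissa) and dst_width = n = 8;
 * the function name and the demo loop are made up for this example. */
#include <stdio.h>
#include <stdint.h>
#include <string.h>

static unsigned float_to_unorm8(float x)   /* x assumed clamped to [0, 1] */
{
   const unsigned n = 8;
   const unsigned long long ubound = 1ULL << n;
   const unsigned long long mask = ubound - 1;
   const double scale = (double)mask / (double)ubound;
   const double bias = (double)(1ULL << (23 - n)); /* pins the exponent */
   float f = (float)(x * scale + bias);
   uint32_t bits;
   memcpy(&bits, &f, sizeof bits);
   return bits & mask;   /* scaled value sits in the low n mantissa bits */
}

int main(void)
{
   float x;
   for (x = 0.0f; x <= 1.0f; x += 0.125f)
      printf("%.3f -> %u\n", x, float_to_unorm8(x));
   return 0;
}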
/** * Extract Y, U, V channels from packed YUYV. * @param packed is a <n x i32> vector with the packed YUYV blocks * @param i is a <n x i32> vector with the x pixel coordinate (0 or 1) */ static void yuyv_to_yuv_soa(struct gallivm_state *gallivm, unsigned n, LLVMValueRef packed, LLVMValueRef i, LLVMValueRef *y, LLVMValueRef *u, LLVMValueRef *v) { LLVMBuilderRef builder = gallivm->builder; struct lp_type type; LLVMValueRef mask; memset(&type, 0, sizeof type); type.width = 32; type.length = n; assert(lp_check_value(type, packed)); assert(lp_check_value(type, i)); /* * y = (yuyv >> 16*i) & 0xff * u = (yuyv >> 8 ) & 0xff * v = (yuyv >> 24 ) & 0xff */ #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) /* * Avoid shift with per-element count. * No support on x86, gets translated to roughly 5 instructions * per element. Didn't measure performance but cuts shader size * by quite a bit (less difference if cpu has no sse4.1 support). */ if (util_cpu_caps.has_sse2 && n == 4) { LLVMValueRef sel, tmp; struct lp_build_context bld32; lp_build_context_init(&bld32, gallivm, type); tmp = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 16), ""); sel = lp_build_compare(gallivm, type, PIPE_FUNC_EQUAL, i, lp_build_const_int_vec(gallivm, type, 0)); *y = lp_build_select(&bld32, sel, packed, tmp); } else #endif { LLVMValueRef shift; shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, 16), ""); *y = LLVMBuildLShr(builder, packed, shift, ""); } *u = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 8), ""); *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 24), ""); mask = lp_build_const_int_vec(gallivm, type, 0xff); *y = LLVMBuildAnd(builder, *y, mask, "y"); *u = LLVMBuildAnd(builder, *u, mask, "u"); *v = LLVMBuildAnd(builder, *v, mask, "v"); }
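/* Scalar version of the extraction above, for illustration.  One packed
 * YUYV word holds two pixels; i selects which luma byte (0 or 1) the pixel
 * uses, while U and V are shared.  The packed constant below assumes the
 * same layout as the formulas above (Y0 in the low byte). */
#include <stdio.h>
#include <stdint.h>

static void yuyv_to_yuv(uint32_t yuyv, unsigned i,
                        uint8_t *y, uint8_t *u, uint8_t *v)
{
   *y = (yuyv >> (16 * i)) & 0xff;
   *u = (yuyv >> 8) & 0xff;
   *v = (yuyv >> 24) & 0xff;
}

int main(void)
{
   uint32_t packed = 0xD04C2090;  /* V=0xD0 Y1=0x4C U=0x20 Y0=0x90 (assumed layout) */
   uint8_t y, u, v;
   unsigned i;
   for (i = 0; i < 2; i++) {
      yuyv_to_yuv(packed, i, &y, &u, &v);
      printf("pixel %u: y=%02x u=%02x v=%02x\n", i, y, u, v);
   }
   return 0;
}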
void lp_build_exp2_approx(struct lp_build_context *bld, LLVMValueRef x, LLVMValueRef *p_exp2_int_part, LLVMValueRef *p_frac_part, LLVMValueRef *p_exp2) { const struct lp_type type = bld->type; LLVMTypeRef vec_type = lp_build_vec_type(type); LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); LLVMValueRef ipart = NULL; LLVMValueRef fpart = NULL; LLVMValueRef expipart = NULL; LLVMValueRef expfpart = NULL; LLVMValueRef res = NULL; if(p_exp2_int_part || p_frac_part || p_exp2) { /* TODO: optimize the constant case */ if(LLVMIsConstant(x)) debug_printf("%s: inefficient/imprecise constant arithmetic\n", __FUNCTION__); assert(type.floating && type.width == 32); x = lp_build_min(bld, x, lp_build_const_scalar(type, 129.0)); x = lp_build_max(bld, x, lp_build_const_scalar(type, -126.99999)); /* ipart = int(x - 0.5) */ ipart = LLVMBuildSub(bld->builder, x, lp_build_const_scalar(type, 0.5f), ""); ipart = LLVMBuildFPToSI(bld->builder, ipart, int_vec_type, ""); /* fpart = x - ipart */ fpart = LLVMBuildSIToFP(bld->builder, ipart, vec_type, ""); fpart = LLVMBuildSub(bld->builder, x, fpart, ""); } if(p_exp2_int_part || p_exp2) { /* expipart = (float) (1 << ipart) */ expipart = LLVMBuildAdd(bld->builder, ipart, lp_build_int_const_scalar(type, 127), ""); expipart = LLVMBuildShl(bld->builder, expipart, lp_build_int_const_scalar(type, 23), ""); expipart = LLVMBuildBitCast(bld->builder, expipart, vec_type, ""); } if(p_exp2) { expfpart = lp_build_polynomial(bld, fpart, lp_build_exp2_polynomial, Elements(lp_build_exp2_polynomial)); res = LLVMBuildMul(bld->builder, expipart, expfpart, ""); } if(p_exp2_int_part) *p_exp2_int_part = expipart; if(p_frac_part) *p_frac_part = fpart; if(p_exp2) *p_exp2 = res; }
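/* Scalar sketch of the decomposition used above: 2^x = 2^ipart * 2^fpart,
 * where 2^ipart is built directly from the IEEE-754 exponent field and
 * 2^fpart comes from a small polynomial.  This sketch splits with floor()
 * and uses an illustrative quadratic; the code above splits with
 * int(x - 0.5) and evaluates lp_build_exp2_polynomial instead. */
#include <math.h>
#include <stdio.h>
#include <stdint.h>
#include <string.h>

static float exp2_sketch(float x)
{
   float fpart, expipart, expfpart;
   int ipart;
   uint32_t bits;

   if (x > 128.0f) x = 128.0f;       /* keep the exponent field in range */
   if (x < -126.0f) x = -126.0f;

   ipart = (int)floorf(x);
   fpart = x - (float)ipart;         /* fpart in [0, 1) */

   bits = (uint32_t)(ipart + 127) << 23;   /* 2^ipart from the exponent bits */
   memcpy(&expipart, &bits, sizeof expipart);

   /* quadratic through 2^0, 2^0.5 and 2^1; illustrative, not the real fit */
   expfpart = 1.0f + fpart * (0.65685f + fpart * 0.34315f);

   return expipart * expfpart;
}

int main(void)
{
   float x;
   for (x = -3.0f; x <= 3.0f; x += 0.75f)
      printf("x=%6.2f sketch=%9.4f exp2f=%9.4f\n", x, exp2_sketch(x), exp2f(x));
   return 0;
}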
static void pointer_delete(compile_t* c, reach_type_t* t, reach_type_t* t_elem) { FIND_METHOD("_delete"); LLVMTypeRef params[3]; params[0] = t->use_type; params[1] = c->intptr; params[2] = c->intptr; start_function(c, m, t_elem->use_type, params, 3); // Set up a constant integer for the allocation size. size_t size = (size_t)LLVMABISizeOfType(c->target_data, t_elem->use_type); LLVMValueRef l_size = LLVMConstInt(c->intptr, size, false); LLVMValueRef ptr = LLVMGetParam(m->func, 0); LLVMValueRef n = LLVMGetParam(m->func, 1); LLVMValueRef len = LLVMGetParam(m->func, 2); LLVMValueRef elem_ptr = LLVMBuildBitCast(c->builder, ptr, LLVMPointerType(t_elem->use_type, 0), ""); LLVMValueRef result = LLVMBuildLoad(c->builder, elem_ptr, ""); LLVMValueRef dst = LLVMBuildPtrToInt(c->builder, elem_ptr, c->intptr, ""); LLVMValueRef offset = LLVMBuildMul(c->builder, n, l_size, ""); LLVMValueRef src = LLVMBuildAdd(c->builder, dst, offset, ""); LLVMValueRef elen = LLVMBuildMul(c->builder, len, l_size, ""); LLVMValueRef args[5]; args[0] = LLVMBuildIntToPtr(c->builder, dst, c->void_ptr, ""); args[1] = LLVMBuildIntToPtr(c->builder, src, c->void_ptr, ""); args[2] = elen; args[3] = LLVMConstInt(c->i32, 1, false); args[4] = LLVMConstInt(c->i1, 0, false); // llvm.memmove.*(ptr, ptr + (n * sizeof(elem)), len * sizeof(elem)) if(target_is_ilp32(c->opt->triple)) { gencall_runtime(c, "llvm.memmove.p0i8.p0i8.i32", args, 5, ""); } else { gencall_runtime(c, "llvm.memmove.p0i8.p0i8.i64", args, 5, ""); } // Return ptr[0]. LLVMBuildRet(c->builder, result); codegen_finishfun(c); }
/** * Inverse of lp_build_clamped_float_to_unsigned_norm above. */ LLVMValueRef lp_build_unsigned_norm_to_float(LLVMBuilderRef builder, unsigned src_width, struct lp_type dst_type, LLVMValueRef src) { LLVMTypeRef vec_type = lp_build_vec_type(dst_type); LLVMTypeRef int_vec_type = lp_build_int_vec_type(dst_type); LLVMValueRef bias_; LLVMValueRef res; unsigned mantissa; unsigned n; unsigned long long ubound; unsigned long long mask; double scale; double bias; mantissa = lp_mantissa(dst_type); n = MIN2(mantissa, src_width); ubound = ((unsigned long long)1 << n); mask = ubound - 1; scale = (double)ubound/mask; bias = (double)((unsigned long long)1 << (mantissa - n)); res = src; if(src_width > mantissa) { int shift = src_width - mantissa; res = LLVMBuildLShr(builder, res, lp_build_int_const_scalar(dst_type, shift), ""); } bias_ = lp_build_const_scalar(dst_type, bias); res = LLVMBuildOr(builder, res, LLVMBuildBitCast(builder, bias_, int_vec_type, ""), ""); res = LLVMBuildBitCast(builder, res, vec_type, ""); res = LLVMBuildSub(builder, res, bias_, ""); res = LLVMBuildMul(builder, res, lp_build_const_scalar(dst_type, scale), ""); return res; }
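/* Scalar model of the inverse trick, for illustration: OR the n-bit unorm
 * value into the low mantissa bits of the "bias" constant, subtract the bias
 * and rescale.  Assumes 32-bit IEEE-754 floats and src_width = n = 8. */
#include <stdio.h>
#include <stdint.h>
#include <string.h>

static float unorm8_to_float(uint8_t src)
{
   const unsigned n = 8;
   const unsigned long long ubound = 1ULL << n;
   const unsigned long long mask = ubound - 1;
   const double scale = (double)ubound / (double)mask;
   const float bias = (float)(1ULL << (23 - n));   /* 32768.0f */
   uint32_t bits;
   float f;

   memcpy(&bits, &bias, sizeof bits);
   bits |= src;                        /* value lands in the low mantissa bits */
   memcpy(&f, &bits, sizeof f);
   return (float)((f - bias) * scale); /* (src/256) * (256/255) = src/255 */
}

int main(void)
{
   printf("0 -> %f, 128 -> %f, 255 -> %f\n",
          unorm8_to_float(0), unorm8_to_float(128), unorm8_to_float(255));
   return 0;
}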
static inline LLVMValueRef LLVM_visit(ASTNode *node, LLVMBuilderRef builder) { switch(node->type) { case AST_BINARY_OP: { ASTBinaryOp *binary_op = (ASTBinaryOp*)node; LLVMValueRef a = LLVM_visit(binary_op->lhs, builder); LLVMValueRef b = LLVM_visit(binary_op->rhs, builder); switch(binary_op->op) { case '+': return LLVMBuildAdd(builder, a, b, "a + b"); case '-': return LLVMBuildSub(builder, a, b, "a - b"); case '*': return LLVMBuildMul(builder, a, b, "a * b"); case '/': return LLVMBuildSDiv(builder, a, b, "a / b"); } return NULL; /* unknown operator; don't fall through to AST_INT */ } case AST_INT: { return LLVMConstInt(LLVMInt32Type(), ((ASTInt*)node)->value, 0); } } return NULL; /* unknown node type */ }
static LLVMValueRef translateIntBinOp(NodeKind Op, LLVMValueRef ValueE1, LLVMValueRef ValueE2) { switch (Op) { case OrOp: return LLVMBuildOr (Builder, ValueE1, ValueE2, ""); case AndOp: return LLVMBuildAnd(Builder, ValueE1, ValueE2, ""); case SumOp: return LLVMBuildAdd(Builder, ValueE1, ValueE2, ""); case SubOp: return LLVMBuildSub(Builder, ValueE1, ValueE2, ""); case MultOp: return LLVMBuildMul(Builder, ValueE1, ValueE2, ""); case DivOp: return LLVMBuildSDiv(Builder, ValueE1, ValueE2, ""); case LtOp: return LLVMBuildICmp(Builder, LLVMIntSLT, ValueE1, ValueE2, ""); case LeOp: return LLVMBuildICmp(Builder, LLVMIntSLE, ValueE1, ValueE2, ""); case GtOp: return LLVMBuildICmp(Builder, LLVMIntSGT, ValueE1, ValueE2, ""); case GeOp: return LLVMBuildICmp(Builder, LLVMIntSGE, ValueE1, ValueE2, ""); case EqOp: return LLVMBuildICmp(Builder, LLVMIntEQ, ValueE1, ValueE2, ""); case DiffOp: return LLVMBuildICmp(Builder, LLVMIntNE, ValueE1, ValueE2, ""); default: return NULL; } }
static LLVMValueRef emit_array_index( struct lp_build_tgsi_soa_context *bld, const struct tgsi_full_src_register *reg, unsigned swizzle) { struct gallivm_state * gallivm = bld->bld_base.base.gallivm; LLVMValueRef addr = LLVMBuildLoad(gallivm->builder, bld->addr[reg->Indirect.Index][swizzle], ""); LLVMValueRef offset = lp_build_const_int32(gallivm, reg->Register.Index); LLVMValueRef hw_index = LLVMBuildAdd(gallivm->builder, addr, offset, ""); LLVMValueRef soa_index = LLVMBuildMul(gallivm->builder, hw_index, lp_build_const_int32(gallivm, 4), ""); LLVMValueRef array_index = LLVMBuildAdd(gallivm->builder, soa_index, lp_build_const_int32(gallivm, swizzle), ""); return array_index; }
void genprim_array_serialise_trace(compile_t* c, reach_type_t* t) { // Generate the serialise_trace function. t->serialise_trace_fn = codegen_addfun(c, genname_serialise_trace(t->name), c->trace_type); codegen_startfun(c, t->serialise_trace_fn, NULL, NULL); LLVMSetFunctionCallConv(t->serialise_trace_fn, LLVMCCallConv); LLVMSetLinkage(t->serialise_trace_fn, LLVMExternalLinkage); LLVMValueRef ctx = LLVMGetParam(t->serialise_trace_fn, 0); LLVMValueRef arg = LLVMGetParam(t->serialise_trace_fn, 1); LLVMValueRef object = LLVMBuildBitCast(c->builder, arg, t->use_type, ""); // Read the size. LLVMValueRef size = field_value(c, object, 1); // Calculate the size of the element type. ast_t* typeargs = ast_childidx(t->ast, 2); ast_t* typearg = ast_child(typeargs); reach_type_t* t_elem = reach_type(c->reach, typearg); size_t abisize = (size_t)LLVMABISizeOfType(c->target_data, t_elem->use_type); LLVMValueRef l_size = LLVMConstInt(c->intptr, abisize, false); // Reserve space for the array elements. LLVMValueRef pointer = field_value(c, object, 3); LLVMValueRef args[3]; args[0] = ctx; args[1] = pointer; args[2] = LLVMBuildMul(c->builder, size, l_size, ""); gencall_runtime(c, "pony_serialise_reserve", args, 3, ""); // Trace the array elements. trace_array_elements(c, t, ctx, object, pointer); LLVMBuildRetVoid(c->builder); codegen_finishfun(c); }
static void pointer_copy_to(compile_t* c, reach_type_t* t, reach_type_t* t_elem) { FIND_METHOD("_copy_to"); LLVMTypeRef params[3]; params[0] = t->use_type; params[1] = t->use_type; params[2] = c->intptr; start_function(c, m, t->use_type, params, 3); // Set up a constant integer for the allocation size. size_t size = (size_t)LLVMABISizeOfType(c->target_data, t_elem->use_type); LLVMValueRef l_size = LLVMConstInt(c->intptr, size, false); LLVMValueRef ptr = LLVMGetParam(m->func, 0); LLVMValueRef ptr2 = LLVMGetParam(m->func, 1); LLVMValueRef n = LLVMGetParam(m->func, 2); LLVMValueRef elen = LLVMBuildMul(c->builder, n, l_size, ""); LLVMValueRef args[5]; args[0] = LLVMBuildBitCast(c->builder, ptr2, c->void_ptr, ""); args[1] = LLVMBuildBitCast(c->builder, ptr, c->void_ptr, ""); args[2] = elen; args[3] = LLVMConstInt(c->i32, 1, false); args[4] = LLVMConstInt(c->i1, 0, false); // llvm.memcpy.*(ptr2, ptr, n * sizeof(elem), 1, 0) if(target_is_ilp32(c->opt->triple)) { gencall_runtime(c, "llvm.memcpy.p0i8.p0i8.i32", args, 5, ""); } else { gencall_runtime(c, "llvm.memcpy.p0i8.p0i8.i64", args, 5, ""); } LLVMBuildRet(c->builder, ptr); codegen_finishfun(c); BOX_FUNCTION(); }
/** * Extract Y, U, V channels from packed UYVY. * @param packed is a <n x i32> vector with the packed UYVY blocks * @param i is a <n x i32> vector with the x pixel coordinate (0 or 1) */ static void uyvy_to_yuv_soa(LLVMBuilderRef builder, unsigned n, LLVMValueRef packed, LLVMValueRef i, LLVMValueRef *y, LLVMValueRef *u, LLVMValueRef *v) { struct lp_type type; LLVMValueRef shift, mask; memset(&type, 0, sizeof type); type.width = 32; type.length = n; assert(lp_check_value(type, packed)); assert(lp_check_value(type, i)); /* * y = (uyvy >> 16*i) & 0xff * u = (uyvy ) & 0xff * v = (uyvy >> 16 ) & 0xff */ shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(type, 16), ""); shift = LLVMBuildAdd(builder, shift, lp_build_const_int_vec(type, 8), ""); *y = LLVMBuildLShr(builder, packed, shift, ""); *u = packed; *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(type, 16), ""); mask = lp_build_const_int_vec(type, 0xff); *y = LLVMBuildAnd(builder, *y, mask, "y"); *u = LLVMBuildAnd(builder, *u, mask, "u"); *v = LLVMBuildAnd(builder, *v, mask, "v"); }
static INLINE void yuv_to_rgb_soa(struct gallivm_state *gallivm, unsigned n, LLVMValueRef y, LLVMValueRef u, LLVMValueRef v, LLVMValueRef *r, LLVMValueRef *g, LLVMValueRef *b) { LLVMBuilderRef builder = gallivm->builder; struct lp_type type; struct lp_build_context bld; LLVMValueRef c0; LLVMValueRef c8; LLVMValueRef c16; LLVMValueRef c128; LLVMValueRef c255; LLVMValueRef cy; LLVMValueRef cug; LLVMValueRef cub; LLVMValueRef cvr; LLVMValueRef cvg; memset(&type, 0, sizeof type); type.sign = TRUE; type.width = 32; type.length = n; lp_build_context_init(&bld, gallivm, type); assert(lp_check_value(type, y)); assert(lp_check_value(type, u)); assert(lp_check_value(type, v)); /* * Constants */ c0 = lp_build_const_int_vec(gallivm, type, 0); c8 = lp_build_const_int_vec(gallivm, type, 8); c16 = lp_build_const_int_vec(gallivm, type, 16); c128 = lp_build_const_int_vec(gallivm, type, 128); c255 = lp_build_const_int_vec(gallivm, type, 255); cy = lp_build_const_int_vec(gallivm, type, 298); cug = lp_build_const_int_vec(gallivm, type, -100); cub = lp_build_const_int_vec(gallivm, type, 516); cvr = lp_build_const_int_vec(gallivm, type, 409); cvg = lp_build_const_int_vec(gallivm, type, -208); /* * y -= 16; * u -= 128; * v -= 128; */ y = LLVMBuildSub(builder, y, c16, ""); u = LLVMBuildSub(builder, u, c128, ""); v = LLVMBuildSub(builder, v, c128, ""); /* * r = 298 * _y + 409 * _v + 128; * g = 298 * _y - 100 * _u - 208 * _v + 128; * b = 298 * _y + 516 * _u + 128; */ y = LLVMBuildMul(builder, y, cy, ""); y = LLVMBuildAdd(builder, y, c128, ""); *r = LLVMBuildMul(builder, v, cvr, ""); *g = LLVMBuildAdd(builder, LLVMBuildMul(builder, u, cug, ""), LLVMBuildMul(builder, v, cvg, ""), ""); *b = LLVMBuildMul(builder, u, cub, ""); *r = LLVMBuildAdd(builder, *r, y, ""); *g = LLVMBuildAdd(builder, *g, y, ""); *b = LLVMBuildAdd(builder, *b, y, ""); /* * r >>= 8; * g >>= 8; * b >>= 8; */ *r = LLVMBuildAShr(builder, *r, c8, "r"); *g = LLVMBuildAShr(builder, *g, c8, "g"); *b = LLVMBuildAShr(builder, *b, c8, "b"); /* * Clamp */ *r = lp_build_clamp(&bld, *r, c0, c255); *g = lp_build_clamp(&bld, *g, c0, c255); *b = lp_build_clamp(&bld, *b, c0, c255); }
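/* Scalar reference for the fixed-point conversion above, using the same
 * BT.601-style coefficients scaled by 256.  Illustrative only; the shift on
 * a possibly negative intermediate mirrors the AShr in the vector code and
 * assumes the usual arithmetic right shift for signed ints. */
#include <stdio.h>

static int clamp255(int x) { return x < 0 ? 0 : (x > 255 ? 255 : x); }

static void yuv_to_rgb(int y, int u, int v, int *r, int *g, int *b)
{
   int _y = y - 16, _u = u - 128, _v = v - 128;
   *r = clamp255((298 * _y + 409 * _v + 128) >> 8);
   *g = clamp255((298 * _y - 100 * _u - 208 * _v + 128) >> 8);
   *b = clamp255((298 * _y + 516 * _u + 128) >> 8);
}

int main(void)
{
   int r, g, b;
   yuv_to_rgb(235, 128, 128, &r, &g, &b);   /* nominal white */
   printf("white: %d %d %d\n", r, g, b);
   yuv_to_rgb(16, 128, 128, &r, &g, &b);    /* nominal black */
   printf("black: %d %d %d\n", r, g, b);
   return 0;
}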
static void llvm_emit_tex( const struct lp_build_tgsi_action * action, struct lp_build_tgsi_context * bld_base, struct lp_build_emit_data * emit_data) { struct gallivm_state * gallivm = bld_base->base.gallivm; LLVMValueRef args[7]; unsigned c, sampler_src; struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base); if (emit_data->inst->Texture.Texture == TGSI_TEXTURE_BUFFER) { switch (emit_data->inst->Instruction.Opcode) { case TGSI_OPCODE_TXQ: { struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base); ctx->uses_tex_buffers = true; bool isEgPlus = (ctx->chip_class >= EVERGREEN); LLVMValueRef offset = lp_build_const_int32(bld_base->base.gallivm, isEgPlus ? 0 : 1); LLVMValueRef cvecval = llvm_load_const_buffer(bld_base, offset, LLVM_R600_BUFFER_INFO_CONST_BUFFER); if (!isEgPlus) { LLVMValueRef maskval[4] = { lp_build_const_int32(gallivm, 1), lp_build_const_int32(gallivm, 2), lp_build_const_int32(gallivm, 3), lp_build_const_int32(gallivm, 0), }; LLVMValueRef mask = LLVMConstVector(maskval, 4); cvecval = LLVMBuildShuffleVector(gallivm->builder, cvecval, cvecval, mask, ""); } emit_data->output[0] = cvecval; return; } case TGSI_OPCODE_TXF: { args[0] = LLVMBuildExtractElement(gallivm->builder, emit_data->args[0], lp_build_const_int32(gallivm, 0), ""); args[1] = lp_build_const_int32(gallivm, R600_MAX_CONST_BUFFERS); emit_data->output[0] = build_intrinsic(gallivm->builder, "llvm.R600.load.texbuf", emit_data->dst_type, args, 2, LLVMReadNoneAttribute); if (ctx->chip_class >= EVERGREEN) return; ctx->uses_tex_buffers = true; LLVMDumpValue(emit_data->output[0]); emit_data->output[0] = LLVMBuildBitCast(gallivm->builder, emit_data->output[0], LLVMVectorType(bld_base->base.int_elem_type, 4), ""); LLVMValueRef Mask = llvm_load_const_buffer(bld_base, lp_build_const_int32(gallivm, 0), LLVM_R600_BUFFER_INFO_CONST_BUFFER); Mask = LLVMBuildBitCast(gallivm->builder, Mask, LLVMVectorType(bld_base->base.int_elem_type, 4), ""); emit_data->output[0] = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_AND, emit_data->output[0], Mask); LLVMValueRef WComponent = LLVMBuildExtractElement(gallivm->builder, emit_data->output[0], lp_build_const_int32(gallivm, 3), ""); Mask = llvm_load_const_buffer(bld_base, lp_build_const_int32(gallivm, 1), LLVM_R600_BUFFER_INFO_CONST_BUFFER); Mask = LLVMBuildExtractElement(gallivm->builder, Mask, lp_build_const_int32(gallivm, 0), ""); Mask = LLVMBuildBitCast(gallivm->builder, Mask, bld_base->base.int_elem_type, ""); WComponent = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_OR, WComponent, Mask); emit_data->output[0] = LLVMBuildInsertElement(gallivm->builder, emit_data->output[0], WComponent, lp_build_const_int32(gallivm, 3), ""); emit_data->output[0] = LLVMBuildBitCast(gallivm->builder, emit_data->output[0], LLVMVectorType(bld_base->base.elem_type, 4), ""); } return; default: break; } } if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_TEX || emit_data->inst->Instruction.Opcode == TGSI_OPCODE_TXP) { LLVMValueRef Vector[4] = { LLVMBuildExtractElement(gallivm->builder, emit_data->args[0], lp_build_const_int32(gallivm, 0), ""), LLVMBuildExtractElement(gallivm->builder, emit_data->args[0], lp_build_const_int32(gallivm, 1), ""), LLVMBuildExtractElement(gallivm->builder, emit_data->args[0], lp_build_const_int32(gallivm, 2), ""), LLVMBuildExtractElement(gallivm->builder, emit_data->args[0], lp_build_const_int32(gallivm, 3), ""), }; switch (emit_data->inst->Texture.Texture) { case TGSI_TEXTURE_2D: case TGSI_TEXTURE_RECT: Vector[2] = Vector[3] = 
LLVMGetUndef(bld_base->base.elem_type); break; case TGSI_TEXTURE_1D: Vector[1] = Vector[2] = Vector[3] = LLVMGetUndef(bld_base->base.elem_type); break; default: break; } args[0] = lp_build_gather_values(gallivm, Vector, 4); } else { args[0] = emit_data->args[0]; } assert(emit_data->arg_count + 2 <= Elements(args)); for (c = 1; c < emit_data->arg_count; ++c) args[c] = emit_data->args[c]; if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_TXF) { args[1] = LLVMBuildShl(gallivm->builder, args[1], lp_build_const_int32(gallivm, 1), ""); args[2] = LLVMBuildShl(gallivm->builder, args[2], lp_build_const_int32(gallivm, 1), ""); args[3] = LLVMBuildShl(gallivm->builder, args[3], lp_build_const_int32(gallivm, 1), ""); } sampler_src = emit_data->inst->Instruction.NumSrcRegs-1; args[c++] = lp_build_const_int32(gallivm, emit_data->inst->Src[sampler_src].Register.Index + R600_MAX_CONST_BUFFERS); args[c++] = lp_build_const_int32(gallivm, emit_data->inst->Src[sampler_src].Register.Index); args[c++] = lp_build_const_int32(gallivm, emit_data->inst->Texture.Texture); if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_TXF && (emit_data->inst->Texture.Texture == TGSI_TEXTURE_2D_MSAA || emit_data->inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY_MSAA)) { switch (emit_data->inst->Texture.Texture) { case TGSI_TEXTURE_2D_MSAA: args[6] = lp_build_const_int32(gallivm, TGSI_TEXTURE_2D); break; case TGSI_TEXTURE_2D_ARRAY_MSAA: args[6] = lp_build_const_int32(gallivm, TGSI_TEXTURE_2D_ARRAY); break; default: break; } if (ctx->has_compressed_msaa_texturing) { LLVMValueRef ldptr_args[10] = { args[0], // Coord args[1], // Offset X args[2], // Offset Y args[3], // Offset Z args[4], args[5], lp_build_const_int32(gallivm, 1), lp_build_const_int32(gallivm, 1), lp_build_const_int32(gallivm, 1), lp_build_const_int32(gallivm, 1) }; LLVMValueRef ptr = build_intrinsic(gallivm->builder, "llvm.R600.ldptr", emit_data->dst_type, ldptr_args, 10, LLVMReadNoneAttribute); LLVMValueRef Tmp = LLVMBuildExtractElement(gallivm->builder, args[0], lp_build_const_int32(gallivm, 3), ""); Tmp = LLVMBuildMul(gallivm->builder, Tmp, lp_build_const_int32(gallivm, 4), ""); LLVMValueRef ResX = LLVMBuildExtractElement(gallivm->builder, ptr, lp_build_const_int32(gallivm, 0), ""); ResX = LLVMBuildBitCast(gallivm->builder, ResX, bld_base->base.int_elem_type, ""); Tmp = LLVMBuildLShr(gallivm->builder, ResX, Tmp, ""); Tmp = LLVMBuildAnd(gallivm->builder, Tmp, lp_build_const_int32(gallivm, 0xF), ""); args[0] = LLVMBuildInsertElement(gallivm->builder, args[0], Tmp, lp_build_const_int32(gallivm, 3), ""); args[c++] = lp_build_const_int32(gallivm, emit_data->inst->Texture.Texture); } } emit_data->output[0] = build_intrinsic(gallivm->builder, action->intr_name, emit_data->dst_type, args, c, LLVMReadNoneAttribute); if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_TXQ && ((emit_data->inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || emit_data->inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY))) if (emit_data->inst->Dst[0].Register.WriteMask & 4) { LLVMValueRef offset = lp_build_const_int32(bld_base->base.gallivm, 0); LLVMValueRef ZLayer = LLVMBuildExtractElement(gallivm->builder, llvm_load_const_buffer(bld_base, offset, CONSTANT_TXQ_BUFFER), lp_build_const_int32(gallivm, 0), ""); emit_data->output[0] = LLVMBuildInsertElement(gallivm->builder, emit_data->output[0], ZLayer, lp_build_const_int32(gallivm, 2), ""); struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base); ctx->has_txq_cube_array_z_comp = true; } }
static LLVMValueRef get_stritem_len_fn(struct llvm_ctx *ctx) { if(ctx->stritem_len_fn != NULL) return ctx->stritem_len_fn; /* returns (i32 len, i32 new_tpos) * params (word *utcbptr, i32 tpos) * * when return value "new_tpos" > tmax + 1, the result is invalid. the function * should also not be called when tpos > tmax + 1. */ LLVMTypeRef ret_types[2] = { ctx->i32t, ctx->i32t }, parm_types[2] = { LLVMPointerType(ctx->wordt, 0), ctx->i32t }, ret_type = LLVMStructTypeInContext(ctx->ctx, ret_types, 2, 0), fn_type = LLVMFunctionType(ret_type, parm_types, 2, 0); LLVMValueRef fn = LLVMAddFunction(ctx->module, "__muidl_get_stritem_len", fn_type); LLVMSetVisibility(fn, LLVMHiddenVisibility); LLVMSetLinkage(fn, LLVMInternalLinkage); V fn_args[2]; LLVMGetParams(fn, fn_args); LLVMAddAttribute(fn_args[0], LLVMNoCaptureAttribute); for(int i=0; i<2; i++) { LLVMAddAttribute(fn_args[i], LLVMInRegAttribute); } ctx->stritem_len_fn = fn; LLVMBuilderRef old_builder = ctx->builder; ctx->builder = LLVMCreateBuilderInContext(ctx->ctx); LLVMBasicBlockRef entry_bb = LLVMAppendBasicBlockInContext(ctx->ctx, fn, "EntryBlock"), loop_bb = LLVMAppendBasicBlockInContext(ctx->ctx, fn, "loop"), valid_bb = LLVMAppendBasicBlockInContext(ctx->ctx, fn, "valid"), exit_bb = LLVMAppendBasicBlockInContext(ctx->ctx, fn, "exit"); LLVMPositionBuilderAtEnd(ctx->builder, entry_bb); LLVMValueRef old_utcb = ctx->utcb, old_tpos = ctx->tpos; ctx->utcb = fn_args[0]; ctx->tpos = fn_args[1]; LLVMBuildBr(ctx->builder, loop_bb); LLVMPositionBuilderAtEnd(ctx->builder, exit_bb); LLVMValueRef exit_len_phi = LLVMBuildPhi(ctx->builder, ctx->i32t, "exit.len.phi"), exit_tpos_phi = LLVMBuildPhi(ctx->builder, ctx->i32t, "exit.tpos.phi"); LLVMValueRef rvals[2] = { exit_len_phi, exit_tpos_phi }; LLVMBuildAggregateRet(ctx->builder, rvals, 2); LLVMPositionBuilderAtEnd(ctx->builder, loop_bb); LLVMValueRef len_phi = LLVMBuildPhi(ctx->builder, ctx->i32t, "len.phi"), tpos_phi = LLVMBuildPhi(ctx->builder, ctx->i32t, "tpos.phi"); LLVMAddIncoming(len_phi, &ctx->zero, &entry_bb, 1); LLVMAddIncoming(tpos_phi, &ctx->tpos, &entry_bb, 1); ctx->tpos = tpos_phi; /* test: if *tpos doesn't look like a string item, conk out. */ LLVMValueRef infoword = build_utcb_load(ctx, ctx->tpos, "si.info"); LLVMValueRef is_cond = LLVMBuildICmp(ctx->builder, LLVMIntEQ, ctx->zero, LLVMBuildAnd(ctx->builder, infoword, CONST_WORD(1 << 4), "infoword.si.mask"), "infoword.si.cond"); /* anything + 100 is sure to be > tmax + 1. 
*/ LLVMValueRef fucked_tpos = LLVMBuildAdd(ctx->builder, tpos_phi, CONST_INT(100), "f****d.tpos"); branch_set_phi(ctx, exit_len_phi, len_phi); branch_set_phi(ctx, exit_tpos_phi, fucked_tpos); LLVMBuildCondBr(ctx->builder, is_cond, valid_bb, exit_bb); LLVMPositionBuilderAtEnd(ctx->builder, valid_bb); LLVMValueRef string_length = LLVMBuildTruncOrBitCast(ctx->builder, LLVMBuildLShr(ctx->builder, infoword, CONST_INT(10), "si.info.len"), ctx->i32t, "si.info.len.int"), string_j = LLVMBuildTruncOrBitCast(ctx->builder, LLVMBuildAnd(ctx->builder, CONST_WORD(0x1f), LLVMBuildLShr(ctx->builder, infoword, CONST_WORD(4), "si.info.j.shift"), "si.info.j.masked"), ctx->i32t, "si.info.j"), string_c = LLVMBuildTruncOrBitCast(ctx->builder, LLVMBuildAnd(ctx->builder, CONST_WORD(1 << 9), infoword, "si.info.c.masked"), ctx->i32t, "si.info.c.masked.int"), c_cond = LLVMBuildICmp(ctx->builder, LLVMIntNE, string_c, CONST_WORD(0), "si.info.c.cond"), new_len = LLVMBuildAdd(ctx->builder, len_phi, LLVMBuildMul(ctx->builder, string_length, LLVMBuildAdd(ctx->builder, string_j, CONST_INT(1), "j.plus.one"), "len.incr"), "len.new"), new_tpos = LLVMBuildAdd(ctx->builder, ctx->tpos, LLVMBuildSelect(ctx->builder, c_cond, LLVMBuildAdd(ctx->builder, CONST_INT(2), string_j, "cont.tpos.bump"), CONST_INT(2), "tpos.bump"), "tpos.new"); LLVMAddIncoming(len_phi, &new_len, &valid_bb, 1); LLVMAddIncoming(tpos_phi, &new_tpos, &valid_bb, 1); LLVMAddIncoming(exit_len_phi, &new_len, &valid_bb, 1); LLVMAddIncoming(exit_tpos_phi, &new_tpos, &valid_bb, 1); LLVMBuildCondBr(ctx->builder, c_cond, loop_bb, exit_bb); LLVMDisposeBuilder(ctx->builder); ctx->builder = old_builder; ctx->utcb = old_utcb; ctx->tpos = old_tpos; return ctx->stritem_len_fn; }
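/* Scalar decode of one string-item info word, using the same bit positions
 * as the loop above: length in bits 10 and up, substring count "j" in bits
 * 4..8, continuation bit "c" at bit 9.  The struct, function and the word
 * value below are made up for illustration. */
#include <stdio.h>
#include <stdint.h>

struct stritem {
   unsigned len;        /* string_length * (j + 1) bytes described */
   unsigned tpos_step;  /* words consumed in the typed-item area */
};

static struct stritem decode_stritem(uintptr_t infoword)
{
   struct stritem it;
   unsigned length = (unsigned)(infoword >> 10);
   unsigned j = (unsigned)((infoword >> 4) & 0x1f);
   int c = (infoword & (1u << 9)) != 0;
   it.len = length * (j + 1);
   it.tpos_step = c ? 2 + j : 2;
   return it;
}

int main(void)
{
   uintptr_t word = (64u << 10) | (1u << 4);  /* 64-byte strings, j = 1, c = 0 */
   struct stritem it = decode_stritem(word);
   printf("len=%u, tpos advances by %u\n", it.len, it.tpos_step);
   return 0;
}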
/* * gen_operator_expression * * Code generation for operator expressions. Most of them have straightforward * translations into LLVM instructions and are handled directly here. */ static LLVMValueRef gen_operator_expression (gencodectx_t gctx, expr_node_t *exp, LLVMTypeRef neededtype) { expr_node_t *lhs = expr_op_lhs(exp); expr_node_t *rhs = expr_op_rhs(exp); optype_t op = expr_op_type(exp); LLVMBuilderRef builder = gctx->curfn->builder; LLVMTypeRef inttype; LLVMValueRef lval, rval, result; if (op == OPER_FETCH) { return gen_fetch(gctx, rhs, neededtype); } if (op == OPER_ASSIGN) { LLVMValueRef val = llvmgen_assignment(gctx, lhs, rhs); return llvmgen_adjustval(gctx, val, neededtype, 0); } if (op == OPER_SHIFT) { return gen_shift(gctx, lhs, rhs, neededtype); } inttype = LLVMIntTypeInContext(gctx->llvmctx, machine_scalar_bits(gctx->mach)); lval = (lhs == 0 ? 0 : llvmgen_expression(gctx, lhs, inttype)); rval = llvmgen_expression(gctx, rhs, inttype); switch (op) { case OPER_UNARY_PLUS: result = rval; break; case OPER_UNARY_MINUS: result = LLVMBuildNeg(builder, rval, llvmgen_temp(gctx)); break; case OPER_ADD: result = LLVMBuildAdd(builder, lval, rval, llvmgen_temp(gctx)); break; case OPER_SUBTRACT: result = LLVMBuildSub(builder, lval, rval, llvmgen_temp(gctx)); break; case OPER_MULT: result = LLVMBuildMul(builder, lval, rval, llvmgen_temp(gctx)); break; case OPER_DIV: result = LLVMBuildUDiv(builder, lval, rval, llvmgen_temp(gctx)); break; case OPER_MODULO: result = LLVMBuildURem(builder, lval, rval, llvmgen_temp(gctx)); break; case OPER_AND: result = LLVMBuildAnd(builder, lval, rval, llvmgen_temp(gctx)); break; case OPER_OR: result = LLVMBuildOr(builder, lval, rval, llvmgen_temp(gctx)); break; case OPER_NOT: result = LLVMBuildNot(builder, rval, llvmgen_temp(gctx)); break; case OPER_XOR: result = LLVMBuildXor(builder, lval, rval, llvmgen_temp(gctx)); break; case OPER_EQV: result = LLVMBuildXor(builder, lval, rval, llvmgen_temp(gctx)); result = LLVMBuildNot(builder, result, llvmgen_temp(gctx)); break; default: if (op >= OPER_CMP_EQL && op <= OPER_CMP_GEQA) { result = LLVMBuildICmp(builder, llvmgen_predfromop(op, machine_addr_signed(gctx->mach)), lval, rval, llvmgen_temp(gctx)); } else { // Everything should be covered expr_signal(gctx->ectx, STC__INTCMPERR, "gen_operator_expression"); result = LLVMConstNull(inttype); } break; } return llvmgen_adjustval(gctx, result, neededtype, 0); } /* gen_operator_expression */
/** * See http://www.devmaster.net/forums/showthread.php?p=43580 */ void lp_build_log2_approx(struct lp_build_context *bld, LLVMValueRef x, LLVMValueRef *p_exp, LLVMValueRef *p_floor_log2, LLVMValueRef *p_log2) { const struct lp_type type = bld->type; LLVMTypeRef vec_type = lp_build_vec_type(type); LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); LLVMValueRef expmask = lp_build_int_const_scalar(type, 0x7f800000); LLVMValueRef mantmask = lp_build_int_const_scalar(type, 0x007fffff); LLVMValueRef one = LLVMConstBitCast(bld->one, int_vec_type); LLVMValueRef i = NULL; LLVMValueRef exp = NULL; LLVMValueRef mant = NULL; LLVMValueRef logexp = NULL; LLVMValueRef logmant = NULL; LLVMValueRef res = NULL; if(p_exp || p_floor_log2 || p_log2) { /* TODO: optimize the constant case */ if(LLVMIsConstant(x)) debug_printf("%s: inefficient/imprecise constant arithmetic\n", __FUNCTION__); assert(type.floating && type.width == 32); i = LLVMBuildBitCast(bld->builder, x, int_vec_type, ""); /* exp = (float) exponent(x) */ exp = LLVMBuildAnd(bld->builder, i, expmask, ""); } if(p_floor_log2 || p_log2) { logexp = LLVMBuildLShr(bld->builder, exp, lp_build_int_const_scalar(type, 23), ""); logexp = LLVMBuildSub(bld->builder, logexp, lp_build_int_const_scalar(type, 127), ""); logexp = LLVMBuildSIToFP(bld->builder, logexp, vec_type, ""); } if(p_log2) { /* mant = (float) mantissa(x) */ mant = LLVMBuildAnd(bld->builder, i, mantmask, ""); mant = LLVMBuildOr(bld->builder, mant, one, ""); mant = LLVMBuildBitCast(bld->builder, mant, vec_type, ""); logmant = lp_build_polynomial(bld, mant, lp_build_log2_polynomial, Elements(lp_build_log2_polynomial)); /* This effectively increases the polynomial degree by one, but ensures that log2(1) == 0*/ logmant = LLVMBuildMul(bld->builder, logmant, LLVMBuildSub(bld->builder, mant, bld->one, ""), ""); res = LLVMBuildAdd(bld->builder, logmant, logexp, ""); } if(p_exp) *p_exp = exp; if(p_floor_log2) *p_floor_log2 = logexp; if(p_log2) *p_log2 = res; }
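/* Scalar sketch of the float decomposition used above: the exponent field
 * gives floor(log2(x)) and the mantissa, re-biased into [1, 2), feeds the
 * polynomial part.  log2f() stands in for lp_build_log2_polynomial here;
 * the function name is made up for this example. */
#include <math.h>
#include <stdio.h>
#include <stdint.h>
#include <string.h>

static void log2_parts(float x, float *floor_log2, float *log2_x)
{
   uint32_t i, expbits, mantbits;
   float mant;

   memcpy(&i, &x, sizeof i);
   expbits = i & 0x7f800000u;
   mantbits = i & 0x007fffffu;

   *floor_log2 = (float)((int)(expbits >> 23) - 127);

   mantbits |= 0x3f800000u;            /* OR in the exponent of 1.0f -> [1, 2) */
   memcpy(&mant, &mantbits, sizeof mant);

   /* the real code evaluates a polynomial in mant and multiplies by
    * (mant - 1) so that log2(1) == 0; log2f is used here for brevity */
   *log2_x = *floor_log2 + log2f(mant);
}

int main(void)
{
   float fl, lg;
   log2_parts(10.0f, &fl, &lg);
   printf("floor(log2(10)) = %g, log2(10) = %g\n", fl, lg);
   return 0;
}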
void genprim_array_deserialise(compile_t* c, reach_type_t* t) { // Generate the deserisalise function. t->deserialise_fn = codegen_addfun(c, genname_serialise(t->name), c->trace_type); codegen_startfun(c, t->deserialise_fn, NULL, NULL); LLVMSetFunctionCallConv(t->deserialise_fn, LLVMCCallConv); LLVMValueRef ctx = LLVMGetParam(t->deserialise_fn, 0); LLVMValueRef arg = LLVMGetParam(t->deserialise_fn, 1); LLVMValueRef object = LLVMBuildBitCast(c->builder, arg, t->structure_ptr, ""); gendeserialise_typeid(c, t, object); // Deserialise the array contents. LLVMValueRef alloc = field_value(c, object, 2); LLVMValueRef ptr_offset = field_value(c, object, 3); ptr_offset = LLVMBuildPtrToInt(c->builder, ptr_offset, c->intptr, ""); ast_t* typeargs = ast_childidx(t->ast, 2); ast_t* typearg = ast_child(typeargs); reach_type_t* t_elem = reach_type(c->reach, typearg); size_t abisize = (size_t)LLVMABISizeOfType(c->target_data, t_elem->use_type); LLVMValueRef l_size = LLVMConstInt(c->intptr, abisize, false); LLVMValueRef args[3]; args[0] = ctx; args[1] = ptr_offset; args[2] = LLVMBuildMul(c->builder, alloc, l_size, ""); LLVMValueRef ptr = gencall_runtime(c, "pony_deserialise_block", args, 3, ""); LLVMValueRef ptr_loc = LLVMBuildStructGEP(c->builder, object, 3, ""); LLVMBuildStore(c->builder, ptr, ptr_loc); if((t_elem->underlying == TK_PRIMITIVE) && (t_elem->primitive != NULL)) { // Do nothing. A memcpy is sufficient. } else { LLVMValueRef size = field_value(c, object, 1); ptr = LLVMBuildBitCast(c->builder, ptr, LLVMPointerType(t_elem->use_type, 0), ""); LLVMBasicBlockRef entry_block = LLVMGetInsertBlock(c->builder); LLVMBasicBlockRef cond_block = codegen_block(c, "cond"); LLVMBasicBlockRef body_block = codegen_block(c, "body"); LLVMBasicBlockRef post_block = codegen_block(c, "post"); LLVMBuildBr(c->builder, cond_block); // While the index is less than the size, deserialise an element. The // initial index when coming from the entry block is zero. LLVMPositionBuilderAtEnd(c->builder, cond_block); LLVMValueRef phi = LLVMBuildPhi(c->builder, c->intptr, ""); LLVMValueRef zero = LLVMConstInt(c->intptr, 0, false); LLVMAddIncoming(phi, &zero, &entry_block, 1); LLVMValueRef test = LLVMBuildICmp(c->builder, LLVMIntULT, phi, size, ""); LLVMBuildCondBr(c->builder, test, body_block, post_block); // The phi node is the index. Get the element and deserialise it. LLVMPositionBuilderAtEnd(c->builder, body_block); LLVMValueRef elem_ptr = LLVMBuildGEP(c->builder, ptr, &phi, 1, ""); gendeserialise_element(c, t_elem, false, ctx, elem_ptr); // Add one to the phi node and branch back to the cond block. LLVMValueRef one = LLVMConstInt(c->intptr, 1, false); LLVMValueRef inc = LLVMBuildAdd(c->builder, phi, one, ""); body_block = LLVMGetInsertBlock(c->builder); LLVMAddIncoming(phi, &inc, &body_block, 1); LLVMBuildBr(c->builder, cond_block); LLVMPositionBuilderAtEnd(c->builder, post_block); } LLVMBuildRetVoid(c->builder); codegen_finishfun(c); }
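/* Minimal LLVM-C sketch of the phi-based loop shape used above (cond/body/post
 * blocks, with the index phi fed from both the entry and body blocks).  It
 * emits IR for "sum of 0..n-1" rather than element deserialisation, just to
 * stay self-contained; the module and function names are made up. */
#include <llvm-c/Core.h>

int main(void)
{
   LLVMModuleRef mod = LLVMModuleCreateWithName("loop_sketch");
   LLVMTypeRef i64 = LLVMInt64Type();
   LLVMTypeRef fn_type = LLVMFunctionType(i64, &i64, 1, 0);
   LLVMValueRef fn = LLVMAddFunction(mod, "sum_to", fn_type);
   LLVMBuilderRef b = LLVMCreateBuilder();

   LLVMBasicBlockRef entry = LLVMAppendBasicBlock(fn, "entry");
   LLVMBasicBlockRef cond = LLVMAppendBasicBlock(fn, "cond");
   LLVMBasicBlockRef body = LLVMAppendBasicBlock(fn, "body");
   LLVMBasicBlockRef post = LLVMAppendBasicBlock(fn, "post");

   LLVMValueRef n = LLVMGetParam(fn, 0);
   LLVMValueRef zero = LLVMConstInt(i64, 0, 0);
   LLVMValueRef one = LLVMConstInt(i64, 1, 0);

   LLVMPositionBuilderAtEnd(b, entry);
   LLVMBuildBr(b, cond);

   /* The phis are the loop index and accumulator; the initial values come
    * from the entry block. */
   LLVMPositionBuilderAtEnd(b, cond);
   LLVMValueRef phi = LLVMBuildPhi(b, i64, "i");
   LLVMValueRef acc = LLVMBuildPhi(b, i64, "acc");
   LLVMAddIncoming(phi, &zero, &entry, 1);
   LLVMAddIncoming(acc, &zero, &entry, 1);
   LLVMValueRef test = LLVMBuildICmp(b, LLVMIntULT, phi, n, "");
   LLVMBuildCondBr(b, test, body, post);

   /* Loop body: update the accumulator, bump the index, branch back. */
   LLVMPositionBuilderAtEnd(b, body);
   LLVMValueRef next_acc = LLVMBuildAdd(b, acc, phi, "");
   LLVMValueRef inc = LLVMBuildAdd(b, phi, one, "");
   LLVMAddIncoming(phi, &inc, &body, 1);
   LLVMAddIncoming(acc, &next_acc, &body, 1);
   LLVMBuildBr(b, cond);

   LLVMPositionBuilderAtEnd(b, post);
   LLVMBuildRet(b, acc);

   LLVMDumpModule(mod);
   LLVMDisposeBuilder(b);
   LLVMDisposeModule(mod);
   return 0;
}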
void genprim_array_serialise(compile_t* c, reach_type_t* t) { // Generate the serialise function. t->serialise_fn = codegen_addfun(c, genname_serialise(t->name), c->serialise_type); codegen_startfun(c, t->serialise_fn, NULL, NULL); LLVMSetFunctionCallConv(t->serialise_fn, LLVMCCallConv); LLVMValueRef ctx = LLVMGetParam(t->serialise_fn, 0); LLVMValueRef arg = LLVMGetParam(t->serialise_fn, 1); LLVMValueRef addr = LLVMGetParam(t->serialise_fn, 2); LLVMValueRef offset = LLVMGetParam(t->serialise_fn, 3); LLVMValueRef mut = LLVMGetParam(t->serialise_fn, 4); LLVMValueRef object = LLVMBuildBitCast(c->builder, arg, t->structure_ptr, ""); LLVMValueRef offset_addr = LLVMBuildAdd(c->builder, LLVMBuildPtrToInt(c->builder, addr, c->intptr, ""), offset, ""); genserialise_typeid(c, t, offset_addr); // Don't serialise our contents if we are opaque. LLVMBasicBlockRef body_block = codegen_block(c, "body"); LLVMBasicBlockRef post_block = codegen_block(c, "post"); LLVMValueRef test = LLVMBuildICmp(c->builder, LLVMIntNE, mut, LLVMConstInt(c->i32, PONY_TRACE_OPAQUE, false), ""); LLVMBuildCondBr(c->builder, test, body_block, post_block); LLVMPositionBuilderAtEnd(c->builder, body_block); // Write the size twice, effectively rewriting alloc to be the same as size. LLVMValueRef size = field_value(c, object, 1); LLVMValueRef size_loc = field_loc(c, offset_addr, t->structure, c->intptr, 1); LLVMBuildStore(c->builder, size, size_loc); LLVMValueRef alloc_loc = field_loc(c, offset_addr, t->structure, c->intptr, 2); LLVMBuildStore(c->builder, size, alloc_loc); // Write the pointer. LLVMValueRef ptr = field_value(c, object, 3); // The resulting offset will only be invalid (i.e. have the high bit set) if // the size is zero. For an opaque array, we don't serialise the contents, // so we don't get here, so we don't end up with an invalid offset. LLVMValueRef args[5]; args[0] = ctx; args[1] = ptr; LLVMValueRef ptr_offset = gencall_runtime(c, "pony_serialise_offset", args, 2, ""); LLVMValueRef ptr_loc = field_loc(c, offset_addr, t->structure, c->intptr, 3); LLVMBuildStore(c->builder, ptr_offset, ptr_loc); LLVMValueRef ptr_offset_addr = LLVMBuildAdd(c->builder, ptr_offset, LLVMBuildPtrToInt(c->builder, addr, c->intptr, ""), ""); // Serialise elements. 
ast_t* typeargs = ast_childidx(t->ast, 2); ast_t* typearg = ast_child(typeargs); reach_type_t* t_elem = reach_type(c->reach, typearg); size_t abisize = (size_t)LLVMABISizeOfType(c->target_data, t_elem->use_type); LLVMValueRef l_size = LLVMConstInt(c->intptr, abisize, false); if((t_elem->underlying == TK_PRIMITIVE) && (t_elem->primitive != NULL)) { // memcpy machine words args[0] = LLVMBuildIntToPtr(c->builder, ptr_offset_addr, c->void_ptr, ""); args[1] = LLVMBuildBitCast(c->builder, ptr, c->void_ptr, ""); args[2] = LLVMBuildMul(c->builder, size, l_size, ""); args[3] = LLVMConstInt(c->i32, 1, false); args[4] = LLVMConstInt(c->i1, 0, false); if(target_is_ilp32(c->opt->triple)) { gencall_runtime(c, "llvm.memcpy.p0i8.p0i8.i32", args, 5, ""); } else { gencall_runtime(c, "llvm.memcpy.p0i8.p0i8.i64", args, 5, ""); } } else { ptr = LLVMBuildBitCast(c->builder, ptr, LLVMPointerType(t_elem->use_type, 0), ""); LLVMBasicBlockRef entry_block = LLVMGetInsertBlock(c->builder); LLVMBasicBlockRef cond_block = codegen_block(c, "cond"); LLVMBasicBlockRef body_block = codegen_block(c, "body"); LLVMBasicBlockRef post_block = codegen_block(c, "post"); LLVMValueRef offset_var = LLVMBuildAlloca(c->builder, c->intptr, ""); LLVMBuildStore(c->builder, ptr_offset_addr, offset_var); LLVMBuildBr(c->builder, cond_block); // While the index is less than the size, serialise an element. The // initial index when coming from the entry block is zero. LLVMPositionBuilderAtEnd(c->builder, cond_block); LLVMValueRef phi = LLVMBuildPhi(c->builder, c->intptr, ""); LLVMValueRef zero = LLVMConstInt(c->intptr, 0, false); LLVMAddIncoming(phi, &zero, &entry_block, 1); LLVMValueRef test = LLVMBuildICmp(c->builder, LLVMIntULT, phi, size, ""); LLVMBuildCondBr(c->builder, test, body_block, post_block); // The phi node is the index. Get the element and serialise it. LLVMPositionBuilderAtEnd(c->builder, body_block); LLVMValueRef elem_ptr = LLVMBuildGEP(c->builder, ptr, &phi, 1, ""); ptr_offset_addr = LLVMBuildLoad(c->builder, offset_var, ""); genserialise_element(c, t_elem, false, ctx, elem_ptr, ptr_offset_addr); ptr_offset_addr = LLVMBuildAdd(c->builder, ptr_offset_addr, l_size, ""); LLVMBuildStore(c->builder, ptr_offset_addr, offset_var); // Add one to the phi node and branch back to the cond block. LLVMValueRef one = LLVMConstInt(c->intptr, 1, false); LLVMValueRef inc = LLVMBuildAdd(c->builder, phi, one, ""); body_block = LLVMGetInsertBlock(c->builder); LLVMAddIncoming(phi, &inc, &body_block, 1); LLVMBuildBr(c->builder, cond_block); LLVMPositionBuilderAtEnd(c->builder, post_block); } LLVMBuildBr(c->builder, post_block); LLVMPositionBuilderAtEnd(c->builder, post_block); LLVMBuildRetVoid(c->builder); codegen_finishfun(c); }
/** * Generate a * b */ LLVMValueRef lp_build_mul(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) { const struct lp_type type = bld->type; LLVMValueRef shift; LLVMValueRef res; if(a == bld->zero) return bld->zero; if(a == bld->one) return b; if(b == bld->zero) return bld->zero; if(b == bld->one) return a; if(a == bld->undef || b == bld->undef) return bld->undef; if(!type.floating && !type.fixed && type.norm) { if(type.width == 8) { struct lp_type i16_type = lp_wider_type(type); LLVMValueRef al, ah, bl, bh, abl, abh, ab; lp_build_unpack2(bld->builder, type, i16_type, a, &al, &ah); lp_build_unpack2(bld->builder, type, i16_type, b, &bl, &bh); /* PMULLW, PSRLW, PADDW */ abl = lp_build_mul_u8n(bld->builder, i16_type, al, bl); abh = lp_build_mul_u8n(bld->builder, i16_type, ah, bh); ab = lp_build_pack2(bld->builder, i16_type, type, abl, abh); return ab; } /* FIXME */ assert(0); } if(type.fixed) shift = lp_build_int_const_scalar(type, type.width/2); else shift = NULL; if(LLVMIsConstant(a) && LLVMIsConstant(b)) { res = LLVMConstMul(a, b); if(shift) { if(type.sign) res = LLVMConstAShr(res, shift); else res = LLVMConstLShr(res, shift); } } else { res = LLVMBuildMul(bld->builder, a, b, ""); if(shift) { if(type.sign) res = LLVMBuildAShr(bld->builder, res, shift, ""); else res = LLVMBuildLShr(bld->builder, res, shift, ""); } } return res; }
/** * Generic type conversion. * * TODO: Take a precision argument, or even better, add a new precision member * to the lp_type union. */ void lp_build_conv(LLVMBuilderRef builder, struct lp_type src_type, struct lp_type dst_type, const LLVMValueRef *src, unsigned num_srcs, LLVMValueRef *dst, unsigned num_dsts) { struct lp_type tmp_type; LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH]; unsigned num_tmps; unsigned i; /* Register width must remain constant */ assert(src_type.width * src_type.length == dst_type.width * dst_type.length); /* We must not loose or gain channels. Only precision */ assert(src_type.length * num_srcs == dst_type.length * num_dsts); assert(src_type.length <= LP_MAX_VECTOR_LENGTH); assert(dst_type.length <= LP_MAX_VECTOR_LENGTH); tmp_type = src_type; for(i = 0; i < num_srcs; ++i) tmp[i] = src[i]; num_tmps = num_srcs; /* * Clamp if necessary */ if(memcmp(&src_type, &dst_type, sizeof src_type) != 0) { struct lp_build_context bld; double src_min = lp_const_min(src_type); double dst_min = lp_const_min(dst_type); double src_max = lp_const_max(src_type); double dst_max = lp_const_max(dst_type); LLVMValueRef thres; lp_build_context_init(&bld, builder, tmp_type); if(src_min < dst_min) { if(dst_min == 0.0) thres = bld.zero; else thres = lp_build_const_scalar(src_type, dst_min); for(i = 0; i < num_tmps; ++i) tmp[i] = lp_build_max(&bld, tmp[i], thres); } if(src_max > dst_max) { if(dst_max == 1.0) thres = bld.one; else thres = lp_build_const_scalar(src_type, dst_max); for(i = 0; i < num_tmps; ++i) tmp[i] = lp_build_min(&bld, tmp[i], thres); } } /* * Scale to the narrowest range */ if(dst_type.floating) { /* Nothing to do */ } else if(tmp_type.floating) { if(!dst_type.fixed && !dst_type.sign && dst_type.norm) { for(i = 0; i < num_tmps; ++i) { tmp[i] = lp_build_clamped_float_to_unsigned_norm(builder, tmp_type, dst_type.width, tmp[i]); } tmp_type.floating = FALSE; } else { double dst_scale = lp_const_scale(dst_type); LLVMTypeRef tmp_vec_type; if (dst_scale != 1.0) { LLVMValueRef scale = lp_build_const_scalar(tmp_type, dst_scale); for(i = 0; i < num_tmps; ++i) tmp[i] = LLVMBuildMul(builder, tmp[i], scale, ""); } /* Use an equally sized integer for intermediate computations */ tmp_type.floating = FALSE; tmp_vec_type = lp_build_vec_type(tmp_type); for(i = 0; i < num_tmps; ++i) { #if 0 if(dst_type.sign) tmp[i] = LLVMBuildFPToSI(builder, tmp[i], tmp_vec_type, ""); else tmp[i] = LLVMBuildFPToUI(builder, tmp[i], tmp_vec_type, ""); #else /* FIXME: there is no SSE counterpart for LLVMBuildFPToUI */ tmp[i] = LLVMBuildFPToSI(builder, tmp[i], tmp_vec_type, ""); #endif } } } else { unsigned src_shift = lp_const_shift(src_type); unsigned dst_shift = lp_const_shift(dst_type); /* FIXME: compensate different offsets too */ if(src_shift > dst_shift) { LLVMValueRef shift = lp_build_int_const_scalar(tmp_type, src_shift - dst_shift); for(i = 0; i < num_tmps; ++i) if(src_type.sign) tmp[i] = LLVMBuildAShr(builder, tmp[i], shift, ""); else tmp[i] = LLVMBuildLShr(builder, tmp[i], shift, ""); } } /* * Truncate or expand bit width */ assert(!tmp_type.floating || tmp_type.width == dst_type.width); if(tmp_type.width > dst_type.width) { assert(num_dsts == 1); tmp[0] = lp_build_pack(builder, tmp_type, dst_type, TRUE, tmp, num_tmps); tmp_type.width = dst_type.width; tmp_type.length = dst_type.length; num_tmps = 1; } if(tmp_type.width < dst_type.width) { assert(num_tmps == 1); lp_build_unpack(builder, tmp_type, dst_type, tmp[0], tmp, num_dsts); tmp_type.width = dst_type.width; tmp_type.length = dst_type.length; num_tmps = 
num_dsts; } assert(tmp_type.width == dst_type.width); assert(tmp_type.length == dst_type.length); assert(num_tmps == num_dsts); /* * Scale to the widest range */ if(src_type.floating) { /* Nothing to do */ } else if(!src_type.floating && dst_type.floating) { if(!src_type.fixed && !src_type.sign && src_type.norm) { for(i = 0; i < num_tmps; ++i) { tmp[i] = lp_build_unsigned_norm_to_float(builder, src_type.width, dst_type, tmp[i]); } tmp_type.floating = TRUE; } else { double src_scale = lp_const_scale(src_type); LLVMTypeRef tmp_vec_type; /* Use an equally sized integer for intermediate computations */ tmp_type.floating = TRUE; tmp_type.sign = TRUE; tmp_vec_type = lp_build_vec_type(tmp_type); for(i = 0; i < num_tmps; ++i) { #if 0 if(dst_type.sign) tmp[i] = LLVMBuildSIToFP(builder, tmp[i], tmp_vec_type, ""); else tmp[i] = LLVMBuildUIToFP(builder, tmp[i], tmp_vec_type, ""); #else /* FIXME: there is no SSE counterpart for LLVMBuildUIToFP */ tmp[i] = LLVMBuildSIToFP(builder, tmp[i], tmp_vec_type, ""); #endif } if (src_scale != 1.0) { LLVMValueRef scale = lp_build_const_scalar(tmp_type, 1.0/src_scale); for(i = 0; i < num_tmps; ++i) tmp[i] = LLVMBuildMul(builder, tmp[i], scale, ""); } } } else { unsigned src_shift = lp_const_shift(src_type); unsigned dst_shift = lp_const_shift(dst_type); /* FIXME: compensate different offsets too */ if(src_shift < dst_shift) { LLVMValueRef shift = lp_build_int_const_scalar(tmp_type, dst_shift - src_shift); for(i = 0; i < num_tmps; ++i) tmp[i] = LLVMBuildShl(builder, tmp[i], shift, ""); } } for(i = 0; i < num_dsts; ++i) dst[i] = tmp[i]; }
/** * Store depth/stencil values. * Incoming values are swizzled (typically n 2x2 quads), stored linear. * If there's a mask it will do select/store otherwise just store. * * \param type the data type of the fragment depth/stencil values * \param format_desc description of the depth/stencil surface * \param mask the alive/dead pixel mask for the quad (vector) * \param z_fb z values read from fb (with padding) * \param s_fb s values read from fb (with padding) * \param loop_counter the current loop iteration * \param depth_ptr pointer to the depth/stencil values of this 4x4 block * \param depth_stride stride of the depth/stencil buffer * \param z_value the depth values to store (with padding) * \param s_value the stencil values to store (with padding) */ void lp_build_depth_stencil_write_swizzled(struct gallivm_state *gallivm, struct lp_type z_src_type, const struct util_format_description *format_desc, struct lp_build_mask_context *mask, LLVMValueRef z_fb, LLVMValueRef s_fb, LLVMValueRef loop_counter, LLVMValueRef depth_ptr, LLVMValueRef depth_stride, LLVMValueRef z_value, LLVMValueRef s_value) { struct lp_build_context z_bld; LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH / 4]; LLVMBuilderRef builder = gallivm->builder; LLVMValueRef mask_value = NULL; LLVMValueRef zs_dst1, zs_dst2; LLVMValueRef zs_dst_ptr1, zs_dst_ptr2; LLVMValueRef depth_offset1, depth_offset2; LLVMTypeRef load_ptr_type; unsigned depth_bytes = format_desc->block.bits / 8; struct lp_type zs_type = lp_depth_type(format_desc, z_src_type.length); struct lp_type z_type = zs_type; struct lp_type zs_load_type = zs_type; zs_load_type.length = zs_load_type.length / 2; load_ptr_type = LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0); z_type.width = z_src_type.width; lp_build_context_init(&z_bld, gallivm, z_type); /* * This is far from ideal, at least for late depth write we should do this * outside the fs loop to avoid all the swizzle stuff. */ if (z_src_type.length == 4) { LLVMValueRef looplsb = LLVMBuildAnd(builder, loop_counter, lp_build_const_int32(gallivm, 1), ""); LLVMValueRef loopmsb = LLVMBuildAnd(builder, loop_counter, lp_build_const_int32(gallivm, 2), ""); LLVMValueRef offset2 = LLVMBuildMul(builder, loopmsb, depth_stride, ""); depth_offset1 = LLVMBuildMul(builder, looplsb, lp_build_const_int32(gallivm, depth_bytes * 2), ""); depth_offset1 = LLVMBuildAdd(builder, depth_offset1, offset2, ""); } else { unsigned i; LLVMValueRef loopx2 = LLVMBuildShl(builder, loop_counter, lp_build_const_int32(gallivm, 1), ""); assert(z_src_type.length == 8); depth_offset1 = LLVMBuildMul(builder, loopx2, depth_stride, ""); /* * We load 2x4 values, and need to swizzle them (order * 0,1,4,5,2,3,6,7) - not so hot with avx unfortunately. 
*/ for (i = 0; i < 8; i++) { shuffles[i] = lp_build_const_int32(gallivm, (i&1) + (i&2) * 2 + (i&4) / 2); } } depth_offset2 = LLVMBuildAdd(builder, depth_offset1, depth_stride, ""); zs_dst_ptr1 = LLVMBuildGEP(builder, depth_ptr, &depth_offset1, 1, ""); zs_dst_ptr1 = LLVMBuildBitCast(builder, zs_dst_ptr1, load_ptr_type, ""); zs_dst_ptr2 = LLVMBuildGEP(builder, depth_ptr, &depth_offset2, 1, ""); zs_dst_ptr2 = LLVMBuildBitCast(builder, zs_dst_ptr2, load_ptr_type, ""); if (format_desc->block.bits > 32) { s_value = LLVMBuildBitCast(builder, s_value, z_bld.vec_type, ""); } if (mask) { mask_value = lp_build_mask_value(mask); z_value = lp_build_select(&z_bld, mask_value, z_value, z_fb); if (format_desc->block.bits > 32) { s_fb = LLVMBuildBitCast(builder, s_fb, z_bld.vec_type, ""); s_value = lp_build_select(&z_bld, mask_value, s_value, s_fb); } } if (zs_type.width < z_src_type.width) { /* Truncate ZS values (e.g., when writing to Z16_UNORM) */ z_value = LLVMBuildTrunc(builder, z_value, lp_build_int_vec_type(gallivm, zs_type), ""); } if (format_desc->block.bits <= 32) { if (z_src_type.length == 4) { zs_dst1 = lp_build_extract_range(gallivm, z_value, 0, 2); zs_dst2 = lp_build_extract_range(gallivm, z_value, 2, 2); } else { assert(z_src_type.length == 8); zs_dst1 = LLVMBuildShuffleVector(builder, z_value, z_value, LLVMConstVector(&shuffles[0], zs_load_type.length), ""); zs_dst2 = LLVMBuildShuffleVector(builder, z_value, z_value, LLVMConstVector(&shuffles[4], zs_load_type.length), ""); } } else { if (z_src_type.length == 4) { zs_dst1 = lp_build_interleave2(gallivm, z_type, z_value, s_value, 0); zs_dst2 = lp_build_interleave2(gallivm, z_type, z_value, s_value, 1); } else { unsigned i; LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH / 2]; assert(z_src_type.length == 8); for (i = 0; i < 8; i++) { shuffles[i*2] = lp_build_const_int32(gallivm, (i&1) + (i&2) * 2 + (i&4) / 2); shuffles[i*2+1] = lp_build_const_int32(gallivm, (i&1) + (i&2) * 2 + (i&4) / 2 + z_src_type.length); } zs_dst1 = LLVMBuildShuffleVector(builder, z_value, s_value, LLVMConstVector(&shuffles[0], z_src_type.length), ""); zs_dst2 = LLVMBuildShuffleVector(builder, z_value, s_value, LLVMConstVector(&shuffles[8], z_src_type.length), ""); } zs_dst1 = LLVMBuildBitCast(builder, zs_dst1, lp_build_vec_type(gallivm, zs_load_type), ""); zs_dst2 = LLVMBuildBitCast(builder, zs_dst2, lp_build_vec_type(gallivm, zs_load_type), ""); } LLVMBuildStore(builder, zs_dst1, zs_dst_ptr1); LLVMBuildStore(builder, zs_dst2, zs_dst_ptr2); }
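/* Quick illustrative check of the shuffle-index expression used above: for
 * i = 0..7, (i&1) + (i&2)*2 + (i&4)/2 yields the 0,1,4,5,2,3,6,7 interleave
 * order mentioned in the comment. */
#include <stdio.h>

int main(void)
{
   unsigned i;
   for (i = 0; i < 8; i++)
      printf("%u ", (i & 1) + (i & 2) * 2 + (i & 4) / 2);
   printf("\n");   /* prints: 0 1 4 5 2 3 6 7 */
   return 0;
}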
/** * Load depth/stencil values. * The stored values are linear, swizzle them. * * \param type the data type of the fragment depth/stencil values * \param format_desc description of the depth/stencil surface * \param loop_counter the current loop iteration * \param depth_ptr pointer to the depth/stencil values of this 4x4 block * \param depth_stride stride of the depth/stencil buffer * \param z_fb contains z values loaded from fb (may include padding) * \param s_fb contains s values loaded from fb (may include padding) */ void lp_build_depth_stencil_load_swizzled(struct gallivm_state *gallivm, struct lp_type z_src_type, const struct util_format_description *format_desc, LLVMValueRef depth_ptr, LLVMValueRef depth_stride, LLVMValueRef *z_fb, LLVMValueRef *s_fb, LLVMValueRef loop_counter) { LLVMBuilderRef builder = gallivm->builder; LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH / 4]; LLVMValueRef zs_dst1, zs_dst2; LLVMValueRef zs_dst_ptr; LLVMValueRef depth_offset1, depth_offset2; LLVMTypeRef load_ptr_type; unsigned depth_bytes = format_desc->block.bits / 8; struct lp_type zs_type = lp_depth_type(format_desc, z_src_type.length); struct lp_type zs_load_type = zs_type; zs_load_type.length = zs_load_type.length / 2; load_ptr_type = LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0); if (z_src_type.length == 4) { unsigned i; LLVMValueRef looplsb = LLVMBuildAnd(builder, loop_counter, lp_build_const_int32(gallivm, 1), ""); LLVMValueRef loopmsb = LLVMBuildAnd(builder, loop_counter, lp_build_const_int32(gallivm, 2), ""); LLVMValueRef offset2 = LLVMBuildMul(builder, loopmsb, depth_stride, ""); depth_offset1 = LLVMBuildMul(builder, looplsb, lp_build_const_int32(gallivm, depth_bytes * 2), ""); depth_offset1 = LLVMBuildAdd(builder, depth_offset1, offset2, ""); /* just concatenate the loaded 2x2 values into 4-wide vector */ for (i = 0; i < 4; i++) { shuffles[i] = lp_build_const_int32(gallivm, i); } } else { unsigned i; LLVMValueRef loopx2 = LLVMBuildShl(builder, loop_counter, lp_build_const_int32(gallivm, 1), ""); assert(z_src_type.length == 8); depth_offset1 = LLVMBuildMul(builder, loopx2, depth_stride, ""); /* * We load 2x4 values, and need to swizzle them (order * 0,1,4,5,2,3,6,7) - not so hot with avx unfortunately. 
*/ for (i = 0; i < 8; i++) { shuffles[i] = lp_build_const_int32(gallivm, (i&1) + (i&2) * 2 + (i&4) / 2); } } depth_offset2 = LLVMBuildAdd(builder, depth_offset1, depth_stride, ""); /* Load current z/stencil values from z/stencil buffer */ zs_dst_ptr = LLVMBuildGEP(builder, depth_ptr, &depth_offset1, 1, ""); zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr, load_ptr_type, ""); zs_dst1 = LLVMBuildLoad(builder, zs_dst_ptr, ""); zs_dst_ptr = LLVMBuildGEP(builder, depth_ptr, &depth_offset2, 1, ""); zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr, load_ptr_type, ""); zs_dst2 = LLVMBuildLoad(builder, zs_dst_ptr, ""); *z_fb = LLVMBuildShuffleVector(builder, zs_dst1, zs_dst2, LLVMConstVector(shuffles, zs_type.length), ""); *s_fb = *z_fb; if (format_desc->block.bits < z_src_type.width) { /* Extend destination ZS values (e.g., when reading from Z16_UNORM) */ *z_fb = LLVMBuildZExt(builder, *z_fb, lp_build_int_vec_type(gallivm, z_src_type), ""); } else if (format_desc->block.bits > 32) { /* rely on llvm to handle too wide vector we have here nicely */ unsigned i; struct lp_type typex2 = zs_type; struct lp_type s_type = zs_type; LLVMValueRef shuffles1[LP_MAX_VECTOR_LENGTH / 4]; LLVMValueRef shuffles2[LP_MAX_VECTOR_LENGTH / 4]; LLVMValueRef tmp; typex2.width = typex2.width / 2; typex2.length = typex2.length * 2; s_type.width = s_type.width / 2; s_type.floating = 0; tmp = LLVMBuildBitCast(builder, *z_fb, lp_build_vec_type(gallivm, typex2), ""); for (i = 0; i < zs_type.length; i++) { shuffles1[i] = lp_build_const_int32(gallivm, i * 2); shuffles2[i] = lp_build_const_int32(gallivm, i * 2 + 1); } *z_fb = LLVMBuildShuffleVector(builder, tmp, tmp, LLVMConstVector(shuffles1, zs_type.length), ""); *s_fb = LLVMBuildShuffleVector(builder, tmp, tmp, LLVMConstVector(shuffles2, zs_type.length), ""); *s_fb = LLVMBuildBitCast(builder, *s_fb, lp_build_vec_type(gallivm, s_type), ""); lp_build_name(*s_fb, "s_dst"); } lp_build_name(*z_fb, "z_dst"); lp_build_name(*s_fb, "s_dst"); lp_build_name(*z_fb, "z_dst"); }