/*
 * Build a constant representing the value "one" for the given type:
 * 1.0 for floats, 1 << (width/2) for fixed point, the maximum
 * representable value for normalized types, and plain 1 for ordinary
 * integers.  Returns a scalar when type.length == 1, a vector otherwise.
 */
LLVMValueRef
lp_build_one(struct lp_type type)
{
   LLVMTypeRef elem_type;
   LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
   unsigned i;

   assert(type.length <= LP_MAX_VECTOR_LENGTH);

   elem_type = lp_build_elem_type(type);

   if(type.floating)
      elems[0] = LLVMConstReal(elem_type, 1.0);
   else if(type.fixed)
      elems[0] = LLVMConstInt(elem_type, 1LL << (type.width/2), 0);
   else if(!type.norm)
      elems[0] = LLVMConstInt(elem_type, 1, 0);
   else if(type.sign)
      elems[0] = LLVMConstInt(elem_type, (1LL << (type.width - 1)) - 1, 0);
   else {
      /* special case -- 1.0 for normalized unsigned types is more easily
       * attained if we start with a value consisting of all bits set.
       * (The signed-normalized case is handled above, so no shift is
       * needed here.)
       *
       * Bug fix: previously this branch always built a vector, returning
       * a <1 x iN> for type.length == 1 while every other branch returns
       * a scalar; keep the result type consistent.
       */
      if (type.length == 1)
         return LLVMConstAllOnes(elem_type);
      else {
         LLVMTypeRef vec_type = LLVMVectorType(elem_type, type.length);
         return LLVMConstAllOnes(vec_type);
      }
   }

   /* Splat the element across the rest of the vector. */
   for(i = 1; i < type.length; ++i)
      elems[i] = elems[0];

   if (type.length == 1)
      return elems[0];
   else
      return LLVMConstVector(elems, type.length);
}
/*
 * llvmgen_assignment
 *
 * Generates a store operation from an assignment expression.
 * Returns the RHS value, so the assignment expression itself has a
 * value usable by an enclosing expression.
 */
LLVMValueRef
llvmgen_assignment (gencodectx_t gctx, expr_node_t *lhs, expr_node_t *rhs)
{
    LLVMBuilderRef builder = (gctx->curfn == 0 ? 0 : gctx->curfn->builder);
    LLVMValueRef rhsvalue, v, lhsaddr, stinstr;
    LLVMTypeRef lhstype, rhstype;
    llvm_accinfo_t accinfo;
    int shifts_required = 0;

    rhsvalue = llvmgen_expression(gctx, rhs, 0);
    if (rhsvalue == 0) {
        // No value produced: signal the error but synthesize a zero of
        // machine-word width so code generation can continue.
        unsigned int bpval = machine_scalar_bits(gctx->mach);
        expr_signal(gctx->ectx, STC__EXPRVALRQ);
        rhsvalue = LLVMConstNull(LLVMIntTypeInContext(gctx->llvmctx, bpval));
    }
    rhstype = LLVMTypeOf(rhsvalue);

    lhsaddr = llvmgen_addr_expression(gctx, lhs, &accinfo);
    if (lhsaddr == 0) {
        expr_signal(gctx->ectx, STC__ADDRVALRQ);
        return rhsvalue;
    }
    // If we're assigning into a field-reference with a non-zero
    // bit position or a non-CTCE size, we have to do some bit-shifting
    // to do the store.
    if (accinfo.posval != 0 || accinfo.sizeval != 0) {
        shifts_required = 1;
        lhstype = LLVMIntTypeInContext(gctx->llvmctx, accinfo.width);
        if ((accinfo.flags & LLVMGEN_M_ACC_CONSTSIZ) != 0) {
            accinfo.sizeval = LLVMConstInt(gctx->fullwordtype, accinfo.size, 0);
        }
    } else if ((accinfo.flags & LLVMGEN_M_ACC_CONSTSIZ) != 0) {
        lhstype = LLVMIntTypeInContext(gctx->llvmctx, accinfo.size);
    } else {
        lhstype = LLVMIntTypeInContext(gctx->llvmctx, accinfo.width);
    }

    lhsaddr = llvmgen_adjustval(gctx, lhsaddr, LLVMPointerType(lhstype, 0), 0);

    if (shifts_required) {
        LLVMValueRef neg1, srcmask, dstmask, rhstemp;

        if (LLVMGetTypeKind(rhstype) != LLVMIntegerTypeKind) {
            rhsvalue = llvmgen_adjustval(gctx, rhsvalue, gctx->fullwordtype, 0);
            rhstype = LLVMTypeOf(rhsvalue);
        } else {
            accinfo.sizeval = llvmgen_adjustval(gctx, accinfo.sizeval, rhstype, 0);
            accinfo.posval = llvmgen_adjustval(gctx, accinfo.posval, rhstype, 0);
        }
        // srcmask = ~(-1 << size): the low 'size' bits set.  Used to clip
        // the RHS to the field width before positioning it.
        neg1 = LLVMConstAllOnes(rhstype);
        v = LLVMBuildShl(builder, neg1, accinfo.sizeval, llvmgen_temp(gctx));
        srcmask = LLVMBuildNot(builder, v, llvmgen_temp(gctx));
        v = LLVMBuildAnd(builder, rhsvalue, srcmask, llvmgen_temp(gctx));
        v = LLVMBuildShl(builder, v, accinfo.posval, llvmgen_temp(gctx));
        rhstemp = llvmgen_adjustval(gctx, v, lhstype, 0);
        // dstmask = ~(srcmask << pos): clears just the target field in the
        // existing destination word.
        v = LLVMBuildShl(builder, srcmask, accinfo.posval, llvmgen_temp(gctx));
        v = llvmgen_adjustval(gctx, v, lhstype, 0);
        dstmask = LLVMBuildNot(builder, v, llvmgen_temp(gctx));
        // Read-modify-write: load, clear the field, OR in the new bits.
        v = LLVMBuildLoad(builder, lhsaddr, llvmgen_temp(gctx));
        v = llvmgen_adjustval(gctx, v, lhstype,
                              (accinfo.flags & LLVMGEN_M_SEG_SIGNEXT) != 0);
        v = LLVMBuildAnd(builder, v, dstmask, llvmgen_temp(gctx));
        v = LLVMBuildOr(builder, v, rhstemp, llvmgen_temp(gctx));
    } else {
        v = llvmgen_adjustval(gctx, rhsvalue, lhstype,
                              (accinfo.flags & LLVMGEN_M_SEG_SIGNEXT) != 0);
    }

    // Bug fix: the volatile attribute belongs on the store *instruction*
    // returned by LLVMBuildStore.  The original code discarded that result
    // and called LLVMSetVolatile on 'v' (the value being stored), which is
    // not a memory-access instruction.
    stinstr = LLVMBuildStore(builder, v, lhsaddr);
    if ((accinfo.flags & LLVMGEN_M_SEG_VOLATILE) != 0) {
        LLVMSetVolatile(stinstr, 1);
    }

    return rhsvalue;

} /* llvmgen_assignment */
/*
 * gen_fetch
 *
 * Generates a load operation for a fetch expression, converting the
 * result to 'neededtype' (sign- or zero-extending per the segment's
 * attributes).
 */
static LLVMValueRef
gen_fetch (gencodectx_t gctx, expr_node_t *rhs, LLVMTypeRef neededtype)
{
    LLVMBuilderRef builder = gctx->curfn->builder;
    llvm_accinfo_t accinfo;
    LLVMValueRef addr, val;
    LLVMTypeRef type;
    int shifts_required = 0;
    int signext;

    // For field references with non-zero bit position, or with
    // non-CTCE size, we'll have to do bit shifting to extract
    // the field.
    addr = llvmgen_addr_expression(gctx, rhs, &accinfo);
    if (accinfo.posval != 0 || accinfo.sizeval != 0) {
        type = gctx->fullwordtype;
        if ((accinfo.flags & LLVMGEN_M_ACC_CONSTSIZ)) {
            accinfo.sizeval = LLVMConstInt(gctx->fullwordtype, accinfo.size, 0);
        }
        shifts_required = 1;
    } else if ((accinfo.flags & LLVMGEN_M_ACC_CONSTSIZ)) {
        if (accinfo.size == 0) {
            // XXX signal invalid size
            type = gctx->int1type;
        } else {
            type = LLVMIntTypeInContext(gctx->llvmctx, accinfo.size);
        }
    } else {
        type = gctx->fullwordtype;
    }

    signext = ((accinfo.flags & LLVMGEN_M_SEG_SIGNEXT) != 0);

    // If we're fetching from a register, there's no load instruction
    // required - EXCEPT if this was a scalar BIND (or the register was
    // dereferenced), in which case the register holds a pointer that
    // must still be loaded through.
    if ((accinfo.segclass == LLVM_REG &&
         (accinfo.flags & LLVMGEN_M_SEG_DEREFED) == 0) &&
        (accinfo.flags & LLVMGEN_M_SEG_BINDPTR) == 0) {
        val = llvmgen_adjustval(gctx, addr, type, signext);
    } else {
        addr = llvmgen_adjustval(gctx, addr, LLVMPointerType(type, 0), 0);
        val = LLVMBuildLoad(builder, addr, llvmgen_temp(gctx));
        if ((accinfo.flags & LLVMGEN_M_SEG_VOLATILE) != 0)
            LLVMSetVolatile(val, 1);
    }

    if (shifts_required) {
        // Shift the field down to bit 0, then truncate or mask off the
        // bits above the field width.
        val = llvmgen_adjustval(gctx, val, gctx->fullwordtype, signext);
        if (signext) {
            val = LLVMBuildAShr(builder, val, accinfo.posval, llvmgen_temp(gctx));
        } else {
            val = LLVMBuildLShr(builder, val, accinfo.posval, llvmgen_temp(gctx));
        }
        if ((accinfo.flags & LLVMGEN_M_ACC_CONSTSIZ) != 0) {
            LLVMTypeRef trunctype = LLVMIntTypeInContext(gctx->llvmctx,
                                                         accinfo.size);
            val = llvmgen_adjustval(gctx, val, trunctype, signext);
        } else {
            LLVMValueRef neg1 = LLVMConstAllOnes(gctx->fullwordtype);
            LLVMValueRef mask;
            // mask = ~(-1 << size): the low 'size' bits set.
            // Bug fix: this must be a bitwise NOT, not an arithmetic
            // negation -- Neg(-1 << size) yields the single bit
            // (1 << size), not the field mask.  (Matches the mask
            // construction in llvmgen_assignment.)
            mask = LLVMBuildShl(builder, neg1, accinfo.sizeval,
                                llvmgen_temp(gctx));
            mask = LLVMBuildNot(builder, mask, llvmgen_temp(gctx));
            val = LLVMBuildAnd(builder, val, mask, llvmgen_temp(gctx));
            if (signext) {
                val = LLVMBuildSExt(builder, val, gctx->fullwordtype,
                                    llvmgen_temp(gctx));
            }
        }
    }

    return llvmgen_adjustval(gctx, val, neededtype, signext);

} /* gen_fetch */
/**
 * Build code to compare two values 'a' and 'b' of 'type' using the given func.
 * \param func one of PIPE_FUNC_x
 * The result values will be 0 for false or ~0 for true (i.e. a sign-extended
 * boolean per element, suitable for use as a select mask).
 */
LLVMValueRef
lp_build_compare(struct gallivm_state *gallivm,
                 const struct lp_type type,
                 unsigned func,
                 LLVMValueRef a,
                 LLVMValueRef b)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMTypeRef int_vec_type = lp_build_int_vec_type(gallivm, type);
   LLVMValueRef zeros = LLVMConstNull(int_vec_type);
   LLVMValueRef ones = LLVMConstAllOnes(int_vec_type);
   LLVMValueRef cond;
   LLVMValueRef res;

   assert(func >= PIPE_FUNC_NEVER);
   assert(func <= PIPE_FUNC_ALWAYS);
   assert(lp_check_value(type, a));
   assert(lp_check_value(type, b));

   /* Trivial cases need no comparison at all. */
   if(func == PIPE_FUNC_NEVER)
      return zeros;
   if(func == PIPE_FUNC_ALWAYS)
      return ones;

#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
   /*
    * There are no unsigned integer comparison instructions in SSE.
    * (Warn when the perf debug flag is set; the generic path below
    * still produces correct, if slower, code.)
    */
   if (!type.floating && !type.sign &&
       type.width * type.length == 128 &&
       util_cpu_caps.has_sse2 &&
       (func == PIPE_FUNC_LESS ||
        func == PIPE_FUNC_LEQUAL ||
        func == PIPE_FUNC_GREATER ||
        func == PIPE_FUNC_GEQUAL) &&
       (gallivm_debug & GALLIVM_DEBUG_PERF)) {
      debug_printf("%s: inefficient <%u x i%u> unsigned comparison\n",
                   __FUNCTION__, type.length, type.width);
   }
#endif

#if HAVE_LLVM < 0x0207
#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
   /* Older LLVM lacks usable vector compares; emit SSE intrinsics directly
    * for 128-bit vectors. */
   if(type.width * type.length == 128) {
      if(type.floating && util_cpu_caps.has_sse) {
         /* float[4] comparison via cmpps.  'cc' is the immediate predicate
          * operand; GREATER/GEQUAL are synthesized by swapping operands of
          * LESS/LEQUAL. */
         LLVMTypeRef vec_type = lp_build_vec_type(gallivm, type);
         LLVMValueRef args[3];
         unsigned cc;
         boolean swap;

         swap = FALSE;
         switch(func) {
         case PIPE_FUNC_EQUAL:
            cc = 0;
            break;
         case PIPE_FUNC_NOTEQUAL:
            cc = 4;
            break;
         case PIPE_FUNC_LESS:
            cc = 1;
            break;
         case PIPE_FUNC_LEQUAL:
            cc = 2;
            break;
         case PIPE_FUNC_GREATER:
            cc = 1;
            swap = TRUE;
            break;
         case PIPE_FUNC_GEQUAL:
            cc = 2;
            swap = TRUE;
            break;
         default:
            assert(0);
            return lp_build_undef(gallivm, type);
         }

         if(swap) {
            args[0] = b;
            args[1] = a;
         }
         else {
            args[0] = a;
            args[1] = b;
         }

         args[2] = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), cc, 0);
         res = lp_build_intrinsic(builder,
                                  "llvm.x86.sse.cmp.ps",
                                  vec_type,
                                  args, 3);
         res = LLVMBuildBitCast(builder, res, int_vec_type, "");
         return res;
      }
      else if(util_cpu_caps.has_sse2) {
         /* int[4] comparison, composed from pcmpeq/pcmpgt plus optional
          * operand swap and result inversion, as per the table below
          * (indexed by PIPE_FUNC_x). */
         static const struct {
            unsigned swap:1;
            unsigned eq:1;
            unsigned gt:1;
            unsigned not:1;
         } table[] = {
            {0, 0, 0, 1}, /* PIPE_FUNC_NEVER */
            {1, 0, 1, 0}, /* PIPE_FUNC_LESS */
            {0, 1, 0, 0}, /* PIPE_FUNC_EQUAL */
            {0, 0, 1, 1}, /* PIPE_FUNC_LEQUAL */
            {0, 0, 1, 0}, /* PIPE_FUNC_GREATER */
            {0, 1, 0, 1}, /* PIPE_FUNC_NOTEQUAL */
            {1, 0, 1, 1}, /* PIPE_FUNC_GEQUAL */
            {0, 0, 0, 0} /* PIPE_FUNC_ALWAYS */
         };
         const char *pcmpeq;
         const char *pcmpgt;
         LLVMValueRef args[2];
         LLVMValueRef res;
         LLVMTypeRef vec_type = lp_build_vec_type(gallivm, type);

         switch (type.width) {
         case 8:
            pcmpeq = "llvm.x86.sse2.pcmpeq.b";
            pcmpgt = "llvm.x86.sse2.pcmpgt.b";
            break;
         case 16:
            pcmpeq = "llvm.x86.sse2.pcmpeq.w";
            pcmpgt = "llvm.x86.sse2.pcmpgt.w";
            break;
         case 32:
            pcmpeq = "llvm.x86.sse2.pcmpeq.d";
            pcmpgt = "llvm.x86.sse2.pcmpgt.d";
            break;
         default:
            assert(0);
            return lp_build_undef(gallivm, type);
         }

         /* There are no unsigned comparison instructions. So flip the sign bit
          * so that the results match.
          */
         if (table[func].gt &&
             !type.sign) {
            LLVMValueRef msb = lp_build_const_int_vec(gallivm, type, (unsigned long long)1 << (type.width - 1));
            a = LLVMBuildXor(builder, a, msb, "");
            b = LLVMBuildXor(builder, b, msb, "");
         }

         if(table[func].swap) {
            args[0] = b;
            args[1] = a;
         }
         else {
            args[0] = a;
            args[1] = b;
         }

         if(table[func].eq)
            res = lp_build_intrinsic(builder, pcmpeq, vec_type, args, 2);
         else if (table[func].gt)
            res = lp_build_intrinsic(builder, pcmpgt, vec_type, args, 2);
         else
            res = LLVMConstNull(vec_type);

         if(table[func].not)
            res = LLVMBuildNot(builder, res, "");

         return res;
      }
   } /* if (type.width * type.length == 128) */
#endif
#endif /* HAVE_LLVM < 0x0207 */

   /* XXX: It is not clear if we should use the ordered or unordered operators */

   if(type.floating) {
      LLVMRealPredicate op;
      switch(func) {
      case PIPE_FUNC_NEVER:
         op = LLVMRealPredicateFalse;
         break;
      case PIPE_FUNC_ALWAYS:
         op = LLVMRealPredicateTrue;
         break;
      case PIPE_FUNC_EQUAL:
         op = LLVMRealUEQ;
         break;
      case PIPE_FUNC_NOTEQUAL:
         op = LLVMRealUNE;
         break;
      case PIPE_FUNC_LESS:
         op = LLVMRealULT;
         break;
      case PIPE_FUNC_LEQUAL:
         op = LLVMRealULE;
         break;
      case PIPE_FUNC_GREATER:
         op = LLVMRealUGT;
         break;
      case PIPE_FUNC_GEQUAL:
         op = LLVMRealUGE;
         break;
      default:
         assert(0);
         return lp_build_undef(gallivm, type);
      }

#if HAVE_LLVM >= 0x0207
      /* Modern LLVM: a whole-vector fcmp, sign-extended to 0 / ~0. */
      cond = LLVMBuildFCmp(builder, op, a, b, "");
      res = LLVMBuildSExt(builder, cond, int_vec_type, "");
#else
      if (type.length == 1) {
         cond = LLVMBuildFCmp(builder, op, a, b, "");
         res = LLVMBuildSExt(builder, cond, int_vec_type, "");
      }
      else {
         /* Older LLVM: fall back to comparing one element at a time. */
         unsigned i;

         res = LLVMGetUndef(int_vec_type);

         debug_printf("%s: warning: using slow element-wise float"
                      " vector comparison\n", __FUNCTION__);

         for (i = 0; i < type.length; ++i) {
            LLVMValueRef index = lp_build_const_int32(gallivm, i);
            cond = LLVMBuildFCmp(builder, op,
                                 LLVMBuildExtractElement(builder, a, index, ""),
                                 LLVMBuildExtractElement(builder, b, index, ""),
                                 "");
            cond = LLVMBuildSelect(builder, cond,
                                   LLVMConstExtractElement(ones, index),
                                   LLVMConstExtractElement(zeros, index),
                                   "");
            res = LLVMBuildInsertElement(builder, res, cond, index, "");
         }
      }
#endif
   }
   else {
      LLVMIntPredicate op;
      switch(func) {
      case PIPE_FUNC_EQUAL:
         op = LLVMIntEQ;
         break;
      case PIPE_FUNC_NOTEQUAL:
         op = LLVMIntNE;
         break;
      case PIPE_FUNC_LESS:
         op = type.sign ? LLVMIntSLT : LLVMIntULT;
         break;
      case PIPE_FUNC_LEQUAL:
         op = type.sign ? LLVMIntSLE : LLVMIntULE;
         break;
      case PIPE_FUNC_GREATER:
         op = type.sign ? LLVMIntSGT : LLVMIntUGT;
         break;
      case PIPE_FUNC_GEQUAL:
         op = type.sign ? LLVMIntSGE : LLVMIntUGE;
         break;
      default:
         assert(0);
         return lp_build_undef(gallivm, type);
      }

#if HAVE_LLVM >= 0x0207
      /* Modern LLVM: a whole-vector icmp, sign-extended to 0 / ~0. */
      cond = LLVMBuildICmp(builder, op, a, b, "");
      res = LLVMBuildSExt(builder, cond, int_vec_type, "");
#else
      if (type.length == 1) {
         cond = LLVMBuildICmp(builder, op, a, b, "");
         res = LLVMBuildSExt(builder, cond, int_vec_type, "");
      }
      else {
         /* Older LLVM: fall back to comparing one element at a time. */
         unsigned i;

         res = LLVMGetUndef(int_vec_type);

         if (gallivm_debug & GALLIVM_DEBUG_PERF) {
            debug_printf("%s: using slow element-wise int"
                         " vector comparison\n", __FUNCTION__);
         }

         for(i = 0; i < type.length; ++i) {
            LLVMValueRef index = lp_build_const_int32(gallivm, i);
            cond = LLVMBuildICmp(builder, op,
                                 LLVMBuildExtractElement(builder, a, index, ""),
                                 LLVMBuildExtractElement(builder, b, index, ""),
                                 "");
            cond = LLVMBuildSelect(builder, cond,
                                   LLVMConstExtractElement(ones, index),
                                   LLVMConstExtractElement(zeros, index),
                                   "");
            res = LLVMBuildInsertElement(builder, res, cond, index, "");
         }
      }
#endif
   }

   return res;
}
/*
 * Gather 'length' elements of 'src_width' bits each from base_ptr at the
 * given byte offsets, using the AVX2 gatherd intrinsics, and bitcast the
 * result to a vector of dst_type (with res_type.length = dst_type.length
 * * length).
 */
static LLVMValueRef
lp_build_gather_avx2(struct gallivm_state *gallivm,
                     unsigned length,
                     unsigned src_width,
                     struct lp_type dst_type,
                     LLVMValueRef base_ptr,
                     LLVMValueRef offsets)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMTypeRef src_type, src_vec_type;
   LLVMValueRef res;
   struct lp_type res_type = dst_type;
   res_type.length *= length;

   if (dst_type.floating) {
      src_type = src_width == 64 ? LLVMDoubleTypeInContext(gallivm->context) :
                                   LLVMFloatTypeInContext(gallivm->context);
   } else {
      src_type = LLVMIntTypeInContext(gallivm->context, src_width);
   }
   src_vec_type = LLVMVectorType(src_type, length);

   /* XXX should allow hw scaling (can handle i8, i16, i32, i64 for x86) */
   assert(LLVMTypeOf(base_ptr) ==
          LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0));

   if (0) {
      /*
       * XXX: This will cause LLVM pre 3.7 to hang; it works on LLVM 3.8 but
       * will not use the AVX2 gather instrinsics (even with llvm 4.0), at
       * least with Haswell. See
       * http://lists.llvm.org/pipermail/llvm-dev/2016-January/094448.html
       * And the generated code doing the emulation is quite a bit worse
       * than what we get by doing it ourselves too.
       */
      LLVMTypeRef i32_type = LLVMIntTypeInContext(gallivm->context, 32);
      LLVMTypeRef i32_vec_type = LLVMVectorType(i32_type, length);
      LLVMTypeRef i1_type = LLVMIntTypeInContext(gallivm->context, 1);
      LLVMTypeRef i1_vec_type = LLVMVectorType(i1_type, length);
      LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0);
      LLVMValueRef src_ptr;

      base_ptr = LLVMBuildBitCast(builder, base_ptr, src_ptr_type, "");

      /* Rescale offsets from bytes to elements */
      LLVMValueRef scale = LLVMConstInt(i32_type, src_width/8, 0);
      scale = lp_build_broadcast(gallivm, i32_vec_type, scale);
      assert(LLVMTypeOf(offsets) == i32_vec_type);
      offsets = LLVMBuildSDiv(builder, offsets, scale, "");

      src_ptr = LLVMBuildGEP(builder, base_ptr, &offsets, 1, "vector-gep");

      char intrinsic[64];
      util_snprintf(intrinsic, sizeof intrinsic, "llvm.masked.gather.v%u%s%u",
                    length, dst_type.floating ? "f" : "i", src_width);
      LLVMValueRef alignment = LLVMConstInt(i32_type, src_width/8, 0);
      LLVMValueRef mask = LLVMConstAllOnes(i1_vec_type);
      LLVMValueRef passthru = LLVMGetUndef(src_vec_type);

      LLVMValueRef args[] = { src_ptr, alignment, mask, passthru };

      res = lp_build_intrinsic(builder, intrinsic, src_vec_type, args, 4, 0);
   } else {
      LLVMTypeRef i8_type = LLVMIntTypeInContext(gallivm->context, 8);
      const char *intrinsic = NULL;
      unsigned l_idx = 0;

      assert(src_width == 32 || src_width == 64);
      if (src_width == 32) {
         assert(length == 4 || length == 8);
      } else {
         assert(length == 2 || length == 4);
      }

      /* Indexed by [floating][64-bit elements][256-bit width]. */
      static const char *intrinsics[2][2][2] = {
         {{"llvm.x86.avx2.gather.d.d", "llvm.x86.avx2.gather.d.d.256"},
          {"llvm.x86.avx2.gather.d.q", "llvm.x86.avx2.gather.d.q.256"}},

         {{"llvm.x86.avx2.gather.d.ps", "llvm.x86.avx2.gather.d.ps.256"},
          {"llvm.x86.avx2.gather.d.pd", "llvm.x86.avx2.gather.d.pd.256"}},
      };

      if ((src_width == 32 && length == 8) ||
          (src_width == 64 && length == 4)) {
         l_idx = 1;
      }
      intrinsic = intrinsics[dst_type.floating][src_width == 64][l_idx];

      LLVMValueRef passthru = LLVMGetUndef(src_vec_type);
      /*
       * The gather is unconditional, so the mask is all ones.
       * Bug fix: LLVMConstAllOnes is only valid on integer types, so the
       * mask must be built as an integer vector and then bitcast to the
       * (possibly floating-point) source vector type.  The original code
       * applied all-ones directly to src_vec_type and then performed a
       * no-op bitcast to the same type.
       */
      LLVMTypeRef mask_int_vec_type =
         LLVMVectorType(LLVMIntTypeInContext(gallivm->context, src_width),
                        length);
      LLVMValueRef mask = LLVMConstAllOnes(mask_int_vec_type);
      mask = LLVMConstBitCast(mask, src_vec_type);
      /* Offsets are raw byte offsets, so the hw scale factor is 1. */
      LLVMValueRef scale = LLVMConstInt(i8_type, 1, 0);

      LLVMValueRef args[] = { passthru, base_ptr, offsets, mask, scale };

      res = lp_build_intrinsic(builder, intrinsic, src_vec_type, args, 5, 0);
   }

   res = LLVMBuildBitCast(builder, res,
                          lp_build_vec_type(gallivm, res_type), "");

   return res;
}
/*
 * Emit the IR for one of the 16 two-operand logic ops (PIPE_LOGICOP_x)
 * applied to 'src' and 'dst', returning the combined value.  The result
 * has the same type as 'src'.
 */
LLVMValueRef
lp_build_logicop(LLVMBuilderRef builder,
                 unsigned logicop_func,
                 LLVMValueRef src,
                 LLVMValueRef dst)
{
   LLVMTypeRef src_type = LLVMTypeOf(src);

   switch (logicop_func) {
   case PIPE_LOGICOP_CLEAR:
      return LLVMConstNull(src_type);
   case PIPE_LOGICOP_NOR:
      return LLVMBuildNot(builder, LLVMBuildOr(builder, src, dst, ""), "");
   case PIPE_LOGICOP_AND_INVERTED:
      return LLVMBuildAnd(builder, LLVMBuildNot(builder, src, ""), dst, "");
   case PIPE_LOGICOP_COPY_INVERTED:
      return LLVMBuildNot(builder, src, "");
   case PIPE_LOGICOP_AND_REVERSE:
      return LLVMBuildAnd(builder, src, LLVMBuildNot(builder, dst, ""), "");
   case PIPE_LOGICOP_INVERT:
      return LLVMBuildNot(builder, dst, "");
   case PIPE_LOGICOP_XOR:
      return LLVMBuildXor(builder, src, dst, "");
   case PIPE_LOGICOP_NAND:
      return LLVMBuildNot(builder, LLVMBuildAnd(builder, src, dst, ""), "");
   case PIPE_LOGICOP_AND:
      return LLVMBuildAnd(builder, src, dst, "");
   case PIPE_LOGICOP_EQUIV:
      return LLVMBuildNot(builder, LLVMBuildXor(builder, src, dst, ""), "");
   case PIPE_LOGICOP_NOOP:
      return dst;
   case PIPE_LOGICOP_OR_INVERTED:
      return LLVMBuildOr(builder, LLVMBuildNot(builder, src, ""), dst, "");
   case PIPE_LOGICOP_COPY:
      return src;
   case PIPE_LOGICOP_OR_REVERSE:
      return LLVMBuildOr(builder, src, LLVMBuildNot(builder, dst, ""), "");
   case PIPE_LOGICOP_OR:
      return LLVMBuildOr(builder, src, dst, "");
   case PIPE_LOGICOP_SET:
      return LLVMConstAllOnes(src_type);
   default:
      /* Unknown op: fall back to passing the source through. */
      assert(0);
      return src;
   }
}