/** * Normalized 8bit multiplication. * * - alpha plus one * * makes the following approximation to the division (Sree) * * a*b/255 ~= (a*(b + 1)) >> 256 * * which is the fastest method that satisfies the following OpenGL criteria * * 0*0 = 0 and 255*255 = 255 * * - geometric series * * takes the geometric series approximation to the division * * t/255 = (t >> 8) + (t >> 16) + (t >> 24) .. * * in this case just the first two terms to fit in 16bit arithmetic * * t/255 ~= (t + (t >> 8)) >> 8 * * note that just by itself it doesn't satisfies the OpenGL criteria, as * 255*255 = 254, so the special case b = 255 must be accounted or roundoff * must be used * * - geometric series plus rounding * * when using a geometric series division instead of truncating the result * use roundoff in the approximation (Jim Blinn) * * t/255 ~= (t + (t >> 8) + 0x80) >> 8 * * achieving the exact results * * @sa Alvy Ray Smith, Image Compositing Fundamentals, Tech Memo 4, Aug 15, 1995, * ftp://ftp.alvyray.com/Acrobat/4_Comp.pdf * @sa Michael Herf, The "double blend trick", May 2000, * http://www.stereopsis.com/doubleblend.html */ static LLVMValueRef lp_build_mul_u8n(LLVMBuilderRef builder, struct lp_type i16_type, LLVMValueRef a, LLVMValueRef b) { LLVMValueRef c8; LLVMValueRef ab; c8 = lp_build_int_const_scalar(i16_type, 8); #if 0 /* a*b/255 ~= (a*(b + 1)) >> 256 */ b = LLVMBuildAdd(builder, b, lp_build_int_const_scalar(i16_type, 1), ""); ab = LLVMBuildMul(builder, a, b, ""); #else /* ab/255 ~= (ab + (ab >> 8) + 0x80) >> 8 */ ab = LLVMBuildMul(builder, a, b, ""); ab = LLVMBuildAdd(builder, ab, LLVMBuildLShr(builder, ab, c8, ""), ""); ab = LLVMBuildAdd(builder, ab, lp_build_int_const_scalar(i16_type, 0x80), ""); #endif ab = LLVMBuildLShr(builder, ab, c8, ""); return ab; }
/* * Derive from the quad's upper left scalar coordinates the coordinates for * all other quad pixels */ static void generate_pos0(LLVMBuilderRef builder, LLVMValueRef x, LLVMValueRef y, LLVMValueRef *x0, LLVMValueRef *y0) { LLVMTypeRef int_elem_type = LLVMInt32Type(); LLVMTypeRef int_vec_type = LLVMVectorType(int_elem_type, QUAD_SIZE); LLVMTypeRef elem_type = LLVMFloatType(); LLVMTypeRef vec_type = LLVMVectorType(elem_type, QUAD_SIZE); LLVMValueRef x_offsets[QUAD_SIZE]; LLVMValueRef y_offsets[QUAD_SIZE]; unsigned i; x = lp_build_broadcast(builder, int_vec_type, x); y = lp_build_broadcast(builder, int_vec_type, y); for(i = 0; i < QUAD_SIZE; ++i) { x_offsets[i] = LLVMConstInt(int_elem_type, quad_offset_x[i], 0); y_offsets[i] = LLVMConstInt(int_elem_type, quad_offset_y[i], 0); } x = LLVMBuildAdd(builder, x, LLVMConstVector(x_offsets, QUAD_SIZE), ""); y = LLVMBuildAdd(builder, y, LLVMConstVector(y_offsets, QUAD_SIZE), ""); *x0 = LLVMBuildSIToFP(builder, x, vec_type, ""); *y0 = LLVMBuildSIToFP(builder, y, vec_type, ""); }
LLVMValueRef gen_add(struct node *ast) { return LLVMBuildAdd(builder, codegen(ast->one), codegen(ast->two), ""); }
int main(int argc, char const *argv[]) { LLVMModuleRef mod = LLVMModuleCreateWithName("sum"); LLVMTypeRef param_types[] = { LLVMInt32Type(), LLVMInt32Type() }; LLVMTypeRef ret_type = LLVMFunctionType(LLVMInt32Type(), /* ret type */ param_types, /* arg types */ 2, /* arg count */ 0 /* is variadic */); LLVMValueRef sum = LLVMAddFunction(mod, "sum", ret_type); LLVMBasicBlockRef entry = LLVMAppendBasicBlock(sum, "entry"); LLVMBuilderRef builder = LLVMCreateBuilder(); LLVMPositionBuilderAtEnd(builder, entry); LLVMValueRef tmp = LLVMBuildAdd(builder, LLVMGetParam(sum, 0), LLVMGetParam(sum, 1), "tmp"); LLVMBuildRet(builder, tmp); char *error = NULL; LLVMVerifyModule(mod, LLVMAbortProcessAction, &error); LLVMDisposeMessage(error); LLVMExecutionEngineRef engine; error = NULL; LLVMLinkInJIT(); LLVMInitializeNativeTarget(); if (LLVMCreateExecutionEngineForModule(&engine, mod, &error) != 0) { fprintf(stderr, "failed to create execution engine\n"); abort(); } if (error) { fprintf(stderr, "error: %s\n", error); LLVMDisposeMessage(error); exit(EXIT_FAILURE); } if (argc < 3) { fprintf(stderr, "usage: %s x y\n", argv[0]); exit(EXIT_FAILURE); } long long x = strtoll(argv[1], NULL, 10); long long y = strtoll(argv[2], NULL, 10); LLVMGenericValueRef args[] = { LLVMCreateGenericValueOfInt(LLVMInt32Type(), x, 0), LLVMCreateGenericValueOfInt(LLVMInt32Type(), y, 0), }; LLVMGenericValueRef res = LLVMRunFunction(engine, sum, 2, args); printf("%d\n", (int)LLVMGenericValueToInt(res, 0)); // write bitcode to file if (LLVMWriteBitcodeToFile(mod, "sum.bc") != 0) { fprintf(stderr, "error writing bitcode to file\n"); } LLVMDisposeBuilder(builder); LLVMDisposeExecutionEngine(engine); }
void lp_build_loop_end_cond(struct lp_build_loop_state *state, LLVMValueRef end, LLVMValueRef step, LLVMIntPredicate llvm_cond) { LLVMBuilderRef builder = state->gallivm->builder; LLVMValueRef next; LLVMValueRef cond; LLVMBasicBlockRef after_block; if (!step) step = LLVMConstInt(LLVMTypeOf(end), 1, 0); next = LLVMBuildAdd(builder, state->counter, step, ""); LLVMBuildStore(builder, next, state->counter_var); cond = LLVMBuildICmp(builder, llvm_cond, next, end, ""); after_block = lp_build_insert_new_block(state->gallivm, "loop_end"); LLVMBuildCondBr(builder, cond, after_block, state->block); LLVMPositionBuilderAtEnd(builder, after_block); state->counter = LLVMBuildLoad(builder, state->counter_var, ""); }
static void pointer_offset(compile_t* c, reach_type_t* t, reach_type_t* t_elem) { FIND_METHOD("_offset"); LLVMTypeRef params[3]; params[0] = t->use_type; params[1] = c->intptr; start_function(c, m, t->use_type, params, 2); // Set up a constant integer for the allocation size. size_t size = (size_t)LLVMABISizeOfType(c->target_data, t_elem->use_type); LLVMValueRef l_size = LLVMConstInt(c->intptr, size, false); LLVMValueRef ptr = LLVMGetParam(m->func, 0); LLVMValueRef n = LLVMGetParam(m->func, 1); // Return ptr + (n * sizeof(len)). LLVMValueRef src = LLVMBuildPtrToInt(c->builder, ptr, c->intptr, ""); LLVMValueRef offset = LLVMBuildMul(c->builder, n, l_size, ""); LLVMValueRef result = LLVMBuildAdd(c->builder, src, offset, ""); result = LLVMBuildIntToPtr(c->builder, result, t->use_type, ""); LLVMBuildRet(c->builder, result); codegen_finishfun(c); BOX_FUNCTION(); }
void genprim_string_serialise_trace(compile_t* c, reach_type_t* t) { // Generate the serialise_trace function. t->serialise_trace_fn = codegen_addfun(c, genname_serialise_trace(t->name), c->serialise_type); codegen_startfun(c, t->serialise_trace_fn, NULL, NULL); LLVMSetFunctionCallConv(t->serialise_trace_fn, LLVMCCallConv); LLVMValueRef ctx = LLVMGetParam(t->serialise_trace_fn, 0); LLVMValueRef arg = LLVMGetParam(t->serialise_trace_fn, 1); LLVMValueRef object = LLVMBuildBitCast(c->builder, arg, t->use_type, ""); // Read the size. LLVMValueRef size = field_value(c, object, 1); LLVMValueRef alloc = LLVMBuildAdd(c->builder, size, LLVMConstInt(c->intptr, 1, false), ""); // Reserve space for the contents. LLVMValueRef ptr = field_value(c, object, 3); LLVMValueRef args[3]; args[0] = ctx; args[1] = ptr; args[2] = alloc; gencall_runtime(c, "pony_serialise_reserve", args, 3, ""); LLVMBuildRetVoid(c->builder); codegen_finishfun(c); }
static LLVMValueRef CreateFibFunction(LLVMModuleRef M, LLVMContextRef Context) { LLVMBuilderRef B = LLVMCreateBuilderInContext(Context); // Create the fib function and insert it into module M. This function is said // to return an int and take an int parameter. LLVMTypeRef ParamTypes[] = {LLVMInt32TypeInContext(Context)}; LLVMTypeRef ReturnType = LLVMInt32TypeInContext(Context); LLVMTypeRef FunctionTy = LLVMFunctionType(ReturnType, ParamTypes, 1, 0); LLVMValueRef FibF = LLVMAddFunction(M, "fib", FunctionTy); // Add a basic block to the function. LLVMBasicBlockRef BB = LLVMAppendBasicBlockInContext(Context, FibF, "EntryBlock"); // Get pointers to the constants. LLVMValueRef One = LLVMConstInt(LLVMInt32TypeInContext(Context), 1, 0); LLVMValueRef Two = LLVMConstInt(LLVMInt32TypeInContext(Context), 2, 0); // Get pointer to the integer argument of the add1 function... LLVMValueRef ArgX = LLVMGetFirstParam(FibF); // Get the arg. LLVMSetValueName(ArgX, "AnArg"); // Give it a nice symbolic name for fun. // Create the true_block. LLVMBasicBlockRef RetBB = LLVMAppendBasicBlockInContext(Context, FibF, "return"); // Create an exit block. LLVMBasicBlockRef RecurseBB = LLVMAppendBasicBlockInContext(Context, FibF, "recurse"); // Create the "if (arg <= 2) goto exitbb" LLVMPositionBuilderAtEnd(B, BB); LLVMValueRef CondInst = LLVMBuildICmp(B, LLVMIntSLE, ArgX, Two, "cond"); LLVMBuildCondBr(B, CondInst, RetBB, RecurseBB); // Create: ret int 1 LLVMPositionBuilderAtEnd(B, RetBB); LLVMBuildRet(B, One); // create fib(x-1) LLVMPositionBuilderAtEnd(B, RecurseBB); LLVMValueRef Sub = LLVMBuildSub(B, ArgX, One, "arg"); LLVMValueRef CallFibX1 = LLVMBuildCall(B, FibF, &Sub, 1, "fibx1"); LLVMSetTailCall(CallFibX1, 1); // create fib(x-2) LLVMPositionBuilderAtEnd(B, RecurseBB); Sub = LLVMBuildSub(B, ArgX, Two, "arg"); LLVMValueRef CallFibX2 = LLVMBuildCall(B, FibF, &Sub, 1, "fibx2"); LLVMSetTailCall(CallFibX2, 1); // fib(x-1)+fib(x-2) LLVMPositionBuilderAtEnd(B, RecurseBB); LLVMValueRef Sum = LLVMBuildAdd(B, CallFibX1, CallFibX2, "addresult"); // Create the return instruction and add it to the basic block LLVMPositionBuilderAtEnd(B, RecurseBB); LLVMBuildRet(B, Sum); return FibF; }
LLVMValueRef gendesc_fieldptr(compile_t* c, LLVMValueRef ptr, LLVMValueRef field_info) { LLVMValueRef offset = LLVMBuildExtractValue(c->builder, field_info, 0, ""); offset = LLVMBuildZExt(c->builder, offset, c->intptr, ""); return LLVMBuildAdd(c->builder, ptr, offset, ""); }
struct LLVMOpaqueValue *bllvm_compile_rirbop(const struct rir_expression *expr, struct llvm_traversal_ctx *ctx) { LLVMValueRef ret; LLVMValueRef left = bllvm_value_from_rir_value_or_die(expr->binaryop.a, ctx); LLVMValueRef right = bllvm_value_from_rir_value_or_die(expr->binaryop.b, ctx); switch(expr->type) { case RIR_EXPRESSION_ADD: ret = LLVMBuildAdd(ctx->builder, left, right, ""); break; case RIR_EXPRESSION_SUB: ret = LLVMBuildSub(ctx->builder, left, right, ""); break; case RIR_EXPRESSION_MUL: ret = LLVMBuildMul(ctx->builder, left, right, ""); break; case RIR_EXPRESSION_DIV: ret = LLVMBuildUDiv(ctx->builder, left, right, ""); break; default: RF_CRITICAL_FAIL("Should never get anything other than binaryop here"); break; } return ret; }
static void store_cached_block(struct gallivm_state *gallivm, LLVMValueRef *col, LLVMValueRef tag_value, LLVMValueRef hash_index, LLVMValueRef cache) { LLVMBuilderRef builder = gallivm->builder; LLVMValueRef ptr, indices[3]; LLVMTypeRef type_ptr4x32; unsigned count; type_ptr4x32 = LLVMPointerType(LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4), 0); indices[0] = lp_build_const_int32(gallivm, 0); indices[1] = lp_build_const_int32(gallivm, LP_BUILD_FORMAT_CACHE_MEMBER_TAGS); indices[2] = hash_index; ptr = LLVMBuildGEP(builder, cache, indices, ARRAY_SIZE(indices), ""); LLVMBuildStore(builder, tag_value, ptr); indices[1] = lp_build_const_int32(gallivm, LP_BUILD_FORMAT_CACHE_MEMBER_DATA); hash_index = LLVMBuildMul(builder, hash_index, lp_build_const_int32(gallivm, 16), ""); for (count = 0; count < 4; count++) { indices[2] = hash_index; ptr = LLVMBuildGEP(builder, cache, indices, ARRAY_SIZE(indices), ""); ptr = LLVMBuildBitCast(builder, ptr, type_ptr4x32, ""); LLVMBuildStore(builder, col[count], ptr); hash_index = LLVMBuildAdd(builder, hash_index, lp_build_const_int32(gallivm, 4), ""); } }
/** * Special case for converting clamped IEEE-754 floats to unsigned norms. * * The mathematical voodoo below may seem excessive but it is actually * paramount we do it this way for several reasons. First, there is no single * precision FP to unsigned integer conversion Intel SSE instruction. Second, * secondly, even if there was, since the FP's mantissa takes only a fraction * of register bits the typically scale and cast approach would require double * precision for accurate results, and therefore half the throughput * * Although the result values can be scaled to an arbitrary bit width specified * by dst_width, the actual result type will have the same width. */ LLVMValueRef lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder, struct lp_type src_type, unsigned dst_width, LLVMValueRef src) { LLVMTypeRef int_vec_type = lp_build_int_vec_type(src_type); LLVMValueRef res; unsigned mantissa; unsigned n; unsigned long long ubound; unsigned long long mask; double scale; double bias; assert(src_type.floating); mantissa = lp_mantissa(src_type); /* We cannot carry more bits than the mantissa */ n = MIN2(mantissa, dst_width); /* This magic coefficients will make the desired result to appear in the * lowest significant bits of the mantissa. */ ubound = ((unsigned long long)1 << n); mask = ubound - 1; scale = (double)mask/ubound; bias = (double)((unsigned long long)1 << (mantissa - n)); res = LLVMBuildMul(builder, src, lp_build_const_scalar(src_type, scale), ""); res = LLVMBuildAdd(builder, res, lp_build_const_scalar(src_type, bias), ""); res = LLVMBuildBitCast(builder, res, int_vec_type, ""); if(dst_width > n) { int shift = dst_width - n; res = LLVMBuildShl(builder, res, lp_build_int_const_scalar(src_type, shift), ""); /* TODO: Fill in the empty lower bits for additional precision? */ #if 0 { LLVMValueRef msb; msb = LLVMBuildLShr(builder, res, lp_build_int_const_scalar(src_type, dst_width - 1), ""); msb = LLVMBuildShl(builder, msb, lp_build_int_const_scalar(src_type, shift), ""); msb = LLVMBuildSub(builder, msb, lp_build_int_const_scalar(src_type, 1), ""); res = LLVMBuildOr(builder, res, msb, ""); } #elif 0 while(shift > 0) { res = LLVMBuildOr(builder, res, LLVMBuildLShr(builder, res, lp_build_int_const_scalar(src_type, n), ""), ""); shift -= n; n *= 2; } #endif } else res = LLVMBuildAnd(builder, res, lp_build_int_const_scalar(src_type, mask), ""); return res; }
void lp_build_exp2_approx(struct lp_build_context *bld, LLVMValueRef x, LLVMValueRef *p_exp2_int_part, LLVMValueRef *p_frac_part, LLVMValueRef *p_exp2) { const struct lp_type type = bld->type; LLVMTypeRef vec_type = lp_build_vec_type(type); LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); LLVMValueRef ipart = NULL; LLVMValueRef fpart = NULL; LLVMValueRef expipart = NULL; LLVMValueRef expfpart = NULL; LLVMValueRef res = NULL; if(p_exp2_int_part || p_frac_part || p_exp2) { /* TODO: optimize the constant case */ if(LLVMIsConstant(x)) debug_printf("%s: inefficient/imprecise constant arithmetic\n", __FUNCTION__); assert(type.floating && type.width == 32); x = lp_build_min(bld, x, lp_build_const_scalar(type, 129.0)); x = lp_build_max(bld, x, lp_build_const_scalar(type, -126.99999)); /* ipart = int(x - 0.5) */ ipart = LLVMBuildSub(bld->builder, x, lp_build_const_scalar(type, 0.5f), ""); ipart = LLVMBuildFPToSI(bld->builder, ipart, int_vec_type, ""); /* fpart = x - ipart */ fpart = LLVMBuildSIToFP(bld->builder, ipart, vec_type, ""); fpart = LLVMBuildSub(bld->builder, x, fpart, ""); } if(p_exp2_int_part || p_exp2) { /* expipart = (float) (1 << ipart) */ expipart = LLVMBuildAdd(bld->builder, ipart, lp_build_int_const_scalar(type, 127), ""); expipart = LLVMBuildShl(bld->builder, expipart, lp_build_int_const_scalar(type, 23), ""); expipart = LLVMBuildBitCast(bld->builder, expipart, vec_type, ""); } if(p_exp2) { expfpart = lp_build_polynomial(bld, fpart, lp_build_exp2_polynomial, Elements(lp_build_exp2_polynomial)); res = LLVMBuildMul(bld->builder, expipart, expfpart, ""); } if(p_exp2_int_part) *p_exp2_int_part = expipart; if(p_frac_part) *p_frac_part = fpart; if(p_exp2) *p_exp2 = res; }
static LLVMValueRef lvalue_index(struct node *ast) { LLVMValueRef ptr; ptr = LLVMBuildAdd(builder, codegen(ast->one), codegen(ast->two), ""); return rvalue_to_lvalue(ptr); }
static void emit_uadd(const struct lp_build_tgsi_action *action, struct lp_build_tgsi_context *bld_base, struct lp_build_emit_data *emit_data) { LLVMBuilderRef builder = bld_base->base.gallivm->builder; emit_data->output[emit_data->chan] = LLVMBuildAdd(builder, emit_data->args[0], emit_data->args[1], ""); }
static LLVMValueRef field_loc(compile_t* c, LLVMValueRef offset, LLVMTypeRef structure, LLVMTypeRef ftype, int index) { LLVMValueRef f_offset = LLVMBuildAdd(c->builder, offset, LLVMConstInt(c->intptr, LLVMOffsetOfElement(c->target_data, structure, index), false), ""); return LLVMBuildIntToPtr(c->builder, f_offset, LLVMPointerType(ftype, 0), ""); }
static LLVMValueRef emit_array_index( struct lp_build_tgsi_soa_context *bld, const struct tgsi_full_src_register *reg, unsigned swizzle) { struct gallivm_state * gallivm = bld->bld_base.base.gallivm; LLVMValueRef addr = LLVMBuildLoad(gallivm->builder, bld->addr[reg->Indirect.Index][swizzle], ""); LLVMValueRef offset = lp_build_const_int32(gallivm, reg->Register.Index); LLVMValueRef hw_index = LLVMBuildAdd(gallivm->builder, addr, offset, ""); LLVMValueRef soa_index = LLVMBuildMul(gallivm->builder, hw_index, lp_build_const_int32(gallivm, 4), ""); LLVMValueRef array_index = LLVMBuildAdd(gallivm->builder, soa_index, lp_build_const_int32(gallivm, swizzle), ""); return array_index; }
static LLVMValueRef emit_array_index( struct lp_build_tgsi_soa_context *bld, const struct tgsi_ind_register *reg, unsigned offset) { struct gallivm_state * gallivm = bld->bld_base.base.gallivm; LLVMValueRef addr = LLVMBuildLoad(gallivm->builder, bld->addr[reg->Index][reg->Swizzle], ""); return LLVMBuildAdd(gallivm->builder, addr, lp_build_const_int32(gallivm, offset), ""); }
LLVMValueRef gendesc_istrait(compile_t* c, LLVMValueRef desc, ast_t* type) { // Get the trait identifier. reach_type_t* t = reach_type(c->reach, type); assert(t != NULL); LLVMValueRef trait_id = LLVMConstInt(c->i32, t->type_id, false); // Read the count and the trait list from the descriptor. LLVMValueRef count = desc_field(c, desc, DESC_TRAIT_COUNT); LLVMValueRef list = desc_field(c, desc, DESC_TRAITS); LLVMBasicBlockRef entry_block = LLVMGetInsertBlock(c->builder); LLVMBasicBlockRef cond_block = codegen_block(c, "cond"); LLVMBasicBlockRef body_block = codegen_block(c, "body"); LLVMBasicBlockRef post_block = codegen_block(c, "post"); LLVMBuildBr(c->builder, cond_block); // While the index is less than the count, check an ID. LLVMPositionBuilderAtEnd(c->builder, cond_block); LLVMValueRef phi = LLVMBuildPhi(c->builder, c->i32, ""); LLVMValueRef zero = LLVMConstInt(c->i32, 0, false); LLVMAddIncoming(phi, &zero, &entry_block, 1); LLVMValueRef test = LLVMBuildICmp(c->builder, LLVMIntULT, phi, count, ""); LLVMBuildCondBr(c->builder, test, body_block, post_block); // The phi node is the index. Get ID and compare it. LLVMPositionBuilderAtEnd(c->builder, body_block); LLVMValueRef gep[2]; gep[0] = LLVMConstInt(c->i32, 0, false); gep[1] = phi; LLVMValueRef id_ptr = LLVMBuildInBoundsGEP(c->builder, list, gep, 2, ""); LLVMValueRef id = LLVMBuildLoad(c->builder, id_ptr, ""); LLVMValueRef test_id = LLVMBuildICmp(c->builder, LLVMIntEQ, id, trait_id, ""); // Add one to the phi node. LLVMValueRef one = LLVMConstInt(c->i32, 1, false); LLVMValueRef inc = LLVMBuildAdd(c->builder, phi, one, ""); LLVMAddIncoming(phi, &inc, &body_block, 1); // Either to the post block or back to the condition. LLVMBuildCondBr(c->builder, test_id, post_block, cond_block); LLVMPositionBuilderAtEnd(c->builder, post_block); LLVMValueRef result = LLVMBuildPhi(c->builder, c->i1, ""); LLVMAddIncoming(result, &test, &cond_block, 1); LLVMAddIncoming(result, &test_id, &body_block, 1); return result; }
/* * SI implements derivatives using the local data store (LDS) * All writes to the LDS happen in all executing threads at * the same time. TID is the Thread ID for the current * thread and is a value between 0 and 63, representing * the thread's position in the wavefront. * * For the pixel shader threads are grouped into quads of four pixels. * The TIDs of the pixels of a quad are: * * +------+------+ * |4n + 0|4n + 1| * +------+------+ * |4n + 2|4n + 3| * +------+------+ * * So, masking the TID with 0xfffffffc yields the TID of the top left pixel * of the quad, masking with 0xfffffffd yields the TID of the top pixel of * the current pixel's column, and masking with 0xfffffffe yields the TID * of the left pixel of the current pixel's row. * * Adding 1 yields the TID of the pixel to the right of the left pixel, and * adding 2 yields the TID of the pixel below the top pixel. */ LLVMValueRef ac_build_ddxy(struct ac_llvm_context *ctx, bool has_ds_bpermute, uint32_t mask, int idx, LLVMValueRef lds, LLVMValueRef val) { LLVMValueRef thread_id, tl, trbl, tl_tid, trbl_tid, args[2]; LLVMValueRef result; thread_id = ac_get_thread_id(ctx); tl_tid = LLVMBuildAnd(ctx->builder, thread_id, LLVMConstInt(ctx->i32, mask, false), ""); trbl_tid = LLVMBuildAdd(ctx->builder, tl_tid, LLVMConstInt(ctx->i32, idx, false), ""); if (has_ds_bpermute) { args[0] = LLVMBuildMul(ctx->builder, tl_tid, LLVMConstInt(ctx->i32, 4, false), ""); args[1] = val; tl = ac_build_intrinsic(ctx, "llvm.amdgcn.ds.bpermute", ctx->i32, args, 2, AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT); args[0] = LLVMBuildMul(ctx->builder, trbl_tid, LLVMConstInt(ctx->i32, 4, false), ""); trbl = ac_build_intrinsic(ctx, "llvm.amdgcn.ds.bpermute", ctx->i32, args, 2, AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT); } else { LLVMValueRef store_ptr, load_ptr0, load_ptr1; store_ptr = ac_build_gep0(ctx, lds, thread_id); load_ptr0 = ac_build_gep0(ctx, lds, tl_tid); load_ptr1 = ac_build_gep0(ctx, lds, trbl_tid); LLVMBuildStore(ctx->builder, val, store_ptr); tl = LLVMBuildLoad(ctx->builder, load_ptr0, ""); trbl = LLVMBuildLoad(ctx->builder, load_ptr1, ""); } tl = LLVMBuildBitCast(ctx->builder, tl, ctx->f32, ""); trbl = LLVMBuildBitCast(ctx->builder, trbl, ctx->f32, ""); result = LLVMBuildFSub(ctx->builder, trbl, tl, ""); return result; }
LLVMValueRef build_store_mrs( struct llvm_ctx *ctx, LLVMValueRef pos, const LLVMValueRef *words, int num_words) { for(int i=0; i<num_words; i++) { V t_addr = UTCB_ADDR_VAL(ctx, pos, "store.mr.ptr"); LLVMBuildStore(ctx->builder, words[i], t_addr); pos = LLVMBuildAdd(ctx->builder, pos, CONST_INT(1), "t.pos"); } return pos; }
LLVMValueRef gendesc_ptr_to_fields(compile_t* c, LLVMValueRef object, LLVMValueRef desc) { // Skip the descriptor. LLVMValueRef offset = desc_field(c, desc, DESC_FIELD_OFFSET); offset = LLVMBuildZExt(c->builder, offset, c->intptr, ""); LLVMValueRef base = LLVMBuildPtrToInt(c->builder, object, c->intptr, ""); LLVMValueRef result = LLVMBuildAdd(c->builder, base, offset, ""); // Return as a c->intptr. return result; }
LLVMValueRef gen_preinc(struct node *ast) { LLVMValueRef result; result = LLVMBuildAdd(builder, codegen(ast->one), CONST(1), ""); check_store(result, lvalue(ast->one)); return result; }
LLVMValueRef gen_postinc(struct node *ast) { LLVMValueRef orig, result; orig = codegen(ast->one); result = LLVMBuildAdd(builder, orig, CONST(1), ""); check_store(result, lvalue(ast->one)); return orig; }
/** * Small vector x scale multiplication optimization. */ LLVMValueRef lp_build_mul_imm(struct lp_build_context *bld, LLVMValueRef a, int b) { LLVMValueRef factor; if(b == 0) return bld->zero; if(b == 1) return a; if(b == -1) return LLVMBuildNeg(bld->builder, a, ""); if(b == 2 && bld->type.floating) return lp_build_add(bld, a, a); if(util_is_pot(b)) { unsigned shift = ffs(b) - 1; if(bld->type.floating) { #if 0 /* * Power of two multiplication by directly manipulating the mantissa. * * XXX: This might not be always faster, it will introduce a small error * for multiplication by zero, and it will produce wrong results * for Inf and NaN. */ unsigned mantissa = lp_mantissa(bld->type); factor = lp_build_int_const_scalar(bld->type, (unsigned long long)shift << mantissa); a = LLVMBuildBitCast(bld->builder, a, lp_build_int_vec_type(bld->type), ""); a = LLVMBuildAdd(bld->builder, a, factor, ""); a = LLVMBuildBitCast(bld->builder, a, lp_build_vec_type(bld->type), ""); return a; #endif } else { factor = lp_build_const_scalar(bld->type, shift); return LLVMBuildShl(bld->builder, a, factor, ""); } } factor = lp_build_const_scalar(bld->type, (double)b); return lp_build_mul(bld, a, factor); }
static void trace_array_elements(compile_t* c, reach_type_t* t, LLVMValueRef ctx, LLVMValueRef object, LLVMValueRef pointer) { // Get the type argument for the array. This will be used to generate the // per-element trace call. ast_t* typeargs = ast_childidx(t->ast, 2); ast_t* typearg = ast_child(typeargs); if(!gentrace_needed(typearg)) return; reach_type_t* t_elem = reach_type(c->reach, typearg); pointer = LLVMBuildBitCast(c->builder, pointer, LLVMPointerType(t_elem->use_type, 0), ""); LLVMBasicBlockRef entry_block = LLVMGetInsertBlock(c->builder); LLVMBasicBlockRef cond_block = codegen_block(c, "cond"); LLVMBasicBlockRef body_block = codegen_block(c, "body"); LLVMBasicBlockRef post_block = codegen_block(c, "post"); // Read the size. LLVMValueRef size = field_value(c, object, 1); LLVMBuildBr(c->builder, cond_block); // While the index is less than the size, trace an element. The initial // index when coming from the entry block is zero. LLVMPositionBuilderAtEnd(c->builder, cond_block); LLVMValueRef phi = LLVMBuildPhi(c->builder, c->intptr, ""); LLVMValueRef zero = LLVMConstInt(c->intptr, 0, false); LLVMAddIncoming(phi, &zero, &entry_block, 1); LLVMValueRef test = LLVMBuildICmp(c->builder, LLVMIntULT, phi, size, ""); LLVMBuildCondBr(c->builder, test, body_block, post_block); // The phi node is the index. Get the element and trace it. LLVMPositionBuilderAtEnd(c->builder, body_block); LLVMValueRef elem_ptr = LLVMBuildGEP(c->builder, pointer, &phi, 1, "elem"); LLVMValueRef elem = LLVMBuildLoad(c->builder, elem_ptr, ""); gentrace(c, ctx, elem, typearg); // Add one to the phi node and branch back to the cond block. LLVMValueRef one = LLVMConstInt(c->intptr, 1, false); LLVMValueRef inc = LLVMBuildAdd(c->builder, phi, one, ""); body_block = LLVMGetInsertBlock(c->builder); LLVMAddIncoming(phi, &inc, &body_block, 1); LLVMBuildBr(c->builder, cond_block); LLVMPositionBuilderAtEnd(c->builder, post_block); }
static LLVMValueRef get_instance_index( struct radeon_llvm_context * radeon_bld, unsigned divisor) { struct gallivm_state * gallivm = radeon_bld->soa.bld_base.base.gallivm; LLVMValueRef result = LLVMGetParam(radeon_bld->main_fn, SI_PARAM_INSTANCE_ID); result = LLVMBuildAdd(gallivm->builder, result, LLVMGetParam( radeon_bld->main_fn, SI_PARAM_START_INSTANCE), ""); if (divisor > 1) result = LLVMBuildUDiv(gallivm->builder, result, lp_build_const_int32(gallivm, divisor), ""); return result; }
/** * Build LLVM code for texture coord wrapping, for nearest filtering, * for scaled integer texcoords. * \param block_length is the length of the pixel block along the * coordinate axis * \param coord the incoming texcoord (s,t,r or q) scaled to the texture size * \param length the texture size along one dimension * \param stride pixel stride along the coordinate axis (in bytes) * \param is_pot if TRUE, length is a power of two * \param wrap_mode one of PIPE_TEX_WRAP_x * \param out_offset byte offset for the wrapped coordinate * \param out_i resulting sub-block pixel coordinate for coord0 */ static void lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld, unsigned block_length, LLVMValueRef coord, LLVMValueRef length, LLVMValueRef stride, boolean is_pot, unsigned wrap_mode, LLVMValueRef *out_offset, LLVMValueRef *out_i) { struct lp_build_context *int_coord_bld = &bld->int_coord_bld; LLVMBuilderRef builder = bld->gallivm->builder; LLVMValueRef length_minus_one; length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one); switch(wrap_mode) { case PIPE_TEX_WRAP_REPEAT: if(is_pot) coord = LLVMBuildAnd(builder, coord, length_minus_one, ""); else { /* Add a bias to the texcoord to handle negative coords */ LLVMValueRef bias = lp_build_mul_imm(int_coord_bld, length, 1024); coord = LLVMBuildAdd(builder, coord, bias, ""); coord = LLVMBuildURem(builder, coord, length, ""); } break; case PIPE_TEX_WRAP_CLAMP_TO_EDGE: coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero); coord = lp_build_min(int_coord_bld, coord, length_minus_one); break; case PIPE_TEX_WRAP_CLAMP: case PIPE_TEX_WRAP_CLAMP_TO_BORDER: case PIPE_TEX_WRAP_MIRROR_REPEAT: case PIPE_TEX_WRAP_MIRROR_CLAMP: case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: default: assert(0); } lp_build_sample_partial_offset(int_coord_bld, block_length, coord, stride, out_offset, out_i); }
/** * Generate a + b */ LLVMValueRef lp_build_add(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) { const struct lp_type type = bld->type; LLVMValueRef res; if(a == bld->zero) return b; if(b == bld->zero) return a; if(a == bld->undef || b == bld->undef) return bld->undef; if(bld->type.norm) { const char *intrinsic = NULL; if(a == bld->one || b == bld->one) return bld->one; if(util_cpu_caps.has_sse2 && type.width * type.length == 128 && !type.floating && !type.fixed) { if(type.width == 8) intrinsic = type.sign ? "llvm.x86.sse2.padds.b" : "llvm.x86.sse2.paddus.b"; if(type.width == 16) intrinsic = type.sign ? "llvm.x86.sse2.padds.w" : "llvm.x86.sse2.paddus.w"; } if(intrinsic) return lp_build_intrinsic_binary(bld->builder, intrinsic, lp_build_vec_type(bld->type), a, b); } if(LLVMIsConstant(a) && LLVMIsConstant(b)) res = LLVMConstAdd(a, b); else res = LLVMBuildAdd(bld->builder, a, b, ""); /* clamp to ceiling of 1.0 */ if(bld->type.norm && (bld->type.floating || bld->type.fixed)) res = lp_build_min_simple(bld, res, bld->one); /* XXX clamp to floor of -1 or 0??? */ return res; }
static void pointer_delete(compile_t* c, reach_type_t* t, reach_type_t* t_elem) { FIND_METHOD("_delete"); LLVMTypeRef params[3]; params[0] = t->use_type; params[1] = c->intptr; params[2] = c->intptr; start_function(c, m, t_elem->use_type, params, 3); // Set up a constant integer for the allocation size. size_t size = (size_t)LLVMABISizeOfType(c->target_data, t_elem->use_type); LLVMValueRef l_size = LLVMConstInt(c->intptr, size, false); LLVMValueRef ptr = LLVMGetParam(m->func, 0); LLVMValueRef n = LLVMGetParam(m->func, 1); LLVMValueRef len = LLVMGetParam(m->func, 2); LLVMValueRef elem_ptr = LLVMBuildBitCast(c->builder, ptr, LLVMPointerType(t_elem->use_type, 0), ""); LLVMValueRef result = LLVMBuildLoad(c->builder, elem_ptr, ""); LLVMValueRef dst = LLVMBuildPtrToInt(c->builder, elem_ptr, c->intptr, ""); LLVMValueRef offset = LLVMBuildMul(c->builder, n, l_size, ""); LLVMValueRef src = LLVMBuildAdd(c->builder, dst, offset, ""); LLVMValueRef elen = LLVMBuildMul(c->builder, len, l_size, ""); LLVMValueRef args[5]; args[0] = LLVMBuildIntToPtr(c->builder, dst, c->void_ptr, ""); args[1] = LLVMBuildIntToPtr(c->builder, src, c->void_ptr, ""); args[2] = elen; args[3] = LLVMConstInt(c->i32, 1, false); args[4] = LLVMConstInt(c->i1, 0, false); // llvm.memmove.*(ptr, ptr + (n * sizeof(elem)), len * sizeof(elem)) if(target_is_ilp32(c->opt->triple)) { gencall_runtime(c, "llvm.memmove.p0i8.p0i8.i32", args, 5, ""); } else { gencall_runtime(c, "llvm.memmove.p0i8.p0i8.i64", args, 5, ""); } // Return ptr[0]. LLVMBuildRet(c->builder, result); codegen_finishfun(c); }