static void txf_fetch_args( struct lp_build_tgsi_context * bld_base, struct lp_build_emit_data * emit_data) { const struct tgsi_full_instruction * inst = emit_data->inst; struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base); const struct tgsi_texture_offset * off = inst->TexOffsets; LLVMTypeRef offset_type = bld_base->int_bld.elem_type; /* fetch tex coords */ tex_fetch_args(bld_base, emit_data); /* fetch tex offsets */ if (inst->Texture.NumOffsets) { assert(inst->Texture.NumOffsets == 1); emit_data->args[1] = LLVMConstBitCast( bld->immediates[off->Index][off->SwizzleX], offset_type); emit_data->args[2] = LLVMConstBitCast( bld->immediates[off->Index][off->SwizzleY], offset_type); emit_data->args[3] = LLVMConstBitCast( bld->immediates[off->Index][off->SwizzleZ], offset_type); } else { emit_data->args[1] = bld_base->int_bld.zero; emit_data->args[2] = bld_base->int_bld.zero; emit_data->args[3] = bld_base->int_bld.zero; } emit_data->arg_count = 4; }
static LLVMValueRef emit_fetch_immediate( struct lp_build_tgsi_context *bld_base, const struct tgsi_full_src_register *reg, enum tgsi_opcode_type type, unsigned swizzle) { LLVMTypeRef ctype; LLVMContextRef ctx = bld_base->base.gallivm->context; switch (type) { case TGSI_TYPE_UNSIGNED: case TGSI_TYPE_SIGNED: ctype = LLVMInt32TypeInContext(ctx); break; case TGSI_TYPE_UNTYPED: case TGSI_TYPE_FLOAT: ctype = LLVMFloatTypeInContext(ctx); break; default: ctype = 0; break; } struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base); return LLVMConstBitCast(bld->immediates[reg->Register.Index][swizzle], ctype); }
/**
 * Produce a constant of the given type for a descriptor slot: the function
 * bitcast to that type, or a null constant when no function was supplied.
 */
static LLVMValueRef make_desc_ptr(LLVMValueRef func, LLVMTypeRef type)
{
  return (func != NULL) ? LLVMConstBitCast(func, type) : LLVMConstNull(type);
}
/**
 * Emit a call to the runtime function "pony_create" for the given type.
 *
 * The call receives the codegen context and the type descriptor (bitcast to
 * the generic descriptor pointer type); its result is bitcast to the type's
 * use_type and returned.
 */
LLVMValueRef gencall_create(compile_t* c, reach_type_t* t)
{
  LLVMValueRef ctx = codegen_ctx(c);
  LLVMValueRef desc = LLVMConstBitCast(t->desc, c->descriptor_ptr);

  LLVMValueRef call_args[2];
  call_args[0] = ctx;
  call_args[1] = desc;

  LLVMValueRef created = gencall_runtime(c, "pony_create", call_args, 2, "");
  return LLVMBuildBitCast(c->builder, created, t->use_type, "");
}
static LLVMValueRef emit_fetch( struct lp_build_tgsi_context *bld_base, const struct tgsi_full_src_register *reg, enum tgsi_opcode_type type, unsigned swizzle) { struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base); struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base); LLVMBuilderRef builder = bld_base->base.gallivm->builder; LLVMValueRef result, ptr; if (swizzle == ~0) { LLVMValueRef values[TGSI_NUM_CHANNELS]; unsigned chan; for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { values[chan] = emit_fetch(bld_base, reg, type, chan); } return lp_build_gather_values(bld_base->base.gallivm, values, TGSI_NUM_CHANNELS); } if (reg->Register.Indirect) { struct tgsi_declaration_range range = get_array_range(bld_base, reg->Register.File, ®->Indirect); return LLVMBuildExtractElement(builder, emit_array_fetch(bld_base, reg->Register.File, type, range, swizzle), emit_array_index(bld, ®->Indirect, reg->Register.Index - range.First), ""); } switch(reg->Register.File) { case TGSI_FILE_IMMEDIATE: { LLVMTypeRef ctype = tgsi2llvmtype(bld_base, type); return LLVMConstBitCast(bld->immediates[reg->Register.Index][swizzle], ctype); } case TGSI_FILE_INPUT: result = ctx->inputs[radeon_llvm_reg_index_soa(reg->Register.Index, swizzle)]; break; case TGSI_FILE_TEMPORARY: ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle); result = LLVMBuildLoad(builder, ptr, ""); break; case TGSI_FILE_OUTPUT: ptr = lp_get_output_ptr(bld, reg->Register.Index, swizzle); result = LLVMBuildLoad(builder, ptr, ""); break; default: return LLVMGetUndef(tgsi2llvmtype(bld_base, type)); } return bitcast(bld_base, type, result); }
/**
 * Look up a function by name in the module and return it bitcast to the
 * given type.  If the module has no such function, a null constant of that
 * type is returned instead.
 */
static LLVMValueRef make_function_ptr(compile_t* c, const char* name,
  LLVMTypeRef type)
{
  LLVMValueRef found = LLVMGetNamedFunction(c->module, name);

  if(found != NULL)
    return LLVMConstBitCast(found, type);

  return LLVMConstNull(type);
}
/**
 * Build the field descriptor list for a type.
 *
 * For a tuple type, each field contributes a { offset, descriptor } pair:
 * the offset is the field's element offset within g->primitive, and the
 * descriptor is the field type's descriptor (or null when the field type has
 * none).  The pairs are stored in an internal constant global.
 *
 * @return the global holding the list; a null pointer constant when the type
 *         is not a tuple or has no fields; NULL if generating a field's type
 *         fails.
 */
static LLVMValueRef make_field_list(compile_t* c, gentype_t* g)
{
  // The list is an array of field descriptors.
  int count;

  if(g->underlying == TK_TUPLETYPE)
    count = g->field_count;
  else
    count = 0;

  LLVMTypeRef type = LLVMArrayType(c->field_descriptor, count);

  // If we aren't a tuple, return a null pointer to a list.
  if(count == 0)
    return LLVMConstNull(LLVMPointerType(type, 0));

  // Create a constant array of field descriptors.
  size_t buf_size = count * sizeof(LLVMValueRef);
  LLVMValueRef* list = (LLVMValueRef*)pool_alloc_size(buf_size);

  for(int i = 0; i < count; i++)
  {
    gentype_t fg;

    if(!gentype(c, g->fields[i], &fg))
    {
      // Don't leak the scratch buffer when a field type can't be generated.
      pool_free_size(buf_size, list);
      return NULL;
    }

    LLVMValueRef fdesc[2];
    fdesc[0] = LLVMConstInt(c->i32,
      LLVMOffsetOfElement(c->target_data, g->primitive, i), false);

    if(fg.desc != NULL)
    {
      // We are a concrete type.
      fdesc[1] = LLVMConstBitCast(fg.desc, c->descriptor_ptr);
    } else {
      // We aren't a concrete type.
      fdesc[1] = LLVMConstNull(c->descriptor_ptr);
    }

    list[i] = LLVMConstStructInContext(c->context, fdesc, 2, false);
  }

  LLVMValueRef field_array = LLVMConstArray(c->field_descriptor, list, count);

  // Create a global to hold the array.
  const char* name = genname_fieldlist(g->type_name);
  LLVMValueRef global = LLVMAddGlobal(c->module, type, name);
  LLVMSetGlobalConstant(global, true);
  LLVMSetLinkage(global, LLVMInternalLinkage);
  LLVMSetInitializer(global, field_array);

  pool_free_size(buf_size, list);
  return global;
}
/**
 * Emit a call to the runtime function "pony_create" for the given type.
 *
 * The debug location is cleared first.  The call receives the codegen
 * context and the type descriptor (bitcast to the generic descriptor pointer
 * type); its result is bitcast to the type's use_type and returned.
 */
LLVMValueRef gencall_create(compile_t* c, gentype_t* g)
{
  // Disable debug anchor
  dwarf_location(&c->dwarf, NULL);

  LLVMValueRef ctx = codegen_ctx(c);
  LLVMValueRef desc = LLVMConstBitCast(g->desc, c->descriptor_ptr);

  LLVMValueRef call_args[2];
  call_args[0] = ctx;
  call_args[1] = desc;

  LLVMValueRef created = gencall_runtime(c, "pony_create", call_args, 2, "");
  return LLVMBuildBitCast(c->builder, created, g->use_type, "");
}
/**
 * Emit the runtime calls that create the main actor and become it.
 *
 * "pony_create" is called with the context and the type descriptor; the
 * value it returns is then passed, together with the context, to
 * "pony_become".  The created actor value is returned.
 */
static LLVMValueRef create_main(compile_t* c, gentype_t* g, LLVMValueRef ctx)
{
  // Create the main actor.
  LLVMValueRef create_args[2];
  create_args[0] = ctx;
  create_args[1] = LLVMConstBitCast(g->desc, c->descriptor_ptr);
  LLVMValueRef main_actor =
    gencall_runtime(c, "pony_create", create_args, 2, "");

  // Become it.
  LLVMValueRef become_args[2];
  become_args[0] = ctx;
  become_args[1] = main_actor;
  gencall_runtime(c, "pony_become", become_args, 2, "");

  return main_actor;
}
/**
 * Create a wrapper around the named function that accepts the boxed type as
 * its receiver, loads the primitive stored in struct element 1, and forwards
 * to the real function.
 *
 * @return the wrapper bitcast to c->void_ptr, or a null constant of that type
 *         when the named function does not exist or takes no parameters.
 */
static LLVMValueRef make_unbox_function(compile_t* c, gentype_t* g,
  const char* name)
{
  LLVMValueRef target = LLVMGetNamedFunction(c->module, name);

  if(target == NULL)
    return LLVMConstNull(c->void_ptr);

  LLVMTypeRef target_type = LLVMGetElementType(LLVMTypeOf(target));
  int param_count = LLVMCountParamTypes(target_type);

  // A function with no arguments is a special number constructor; it is kept
  // out of the vtable.
  if(param_count == 0)
    return LLVMConstNull(c->void_ptr);

  size_t bytes = param_count * sizeof(LLVMTypeRef);
  LLVMTypeRef* param_types = (LLVMTypeRef*)pool_alloc_size(bytes);
  LLVMGetParamTypes(target_type, param_types);
  LLVMTypeRef result_type = LLVMGetReturnType(target_type);

  // Same signature as the target, except the receiver is the boxed type.
  param_types[0] = g->structure_ptr;

  const char* wrapper_name = genname_unbox(name);
  LLVMTypeRef wrapper_type =
    LLVMFunctionType(result_type, param_types, param_count, false);
  LLVMValueRef wrapper = codegen_addfun(c, wrapper_name, wrapper_type);
  codegen_startfun(c, wrapper, false);

  // Pull the primitive out of element 1 of the boxed receiver.
  LLVMValueRef boxed = LLVMGetParam(wrapper, 0);
  LLVMValueRef field_ptr = LLVMBuildStructGEP(c->builder, boxed, 1, "");
  LLVMValueRef unboxed = LLVMBuildLoad(c->builder, field_ptr, "");

  // Forward the unboxed receiver plus the remaining parameters unchanged.
  LLVMValueRef* call_args = (LLVMValueRef*)pool_alloc_size(bytes);
  call_args[0] = unboxed;

  int idx = 1;

  while(idx < param_count)
  {
    call_args[idx] = LLVMGetParam(wrapper, idx);
    idx++;
  }

  LLVMValueRef forwarded = codegen_call(c, target, call_args, param_count);
  LLVMBuildRet(c->builder, forwarded);
  codegen_finishfun(c);

  pool_free_size(bytes, param_types);
  pool_free_size(bytes, call_args);
  return LLVMConstBitCast(wrapper, c->void_ptr);
}
static LLVMValueRef emit_fetch_immediate( struct lp_build_tgsi_context *bld_base, const struct tgsi_full_src_register *reg, enum tgsi_opcode_type type, unsigned swizzle) { LLVMTypeRef ctype; LLVMContextRef ctx = bld_base->base.gallivm->context; switch (type) { case TGSI_TYPE_UNSIGNED: case TGSI_TYPE_SIGNED: ctype = LLVMInt32TypeInContext(ctx); break; case TGSI_TYPE_UNTYPED: case TGSI_TYPE_FLOAT: ctype = LLVMFloatTypeInContext(ctx); break; default: ctype = 0; break; } struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base); if (swizzle == ~0) { LLVMValueRef values[TGSI_NUM_CHANNELS] = {}; unsigned chan; for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { values[chan] = LLVMConstBitCast(bld->immediates[reg->Register.Index][chan], ctype); } return lp_build_gather_values(bld_base->base.gallivm, values, TGSI_NUM_CHANNELS); } else { return LLVMConstBitCast(bld->immediates[reg->Register.Index][swizzle], ctype); } }
/**
 * Emit the runtime call that allocates a (boxed) object of the given type.
 *
 * The allocator is chosen as follows:
 *  - a finaliser function exists in the module: "pony_alloc_final" with the
 *    size and the finaliser;
 *  - otherwise, size <= HEAP_MAX: "pony_alloc_small" with the heap index;
 *  - otherwise: "pony_alloc_large" with the size.
 *
 * The result is bitcast to g->structure_ptr and, unless the underlying type
 * is TK_STRUCT, the descriptor is stored into element 0.
 */
LLVMValueRef gencall_allocstruct(compile_t* c, gentype_t* g)
{
  // Disable debug anchor
  dwarf_location(&c->dwarf, NULL);

  // Size of the boxed structure.
  size_t bytes = (size_t)LLVMABISizeOfType(c->target_data, g->structure);

  // Look for a finaliser.
  const char* finaliser_name = genname_finalise(g->type_name);
  LLVMValueRef finaliser = LLVMGetNamedFunction(c->module, finaliser_name);

  LLVMValueRef call_args[3];
  call_args[0] = codegen_ctx(c);

  LLVMValueRef object;

  if(finaliser != NULL)
  {
    call_args[1] = LLVMConstInt(c->intptr, bytes, false);
    call_args[2] = LLVMConstBitCast(finaliser, c->final_fn);
    object = gencall_runtime(c, "pony_alloc_final", call_args, 3, "");
  } else if(bytes > HEAP_MAX) {
    call_args[1] = LLVMConstInt(c->intptr, bytes, false);
    object = gencall_runtime(c, "pony_alloc_large", call_args, 2, "");
  } else {
    uint32_t heap_index = ponyint_heap_index(bytes);
    call_args[1] = LLVMConstInt(c->i32, heap_index, false);
    object = gencall_runtime(c, "pony_alloc_small", call_args, 2, "");
  }

  object = LLVMBuildBitCast(c->builder, object, g->structure_ptr, "");

  // Set the descriptor.
  if(g->underlying != TK_STRUCT)
  {
    LLVMValueRef desc_slot = LLVMBuildStructGEP(c->builder, object, 0, "");
    LLVMBuildStore(c->builder, g->desc, desc_slot);
  }

  return object;
}
/**
 * Convert float[] to int[] with floor().
 *
 * With SSE4.1 the value is rounded by lp_build_round_sse41() in floor mode.
 * Otherwise an offset of -0.99999(9) is added to negative inputs (and 0 to
 * the others).  Either way the result then goes through a float-to-signed-int
 * conversion.
 */
LLVMValueRef
lp_build_ifloor(struct lp_build_context *bld,
                LLVMValueRef a)
{
   const struct lp_type type = bld->type;
   LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
   LLVMValueRef floored;

   assert(type.floating);
   assert(lp_check_value(type, a));

   if (!util_cpu_caps.has_sse4_1) {
      LLVMTypeRef vec_type = lp_build_vec_type(type);
      unsigned mantissa = lp_mantissa(type);
      LLVMValueRef sign_bit_mask =
         lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1));
      const unsigned long long mant_scale = (unsigned long long)1 << mantissa;
      LLVMValueRef is_negative;
      LLVMValueRef bias;

      /* is_negative = a < 0 ? ~0 : 0
       * (isolate the sign bit, then smear it across the word with an
       * arithmetic shift) */
      is_negative = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
      is_negative = LLVMBuildAnd(bld->builder, is_negative, sign_bit_mask, "");
      is_negative = LLVMBuildAShr(bld->builder, is_negative,
                                  lp_build_int_const_scalar(type, type.width - 1), "");
      lp_build_name(is_negative, "floor.sign");

      /* bias = -0.99999(9)f */
      bias = lp_build_const_scalar(type, -(double)(mant_scale - 1)/mant_scale);
      bias = LLVMConstBitCast(bias, int_vec_type);

      /* bias = a < 0 ? -0.99999(9)f : 0.0f */
      bias = LLVMBuildAnd(bld->builder, bias, is_negative, "");
      bias = LLVMBuildBitCast(bld->builder, bias, vec_type, "");
      lp_build_name(bias, "floor.offset");

      floored = LLVMBuildAdd(bld->builder, a, bias, "");
      lp_build_name(floored, "floor.res");
   }
   else {
      floored = lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_FLOOR);
   }

   floored = LLVMBuildFPToSI(bld->builder, floored, int_vec_type, "");
   lp_build_name(floored, "floor");

   return floored;
}
/**
 * Build the sign of a value.
 *
 * The result is bld->zero where a equals zero.  Elsewhere it is:
 *  - bld->one for unsigned types;
 *  - bld->one carrying a's sign bit for signed floating-point types;
 *  - bld->one where a > 0 and -1 otherwise for signed non-float types.
 *
 * @param bld  build context; bld->type describes a.
 * @param a    the value whose sign is wanted.
 * @return the sign value, in the context's vector type.
 */
LLVMValueRef
lp_build_sgn(struct lp_build_context *bld,
             LLVMValueRef a)
{
   const struct lp_type type = bld->type;
   LLVMTypeRef vec_type = lp_build_vec_type(type);
   LLVMValueRef cond;
   LLVMValueRef res;

   /* Handle non-zero case */
   if(!type.sign) {
      /* if not zero then sign must be positive */
      res = bld->one;
   }
   else if(type.floating) {
      /* Take the sign bit and add it to 1 constant */
      LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
      LLVMValueRef mask = lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1));
      LLVMValueRef sign;
      LLVMValueRef one;
      sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
      sign = LLVMBuildAnd(bld->builder, sign, mask, "");
      one = LLVMConstBitCast(bld->one, int_vec_type);
      res = LLVMBuildOr(bld->builder, sign, one, "");
      res = LLVMBuildBitCast(bld->builder, res, vec_type, "");
   }
   else
   {
      LLVMValueRef minus_one = lp_build_const_scalar(type, -1.0);
      cond = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, bld->zero);
      res = lp_build_select(bld, cond, bld->one, minus_one);
   }

   /* Handle zero: keep the non-zero result computed above everywhere else.
    * Selecting bld->one here instead would throw that result away and report
    * +1 for negative inputs. */
   cond = lp_build_cmp(bld, PIPE_FUNC_EQUAL, a, bld->zero);
   res = lp_build_select(bld, cond, bld->zero, res);

   return res;
}
/**
 * Approximate log2(x) for 32-bit floats.
 *
 * See http://www.devmaster.net/forums/showthread.php?p=43580
 *
 * Each output pointer is optional; only the work needed for the requested
 * outputs is emitted.
 *
 * @param p_exp         receives x's bits masked with the exponent mask
 *                      (0x7f800000), not shifted down.
 * @param p_floor_log2  receives the unbiased exponent converted to float.
 * @param p_log2        receives polynomial(mantissa) * (mantissa - 1) plus
 *                      the floor_log2 value.
 */
void
lp_build_log2_approx(struct lp_build_context *bld,
                     LLVMValueRef x,
                     LLVMValueRef *p_exp,
                     LLVMValueRef *p_floor_log2,
                     LLVMValueRef *p_log2)
{
   const struct lp_type type = bld->type;
   LLVMTypeRef vec_type = lp_build_vec_type(type);
   LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);

   LLVMValueRef exp_mask = lp_build_int_const_scalar(type, 0x7f800000);
   LLVMValueRef mant_mask = lp_build_int_const_scalar(type, 0x007fffff);
   LLVMValueRef one_bits = LLVMConstBitCast(bld->one, int_vec_type);

   LLVMValueRef x_bits = NULL;
   LLVMValueRef exp_bits = NULL;
   LLVMValueRef floor_log2 = NULL;
   LLVMValueRef log2_val = NULL;

   if(p_exp || p_floor_log2 || p_log2) {
      /* TODO: optimize the constant case */
      if(LLVMIsConstant(x))
         debug_printf("%s: inefficient/imprecise constant arithmetic\n",
                      __FUNCTION__);

      assert(type.floating && type.width == 32);

      x_bits = LLVMBuildBitCast(bld->builder, x, int_vec_type, "");

      /* isolate the exponent field */
      exp_bits = LLVMBuildAnd(bld->builder, x_bits, exp_mask, "");
   }

   if(p_floor_log2 || p_log2) {
      /* shift the exponent down, remove the bias, convert to float */
      floor_log2 = LLVMBuildLShr(bld->builder, exp_bits,
                                 lp_build_int_const_scalar(type, 23), "");
      floor_log2 = LLVMBuildSub(bld->builder, floor_log2,
                                lp_build_int_const_scalar(type, 127), "");
      floor_log2 = LLVMBuildSIToFP(bld->builder, floor_log2, vec_type, "");
   }

   if(p_log2) {
      LLVMValueRef mantissa;
      LLVMValueRef poly;
      LLVMValueRef mantissa_minus_one;

      /* mantissa = (float) mantissa(x), with the exponent bits of 1.0 */
      mantissa = LLVMBuildAnd(bld->builder, x_bits, mant_mask, "");
      mantissa = LLVMBuildOr(bld->builder, mantissa, one_bits, "");
      mantissa = LLVMBuildBitCast(bld->builder, mantissa, vec_type, "");

      poly = lp_build_polynomial(bld, mantissa, lp_build_log2_polynomial,
                                 Elements(lp_build_log2_polynomial));

      /* This effectively increases the polynomial degree by one, but ensures
       * that log2(1) == 0 */
      mantissa_minus_one = LLVMBuildSub(bld->builder, mantissa, bld->one, "");
      poly = LLVMBuildMul(bld->builder, poly, mantissa_minus_one, "");

      log2_val = LLVMBuildAdd(bld->builder, poly, floor_log2, "");
   }

   if(p_exp)
      *p_exp = exp_bits;

   if(p_floor_log2)
      *p_floor_log2 = floor_log2;

   if(p_log2)
      *p_log2 = log2_val;
}
/**
 * Create an unboxing wrapper for a method.
 *
 * The wrapper takes the boxed type (t->structure_ptr) as its receiver and
 * forwards to the real function m->func:
 *  - for anything other than a constructor (TK_NEW), the primitive loaded
 *    from element 1 of the boxed receiver replaces the receiver argument;
 *  - for a constructor, the wrapper gains a receiver parameter that the real
 *    function does not have, and only the remaining parameters are forwarded.
 *
 * @return the wrapper function bitcast to c->void_ptr.
 */
static LLVMValueRef make_unbox_function(compile_t* c, reach_type_t* t,
  reach_method_t* m)
{
  // Create a new unboxing function that forwards to the real function.
  LLVMTypeRef f_type = LLVMGetElementType(LLVMTypeOf(m->func));
  int count = LLVMCountParamTypes(f_type);

  // Leave space for a receiver if it's a constructor vtable entry.
  size_t buf_size = (count + 1) * sizeof(LLVMTypeRef);
  LLVMTypeRef* params = (LLVMTypeRef*)ponyint_pool_alloc_size(buf_size);
  LLVMGetParamTypes(f_type, params);
  LLVMTypeRef ret_type = LLVMGetReturnType(f_type);

  const char* unbox_name = genname_unbox(m->full_name);

  if(ast_id(m->r_fun) != TK_NEW)
  {
    // It's the same type, but it takes the boxed type instead of the primitive
    // type as the receiver.
    params[0] = t->structure_ptr;
  } else {
    // For a constructor, the unbox_fun has a receiver, even though the real
    // method does not. Shift the existing parameter types up by one slot; the
    // ranges overlap, hence memmove.
    memmove(&params[1], &params[0], count * sizeof(LLVMTypeRef));
    params[0] = t->structure_ptr;
    count++;
  }

  LLVMTypeRef unbox_type = LLVMFunctionType(ret_type, params, count, false);
  LLVMValueRef unbox_fun = codegen_addfun(c, unbox_name, unbox_type);
  codegen_startfun(c, unbox_fun, NULL, NULL);

  // Extract the primitive type from element 1 and call the real function.
  LLVMValueRef this_ptr = LLVMGetParam(unbox_fun, 0);
  LLVMValueRef primitive_ptr = LLVMBuildStructGEP(c->builder, this_ptr, 1, "");
  LLVMValueRef primitive = LLVMBuildLoad(c->builder, primitive_ptr, "");

  LLVMValueRef* args = (LLVMValueRef*)ponyint_pool_alloc_size(buf_size);

  if(ast_id(m->r_fun) != TK_NEW)
  {
    // If it's not a constructor, pass the extracted primitive as the receiver.
    args[0] = primitive;

    for(int i = 1; i < count; i++)
      args[i] = LLVMGetParam(unbox_fun, i);
  } else {
    // The real constructor has no receiver: drop the wrapper's first
    // parameter and forward the rest.
    count--;

    for(int i = 0; i < count; i++)
      args[i] = LLVMGetParam(unbox_fun, i + 1);
  }

  LLVMValueRef result = codegen_call(c, m->func, args, count);
  LLVMBuildRet(c->builder, result);
  codegen_finishfun(c);

  ponyint_pool_free_size(buf_size, params);
  ponyint_pool_free_size(buf_size, args);
  return LLVMConstBitCast(unbox_fun, c->void_ptr);
}
/**
 * Gather `length` elements of `src_width` bits from base_ptr + offsets using
 * the x86 AVX2 gather intrinsics.
 *
 * The element type is float/double when dst_type is floating and an integer
 * of src_width bits otherwise.  base_ptr must be an i8 pointer.  The gathered
 * vector is bitcast to the vector type of dst_type with its length multiplied
 * by `length`.
 *
 * The llvm.masked.gather path is compiled but disabled (see the XXX note
 * inside).
 */
static LLVMValueRef
lp_build_gather_avx2(struct gallivm_state *gallivm,
                     unsigned length,
                     unsigned src_width,
                     struct lp_type dst_type,
                     LLVMValueRef base_ptr,
                     LLVMValueRef offsets)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMTypeRef elem_type, elem_vec_type;
   LLVMValueRef gathered;
   struct lp_type out_type = dst_type;

   out_type.length *= length;

   if (!dst_type.floating) {
      elem_type = LLVMIntTypeInContext(gallivm->context, src_width);
   } else if (src_width == 64) {
      elem_type = LLVMDoubleTypeInContext(gallivm->context);
   } else {
      elem_type = LLVMFloatTypeInContext(gallivm->context);
   }
   elem_vec_type = LLVMVectorType(elem_type, length);

   /* XXX should allow hw scaling (can handle i8, i16, i32, i64 for x86) */
   assert(LLVMTypeOf(base_ptr) == LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0));

   if (0) {
      /*
       * XXX: This will cause LLVM pre 3.7 to hang; it works on LLVM 3.8 but
       * will not use the AVX2 gather instrinsics (even with llvm 4.0), at
       * least with Haswell. See
       * http://lists.llvm.org/pipermail/llvm-dev/2016-January/094448.html
       * And the generated code doing the emulation is quite a bit worse
       * than what we get by doing it ourselves too.
       */
      LLVMTypeRef i32_type = LLVMIntTypeInContext(gallivm->context, 32);
      LLVMTypeRef i32_vec_type = LLVMVectorType(i32_type, length);
      LLVMTypeRef i1_type = LLVMIntTypeInContext(gallivm->context, 1);
      LLVMTypeRef i1_vec_type = LLVMVectorType(i1_type, length);
      LLVMTypeRef elem_ptr_type = LLVMPointerType(elem_type, 0);
      LLVMValueRef elem_ptrs;

      base_ptr = LLVMBuildBitCast(builder, base_ptr, elem_ptr_type, "");

      /* Rescale offsets from bytes to elements */
      LLVMValueRef bytes_per_elem = LLVMConstInt(i32_type, src_width/8, 0);
      bytes_per_elem = lp_build_broadcast(gallivm, i32_vec_type, bytes_per_elem);
      assert(LLVMTypeOf(offsets) == i32_vec_type);
      offsets = LLVMBuildSDiv(builder, offsets, bytes_per_elem, "");

      elem_ptrs = LLVMBuildGEP(builder, base_ptr, &offsets, 1, "vector-gep");

      char name_buf[64];
      util_snprintf(name_buf, sizeof name_buf, "llvm.masked.gather.v%u%s%u",
                    length, dst_type.floating ? "f" : "i", src_width);
      LLVMValueRef alignment = LLVMConstInt(i32_type, src_width/8, 0);
      LLVMValueRef all_lanes = LLVMConstAllOnes(i1_vec_type);
      LLVMValueRef passthru = LLVMGetUndef(elem_vec_type);

      LLVMValueRef args[] = { elem_ptrs, alignment, all_lanes, passthru };

      gathered = lp_build_intrinsic(builder, name_buf, elem_vec_type, args, 4, 0);
   } else {
      LLVMTypeRef i8_type = LLVMIntTypeInContext(gallivm->context, 8);
      const char *name = NULL;
      unsigned wide = 0;

      assert(src_width == 32 || src_width == 64);
      if (src_width != 32) {
         assert(length == 2 || length == 4);
      } else {
         assert(length == 4 || length == 8);
      }

      /* Indexed by [is float][is 64 bit][is 256-bit variant]. */
      static const char *names[2][2][2] = {
         {{"llvm.x86.avx2.gather.d.d",
           "llvm.x86.avx2.gather.d.d.256"},
          {"llvm.x86.avx2.gather.d.q",
           "llvm.x86.avx2.gather.d.q.256"}},
         {{"llvm.x86.avx2.gather.d.ps",
           "llvm.x86.avx2.gather.d.ps.256"},
          {"llvm.x86.avx2.gather.d.pd",
           "llvm.x86.avx2.gather.d.pd.256"}},
      };

      /* 8 x 32 bit and 4 x 64 bit take the 256-bit variant. */
      if ((src_width == 32 && length == 8) ||
          (src_width == 64 && length == 4)) {
         wide = 1;
      }
      name = names[dst_type.floating][src_width == 64][wide];

      LLVMValueRef passthru = LLVMGetUndef(elem_vec_type);
      LLVMValueRef all_lanes = LLVMConstAllOnes(elem_vec_type);
      all_lanes = LLVMConstBitCast(all_lanes, elem_vec_type);
      LLVMValueRef byte_scale = LLVMConstInt(i8_type, 1, 0);

      LLVMValueRef args[] = { passthru, base_ptr, offsets, all_lanes, byte_scale };

      gathered = lp_build_intrinsic(builder, name, elem_vec_type, args, 5, 0);
   }

   gathered = LLVMBuildBitCast(builder, gathered,
                               lp_build_vec_type(gallivm, out_type), "");

   return gathered;
}