void lp_add_function_attr(LLVMValueRef function_or_call, int attr_idx, enum lp_func_attr attr) { #if HAVE_LLVM < 0x0400 LLVMAttribute llvm_attr = lp_attr_to_llvm_attr(attr); if (LLVMIsAFunction(function_or_call)) { if (attr_idx == -1) { LLVMAddFunctionAttr(function_or_call, llvm_attr); } else { LLVMAddAttribute(LLVMGetParam(function_or_call, attr_idx - 1), llvm_attr); } } else { LLVMAddInstrAttribute(function_or_call, attr_idx, llvm_attr); } #else LLVMModuleRef module; if (LLVMIsAFunction(function_or_call)) { module = LLVMGetGlobalParent(function_or_call); } else { LLVMBasicBlockRef bb = LLVMGetInstructionParent(function_or_call); LLVMValueRef function = LLVMGetBasicBlockParent(bb); module = LLVMGetGlobalParent(function); } LLVMContextRef ctx = LLVMGetModuleContext(module); const char *attr_name = attr_to_str(attr); unsigned kind_id = LLVMGetEnumAttributeKindForName(attr_name, strlen(attr_name)); LLVMAttributeRef llvm_attr = LLVMCreateEnumAttribute(ctx, kind_id, 0); if (LLVMIsAFunction(function_or_call)) LLVMAddAttributeAtIndex(function_or_call, attr_idx, llvm_attr); else LLVMAddCallSiteAttribute(function_or_call, attr_idx, llvm_attr); #endif }
LLVMValueRef lp_build_intrinsic(LLVMBuilderRef builder, const char *name, LLVMTypeRef ret_type, LLVMValueRef *args, unsigned num_args, unsigned attr_mask) { LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder))); LLVMValueRef function, call; bool set_callsite_attrs = HAVE_LLVM >= 0x0400 && !(attr_mask & LP_FUNC_ATTR_LEGACY); function = LLVMGetNamedFunction(module, name); if(!function) { LLVMTypeRef arg_types[LP_MAX_FUNC_ARGS]; unsigned i; assert(num_args <= LP_MAX_FUNC_ARGS); for(i = 0; i < num_args; ++i) { assert(args[i]); arg_types[i] = LLVMTypeOf(args[i]); } function = lp_declare_intrinsic(module, name, ret_type, arg_types, num_args); /* * If llvm removes an intrinsic we use, we'll hit this abort (rather * than a call to address zero in the jited code). */ if (LLVMGetIntrinsicID(function) == 0) { _debug_printf("llvm (version 0x%x) found no intrinsic for %s, going to crash...\n", HAVE_LLVM, name); abort(); } if (!set_callsite_attrs) lp_add_func_attributes(function, attr_mask); if (gallivm_debug & GALLIVM_DEBUG_IR) { lp_debug_dump_value(function); } } call = LLVMBuildCall(builder, function, args, num_args, ""); if (set_callsite_attrs) lp_add_func_attributes(call, attr_mask); return call; }
/** * lp_build_assert. * * Build an assertion in LLVM IR by building a function call to the * lp_assert() function above. * * \param condition should be an 'i1' or 'i32' value * \param msg a string to print if the assertion fails. */ LLVMValueRef lp_build_assert(LLVMBuilderRef builder, LLVMValueRef condition, const char *msg) { LLVMModuleRef module; LLVMTypeRef arg_types[2]; LLVMValueRef msg_string, assert_func, params[2], r; module = LLVMGetGlobalParent(LLVMGetBasicBlockParent( LLVMGetInsertBlock(builder))); msg_string = lp_build_const_string_variable(module, msg, strlen(msg) + 1); arg_types[0] = LLVMInt32Type(); arg_types[1] = LLVMPointerType(LLVMInt8Type(), 0); /* lookup the lp_assert function */ assert_func = LLVMGetNamedFunction(module, "lp_assert"); /* Create the assertion function if not found */ if (!assert_func) { LLVMTypeRef func_type = LLVMFunctionType(LLVMVoidType(), arg_types, 2, 0); assert_func = LLVMAddFunction(module, "lp_assert", func_type); LLVMSetFunctionCallConv(assert_func, LLVMCCallConv); LLVMSetLinkage(assert_func, LLVMExternalLinkage); LLVMAddGlobalMapping(lp_build_engine, assert_func, func_to_pointer((func_pointer)lp_assert)); } assert(assert_func); /* build function call param list */ params[0] = LLVMBuildZExt(builder, condition, arg_types[0], ""); params[1] = LLVMBuildBitCast(builder, msg_string, arg_types[1], ""); /* check arg types */ assert(LLVMTypeOf(params[0]) == arg_types[0]); assert(LLVMTypeOf(params[1]) == arg_types[1]); r = LLVMBuildCall(builder, assert_func, params, 2, ""); return r; }
/** * Fetch a pixel into a 4 float AoS. * * \param format_desc describes format of the image we're fetching from * \param ptr address of the pixel block (or the texel if uncompressed) * \param i, j the sub-block pixel coordinates. For non-compressed formats * these will always be (0, 0). * \return a 4 element vector with the pixel's RGBA values. */ LLVMValueRef lp_build_fetch_rgba_aos(struct gallivm_state *gallivm, const struct util_format_description *format_desc, struct lp_type type, LLVMValueRef base_ptr, LLVMValueRef offset, LLVMValueRef i, LLVMValueRef j) { LLVMBuilderRef builder = gallivm->builder; unsigned num_pixels = type.length / 4; struct lp_build_context bld; assert(type.length <= LP_MAX_VECTOR_LENGTH); assert(type.length % 4 == 0); lp_build_context_init(&bld, gallivm, type); /* * Trivial case * * The format matches the type (apart of a swizzle) so no need for * scaling or converting. */ if (format_matches_type(format_desc, type) && format_desc->block.bits <= type.width * 4 && util_is_power_of_two(format_desc->block.bits)) { LLVMValueRef packed; /* * The format matches the type (apart of a swizzle) so no need for * scaling or converting. */ packed = lp_build_gather(gallivm, type.length/4, format_desc->block.bits, type.width*4, base_ptr, offset); assert(format_desc->block.bits <= type.width * type.length); packed = LLVMBuildBitCast(gallivm->builder, packed, lp_build_vec_type(gallivm, type), ""); return lp_build_format_swizzle_aos(format_desc, &bld, packed); } /* * Bit arithmetic */ if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN && (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB || format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) && format_desc->block.width == 1 && format_desc->block.height == 1 && util_is_power_of_two(format_desc->block.bits) && format_desc->block.bits <= 32 && format_desc->is_bitmask && !format_desc->is_mixed && (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED || format_desc->channel[1].type == UTIL_FORMAT_TYPE_UNSIGNED)) { LLVMValueRef tmps[LP_MAX_VECTOR_LENGTH/4]; LLVMValueRef res; unsigned k; /* * Unpack a pixel at a time into a <4 x float> RGBA vector */ for (k = 0; k < num_pixels; ++k) { LLVMValueRef packed; packed = lp_build_gather_elem(gallivm, num_pixels, format_desc->block.bits, 32, base_ptr, offset, k); tmps[k] = lp_build_unpack_arith_rgba_aos(gallivm, format_desc, packed); } /* * Type conversion. * * TODO: We could avoid floating conversion for integer to * integer conversions. */ if (gallivm_debug & GALLIVM_DEBUG_PERF && !type.floating) { debug_printf("%s: unpacking %s with floating point\n", __FUNCTION__, format_desc->short_name); } lp_build_conv(gallivm, lp_float32_vec4_type(), type, tmps, num_pixels, &res, 1); return lp_build_format_swizzle_aos(format_desc, &bld, res); } /* * YUV / subsampled formats */ if (format_desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) { struct lp_type tmp_type; LLVMValueRef tmp; memset(&tmp_type, 0, sizeof tmp_type); tmp_type.width = 8; tmp_type.length = num_pixels * 4; tmp_type.norm = TRUE; tmp = lp_build_fetch_subsampled_rgba_aos(gallivm, format_desc, num_pixels, base_ptr, offset, i, j); lp_build_conv(gallivm, tmp_type, type, &tmp, 1, &tmp, 1); return tmp; } /* * Fallback to util_format_description::fetch_rgba_8unorm(). */ if (format_desc->fetch_rgba_8unorm && !type.floating && type.width == 8 && !type.sign && type.norm) { /* * Fallback to calling util_format_description::fetch_rgba_8unorm. * * This is definitely not the most efficient way of fetching pixels, as * we miss the opportunity to do vectorization, but this it is a * convenient for formats or scenarios for which there was no opportunity * or incentive to optimize. */ LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder))); char name[256]; LLVMTypeRef i8t = LLVMInt8TypeInContext(gallivm->context); LLVMTypeRef pi8t = LLVMPointerType(i8t, 0); LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context); LLVMValueRef function; LLVMValueRef tmp_ptr; LLVMValueRef tmp; LLVMValueRef res; LLVMValueRef callee; unsigned k; util_snprintf(name, sizeof name, "util_format_%s_fetch_rgba_8unorm", format_desc->short_name); if (gallivm_debug & GALLIVM_DEBUG_PERF) { debug_printf("%s: falling back to %s\n", __FUNCTION__, name); } /* * Declare and bind format_desc->fetch_rgba_8unorm(). */ function = LLVMGetNamedFunction(module, name); if (!function) { /* * Function to call looks like: * fetch(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j) */ LLVMTypeRef ret_type; LLVMTypeRef arg_types[4]; LLVMTypeRef function_type; ret_type = LLVMVoidTypeInContext(gallivm->context); arg_types[0] = pi8t; arg_types[1] = pi8t; arg_types[2] = i32t; arg_types[3] = i32t; function_type = LLVMFunctionType(ret_type, arg_types, Elements(arg_types), 0); function = LLVMAddFunction(module, name, function_type); LLVMSetFunctionCallConv(function, LLVMCCallConv); LLVMSetLinkage(function, LLVMExternalLinkage); assert(LLVMIsDeclaration(function)); } /* make const pointer for the C fetch_rgba_float function */ callee = lp_build_const_int_pointer(gallivm, func_to_pointer((func_pointer) format_desc->fetch_rgba_8unorm)); /* cast the callee pointer to the function's type */ function = LLVMBuildBitCast(builder, callee, LLVMTypeOf(function), "cast callee"); tmp_ptr = lp_build_alloca(gallivm, i32t, ""); res = LLVMGetUndef(LLVMVectorType(i32t, num_pixels)); /* * Invoke format_desc->fetch_rgba_8unorm() for each pixel and insert the result * in the SoA vectors. */ for (k = 0; k < num_pixels; ++k) { LLVMValueRef index = lp_build_const_int32(gallivm, k); LLVMValueRef args[4]; args[0] = LLVMBuildBitCast(builder, tmp_ptr, pi8t, ""); args[1] = lp_build_gather_elem_ptr(gallivm, num_pixels, base_ptr, offset, k); if (num_pixels == 1) { args[2] = i; args[3] = j; } else { args[2] = LLVMBuildExtractElement(builder, i, index, ""); args[3] = LLVMBuildExtractElement(builder, j, index, ""); } LLVMBuildCall(builder, function, args, Elements(args), ""); tmp = LLVMBuildLoad(builder, tmp_ptr, ""); if (num_pixels == 1) { res = tmp; } else { res = LLVMBuildInsertElement(builder, res, tmp, index, ""); } } /* Bitcast from <n x i32> to <4n x i8> */ res = LLVMBuildBitCast(builder, res, bld.vec_type, ""); return res; } /* * Fallback to util_format_description::fetch_rgba_float(). */ if (format_desc->fetch_rgba_float) { /* * Fallback to calling util_format_description::fetch_rgba_float. * * This is definitely not the most efficient way of fetching pixels, as * we miss the opportunity to do vectorization, but this it is a * convenient for formats or scenarios for which there was no opportunity * or incentive to optimize. */ LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder))); char name[256]; LLVMTypeRef f32t = LLVMFloatTypeInContext(gallivm->context); LLVMTypeRef f32x4t = LLVMVectorType(f32t, 4); LLVMTypeRef pf32t = LLVMPointerType(f32t, 0); LLVMTypeRef pi8t = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0); LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context); LLVMValueRef function; LLVMValueRef tmp_ptr; LLVMValueRef tmps[LP_MAX_VECTOR_LENGTH/4]; LLVMValueRef res; LLVMValueRef callee; unsigned k; util_snprintf(name, sizeof name, "util_format_%s_fetch_rgba_float", format_desc->short_name); if (gallivm_debug & GALLIVM_DEBUG_PERF) { debug_printf("%s: falling back to %s\n", __FUNCTION__, name); } /* * Declare and bind format_desc->fetch_rgba_float(). */ function = LLVMGetNamedFunction(module, name); if (!function) { /* * Function to call looks like: * fetch(float *dst, const uint8_t *src, unsigned i, unsigned j) */ LLVMTypeRef ret_type; LLVMTypeRef arg_types[4]; LLVMTypeRef function_type; ret_type = LLVMVoidTypeInContext(gallivm->context); arg_types[0] = pf32t; arg_types[1] = pi8t; arg_types[2] = i32t; arg_types[3] = i32t; function_type = LLVMFunctionType(ret_type, arg_types, Elements(arg_types), 0); function = LLVMAddFunction(module, name, function_type); LLVMSetFunctionCallConv(function, LLVMCCallConv); LLVMSetLinkage(function, LLVMExternalLinkage); assert(LLVMIsDeclaration(function)); } /* Note: we're using this casting here instead of LLVMAddGlobalMapping() * to work around a bug in LLVM 2.6. */ /* make const pointer for the C fetch_rgba_float function */ callee = lp_build_const_int_pointer(gallivm, func_to_pointer((func_pointer) format_desc->fetch_rgba_float)); /* cast the callee pointer to the function's type */ function = LLVMBuildBitCast(builder, callee, LLVMTypeOf(function), "cast callee"); tmp_ptr = lp_build_alloca(gallivm, f32x4t, ""); /* * Invoke format_desc->fetch_rgba_float() for each pixel and insert the result * in the SoA vectors. */ for (k = 0; k < num_pixels; ++k) { LLVMValueRef args[4]; args[0] = LLVMBuildBitCast(builder, tmp_ptr, pf32t, ""); args[1] = lp_build_gather_elem_ptr(gallivm, num_pixels, base_ptr, offset, k); if (num_pixels == 1) { args[2] = i; args[3] = j; } else { LLVMValueRef index = lp_build_const_int32(gallivm, k); args[2] = LLVMBuildExtractElement(builder, i, index, ""); args[3] = LLVMBuildExtractElement(builder, j, index, ""); } LLVMBuildCall(builder, function, args, Elements(args), ""); tmps[k] = LLVMBuildLoad(builder, tmp_ptr, ""); } lp_build_conv(gallivm, lp_float32_vec4_type(), type, tmps, num_pixels, &res, 1); return res; } assert(0); return lp_build_undef(gallivm, type); }
void lp_build_tgsi_aos(struct gallivm_state *gallivm, const struct tgsi_token *tokens, struct lp_type type, const unsigned char swizzles[4], LLVMValueRef consts_ptr, const LLVMValueRef *inputs, LLVMValueRef *outputs, struct lp_build_sampler_aos *sampler, const struct tgsi_shader_info *info) { struct lp_build_tgsi_aos_context bld; struct tgsi_parse_context parse; uint num_immediates = 0; unsigned chan; int pc = 0; /* Setup build context */ memset(&bld, 0, sizeof bld); lp_build_context_init(&bld.bld_base.base, gallivm, type); lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type)); lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type)); lp_build_context_init(&bld.int_bld, gallivm, lp_int_type(type)); for (chan = 0; chan < 4; ++chan) { bld.swizzles[chan] = swizzles[chan]; bld.inv_swizzles[swizzles[chan]] = chan; } bld.inputs = inputs; bld.outputs = outputs; bld.consts_ptr = consts_ptr; bld.sampler = sampler; bld.indirect_files = info->indirect_files; bld.bld_base.emit_swizzle = swizzle_aos; bld.bld_base.info = info; bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant; bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate; bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input; bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary; /* Set opcode actions */ lp_set_default_actions_cpu(&bld.bld_base); if (!lp_bld_tgsi_list_init(&bld.bld_base)) { return; } tgsi_parse_init(&parse, tokens); while (!tgsi_parse_end_of_tokens(&parse)) { tgsi_parse_token(&parse); switch(parse.FullToken.Token.Type) { case TGSI_TOKEN_TYPE_DECLARATION: /* Inputs already interpolated */ lp_emit_declaration_aos(&bld, &parse.FullToken.FullDeclaration); break; case TGSI_TOKEN_TYPE_INSTRUCTION: /* save expanded instruction */ lp_bld_tgsi_add_instruction(&bld.bld_base, &parse.FullToken.FullInstruction); break; case TGSI_TOKEN_TYPE_IMMEDIATE: /* simply copy the immediate values into the next immediates[] slot */ { const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1; float imm[4]; assert(size <= 4); assert(num_immediates < LP_MAX_TGSI_IMMEDIATES); for (chan = 0; chan < 4; ++chan) { imm[chan] = 0.0f; } for (chan = 0; chan < size; ++chan) { unsigned swizzle = bld.swizzles[chan]; imm[swizzle] = parse.FullToken.FullImmediate.u[chan].Float; } bld.immediates[num_immediates] = lp_build_const_aos(gallivm, type, imm[0], imm[1], imm[2], imm[3], NULL); num_immediates++; } break; case TGSI_TOKEN_TYPE_PROPERTY: break; default: assert(0); } } while (pc != -1) { struct tgsi_full_instruction *instr = bld.bld_base.instructions + pc; const struct tgsi_opcode_info *opcode_info = tgsi_get_opcode_info(instr->Instruction.Opcode); if (!lp_emit_instruction_aos(&bld, instr, opcode_info, &pc)) _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n", opcode_info->mnemonic); } if (0) { LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder); LLVMValueRef function = LLVMGetBasicBlockParent(block); debug_printf("11111111111111111111111111111 \n"); tgsi_dump(tokens, 0); lp_debug_dump_value(function); debug_printf("2222222222222222222222222222 \n"); } tgsi_parse_free(&parse); FREE(bld.bld_base.instructions); if (0) { LLVMModuleRef module = LLVMGetGlobalParent( LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder))); LLVMDumpModule(module); } }