void lp_build_init(void) { if (gallivm_initialized) return; #ifdef DEBUG gallivm_debug = debug_get_option_gallivm_debug(); #endif lp_set_target_options(); #if USE_MCJIT LLVMLinkInMCJIT(); #else LLVMLinkInJIT(); #endif util_cpu_detect(); /* AMD Bulldozer AVX's throughput is the same as SSE2; and because using * 8-wide vector needs more floating ops than 4-wide (due to padding), it is * actually more efficient to use 4-wide vectors on this processor. * * See also: * - http://www.anandtech.com/show/4955/the-bulldozer-review-amd-fx8150-tested/2 */ if (HAVE_AVX && util_cpu_caps.has_avx && util_cpu_caps.has_intel) { lp_native_vector_width = 256; } else { /* Leave it at 128, even when no SIMD extensions are available. * Really needs to be a multiple of 128 so can fit 4 floats. */ lp_native_vector_width = 128; } lp_native_vector_width = debug_get_num_option("LP_NATIVE_VECTOR_WIDTH", lp_native_vector_width); gallivm_initialized = TRUE; #if 0 /* For simulating less capable machines */ util_cpu_caps.has_sse3 = 0; util_cpu_caps.has_ssse3 = 0; util_cpu_caps.has_sse4_1 = 0; #endif }
void lp_build_init(void) { if (gallivm_initialized) return; #ifdef DEBUG gallivm_debug = debug_get_option_gallivm_debug(); #endif lp_set_target_options(); #if USE_MCJIT LLVMLinkInMCJIT(); #else LLVMLinkInJIT(); #endif util_cpu_detect(); if (HAVE_AVX && util_cpu_caps.has_avx) { lp_native_vector_width = 256; } else { /* Leave it at 128, even when no SIMD extensions are available. * Really needs to be a multiple of 128 so can fit 4 floats. */ lp_native_vector_width = 128; } lp_native_vector_width = debug_get_num_option("LP_NATIVE_VECTOR_WIDTH", lp_native_vector_width); gallivm_initialized = TRUE; #if 0 /* For simulating less capable machines */ util_cpu_caps.has_sse3 = 0; util_cpu_caps.has_ssse3 = 0; util_cpu_caps.has_sse4_1 = 0; #endif }
void lp_build_init(void) { #ifdef DEBUG gallivm_debug = debug_get_option_gallivm_debug(); #endif lp_set_target_options(); LLVMInitializeNativeTarget(); LLVMLinkInJIT(); if (!lp_build_module) lp_build_module = LLVMModuleCreateWithName("gallivm"); if (!lp_build_provider) lp_build_provider = LLVMCreateModuleProviderForExistingModule(lp_build_module); if (!lp_build_engine) { enum LLVM_CodeGenOpt_Level optlevel; char *error = NULL; if (gallivm_debug & GALLIVM_DEBUG_NO_OPT) { optlevel = None; } else { optlevel = Default; } if (LLVMCreateJITCompiler(&lp_build_engine, lp_build_provider, (unsigned)optlevel, &error)) { _debug_printf("%s\n", error); LLVMDisposeMessage(error); assert(0); } #if defined(DEBUG) || defined(PROFILE) lp_register_oprofile_jit_event_listener(lp_build_engine); #endif } if (!lp_build_target) lp_build_target = LLVMGetExecutionEngineTargetData(lp_build_engine); if (!lp_build_pass) { lp_build_pass = LLVMCreateFunctionPassManager(lp_build_provider); LLVMAddTargetData(lp_build_target, lp_build_pass); if ((gallivm_debug & GALLIVM_DEBUG_NO_OPT) == 0) { /* These are the passes currently listed in llvm-c/Transforms/Scalar.h, * but there are more on SVN. */ /* TODO: Add more passes */ LLVMAddCFGSimplificationPass(lp_build_pass); LLVMAddPromoteMemoryToRegisterPass(lp_build_pass); LLVMAddConstantPropagationPass(lp_build_pass); if(util_cpu_caps.has_sse4_1) { /* FIXME: There is a bug in this pass, whereby the combination of fptosi * and sitofp (necessary for trunc/floor/ceil/round implementation) * somehow becomes invalid code. */ LLVMAddInstructionCombiningPass(lp_build_pass); } LLVMAddGVNPass(lp_build_pass); } else { /* We need at least this pass to prevent the backends to fail in * unexpected ways. */ LLVMAddPromoteMemoryToRegisterPass(lp_build_pass); } } util_cpu_detect(); #if 0 /* For simulating less capable machines */ util_cpu_caps.has_sse3 = 0; util_cpu_caps.has_ssse3 = 0; util_cpu_caps.has_sse4_1 = 0; #endif }