/*
 * JNI entry point for org.jllvm.bindings.ScalarJNI.LLVMAddGVNPass.
 *
 * jarg1 carries the LLVMPassManagerRef handle packed into a jlong; it is
 * reinterpreted back into the handle type and forwarded to LLVMAddGVNPass.
 * jenv and jcls are not used.
 */
SWIGEXPORT void JNICALL
Java_org_jllvm_bindings_ScalarJNI_LLVMAddGVNPass(JNIEnv *jenv, jclass jcls, jlong jarg1)
{
  LLVMPassManagerRef pass_manager;

  (void)jenv;
  (void)jcls;

  /* Recover the handle from the storage of the jlong argument. */
  pass_manager = *(LLVMPassManagerRef *)&jarg1;
  LLVMAddGVNPass(pass_manager);
}
/** * Create the LLVM (optimization) pass manager and install * relevant optimization passes. * \return TRUE for success, FALSE for failure */ static boolean create_pass_manager(struct gallivm_state *gallivm) { assert(!gallivm->passmgr); assert(gallivm->target); gallivm->passmgr = LLVMCreateFunctionPassManager(gallivm->provider); if (!gallivm->passmgr) return FALSE; LLVMAddTargetData(gallivm->target, gallivm->passmgr); if ((gallivm_debug & GALLIVM_DEBUG_NO_OPT) == 0) { /* These are the passes currently listed in llvm-c/Transforms/Scalar.h, * but there are more on SVN. * TODO: Add more passes. */ LLVMAddCFGSimplificationPass(gallivm->passmgr); if (HAVE_LLVM >= 0x207 && sizeof(void*) == 4) { /* For LLVM >= 2.7 and 32-bit build, use this order of passes to * avoid generating bad code. * Test with piglit glsl-vs-sqrt-zero test. */ LLVMAddConstantPropagationPass(gallivm->passmgr); LLVMAddPromoteMemoryToRegisterPass(gallivm->passmgr); } else { LLVMAddPromoteMemoryToRegisterPass(gallivm->passmgr); LLVMAddConstantPropagationPass(gallivm->passmgr); } if (util_cpu_caps.has_sse4_1) { /* FIXME: There is a bug in this pass, whereby the combination * of fptosi and sitofp (necessary for trunc/floor/ceil/round * implementation) somehow becomes invalid code. */ LLVMAddInstructionCombiningPass(gallivm->passmgr); } LLVMAddGVNPass(gallivm->passmgr); } else { /* We need at least this pass to prevent the backends to fail in * unexpected ways. */ LLVMAddPromoteMemoryToRegisterPass(gallivm->passmgr); } return TRUE; }
void JITImpl::init() { if (initialized) return; LLVMLinkInJIT(); LLVMInitializeNativeTarget(); LLVMMemoryBufferRef memBuffer = LLVMExtraCreateMemoryBufferWithPtr(instructionBitcode, instructionBitcodeSize); char *outMessage; if (LLVMParseBitcode(memBuffer, &module, &outMessage)) { std::cerr << "Error loading bitcode: " << outMessage << '\n'; std::abort(); } // TODO experiment with opt level. if (LLVMCreateJITCompilerForModule(&executionEngine, module, 1, &outMessage)) { std::cerr << "Error creating JIT compiler: " << outMessage << '\n'; std::abort(); } builder = LLVMCreateBuilder(); LLVMValueRef callee = LLVMGetNamedFunction(module, "jitInstructionTemplate"); assert(callee && "jitInstructionTemplate() not found in module"); jitFunctionType = LLVMGetElementType(LLVMTypeOf(callee)); functions.init(module); FPM = LLVMCreateFunctionPassManagerForModule(module); LLVMAddTargetData(LLVMGetExecutionEngineTargetData(executionEngine), FPM); LLVMAddBasicAliasAnalysisPass(FPM); LLVMAddJumpThreadingPass(FPM); LLVMAddGVNPass(FPM); LLVMAddJumpThreadingPass(FPM); LLVMAddCFGSimplificationPass(FPM); LLVMAddDeadStoreEliminationPass(FPM); LLVMAddInstructionCombiningPass(FPM); LLVMInitializeFunctionPassManager(FPM); if (DEBUG_JIT) { LLVMExtraRegisterJitDisassembler(executionEngine, LLVMGetTarget(module)); } initialized = true; }
/**
 * JIT-compile, optimise and run a compiled program.
 *
 * Creates a JIT execution engine for program.module (optimisation level 2),
 * runs a fixed set of scalar passes over the module, then calls
 * program.function with no arguments and returns its result converted to
 * int. In builds without NDEBUG the optimised module is dumped first.
 *
 * Aborts the process, after printing LLVM's message to stderr, if the JIT
 * compiler cannot be created.
 *
 * @param program  module and entry function to execute
 * @return the integer value returned by the entry function
 */
int LLVM_execute(LLVMCompiledProgram program)
{
    LLVMModuleRef module = program.module;
    LLVMValueRef function = program.function;
    char *error = NULL; /* receives LLVM's message when a call fails */
    LLVMExecutionEngineRef engine;
    LLVMModuleProviderRef provider =
        LLVMCreateModuleProviderForExistingModule(module);

    if (LLVMCreateJITCompiler(&engine, provider, 2, &error) != 0) {
        fprintf(stderr, "%s\n", error);
        LLVMDisposeMessage(error);
        abort();
    }

    LLVMPassManagerRef pass = LLVMCreatePassManager();
    LLVMAddTargetData(LLVMGetExecutionEngineTargetData(engine), pass);
    LLVMAddConstantPropagationPass(pass);
    LLVMAddInstructionCombiningPass(pass);
    LLVMAddPromoteMemoryToRegisterPass(pass);
    LLVMAddGVNPass(pass);
    LLVMAddCFGSimplificationPass(pass);
    LLVMRunPassManager(pass, module);

#ifndef NDEBUG
    LLVMDumpModule(module);
#endif

    /* The function takes no arguments, so no argument array is needed
     * (a zero-length array is not valid standard C/C++). */
    LLVMGenericValueRef exec_res = LLVMRunFunction(engine, function, 0, NULL);
    int result = (int)LLVMGenericValueToInt(exec_res, 0);

    /* The generic value returned by LLVMRunFunction is owned by the caller. */
    LLVMDisposeGenericValue(exec_res);

    LLVMDisposePassManager(pass);
    LLVMDisposeExecutionEngine(engine);
    return result;
}
/*
 * Scheme binding: create a JIT engine for the wrapped LLVM module and run a
 * fixed set of optimisation passes over the module.
 *
 * Raises a Scheme error ("llvm-compile") when the module already has an
 * engine or when the JIT engine cannot be initialised. scm_name is not used.
 * Returns SCM_UNSPECIFIED.
 */
SCM llvm_compile_module(SCM scm_llvm, SCM scm_name)
{
  struct llvm_module_t *self = get_llvm(scm_llvm);
  char *jit_error = NULL;
  LLVMPassManagerRef passes;

  if (self->engine)
    scm_misc_error("llvm-compile", "LLVM module already compiled", SCM_EOL);

  if (LLVMCreateJITCompilerForModule(&self->engine, self->module, 2, &jit_error) != 0) {
    /* Copy the message into a Scheme string before releasing LLVM's copy. */
    SCM scm_error = scm_from_locale_string(jit_error);
    LLVMDisposeMessage(jit_error);
    scm_misc_error("llvm-compile", "Error initialising JIT engine: ~a", scm_list_1(scm_error));
  }

  passes = LLVMCreatePassManager();
  LLVMAddConstantPropagationPass(passes);
  LLVMAddInstructionCombiningPass(passes);
  LLVMAddPromoteMemoryToRegisterPass(passes);
  LLVMAddGVNPass(passes);
  LLVMAddCFGSimplificationPass(passes);
  LLVMRunPassManager(passes, self->module);
  LLVMDisposePassManager(passes);

  return SCM_UNSPECIFIED;
}
ALIGN_STACK static boolean test_one(unsigned verbose, FILE *fp, const struct pipe_blend_state *blend, enum vector_mode mode, struct lp_type type) { LLVMModuleRef module = NULL; LLVMValueRef func = NULL; LLVMExecutionEngineRef engine = NULL; LLVMModuleProviderRef provider = NULL; LLVMPassManagerRef pass = NULL; char *error = NULL; blend_test_ptr_t blend_test_ptr; boolean success; const unsigned n = LP_TEST_NUM_SAMPLES; int64_t cycles[LP_TEST_NUM_SAMPLES]; double cycles_avg = 0.0; unsigned i, j; if(verbose >= 1) dump_blend_type(stdout, blend, mode, type); module = LLVMModuleCreateWithName("test"); func = add_blend_test(module, blend, mode, type); if(LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) { LLVMDumpModule(module); abort(); } LLVMDisposeMessage(error); provider = LLVMCreateModuleProviderForExistingModule(module); if (LLVMCreateJITCompiler(&engine, provider, 1, &error)) { if(verbose < 1) dump_blend_type(stderr, blend, mode, type); fprintf(stderr, "%s\n", error); LLVMDisposeMessage(error); abort(); } #if 0 pass = LLVMCreatePassManager(); LLVMAddTargetData(LLVMGetExecutionEngineTargetData(engine), pass); /* These are the passes currently listed in llvm-c/Transforms/Scalar.h, * but there are more on SVN. 
*/ LLVMAddConstantPropagationPass(pass); LLVMAddInstructionCombiningPass(pass); LLVMAddPromoteMemoryToRegisterPass(pass); LLVMAddGVNPass(pass); LLVMAddCFGSimplificationPass(pass); LLVMRunPassManager(pass, module); #else (void)pass; #endif if(verbose >= 2) LLVMDumpModule(module); blend_test_ptr = (blend_test_ptr_t)LLVMGetPointerToGlobal(engine, func); if(verbose >= 2) lp_disassemble(blend_test_ptr); success = TRUE; for(i = 0; i < n && success; ++i) { if(mode == AoS) { ALIGN16_ATTRIB uint8_t src[LP_NATIVE_VECTOR_WIDTH/8]; ALIGN16_ATTRIB uint8_t dst[LP_NATIVE_VECTOR_WIDTH/8]; ALIGN16_ATTRIB uint8_t con[LP_NATIVE_VECTOR_WIDTH/8]; ALIGN16_ATTRIB uint8_t res[LP_NATIVE_VECTOR_WIDTH/8]; ALIGN16_ATTRIB uint8_t ref[LP_NATIVE_VECTOR_WIDTH/8]; int64_t start_counter = 0; int64_t end_counter = 0; random_vec(type, src); random_vec(type, dst); random_vec(type, con); { double fsrc[LP_MAX_VECTOR_LENGTH]; double fdst[LP_MAX_VECTOR_LENGTH]; double fcon[LP_MAX_VECTOR_LENGTH]; double fref[LP_MAX_VECTOR_LENGTH]; read_vec(type, src, fsrc); read_vec(type, dst, fdst); read_vec(type, con, fcon); for(j = 0; j < type.length; j += 4) compute_blend_ref(blend, fsrc + j, fdst + j, fcon + j, fref + j); write_vec(type, ref, fref); } start_counter = rdtsc(); blend_test_ptr(src, dst, con, res); end_counter = rdtsc(); cycles[i] = end_counter - start_counter; if(!compare_vec(type, res, ref)) { success = FALSE; if(verbose < 1) dump_blend_type(stderr, blend, mode, type); fprintf(stderr, "MISMATCH\n"); fprintf(stderr, " Src: "); dump_vec(stderr, type, src); fprintf(stderr, "\n"); fprintf(stderr, " Dst: "); dump_vec(stderr, type, dst); fprintf(stderr, "\n"); fprintf(stderr, " Con: "); dump_vec(stderr, type, con); fprintf(stderr, "\n"); fprintf(stderr, " Res: "); dump_vec(stderr, type, res); fprintf(stderr, "\n"); fprintf(stderr, " Ref: "); dump_vec(stderr, type, ref); fprintf(stderr, "\n"); } } if(mode == SoA) { const unsigned stride = type.length*type.width/8; ALIGN16_ATTRIB uint8_t 
src[4*LP_NATIVE_VECTOR_WIDTH/8]; ALIGN16_ATTRIB uint8_t dst[4*LP_NATIVE_VECTOR_WIDTH/8]; ALIGN16_ATTRIB uint8_t con[4*LP_NATIVE_VECTOR_WIDTH/8]; ALIGN16_ATTRIB uint8_t res[4*LP_NATIVE_VECTOR_WIDTH/8]; ALIGN16_ATTRIB uint8_t ref[4*LP_NATIVE_VECTOR_WIDTH/8]; int64_t start_counter = 0; int64_t end_counter = 0; boolean mismatch; for(j = 0; j < 4; ++j) { random_vec(type, src + j*stride); random_vec(type, dst + j*stride); random_vec(type, con + j*stride); } { double fsrc[4]; double fdst[4]; double fcon[4]; double fref[4]; unsigned k; for(k = 0; k < type.length; ++k) { for(j = 0; j < 4; ++j) { fsrc[j] = read_elem(type, src + j*stride, k); fdst[j] = read_elem(type, dst + j*stride, k); fcon[j] = read_elem(type, con + j*stride, k); } compute_blend_ref(blend, fsrc, fdst, fcon, fref); for(j = 0; j < 4; ++j) write_elem(type, ref + j*stride, k, fref[j]); } } start_counter = rdtsc(); blend_test_ptr(src, dst, con, res); end_counter = rdtsc(); cycles[i] = end_counter - start_counter; mismatch = FALSE; for (j = 0; j < 4; ++j) if(!compare_vec(type, res + j*stride, ref + j*stride)) mismatch = TRUE; if (mismatch) { success = FALSE; if(verbose < 1) dump_blend_type(stderr, blend, mode, type); fprintf(stderr, "MISMATCH\n"); for(j = 0; j < 4; ++j) { char channel = "RGBA"[j]; fprintf(stderr, " Src%c: ", channel); dump_vec(stderr, type, src + j*stride); fprintf(stderr, "\n"); fprintf(stderr, " Dst%c: ", channel); dump_vec(stderr, type, dst + j*stride); fprintf(stderr, "\n"); fprintf(stderr, " Con%c: ", channel); dump_vec(stderr, type, con + j*stride); fprintf(stderr, "\n"); fprintf(stderr, " Res%c: ", channel); dump_vec(stderr, type, res + j*stride); fprintf(stderr, "\n"); fprintf(stderr, " Ref%c: ", channel); dump_vec(stderr, type, ref + j*stride); fprintf(stderr, "\n"); } } } } /* * Unfortunately the output of cycle counter is not very reliable as it comes * -- sometimes we get outliers (due IRQs perhaps?) which are * better removed to avoid random or biased data. 
*/ { double sum = 0.0, sum2 = 0.0; double avg, std; unsigned m; for(i = 0; i < n; ++i) { sum += cycles[i]; sum2 += cycles[i]*cycles[i]; } avg = sum/n; std = sqrtf((sum2 - n*avg*avg)/n); m = 0; sum = 0.0; for(i = 0; i < n; ++i) { if(fabs(cycles[i] - avg) <= 4.0*std) { sum += cycles[i]; ++m; } } cycles_avg = sum/m; } if(fp) write_tsv_row(fp, blend, mode, type, cycles_avg, success); if (!success) { if(verbose < 2) LLVMDumpModule(module); LLVMWriteBitcodeToFile(module, "blend.bc"); fprintf(stderr, "blend.bc written\n"); fprintf(stderr, "Invoke as \"llc -o - blend.bc\"\n"); abort(); } LLVMFreeMachineCodeForFunction(engine, func); LLVMDisposeExecutionEngine(engine); if(pass) LLVMDisposePassManager(pass); return success; }
PIPE_ALIGN_STACK static boolean test_round(unsigned verbose, FILE *fp) { LLVMModuleRef module = NULL; LLVMValueRef test_round = NULL, test_trunc, test_floor, test_ceil; LLVMExecutionEngineRef engine = lp_build_engine; LLVMPassManagerRef pass = NULL; char *error = NULL; test_round_t round_func, trunc_func, floor_func, ceil_func; float unpacked[4]; unsigned packed; boolean success = TRUE; int i; module = LLVMModuleCreateWithName("test"); test_round = add_test(module, "round", lp_build_round); test_trunc = add_test(module, "trunc", lp_build_trunc); test_floor = add_test(module, "floor", lp_build_floor); test_ceil = add_test(module, "ceil", lp_build_ceil); if(LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) { printf("LLVMVerifyModule: %s\n", error); LLVMDumpModule(module); abort(); } LLVMDisposeMessage(error); #if 0 pass = LLVMCreatePassManager(); LLVMAddTargetData(LLVMGetExecutionEngineTargetData(engine), pass); /* These are the passes currently listed in llvm-c/Transforms/Scalar.h, * but there are more on SVN. 
*/ LLVMAddConstantPropagationPass(pass); LLVMAddInstructionCombiningPass(pass); LLVMAddPromoteMemoryToRegisterPass(pass); LLVMAddGVNPass(pass); LLVMAddCFGSimplificationPass(pass); LLVMRunPassManager(pass, module); #else (void)pass; #endif round_func = (test_round_t) pointer_to_func(LLVMGetPointerToGlobal(engine, test_round)); trunc_func = (test_round_t) pointer_to_func(LLVMGetPointerToGlobal(engine, test_trunc)); floor_func = (test_round_t) pointer_to_func(LLVMGetPointerToGlobal(engine, test_floor)); ceil_func = (test_round_t) pointer_to_func(LLVMGetPointerToGlobal(engine, test_ceil)); memset(unpacked, 0, sizeof unpacked); packed = 0; if (0) LLVMDumpModule(module); for (i = 0; i < 3; i++) { v4sf xvals[3] = { {-10.0, -1, 0, 12.0}, {-1.5, -0.25, 1.25, 2.5}, {-0.99, -0.01, 0.01, 0.99} }; v4sf x = xvals[i]; v4sf y, ref; float *xp = (float *) &x; float *refp = (float *) &ref; printf("\n"); printv("x ", x); refp[0] = round(xp[0]); refp[1] = round(xp[1]); refp[2] = round(xp[2]); refp[3] = round(xp[3]); y = round_func(x); printv("C round(x) ", ref); printv("LLVM round(x)", y); compare(ref, y); refp[0] = trunc(xp[0]); refp[1] = trunc(xp[1]); refp[2] = trunc(xp[2]); refp[3] = trunc(xp[3]); y = trunc_func(x); printv("C trunc(x) ", ref); printv("LLVM trunc(x)", y); compare(ref, y); refp[0] = floor(xp[0]); refp[1] = floor(xp[1]); refp[2] = floor(xp[2]); refp[3] = floor(xp[3]); y = floor_func(x); printv("C floor(x) ", ref); printv("LLVM floor(x)", y); compare(ref, y); refp[0] = ceil(xp[0]); refp[1] = ceil(xp[1]); refp[2] = ceil(xp[2]); refp[3] = ceil(xp[3]); y = ceil_func(x); printv("C ceil(x) ", ref); printv("LLVM ceil(x) ", y); compare(ref, y); } LLVMFreeMachineCodeForFunction(engine, test_round); LLVMFreeMachineCodeForFunction(engine, test_trunc); LLVMFreeMachineCodeForFunction(engine, test_floor); LLVMFreeMachineCodeForFunction(engine, test_ceil); LLVMDisposeExecutionEngine(engine); if(pass) LLVMDisposePassManager(pass); return success; }
/* OCaml stub that registers the GVN pass on the given pass manager.
 * [<Llvm.PassManager.any] Llvm.PassManager.t -> unit */
CAMLprim value
llvm_add_gvn(LLVMPassManagerRef PM)
{
  LLVMAddGVNPass(PM);

  return Val_unit;
}
void lp_build_init(void) { #ifdef DEBUG gallivm_debug = debug_get_option_gallivm_debug(); #endif lp_set_target_options(); LLVMInitializeNativeTarget(); LLVMLinkInJIT(); if (!lp_build_module) lp_build_module = LLVMModuleCreateWithName("gallivm"); if (!lp_build_provider) lp_build_provider = LLVMCreateModuleProviderForExistingModule(lp_build_module); if (!lp_build_engine) { enum LLVM_CodeGenOpt_Level optlevel; char *error = NULL; if (gallivm_debug & GALLIVM_DEBUG_NO_OPT) { optlevel = None; } else { optlevel = Default; } if (LLVMCreateJITCompiler(&lp_build_engine, lp_build_provider, (unsigned)optlevel, &error)) { _debug_printf("%s\n", error); LLVMDisposeMessage(error); assert(0); } #if defined(DEBUG) || defined(PROFILE) lp_register_oprofile_jit_event_listener(lp_build_engine); #endif } if (!lp_build_target) lp_build_target = LLVMGetExecutionEngineTargetData(lp_build_engine); if (!lp_build_pass) { lp_build_pass = LLVMCreateFunctionPassManager(lp_build_provider); LLVMAddTargetData(lp_build_target, lp_build_pass); if ((gallivm_debug & GALLIVM_DEBUG_NO_OPT) == 0) { /* These are the passes currently listed in llvm-c/Transforms/Scalar.h, * but there are more on SVN. */ /* TODO: Add more passes */ LLVMAddCFGSimplificationPass(lp_build_pass); LLVMAddPromoteMemoryToRegisterPass(lp_build_pass); LLVMAddConstantPropagationPass(lp_build_pass); if(util_cpu_caps.has_sse4_1) { /* FIXME: There is a bug in this pass, whereby the combination of fptosi * and sitofp (necessary for trunc/floor/ceil/round implementation) * somehow becomes invalid code. */ LLVMAddInstructionCombiningPass(lp_build_pass); } LLVMAddGVNPass(lp_build_pass); } else { /* We need at least this pass to prevent the backends to fail in * unexpected ways. */ LLVMAddPromoteMemoryToRegisterPass(lp_build_pass); } } util_cpu_detect(); #if 0 /* For simulating less capable machines */ util_cpu_caps.has_sse3 = 0; util_cpu_caps.has_ssse3 = 0; util_cpu_caps.has_sse4_1 = 0; #endif }
int main(int c, char **v) { LLVMContextRef *contexts; LLVMModuleRef *modules; char *error; const char *mode = "opt"; const char **filenames; unsigned numFiles; unsigned i; bool moreOptions; static int verboseFlag = 0; static int timingFlag = 0; static int disassembleFlag = 0; bool manyContexts = true; double beforeAll; if (c == 1) usage(); moreOptions = true; while (moreOptions) { static struct option longOptions[] = { {"verbose", no_argument, &verboseFlag, 1}, {"timing", no_argument, &timingFlag, 1}, {"disassemble", no_argument, &disassembleFlag, 1}, {"mode", required_argument, 0, 0}, {"contexts", required_argument, 0, 0}, {"help", no_argument, 0, 0} }; int optionIndex; int optionValue; optionValue = getopt_long(c, v, "", longOptions, &optionIndex); switch (optionValue) { case -1: moreOptions = false; break; case 0: { const char* thisOption = longOptions[optionIndex].name; if (!strcmp(thisOption, "help")) usage(); if (!strcmp(thisOption, "contexts")) { if (!strcasecmp(optarg, "one")) manyContexts = false; else if (!strcasecmp(optarg, "many")) manyContexts = true; else { fprintf(stderr, "Invalid argument for --contexts.\n"); exit(1); } break; } if (!strcmp(thisOption, "mode")) { mode = strdup(optarg); break; } break; } case '?': exit(0); break; default: printf("optionValue = %d\n", optionValue); abort(); break; } } LLVMLinkInMCJIT(); LLVMInitializeNativeTarget(); LLVMInitializeX86AsmPrinter(); LLVMInitializeX86Disassembler(); filenames = (const char **)(v + optind); numFiles = c - optind; contexts = malloc(sizeof(LLVMContextRef) * numFiles); modules = malloc(sizeof(LLVMModuleRef) * numFiles); if (manyContexts) { for (i = 0; i < numFiles; ++i) contexts[i] = LLVMContextCreate(); } else { LLVMContextRef context = LLVMContextCreate(); for (i = 0; i < numFiles; ++i) contexts[i] = context; } for (i = 0; i < numFiles; ++i) { LLVMMemoryBufferRef buffer; const char* filename = filenames[i]; if (LLVMCreateMemoryBufferWithContentsOfFile(filename, &buffer, &error)) { 
fprintf(stderr, "Error reading file %s: %s\n", filename, error); exit(1); } if (LLVMParseBitcodeInContext(contexts[i], buffer, modules + i, &error)) { fprintf(stderr, "Error parsing file %s: %s\n", filename, error); exit(1); } LLVMDisposeMemoryBuffer(buffer); if (verboseFlag) { printf("Module #%u (%s) after parsing:\n", i, filename); LLVMDumpModule(modules[i]); } } if (verboseFlag) printf("Generating code for modules...\n"); if (timingFlag) beforeAll = currentTime(); for (i = 0; i < numFiles; ++i) { LLVMModuleRef module; LLVMExecutionEngineRef engine; struct LLVMMCJITCompilerOptions options; LLVMValueRef value; LLVMPassManagerRef functionPasses = 0; LLVMPassManagerRef modulePasses = 0; double before; if (timingFlag) before = currentTime(); module = modules[i]; LLVMInitializeMCJITCompilerOptions(&options, sizeof(options)); options.OptLevel = 2; options.EnableFastISel = 0; options.MCJMM = LLVMCreateSimpleMCJITMemoryManager( 0, mmAllocateCodeSection, mmAllocateDataSection, mmApplyPermissions, mmDestroy); if (LLVMCreateMCJITCompilerForModule(&engine, module, &options, sizeof(options), &error)) { fprintf(stderr, "Error building MCJIT: %s\n", error); exit(1); } if (!strcasecmp(mode, "simple")) { modulePasses = LLVMCreatePassManager(); LLVMAddTargetData(LLVMGetExecutionEngineTargetData(engine), modulePasses); LLVMAddConstantPropagationPass(modulePasses); LLVMAddInstructionCombiningPass(modulePasses); LLVMAddPromoteMemoryToRegisterPass(modulePasses); LLVMAddBasicAliasAnalysisPass(modulePasses); LLVMAddTypeBasedAliasAnalysisPass(modulePasses); LLVMAddGVNPass(modulePasses); LLVMAddCFGSimplificationPass(modulePasses); LLVMRunPassManager(modulePasses, module); } else if (!strcasecmp(mode, "opt")) { LLVMPassManagerBuilderRef passBuilder; passBuilder = LLVMPassManagerBuilderCreate(); LLVMPassManagerBuilderSetOptLevel(passBuilder, 2); LLVMPassManagerBuilderSetSizeLevel(passBuilder, 0); functionPasses = LLVMCreateFunctionPassManagerForModule(module); modulePasses = 
LLVMCreatePassManager(); LLVMAddTargetData(LLVMGetExecutionEngineTargetData(engine), modulePasses); LLVMPassManagerBuilderPopulateFunctionPassManager(passBuilder, functionPasses); LLVMPassManagerBuilderPopulateModulePassManager(passBuilder, modulePasses); LLVMPassManagerBuilderDispose(passBuilder); LLVMInitializeFunctionPassManager(functionPasses); for (value = LLVMGetFirstFunction(module); value; value = LLVMGetNextFunction(value)) LLVMRunFunctionPassManager(functionPasses, value); LLVMFinalizeFunctionPassManager(functionPasses); LLVMRunPassManager(modulePasses, module); } else { fprintf(stderr, "Bad optimization mode: %s.\n", mode); fprintf(stderr, "Valid modes are: \"simple\" or \"opt\".\n"); exit(1); } if (verboseFlag) { printf("Module #%d (%s) after optimization:\n", i, filenames[i]); LLVMDumpModule(module); } for (value = LLVMGetFirstFunction(module); value; value = LLVMGetNextFunction(value)) { if (LLVMIsDeclaration(value)) continue; LLVMGetPointerToGlobal(engine, value); } if (functionPasses) LLVMDisposePassManager(functionPasses); if (modulePasses) LLVMDisposePassManager(modulePasses); LLVMDisposeExecutionEngine(engine); if (timingFlag) { double after = currentTime(); printf("Module #%d (%s) took %lf ms.\n", i, filenames[i], (after - before) * 1000); } } if (timingFlag) { double after = currentTime(); printf("Compilation took a total of %lf ms.\n", (after - beforeAll) * 1000); } if (disassembleFlag) { LLVMDisasmContextRef disassembler; struct MemorySection *section; disassembler = LLVMCreateDisasm("x86_64-apple-darwin", 0, 0, 0, symbolLookupCallback); if (!disassembler) { fprintf(stderr, "Error building disassembler.\n"); exit(1); } for (section = sectionHead; section; section = section->next) { printf("Disassembly for section %p:\n", section); char pcString[20]; char instructionString[1000]; uint8_t *pc; uint8_t *end; pc = section->start; end = pc + section->size; while (pc < end) { snprintf( pcString, sizeof(pcString), "0x%lx", (unsigned 
long)(uintptr_t)pc); size_t instructionSize = LLVMDisasmInstruction( disassembler, pc, end - pc, (uintptr_t)pc, instructionString, sizeof(instructionString)); if (!instructionSize) snprintf(instructionString, sizeof(instructionString), ".byte 0x%02x", *pc++); else pc += instructionSize; printf(" %16s: %s\n", pcString, instructionString); } } } return 0; }
PIPE_ALIGN_STACK static boolean test_one(unsigned verbose, FILE *fp, struct lp_type src_type, struct lp_type dst_type) { LLVMModuleRef module = NULL; LLVMValueRef func = NULL; LLVMExecutionEngineRef engine = NULL; LLVMModuleProviderRef provider = NULL; LLVMPassManagerRef pass = NULL; char *error = NULL; conv_test_ptr_t conv_test_ptr; boolean success; const unsigned n = LP_TEST_NUM_SAMPLES; int64_t cycles[LP_TEST_NUM_SAMPLES]; double cycles_avg = 0.0; unsigned num_srcs; unsigned num_dsts; double eps; unsigned i, j; if(verbose >= 1) dump_conv_types(stdout, src_type, dst_type); if(src_type.length > dst_type.length) { num_srcs = 1; num_dsts = src_type.length/dst_type.length; } else { num_dsts = 1; num_srcs = dst_type.length/src_type.length; } assert(src_type.width * src_type.length == dst_type.width * dst_type.length); /* We must not loose or gain channels. Only precision */ assert(src_type.length * num_srcs == dst_type.length * num_dsts); eps = MAX2(lp_const_eps(src_type), lp_const_eps(dst_type)); module = LLVMModuleCreateWithName("test"); func = add_conv_test(module, src_type, num_srcs, dst_type, num_dsts); if(LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) { LLVMDumpModule(module); abort(); } LLVMDisposeMessage(error); provider = LLVMCreateModuleProviderForExistingModule(module); if (LLVMCreateJITCompiler(&engine, provider, 1, &error)) { if(verbose < 1) dump_conv_types(stderr, src_type, dst_type); fprintf(stderr, "%s\n", error); LLVMDisposeMessage(error); abort(); } #if 0 pass = LLVMCreatePassManager(); LLVMAddTargetData(LLVMGetExecutionEngineTargetData(engine), pass); /* These are the passes currently listed in llvm-c/Transforms/Scalar.h, * but there are more on SVN. 
*/ LLVMAddConstantPropagationPass(pass); LLVMAddInstructionCombiningPass(pass); LLVMAddPromoteMemoryToRegisterPass(pass); LLVMAddGVNPass(pass); LLVMAddCFGSimplificationPass(pass); LLVMRunPassManager(pass, module); #else (void)pass; #endif if(verbose >= 2) LLVMDumpModule(module); conv_test_ptr = (conv_test_ptr_t)LLVMGetPointerToGlobal(engine, func); if(verbose >= 2) lp_disassemble(conv_test_ptr); success = TRUE; for(i = 0; i < n && success; ++i) { unsigned src_stride = src_type.length*src_type.width/8; unsigned dst_stride = dst_type.length*dst_type.width/8; PIPE_ALIGN_VAR(16) uint8_t src[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; PIPE_ALIGN_VAR(16) uint8_t dst[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; double fref[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; uint8_t ref[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; int64_t start_counter = 0; int64_t end_counter = 0; for(j = 0; j < num_srcs; ++j) { random_vec(src_type, src + j*src_stride); read_vec(src_type, src + j*src_stride, fref + j*src_type.length); } for(j = 0; j < num_dsts; ++j) { write_vec(dst_type, ref + j*dst_stride, fref + j*dst_type.length); } start_counter = rdtsc(); conv_test_ptr(src, dst); end_counter = rdtsc(); cycles[i] = end_counter - start_counter; for(j = 0; j < num_dsts; ++j) { if(!compare_vec_with_eps(dst_type, dst + j*dst_stride, ref + j*dst_stride, eps)) success = FALSE; } if (!success) { if(verbose < 1) dump_conv_types(stderr, src_type, dst_type); fprintf(stderr, "MISMATCH\n"); for(j = 0; j < num_srcs; ++j) { fprintf(stderr, " Src%u: ", j); dump_vec(stderr, src_type, src + j*src_stride); fprintf(stderr, "\n"); } #if 1 fprintf(stderr, " Ref: "); for(j = 0; j < src_type.length*num_srcs; ++j) fprintf(stderr, " %f", fref[j]); fprintf(stderr, "\n"); #endif for(j = 0; j < num_dsts; ++j) { fprintf(stderr, " Dst%u: ", j); dump_vec(stderr, dst_type, dst + j*dst_stride); fprintf(stderr, "\n"); fprintf(stderr, " Ref%u: ", j); dump_vec(stderr, dst_type, ref + j*dst_stride); fprintf(stderr, "\n"); } } } /* 
* Unfortunately the output of cycle counter is not very reliable as it comes * -- sometimes we get outliers (due IRQs perhaps?) which are * better removed to avoid random or biased data. */ { double sum = 0.0, sum2 = 0.0; double avg, std; unsigned m; for(i = 0; i < n; ++i) { sum += cycles[i]; sum2 += cycles[i]*cycles[i]; } avg = sum/n; std = sqrtf((sum2 - n*avg*avg)/n); m = 0; sum = 0.0; for(i = 0; i < n; ++i) { if(fabs(cycles[i] - avg) <= 4.0*std) { sum += cycles[i]; ++m; } } cycles_avg = sum/m; } if(fp) write_tsv_row(fp, src_type, dst_type, cycles_avg, success); if (!success) { static boolean firsttime = TRUE; if(firsttime) { if(verbose < 2) LLVMDumpModule(module); LLVMWriteBitcodeToFile(module, "conv.bc"); fprintf(stderr, "conv.bc written\n"); fprintf(stderr, "Invoke as \"llc -o - conv.bc\"\n"); firsttime = FALSE; /* abort(); */ } } LLVMFreeMachineCodeForFunction(engine, func); LLVMDisposeExecutionEngine(engine); if(pass) LLVMDisposePassManager(pass); return success; }