예제 #1
0
// Runs the optimization pipeline over `module` in several stages:
//   1. module-level (global) passes,
//   2. a function-level pass manager whose contents depend on `options.optimizations`,
//      on whether the module has control flow, and on the back end's register form,
//   3. a post-function-pass cleanup pass manager,
//   4. a GlobalOpt run, followed by mem2reg on every function if it changed anything,
//   5. reg2mem on every function if the back end does not prefer registers over memory,
//   6. a final canonicalization pass manager for BottomTranslator.
// VerifyModule(module) is called before the first stage and after most stages.
// The order in which passes are added is significant; do not reorder casually.
void gla::PrivateManager::runLLVMOptimizations1()
{
    VerifyModule(module);

    // TODO: generated code performance: When we have backend support for shuffles, or we canonicalize
    // shuffles into multiinserts, we can replace the InstSimplify passes with
    // InstCombine passes.

    // First, do some global (module-level) optimizations, which can free up
    // function passes to do more.
    llvm::PassManager globalPM;
    globalPM.add(llvm::createGlobalOptimizerPass());
    globalPM.add(llvm::createIPSCCPPass());
    globalPM.add(llvm::createConstantMergePass());
    globalPM.add(llvm::createInstructionSimplifierPass());
    // The always-inliner is only added when an inline threshold is configured (non-zero).
    if (options.optimizations.inlineThreshold)
        globalPM.add(llvm::createAlwaysInlinerPass());
    globalPM.add(llvm::createPromoteMemoryToRegisterPass());
    globalPM.run(*module);

    // Next, do interprocedural passes
    // TODO: generated code performance: If we ever have non-inlined functions, we'll want to add some interprocedural passes

    VerifyModule(module);

    // Set up the function-level optimizations we want
    // TODO: generated code performance: explore ordering of passes more
    llvm::FunctionPassManager passManager(module);


    // Add target data to unblock optimizations that require it
    // This matches default except for endianness (little) and pointer size/alignment (32)
    // NOTE(review): DL is allocated with `new` and never deleted in this function; presumably
    // passManager.add() takes ownership of the pass -- confirm against the LLVM version in use.
    llvm::DataLayout* DL = new llvm::DataLayout("e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64");
    passManager.add(DL);

    // Create immutable passes once
    passManager.add(llvm::createBasicAliasAnalysisPass());
    passManager.add(llvm::createTypeBasedAliasAnalysisPass());

    // Provide the backend queries
    passManager.add(gla_llvm::createBackEndPointerPass(backEnd));

    // TODO: explore SimplifyLibCalls
    // TODO: compile-time performance: see if we can avoid running gvn/sccp multiple times

    // Early, simple optimizations to enable others/make others more efficient
    //passManager.add(llvm::createScalarReplAggregatesPass());
    passManager.add(llvm::createInstructionCombiningPass());
    passManager.add(llvm::createEarlyCSEPass());
    passManager.add(llvm::createCorrelatedValuePropagationPass());

    // Computed once here; gates every CFG- and loop-related group of passes below.
    bool hasCf = HasControlFlow(module);

    if (hasCf) {
        passManager.add(llvm::createCFGSimplificationPass());
        passManager.add(llvm::createLoopSimplifyPass());
        passManager.add(gla_llvm::createCanonicalizeCFGPass());
        passManager.add(gla_llvm::createDecomposeInstsPass());
        passManager.add(gla_llvm::createCanonicalizeCFGPass());

        // TODO: Compile-time performance: something goes stale in FlattenConditionalAssignments (dom trees?).
        //       Running it multiple times here catches more, whereas running it multiple times internally does not help.
        //       Once that's fixed, most at this level can be eliminated.
        passManager.add(gla_llvm::createFlattenConditionalAssignmentsPass(options.optimizations.flattenHoistThreshold));
        passManager.add(gla_llvm::createFlattenConditionalAssignmentsPass(options.optimizations.flattenHoistThreshold));
        passManager.add(gla_llvm::createFlattenConditionalAssignmentsPass(options.optimizations.flattenHoistThreshold));

        passManager.add(gla_llvm::createCanonicalizeCFGPass());
    } else
        passManager.add(gla_llvm::createDecomposeInstsPass());

    // NOTE(review): both variables are uninitialized here; presumably getRegisterForm() fills them
    // in as out-parameters -- confirm. The scalarize pass is added only when innerAoS == 1.
    int innerAoS, outerSoA;
    backEnd->getRegisterForm(outerSoA, innerAoS);
    if (innerAoS == 1) {
        passManager.add(gla_llvm::createScalarizePass());
    }

    if (options.optimizations.reassociate)
        passManager.add(llvm::createReassociatePass());
    passManager.add(llvm::createInstructionCombiningPass());

    //if (options.optimizations.gvn)
    //    passManager.add(llvm::createGVNPass());
    passManager.add(llvm::createSCCPPass());

    // Second round of CFG canonicalization/flattening, only when control flow is present.
    if (hasCf) {
        passManager.add(llvm::createLoopSimplifyPass());
        passManager.add(gla_llvm::createCanonicalizeCFGPass());
        passManager.add(gla_llvm::createFlattenConditionalAssignmentsPass(options.optimizations.flattenHoistThreshold));
        passManager.add(gla_llvm::createFlattenConditionalAssignmentsPass(options.optimizations.flattenHoistThreshold));
        passManager.add(gla_llvm::createCanonicalizeCFGPass());
    }

    // Make multiinsert intrinsics, and clean up afterwards
    passManager.add(llvm::createInstructionCombiningPass());
    if (options.optimizations.coalesce)
        passManager.add(gla_llvm::createCoalesceInsertsPass());
    if (options.optimizations.adce)
        passManager.add(llvm::createAggressiveDCEPass());
    passManager.add(llvm::createInstructionCombiningPass());

    if (hasCf) {
        // Loop optimizations, and clean up afterwards
        passManager.add(llvm::createLICMPass());
        passManager.add(llvm::createIndVarSimplifyPass());
        if (options.optimizations.loopUnrollThreshold) {
            // Loop rotation creates a less desirable loop form for loops that do not get unrolled,
            // but is needed if a loop will be unrolled.
            // Note: the unroll threshold is passed to both the rotate and the unroll pass.
            passManager.add(llvm::createLoopRotatePass(options.optimizations.loopUnrollThreshold));
            passManager.add(llvm::createIndVarSimplifyPass());
            passManager.add(llvm::createLoopUnrollPass(options.optimizations.loopUnrollThreshold));
        }
        passManager.add(llvm::createLoopStrengthReducePass());
        if (options.optimizations.adce)
            passManager.add(llvm::createAggressiveDCEPass());

        passManager.add(llvm::createInstructionCombiningPass());
        //if (options.optimizations.gvn)
        //    passManager.add(llvm::createGVNPass());
        passManager.add(llvm::createSCCPPass());
    }

    // Run intrinsic combining
    passManager.add(gla_llvm::createCanonicalizeCFGPass());
    passManager.add(llvm::createInstructionCombiningPass());
    passManager.add(gla_llvm::createIntrinsicCombinePass());
    passManager.add(gla_llvm::createCanonicalizeCFGPass());

    //if (options.optimizations.gvn)
    //    passManager.add(llvm::createGVNPass());
    passManager.add(llvm::createSCCPPass());

    // TODO: generated code: Consider if we really want it. For some reason StandardPasses.h
    // doesn't have it listed.
    // passManager.add(llvm::createSinkingPass());

    // Run some post-redundancy-elimination passes
    //passManager.add(llvm::createScalarReplAggregatesPass());
    passManager.add(llvm::createInstructionCombiningPass());
    passManager.add(llvm::createCorrelatedValuePropagationPass());
    if (options.optimizations.adce)
        passManager.add(llvm::createAggressiveDCEPass());

    if (hasCf) {
        // LunarGLASS CFG optimizations
        passManager.add(llvm::createLoopSimplifyPass());
        passManager.add(gla_llvm::createCanonicalizeCFGPass());
        passManager.add(gla_llvm::createFlattenConditionalAssignmentsPass(options.optimizations.flattenHoistThreshold));
        passManager.add(gla_llvm::createCanonicalizeCFGPass());

        passManager.add(llvm::createInstructionCombiningPass());
        if (options.optimizations.adce)
            passManager.add(llvm::createAggressiveDCEPass());
    }

    // Execute the function-level pipeline assembled above.
    RunOnModule(passManager, module);

    VerifyModule(module);

    // Post Function passes cleanup
    llvm::PassManager pm;
    pm.add(llvm::createInstructionCombiningPass());
    pm.add(llvm::createDeadStoreEliminationPass());
    if (options.optimizations.adce)
        pm.add(llvm::createAggressiveDCEPass());
    pm.add(llvm::createStripDeadPrototypesPass());

    // TODO: function-call functionality: Consider using the below in the presence of functions
    // pm.add(llvm::createGlobalDCEPass());

    pm.run(*module);

    VerifyModule(module);

    // TODO: Refactor the below use of GlobalOpt. Perhaps we want to repeat
    // some of our function passes?

    llvm::PassManager modulePassManager;
    modulePassManager.add(llvm::createGlobalOptimizerPass());

    // Optimize the whole module
    bool changed = modulePassManager.run(*module);

    if (changed) {
        // removing globals created stack allocations we want to eliminate
        llvm::FunctionPassManager postGlobalManager(module);
        postGlobalManager.add(llvm::createPromoteMemoryToRegisterPass());

        // run across all functions
        postGlobalManager.doInitialization();
        for (llvm::Module::iterator function = module->begin(), lastFunction = module->end(); function != lastFunction; ++function) {
            postGlobalManager.run(*function);
        }
        postGlobalManager.doFinalization();
    }

    // Back ends that do not prefer registers get values demoted back to memory, function by function.
    if (! backEnd->preferRegistersOverMemory()) {
        llvm::FunctionPassManager memoryPassManager(module);
        memoryPassManager.add(llvm::createDemoteRegisterToMemoryPass());

        memoryPassManager.doInitialization();
        for (llvm::Module::iterator function = module->begin(), lastFunction = module->end(); function != lastFunction; ++function) {
            memoryPassManager.run(*function);
        }
        memoryPassManager.doFinalization();
    }

    VerifyModule(module);

    // Put the IR into a canonical form for BottomTranslator.
    llvm::PassManager canonicalize;

    canonicalize.add(llvm::createIndVarSimplifyPass());
    canonicalize.add(gla_llvm::createCanonicalizeCFGPass());
    canonicalize.add(gla_llvm::createBackEndPointerPass(backEnd));
    canonicalize.add(gla_llvm::createGatherInstsPass());
    canonicalize.add(gla_llvm::createCanonicalizeInstsPass());
    canonicalize.add(llvm::createStripDeadPrototypesPass());
    canonicalize.run(*module);

    VerifyModule(module);
}
예제 #2
0
// Runs the compile stages in order: CompileInstructions, VerifyModule,
// OptimizeModule, CreateEngine. Stops at the first stage that reports
// failure and returns false; returns true only if every stage succeeds.
bool Compiler::Compile() {
    if (!CompileInstructions())
        return false;

    if (!VerifyModule())
        return false;

    if (!OptimizeModule())
        return false;

    if (!CreateEngine())
        return false;

    return true;
}