Exemplo n.º 1
0
    // Builds the LLVM function for a stream-out shader, runs a fixed pipeline
    // of function passes over it and returns it.
    //
    // The function is created in JM()->mpCurrentModule with external linkage,
    // returns void and takes a single pointer argument (named "pSoCtx"). Its
    // name is "SOShader" followed by a counter that is bumped on every call.
    //
    // state: compile state; handed to buildStream() together with state.stream.
    // Returns the generated function after the passes have been run on it.
    Function* Create(const STREAMOUT_COMPILE_STATE& state)
    {
        // Suffix source for the generated function names.
        static std::size_t shaderCounter = 0;

        std::stringstream nameBuilder;
        nameBuilder << "SOShader" << shaderCounter++;

        // Signature of the generated function:
        //   typedef void(__cdecl *PFN_SO_FUNC)(SWR_STREAMOUT_CONTEXT*)
        std::vector<Type*> paramTypes;
        paramTypes.push_back(PointerType::get(Gen_SWR_STREAMOUT_CONTEXT(JM()), 0)); // SWR_STREAMOUT_CONTEXT*

        FunctionType* signature = FunctionType::get(IRB()->getVoidTy(), paramTypes, false);
        Function* shaderFn = Function::Create(
            signature, GlobalValue::ExternalLinkage, nameBuilder.str(), JM()->mpCurrentModule);

        // "entry" is where code generation starts; "return" is the block that
        // ends with the void return and is also passed to buildStream().
        BasicBlock* entryBlock = BasicBlock::Create(JM()->mContext, "entry", shaderFn);
        BasicBlock* exitBlock = BasicBlock::Create(JM()->mContext, "return", shaderFn);

        IRB()->SetInsertPoint(entryBlock);

        // The first (and only) argument is the stream-out context pointer.
        Value* soContext = &*shaderFn->getArgumentList().begin();
        soContext->setName("pSoCtx");

        buildStream(state, state.stream, soContext, exitBlock, shaderFn);

        // Branch to the return block, then fill it with the void return.
        BR(exitBlock);

        IRB()->SetInsertPoint(exitBlock);
        RET_VOID();

        // Dump the IR before optimization.
        JitManager::DumpToFile(shaderFn, "SoFunc");

        ::FunctionPassManager fnPasses(JM()->mpCurrentModule);

        // Passes run in exactly the order they are added here.
        fnPasses.add(createBreakCriticalEdgesPass());
        fnPasses.add(createCFGSimplificationPass());
        fnPasses.add(createEarlyCSEPass());
        fnPasses.add(createPromoteMemoryToRegisterPass());
        fnPasses.add(createCFGSimplificationPass());
        fnPasses.add(createEarlyCSEPass());
        fnPasses.add(createInstructionCombiningPass());
        fnPasses.add(createInstructionSimplifierPass());
        fnPasses.add(createConstantPropagationPass());
        fnPasses.add(createSCCPPass());
        fnPasses.add(createAggressiveDCEPass());

        fnPasses.run(*shaderFn);

        // Dump the IR again after optimization.
        JitManager::DumpToFile(shaderFn, "SoFunc_optimized");

        return shaderFn;
    }
Exemplo n.º 2
0
// Appends the optimization pipeline to the pass manager `PM`.
//
// Added regardless of jl_options.opt_level: the IR verifier in
// JL_DEBUG_BUILD builds, and the address/memory sanitizer passes when the
// compiler building this file reports those features via __has_feature.
// If jl_options.opt_level <= 1 the function returns right after those.
// Otherwise the target and alias analyses are registered, followed by the
// transformation passes in the order they appear below. BasicAA, the SLP
// vectorizer and the instcombine cleanup after it are only added at
// opt_level >= 3.
//
// The LLVM35/LLVM37/LLVM38/LLVM39 macros select between API variants
// (presumably "built against LLVM >= x.y" flags defined elsewhere -- TODO
// confirm). When INSTCOMBINE_BUG is defined, the guarded instcombine and
// vectorizer passes are compiled out.
//
// PM: pass manager to populate. Only PM->add() is called on it here, plus it
//     is passed to jl_TargetMachine->addAnalysisPasses() on the non-LLVM37
//     path. NOTE(review): T is not declared in the visible lines; presumably
//     a template parameter declared just above -- confirm.
static void addOptimizationPasses(T *PM)
{
#ifdef JL_DEBUG_BUILD
    // Debug builds schedule the IR verifier as the very first pass.
    PM->add(createVerifierPass());
#endif

    // Sanitizer instrumentation: only compiled in when this file itself is
    // being built with the matching sanitizer enabled.
#ifdef __has_feature
#   if __has_feature(address_sanitizer)
#   if defined(LLVM37) && !defined(LLVM38)
    // LLVM 3.7 BUG: ASAN pass doesn't properly initialize its dependencies
    initializeTargetLibraryInfoWrapperPassPass(*PassRegistry::getPassRegistry());
#   endif
    PM->add(createAddressSanitizerFunctionPass());
#   endif
#   if __has_feature(memory_sanitizer)
    PM->add(llvm::createMemorySanitizerPass(true));
#   endif
#endif
    // Optimization levels 0 and 1 stop here: none of the passes below are
    // scheduled for them.
    if (jl_options.opt_level <= 1) {
        return;
    }
    // Register the target machine's analyses; the API differs between the
    // LLVM37 and pre-LLVM37 builds.
#ifdef LLVM37
    PM->add(createTargetTransformInfoWrapperPass(jl_TargetMachine->getTargetIRAnalysis()));
#else
    jl_TargetMachine->addAnalysisPasses(*PM);
#endif
    // Type-based alias analysis is added whenever opt_level >= 2.
#ifdef LLVM38
    PM->add(createTypeBasedAAWrapperPass());
#else
    PM->add(createTypeBasedAliasAnalysisPass());
#endif
    // Basic alias analysis is only added at opt_level >= 3.
    if (jl_options.opt_level >= 3) {
#ifdef LLVM38
        PM->add(createBasicAAWrapperPass());
#else
        PM->add(createBasicAliasAnalysisPass());
#endif
    }
    // list of passes from vmkit
    PM->add(createCFGSimplificationPass()); // Clean up disgusting code
    PM->add(createPromoteMemoryToRegisterPass());// Kill useless allocas

#ifndef INSTCOMBINE_BUG
    PM->add(createInstructionCombiningPass()); // Cleanup for scalarrepl.
#endif
    PM->add(createSROAPass());                 // Break up aggregate allocas
#ifndef INSTCOMBINE_BUG
    PM->add(createInstructionCombiningPass()); // Cleanup for scalarrepl.
#endif
    PM->add(createJumpThreadingPass());        // Thread jumps.
    // NOTE: CFG simp passes after this point seem to hurt native codegen.
    // See issue #6112. Should be re-evaluated when we switch to MCJIT.
    //PM->add(createCFGSimplificationPass());    // Merge & remove BBs
#ifndef INSTCOMBINE_BUG
    PM->add(createInstructionCombiningPass()); // Combine silly seq's
#endif

    //PM->add(createCFGSimplificationPass());    // Merge & remove BBs
    PM->add(createReassociatePass());          // Reassociate expressions

    // this has the potential to make some things a bit slower
    //PM->add(createBBVectorizePass());

    PM->add(createEarlyCSEPass()); //// ****

    PM->add(createLoopIdiomPass()); //// ****
    PM->add(createLoopRotatePass());           // Rotate loops.
    // LoopRotate strips metadata from terminator, so run LowerSIMD afterwards
    PM->add(createLowerSimdLoopPass());        // Annotate loop marked with "simdloop" as LLVM parallel loop
    PM->add(createLICMPass());                 // Hoist loop invariants
    PM->add(createLoopUnswitchPass());         // Unswitch loops.
    // Subsequent passes not stripping metadata from terminator
#ifndef INSTCOMBINE_BUG
    PM->add(createInstructionCombiningPass());
#endif
    PM->add(createIndVarSimplifyPass());       // Canonicalize indvars
    PM->add(createLoopDeletionPass());         // Delete dead loops
#if defined(LLVM35)
    PM->add(createSimpleLoopUnrollPass());     // Unroll small loops
#else
    PM->add(createLoopUnrollPass());           // Unroll small loops
#endif
    // When LLVM35 is NOT defined the loop vectorizer runs here, right after
    // unrolling; when it is defined, it is scheduled at the end of this
    // function instead.
#if !defined(LLVM35) && !defined(INSTCOMBINE_BUG)
    PM->add(createLoopVectorizePass());        // Vectorize loops
#endif
    //PM->add(createLoopStrengthReducePass());   // (jwb added)

#ifndef INSTCOMBINE_BUG
    PM->add(createInstructionCombiningPass()); // Clean up after the unroller
#endif
    PM->add(createGVNPass());                  // Remove redundancies
    //PM->add(createMemCpyOptPass());            // Remove memcpy / form memset
    PM->add(createSCCPPass());                 // Constant prop with SCCP

    // Run instcombine after redundancy elimination to exploit opportunities
    // opened up by them.
    PM->add(createSinkingPass()); ////////////// ****
    PM->add(createInstructionSimplifierPass());///////// ****
#ifndef INSTCOMBINE_BUG
    PM->add(createInstructionCombiningPass());
#endif
    PM->add(createJumpThreadingPass());         // Thread jumps
    PM->add(createDeadStoreEliminationPass());  // Delete dead stores
    // SLP vectorization: opt_level >= 3 only, and compiled out entirely when
    // INSTCOMBINE_BUG is defined.
#if !defined(INSTCOMBINE_BUG)
    if (jl_options.opt_level >= 3) {
#ifdef LLVM39
        // Explicitly initialize DemandedBits in the pass registry before
        // adding the SLP vectorizer (LLVM39 builds only).
        initializeDemandedBitsPass(*PassRegistry::getPassRegistry());
#endif
        PM->add(createSLPVectorizerPass());     // Vectorize straight-line code
    }
#endif

    PM->add(createAggressiveDCEPass());         // Delete dead instructions
#if !defined(INSTCOMBINE_BUG)
    if (jl_options.opt_level >= 3)
        PM->add(createInstructionCombiningPass());   // Clean up after SLP loop vectorizer
#endif
    // LLVM35 builds schedule the loop vectorizer here, as the last step.
    // NOTE(review): unlike every other instcombine/vectorizer in this
    // function, these two adds are not guarded by INSTCOMBINE_BUG -- confirm
    // that is intended.
#if defined(LLVM35)
    PM->add(createLoopVectorizePass());         // Vectorize loops
    PM->add(createInstructionCombiningPass());  // Clean up after loop vectorizer
#endif
    //PM->add(createCFGSimplificationPass());     // Merge & remove BBs
}
Exemplo n.º 3
0
/// adopted from: llvm-2.9/include/llvm/Support/StandardPasses.h
void optimizeFunction(Function* f, const bool disableLICM, const bool disableLoopRotate) {
    assert (f);
    assert (f->getParent());
    Module* mod = f->getParent();
    TargetData* targetData = new TargetData(mod);

    const unsigned OptimizationLevel = 3;
    const bool OptimizeSize = false;
    const bool UnitAtATime = true;
    const bool UnrollLoops = true;
    const bool SimplifyLibCalls = true;
    const bool HaveExceptions = false;
    Pass* InliningPass = createFunctionInliningPass(275);

    //PassManager Passes;
    FunctionPassManager Passes(mod);
    Passes.add(targetData);

    //
    // custom
    //
    Passes.add(createScalarReplAggregatesPass(-1, false));

    //
    // createStandardFunctionPasses
    //
    Passes.add(createCFGSimplificationPass());
    Passes.add(createPromoteMemoryToRegisterPass());
    Passes.add(createInstructionCombiningPass());

    // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that
    // BasicAliasAnalysis wins if they disagree. This is intended to help
    // support "obvious" type-punning idioms.
    Passes.add(createTypeBasedAliasAnalysisPass());
    Passes.add(createBasicAliasAnalysisPass());

    // Start of function pass.
    // Break up aggregate allocas, using SSAUpdater.
    Passes.add(createScalarReplAggregatesPass(-1, false));
    Passes.add(createEarlyCSEPass());              // Catch trivial redundancies
    if (SimplifyLibCalls)
        Passes.add(createSimplifyLibCallsPass());    // Library Call Optimizations
    Passes.add(createJumpThreadingPass());         // Thread jumps.
    Passes.add(createCorrelatedValuePropagationPass()); // Propagate conditionals
    Passes.add(createCFGSimplificationPass());     // Merge & remove BBs
    Passes.add(createInstructionCombiningPass());  // Combine silly seq's

    Passes.add(createTailCallEliminationPass());   // Eliminate tail calls
    Passes.add(createCFGSimplificationPass());     // Merge & remove BBs
    Passes.add(createReassociatePass());           // Reassociate expressions
    if (!disableLoopRotate) Passes.add(createLoopRotatePass());            // Rotate Loop // makes packetized Mandelbrot fail
    if (!disableLICM) Passes.add(createLICMPass());                  // Hoist loop invariants // makes scalar driver crash after optimization
    //Passes.add(createLoopUnswitchPass(OptimizeSize || OptimizationLevel < 3)); // breaks DCT with UNIFORM_ANALYSIS=0
    Passes.add(createInstructionCombiningPass());
    Passes.add(createIndVarSimplifyPass());        // Canonicalize indvars
    Passes.add(createLoopIdiomPass());             // Recognize idioms like memset.
    Passes.add(createLoopDeletionPass());          // Delete dead loops
    if (UnrollLoops)
        Passes.add(createLoopUnrollPass());          // Unroll small loops
    Passes.add(createInstructionCombiningPass());  // Clean up after the unroller
    if (OptimizationLevel > 1)
        Passes.add(createGVNPass());                 // Remove redundancies
    Passes.add(createMemCpyOptPass());             // Remove memcpy / form memset
    Passes.add(createSCCPPass());                  // Constant prop with SCCP

    // Run instcombine after redundancy elimination to exploit opportunities
    // opened up by them.
    Passes.add(createInstructionCombiningPass());
    Passes.add(createJumpThreadingPass());         // Thread jumps
    Passes.add(createCorrelatedValuePropagationPass());
    Passes.add(createDeadStoreEliminationPass());  // Delete dead stores
    Passes.add(createAggressiveDCEPass());         // Delete dead instructions
    Passes.add(createCFGSimplificationPass());     // Merge & remove BBs

    WFVOPENCL_DEBUG( Passes.add(createVerifierPass()); );