static void AddStandardCompilePasses(PassManager &PM) { PM.add(createVerifierPass()); // Verify that input is correct #if LLVM_VERSION_CODE < LLVM_VERSION(3, 0) addPass(PM, createLowerSetJmpPass()); // Lower llvm.setjmp/.longjmp #endif // If the -strip-debug command line option was specified, do it. if (StripDebug) addPass(PM, createStripSymbolsPass(true)); if (DisableOptimizations) return; #if LLVM_VERSION_CODE < LLVM_VERSION(2, 7) addPass(PM, createRaiseAllocationsPass()); // call %malloc -> malloc inst #endif addPass(PM, createCFGSimplificationPass()); // Clean up disgusting code addPass(PM, createPromoteMemoryToRegisterPass());// Kill useless allocas addPass(PM, createGlobalOptimizerPass()); // Optimize out global vars addPass(PM, createGlobalDCEPass()); // Remove unused fns and globs addPass(PM, createIPConstantPropagationPass());// IP Constant Propagation addPass(PM, createDeadArgEliminationPass()); // Dead argument elimination addPass(PM, createInstructionCombiningPass()); // Clean up after IPCP & DAE addPass(PM, createCFGSimplificationPass()); // Clean up after IPCP & DAE addPass(PM, createPruneEHPass()); // Remove dead EH info addPass(PM, createFunctionAttrsPass()); // Deduce function attrs if (!DisableInline) addPass(PM, createFunctionInliningPass()); // Inline small functions addPass(PM, createArgumentPromotionPass()); // Scalarize uninlined fn args addPass(PM, createSimplifyLibCallsPass()); // Library Call Optimizations addPass(PM, createInstructionCombiningPass()); // Cleanup for scalarrepl. addPass(PM, createJumpThreadingPass()); // Thread jumps. addPass(PM, createCFGSimplificationPass()); // Merge & remove BBs addPass(PM, createScalarReplAggregatesPass()); // Break up aggregate allocas addPass(PM, createInstructionCombiningPass()); // Combine silly seq's #if LLVM_VERSION_CODE < LLVM_VERSION(2, 7) addPass(PM, createCondPropagationPass()); // Propagate conditionals #endif addPass(PM, createTailCallEliminationPass()); // Eliminate tail calls addPass(PM, createCFGSimplificationPass()); // Merge & remove BBs addPass(PM, createReassociatePass()); // Reassociate expressions addPass(PM, createLoopRotatePass()); addPass(PM, createLICMPass()); // Hoist loop invariants addPass(PM, createLoopUnswitchPass()); // Unswitch loops. #if LLVM_VERSION_CODE < LLVM_VERSION(2, 9) addPass(PM, createLoopIndexSplitPass()); // Index split loops. #endif // FIXME : Removing instcombine causes nestedloop regression. addPass(PM, createInstructionCombiningPass()); addPass(PM, createIndVarSimplifyPass()); // Canonicalize indvars addPass(PM, createLoopDeletionPass()); // Delete dead loops addPass(PM, createLoopUnrollPass()); // Unroll small loops addPass(PM, createInstructionCombiningPass()); // Clean up after the unroller addPass(PM, createGVNPass()); // Remove redundancies addPass(PM, createMemCpyOptPass()); // Remove memcpy / form memset addPass(PM, createSCCPPass()); // Constant prop with SCCP // Run instcombine after redundancy elimination to exploit opportunities // opened up by them. addPass(PM, createInstructionCombiningPass()); #if LLVM_VERSION_CODE < LLVM_VERSION(2, 7) addPass(PM, createCondPropagationPass()); // Propagate conditionals #endif addPass(PM, createDeadStoreEliminationPass()); // Delete dead stores addPass(PM, createAggressiveDCEPass()); // Delete dead instructions addPass(PM, createCFGSimplificationPass()); // Merge & remove BBs addPass(PM, createStripDeadPrototypesPass()); // Get rid of dead prototypes #if LLVM_VERSION_CODE < LLVM_VERSION(3, 0) addPass(PM, createDeadTypeEliminationPass()); // Eliminate dead types #endif addPass(PM, createConstantMergePass()); // Merge dup global constants }
static void createOptimizationPasses() { // Create and set up the per-function pass manager. // FIXME: Move the code generator to be function-at-a-time. PerFunctionPasses = new FunctionPassManager(new ExistingModuleProvider(TheModule)); PerFunctionPasses->add(new TargetData(*TheTarget->getTargetData())); // In -O0 if checking is disabled, we don't even have per-function passes. bool HasPerFunctionPasses = false; #ifdef ENABLE_CHECKING PerFunctionPasses->add(createVerifierPass()); HasPerFunctionPasses = true; #endif if (optimize > 0 && !DisableLLVMOptimizations) { HasPerFunctionPasses = true; PerFunctionPasses->add(createCFGSimplificationPass()); if (optimize == 1) PerFunctionPasses->add(createPromoteMemoryToRegisterPass()); else PerFunctionPasses->add(createScalarReplAggregatesPass()); PerFunctionPasses->add(createInstructionCombiningPass()); // PerFunctionPasses->add(createCFGSimplificationPass()); } // FIXME: AT -O0/O1, we should stream out functions at a time. PerModulePasses = new PassManager(); PerModulePasses->add(new TargetData(*TheTarget->getTargetData())); bool HasPerModulePasses = false; if (optimize > 0 && !DisableLLVMOptimizations) { HasPerModulePasses = true; PassManager *PM = PerModulePasses; if (flag_unit_at_a_time) PM->add(createRaiseAllocationsPass()); // call %malloc -> malloc inst PM->add(createCFGSimplificationPass()); // Clean up disgusting code PM->add(createPromoteMemoryToRegisterPass()); // Kill useless allocas if (flag_unit_at_a_time) { PM->add(createGlobalOptimizerPass()); // Optimize out global vars PM->add(createGlobalDCEPass()); // Remove unused fns and globs PM->add(createIPConstantPropagationPass()); // IP Constant Propagation PM->add(createDeadArgEliminationPass()); // Dead argument elimination } PM->add(createInstructionCombiningPass()); // Clean up after IPCP & DAE // DISABLE PREDSIMPLIFY UNTIL PR967 is fixed. //PM->add(createPredicateSimplifierPass()); // Canonicalize registers PM->add(createCFGSimplificationPass()); // Clean up after IPCP & DAE if (flag_unit_at_a_time) PM->add(createPruneEHPass()); // Remove dead EH info if (optimize > 1) { if (flag_inline_trees > 1) // respect -fno-inline-functions PM->add(createFunctionInliningPass()); // Inline small functions if (flag_unit_at_a_time && !lang_hooks.flag_no_builtin()) PM->add(createSimplifyLibCallsPass()); // Library Call Optimizations if (optimize > 2) PM->add(createArgumentPromotionPass()); // Scalarize uninlined fn args } PM->add(createTailDuplicationPass()); // Simplify cfg by copying code PM->add(createInstructionCombiningPass()); // Cleanup for scalarrepl. PM->add(createCFGSimplificationPass()); // Merge & remove BBs PM->add(createScalarReplAggregatesPass()); // Break up aggregate allocas PM->add(createInstructionCombiningPass()); // Combine silly seq's PM->add(createCondPropagationPass()); // Propagate conditionals PM->add(createTailCallEliminationPass()); // Eliminate tail calls PM->add(createCFGSimplificationPass()); // Merge & remove BBs PM->add(createReassociatePass()); // Reassociate expressions PM->add(createLoopRotatePass()); // Rotate Loop PM->add(createLICMPass()); // Hoist loop invariants PM->add(createLoopUnswitchPass(optimize_size ? true : false)); PM->add(createInstructionCombiningPass()); // Clean up after LICM/reassoc PM->add(createIndVarSimplifyPass()); // Canonicalize indvars if (flag_unroll_loops) PM->add(createLoopUnrollPass()); // Unroll small loops PM->add(createInstructionCombiningPass()); // Clean up after the unroller PM->add(createLoadValueNumberingPass()); // GVN for load instructions PM->add(createGCSEPass()); // Remove common subexprs PM->add(createSCCPPass()); // Constant prop with SCCP // Run instcombine after redundancy elimination to exploit opportunities // opened up by them. PM->add(createInstructionCombiningPass()); PM->add(createCondPropagationPass()); // Propagate conditionals PM->add(createDeadStoreEliminationPass()); // Delete dead stores PM->add(createAggressiveDCEPass()); // SSA based 'Aggressive DCE' PM->add(createCFGSimplificationPass()); // Merge & remove BBs if (optimize > 1 && flag_unit_at_a_time) PM->add(createConstantMergePass()); // Merge dup global constants PM->add(createStripDeadPrototypesPass()); // Get rid of dead prototypes } if (emit_llvm_bc) { // Emit an LLVM .bc file to the output. This is used when passed // -emit-llvm -c to the GCC driver. PerModulePasses->add(CreateBitcodeWriterPass(*AsmOutStream)); // Disable emission of .ident into the output file... which is completely // wrong for llvm/.bc emission cases. flag_no_ident = 1; HasPerModulePasses = true; } else if (emit_llvm) { // Emit an LLVM .ll file to the output. This is used when passed // -emit-llvm -S to the GCC driver. PerModulePasses->add(new PrintModulePass(AsmOutFile)); // Disable emission of .ident into the output file... which is completely // wrong for llvm/.bc emission cases. flag_no_ident = 1; HasPerModulePasses = true; } else { FunctionPassManager *PM; // If there are passes we have to run on the entire module, we do codegen // as a separate "pass" after that happens. // FIXME: This is disabled right now until bugs can be worked out. Reenable // this for fast -O0 compiles! if (HasPerModulePasses || 1) { CodeGenPasses = PM = new FunctionPassManager(new ExistingModuleProvider(TheModule)); PM->add(new TargetData(*TheTarget->getTargetData())); } else { // If there are no module-level passes that have to be run, we codegen as // each function is parsed. PM = PerFunctionPasses; HasPerFunctionPasses = true; } // Normal mode, emit a .s file by running the code generator. switch (TheTarget->addPassesToEmitFile(*PM, *AsmOutStream, TargetMachine::AssemblyFile, /*FAST*/optimize == 0)) { default: case FileModel::Error: cerr << "Error interfacing to target machine!\n"; exit(1); case FileModel::AsmFile: break; } if (TheTarget->addPassesToEmitFileFinish(*PM, 0, /*Fast*/optimize == 0)) { cerr << "Error interfacing to target machine!\n"; exit(1); } } if (HasPerFunctionPasses) { PerFunctionPasses->doInitialization(); } else { delete PerFunctionPasses; PerFunctionPasses = 0; } if (!HasPerModulePasses) { delete PerModulePasses; PerModulePasses = 0; } }
Function * futamurize( const Function * orig_func, DenseMap<const Value*, Value*> &argmap, std::set<const unsigned char *> &constant_addresses_set ) { LLVMContext &context = getGlobalContext(); // Make a copy of the function, removing constant arguments Function * specialized_func = CloneFunction( orig_func, argmap ); specialized_func->setName( orig_func->getNameStr() + "_1" ); // add it to our module LLVM_Module->getFunctionList().push_back( specialized_func ); printf("\nspecialized_func = %p <%s>\n", specialized_func, specialized_func->getName().data()); //~ specialized_func->dump(); // Optimize it FunctionPassManager PM( LLVM_Module ); createStandardFunctionPasses( &PM, 3 ); PM.add(createScalarReplAggregatesPass()); // Break up aggregate allocas PM.add(createInstructionCombiningPass()); // Cleanup for scalarrepl. PM.add(createJumpThreadingPass()); // Thread jumps. PM.add(createCFGSimplificationPass()); // Merge & remove BBs PM.add(createInstructionCombiningPass()); // Combine silly seq's PM.add(createTailCallEliminationPass()); // Eliminate tail calls PM.add(createCFGSimplificationPass()); // Merge & remove BBs PM.add(createReassociatePass()); // Reassociate expressions PM.add(createLoopRotatePass()); // Rotate Loop PM.add(createLICMPass()); // Hoist loop invariants PM.add(createLoopUnswitchPass( false )); PM.add(createInstructionCombiningPass()); PM.add(createIndVarSimplifyPass()); // Canonicalize indvars PM.add(createLoopDeletionPass()); // Delete dead loops PM.add(createLoopUnroll2Pass()); // Unroll small loops PM.add(createInstructionCombiningPass()); // Clean up after the unroller PM.add(createGVNPass()); // Remove redundancies PM.add(createMemCpyOptPass()); // Remove memcpy / form memset PM.add(createSCCPPass()); // Constant prop with SCCP PM.add(createPromoteMemoryToRegisterPass()); PM.add(createConstantPropagationPass()); PM.add(createDeadStoreEliminationPass()); PM.add(createAggressiveDCEPass()); PM.add(new MemoryDependenceAnalysis()); //~ PM.add(createAAEvalPass()); const PassInfo * pinfo = Pass::lookupPassInfo( "print-alias-sets" ); if( !pinfo ) { printf( "print-alias-sets not found\n" ); exit(-1); } PM.add( pinfo->createPass() ); FunctionPassManager PM_Inline( LLVM_Module ); PM_Inline.add(createSingleFunctionInliningPass()); bool Changed = false; int iterations = 2; int inline_iterations = 6; do { Changed = false; // first do some optimizations PM.doInitialization(); PM.run( *specialized_func ); PM.doFinalization(); // Load from Constant Memory detection const TargetData *TD = LLVM_EE->getTargetData(); for (inst_iterator I = inst_begin(specialized_func), E = inst_end(specialized_func); I != E; ++I) { Instruction * inst = (Instruction *) &*I; // get all Load instructions LoadInst * load = dyn_cast<LoadInst>( inst ); if( !load ) continue; if( load->isVolatile() ) continue; if (load->use_empty()) continue; // Don't muck with dead instructions... // get the address loaded by load instruction Value *ptr_value = load->getPointerOperand(); // we're only interested in constant addresses ConstantExpr * ptr_constant_expr = dyn_cast<ConstantExpr>( ptr_value ); if( !ptr_constant_expr ) continue; ptr_constant_expr->dump(); // compute real address of constant pointer expression Constant * ptr_constant = ConstantFoldConstantExpression( ptr_constant_expr, TD ); if( !ptr_constant ) continue; ptr_constant->dump(); // convert to int constant ConstantInt *int_constant = dyn_cast<ConstantInt>( ConstantExpr::getPtrToInt( ptr_constant, Type::getInt64Ty( context ))); if( !int_constant ) continue; int_constant->dump(); // get data size int data_length = TD->getTypeAllocSize( load->getType() ); ptr_value->getType()->dump(); // get real address (at last !) const unsigned char * c_ptr = (const unsigned char *) int_constant->getLimitedValue(); printf( "%ld %d %d\n", c_ptr, constant_addresses_set.count( c_ptr ), data_length ); // check what's in this address int isconst = 1; for( int offset=0; offset<data_length; offset++ ) isconst &= constant_addresses_set.count( c_ptr + offset ); if( !isconst ) continue; printf( "It is constant.\n" ); // make a LLVM const with the data Constant *new_constant = NULL; switch( data_length ) { case 1: new_constant = ConstantInt::get( Type::getInt8Ty( context ), *(uint8_t*)c_ptr, false /* signed */ ); break; case 2: new_constant = ConstantInt::get( Type::getInt16Ty( context ), *(uint16_t*)c_ptr, false /* signed */ ); break; case 4: new_constant = ConstantInt::get( Type::getInt32Ty( context ), *(uint32_t*)c_ptr, false /* signed */ ); break; case 8: new_constant = ConstantInt::get( Type::getInt64Ty( context ), *(uint64_t*)c_ptr, false /* signed */ ); break; default: { StringRef const_data ( (const char *) c_ptr, data_length ); new_constant = ConstantArray::get( context, const_data, false /* dont add terminating null */ ); } } if( !new_constant ) continue; new_constant->dump(); //~ // get the type that is loaded const Type *Ty = load->getType(); // do we need a cast ? if( load->getType() != new_constant->getType() ) { new_constant = ConstantExpr::getBitCast( new_constant, Ty ); new_constant->dump(); } // zap the load and replace with constant address load->replaceAllUsesWith( new_constant ); printf( "\nREPLACED :...\n" ); load->dump(); new_constant->dump(); Changed = true; } if( Changed ) continue; // re-optimize and do another pass of constant load elimination // if we can't do anything else, do an inlining pass if( inline_iterations > 0 ) { inline_iterations --; PM_Inline.doInitialization(); Changed |= PM_Inline.run( *specialized_func ); PM_Inline.doFinalization(); //~ for( int i=0; i<3; i++ ) { PM.doInitialization(); Changed |= PM.run( *specialized_func ); PM.doFinalization(); } } if( iterations>0 && !Changed ) iterations--; } while( Changed || iterations>0 ); return specialized_func; }