Exemplo n.º 1
0
// =============================================================================
// runOnModule
// 
//  
// 
// =============================================================================
bool TwetoPassImpl::runOnModule(Module & M)
{

	MSG("\n============== Tweto Pass =============\n");
	this->llvmMod = &M;

	LinkExternalBitcode(this->llvmMod, ExternalBitCodePath);

	// Retrieve the method that does all the vtable calculations
	// in order to call the actual 'write' method (see replaceCallsInProcess)
	this->writeFun =
	    this->llvmMod->getFunction("tweto_call_write_method");
	if (!this->writeFun) {
		std::cerr << "tweto_call_write_method is missing,";
		std::cerr << "pass aborded!" << std::endl;
		return false;
	}
	// Retrieve the base write method in order to get the types
	// of the 'this' pointer, the address and the data
	// (should be always mangled like this, so if the Basic-protocol's includes
	//  are correctly imported, no error will be throw)
	this->basicWriteFun = this->llvmMod->getFunction(wFunName);
	if (!this->basicWriteFun) {
		std::cerr << "basic's write method is missing,";
		std::cerr << "pass aborded!" << std::endl;
		return false;
	}

	writeFun->dump();

	// Initialize function passes
	DataLayout *target = new DataLayout(this->llvmMod);
	DataLayoutPass *dlpass = new DataLayoutPass(*target);
	funPassManager = new FunctionPassManager(this->llvmMod);
	funPassManager->add(dlpass);
	funPassManager->add(createIndVarSimplifyPass());
	funPassManager->add(createLoopUnrollPass());
	funPassManager->add(createInstructionCombiningPass());
	funPassManager->add(createReassociatePass());
	funPassManager->add(createGVNPass());
	funPassManager->add(createCFGSimplificationPass());
	funPassManager->add(createConstantPropagationPass());

	// Modules
	std::vector < sc_core::sc_module * >modules =
	    sc_core::sc_get_curr_simcontext()->get_module_registry()->
	    m_module_vec;
	std::vector < sc_core::sc_module * >::iterator modIt;
	for (modIt = modules.begin(); modIt < modules.end(); ++modIt) {
		sc_core::sc_module * initiatorMod = *modIt;
		std::string moduleName =
		    (std::string) initiatorMod->name();
		// Optimize this module
		optimize(initiatorMod);
	}

	// Check if the module is corrupt
	verifyModule(*this->llvmMod);
	std::ostringstream oss;
	oss << callOptCounter;
	MSG("\n Pass report - " + oss.str() + "/" + "?");
	MSG(" - opt/total\n");
	MSG("===========================================\n\n");

	// TODO : handle false case
	return true;
}
Exemplo n.º 2
0
static void AddStandardCompilePasses(PassManager &PM) {
  PM.add(createVerifierPass());                  // Verify that input is correct

#if LLVM_VERSION_CODE < LLVM_VERSION(3, 0)
  addPass(PM, createLowerSetJmpPass());          // Lower llvm.setjmp/.longjmp
#endif

  // If the -strip-debug command line option was specified, do it.
  if (StripDebug)
    addPass(PM, createStripSymbolsPass(true));

  if (DisableOptimizations) return;

  addPass(PM, createCFGSimplificationPass());    // Clean up disgusting code
  addPass(PM, createPromoteMemoryToRegisterPass());// Kill useless allocas
  addPass(PM, createGlobalOptimizerPass());      // Optimize out global vars
  addPass(PM, createGlobalDCEPass());            // Remove unused fns and globs
  addPass(PM, createIPConstantPropagationPass());// IP Constant Propagation
  addPass(PM, createDeadArgEliminationPass());   // Dead argument elimination
  addPass(PM, createInstructionCombiningPass()); // Clean up after IPCP & DAE
  addPass(PM, createCFGSimplificationPass());    // Clean up after IPCP & DAE

  addPass(PM, createPruneEHPass());              // Remove dead EH info
  addPass(PM, createFunctionAttrsPass());        // Deduce function attrs

  if (!DisableInline)
    addPass(PM, createFunctionInliningPass());   // Inline small functions
  addPass(PM, createArgumentPromotionPass());    // Scalarize uninlined fn args

#if LLVM_VERSION_CODE < LLVM_VERSION(3, 4)
  addPass(PM, createSimplifyLibCallsPass());     // Library Call Optimizations
#endif
  addPass(PM, createInstructionCombiningPass()); // Cleanup for scalarrepl.
  addPass(PM, createJumpThreadingPass());        // Thread jumps.
  addPass(PM, createCFGSimplificationPass());    // Merge & remove BBs
  addPass(PM, createScalarReplAggregatesPass()); // Break up aggregate allocas
  addPass(PM, createInstructionCombiningPass()); // Combine silly seq's

  addPass(PM, createTailCallEliminationPass());  // Eliminate tail calls
  addPass(PM, createCFGSimplificationPass());    // Merge & remove BBs
  addPass(PM, createReassociatePass());          // Reassociate expressions
  addPass(PM, createLoopRotatePass());
  addPass(PM, createLICMPass());                 // Hoist loop invariants
  addPass(PM, createLoopUnswitchPass());         // Unswitch loops.
  // FIXME : Removing instcombine causes nestedloop regression.
  addPass(PM, createInstructionCombiningPass());
  addPass(PM, createIndVarSimplifyPass());       // Canonicalize indvars
  addPass(PM, createLoopDeletionPass());         // Delete dead loops
  addPass(PM, createLoopUnrollPass());           // Unroll small loops
  addPass(PM, createInstructionCombiningPass()); // Clean up after the unroller
  addPass(PM, createGVNPass());                  // Remove redundancies
  addPass(PM, createMemCpyOptPass());            // Remove memcpy / form memset
  addPass(PM, createSCCPPass());                 // Constant prop with SCCP

  // Run instcombine after redundancy elimination to exploit opportunities
  // opened up by them.
  addPass(PM, createInstructionCombiningPass());

  addPass(PM, createDeadStoreEliminationPass()); // Delete dead stores
  addPass(PM, createAggressiveDCEPass());        // Delete dead instructions
  addPass(PM, createCFGSimplificationPass());    // Merge & remove BBs
  addPass(PM, createStripDeadPrototypesPass());  // Get rid of dead prototypes
#if LLVM_VERSION_CODE < LLVM_VERSION(3, 0)
  addPass(PM, createDeadTypeEliminationPass());  // Eliminate dead types
#endif
  addPass(PM, createConstantMergePass());        // Merge dup global constants
}
Exemplo n.º 3
0
static void addOptimizationPasses(T *PM)
{
#ifdef JL_DEBUG_BUILD
    PM->add(createVerifierPass());
#endif

#ifdef __has_feature
#   if __has_feature(address_sanitizer)
#   if defined(LLVM37) && !defined(LLVM38)
    // LLVM 3.7 BUG: ASAN pass doesn't properly initialize its dependencies
    initializeTargetLibraryInfoWrapperPassPass(*PassRegistry::getPassRegistry());
#   endif
    PM->add(createAddressSanitizerFunctionPass());
#   endif
#   if __has_feature(memory_sanitizer)
    PM->add(llvm::createMemorySanitizerPass(true));
#   endif
#endif
    if (jl_options.opt_level <= 1) {
        return;
    }
#ifdef LLVM37
    PM->add(createTargetTransformInfoWrapperPass(jl_TargetMachine->getTargetIRAnalysis()));
#else
    jl_TargetMachine->addAnalysisPasses(*PM);
#endif
#ifdef LLVM38
    PM->add(createTypeBasedAAWrapperPass());
#else
    PM->add(createTypeBasedAliasAnalysisPass());
#endif
    if (jl_options.opt_level >= 3) {
#ifdef LLVM38
        PM->add(createBasicAAWrapperPass());
#else
        PM->add(createBasicAliasAnalysisPass());
#endif
    }
    // list of passes from vmkit
    PM->add(createCFGSimplificationPass()); // Clean up disgusting code
    PM->add(createPromoteMemoryToRegisterPass());// Kill useless allocas

#ifndef INSTCOMBINE_BUG
    PM->add(createInstructionCombiningPass()); // Cleanup for scalarrepl.
#endif
    PM->add(createSROAPass());                 // Break up aggregate allocas
#ifndef INSTCOMBINE_BUG
    PM->add(createInstructionCombiningPass()); // Cleanup for scalarrepl.
#endif
    PM->add(createJumpThreadingPass());        // Thread jumps.
    // NOTE: CFG simp passes after this point seem to hurt native codegen.
    // See issue #6112. Should be re-evaluated when we switch to MCJIT.
    //PM->add(createCFGSimplificationPass());    // Merge & remove BBs
#ifndef INSTCOMBINE_BUG
    PM->add(createInstructionCombiningPass()); // Combine silly seq's
#endif

    //PM->add(createCFGSimplificationPass());    // Merge & remove BBs
    PM->add(createReassociatePass());          // Reassociate expressions

    // this has the potential to make some things a bit slower
    //PM->add(createBBVectorizePass());

    PM->add(createEarlyCSEPass()); //// ****

    PM->add(createLoopIdiomPass()); //// ****
    PM->add(createLoopRotatePass());           // Rotate loops.
    // LoopRotate strips metadata from terminator, so run LowerSIMD afterwards
    PM->add(createLowerSimdLoopPass());        // Annotate loop marked with "simdloop" as LLVM parallel loop
    PM->add(createLICMPass());                 // Hoist loop invariants
    PM->add(createLoopUnswitchPass());         // Unswitch loops.
    // Subsequent passes not stripping metadata from terminator
#ifndef INSTCOMBINE_BUG
    PM->add(createInstructionCombiningPass());
#endif
    PM->add(createIndVarSimplifyPass());       // Canonicalize indvars
    PM->add(createLoopDeletionPass());         // Delete dead loops
#if defined(LLVM35)
    PM->add(createSimpleLoopUnrollPass());     // Unroll small loops
#else
    PM->add(createLoopUnrollPass());           // Unroll small loops
#endif
#if !defined(LLVM35) && !defined(INSTCOMBINE_BUG)
    PM->add(createLoopVectorizePass());        // Vectorize loops
#endif
    //PM->add(createLoopStrengthReducePass());   // (jwb added)

#ifndef INSTCOMBINE_BUG
    PM->add(createInstructionCombiningPass()); // Clean up after the unroller
#endif
    PM->add(createGVNPass());                  // Remove redundancies
    //PM->add(createMemCpyOptPass());            // Remove memcpy / form memset
    PM->add(createSCCPPass());                 // Constant prop with SCCP

    // Run instcombine after redundancy elimination to exploit opportunities
    // opened up by them.
    PM->add(createSinkingPass()); ////////////// ****
    PM->add(createInstructionSimplifierPass());///////// ****
#ifndef INSTCOMBINE_BUG
    PM->add(createInstructionCombiningPass());
#endif
    PM->add(createJumpThreadingPass());         // Thread jumps
    PM->add(createDeadStoreEliminationPass());  // Delete dead stores
#if !defined(INSTCOMBINE_BUG)
    if (jl_options.opt_level >= 3) {
#ifdef LLVM39
        initializeDemandedBitsPass(*PassRegistry::getPassRegistry());
#endif
        PM->add(createSLPVectorizerPass());     // Vectorize straight-line code
    }
#endif

    PM->add(createAggressiveDCEPass());         // Delete dead instructions
#if !defined(INSTCOMBINE_BUG)
    if (jl_options.opt_level >= 3)
        PM->add(createInstructionCombiningPass());   // Clean up after SLP loop vectorizer
#endif
#if defined(LLVM35)
    PM->add(createLoopVectorizePass());         // Vectorize loops
    PM->add(createInstructionCombiningPass());  // Clean up after loop vectorizer
#endif
    //PM->add(createCFGSimplificationPass());     // Merge & remove BBs
}
Exemplo n.º 4
0
/// adopted from: llvm-2.9/include/llvm/Support/StandardPasses.h
void optimizeFunction(Function* f, const bool disableLICM, const bool disableLoopRotate) {
    assert (f);
    assert (f->getParent());
    Module* mod = f->getParent();
    TargetData* targetData = new TargetData(mod);

    const unsigned OptimizationLevel = 3;
    const bool OptimizeSize = false;
    const bool UnitAtATime = true;
    const bool UnrollLoops = true;
    const bool SimplifyLibCalls = true;
    const bool HaveExceptions = false;
    Pass* InliningPass = createFunctionInliningPass(275);

    //PassManager Passes;
    FunctionPassManager Passes(mod);
    Passes.add(targetData);

    //
    // custom
    //
    Passes.add(createScalarReplAggregatesPass(-1, false));

    //
    // createStandardFunctionPasses
    //
    Passes.add(createCFGSimplificationPass());
    Passes.add(createPromoteMemoryToRegisterPass());
    Passes.add(createInstructionCombiningPass());

    // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that
    // BasicAliasAnalysis wins if they disagree. This is intended to help
    // support "obvious" type-punning idioms.
    Passes.add(createTypeBasedAliasAnalysisPass());
    Passes.add(createBasicAliasAnalysisPass());

    // Start of function pass.
    // Break up aggregate allocas, using SSAUpdater.
    Passes.add(createScalarReplAggregatesPass(-1, false));
    Passes.add(createEarlyCSEPass());              // Catch trivial redundancies
    if (SimplifyLibCalls)
        Passes.add(createSimplifyLibCallsPass());    // Library Call Optimizations
    Passes.add(createJumpThreadingPass());         // Thread jumps.
    Passes.add(createCorrelatedValuePropagationPass()); // Propagate conditionals
    Passes.add(createCFGSimplificationPass());     // Merge & remove BBs
    Passes.add(createInstructionCombiningPass());  // Combine silly seq's

    Passes.add(createTailCallEliminationPass());   // Eliminate tail calls
    Passes.add(createCFGSimplificationPass());     // Merge & remove BBs
    Passes.add(createReassociatePass());           // Reassociate expressions
    if (!disableLoopRotate) Passes.add(createLoopRotatePass());            // Rotate Loop // makes packetized Mandelbrot fail
    if (!disableLICM) Passes.add(createLICMPass());                  // Hoist loop invariants // makes scalar driver crash after optimization
    //Passes.add(createLoopUnswitchPass(OptimizeSize || OptimizationLevel < 3)); // breaks DCT with UNIFORM_ANALYSIS=0
    Passes.add(createInstructionCombiningPass());
    Passes.add(createIndVarSimplifyPass());        // Canonicalize indvars
    Passes.add(createLoopIdiomPass());             // Recognize idioms like memset.
    Passes.add(createLoopDeletionPass());          // Delete dead loops
    if (UnrollLoops)
        Passes.add(createLoopUnrollPass());          // Unroll small loops
    Passes.add(createInstructionCombiningPass());  // Clean up after the unroller
    if (OptimizationLevel > 1)
        Passes.add(createGVNPass());                 // Remove redundancies
    Passes.add(createMemCpyOptPass());             // Remove memcpy / form memset
    Passes.add(createSCCPPass());                  // Constant prop with SCCP

    // Run instcombine after redundancy elimination to exploit opportunities
    // opened up by them.
    Passes.add(createInstructionCombiningPass());
    Passes.add(createJumpThreadingPass());         // Thread jumps
    Passes.add(createCorrelatedValuePropagationPass());
    Passes.add(createDeadStoreEliminationPass());  // Delete dead stores
    Passes.add(createAggressiveDCEPass());         // Delete dead instructions
    Passes.add(createCFGSimplificationPass());     // Merge & remove BBs

    WFVOPENCL_DEBUG( Passes.add(createVerifierPass()); );
Exemplo n.º 5
0
/// initialize_module - perform setup of the LLVM code generation system.
void BrainFTraceRecorder::initialize_module() {
  LLVMContext &Context = module->getContext();
  
  // Initialize the code generator, and enable aggressive code generation.
  InitializeNativeTarget();
  EngineBuilder builder(module);
  builder.setOptLevel(CodeGenOpt::Aggressive);
  EE = builder.create();
  
  // Create a FunctionPassManager to handle running optimization passes
  // on our generated code.  Setup a basic suite of optimizations for it.
  FPM = new llvm::FunctionPassManager(module);
  FPM->add(createInstructionCombiningPass());
  FPM->add(createCFGSimplificationPass());
  FPM->add(createScalarReplAggregatesPass());
  FPM->add(createSimplifyLibCallsPass());
  FPM->add(createInstructionCombiningPass());
  FPM->add(createJumpThreadingPass());
  FPM->add(createCFGSimplificationPass());
  FPM->add(createInstructionCombiningPass());
  FPM->add(createCFGSimplificationPass());
  FPM->add(createReassociatePass());
  FPM->add(createLoopRotatePass());
  FPM->add(createLICMPass());
  FPM->add(createLoopUnswitchPass(false));
  FPM->add(createInstructionCombiningPass());  
  FPM->add(createIndVarSimplifyPass());
  FPM->add(createLoopDeletionPass());
  FPM->add(createLoopUnrollPass());
  FPM->add(createInstructionCombiningPass());
  FPM->add(createGVNPass());
  FPM->add(createSCCPPass());
  FPM->add(createInstructionCombiningPass());
  FPM->add(createJumpThreadingPass());
  FPM->add(createDeadStoreEliminationPass());
  FPM->add(createAggressiveDCEPass());
  FPM->add(createCFGSimplificationPass());
  
  // Cache the LLVM type signature of an opcode function
  int_type = sizeof(size_t) == 4 ? 
                  IntegerType::getInt32Ty(Context) : 
                  IntegerType::getInt64Ty(Context);
  const Type *data_type =
    PointerType::getUnqual(IntegerType::getInt8Ty(Context));
  std::vector<const Type*> args;
  args.push_back(int_type);
  args.push_back(data_type);
  op_type =
    FunctionType::get(Type::getVoidTy(Context), args, false);
  
  // Setup a global variable in the LLVM module to represent the bytecode
  // array.  Bind it to the actual bytecode array at JIT time.
  const Type *bytecode_type = PointerType::getUnqual(op_type);
  bytecode_array = cast<GlobalValue>(module->
    getOrInsertGlobal("BytecodeArray", bytecode_type));
  EE->addGlobalMapping(bytecode_array, BytecodeArray);
  
  // Setup a similar mapping for the global mode flag.
  const IntegerType *flag_type = IntegerType::get(Context, 8);
  mode_flag =
    cast<GlobalValue>(module->getOrInsertGlobal("mode", flag_type));
  EE->addGlobalMapping(mode_flag, &mode);
  
  // Setup a similar mapping for the global extension root flag.
  ext_root =
    cast<GlobalValue>(module->getOrInsertGlobal("ext_root", int_type));
  EE->addGlobalMapping(ext_root, &extension_root);
  
  // Setup a similar mapping for the global extension leaf flag.
  ext_leaf =
    cast<GlobalValue>(module->getOrInsertGlobal("ext_leaf", int_type));
  EE->addGlobalMapping(ext_leaf, &extension_leaf);

  // Cache LLVM declarations for putchar() and getchar().
  const Type *int_type = sizeof(int) == 4 ? IntegerType::getInt32Ty(Context)
                                       : IntegerType::getInt64Ty(Context);
  putchar_func =
    module->getOrInsertFunction("putchar", int_type, int_type, NULL);
  getchar_func = module->getOrInsertFunction("getchar", int_type, NULL);
}
static void createOptimizationPasses() {
  // Create and set up the per-function pass manager.
  // FIXME: Move the code generator to be function-at-a-time.
  PerFunctionPasses =
    new FunctionPassManager(new ExistingModuleProvider(TheModule));
  PerFunctionPasses->add(new TargetData(*TheTarget->getTargetData()));

  // In -O0 if checking is disabled, we don't even have per-function passes.
  bool HasPerFunctionPasses = false;
#ifdef ENABLE_CHECKING
  PerFunctionPasses->add(createVerifierPass());
  HasPerFunctionPasses = true;
#endif

  if (optimize > 0 && !DisableLLVMOptimizations) {
    HasPerFunctionPasses = true;
    PerFunctionPasses->add(createCFGSimplificationPass());
    if (optimize == 1)
      PerFunctionPasses->add(createPromoteMemoryToRegisterPass());
    else
      PerFunctionPasses->add(createScalarReplAggregatesPass());
    PerFunctionPasses->add(createInstructionCombiningPass());
    //    PerFunctionPasses->add(createCFGSimplificationPass());
  }

  // FIXME: AT -O0/O1, we should stream out functions at a time.
  PerModulePasses = new PassManager();
  PerModulePasses->add(new TargetData(*TheTarget->getTargetData()));
  bool HasPerModulePasses = false;

  if (optimize > 0 && !DisableLLVMOptimizations) {
    HasPerModulePasses = true;
    PassManager *PM = PerModulePasses;
    if (flag_unit_at_a_time)
      PM->add(createRaiseAllocationsPass());      // call %malloc -> malloc inst
    PM->add(createCFGSimplificationPass());       // Clean up disgusting code
    PM->add(createPromoteMemoryToRegisterPass()); // Kill useless allocas
    if (flag_unit_at_a_time) {
      PM->add(createGlobalOptimizerPass());       // Optimize out global vars
      PM->add(createGlobalDCEPass());             // Remove unused fns and globs
      PM->add(createIPConstantPropagationPass()); // IP Constant Propagation
      PM->add(createDeadArgEliminationPass());    // Dead argument elimination
    }
    PM->add(createInstructionCombiningPass());    // Clean up after IPCP & DAE
    // DISABLE PREDSIMPLIFY UNTIL PR967 is fixed.
    //PM->add(createPredicateSimplifierPass());   // Canonicalize registers
    PM->add(createCFGSimplificationPass());       // Clean up after IPCP & DAE
    if (flag_unit_at_a_time)
      PM->add(createPruneEHPass());               // Remove dead EH info

    if (optimize > 1) {
      if (flag_inline_trees > 1)                // respect -fno-inline-functions
        PM->add(createFunctionInliningPass());  // Inline small functions
      if (flag_unit_at_a_time && !lang_hooks.flag_no_builtin())
        PM->add(createSimplifyLibCallsPass());  // Library Call Optimizations

      if (optimize > 2)
    	PM->add(createArgumentPromotionPass()); // Scalarize uninlined fn args
    }
    
    PM->add(createTailDuplicationPass());       // Simplify cfg by copying code
    PM->add(createInstructionCombiningPass());  // Cleanup for scalarrepl.
    PM->add(createCFGSimplificationPass());     // Merge & remove BBs
    PM->add(createScalarReplAggregatesPass());  // Break up aggregate allocas
    PM->add(createInstructionCombiningPass());  // Combine silly seq's
    PM->add(createCondPropagationPass());       // Propagate conditionals
    PM->add(createTailCallEliminationPass());   // Eliminate tail calls
    PM->add(createCFGSimplificationPass());     // Merge & remove BBs
    PM->add(createReassociatePass());           // Reassociate expressions
    PM->add(createLoopRotatePass());            // Rotate Loop
    PM->add(createLICMPass());                  // Hoist loop invariants
    PM->add(createLoopUnswitchPass(optimize_size ? true : false));
    PM->add(createInstructionCombiningPass());  // Clean up after LICM/reassoc
    PM->add(createIndVarSimplifyPass());        // Canonicalize indvars
    if (flag_unroll_loops)
      PM->add(createLoopUnrollPass());          // Unroll small loops
    PM->add(createInstructionCombiningPass());  // Clean up after the unroller
    PM->add(createLoadValueNumberingPass());    // GVN for load instructions
    PM->add(createGCSEPass());                  // Remove common subexprs
    PM->add(createSCCPPass());                  // Constant prop with SCCP
    
    // Run instcombine after redundancy elimination to exploit opportunities
    // opened up by them.
    PM->add(createInstructionCombiningPass());
    PM->add(createCondPropagationPass());       // Propagate conditionals
    PM->add(createDeadStoreEliminationPass());  // Delete dead stores
    PM->add(createAggressiveDCEPass());         // SSA based 'Aggressive DCE'
    PM->add(createCFGSimplificationPass());     // Merge & remove BBs
    
    if (optimize > 1 && flag_unit_at_a_time)
      PM->add(createConstantMergePass());       // Merge dup global constants 
    PM->add(createStripDeadPrototypesPass());   // Get rid of dead prototypes
  }
  
  if (emit_llvm_bc) {
    // Emit an LLVM .bc file to the output.  This is used when passed
    // -emit-llvm -c to the GCC driver.
    PerModulePasses->add(CreateBitcodeWriterPass(*AsmOutStream));

    // Disable emission of .ident into the output file... which is completely
    // wrong for llvm/.bc emission cases.
    flag_no_ident = 1;
    HasPerModulePasses = true;
  } else if (emit_llvm) {
    // Emit an LLVM .ll file to the output.  This is used when passed 
    // -emit-llvm -S to the GCC driver.
    PerModulePasses->add(new PrintModulePass(AsmOutFile));
    
    // Disable emission of .ident into the output file... which is completely
    // wrong for llvm/.bc emission cases.
    flag_no_ident = 1;
    HasPerModulePasses = true;
  } else {
    FunctionPassManager *PM;
    
    // If there are passes we have to run on the entire module, we do codegen
    // as a separate "pass" after that happens.
    // FIXME: This is disabled right now until bugs can be worked out.  Reenable
    // this for fast -O0 compiles!
    if (HasPerModulePasses || 1) {
      CodeGenPasses = PM =
        new FunctionPassManager(new ExistingModuleProvider(TheModule));
      PM->add(new TargetData(*TheTarget->getTargetData()));
    } else {
      // If there are no module-level passes that have to be run, we codegen as
      // each function is parsed.
      PM = PerFunctionPasses;
      HasPerFunctionPasses = true;
    }

    // Normal mode, emit a .s file by running the code generator.
    switch (TheTarget->addPassesToEmitFile(*PM, *AsmOutStream,
                                           TargetMachine::AssemblyFile,
                                           /*FAST*/optimize == 0)) {
    default:
    case FileModel::Error:
      cerr << "Error interfacing to target machine!\n";
      exit(1);
    case FileModel::AsmFile:
      break;
    }

    if (TheTarget->addPassesToEmitFileFinish(*PM, 0, /*Fast*/optimize == 0)) {
      cerr << "Error interfacing to target machine!\n";
      exit(1);
    }
  }
  
  if (HasPerFunctionPasses) {
    PerFunctionPasses->doInitialization();
  } else {
    delete PerFunctionPasses;
    PerFunctionPasses = 0;
  }
  if (!HasPerModulePasses) {
    delete PerModulePasses;
    PerModulePasses = 0;
  }
}
Function * futamurize( const Function * orig_func, DenseMap<const Value*, Value*> &argmap, std::set<const unsigned char *> &constant_addresses_set )
{
	LLVMContext &context = getGlobalContext();
	
	
	// Make a copy of the function, removing constant arguments
	Function * specialized_func = CloneFunction( orig_func, argmap );
	specialized_func->setName( orig_func->getNameStr() + "_1" );
	
	// add it to our module
	LLVM_Module->getFunctionList().push_back( specialized_func );
	
	printf("\nspecialized_func = %p <%s>\n", specialized_func, specialized_func->getName().data());
	//~ specialized_func->dump();

	// Optimize it
	FunctionPassManager PM( LLVM_Module );
	createStandardFunctionPasses( &PM, 3 );
	
	PM.add(createScalarReplAggregatesPass());  // Break up aggregate allocas
	PM.add(createInstructionCombiningPass());  // Cleanup for scalarrepl.
	PM.add(createJumpThreadingPass());         // Thread jumps.
	PM.add(createCFGSimplificationPass());     // Merge & remove BBs
	PM.add(createInstructionCombiningPass());  // Combine silly seq's
	PM.add(createTailCallEliminationPass());   // Eliminate tail calls
	PM.add(createCFGSimplificationPass());     // Merge & remove BBs
	PM.add(createReassociatePass());           // Reassociate expressions
	PM.add(createLoopRotatePass());            // Rotate Loop
	PM.add(createLICMPass());                  // Hoist loop invariants
	PM.add(createLoopUnswitchPass( false ));
	PM.add(createInstructionCombiningPass());
	PM.add(createIndVarSimplifyPass());        // Canonicalize indvars
	PM.add(createLoopDeletionPass());          // Delete dead loops
	PM.add(createLoopUnroll2Pass());            // Unroll small loops
	PM.add(createInstructionCombiningPass());  // Clean up after the unroller
	PM.add(createGVNPass());                   // Remove redundancies
	PM.add(createMemCpyOptPass());             // Remove memcpy / form memset
	PM.add(createSCCPPass());                  // Constant prop with SCCP
	PM.add(createPromoteMemoryToRegisterPass()); 
	PM.add(createConstantPropagationPass());            
	PM.add(createDeadStoreEliminationPass());            
	PM.add(createAggressiveDCEPass());            
	PM.add(new MemoryDependenceAnalysis());            
	//~ PM.add(createAAEvalPass());              
	
	const PassInfo * pinfo = Pass::lookupPassInfo( "print-alias-sets" );
	if( !pinfo ) { printf( "print-alias-sets not found\n" ); exit(-1); }
	PM.add( pinfo->createPass() );
	
	FunctionPassManager PM_Inline( LLVM_Module );
	PM_Inline.add(createSingleFunctionInliningPass());            
	
	bool Changed = false;
	int iterations = 2;
	int inline_iterations = 6;
	
	do
	{
		Changed = false;
		
		// first do some optimizations
		PM.doInitialization();
		PM.run( *specialized_func );
		PM.doFinalization();
		
		// Load from Constant Memory detection
		const TargetData *TD = LLVM_EE->getTargetData();
		
		for (inst_iterator I = inst_begin(specialized_func), E = inst_end(specialized_func); I != E; ++I) 
		{
			Instruction * inst = (Instruction *) &*I;

			// get all Load instructions
			LoadInst * load = dyn_cast<LoadInst>( inst );
			if( !load ) continue;
			if( load->isVolatile() ) continue;

			if (load->use_empty()) continue;        // Don't muck with dead instructions...

			// get the address loaded by load instruction
			Value *ptr_value = load->getPointerOperand();
			
			// we're only interested in constant addresses
			ConstantExpr * ptr_constant_expr =  dyn_cast<ConstantExpr>( ptr_value );
			if( !ptr_constant_expr ) continue;			
			ptr_constant_expr->dump();
			
			// compute real address of constant pointer expression
			Constant * ptr_constant = ConstantFoldConstantExpression( ptr_constant_expr, TD );
			if( !ptr_constant ) continue;
			ptr_constant->dump();
			
			// convert to int constant
			ConstantInt *int_constant =  dyn_cast<ConstantInt>( ConstantExpr::getPtrToInt( ptr_constant, Type::getInt64Ty( context )));
			if( !int_constant ) continue;
			int_constant->dump();
			
			// get data size
			int data_length = TD->getTypeAllocSize( load->getType() );
			ptr_value->getType()->dump();
			
			// get real address (at last !)
			const unsigned char * c_ptr = (const unsigned char *) int_constant->getLimitedValue();
			
			printf( "%ld %d %d\n", c_ptr, constant_addresses_set.count( c_ptr ), data_length );
			
			// check what's in this address	
			int isconst = 1;
			for( int offset=0; offset<data_length; offset++ )
				isconst &= constant_addresses_set.count( c_ptr + offset );
			
			if( !isconst ) continue;
			printf( "It is constant.\n" );
			
			// make a LLVM const with the data
			Constant *new_constant = NULL;
			switch( data_length )
			{
				case 1:	new_constant = ConstantInt::get( Type::getInt8Ty( context ),  *(uint8_t*)c_ptr, false /* signed */ );	break;
				case 2:	new_constant = ConstantInt::get( Type::getInt16Ty( context ), *(uint16_t*)c_ptr, false /* signed */ );	break;
				case 4:	new_constant = ConstantInt::get( Type::getInt32Ty( context ), *(uint32_t*)c_ptr, false /* signed */ );	break;
				case 8:	new_constant = ConstantInt::get( Type::getInt64Ty( context ), *(uint64_t*)c_ptr, false /* signed */ );	break;
				default:
				{
					StringRef const_data ( (const char *) c_ptr, data_length );
					new_constant = ConstantArray::get( context, const_data, false /* dont add terminating null */ );
				}
			}
			
			if( !new_constant ) continue;
			
			new_constant->dump();
							
			//~ // get the type that is loaded
			const Type *Ty = load->getType();
			
			// do we need a cast ?
			if( load->getType() != new_constant->getType() )
			{
				new_constant = ConstantExpr::getBitCast( new_constant, Ty );
				new_constant->dump();
			}
			
			// zap the load and replace with constant address
			load->replaceAllUsesWith( new_constant );
			printf( "\nREPLACED :...\n" );
			load->dump();
			new_constant->dump();
			
			Changed = true;
		}	
		
		if( Changed )
			continue;	// re-optimize and do another pass of constant load elimination
		
		// if we can't do anything else, do an inlining pass
		if( inline_iterations > 0 )
		{
			inline_iterations --;
			
			PM_Inline.doInitialization();
			Changed |= PM_Inline.run( *specialized_func );
			PM_Inline.doFinalization();

			//~ for( int i=0; i<3; i++ )
			{
				PM.doInitialization();
				Changed |= PM.run( *specialized_func );
				PM.doFinalization();
			}
		}
		
		if( iterations>0 && !Changed ) 
			iterations--;
	} while( Changed || iterations>0 );
	
	return specialized_func;
}