Ejemplo n.º 1
0
  Context::Context(LLVMState* ls)
    : ls_(ls)
    , root_info_(0)
    , inlined_block_(false)
    , inline_depth_(0)
    , rds_(new jit::RuntimeDataHolder)
    , function_(0)
    , state_(0)
    , out_args_(0)
    , counter_(0)
  {
    VoidTy = Type::getVoidTy(ctx_);

    Int1Ty = Type::getInt1Ty(ctx_);
    Int8Ty = Type::getInt8Ty(ctx_);
    Int16Ty = Type::getInt16Ty(ctx_);
    Int32Ty = Type::getInt32Ty(ctx_);
    Int64Ty = Type::getInt64Ty(ctx_);

#ifdef IS_X8664
    IntPtrTy = Int64Ty;
#else
    IntPtrTy = Int32Ty;
#endif

    VoidPtrTy = llvm::PointerType::getUnqual(Int8Ty);

    FloatTy = Type::getFloatTy(ctx_);
    DoubleTy = Type::getDoubleTy(ctx_);

    Int8PtrTy = llvm::PointerType::getUnqual(Int8Ty);

    Zero = llvm::ConstantInt::get(Int32Ty, 0);
    One = llvm::ConstantInt::get(Int32Ty, 1);

    module_ = new llvm::Module("rubinius", ctx_);

    autogen_types::makeLLVMModuleContents(module_);

    llvm::EngineBuilder factory(module_);
    std::string error;

    factory.setAllocateGVsWithCode(false);
    memory_ = new jit::RubiniusRequestJITMemoryManager(ls->memory());
    factory.setJITMemoryManager(memory_);
    factory.setEngineKind(EngineKind::JIT);
    factory.setErrorStr(&error);

#if RBX_LLVM_API_VER > 300
    llvm::TargetOptions opts;
    opts.NoFramePointerElim = true;
    opts.NoFramePointerElimNonLeaf = true;
    opts.JITEmitDebugInfo = true;

    factory.setTargetOptions(opts);
#endif

    factory.setMCPU(ls_->cpu());

    engine_ = factory.create();
    if(!engine_) {
      std::cerr << "Error setting up LLVM Execution Engine: "<< error << std::endl;
      rubinius::bug("error configuring LLVM");
    }
    if(ls_->jit_event_listener()) {
      engine_->RegisterJITEventListener(ls_->jit_event_listener());
    }

    builder_ = new llvm::PassManagerBuilder();
    builder_->OptLevel = 2;
    passes_ = new llvm::FunctionPassManager(module_);

#if RBX_LLVM_API_VER >= 302
    module_->setDataLayout(engine_->getDataLayout()->getStringRepresentation());
    passes_->add(new llvm::DataLayout(*engine_->getDataLayout()));
#else
    module_->setDataLayout(engine_->getTargetData()->getStringRepresentation());
    passes_->add(new llvm::TargetData(*engine_->getTargetData()));
#endif

    builder_->populateFunctionPassManager(*passes_);

    // Eliminate unnecessary alloca.
    passes_->add(createPromoteMemoryToRegisterPass());
    // Do simple "peephole" optimizations and bit-twiddling optzns.
    passes_->add(createInstructionCombiningPass());
    // Reassociate expressions.
    passes_->add(createReassociatePass());
    // Eliminate Common SubExpressions.
    passes_->add(createGVNPass());
    passes_->add(createDeadStoreEliminationPass());

    passes_->add(createInstructionCombiningPass());

    // Simplify the control flow graph (deleting unreachable blocks, etc).
    passes_->add(createCFGSimplificationPass());

    passes_->add(create_rubinius_alias_analysis());
    passes_->add(createGVNPass());
    // passes_->add(createCFGSimplificationPass());
    passes_->add(createDeadStoreEliminationPass());
    // passes_->add(createVerifierPass());
    passes_->add(createScalarReplAggregatesPass());

    passes_->add(create_overflow_folding_pass());
    passes_->add(create_guard_eliminator_pass());

    passes_->add(createCFGSimplificationPass());
    passes_->add(createInstructionCombiningPass());
    passes_->add(createScalarReplAggregatesPass());
    passes_->add(createDeadStoreEliminationPass());
    passes_->add(createCFGSimplificationPass());
    passes_->add(createInstructionCombiningPass());
    passes_->doInitialization();

    ObjTy = ptr_type("Object");

    profiling_ = new GlobalVariable(
        *module_, Int1Ty, false,
        GlobalVariable::ExternalLinkage,
        0, "profiling_flag");

    metadata_id_ = ctx_.getMDKindID("rbx-classid");
  }
Ejemplo n.º 2
0
/// Optimize - Perform link time optimizations. This will run the scalar
/// optimizations, any loaded plugin-optimization modules, and then the
/// inter-procedural optimizations if applicable.
void Optimize(Module* M) {

  // Instantiate the pass manager to organize the passes.
  PassManager Passes;

  // If we're verifying, start off with a verification pass.
  if (VerifyEach)
    Passes.add(createVerifierPass());

  // Add an appropriate TargetData instance for this module...
#if LLVM_VERSION_CODE >= LLVM_VERSION(3, 1)
  addPass(Passes, new DataLayout(M));
#else
  addPass(Passes, new TargetData(M));
#endif

  // DWD - Run the opt standard pass list as well.
  AddStandardCompilePasses(Passes);

  if (!DisableOptimizations) {
    // Now that composite has been compiled, scan through the module, looking
    // for a main function.  If main is defined, mark all other functions
    // internal.
    if (!DisableInternalize)
#if LLVM_VERSION_CODE >= LLVM_VERSION(3, 2)
      addPass(Passes, createInternalizePass());
#else
      addPass(Passes, createInternalizePass(true));
#endif

    // Propagate constants at call sites into the functions they call.  This
    // opens opportunities for globalopt (and inlining) by substituting function
    // pointers passed as arguments to direct uses of functions.  
    addPass(Passes, createIPSCCPPass());

    // Now that we internalized some globals, see if we can hack on them!
    addPass(Passes, createGlobalOptimizerPass());

    // Linking modules together can lead to duplicated global constants, only
    // keep one copy of each constant...
    addPass(Passes, createConstantMergePass());

    // Remove unused arguments from functions...
    addPass(Passes, createDeadArgEliminationPass());

    // Reduce the code after globalopt and ipsccp.  Both can open up significant
    // simplification opportunities, and both can propagate functions through
    // function pointers.  When this happens, we often have to resolve varargs
    // calls, etc, so let instcombine do this.
    addPass(Passes, createInstructionCombiningPass());

    if (!DisableInline)
      addPass(Passes, createFunctionInliningPass()); // Inline small functions

    addPass(Passes, createPruneEHPass());            // Remove dead EH info
    addPass(Passes, createGlobalOptimizerPass());    // Optimize globals again.
    addPass(Passes, createGlobalDCEPass());          // Remove dead functions

    // If we didn't decide to inline a function, check to see if we can
    // transform it to pass arguments by value instead of by reference.
    addPass(Passes, createArgumentPromotionPass());

    // The IPO passes may leave cruft around.  Clean up after them.
    addPass(Passes, createInstructionCombiningPass());
    addPass(Passes, createJumpThreadingPass());        // Thread jumps.
    addPass(Passes, createScalarReplAggregatesPass()); // Break up allocas

    // Run a few AA driven optimizations here and now, to cleanup the code.
    addPass(Passes, createFunctionAttrsPass());      // Add nocapture
    addPass(Passes, createGlobalsModRefPass());      // IP alias analysis

    addPass(Passes, createLICMPass());               // Hoist loop invariants
    addPass(Passes, createGVNPass());                // Remove redundancies
    addPass(Passes, createMemCpyOptPass());          // Remove dead memcpy's
    addPass(Passes, createDeadStoreEliminationPass()); // Nuke dead stores

    // Cleanup and simplify the code after the scalar optimizations.
    addPass(Passes, createInstructionCombiningPass());

    addPass(Passes, createJumpThreadingPass());        // Thread jumps.
    addPass(Passes, createPromoteMemoryToRegisterPass()); // Cleanup jumpthread.
    
    // Delete basic blocks, which optimization passes may have killed...
    addPass(Passes, createCFGSimplificationPass());

    // Now that we have optimized the program, discard unreachable functions...
    addPass(Passes, createGlobalDCEPass());
  }

  // If the -s or -S command line options were specified, strip the symbols out
  // of the resulting program to make it smaller.  -s and -S are GNU ld options
  // that we are supporting; they alias -strip-all and -strip-debug.
  if (Strip || StripDebug)
    addPass(Passes, createStripSymbolsPass(StripDebug && !Strip));

#if 0
  // Create a new optimization pass for each one specified on the command line
  std::auto_ptr<TargetMachine> target;
  for (unsigned i = 0; i < OptimizationList.size(); ++i) {
    const PassInfo *Opt = OptimizationList[i];
    if (Opt->getNormalCtor())
      addPass(Passes, Opt->getNormalCtor()());
    else
      std::cerr << "llvm-ld: cannot create pass: "******"\n";
  }
#endif

  // The user's passes may leave cruft around. Clean up after them them but
  // only if we haven't got DisableOptimizations set
  if (!DisableOptimizations) {
    addPass(Passes, createInstructionCombiningPass());
    addPass(Passes, createCFGSimplificationPass());
    addPass(Passes, createAggressiveDCEPass());
    addPass(Passes, createGlobalDCEPass());
  }

  // Make sure everything is still good.
  if (!DontVerify)
    Passes.add(createVerifierPass());

  // Run our queue of passes all at once now, efficiently.
  Passes.run(*M);
}
Ejemplo n.º 3
0
static void AddStandardCompilePasses(PassManager &PM) {
  PM.add(createVerifierPass());                  // Verify that input is correct

#if LLVM_VERSION_CODE < LLVM_VERSION(3, 0)
  addPass(PM, createLowerSetJmpPass());          // Lower llvm.setjmp/.longjmp
#endif

  // If the -strip-debug command line option was specified, do it.
  if (StripDebug)
    addPass(PM, createStripSymbolsPass(true));

  if (DisableOptimizations) return;

#if LLVM_VERSION_CODE < LLVM_VERSION(2, 7)
  addPass(PM, createRaiseAllocationsPass());     // call %malloc -> malloc inst
#endif
  addPass(PM, createCFGSimplificationPass());    // Clean up disgusting code
  addPass(PM, createPromoteMemoryToRegisterPass());// Kill useless allocas
  addPass(PM, createGlobalOptimizerPass());      // Optimize out global vars
  addPass(PM, createGlobalDCEPass());            // Remove unused fns and globs
  addPass(PM, createIPConstantPropagationPass());// IP Constant Propagation
  addPass(PM, createDeadArgEliminationPass());   // Dead argument elimination
  addPass(PM, createInstructionCombiningPass()); // Clean up after IPCP & DAE
  addPass(PM, createCFGSimplificationPass());    // Clean up after IPCP & DAE

  addPass(PM, createPruneEHPass());              // Remove dead EH info
  addPass(PM, createFunctionAttrsPass());        // Deduce function attrs

  if (!DisableInline)
    addPass(PM, createFunctionInliningPass());   // Inline small functions
  addPass(PM, createArgumentPromotionPass());    // Scalarize uninlined fn args

  addPass(PM, createSimplifyLibCallsPass());     // Library Call Optimizations
  addPass(PM, createInstructionCombiningPass()); // Cleanup for scalarrepl.
  addPass(PM, createJumpThreadingPass());        // Thread jumps.
  addPass(PM, createCFGSimplificationPass());    // Merge & remove BBs
  addPass(PM, createScalarReplAggregatesPass()); // Break up aggregate allocas
  addPass(PM, createInstructionCombiningPass()); // Combine silly seq's
#if LLVM_VERSION_CODE < LLVM_VERSION(2, 7)
  addPass(PM, createCondPropagationPass());      // Propagate conditionals
#endif

  addPass(PM, createTailCallEliminationPass());  // Eliminate tail calls
  addPass(PM, createCFGSimplificationPass());    // Merge & remove BBs
  addPass(PM, createReassociatePass());          // Reassociate expressions
  addPass(PM, createLoopRotatePass());
  addPass(PM, createLICMPass());                 // Hoist loop invariants
  addPass(PM, createLoopUnswitchPass());         // Unswitch loops.
#if LLVM_VERSION_CODE < LLVM_VERSION(2, 9)
  addPass(PM, createLoopIndexSplitPass());       // Index split loops.
#endif
  // FIXME : Removing instcombine causes nestedloop regression.
  addPass(PM, createInstructionCombiningPass());
  addPass(PM, createIndVarSimplifyPass());       // Canonicalize indvars
  addPass(PM, createLoopDeletionPass());         // Delete dead loops
  addPass(PM, createLoopUnrollPass());           // Unroll small loops
  addPass(PM, createInstructionCombiningPass()); // Clean up after the unroller
  addPass(PM, createGVNPass());                  // Remove redundancies
  addPass(PM, createMemCpyOptPass());            // Remove memcpy / form memset
  addPass(PM, createSCCPPass());                 // Constant prop with SCCP

  // Run instcombine after redundancy elimination to exploit opportunities
  // opened up by them.
  addPass(PM, createInstructionCombiningPass());
#if LLVM_VERSION_CODE < LLVM_VERSION(2, 7)
  addPass(PM, createCondPropagationPass());      // Propagate conditionals
#endif

  addPass(PM, createDeadStoreEliminationPass()); // Delete dead stores
  addPass(PM, createAggressiveDCEPass());        // Delete dead instructions
  addPass(PM, createCFGSimplificationPass());    // Merge & remove BBs
  addPass(PM, createStripDeadPrototypesPass());  // Get rid of dead prototypes
#if LLVM_VERSION_CODE < LLVM_VERSION(3, 0)
  addPass(PM, createDeadTypeEliminationPass());  // Eliminate dead types
#endif
  addPass(PM, createConstantMergePass());        // Merge dup global constants
}
Function * futamurize( const Function * orig_func, DenseMap<const Value*, Value*> &argmap, std::set<const unsigned char *> &constant_addresses_set )
{
	LLVMContext &context = getGlobalContext();
	
	
	// Make a copy of the function, removing constant arguments
	Function * specialized_func = CloneFunction( orig_func, argmap );
	specialized_func->setName( orig_func->getNameStr() + "_1" );
	
	// add it to our module
	LLVM_Module->getFunctionList().push_back( specialized_func );
	
	printf("\nspecialized_func = %p <%s>\n", specialized_func, specialized_func->getName().data());
	//~ specialized_func->dump();

	// Optimize it
	FunctionPassManager PM( LLVM_Module );
	createStandardFunctionPasses( &PM, 3 );
	
	PM.add(createScalarReplAggregatesPass());  // Break up aggregate allocas
	PM.add(createInstructionCombiningPass());  // Cleanup for scalarrepl.
	PM.add(createJumpThreadingPass());         // Thread jumps.
	PM.add(createCFGSimplificationPass());     // Merge & remove BBs
	PM.add(createInstructionCombiningPass());  // Combine silly seq's
	PM.add(createTailCallEliminationPass());   // Eliminate tail calls
	PM.add(createCFGSimplificationPass());     // Merge & remove BBs
	PM.add(createReassociatePass());           // Reassociate expressions
	PM.add(createLoopRotatePass());            // Rotate Loop
	PM.add(createLICMPass());                  // Hoist loop invariants
	PM.add(createLoopUnswitchPass( false ));
	PM.add(createInstructionCombiningPass());
	PM.add(createIndVarSimplifyPass());        // Canonicalize indvars
	PM.add(createLoopDeletionPass());          // Delete dead loops
	PM.add(createLoopUnroll2Pass());            // Unroll small loops
	PM.add(createInstructionCombiningPass());  // Clean up after the unroller
	PM.add(createGVNPass());                   // Remove redundancies
	PM.add(createMemCpyOptPass());             // Remove memcpy / form memset
	PM.add(createSCCPPass());                  // Constant prop with SCCP
	PM.add(createPromoteMemoryToRegisterPass()); 
	PM.add(createConstantPropagationPass());            
	PM.add(createDeadStoreEliminationPass());            
	PM.add(createAggressiveDCEPass());            
	PM.add(new MemoryDependenceAnalysis());            
	//~ PM.add(createAAEvalPass());              
	
	const PassInfo * pinfo = Pass::lookupPassInfo( "print-alias-sets" );
	if( !pinfo ) { printf( "print-alias-sets not found\n" ); exit(-1); }
	PM.add( pinfo->createPass() );
	
	FunctionPassManager PM_Inline( LLVM_Module );
	PM_Inline.add(createSingleFunctionInliningPass());            
	
	bool Changed = false;
	int iterations = 2;
	int inline_iterations = 6;
	
	do
	{
		Changed = false;
		
		// first do some optimizations
		PM.doInitialization();
		PM.run( *specialized_func );
		PM.doFinalization();
		
		// Load from Constant Memory detection
		const TargetData *TD = LLVM_EE->getTargetData();
		
		for (inst_iterator I = inst_begin(specialized_func), E = inst_end(specialized_func); I != E; ++I) 
		{
			Instruction * inst = (Instruction *) &*I;

			// get all Load instructions
			LoadInst * load = dyn_cast<LoadInst>( inst );
			if( !load ) continue;
			if( load->isVolatile() ) continue;

			if (load->use_empty()) continue;        // Don't muck with dead instructions...

			// get the address loaded by load instruction
			Value *ptr_value = load->getPointerOperand();
			
			// we're only interested in constant addresses
			ConstantExpr * ptr_constant_expr =  dyn_cast<ConstantExpr>( ptr_value );
			if( !ptr_constant_expr ) continue;			
			ptr_constant_expr->dump();
			
			// compute real address of constant pointer expression
			Constant * ptr_constant = ConstantFoldConstantExpression( ptr_constant_expr, TD );
			if( !ptr_constant ) continue;
			ptr_constant->dump();
			
			// convert to int constant
			ConstantInt *int_constant =  dyn_cast<ConstantInt>( ConstantExpr::getPtrToInt( ptr_constant, Type::getInt64Ty( context )));
			if( !int_constant ) continue;
			int_constant->dump();
			
			// get data size
			int data_length = TD->getTypeAllocSize( load->getType() );
			ptr_value->getType()->dump();
			
			// get real address (at last !)
			const unsigned char * c_ptr = (const unsigned char *) int_constant->getLimitedValue();
			
			printf( "%ld %d %d\n", c_ptr, constant_addresses_set.count( c_ptr ), data_length );
			
			// check what's in this address	
			int isconst = 1;
			for( int offset=0; offset<data_length; offset++ )
				isconst &= constant_addresses_set.count( c_ptr + offset );
			
			if( !isconst ) continue;
			printf( "It is constant.\n" );
			
			// make a LLVM const with the data
			Constant *new_constant = NULL;
			switch( data_length )
			{
				case 1:	new_constant = ConstantInt::get( Type::getInt8Ty( context ),  *(uint8_t*)c_ptr, false /* signed */ );	break;
				case 2:	new_constant = ConstantInt::get( Type::getInt16Ty( context ), *(uint16_t*)c_ptr, false /* signed */ );	break;
				case 4:	new_constant = ConstantInt::get( Type::getInt32Ty( context ), *(uint32_t*)c_ptr, false /* signed */ );	break;
				case 8:	new_constant = ConstantInt::get( Type::getInt64Ty( context ), *(uint64_t*)c_ptr, false /* signed */ );	break;
				default:
				{
					StringRef const_data ( (const char *) c_ptr, data_length );
					new_constant = ConstantArray::get( context, const_data, false /* dont add terminating null */ );
				}
			}
			
			if( !new_constant ) continue;
			
			new_constant->dump();
							
			//~ // get the type that is loaded
			const Type *Ty = load->getType();
			
			// do we need a cast ?
			if( load->getType() != new_constant->getType() )
			{
				new_constant = ConstantExpr::getBitCast( new_constant, Ty );
				new_constant->dump();
			}
			
			// zap the load and replace with constant address
			load->replaceAllUsesWith( new_constant );
			printf( "\nREPLACED :...\n" );
			load->dump();
			new_constant->dump();
			
			Changed = true;
		}	
		
		if( Changed )
			continue;	// re-optimize and do another pass of constant load elimination
		
		// if we can't do anything else, do an inlining pass
		if( inline_iterations > 0 )
		{
			inline_iterations --;
			
			PM_Inline.doInitialization();
			Changed |= PM_Inline.run( *specialized_func );
			PM_Inline.doFinalization();

			//~ for( int i=0; i<3; i++ )
			{
				PM.doInitialization();
				Changed |= PM.run( *specialized_func );
				PM.doFinalization();
			}
		}
		
		if( iterations>0 && !Changed ) 
			iterations--;
	} while( Changed || iterations>0 );
	
	return specialized_func;
}