/// MatchOpcodeHeuristic - Predict that a comparison of an integer for less
/// than zero, less than or equal to zero, or equal to a constant, will
/// fail.
/// @returns a Prediction that is a pair in which the first element is the
/// successor taken, and the second the successor not taken.
Prediction BranchHeuristicsInfo::MatchOpcodeHeuristic(BasicBlock *root) const {
  // Last instruction of basic block.
  TerminatorInst *TI = root->getTerminator();

  // Basic block successors, the true and false branches.
  BasicBlock *trueSuccessor = TI->getSuccessor(0);
  BasicBlock *falseSuccessor = TI->getSuccessor(1);

  // Is the last instruction a Branch Instruction?
  BranchInst *BI = dyn_cast<BranchInst>(TI);
  if (!BI || !BI->isConditional())
    return empty;

  // Conditional instruction.
  Value *cond = BI->getCondition();

  // Heuristics can only apply to integer comparisons.
  ICmpInst *II = dyn_cast<ICmpInst>(cond);
  if (!II)
    return empty;

  // An integer comparison has always to operands.
  Value *operand1 = II->getOperand(0);
  ConstantInt *op1const = dyn_cast<ConstantInt>(operand1);

  Value *operand2 = II->getOperand(1);
  ConstantInt *op2const = dyn_cast<ConstantInt>(operand2);

  // The type of comparison used.
  enum ICmpInst::Predicate pred = II->getUnsignedPredicate();

  // The return successors (the first taken and the second not taken).
  Edge falseEdge = std::make_pair(falseSuccessor, trueSuccessor);
  Edge trueEdge  = std::make_pair(trueSuccessor, falseSuccessor);

  // Check several comparison operators.
  switch (pred) {
    case ICmpInst::ICMP_EQ: // if ($var == constant) or if (constant == $var).
      // If it's a equal comparison against a constant integer, match.
      if (op1const || op2const)
        return falseEdge;

      break;
    case ICmpInst::ICMP_NE: // if ($var != constant) or if (constant != $var).
      // If it's a not equal comparison against a constant integer, match.
      if (op1const || op2const)
        return trueEdge;

      break;
    case ICmpInst::ICMP_SLT: // if ($var < 0) or if (0 < $var).
    case ICmpInst::ICMP_ULT:
      if (!op1const && (op2const && op2const->isZero()))
        return falseEdge;
      else if (!op2const && (op1const && op1const->isZero()))
        return trueEdge;

      break;
    case ICmpInst::ICMP_SLE: // if ($var <= 0) or if (0 <= $var).
    case ICmpInst::ICMP_ULE:
      if (!op1const && (op2const && op2const->isZero()))
        return falseEdge;
      else if (!op2const && (op1const && op1const->isZero()))
        return trueEdge;

      break;
    case ICmpInst::ICMP_SGT: // if ($var > 0) or if (0 > $var).
    case ICmpInst::ICMP_UGT:
      if (!op1const && (op2const && op2const->isZero()))
        return trueEdge;
      else if (!op2const && (op1const && op1const->isZero()))
        return falseEdge;

      break;
    case ICmpInst::ICMP_SGE: // if ($var >= 0) or if (0 >= $var).
    case ICmpInst::ICMP_UGE:
      if (!op1const && (op2const && op2const->isZero()))
        return trueEdge;
      else if (!op2const && (op1const && op1const->isZero()))
        return falseEdge;

      break;
    default: // Do not process any other comparison operators.
      break;
  }

  // Heuristic not matched.
  return empty;
}
/// MatchGuardHeuristic - Predict that a comparison in which a register is
/// an operand, the register is used before being defined in a successor
/// block, and the successor block does not post-dominate will reach the
/// successor block.
/// @returns a Prediction that is a pair in which the first element is the
/// successor taken, and the second the successor not taken.
Prediction BranchHeuristicsInfo::MatchGuardHeuristic(BasicBlock *root) const {
  bool matched = false;
  Prediction pred;

  // Last instruction of basic block.
  TerminatorInst *TI = root->getTerminator();

  // Basic block successors. True and False branches.
  BasicBlock *trueSuccessor = TI->getSuccessor(0);
  BasicBlock *falseSuccessor = TI->getSuccessor(1);

  // Is the last instruction a Branch Instruction?
  BranchInst *BI = dyn_cast<BranchInst>(TI);
  if (!BI || !BI->isConditional())
    return empty;

  // Conditional instruction.
  Value *cond = BI->getCondition();

  // Find if the variable used in the branch instruction is
  // in fact a comparison instruction.
  CmpInst *CI = dyn_cast<CmpInst>(cond);
  if (!CI)
    return empty;

  // Seek over all of the operands of this comparison instruction.
  for (unsigned ops = 0; ops < CI->getNumOperands(); ++ops) {
    // Find the operand.
    Value *operand = CI->getOperand(ops);

    // Check if the operand is neither a function argument or a value.
    if (!isa<Argument>(operand) && !isa<User>(operand))
      continue;

    // Check if this variable was used in the true successor and
    // does not post dominate.
    // Since LLVM is in SSA form, it's impossible for a variable being used
    // before being defined, so that statement is skipped.
    if (operand->isUsedInBasicBlock(trueSuccessor) &&
        !PDT->dominates(trueSuccessor, root)) {
      // If a heuristic was already matched, predict none and abort immediately.
      if (matched)
        return empty;

      matched = true;
      pred = std::make_pair(trueSuccessor, falseSuccessor);
    }

    // Check if this variable was used in the false successor and
    // does not post dominate.
    if (operand->isUsedInBasicBlock(falseSuccessor) &&
        !PDT->dominates(falseSuccessor, root)) {
      // If a heuristic was already matched, predict none and abort immediately.
      if (matched)
        return empty;

      matched = true;
      pred = std::make_pair(falseSuccessor, trueSuccessor);
    }
  }

  return (matched ? pred : empty);
}
Ejemplo n.º 3
0
void GCOVProfiler::emitProfileNotes() {
  NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu");
  if (!CU_Nodes) return;

  for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
    // Each compile unit gets its own .gcno file. This means that whether we run
    // this pass over the original .o's as they're produced, or run it after
    // LTO, we'll generate the same .gcno files.

    DICompileUnit CU(CU_Nodes->getOperand(i));
    std::error_code EC;
    raw_fd_ostream out(mangleName(CU, "gcno"), EC, sys::fs::F_None);
    std::string EdgeDestinations;

    DIArray SPs = CU.getSubprograms();
    unsigned FunctionIdent = 0;
    for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) {
      DISubprogram SP(SPs.getElement(i));
      assert((!SP || SP.isSubprogram()) &&
        "A MDNode in subprograms of a CU should be null or a DISubprogram.");
      if (!SP)
        continue;

      Function *F = SP.getFunction();
      if (!F) continue;
      if (!functionHasLines(F)) continue;

      // gcov expects every function to start with an entry block that has a
      // single successor, so split the entry block to make sure of that.
      BasicBlock &EntryBlock = F->getEntryBlock();
      BasicBlock::iterator It = EntryBlock.begin();
      while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It))
        ++It;
      EntryBlock.splitBasicBlock(It);

      Funcs.push_back(make_unique<GCOVFunction>(SP, &out, FunctionIdent++,
                                                Options.UseCfgChecksum,
                                                Options.ExitBlockBeforeBody));
      GCOVFunction &Func = *Funcs.back();

      for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
        GCOVBlock &Block = Func.getBlock(BB);
        TerminatorInst *TI = BB->getTerminator();
        if (int successors = TI->getNumSuccessors()) {
          for (int i = 0; i != successors; ++i) {
            Block.addEdge(Func.getBlock(TI->getSuccessor(i)));
          }
        } else if (isa<ReturnInst>(TI)) {
          Block.addEdge(Func.getReturnBlock());
        }

        uint32_t Line = 0;
        for (BasicBlock::iterator I = BB->begin(), IE = BB->end();
             I != IE; ++I) {
          // Debug intrinsic locations correspond to the location of the
          // declaration, not necessarily any statements or expressions.
          if (isa<DbgInfoIntrinsic>(I)) continue;

          const DebugLoc &Loc = I->getDebugLoc();
          if (!Loc)
            continue;

          // Artificial lines such as calls to the global constructors.
          if (Loc.getLine() == 0) continue;

          if (Line == Loc.getLine()) continue;
          Line = Loc.getLine();
          if (SP != getDISubprogram(Loc.getScope()))
            continue;

          GCOVLines &Lines = Block.getFile(SP.getFilename());
          Lines.addLine(Loc.getLine());
        }
      }
      EdgeDestinations += Func.getEdgeDestinations();
    }

    FileChecksums.push_back(hash_value(EdgeDestinations));
    out.write("oncg", 4);
    out.write(ReversedVersion, 4);
    out.write(reinterpret_cast<char*>(&FileChecksums.back()), 4);

    for (auto &Func : Funcs) {
      Func->setCfgChecksum(FileChecksums.back());
      Func->writeOut();
    }

    out.write("\0\0\0\0\0\0\0\0", 8);  // EOF
    out.close();
  }
}
Ejemplo n.º 4
0
bool GCOVProfiler::emitProfileArcs() {
  NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu");
  if (!CU_Nodes) return false;

  bool Result = false;
  bool InsertIndCounterIncrCode = false;
  for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
    DICompileUnit CU(CU_Nodes->getOperand(i));
    DIArray SPs = CU.getSubprograms();
    SmallVector<std::pair<GlobalVariable *, MDNode *>, 8> CountersBySP;
    for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) {
      DISubprogram SP(SPs.getElement(i));
      assert((!SP || SP.isSubprogram()) &&
        "A MDNode in subprograms of a CU should be null or a DISubprogram.");
      if (!SP)
        continue;
      Function *F = SP.getFunction();
      if (!F) continue;
      if (!functionHasLines(F)) continue;
      if (!Result) Result = true;
      unsigned Edges = 0;
      for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
        TerminatorInst *TI = BB->getTerminator();
        if (isa<ReturnInst>(TI))
          ++Edges;
        else
          Edges += TI->getNumSuccessors();
      }

      ArrayType *CounterTy =
        ArrayType::get(Type::getInt64Ty(*Ctx), Edges);
      GlobalVariable *Counters =
        new GlobalVariable(*M, CounterTy, false,
                           GlobalValue::InternalLinkage,
                           Constant::getNullValue(CounterTy),
                           "__llvm_gcov_ctr");
      CountersBySP.push_back(std::make_pair(Counters, (MDNode*)SP));

      UniqueVector<BasicBlock *> ComplexEdgePreds;
      UniqueVector<BasicBlock *> ComplexEdgeSuccs;

      unsigned Edge = 0;
      for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
        TerminatorInst *TI = BB->getTerminator();
        int Successors = isa<ReturnInst>(TI) ? 1 : TI->getNumSuccessors();
        if (Successors) {
          if (Successors == 1) {
            IRBuilder<> Builder(BB->getFirstInsertionPt());
            Value *Counter = Builder.CreateConstInBoundsGEP2_64(Counters, 0,
                                                                Edge);
            Value *Count = Builder.CreateLoad(Counter);
            Count = Builder.CreateAdd(Count, Builder.getInt64(1));
            Builder.CreateStore(Count, Counter);
          } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
            IRBuilder<> Builder(BI);
            Value *Sel = Builder.CreateSelect(BI->getCondition(),
                                              Builder.getInt64(Edge),
                                              Builder.getInt64(Edge + 1));
            SmallVector<Value *, 2> Idx;
            Idx.push_back(Builder.getInt64(0));
            Idx.push_back(Sel);
            Value *Counter = Builder.CreateInBoundsGEP(Counters->getValueType(),
                                                       Counters, Idx);
            Value *Count = Builder.CreateLoad(Counter);
            Count = Builder.CreateAdd(Count, Builder.getInt64(1));
            Builder.CreateStore(Count, Counter);
          } else {
            ComplexEdgePreds.insert(BB);
            for (int i = 0; i != Successors; ++i)
              ComplexEdgeSuccs.insert(TI->getSuccessor(i));
          }

          Edge += Successors;
        }
      }

      if (!ComplexEdgePreds.empty()) {
        GlobalVariable *EdgeTable =
          buildEdgeLookupTable(F, Counters,
                               ComplexEdgePreds, ComplexEdgeSuccs);
        GlobalVariable *EdgeState = getEdgeStateValue();

        for (int i = 0, e = ComplexEdgePreds.size(); i != e; ++i) {
          IRBuilder<> Builder(ComplexEdgePreds[i + 1]->getFirstInsertionPt());
          Builder.CreateStore(Builder.getInt32(i), EdgeState);
        }

        for (int i = 0, e = ComplexEdgeSuccs.size(); i != e; ++i) {
          // Call runtime to perform increment.
          IRBuilder<> Builder(ComplexEdgeSuccs[i+1]->getFirstInsertionPt());
          Value *CounterPtrArray =
            Builder.CreateConstInBoundsGEP2_64(EdgeTable, 0,
                                               i * ComplexEdgePreds.size());

          // Build code to increment the counter.
          InsertIndCounterIncrCode = true;
          Builder.CreateCall2(getIncrementIndirectCounterFunc(),
                              EdgeState, CounterPtrArray);
        }
      }
    }

    Function *WriteoutF = insertCounterWriteout(CountersBySP);
    Function *FlushF = insertFlush(CountersBySP);

    // Create a small bit of code that registers the "__llvm_gcov_writeout" to
    // be executed at exit and the "__llvm_gcov_flush" function to be executed
    // when "__gcov_flush" is called.
    FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
    Function *F = Function::Create(FTy, GlobalValue::InternalLinkage,
                                   "__llvm_gcov_init", M);
    F->setUnnamedAddr(true);
    F->setLinkage(GlobalValue::InternalLinkage);
    F->addFnAttr(Attribute::NoInline);
    if (Options.NoRedZone)
      F->addFnAttr(Attribute::NoRedZone);

    BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", F);
    IRBuilder<> Builder(BB);

    FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
    Type *Params[] = {
      PointerType::get(FTy, 0),
      PointerType::get(FTy, 0)
    };
    FTy = FunctionType::get(Builder.getVoidTy(), Params, false);

    // Initialize the environment and register the local writeout and flush
    // functions.
    Constant *GCOVInit = M->getOrInsertFunction("llvm_gcov_init", FTy);
    Builder.CreateCall2(GCOVInit, WriteoutF, FlushF);
    Builder.CreateRetVoid();

    appendToGlobalCtors(*M, F, 0);
  }

  if (InsertIndCounterIncrCode)
    insertIndirectCounterIncrement();

  return Result;
}
Ejemplo n.º 5
0
// In this pass we look for GEP and cast instructions that are used
// across basic blocks and rewrite them to improve basic-block-at-a-time
// selection.
bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) {
  bool MadeChange = false;

  // Split all critical edges where the dest block has a PHI.
  TerminatorInst *BBTI = BB.getTerminator();
  if (BBTI->getNumSuccessors() > 1 && !isa<IndirectBrInst>(BBTI)) {
    for (unsigned i = 0, e = BBTI->getNumSuccessors(); i != e; ++i) {
      BasicBlock *SuccBB = BBTI->getSuccessor(i);
      if (isa<PHINode>(SuccBB->begin()) && isCriticalEdge(BBTI, i, true))
        SplitEdgeNicely(BBTI, i, BackEdges, this);
    }
  }

  // Keep track of non-local addresses that have been sunk into this block.
  // This allows us to avoid inserting duplicate code for blocks with multiple
  // load/stores of the same address.
  DenseMap<Value*, Value*> SunkAddrs;

  for (BasicBlock::iterator BBI = BB.begin(), E = BB.end(); BBI != E; ) {
    Instruction *I = BBI++;

    if (CastInst *CI = dyn_cast<CastInst>(I)) {
      // If the source of the cast is a constant, then this should have
      // already been constant folded.  The only reason NOT to constant fold
      // it is if something (e.g. LSR) was careful to place the constant
      // evaluation in a block other than then one that uses it (e.g. to hoist
      // the address of globals out of a loop).  If this is the case, we don't
      // want to forward-subst the cast.
      if (isa<Constant>(CI->getOperand(0)))
        continue;

      bool Change = false;
      if (TLI) {
        Change = OptimizeNoopCopyExpression(CI, *TLI);
        MadeChange |= Change;
      }

      if (!Change && (isa<ZExtInst>(I) || isa<SExtInst>(I))) {
        MadeChange |= MoveExtToFormExtLoad(I);
        MadeChange |= OptimizeExtUses(I);
      }
    } else if (CmpInst *CI = dyn_cast<CmpInst>(I)) {
      MadeChange |= OptimizeCmpExpression(CI);
    } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
      if (TLI)
        MadeChange |= OptimizeMemoryInst(I, I->getOperand(0), LI->getType(),
                                         SunkAddrs);
    } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
      if (TLI)
        MadeChange |= OptimizeMemoryInst(I, SI->getOperand(1),
                                         SI->getOperand(0)->getType(),
                                         SunkAddrs);
    } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
      if (GEPI->hasAllZeroIndices()) {
        /// The GEP operand must be a pointer, so must its result -> BitCast
        Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),
                                          GEPI->getName(), GEPI);
        GEPI->replaceAllUsesWith(NC);
        GEPI->eraseFromParent();
        MadeChange = true;
        BBI = NC;
      }
    } else if (CallInst *CI = dyn_cast<CallInst>(I)) {
      // If we found an inline asm expession, and if the target knows how to
      // lower it to normal LLVM code, do so now.
      if (TLI && isa<InlineAsm>(CI->getCalledValue())) {
        if (TLI->ExpandInlineAsm(CI)) {
          BBI = BB.begin();
          // Avoid processing instructions out of order, which could cause
          // reuse before a value is defined.
          SunkAddrs.clear();
        } else
          // Sink address computing for memory operands into the block.
          MadeChange |= OptimizeInlineAsmInst(I, &(*CI), SunkAddrs);
      } else {
        // Other CallInst optimizations that don't need to muck with the
        // enclosing iterator here.
        MadeChange |= OptimizeCallInst(CI);
      }
    }
  }

  return MadeChange;
}
Ejemplo n.º 6
0
bool LoaderPass::runOnModule(Module &M) {
  ProfileInfoLoader PIL("profile-loader", Filename);

  EdgeInformation.clear();
  std::vector<unsigned> Counters = PIL.getRawEdgeCounts();
  if (Counters.size() > 0) {
    ReadCount = 0;
    for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
      if (F->isDeclaration()) continue;
      DEBUG(dbgs() << "Working on " << F->getName() << "\n");
      readEdge(getEdge(0,&F->getEntryBlock()), Counters);
      for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
        TerminatorInst *TI = BB->getTerminator();
        for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; ++s) {
          readEdge(getEdge(BB,TI->getSuccessor(s)), Counters);
        }
      }
    }
    if (ReadCount != Counters.size()) {
      errs() << "WARNING: profile information is inconsistent with "
             << "the current program!\n";
    }
    NumEdgesRead = ReadCount;
  }

  Counters = PIL.getRawOptimalEdgeCounts();
  if (Counters.size() > 0) {
    ReadCount = 0;
    for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
      if (F->isDeclaration()) continue;
      DEBUG(dbgs() << "Working on " << F->getName() << "\n");
      readEdge(getEdge(0,&F->getEntryBlock()), Counters);
      for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
        TerminatorInst *TI = BB->getTerminator();
        if (TI->getNumSuccessors() == 0) {
          readEdge(getEdge(BB,0), Counters);
        }
        for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; ++s) {
          readEdge(getEdge(BB,TI->getSuccessor(s)), Counters);
        }
      }
      while (SpanningTree.size() > 0) {

        unsigned size = SpanningTree.size();

        BBisUnvisited.clear();
        for (std::set<Edge>::iterator ei = SpanningTree.begin(),
             ee = SpanningTree.end(); ei != ee; ++ei) {
          BBisUnvisited.insert(ei->first);
          BBisUnvisited.insert(ei->second);
        }
        while (BBisUnvisited.size() > 0) {
          recurseBasicBlock(*BBisUnvisited.begin());
        }

        if (SpanningTree.size() == size) {
          DEBUG(dbgs()<<"{");
          for (std::set<Edge>::iterator ei = SpanningTree.begin(),
               ee = SpanningTree.end(); ei != ee; ++ei) {
            DEBUG(dbgs()<< *ei <<",");
          }
          assert(0 && "No edge calculated!");
        }

      }
    }
    if (ReadCount != Counters.size()) {
      errs() << "WARNING: profile information is inconsistent with "
             << "the current program!\n";
    }
    NumEdgesRead = ReadCount;
  }

  BlockInformation.clear();
  Counters = PIL.getRawBlockCounts();
  if (Counters.size() > 0) {
    ReadCount = 0;
    for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
      if (F->isDeclaration()) continue;
      for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
        if (ReadCount < Counters.size())
          // Here the data realm changes from the unsigned of the file to the
          // double of the ProfileInfo. This conversion is save because we know
          // that everything thats representable in unsinged is also
          // representable in double.
          BlockInformation[F][BB] = (double)Counters[ReadCount++];
    }
    if (ReadCount != Counters.size()) {
      errs() << "WARNING: profile information is inconsistent with "
             << "the current program!\n";
    }
  }

  FunctionInformation.clear();
  Counters = PIL.getRawFunctionCounts();
  if (Counters.size() > 0) {
    ReadCount = 0;
    for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
      if (F->isDeclaration()) continue;
      if (ReadCount < Counters.size())
        // Here the data realm changes from the unsigned of the file to the
        // double of the ProfileInfo. This conversion is save because we know
        // that everything thats representable in unsinged is also
        // representable in double.
        FunctionInformation[F] = (double)Counters[ReadCount++];
    }
    if (ReadCount != Counters.size()) {
      errs() << "WARNING: profile information is inconsistent with "
             << "the current program!\n";
    }
  }

  return false;
}
Ejemplo n.º 7
0
bool EdgeProfiler::runOnModule(Module &M) {
    Function *Main = M.getFunction("main");
    if (Main == 0) {
        errs() << "WARNING: cannot insert edge profiling into a module"
               << " with no main function!\n";
        return false;  // No main, no instrumentation!
    }

    std::set<BasicBlock*> BlocksToInstrument;
    unsigned NumEdges = 0;
    for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
        if (F->isDeclaration()) continue;
        // Reserve space for (0,entry) edge.
        ++NumEdges;
        for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
            // Keep track of which blocks need to be instrumented.  We don't want to
            // instrument blocks that are added as the result of breaking critical
            // edges!
            BlocksToInstrument.insert(BB);
            NumEdges += BB->getTerminator()->getNumSuccessors();
        }
    }

    const Type *ATy = ArrayType::get(Type::getInt32Ty(M.getContext()), NumEdges);
    GlobalVariable *Counters =
        new GlobalVariable(M, ATy, false, GlobalValue::InternalLinkage,
                           Constant::getNullValue(ATy), "EdgeProfCounters");
    NumEdgesInserted = NumEdges;

    // Instrument all of the edges...
    unsigned i = 0;
    for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
        if (F->isDeclaration()) continue;
        // Create counter for (0,entry) edge.
        IncrementCounterInBlock(&F->getEntryBlock(), i++, Counters);
        for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
            if (BlocksToInstrument.count(BB)) {  // Don't instrument inserted blocks
                // Okay, we have to add a counter of each outgoing edge.  If the
                // outgoing edge is not critical don't split it, just insert the counter
                // in the source or destination of the edge.
                TerminatorInst *TI = BB->getTerminator();
                for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; ++s) {
                    // If the edge is critical, split it.
                    SplitCriticalEdge(TI, s, this);

                    // Okay, we are guaranteed that the edge is no longer critical.  If we
                    // only have a single successor, insert the counter in this block,
                    // otherwise insert it in the successor block.
                    if (TI->getNumSuccessors() == 1) {
                        // Insert counter at the start of the block
                        IncrementCounterInBlock(BB, i++, Counters);
                    } else {
                        // Insert counter at the start of the block
                        IncrementCounterInBlock(TI->getSuccessor(s), i++, Counters);
                    }
                }
            }
    }

    // Add the initialization call to main.
    InsertProfilingInitCall(Main, "llvm_start_edge_profiling", Counters);
    return true;
}
Ejemplo n.º 8
0
bool GCOVProfiler::emitProfileArcs() {
  NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu");
  if (!CU_Nodes) return false;

  bool Result = false;  
  bool InsertIndCounterIncrCode = false;
  for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
    DICompileUnit CU(CU_Nodes->getOperand(i));
    DIArray SPs = CU.getSubprograms();
    SmallVector<std::pair<GlobalVariable *, MDNode *>, 8> CountersBySP;
    for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) {
      DISubprogram SP(SPs.getElement(i));
      if (!SP.Verify()) continue;
      Function *F = SP.getFunction();
      if (!F) continue;
      if (!Result) Result = true;
      unsigned Edges = 0;
      for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
        TerminatorInst *TI = BB->getTerminator();
        if (isa<ReturnInst>(TI))
          ++Edges;
        else
          Edges += TI->getNumSuccessors();
      }
      
      ArrayType *CounterTy =
        ArrayType::get(Type::getInt64Ty(*Ctx), Edges);
      GlobalVariable *Counters =
        new GlobalVariable(*M, CounterTy, false,
                           GlobalValue::InternalLinkage,
                           Constant::getNullValue(CounterTy),
                           "__llvm_gcov_ctr");
      CountersBySP.push_back(std::make_pair(Counters, (MDNode*)SP));
      
      UniqueVector<BasicBlock *> ComplexEdgePreds;
      UniqueVector<BasicBlock *> ComplexEdgeSuccs;
      
      unsigned Edge = 0;
      for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
        TerminatorInst *TI = BB->getTerminator();
        int Successors = isa<ReturnInst>(TI) ? 1 : TI->getNumSuccessors();
        if (Successors) {
          IRBuilder<> Builder(TI);
          
          if (Successors == 1) {
            Value *Counter = Builder.CreateConstInBoundsGEP2_64(Counters, 0,
                                                                Edge);
            Value *Count = Builder.CreateLoad(Counter);
            Count = Builder.CreateAdd(Count, Builder.getInt64(1));
            Builder.CreateStore(Count, Counter);
          } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
            Value *Sel = Builder.CreateSelect(BI->getCondition(),
                                              Builder.getInt64(Edge),
                                              Builder.getInt64(Edge + 1));
            SmallVector<Value *, 2> Idx;
            Idx.push_back(Builder.getInt64(0));
            Idx.push_back(Sel);
            Value *Counter = Builder.CreateInBoundsGEP(Counters, Idx);
            Value *Count = Builder.CreateLoad(Counter);
            Count = Builder.CreateAdd(Count, Builder.getInt64(1));
            Builder.CreateStore(Count, Counter);
          } else {
            ComplexEdgePreds.insert(BB);
            for (int i = 0; i != Successors; ++i)
              ComplexEdgeSuccs.insert(TI->getSuccessor(i));
          }
          Edge += Successors;
        }
      }
      
      if (!ComplexEdgePreds.empty()) {
        GlobalVariable *EdgeTable =
          buildEdgeLookupTable(F, Counters,
                               ComplexEdgePreds, ComplexEdgeSuccs);
        GlobalVariable *EdgeState = getEdgeStateValue();
        
        for (int i = 0, e = ComplexEdgePreds.size(); i != e; ++i) {
          IRBuilder<> Builder(ComplexEdgePreds[i+1]->getTerminator());
          Builder.CreateStore(Builder.getInt32(i), EdgeState);
        }
        for (int i = 0, e = ComplexEdgeSuccs.size(); i != e; ++i) {
          // call runtime to perform increment
          BasicBlock::iterator InsertPt =
            ComplexEdgeSuccs[i+1]->getFirstInsertionPt();
          IRBuilder<> Builder(InsertPt);
          Value *CounterPtrArray =
            Builder.CreateConstInBoundsGEP2_64(EdgeTable, 0,
                                               i * ComplexEdgePreds.size());

          // Build code to increment the counter.
          InsertIndCounterIncrCode = true;
          Builder.CreateCall2(getIncrementIndirectCounterFunc(),
                              EdgeState, CounterPtrArray);
        }
      }
    }

    insertCounterWriteout(CountersBySP);
    insertFlush(CountersBySP);
  }

  if (InsertIndCounterIncrCode)
    insertIndirectCounterIncrement();

  return Result;
}
Ejemplo n.º 9
0
/// InsertUniqueBackedgeBlock - This method is called when the specified loop
/// has more than one backedge in it.  If this occurs, revector all of these
/// backedges to target a new basic block and have that block branch to the loop
/// header.  This ensures that loops have exactly one backedge.
///
BasicBlock *
LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) {
  assert(L->getNumBackEdges() > 1 && "Must have > 1 backedge!");

  // Get information about the loop
  BasicBlock *Header = L->getHeader();
  Function *F = Header->getParent();

  // Unique backedge insertion currently depends on having a preheader.
  if (!Preheader)
    return 0;

  // Figure out which basic blocks contain back-edges to the loop header.
  std::vector<BasicBlock*> BackedgeBlocks;
  for (pred_iterator I = pred_begin(Header), E = pred_end(Header); I != E; ++I){
    BasicBlock *P = *I;

    // Indirectbr edges cannot be split, so we must fail if we find one.
    if (isa<IndirectBrInst>(P->getTerminator()))
      return 0;

    if (P != Preheader) BackedgeBlocks.push_back(P);
  }

  // Create and insert the new backedge block...
  BasicBlock *BEBlock = BasicBlock::Create(Header->getContext(),
                                           Header->getName()+".backedge", F);
  BranchInst *BETerminator = BranchInst::Create(Header, BEBlock);

  DEBUG(dbgs() << "LoopSimplify: Inserting unique backedge block "
               << BEBlock->getName() << "\n");

  // Move the new backedge block to right after the last backedge block.
  Function::iterator InsertPos = BackedgeBlocks.back(); ++InsertPos;
  F->getBasicBlockList().splice(InsertPos, F->getBasicBlockList(), BEBlock);

  // Now that the block has been inserted into the function, create PHI nodes in
  // the backedge block which correspond to any PHI nodes in the header block.
  for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
    PHINode *PN = cast<PHINode>(I);
    PHINode *NewPN = PHINode::Create(PN->getType(), PN->getName()+".be",
                                     BETerminator);
    NewPN->reserveOperandSpace(BackedgeBlocks.size());
    if (AA) AA->copyValue(PN, NewPN);

    // Loop over the PHI node, moving all entries except the one for the
    // preheader over to the new PHI node.
    unsigned PreheaderIdx = ~0U;
    bool HasUniqueIncomingValue = true;
    Value *UniqueValue = 0;
    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
      BasicBlock *IBB = PN->getIncomingBlock(i);
      Value *IV = PN->getIncomingValue(i);
      if (IBB == Preheader) {
        PreheaderIdx = i;
      } else {
        NewPN->addIncoming(IV, IBB);
        if (HasUniqueIncomingValue) {
          if (UniqueValue == 0)
            UniqueValue = IV;
          else if (UniqueValue != IV)
            HasUniqueIncomingValue = false;
        }
      }
    }

    // Delete all of the incoming values from the old PN except the preheader's
    assert(PreheaderIdx != ~0U && "PHI has no preheader entry??");
    if (PreheaderIdx != 0) {
      PN->setIncomingValue(0, PN->getIncomingValue(PreheaderIdx));
      PN->setIncomingBlock(0, PN->getIncomingBlock(PreheaderIdx));
    }
    // Nuke all entries except the zero'th.
    for (unsigned i = 0, e = PN->getNumIncomingValues()-1; i != e; ++i)
      PN->removeIncomingValue(e-i, false);

    // Finally, add the newly constructed PHI node as the entry for the BEBlock.
    PN->addIncoming(NewPN, BEBlock);

    // As an optimization, if all incoming values in the new PhiNode (which is a
    // subset of the incoming values of the old PHI node) have the same value,
    // eliminate the PHI Node.
    if (HasUniqueIncomingValue) {
      NewPN->replaceAllUsesWith(UniqueValue);
      if (AA) AA->deleteValue(NewPN);
      BEBlock->getInstList().erase(NewPN);
    }
  }

  // Now that all of the PHI nodes have been inserted and adjusted, modify the
  // backedge blocks to just to the BEBlock instead of the header.
  for (unsigned i = 0, e = BackedgeBlocks.size(); i != e; ++i) {
    TerminatorInst *TI = BackedgeBlocks[i]->getTerminator();
    for (unsigned Op = 0, e = TI->getNumSuccessors(); Op != e; ++Op)
      if (TI->getSuccessor(Op) == Header)
        TI->setSuccessor(Op, BEBlock);
  }

  //===--- Update all analyses which we must preserve now -----------------===//

  // Update Loop Information - we know that this block is now in the current
  // loop and all parent loops.
  L->addBasicBlockToLoop(BEBlock, LI->getBase());

  // Update dominator information
  DT->splitBlock(BEBlock);

  return BEBlock;
}
Ejemplo n.º 10
0
bool InstructionGraph::runOnFunction(Function &M) {
    errs()<<"ins grph ";

    // if the function dppcreated, then we skip
    if(M.hasFnAttribute(GENERATEDATTR))
        return false;
    errs()<<"skip check";
    Func = &M;

    ExternalInsNode = getOrInsertInstruction(0);
    //assert(!CallsExternalNode);
    //CallsExternalNode = new CallGraphNode(0);
    Root = ExternalInsNode;
    for(Function::iterator BB=M.begin(), BBE = M.end(); BB != BBE; ++BB)
    {
        TerminatorInst* curBBTerm =  BB->getTerminator();
        unsigned numSuc = curBBTerm->getNumSuccessors();
        for(unsigned sucInd=0; sucInd < numSuc; sucInd++)
        {
            BasicBlock* curSuc = curBBTerm->getSuccessor(sucInd);
            std::vector<BasicBlock*>* curSucPredecessors;
            if(BasicBlock2Predecessors.find(curSuc)==BasicBlock2Predecessors.end())
            {
                curSucPredecessors = new std::vector<BasicBlock*>();
                BasicBlock2Predecessors[curSuc] = curSucPredecessors;
            }
            else
                curSucPredecessors = BasicBlock2Predecessors[curSuc];
            curSucPredecessors->push_back(&(*BB));
        }
    }
    PostDominatorTree* PDT = getAnalysisIfAvailable<PostDominatorTree>();
    LoopInfo* LI = getAnalysisIfAvailable<LoopInfo>();
    for (Function::iterator BB = M.begin(), BBE = M.end(); BB != BBE; ++BB)
    {
        BasicBlock* curBBPtr = &(*BB);
        std::vector<BasicBlock*>* earliestPred=0;
        if(BasicBlock2Predecessors.find(curBBPtr)==BasicBlock2Predecessors.end())
        {
            assert (&(M.getEntryBlock()) == curBBPtr);
        }
        else
        {
          // now we shall search for the actual predicate instruction
            earliestPred = searchEarliestPredicate(curBBPtr, PDT, BasicBlock2Predecessors,LI);
        }
        /*errs()<<"bb:"<<BB->getName() << " has "<<" pred \n";
        for(unsigned k = 0;k < earliestPred->size(); k++)
        {
          errs()<< earliestPred->at(k)->getName()<<"\n";
        }*/

        for(BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE; ++BI)
        {
           addToInstructionGraph(BI, earliestPred);
        }
        delete earliestPred;
        errs()<<BB->getName()<<"\t" <<ExternalInsNode->DependentInstructions.size()<<" root dep\n";
    }
    // we should have all the node
    errs()<<InstructionMap.size()<< " instructions added as graph nodes\n";

    return false;
}
Ejemplo n.º 11
0
/// \brief Assign DWARF discriminators.
///
/// To assign discriminators, we examine the boundaries of every
/// basic block and its successors. Suppose there is a basic block B1
/// with successor B2. The last instruction I1 in B1 and the first
/// instruction I2 in B2 are located at the same file and line number.
/// This situation is illustrated in the following code snippet:
///
///       if (i < 10) x = i;
///
///     entry:
///       br i1 %cmp, label %if.then, label %if.end, !dbg !10
///     if.then:
///       %1 = load i32* %i.addr, align 4, !dbg !10
///       store i32 %1, i32* %x, align 4, !dbg !10
///       br label %if.end, !dbg !10
///     if.end:
///       ret void, !dbg !12
///
/// Notice how the branch instruction in block 'entry' and all the
/// instructions in block 'if.then' have the exact same debug location
/// information (!dbg !10).
///
/// To distinguish instructions in block 'entry' from instructions in
/// block 'if.then', we generate a new lexical block for all the
/// instruction in block 'if.then' that share the same file and line
/// location with the last instruction of block 'entry'.
///
/// This new lexical block will have the same location information as
/// the previous one, but with a new DWARF discriminator value.
///
/// One of the main uses of this discriminator value is in runtime
/// sample profilers. It allows the profiler to distinguish instructions
/// at location !dbg !10 that execute on different basic blocks. This is
/// important because while the predicate 'if (x < 10)' may have been
/// executed millions of times, the assignment 'x = i' may have only
/// executed a handful of times (meaning that the entry->if.then edge is
/// seldom taken).
///
/// If we did not have discriminator information, the profiler would
/// assign the same weight to both blocks 'entry' and 'if.then', which
/// in turn will make it conclude that the entry->if.then edge is very
/// hot.
///
/// To decide where to create new discriminator values, this function
/// traverses the CFG and examines instruction at basic block boundaries.
/// If the last instruction I1 of a block B1 is at the same file and line
/// location as instruction I2 of successor B2, then it creates a new
/// lexical block for I2 and all the instruction in B2 that share the same
/// file and line location as I2. This new lexical block will have a
/// different discriminator number than I1.
bool AddDiscriminators::runOnFunction(Function &F) {
  // If the function has debug information, but the user has disabled
  // discriminators, do nothing.
  // Simlarly, if the function has no debug info, do nothing.
  // Finally, if this module is built with dwarf versions earlier than 4,
  // do nothing (discriminator support is a DWARF 4 feature).
  if (NoDiscriminators ||
      !hasDebugInfo(F) ||
      F.getParent()->getDwarfVersion() < 4)
    return false;

  bool Changed = false;
  Module *M = F.getParent();
  LLVMContext &Ctx = M->getContext();
  DIBuilder Builder(*M, /*AllowUnresolved*/ false);

  // Traverse all the blocks looking for instructions in different
  // blocks that are at the same file:line location.
  for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
    BasicBlock *B = I;
    TerminatorInst *Last = B->getTerminator();
    DILocation LastDIL = Last->getDebugLoc().get();
    if (!LastDIL)
      continue;

    for (unsigned I = 0; I < Last->getNumSuccessors(); ++I) {
      BasicBlock *Succ = Last->getSuccessor(I);
      Instruction *First = Succ->getFirstNonPHIOrDbgOrLifetime();
      DILocation FirstDIL = First->getDebugLoc().get();
      if (!FirstDIL)
        continue;

      // If the first instruction (First) of Succ is at the same file
      // location as B's last instruction (Last), add a new
      // discriminator for First's location and all the instructions
      // in Succ that share the same location with First.
      if (!FirstDIL->canDiscriminate(*LastDIL)) {
        // Create a new lexical scope and compute a new discriminator
        // number for it.
        StringRef Filename = FirstDIL->getFilename();
        auto *Scope = FirstDIL->getScope();
        auto *File = Builder.createFile(Filename, Scope->getDirectory());

        // FIXME: Calculate the discriminator here, based on local information,
        // and delete MDLocation::computeNewDiscriminator().  The current
        // solution gives different results depending on other modules in the
        // same context.  All we really need is to discriminate between
        // FirstDIL and LastDIL -- a local map would suffice.
        unsigned Discriminator = FirstDIL->computeNewDiscriminator();
        auto *NewScope =
            Builder.createLexicalBlockFile(Scope, File, Discriminator);
        auto *NewDIL =
            MDLocation::get(Ctx, FirstDIL->getLine(), FirstDIL->getColumn(),
                            NewScope, FirstDIL->getInlinedAt());
        DebugLoc newDebugLoc = NewDIL;

        // Attach this new debug location to First and every
        // instruction following First that shares the same location.
        for (BasicBlock::iterator I1(*First), E1 = Succ->end(); I1 != E1;
             ++I1) {
          if (I1->getDebugLoc().get() != FirstDIL)
            break;
          I1->setDebugLoc(newDebugLoc);
          DEBUG(dbgs() << NewDIL->getFilename() << ":" << NewDIL->getLine()
                       << ":" << NewDIL->getColumn() << ":"
                       << NewDIL->getDiscriminator() << *I1 << "\n");
        }
        DEBUG(dbgs() << "\n");
        Changed = true;
      }
    }
  }
  return Changed;
}
Ejemplo n.º 12
0
/// RemoveBlockIfDead - If the specified block is dead, remove it, update loop
/// information, and remove any dead successors it has.
///
void LoopUnswitch::RemoveBlockIfDead(BasicBlock *BB,
                                     std::vector<Instruction*> &Worklist,
                                     Loop *L) {
  if (pred_begin(BB) != pred_end(BB)) {
    // This block isn't dead, since an edge to BB was just removed, see if there
    // are any easy simplifications we can do now.
    if (BasicBlock *Pred = BB->getSinglePredecessor()) {
      // If it has one pred, fold phi nodes in BB.
      while (isa<PHINode>(BB->begin()))
        ReplaceUsesOfWith(BB->begin(), 
                          cast<PHINode>(BB->begin())->getIncomingValue(0), 
                          Worklist, L, LPM);
      
      // If this is the header of a loop and the only pred is the latch, we now
      // have an unreachable loop.
      if (Loop *L = LI->getLoopFor(BB))
        if (loopHeader == BB && L->contains(Pred)) {
          // Remove the branch from the latch to the header block, this makes
          // the header dead, which will make the latch dead (because the header
          // dominates the latch).
          LPM->deleteSimpleAnalysisValue(Pred->getTerminator(), L);
          Pred->getTerminator()->eraseFromParent();
          new UnreachableInst(BB->getContext(), Pred);
          
          // The loop is now broken, remove it from LI.
          RemoveLoopFromHierarchy(L);
          
          // Reprocess the header, which now IS dead.
          RemoveBlockIfDead(BB, Worklist, L);
          return;
        }
      
      // If pred ends in a uncond branch, add uncond branch to worklist so that
      // the two blocks will get merged.
      if (BranchInst *BI = dyn_cast<BranchInst>(Pred->getTerminator()))
        if (BI->isUnconditional())
          Worklist.push_back(BI);
    }
    return;
  }

  DEBUG(dbgs() << "Nuking dead block: " << *BB);
  
  // Remove the instructions in the basic block from the worklist.
  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
    RemoveFromWorklist(I, Worklist);
    
    // Anything that uses the instructions in this basic block should have their
    // uses replaced with undefs.
    // If I is not void type then replaceAllUsesWith undef.
    // This allows ValueHandlers and custom metadata to adjust itself.
    if (!I->getType()->isVoidTy())
      I->replaceAllUsesWith(UndefValue::get(I->getType()));
  }
  
  // If this is the edge to the header block for a loop, remove the loop and
  // promote all subloops.
  if (Loop *BBLoop = LI->getLoopFor(BB)) {
    if (BBLoop->getLoopLatch() == BB) {
      RemoveLoopFromHierarchy(BBLoop);
      if (currentLoop == BBLoop) {
        currentLoop = 0;
        redoLoop = false;
      }
    }
  }

  // Remove the block from the loop info, which removes it from any loops it
  // was in.
  LI->removeBlock(BB);
  
  
  // Remove phi node entries in successors for this block.
  TerminatorInst *TI = BB->getTerminator();
  SmallVector<BasicBlock*, 4> Succs;
  for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) {
    Succs.push_back(TI->getSuccessor(i));
    TI->getSuccessor(i)->removePredecessor(BB);
  }
  
  // Unique the successors, remove anything with multiple uses.
  array_pod_sort(Succs.begin(), Succs.end());
  Succs.erase(std::unique(Succs.begin(), Succs.end()), Succs.end());
  
  // Remove the basic block, including all of the instructions contained in it.
  LPM->deleteSimpleAnalysisValue(BB, L);  
  BB->eraseFromParent();
  // Remove successor blocks here that are not dead, so that we know we only
  // have dead blocks in this list.  Nondead blocks have a way of becoming dead,
  // then getting removed before we revisit them, which is badness.
  //
  for (unsigned i = 0; i != Succs.size(); ++i)
    if (pred_begin(Succs[i]) != pred_end(Succs[i])) {
      // One exception is loop headers.  If this block was the preheader for a
      // loop, then we DO want to visit the loop so the loop gets deleted.
      // We know that if the successor is a loop header, that this loop had to
      // be the preheader: the case where this was the latch block was handled
      // above and headers can only have two predecessors.
      if (!LI->isLoopHeader(Succs[i])) {
        Succs.erase(Succs.begin()+i);
        --i;
      }
    }
  
  for (unsigned i = 0, e = Succs.size(); i != e; ++i)
    RemoveBlockIfDead(Succs[i], Worklist, L);
}
Ejemplo n.º 13
0
bool LoaderPass::runOnModule(Module &M) {
    ProfileInfoLoader PIL("profile-loader", Filename, M);

    EdgeInformation.clear();
    std::vector<unsigned> ECs = PIL.getRawEdgeCounts();
    if (ECs.size() > 0) {
        unsigned ei = 0;
        for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
            if (F->isDeclaration()) continue;
            if (ei < ECs.size())
                EdgeInformation[F][ProfileInfo::getEdge(0, &F->getEntryBlock())] +=
                    ECs[ei++];
            for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
                // Okay, we have to add a counter of each outgoing edge.  If the
                // outgoing edge is not critical don't split it, just insert the counter
                // in the source or destination of the edge.
                TerminatorInst *TI = BB->getTerminator();
                for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; ++s) {
                    if (ei < ECs.size())
                        EdgeInformation[F][ProfileInfo::getEdge(BB, TI->getSuccessor(s))] +=
                            ECs[ei++];
                }
            }
        }
        if (ei != ECs.size()) {
            cerr << "WARNING: profile information is inconsistent with "
                 << "the current program!\n";
        }
    }

    BlockInformation.clear();
    std::vector<unsigned> BCs = PIL.getRawBlockCounts();
    if (BCs.size() > 0) {
        unsigned bi = 0;
        for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
            if (F->isDeclaration()) continue;
            for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
                if (bi < BCs.size())
                    BlockInformation[F][BB] = BCs[bi++];
        }
        if (bi != BCs.size()) {
            cerr << "WARNING: profile information is inconsistent with "
                 << "the current program!\n";
        }
    }

    FunctionInformation.clear();
    std::vector<unsigned> FCs = PIL.getRawFunctionCounts();
    if (FCs.size() > 0) {
        unsigned fi = 0;
        for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
            if (F->isDeclaration()) continue;
            if (fi < FCs.size())
                FunctionInformation[F] = FCs[fi++];
        }
        if (fi != FCs.size()) {
            cerr << "WARNING: profile information is inconsistent with "
                 << "the current program!\n";
        }
    }

    return false;
}
Ejemplo n.º 14
0
/// \brief Analyze a call site for potential inlining.
///
/// Returns true if inlining this call is viable, and false if it is not
/// viable. It computes the cost and adjusts the threshold based on numerous
/// factors and heuristics. If this method returns false but the computed cost
/// is below the computed threshold, then inlining was forcibly disabled by
/// some artifact of the routine.
bool CallAnalyzer::analyzeCall(CallSite CS) {
  ++NumCallsAnalyzed;

  // Track whether the post-inlining function would have more than one basic
  // block. A single basic block is often intended for inlining. Balloon the
  // threshold by 50% until we pass the single-BB phase.
  bool SingleBB = true;
  int SingleBBBonus = Threshold / 2;
  Threshold += SingleBBBonus;

  // Perform some tweaks to the cost and threshold based on the direct
  // callsite information.

  // We want to more aggressively inline vector-dense kernels, so up the
  // threshold, and we'll lower it if the % of vector instructions gets too
  // low.
  assert(NumInstructions == 0);
  assert(NumVectorInstructions == 0);
  FiftyPercentVectorBonus = Threshold;
  TenPercentVectorBonus = Threshold / 2;

  // Give out bonuses per argument, as the instructions setting them up will
  // be gone after inlining.
  for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) {
    if (TD && CS.isByValArgument(I)) {
      // We approximate the number of loads and stores needed by dividing the
      // size of the byval type by the target's pointer size.
      PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType());
      unsigned TypeSize = TD->getTypeSizeInBits(PTy->getElementType());
      unsigned PointerSize = TD->getPointerSizeInBits();
      // Ceiling division.
      unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize;

      // If it generates more than 8 stores it is likely to be expanded as an
      // inline memcpy so we take that as an upper bound. Otherwise we assume
      // one load and one store per word copied.
      // FIXME: The maxStoresPerMemcpy setting from the target should be used
      // here instead of a magic number of 8, but it's not available via
      // DataLayout.
      NumStores = std::min(NumStores, 8U);

      Cost -= 2 * NumStores * InlineConstants::InstrCost;
    } else {
      // For non-byval arguments subtract off one instruction per call
      // argument.
      Cost -= InlineConstants::InstrCost;
    }
  }

  // If there is only one call of the function, and it has internal linkage,
  // the cost of inlining it drops dramatically.
  bool OnlyOneCallAndLocalLinkage = F.hasLocalLinkage() && F.hasOneUse() &&
    &F == CS.getCalledFunction();
  if (OnlyOneCallAndLocalLinkage)
    Cost += InlineConstants::LastCallToStaticBonus;

  // If the instruction after the call, or if the normal destination of the
  // invoke is an unreachable instruction, the function is noreturn. As such,
  // there is little point in inlining this unless there is literally zero
  // cost.
  Instruction *Instr = CS.getInstruction();
  if (InvokeInst *II = dyn_cast<InvokeInst>(Instr)) {
    if (isa<UnreachableInst>(II->getNormalDest()->begin()))
      Threshold = 1;
  } else if (isa<UnreachableInst>(++BasicBlock::iterator(Instr)))
    Threshold = 1;

  // If this function uses the coldcc calling convention, prefer not to inline
  // it.
  if (F.getCallingConv() == CallingConv::Cold)
    Cost += InlineConstants::ColdccPenalty;

  // Check if we're done. This can happen due to bonuses and penalties.
  if (Cost > Threshold)
    return false;

  if (F.empty())
    return true;

  Function *Caller = CS.getInstruction()->getParent()->getParent();
  // Check if the caller function is recursive itself.
  for (Value::use_iterator U = Caller->use_begin(), E = Caller->use_end();
       U != E; ++U) {
    CallSite Site(cast<Value>(*U));
    if (!Site)
      continue;
    Instruction *I = Site.getInstruction();
    if (I->getParent()->getParent() == Caller) {
      IsCallerRecursive = true;
      break;
    }
  }

  // Track whether we've seen a return instruction. The first return
  // instruction is free, as at least one will usually disappear in inlining.
  bool HasReturn = false;

  // Populate our simplified values by mapping from function arguments to call
  // arguments with known important simplifications.
  CallSite::arg_iterator CAI = CS.arg_begin();
  for (Function::arg_iterator FAI = F.arg_begin(), FAE = F.arg_end();
       FAI != FAE; ++FAI, ++CAI) {
    assert(CAI != CS.arg_end());
    if (Constant *C = dyn_cast<Constant>(CAI))
      SimplifiedValues[FAI] = C;

    Value *PtrArg = *CAI;
    if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) {
      ConstantOffsetPtrs[FAI] = std::make_pair(PtrArg, C->getValue());

      // We can SROA any pointer arguments derived from alloca instructions.
      if (isa<AllocaInst>(PtrArg)) {
        SROAArgValues[FAI] = PtrArg;
        SROAArgCosts[PtrArg] = 0;
      }
    }
  }
  NumConstantArgs = SimplifiedValues.size();
  NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size();
  NumAllocaArgs = SROAArgValues.size();

  // The worklist of live basic blocks in the callee *after* inlining. We avoid
  // adding basic blocks of the callee which can be proven to be dead for this
  // particular call site in order to get more accurate cost estimates. This
  // requires a somewhat heavyweight iteration pattern: we need to walk the
  // basic blocks in a breadth-first order as we insert live successors. To
  // accomplish this, prioritizing for small iterations because we exit after
  // crossing our threshold, we use a small-size optimized SetVector.
  typedef SetVector<BasicBlock *, SmallVector<BasicBlock *, 16>,
                                  SmallPtrSet<BasicBlock *, 16> > BBSetVector;
  BBSetVector BBWorklist;
  BBWorklist.insert(&F.getEntryBlock());
  // Note that we *must not* cache the size, this loop grows the worklist.
  for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
    // Bail out the moment we cross the threshold. This means we'll under-count
    // the cost, but only when undercounting doesn't matter.
    if (Cost > (Threshold + VectorBonus))
      break;

    BasicBlock *BB = BBWorklist[Idx];
    if (BB->empty())
      continue;

    // Handle the terminator cost here where we can track returns and other
    // function-wide constructs.
    TerminatorInst *TI = BB->getTerminator();

    // We never want to inline functions that contain an indirectbr.  This is
    // incorrect because all the blockaddress's (in static global initializers
    // for example) would be referring to the original function, and this
    // indirect jump would jump from the inlined copy of the function into the 
    // original function which is extremely undefined behavior.
    // FIXME: This logic isn't really right; we can safely inline functions
    // with indirectbr's as long as no other function or global references the
    // blockaddress of a block within the current function.  And as a QOI issue,
    // if someone is using a blockaddress without an indirectbr, and that
    // reference somehow ends up in another function or global, we probably
    // don't want to inline this function.
    if (isa<IndirectBrInst>(TI))
      return false;

    if (!HasReturn && isa<ReturnInst>(TI))
      HasReturn = true;
    else
      Cost += InlineConstants::InstrCost;

    // Analyze the cost of this block. If we blow through the threshold, this
    // returns false, and we can bail on out.
    if (!analyzeBlock(BB)) {
      if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca)
        return false;

      // If the caller is a recursive function then we don't want to inline
      // functions which allocate a lot of stack space because it would increase
      // the caller stack usage dramatically.
      if (IsCallerRecursive &&
          AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller)
        return false;

      break;
    }

    // Add in the live successors by first checking whether we have terminator
    // that may be simplified based on the values simplified by this call.
    if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
      if (BI->isConditional()) {
        Value *Cond = BI->getCondition();
        if (ConstantInt *SimpleCond
              = dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
          BBWorklist.insert(BI->getSuccessor(SimpleCond->isZero() ? 1 : 0));
          continue;
        }
      }
    } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
      Value *Cond = SI->getCondition();
      if (ConstantInt *SimpleCond
            = dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
        BBWorklist.insert(SI->findCaseValue(SimpleCond).getCaseSuccessor());
        continue;
      }
    }

    // If we're unable to select a particular successor, just count all of
    // them.
    for (unsigned TIdx = 0, TSize = TI->getNumSuccessors(); TIdx != TSize;
         ++TIdx)
      BBWorklist.insert(TI->getSuccessor(TIdx));

    // If we had any successors at this point, than post-inlining is likely to
    // have them as well. Note that we assume any basic blocks which existed
    // due to branches or switches which folded above will also fold after
    // inlining.
    if (SingleBB && TI->getNumSuccessors() > 1) {
      // Take off the bonus we applied to the threshold.
      Threshold -= SingleBBBonus;
      SingleBB = false;
    }
  }

  // If this is a noduplicate call, we can still inline as long as 
  // inlining this would cause the removal of the caller (so the instruction
  // is not actually duplicated, just moved).
  if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall)
    return false;

  Threshold += VectorBonus;

  return Cost < Threshold;
}
Ejemplo n.º 15
0
/// If \param [in] BB has more than one predecessor that is a conditional
/// branch, attempt to use parallel and/or for the branch condition. \returns
/// true on success.
///
/// Before:
///   ......
///   %cmp10 = fcmp une float %tmp1, %tmp2
///   br i1 %cmp1, label %if.then, label %lor.rhs
///
/// lor.rhs:
///   ......
///   %cmp11 = fcmp une float %tmp3, %tmp4
///   br i1 %cmp11, label %if.then, label %ifend
///
/// if.end:  // the merge block
///   ......
///
/// if.then: // has two predecessors, both of them contains conditional branch.
///   ......
///   br label %if.end;
///
/// After:
///  ......
///  %cmp10 = fcmp une float %tmp1, %tmp2
///  ......
///  %cmp11 = fcmp une float %tmp3, %tmp4
///  %cmp12 = or i1 %cmp10, %cmp11    // parallel-or mode.
///  br i1 %cmp12, label %if.then, label %ifend
///
///  if.end:
///    ......
///
///  if.then:
///    ......
///    br label %if.end;
///
///  Current implementation handles two cases.
///  Case 1: \param BB is on the else-path.
///
///          BB1
///        /     |
///       BB2    |
///      /   \   |
///     BB3   \  |     where, BB1, BB2 contain conditional branches.
///      \    |  /     BB3 contains unconditional branch.
///       \   | /      BB4 corresponds to \param BB which is also the merge.
///  BB => BB4
///
///
///  Corresponding source code:
///
///  if (a == b && c == d)
///    statement; // BB3
///
///  Case 2: \param BB BB is on the then-path.
///
///             BB1
///          /      |
///         |      BB2
///         \    /    |  where BB1, BB2 contain conditional branches.
///  BB =>   BB3      |  BB3 contains unconditiona branch and corresponds
///           \     /    to \param BB.  BB4 is the merge.
///             BB4
///
///  Corresponding source code:
///
///  if (a == b || c == d)
///    statement;  // BB3
///
///  In both cases,  \param BB is the common successor of conditional branches.
///  In Case 1, \param BB (BB4) has an unconditional branch (BB3) as
///  its predecessor.  In Case 2, \param BB (BB3) only has conditional branches
///  as its predecessors.
bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder) {
  PHINode *PHI = dyn_cast<PHINode>(BB->begin());
  if (PHI)
    return false; // For simplicity, avoid cases containing PHI nodes.

  BasicBlock *LastCondBlock = nullptr;
  BasicBlock *FirstCondBlock = nullptr;
  BasicBlock *UnCondBlock = nullptr;
  int Idx = -1;

  // Check predecessors of \param BB.
  SmallPtrSet<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB));
  for (SmallPtrSetIterator<BasicBlock *> PI = Preds.begin(), PE = Preds.end();
       PI != PE; ++PI) {
    BasicBlock *Pred = *PI;
    BranchInst *PBI = dyn_cast<BranchInst>(Pred->getTerminator());

    // All predecessors should terminate with a branch.
    if (!PBI)
      return false;

    BasicBlock *PP = Pred->getSinglePredecessor();

    if (PBI->isUnconditional()) {
      // Case 1: Pred (BB3) is an unconditional block, it should
      // have a single predecessor (BB2) that is also a predecessor
      // of \param BB (BB4) and should not have address-taken.
      // There should exist only one such unconditional
      // branch among the predecessors.
      if (UnCondBlock || !PP || (Preds.count(PP) == 0) ||
          Pred->hasAddressTaken())
        return false;

      UnCondBlock = Pred;
      continue;
    }

    // Only conditional branches are allowed beyond this point.
    assert(PBI->isConditional());

    // Condition's unique use should be the branch instruction.
    Value *PC = PBI->getCondition();
    if (!PC || !PC->hasOneUse())
      return false;

    if (PP && Preds.count(PP)) {
      // These are internal condition blocks to be merged from, e.g.,
      // BB2 in both cases.
      // Should not be address-taken.
      if (Pred->hasAddressTaken())
        return false;

      // Instructions in the internal condition blocks should be safe
      // to hoist up.
      for (BasicBlock::iterator BI = Pred->begin(), BE = PBI->getIterator();
           BI != BE;) {
        Instruction *CI = &*BI++;
        if (isa<PHINode>(CI) || !isSafeToSpeculativelyExecute(CI))
          return false;
      }
    } else {
      // This is the condition block to be merged into, e.g. BB1 in
      // both cases.
      if (FirstCondBlock)
        return false;
      FirstCondBlock = Pred;
    }

    // Find whether BB is uniformly on the true (or false) path
    // for all of its predecessors.
    BasicBlock *PS1 = PBI->getSuccessor(0);
    BasicBlock *PS2 = PBI->getSuccessor(1);
    BasicBlock *PS = (PS1 == BB) ? PS2 : PS1;
    int CIdx = (PS1 == BB) ? 0 : 1;

    if (Idx == -1)
      Idx = CIdx;
    else if (CIdx != Idx)
      return false;

    // PS is the successor which is not BB. Check successors to identify
    // the last conditional branch.
    if (Preds.count(PS) == 0) {
      // Case 2.
      LastCondBlock = Pred;
    } else {
      // Case 1
      BranchInst *BPS = dyn_cast<BranchInst>(PS->getTerminator());
      if (BPS && BPS->isUnconditional()) {
        // Case 1: PS(BB3) should be an unconditional branch.
        LastCondBlock = Pred;
      }
    }
  }

  if (!FirstCondBlock || !LastCondBlock || (FirstCondBlock == LastCondBlock))
    return false;

  TerminatorInst *TBB = LastCondBlock->getTerminator();
  BasicBlock *PS1 = TBB->getSuccessor(0);
  BasicBlock *PS2 = TBB->getSuccessor(1);
  BranchInst *PBI1 = dyn_cast<BranchInst>(PS1->getTerminator());
  BranchInst *PBI2 = dyn_cast<BranchInst>(PS2->getTerminator());

  // If PS1 does not jump into PS2, but PS2 jumps into PS1,
  // attempt branch inversion.
  if (!PBI1 || !PBI1->isUnconditional() ||
      (PS1->getTerminator()->getSuccessor(0) != PS2)) {
    // Check whether PS2 jumps into PS1.
    if (!PBI2 || !PBI2->isUnconditional() ||
        (PS2->getTerminator()->getSuccessor(0) != PS1))
      return false;

    // Do branch inversion.
    BasicBlock *CurrBlock = LastCondBlock;
    bool EverChanged = false;
    for (; CurrBlock != FirstCondBlock;
         CurrBlock = CurrBlock->getSinglePredecessor()) {
      BranchInst *BI = dyn_cast<BranchInst>(CurrBlock->getTerminator());
      CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition());
      if (!CI)
        continue;

      CmpInst::Predicate Predicate = CI->getPredicate();
      // Canonicalize icmp_ne -> icmp_eq, fcmp_one -> fcmp_oeq
      if ((Predicate == CmpInst::ICMP_NE) || (Predicate == CmpInst::FCMP_ONE)) {
        CI->setPredicate(ICmpInst::getInversePredicate(Predicate));
        BI->swapSuccessors();
        EverChanged = true;
      }
    }
    return EverChanged;
  }

  // PS1 must have a conditional branch.
  if (!PBI1 || !PBI1->isUnconditional())
    return false;

  // PS2 should not contain PHI node.
  PHI = dyn_cast<PHINode>(PS2->begin());
  if (PHI)
    return false;

  // Do the transformation.
  BasicBlock *CB;
  BranchInst *PBI = dyn_cast<BranchInst>(FirstCondBlock->getTerminator());
  bool Iteration = true;
  IRBuilder<>::InsertPointGuard Guard(Builder);
  Value *PC = PBI->getCondition();

  do {
    CB = PBI->getSuccessor(1 - Idx);
    // Delete the conditional branch.
    FirstCondBlock->getInstList().pop_back();
    FirstCondBlock->getInstList()
        .splice(FirstCondBlock->end(), CB->getInstList());
    PBI = cast<BranchInst>(FirstCondBlock->getTerminator());
    Value *CC = PBI->getCondition();
    // Merge conditions.
    Builder.SetInsertPoint(PBI);
    Value *NC;
    if (Idx == 0)
      // Case 2, use parallel or.
      NC = Builder.CreateOr(PC, CC);
    else
      // Case 1, use parallel and.
      NC = Builder.CreateAnd(PC, CC);

    PBI->replaceUsesOfWith(CC, NC);
    PC = NC;
    if (CB == LastCondBlock)
      Iteration = false;
    // Remove internal conditional branches.
    CB->dropAllReferences();
    // make CB unreachable and let downstream to delete the block.
    new UnreachableInst(CB->getContext(), CB);
  } while (Iteration);

  LLVM_DEBUG(dbgs() << "Use parallel and/or in:\n" << *FirstCondBlock);
  return true;
}
Ejemplo n.º 16
0
/// NormalizeLandingPads - Normalize and discover landing pads, noting them
/// in the LandingPads set.  A landing pad is normal if the only CFG edges
/// that end at it are unwind edges from invoke instructions. If we inlined
/// through an invoke we could have a normal branch from the previous
/// unwind block through to the landing pad for the original invoke.
/// Abnormal landing pads are fixed up by redirecting all unwind edges to
/// a new basic block which falls through to the original.
bool DwarfEHPrepare::NormalizeLandingPads() {
  bool Changed = false;

  const MCAsmInfo *MAI = TM->getMCAsmInfo();
  bool usingSjLjEH = MAI->getExceptionHandlingType() == ExceptionHandling::SjLj;

  for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) {
    TerminatorInst *TI = I->getTerminator();
    if (!isa<InvokeInst>(TI))
      continue;
    BasicBlock *LPad = TI->getSuccessor(1);
    // Skip landing pads that have already been normalized.
    if (LandingPads.count(LPad))
      continue;

    // Check that only invoke unwind edges end at the landing pad.
    bool OnlyUnwoundTo = true;
    bool SwitchOK = usingSjLjEH;
    for (pred_iterator PI = pred_begin(LPad), PE = pred_end(LPad);
         PI != PE; ++PI) {
      TerminatorInst *PT = (*PI)->getTerminator();
      // The SjLj dispatch block uses a switch instruction. This is effectively
      // an unwind edge, so we can disregard it here. There will only ever
      // be one dispatch, however, so if there are multiple switches, one
      // of them truly is a normal edge, not an unwind edge.
      if (SwitchOK && isa<SwitchInst>(PT)) {
        SwitchOK = false;
        continue;
      }
      if (!isa<InvokeInst>(PT) || LPad == PT->getSuccessor(0)) {
        OnlyUnwoundTo = false;
        break;
      }
    }

    if (OnlyUnwoundTo) {
      // Only unwind edges lead to the landing pad.  Remember the landing pad.
      LandingPads.insert(LPad);
      continue;
    }

    // At least one normal edge ends at the landing pad.  Redirect the unwind
    // edges to a new basic block which falls through into this one.

    // Create the new basic block.
    BasicBlock *NewBB = BasicBlock::Create(F->getContext(),
                                           LPad->getName() + "_unwind_edge");

    // Insert it into the function right before the original landing pad.
    LPad->getParent()->getBasicBlockList().insert(LPad, NewBB);

    // Redirect unwind edges from the original landing pad to NewBB.
    for (pred_iterator PI = pred_begin(LPad), PE = pred_end(LPad); PI != PE; ) {
      TerminatorInst *PT = (*PI++)->getTerminator();
      if (isa<InvokeInst>(PT) && PT->getSuccessor(1) == LPad)
        // Unwind to the new block.
        PT->setSuccessor(1, NewBB);
    }

    // If there are any PHI nodes in LPad, we need to update them so that they
    // merge incoming values from NewBB instead.
    for (BasicBlock::iterator II = LPad->begin(); isa<PHINode>(II); ++II) {
      PHINode *PN = cast<PHINode>(II);
      pred_iterator PB = pred_begin(NewBB), PE = pred_end(NewBB);

      // Check to see if all of the values coming in via unwind edges are the
      // same.  If so, we don't need to create a new PHI node.
      Value *InVal = PN->getIncomingValueForBlock(*PB);
      for (pred_iterator PI = PB; PI != PE; ++PI) {
        if (PI != PB && InVal != PN->getIncomingValueForBlock(*PI)) {
          InVal = 0;
          break;
        }
      }

      if (InVal == 0) {
        // Different unwind edges have different values.  Create a new PHI node
        // in NewBB.
        PHINode *NewPN = PHINode::Create(PN->getType(),
                                         PN->getNumIncomingValues(),
                                         PN->getName()+".unwind", NewBB);
        // Add an entry for each unwind edge, using the value from the old PHI.
        for (pred_iterator PI = PB; PI != PE; ++PI)
          NewPN->addIncoming(PN->getIncomingValueForBlock(*PI), *PI);

        // Now use this new PHI as the common incoming value for NewBB in PN.
        InVal = NewPN;
      }

      // Revector exactly one entry in the PHI node to come from NewBB
      // and delete all other entries that come from unwind edges.  If
      // there are both normal and unwind edges from the same predecessor,
      // this leaves an entry for the normal edge.
      for (pred_iterator PI = PB; PI != PE; ++PI)
        PN->removeIncomingValue(*PI);
      PN->addIncoming(InVal, NewBB);
    }

    // Add a fallthrough from NewBB to the original landing pad.
    BranchInst::Create(LPad, NewBB);

    // Now update DominatorTree analysis information.
    DT->splitBlock(NewBB);

    // Remember the newly constructed landing pad.  The original landing pad
    // LPad is no longer a landing pad now that all unwind edges have been
    // revectored to NewBB.
    LandingPads.insert(NewBB);
    ++NumLandingPadsSplit;
    Changed = true;
  }

  return Changed;
}
bool OptimalEdgeProfiler::runOnModule(Module &M) {
  Function *Main = M.getFunction("main");
  if (Main == 0) {
    errs() << "WARNING: cannot insert edge profiling into a module"
           << " with no main function!\n";
    return false;  // No main, no instrumentation!
  }

  // NumEdges counts all the edges that may be instrumented. Later on its
  // decided which edges to actually instrument, to achieve optimal profiling.
  // For the entry block a virtual edge (0,entry) is reserved, for each block
  // with no successors an edge (BB,0) is reserved. These edges are necessary
  // to calculate a truly optimal maximum spanning tree and thus an optimal
  // instrumentation.
  unsigned NumEdges = 0;

  for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
    if (F->isDeclaration()) continue;
    // Reserve space for (0,entry) edge.
    ++NumEdges;
    for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
      // Keep track of which blocks need to be instrumented.  We don't want to
      // instrument blocks that are added as the result of breaking critical
      // edges!
      if (BB->getTerminator()->getNumSuccessors() == 0) {
        // Reserve space for (BB,0) edge.
        ++NumEdges;
      } else {
        NumEdges += BB->getTerminator()->getNumSuccessors();
      }
    }
  }

  // In the profiling output a counter for each edge is reserved, but only few
  // are used. This is done to be able to read back in the profile without
  // calulating the maximum spanning tree again, instead each edge counter that
  // is not used is initialised with -1 to signal that this edge counter has to
  // be calculated from other edge counters on reading the profile info back
  // in.

  const Type *Int32 = Type::getInt32Ty(M.getContext());
  const ArrayType *ATy = ArrayType::get(Int32, NumEdges);
  GlobalVariable *Counters =
    new GlobalVariable(M, ATy, false, GlobalValue::InternalLinkage,
                       Constant::getNullValue(ATy), "OptEdgeProfCounters");
  NumEdgesInserted = 0;

  std::vector<Constant*> Initializer(NumEdges);
  Constant* Zero = ConstantInt::get(Int32, 0);
  Constant* Uncounted = ConstantInt::get(Int32, ProfileInfoLoader::Uncounted);

  // Instrument all of the edges not in MST...
  unsigned i = 0;
  for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
    if (F->isDeclaration()) continue;
    DEBUG(dbgs()<<"Working on "<<F->getNameStr()<<"\n");

    // Calculate a Maximum Spanning Tree with the edge weights determined by
    // ProfileEstimator. ProfileEstimator also assign weights to the virtual
    // edges (0,entry) and (BB,0) (for blocks with no successors) and this
    // edges also participate in the maximum spanning tree calculation. 
    // The third parameter of MaximumSpanningTree() has the effect that not the
    // actual MST is returned but the edges _not_ in the MST.

    ProfileInfo::EdgeWeights ECs = 
      getAnalysis<ProfileInfo>(*F).getEdgeWeights(F);
    std::vector<ProfileInfo::EdgeWeight> EdgeVector(ECs.begin(), ECs.end());
    MaximumSpanningTree<BasicBlock> MST (EdgeVector);
    std::stable_sort(MST.begin(),MST.end());

    // Check if (0,entry) not in the MST. If not, instrument edge
    // (IncrementCounterInBlock()) and set the counter initially to zero, if
    // the edge is in the MST the counter is initialised to -1.

    BasicBlock *entry = &(F->getEntryBlock());
    ProfileInfo::Edge edge = ProfileInfo::getEdge(0,entry);
    if (!std::binary_search(MST.begin(), MST.end(), edge)) {
      printEdgeCounter(edge,entry,i);
      IncrementCounterInBlock(entry, i, Counters); ++NumEdgesInserted;
      Initializer[i++] = (Zero);
    } else{
      Initializer[i++] = (Uncounted);
    }

    // InsertedBlocks contains all blocks that were inserted for splitting an
    // edge, this blocks do not have to be instrumented.
    DenseSet<BasicBlock*> InsertedBlocks;
    for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
      // Check if block was not inserted and thus does not have to be
      // instrumented.
      if (InsertedBlocks.count(BB)) continue;

      // Okay, we have to add a counter of each outgoing edge not in MST. If
      // the outgoing edge is not critical don't split it, just insert the
      // counter in the source or destination of the edge. Also, if the block
      // has no successors, the virtual edge (BB,0) is processed.
      TerminatorInst *TI = BB->getTerminator();
      if (TI->getNumSuccessors() == 0) {
        ProfileInfo::Edge edge = ProfileInfo::getEdge(BB,0);
        if (!std::binary_search(MST.begin(), MST.end(), edge)) {
          printEdgeCounter(edge,BB,i);
          IncrementCounterInBlock(BB, i, Counters); ++NumEdgesInserted;
          Initializer[i++] = (Zero);
        } else{
          Initializer[i++] = (Uncounted);
        }
      }
      for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; ++s) {
        BasicBlock *Succ = TI->getSuccessor(s);
        ProfileInfo::Edge edge = ProfileInfo::getEdge(BB,Succ);
        if (!std::binary_search(MST.begin(), MST.end(), edge)) {

          // If the edge is critical, split it.
          bool wasInserted = SplitCriticalEdge(TI, s, this);
          Succ = TI->getSuccessor(s);
          if (wasInserted)
            InsertedBlocks.insert(Succ);

          // Okay, we are guaranteed that the edge is no longer critical.  If
          // we only have a single successor, insert the counter in this block,
          // otherwise insert it in the successor block.
          if (TI->getNumSuccessors() == 1) {
            // Insert counter at the start of the block
            printEdgeCounter(edge,BB,i);
            IncrementCounterInBlock(BB, i, Counters); ++NumEdgesInserted;
          } else {
            // Insert counter at the start of the block
            printEdgeCounter(edge,Succ,i);
            IncrementCounterInBlock(Succ, i, Counters); ++NumEdgesInserted;
          }
          Initializer[i++] = (Zero);
        } else {
          Initializer[i++] = (Uncounted);
        }
      }
    }
  }

  // Check if the number of edges counted at first was the number of edges we
  // considered for instrumentation.
  assert(i==NumEdges && "the number of edges in counting array is wrong");

  // Assing the now completely defined initialiser to the array.
  Constant *init = ConstantArray::get(ATy, Initializer);
  Counters->setInitializer(init);

  // Add the initialization call to main.
  InsertProfilingInitCall(Main, "llvm_start_opt_edge_profiling", Counters);
  return true;
}
Ejemplo n.º 18
0
bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &LPM) {
  if (skipOptnoneFunction(L))
    return false;

  // Only visit top-level loops.
  if (L->getParentLoop())
    return false;

  // If LoopSimplify form is not available, stay out of trouble.
  if (!L->isLoopSimplifyForm())
    return false;

  DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  bool Changed = false;

  // If there is more than one top-level loop in this function, extract all of
  // the loops. Otherwise there is exactly one top-level loop; in this case if
  // this function is more than a minimal wrapper around the loop, extract
  // the loop.
  bool ShouldExtractLoop = false;

  // Extract the loop if the entry block doesn't branch to the loop header.
  TerminatorInst *EntryTI =
    L->getHeader()->getParent()->getEntryBlock().getTerminator();
  if (!isa<BranchInst>(EntryTI) ||
      !cast<BranchInst>(EntryTI)->isUnconditional() ||
      EntryTI->getSuccessor(0) != L->getHeader()) {
    ShouldExtractLoop = true;
  } else {
    // Check to see if any exits from the loop are more than just return
    // blocks.
    SmallVector<BasicBlock*, 8> ExitBlocks;
    L->getExitBlocks(ExitBlocks);
    for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
      if (!isa<ReturnInst>(ExitBlocks[i]->getTerminator())) {
        ShouldExtractLoop = true;
        break;
      }
  }

  if (ShouldExtractLoop) {
    // We must omit landing pads. Landing pads must accompany the invoke
    // instruction. But this would result in a loop in the extracted
    // function. An infinite cycle occurs when it tries to extract that loop as
    // well.
    SmallVector<BasicBlock*, 8> ExitBlocks;
    L->getExitBlocks(ExitBlocks);
    for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
      if (ExitBlocks[i]->isLandingPad()) {
        ShouldExtractLoop = false;
        break;
      }
  }

  if (ShouldExtractLoop) {
    if (NumLoops == 0) return Changed;
    --NumLoops;
    CodeExtractor Extractor(DT, *L);
    if (Extractor.extractCodeRegion() != nullptr) {
      Changed = true;
      // After extraction, the loop is replaced by a function call, so
      // we shouldn't try to run any more loop passes on it.
      LPM.deleteLoopFromQueue(L);
    }
    ++NumExtracted;
  }

  return Changed;
}
Ejemplo n.º 19
0
// the verifier iterates through each path to gather the total
// number of edge frequencies
bool PathProfileVerifier::runOnModule (Module &M) {
    PathProfileInfo& pathProfileInfo = getAnalysis<PathProfileInfo>();

    // setup a data structure to map path edges which index an
    // array of edge counters
    NestedBlockToIndexMap arrayMap;
    unsigned i = 0;
    for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
        if (F->isDeclaration()) continue;

        arrayMap[0][F->begin()][0] = i++;

        for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
            TerminatorInst *TI = BB->getTerminator();

            unsigned duplicate = 0;
            BasicBlock* prev = 0;
            for (unsigned s = 0, e = TI->getNumSuccessors(); s != e;
                    prev = TI->getSuccessor(s), ++s) {
                if (prev == TI->getSuccessor(s))
                    duplicate++;
                else duplicate = 0;

                arrayMap[BB][TI->getSuccessor(s)][duplicate] = i++;
            }
        }
    }

    std::vector<unsigned> edgeArray(i);

    // iterate through each path and increment the edge counters as needed
    for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
        if (F->isDeclaration()) continue;

        pathProfileInfo.setCurrentFunction(F);

        DEBUG(dbgs() << "function '" << F->getName() << "' ran "
              << pathProfileInfo.pathsRun()
              << "/" << pathProfileInfo.getPotentialPathCount()
              << " potential paths\n");

        for( ProfilePathIterator nextPath = pathProfileInfo.pathBegin(),
                endPath = pathProfileInfo.pathEnd();
                nextPath != endPath; nextPath++ ) {
            ProfilePath* currentPath = nextPath->second;

            ProfilePathEdgeVector* pev = currentPath->getPathEdges();
            DEBUG(dbgs () << "path #" << currentPath->getNumber() << ": "
                  << currentPath->getCount() << "\n");
            // setup the entry edge (normally path profiling doesn't care about this)
            if (currentPath->getFirstBlockInPath() == &F->getEntryBlock())
                edgeArray[arrayMap[0][currentPath->getFirstBlockInPath()][0]]
                += currentPath->getCount();

            for( ProfilePathEdgeIterator nextEdge = pev->begin(),
                    endEdge = pev->end(); nextEdge != endEdge; nextEdge++ ) {
                if (nextEdge != pev->begin())
                    DEBUG(dbgs() << " :: ");

                BasicBlock* source = nextEdge->getSource();
                BasicBlock* target = nextEdge->getTarget();
                unsigned duplicateNumber = nextEdge->getDuplicateNumber();
                DEBUG(dbgs () << source->getNameStr() << " --{" << duplicateNumber
                      << "}--> " << target->getNameStr());

                // Ensure all the referenced edges exist
                // TODO: make this a separate function
                if( !arrayMap.count(source) ) {
                    errs() << "  error [" << F->getNameStr() << "()]: source '"
                           << source->getNameStr()
                           << "' does not exist in the array map.\n";
                } else if( !arrayMap[source].count(target) ) {
                    errs() << "  error [" << F->getNameStr() << "()]: target '"
                           << target->getNameStr()
                           << "' does not exist in the array map.\n";
                } else if( !arrayMap[source][target].count(duplicateNumber) ) {
                    errs() << "  error [" << F->getNameStr() << "()]: edge "
                           << source->getNameStr() << " -> " << target->getNameStr()
                           << " duplicate number " << duplicateNumber
                           << " does not exist in the array map.\n";
                } else {
                    edgeArray[arrayMap[source][target][duplicateNumber]]
                    += currentPath->getCount();
                }
            }

            DEBUG(errs() << "\n");

            delete pev;
        }
    }

    std::string errorInfo;
    std::string filename = EdgeProfileFilename;

    // Open a handle to the file
    FILE* edgeFile = fopen(filename.c_str(),"wb");

    if (!edgeFile) {
        errs() << "error: unable to open file '" << filename << "' for output.\n";
        return false;
    }

    errs() << "Generating edge profile '" << filename << "' ...\n";

    // write argument info
    unsigned type = ArgumentInfo;
    unsigned num = pathProfileInfo.argList.size();
    int zeros = 0;

    fwrite(&type,sizeof(unsigned),1,edgeFile);
    fwrite(&num,sizeof(unsigned),1,edgeFile);
    fwrite(pathProfileInfo.argList.c_str(),1,num,edgeFile);
    if (num&3)
        fwrite(&zeros, 1, 4-(num&3), edgeFile);

    type = EdgeInfo;
    num = edgeArray.size();
    fwrite(&type,sizeof(unsigned),1,edgeFile);
    fwrite(&num,sizeof(unsigned),1,edgeFile);

    // write each edge to the file
    for( std::vector<unsigned>::iterator s = edgeArray.begin(),
            e = edgeArray.end(); s != e; s++)
        fwrite(&*s, sizeof (unsigned), 1, edgeFile);

    fclose (edgeFile);

    return true;
}