//
// Method: findGlobalPoolNodes()
//
// Description:
//  This method finds DSNodes that are reachable from globals and that need a
//  pool.  The Automatic Pool Allocation transform will use the returned
//  information to build global pools for the DSNodes in question.
//
//  Note that this method does not assign DSNodes to pools; it merely decides
//  which DSNodes are reachable from globals and will need a pool of global
//  scope.
//
// Outputs:
//  Nodes - The DSNodes that are both reachable from globals and which should
//          have global pools will be *added* to this container.
//
void
AllHeapNodesHeuristic::findGlobalPoolNodes (DSNodeSet_t & Nodes) {
  // Get the globals graph for the program.
  DSGraph* GG = Graphs->getGlobalsGraph();

  // Get all of the nodes reachable from globals.
  DenseSet<const DSNode*> GlobalHeapNodes;
  GetNodesReachableFromGlobals (GG, GlobalHeapNodes);
  //
  // Create a global pool for each global DSNode.
  //
  for (DenseSet<const DSNode *>::iterator NI = GlobalHeapNodes.begin();
       NI != GlobalHeapNodes.end(); ++NI) {
    const DSNode * N = *NI;
    PoolMap[N] = OnePool(N);
  }

  //
  // Now find all DSNodes belonging to function-local DSGraphs which are
  // mirrored in the globals graph.  These DSNodes require a global pool, too.
  //
  for (Module::iterator F = M->begin(); F != M->end(); ++F) {
    if (Graphs->hasDSGraph(*F)) {
      DSGraph* G = Graphs->getDSGraph(*F);
      DSGraph::NodeMapTy NodeMap;
      G->computeGToGGMapping (NodeMap);
      //
      // Scan through all DSNodes in the local graph.  If a local DSNode has a
      // corresponding DSNode in the globals graph that is reachable from a 
      // global, then add the local DSNode to the set of DSNodes reachable from
      // a global.
      //
      DSGraph::node_iterator ni = G->node_begin();
      for (; ni != G->node_end(); ++ni) {
        DSNode * N = ni;
        DSNode * GGN = NodeMap[N].getNode();
        
        //assert (!GGN || GlobalHeapNodes.count (GGN));
        if (GGN && GlobalHeapNodes.count (GGN))
          PoolMap[GGN].NodesInPool.push_back (N);
      }
    }
  }

  //
  // Copy the values into the output container.  Note that DenseSet lacks the
  // iterator traits needed to treat it as a generic container, so we use an
  // explicit loop to copy values from the DenseSet into the output container.
  //
  for (DenseSet<const DSNode*>::iterator I = GlobalHeapNodes.begin(),
         E = GlobalHeapNodes.end(); I != E; ++I) {
    Nodes.insert (*I);
  }

  return;
}
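
//
// Illustrative usage (a sketch, not taken from the original source): a caller
// such as the pool allocation transform could collect the globally reachable
// DSNodes and hand each one to a pool-creation helper.  CreateGlobalPool() is
// a hypothetical name, and we assume DSNodeSet_t exposes standard iterators.
//
static void buildGlobalPools (AllHeapNodesHeuristic &H) {
  DSNodeSet_t GlobalPoolNodes;
  H.findGlobalPoolNodes (GlobalPoolNodes);
  for (DSNodeSet_t::iterator I = GlobalPoolNodes.begin(),
         E = GlobalPoolNodes.end(); I != E; ++I)
    CreateGlobalPool (*I);  // hypothetical helper: one global pool per node
}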
Example #2
static void MarkNodesWhichMustBePassedIn(DenseSet<const DSNode*> &MarkedNodes,
                                         Function &F, DSGraph* G,
                                         EntryPointAnalysis* EPA) {
  // All DSNodes reachable from arguments must be passed in...
  // Unless this is an entry point to the program
  if (!EPA->isEntryPoint(&F)) {
    for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end();
            I != E; ++I) {
      DSGraph::ScalarMapTy::iterator AI = G->getScalarMap().find(I);
      if (AI != G->getScalarMap().end())
        if (DSNode * N = AI->second.getNode())
          N->markReachableNodes(MarkedNodes);
    }
  }

  // Mark the returned node as needing to be passed in.
  if (DSNode * RetNode = G->getReturnNodeFor(F).getNode())
    RetNode->markReachableNodes(MarkedNodes);

  // Calculate which DSNodes are reachable from globals.  If a node is reachable
  // from a global, we will create a global pool for it, so no argument passage
  // is required.
  DenseSet<const DSNode*> NodesFromGlobals;
  GetNodesReachableFromGlobals(G, NodesFromGlobals);

  // Remove any nodes reachable from a global.  These nodes will be put into
  // global pools, which do not require arguments to be passed in.

  for (DenseSet<const DSNode*>::iterator I = NodesFromGlobals.begin(),
          E = NodesFromGlobals.end(); I != E; ++I)
    MarkedNodes.erase(*I);
}
void AliasAnalysisChecker::collectMissingAliases(
    const DenseSet<ValuePair> &DynamicAliases,
    vector<ValuePair> &MissingAliases) {
  AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
  AliasAnalysis &BaselineAA = getAnalysis<BaselineAliasAnalysis>();

  MissingAliases.clear();
  for (DenseSet<ValuePair>::const_iterator I = DynamicAliases.begin();
       I != DynamicAliases.end(); ++I) {
    Value *V1 = I->first, *V2 = I->second;
    if (IntraProc && !DynAAUtils::IsIntraProcQuery(V1, V2)) {
      continue;
    }

    // Ignore BitCasts and PhiNodes. The reports on them are typically
    // redundant.
    if (isa<BitCastInst>(V1) || isa<BitCastInst>(V2))
      continue;
    if (isa<PHINode>(V1) || isa<PHINode>(V2))
      continue;

    if (!CheckAllPointers) {
      if (!DynAAUtils::PointerIsDereferenced(V1) ||
          !DynAAUtils::PointerIsDereferenced(V2)) {
        continue;
      }
    }

    if (BaselineAA.alias(V1, V2) != AliasAnalysis::NoAlias &&
        AA.alias(V1, V2) == AliasAnalysis::NoAlias) {
      MissingAliases.push_back(make_pair(V1, V2));
    }
  }
}
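
// Illustrative follow-up (a sketch, not part of the original checker): once
// collected, the missing aliases would typically be reported one pair at a
// time.  The printValueForReport() helper is a hypothetical name.
void AliasAnalysisChecker::reportMissingAliases(
    const vector<ValuePair> &MissingAliases) {
  for (size_t i = 0; i < MissingAliases.size(); ++i) {
    errs() << "Missing alias:\n";
    printValueForReport(errs(), MissingAliases[i].first);   // hypothetical
    printValueForReport(errs(), MissingAliases[i].second);  // hypothetical
  }
}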
Example #4
//
// Method: findGlobalPoolNodes()
//
// Description:
//  This method finds DSNodes that are reachable from globals and that need a
//  pool.  The Automatic Pool Allocation transform will use the returned
//  information to build global pools for the DSNodes in question.
//
//  Note that this method does not assign DSNodes to pools; it merely decides
//  which DSNodes are reachable from globals and will need a pool of global
//  scope.
//
// Outputs:
//  Nodes - The DSNodes that are both reachable from globals and which should
//          have global pools will be *added* to this container.
//
void
Heuristic::findGlobalPoolNodes (DSNodeSet_t & Nodes) {
  // Get the globals graph for the program.
  DSGraph* GG = Graphs->getGlobalsGraph();

  // Get all of the nodes reachable from globals.
  DenseSet<const DSNode*> GlobalHeapNodes;
  GetNodesReachableFromGlobals (GG, GlobalHeapNodes);

  //
  // Now find all DSNodes belonging to function-local DSGraphs which are
  // mirrored in the globals graph.  These DSNodes require a global pool, too.
  //
  for (Module::iterator F = M->begin(); F != M->end(); ++F) {
    if (Graphs->hasDSGraph(*F)) {
      DSGraph* G = Graphs->getDSGraph(*F);
      GetNodesReachableFromGlobals (G, GlobalHeapNodes);
    }
  }

  //
  // Copy the values into the output container.  Note that DenseSet lacks the
  // iterator traits needed to treat it as a generic container, so we use an
  // explicit loop to copy values from the DenseSet into the output container.
  //
  for (DenseSet<const DSNode*>::iterator I = GlobalHeapNodes.begin(),
         E = GlobalHeapNodes.end(); I != E; ++I) {
    Nodes.insert (*I);
  }

  return;
}
// Collects missing aliases to <MissingAliases>.
void AliasAnalysisChecker::collectMissingAliases(
    const DenseSet<ValuePair> &DynamicAliases) {
  AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
  AliasAnalysis &BaselineAA = getAnalysis<BaselineAliasAnalysis>();

  MissingAliases.clear();
  for (DenseSet<ValuePair>::const_iterator I = DynamicAliases.begin();
       I != DynamicAliases.end(); ++I) {
    Value *V1 = I->first, *V2 = I->second;
    if (IntraProc && !DynAAUtils::IsIntraProcQuery(V1, V2)) {
      continue;
    }

    if (!CheckAllPointers) {
      if (!DynAAUtils::PointerIsDereferenced(V1) ||
          !DynAAUtils::PointerIsDereferenced(V2)) {
        continue;
      }
    }

    if (BaselineAA.alias(V1, V2) != AliasAnalysis::NoAlias &&
        AA.alias(V1, V2) == AliasAnalysis::NoAlias) {
      MissingAliases.push_back(make_pair(V1, V2));
    }
  }
}
Example #6
void LazyValueInfoCache::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc,
                                    BasicBlock *NewSucc) {
  // When an edge in the graph has been threaded, values that we could not
  // determine before (i.e. those marked overdefined) may now be solvable.
  // We do NOT try to proactively update these values.  Instead,
  // we clear their entries from the cache, and allow lazy updating to recompute
  // them when needed.
  
  // The updating process is fairly simple: we need to drop cached info
  // for all values that were marked overdefined in OldSucc, and for those same
  // values in any successor of OldSucc (except NewSucc) in which they were
  // also marked overdefined.
  std::vector<BasicBlock*> worklist;
  worklist.push_back(OldSucc);
  
  DenseSet<Value*> ClearSet;
  for (DenseSet<OverDefinedPairTy>::iterator I = OverDefinedCache.begin(),
       E = OverDefinedCache.end(); I != E; ++I) {
    if (I->first == OldSucc)
      ClearSet.insert(I->second);
  }
  
  // Use a worklist to perform a depth-first search of OldSucc's successors.
  // NOTE: We do not need a visited list since any blocks we have already
  // visited will have had their overdefined markers cleared already, and we
  // thus won't loop to their successors.
  while (!worklist.empty()) {
    BasicBlock *ToUpdate = worklist.back();
    worklist.pop_back();
    
    // Skip blocks only accessible through NewSucc.
    if (ToUpdate == NewSucc) continue;
    
    bool changed = false;
    for (DenseSet<Value*>::iterator I = ClearSet.begin(), E = ClearSet.end();
         I != E; ++I) {
      // If a value was marked overdefined in OldSucc, and is here too...
      DenseSet<OverDefinedPairTy>::iterator OI =
        OverDefinedCache.find(std::make_pair(ToUpdate, *I));
      if (OI == OverDefinedCache.end()) continue;

      // Remove it from the caches.
      ValueCacheEntryTy &Entry = ValueCache[LVIValueHandle(*I, this)];
      ValueCacheEntryTy::iterator CI = Entry.find(ToUpdate);

      assert(CI != Entry.end() && "Couldn't find entry to update?");
      Entry.erase(CI);
      OverDefinedCache.erase(OI);

      // If we removed anything, then we potentially need to update 
      // blocks successors too.
      changed = true;
    }

    if (!changed) continue;
    
    worklist.insert(worklist.end(), succ_begin(ToUpdate), succ_end(ToUpdate));
  }
}
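
// Minimal, self-contained sketch of the worklist pattern used above: the
// cached "overdefined" entries double as the visited marker, so a block whose
// entries were already cleared adds no further work and cycles terminate.
// The types and graph shape are illustrative, not from LazyValueInfo.
#include <set>
#include <utility>
#include <vector>

typedef int BlockId;
static void clearOverdefined(std::set<std::pair<BlockId, int> > &Cache,
                             const std::vector<std::vector<BlockId> > &Succs,
                             const std::vector<int> &ValsClearedInOldSucc,
                             BlockId Start) {
  std::vector<BlockId> Worklist(1, Start);
  while (!Worklist.empty()) {
    BlockId B = Worklist.back();
    Worklist.pop_back();

    // Drop every tracked value's entry for this block.
    bool Changed = false;
    for (size_t i = 0; i < ValsClearedInOldSucc.size(); ++i)
      Changed |= Cache.erase(std::make_pair(B, ValsClearedInOldSucc[i])) != 0;

    // If nothing was cleared here, the successors were handled already.
    if (!Changed)
      continue;
    Worklist.insert(Worklist.end(), Succs[B].begin(), Succs[B].end());
  }
}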
// Find the number of arguments we need to add to the functions.
void CSDataRando::findFunctionArgNodes(const std::vector<const Function *> &Functions) {
  std::vector<DSNodeHandle> RootNodes;
  for (const Function *F : Functions) {
    DSGraph *G = DSA->getDSGraph(*F);
    G->getFunctionArgumentsForCall(F, RootNodes);
  }

  // No additional args to pass.
  if (RootNodes.size() == 0) {
    return;
  }

  DenseSet<const DSNode*> MarkedNodes;
  for (DSNodeHandle &NH : RootNodes) {
    if (DSNode *N = NH.getNode()) {
      N->markReachableNodes(MarkedNodes);
    }
  }

  // Remove global nodes from the arg nodes.  If we are using the bottom-up
  // analysis, then global nodes are handled through the global map in all
  // contexts, so they do not need to be passed in.
  for (auto i : GlobalNodes) {
    MarkedNodes.erase(i);
  }

  // Remove any nodes that are marked do not encrypt.
  SmallVector<const DSNode*, 8> MarkedNodeWorkList;
  for (auto i : MarkedNodes) {
    if (i->isDoNotEncryptNode()) {
      MarkedNodeWorkList.push_back(i);
    }
  }
  for (auto i : MarkedNodeWorkList) {
    MarkedNodes.erase(i);
  }

  if (MarkedNodes.empty()) {
    return;
  }

  // Create a FuncInfo entry for each of the functions with the arg nodes that
  // need to be passed
  for (const Function *F : Functions) {
    FuncInfo &FI = FunctionInfo[F];
    FI.ArgNodes.insert(FI.ArgNodes.end(), MarkedNodes.begin(), MarkedNodes.end());
  }
}
Example #8
/// FindFunctionPoolArgs - In the first pass over the program, we decide which
/// arguments will have to be added for each function, build the FunctionInfo
/// map, and record this info in the ArgNodes set.
static void FindFunctionPoolArgs(Function &F, FuncInfo& FI,
                                 EntryPointAnalysis* EPA) {
  DenseSet<const DSNode*> MarkedNodes;

  if (FI.G->node_begin() == FI.G->node_end())
    return; // No memory activity, nothing is required

  // Find DataStructure nodes which are allocated in pools non-local to the
  // current function.  This set will contain all of the DSNodes which require
  // pools to be passed in from outside of the function.
  MarkNodesWhichMustBePassedIn(MarkedNodes, F, FI.G, EPA);

  //FI.ArgNodes.insert(FI.ArgNodes.end(), MarkedNodes.begin(), MarkedNodes.end());
  // Work around DenseSet not having iterator traits.
  for (DenseSet<const DSNode*>::iterator ii = MarkedNodes.begin(),
       ee = MarkedNodes.end(); ii != ee; ++ii)
    FI.ArgNodes.insert(FI.ArgNodes.end(), *ii);
}
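
// The "work around DenseSet not having iterator traits" loop above recurs
// throughout these examples; a small helper like the sketch below could
// centralize it.  This is an illustration, not code from the original tree.
#include "llvm/ADT/DenseSet.h"

template <typename T, typename Container>
static void copyDenseSetInto(const llvm::DenseSet<T> &From, Container &To) {
  for (typename llvm::DenseSet<T>::const_iterator I = From.begin(),
         E = From.end(); I != E; ++I)
    To.insert(To.end(), *I);  // append each element in set order
}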
Example #9
//
// Method: eraseCallsTo()
//
// Description:
//  This method removes the specified function from DSCallsites within the
//  specified function.  We do not do anything with call sites that call this
//  function indirectly (for which there is not much point as we do not yet
//  know the targets of indirect function calls).
//
void
StdLibDataStructures::eraseCallsTo(Function* F) {
  typedef std::pair<DSGraph*,Function*> RemovalPair;
  DenseSet<RemovalPair> ToRemove;
  for (Value::use_iterator ii = F->use_begin(), ee = F->use_end();
       ii != ee; ++ii)
    if (CallInst* CI = dyn_cast<CallInst>(*ii)){
      if (CI->getCalledValue() == F) {
        DSGraph* Graph = getDSGraph(*CI->getParent()->getParent());
        //delete the call
        DEBUG(errs() << "Removing " << F->getName().str() << " from "
              << CI->getParent()->getParent()->getName().str() << "\n");
        ToRemove.insert(std::make_pair(Graph, F));
      }
    }else if (InvokeInst* CI = dyn_cast<InvokeInst>(*ii)){
      if (CI->getCalledValue() == F) {
        DSGraph* Graph = getDSGraph(*CI->getParent()->getParent());
        //delete the call
        DEBUG(errs() << "Removing " << F->getName().str() << " from "
              << CI->getParent()->getParent()->getName().str() << "\n");
        ToRemove.insert(std::make_pair(Graph, F));
      }
    } else if(ConstantExpr *CE = dyn_cast<ConstantExpr>(*ii)) {
      if(CE->isCast()) {
        for (Value::use_iterator ci = CE->use_begin(), ce = CE->use_end();
             ci != ce; ++ci) {
          if (CallInst* CI = dyn_cast<CallInst>(*ci)){
            if(CI->getCalledValue() == CE) {
              DSGraph* Graph = getDSGraph(*CI->getParent()->getParent());
              //delete the call
              DEBUG(errs() << "Removing " << F->getName().str() << " from "
                    << CI->getParent()->getParent()->getName().str() << "\n");
              ToRemove.insert(std::make_pair(Graph, F));
            }
          }
        }
      }
    }

  for(DenseSet<RemovalPair>::iterator I = ToRemove.begin(), E = ToRemove.end();
      I != E; ++I)
    I->first->removeFunctionCalls(*I->second);
}
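
// Illustrative driver (a sketch, not from the original pass): erase direct
// DSCallsites for every recognized standard library function.  The
// isRecognizedStdLibFunction() predicate is a hypothetical name.
void StdLibDataStructures::eraseStdLibCallSites(Module &M) {
  for (Module::iterator F = M.begin(); F != M.end(); ++F)
    if (isRecognizedStdLibFunction(*F))  // hypothetical predicate
      eraseCallsTo(&*F);
}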
Example #10
bool SIDebuggerInsertNops::runOnMachineFunction(MachineFunction &MF) {
  // Skip this pass if "amdgpu-debugger-insert-nops" attribute was not
  // specified.
  const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
  if (!ST.debuggerInsertNops())
    return false;

  // Skip machine functions without debug info.
  if (!MF.getMMI().hasDebugInfo())
    return false;

  // Target instruction info.
  const SIInstrInfo *TII =
    static_cast<const SIInstrInfo*>(MF.getSubtarget().getInstrInfo());

  // Set containing line numbers that have nop inserted.
  DenseSet<unsigned> NopInserted;

  for (auto &MBB : MF) {
    for (auto MI = MBB.begin(); MI != MBB.end(); ++MI) {
      // Skip DBG_VALUE instructions and instructions without location.
      if (MI->isDebugValue() || !MI->getDebugLoc())
        continue;

      // Insert nop instruction if line number does not have nop inserted.
      auto DL = MI->getDebugLoc();
      if (NopInserted.find(DL.getLine()) == NopInserted.end()) {
        BuildMI(MBB, *MI, DL, TII->get(AMDGPU::S_NOP))
          .addImm(0);
        NopInserted.insert(DL.getLine());
      }
    }
  }

  return true;
}
// Reroll the provided loop with respect to the provided induction variable.
// Generally, we're looking for a loop like this:
//
// %iv = phi [ (preheader, ...), (body, %iv.next) ]
// f(%iv)
// %iv.1 = add %iv, 1                <-- a root increment
// f(%iv.1)
// %iv.2 = add %iv, 2                <-- a root increment
// f(%iv.2)
// %iv.scale_m_1 = add %iv, scale-1  <-- a root increment
// f(%iv.scale_m_1)
// ...
// %iv.next = add %iv, scale
// %cmp = icmp(%iv, ...)
// br %cmp, header, exit
//
// Notably, we do not require that f(%iv), f(%iv.1), etc. be isolated groups of
// instructions. In other words, the instructions in f(%iv), f(%iv.1), etc. can
// be intermixed with each other. The restriction imposed by this algorithm is
// that the relative order of the isomorphic instructions in f(%iv), f(%iv.1),
// etc. be the same.
//
// First, we collect the use set of %iv, excluding the other increment roots.
// This gives us f(%iv). Then we iterate over the loop instructions (scale-1)
// times, having collected the use set of f(%iv.(i+1)), during which we:
//   - Ensure that the next unmatched instruction in f(%iv) is isomorphic to
//     the next unmatched instruction in f(%iv.(i+1)).
//   - Ensure that both matched instructions don't have any external users
//     (with the exception of last-in-chain reduction instructions).
//   - Track the (aliasing) write set, and other side effects, of all
//     instructions that belong to future iterations that come before the matched
//     instructions. If the matched instructions read from that write set, then
//     f(%iv) or f(%iv.(i+1)) has some dependency on instructions in
//     f(%iv.(j+1)) for some j > i, and we cannot reroll the loop. Similarly,
//     if any of these future instructions had side effects (could not be
//     speculatively executed), and so do the matched instructions, then we
//     cannot reorder those side-effect-producing instructions, and rerolling
//     fails.
//
// Finally, we make sure that all loop instructions are either loop increment
// roots, belong to simple latch code, parts of validated reductions, part of
// f(%iv) or part of some f(%iv.i). If all of that is true (and all reductions
// have been validated), then we reroll the loop.
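//
// After a successful reroll, only the base copy f(%iv) remains: the new
// induction variable steps by one, and the iteration count is adjusted so
// that the single copy covers all `scale` unrolled copies.  A sketch of the
// intended result (not literal pass output):
//
// %iv = phi [ (preheader, ...), (body, %iv.next) ]
// f(%iv)
// %iv.next = add %iv, 1
// %cmp = icmp eq %iv, adjusted_iteration_count - 1
// br %cmp, exit, header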
bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header,
                        const SCEV *IterCount,
                        ReductionTracker &Reductions) {
  const SCEVAddRecExpr *RealIVSCEV = cast<SCEVAddRecExpr>(SE->getSCEV(IV));
  uint64_t Inc = cast<SCEVConstant>(RealIVSCEV->getOperand(1))->
                   getValue()->getZExtValue();
  // The collection of loop increment instructions.
  SmallInstructionVector LoopIncs;
  uint64_t Scale = Inc;

  // The effective induction variable, IV, is normally also the real induction
  // variable. When we're dealing with a loop like:
  //   for (int i = 0; i < 500; ++i)
  //     x[3*i] = ...;
  //     x[3*i+1] = ...;
  //     x[3*i+2] = ...;
  // then the real IV is still i, but the effective IV is (3*i).
  Instruction *RealIV = IV;
  if (Inc == 1 && !findScaleFromMul(RealIV, Scale, IV, LoopIncs))
    return false;

  assert(Scale <= MaxInc && "Scale is too large");
  assert(Scale > 1 && "Scale must be at least 2");

  // The set of increment instructions for each increment value.
  SmallVector<SmallInstructionVector, 32> Roots(Scale-1);
  SmallInstructionSet AllRoots;
  if (!collectAllRoots(L, Inc, Scale, IV, Roots, AllRoots, LoopIncs))
    return false;

  DEBUG(dbgs() << "LRR: Found all root induction increments for: " <<
                  *RealIV << "\n");

  // An array of just the possible reductions for this scale factor. When we
  // collect the set of all users of some root instructions, these reduction
  // instructions are treated as 'final' (their uses are not considered).
  // This is important because we don't want the root use set to search down
  // the reduction chain.
  SmallInstructionSet PossibleRedSet;
  SmallInstructionSet PossibleRedLastSet, PossibleRedPHISet;
  Reductions.restrictToScale(Scale, PossibleRedSet, PossibleRedPHISet,
                             PossibleRedLastSet);

  // We now need to check for equivalence of the use graph of each root with
  // that of the primary induction variable (excluding the roots). Our goal
  // here is not to solve the full graph isomorphism problem, but rather to
  // catch common cases without a lot of work. As a result, we will assume
  // that the relative order of the instructions in each unrolled iteration
  // is the same (although we will not make an assumption about how the
  // different iterations are intermixed). Note that while the order must be
  // the same, the instructions may not be in the same basic block.
  SmallInstructionSet Exclude(AllRoots);
  Exclude.insert(LoopIncs.begin(), LoopIncs.end());

  DenseSet<Instruction *> BaseUseSet;
  collectInLoopUserSet(L, IV, Exclude, PossibleRedSet, BaseUseSet);

  DenseSet<Instruction *> AllRootUses;
  std::vector<DenseSet<Instruction *> > RootUseSets(Scale-1);

  bool MatchFailed = false;
  for (unsigned i = 0; i < Scale-1 && !MatchFailed; ++i) {
    DenseSet<Instruction *> &RootUseSet = RootUseSets[i];
    collectInLoopUserSet(L, Roots[i], SmallInstructionSet(),
                         PossibleRedSet, RootUseSet);

    DEBUG(dbgs() << "LRR: base use set size: " << BaseUseSet.size() <<
                    " vs. iteration increment " << (i+1) <<
                    " use set size: " << RootUseSet.size() << "\n");

    if (BaseUseSet.size() != RootUseSet.size()) {
      MatchFailed = true;
      break;
    }

    // In addition to regular aliasing information, we need to look for
    // instructions from later (future) iterations that have side effects
    // preventing us from reordering them past other instructions with side
    // effects.
    bool FutureSideEffects = false;
    AliasSetTracker AST(*AA);

    // The map between instructions in f(%iv.(i+1)) and f(%iv).
    DenseMap<Value *, Value *> BaseMap;

    assert(L->getNumBlocks() == 1 && "Cannot handle multi-block loops");
    for (BasicBlock::iterator J1 = Header->begin(), J2 = Header->begin(),
         JE = Header->end(); J1 != JE && !MatchFailed; ++J1) {
      if (cast<Instruction>(J1) == RealIV)
        continue;
      if (cast<Instruction>(J1) == IV)
        continue;
      if (!BaseUseSet.count(J1))
        continue;
      if (PossibleRedPHISet.count(J1)) // Skip reduction PHIs.
        continue;

      while (J2 != JE && (!RootUseSet.count(J2) ||
             std::find(Roots[i].begin(), Roots[i].end(), J2) !=
               Roots[i].end())) {
        // As we iterate through the instructions, instructions that don't
        // belong to previous iterations (or the base case), must belong to
        // future iterations. We want to track the alias set of writes from
        // previous iterations.
        if (!isa<PHINode>(J2) && !BaseUseSet.count(J2) &&
            !AllRootUses.count(J2)) {
          if (J2->mayWriteToMemory())
            AST.add(J2);

          // Note: This is specifically guarded by a check on isa<PHINode>,
          // which while a valid (somewhat arbitrary) micro-optimization, is
          // needed because otherwise isSafeToSpeculativelyExecute returns
          // false on PHI nodes.
          if (!isSimpleLoadStore(J2) && !isSafeToSpeculativelyExecute(J2, DL))
            FutureSideEffects = true; 
        }

        ++J2;
      }

      if (!J1->isSameOperationAs(J2)) {
        DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 <<
                        " vs. " << *J2 << "\n");
        MatchFailed = true;
        break;
      }

      // Make sure that this instruction, which is in the use set of this
      // root instruction, does not also belong to the base set or the set of
      // some previous root instruction.
      if (BaseUseSet.count(J2) || AllRootUses.count(J2)) {
        DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 <<
                        " vs. " << *J2 << " (prev. case overlap)\n");
        MatchFailed = true;
        break;
      }

      // Make sure that we don't alias with any instruction in the alias set
      // tracker. If we do, then we depend on a future iteration, and we
      // can't reroll.
      if (J2->mayReadFromMemory()) {
        for (AliasSetTracker::iterator K = AST.begin(), KE = AST.end();
             K != KE && !MatchFailed; ++K) {
          if (K->aliasesUnknownInst(J2, *AA)) {
            DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 <<
                            " vs. " << *J2 << " (depends on future store)\n");
            MatchFailed = true;
            break;
          }
        }
      }

      // If we've passed an instruction from a future iteration that may have
      // side effects, and this instruction might also, then we can't reorder
      // them, and this matching fails. As an exception, we allow the alias
      // set tracker to handle regular (simple) load/store dependencies.
      if (FutureSideEffects &&
            ((!isSimpleLoadStore(J1) && !isSafeToSpeculativelyExecute(J1)) ||
             (!isSimpleLoadStore(J2) && !isSafeToSpeculativelyExecute(J2)))) {
        DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 <<
                        " vs. " << *J2 <<
                        " (side effects prevent reordering)\n");
        MatchFailed = true;
        break;
      }

      // For instructions that are part of a reduction, if the operation is
      // associative, then don't bother matching the operands (because we
      // already know that the instructions are isomorphic, and the order
      // within the iteration does not matter). For non-associative reductions,
      // we do need to match the operands, because we need to reject
      // out-of-order instructions within an iteration!
      // For example (assume floating-point addition), we need to reject this:
      //   x += a[i]; x += b[i];
      //   x += a[i+1]; x += b[i+1];
      //   x += b[i+2]; x += a[i+2];
      bool InReduction = Reductions.isPairInSame(J1, J2);

      if (!(InReduction && J1->isAssociative())) {
        bool Swapped = false, SomeOpMatched = false;
        for (unsigned j = 0; j < J1->getNumOperands() && !MatchFailed; ++j) {
          Value *Op2 = J2->getOperand(j);

          // If this is part of a reduction (and the operation is not
          // associative), then we match all operands, but not those that are
          // part of the reduction.
          if (InReduction)
            if (Instruction *Op2I = dyn_cast<Instruction>(Op2))
              if (Reductions.isPairInSame(J2, Op2I))
                continue;

          DenseMap<Value *, Value *>::iterator BMI = BaseMap.find(Op2);
          if (BMI != BaseMap.end())
            Op2 = BMI->second;
          else if (std::find(Roots[i].begin(), Roots[i].end(),
                             (Instruction*) Op2) != Roots[i].end())
            Op2 = IV;

          if (J1->getOperand(Swapped ? unsigned(!j) : j) != Op2) {
            // If we've not already decided to swap the matched operands, and
            // we've not already matched our first operand (note that we could
            // have skipped matching the first operand because it is part of a
            // reduction above), and the instruction is commutative, then try
            // the swapped match.
            if (!Swapped && J1->isCommutative() && !SomeOpMatched &&
                J1->getOperand(!j) == Op2) {
              Swapped = true;
            } else {
              DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 <<
                              " vs. " << *J2 << " (operand " << j << ")\n");
              MatchFailed = true;
              break;
            }
          }

          SomeOpMatched = true;
        }
      }

      if ((!PossibleRedLastSet.count(J1) && hasUsesOutsideLoop(J1, L)) ||
          (!PossibleRedLastSet.count(J2) && hasUsesOutsideLoop(J2, L))) {
        DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 <<
                        " vs. " << *J2 << " (uses outside loop)\n");
        MatchFailed = true;
        break;
      }

      if (!MatchFailed)
        BaseMap.insert(std::pair<Value *, Value *>(J2, J1));

      AllRootUses.insert(J2);
      Reductions.recordPair(J1, J2, i+1);

      ++J2;
    }
  }

  if (MatchFailed)
    return false;

  DEBUG(dbgs() << "LRR: Matched all iteration increments for " <<
                  *RealIV << "\n");

  DenseSet<Instruction *> LoopIncUseSet;
  collectInLoopUserSet(L, LoopIncs, SmallInstructionSet(),
                       SmallInstructionSet(), LoopIncUseSet);
  DEBUG(dbgs() << "LRR: Loop increment set size: " <<
                  LoopIncUseSet.size() << "\n");

  // Make sure that all instructions in the loop have been included in some
  // use set.
  for (BasicBlock::iterator J = Header->begin(), JE = Header->end();
       J != JE; ++J) {
    if (isa<DbgInfoIntrinsic>(J))
      continue;
    if (cast<Instruction>(J) == RealIV)
      continue;
    if (cast<Instruction>(J) == IV)
      continue;
    if (BaseUseSet.count(J) || AllRootUses.count(J) ||
        (LoopIncUseSet.count(J) && (J->isTerminator() ||
                                    isSafeToSpeculativelyExecute(J, DL))))
      continue;

    if (AllRoots.count(J))
      continue;

    if (Reductions.isSelectedPHI(J))
      continue;

    DEBUG(dbgs() << "LRR: aborting reroll based on " << *RealIV <<
                    " unprocessed instruction found: " << *J << "\n");
    MatchFailed = true;
    break;
  }

  if (MatchFailed)
    return false;

  DEBUG(dbgs() << "LRR: all instructions processed from " <<
                  *RealIV << "\n");

  if (!Reductions.validateSelected())
    return false;

  // At this point, we've validated the rerolling, and we're committed to
  // making changes!

  Reductions.replaceSelected();

  // Remove instructions associated with non-base iterations.
  for (BasicBlock::reverse_iterator J = Header->rbegin();
       J != Header->rend();) {
    if (AllRootUses.count(&*J)) {
      Instruction *D = &*J;
      DEBUG(dbgs() << "LRR: removing: " << *D << "\n");
      D->eraseFromParent();
      continue;
    }

    ++J; 
  }

  // Insert the new induction variable.
  const SCEV *Start = RealIVSCEV->getStart();
  if (Inc == 1)
    Start = SE->getMulExpr(Start,
                           SE->getConstant(Start->getType(), Scale));
  const SCEVAddRecExpr *H =
    cast<SCEVAddRecExpr>(SE->getAddRecExpr(Start,
                           SE->getConstant(RealIVSCEV->getType(), 1),
                           L, SCEV::FlagAnyWrap));
  { // Limit the lifetime of SCEVExpander.
    SCEVExpander Expander(*SE, "reroll");
    Value *NewIV = Expander.expandCodeFor(H, IV->getType(), Header->begin());

    for (DenseSet<Instruction *>::iterator J = BaseUseSet.begin(),
         JE = BaseUseSet.end(); J != JE; ++J)
      (*J)->replaceUsesOfWith(IV, NewIV);

    if (BranchInst *BI = dyn_cast<BranchInst>(Header->getTerminator())) {
      if (LoopIncUseSet.count(BI)) {
        const SCEV *ICSCEV = RealIVSCEV->evaluateAtIteration(IterCount, *SE);
        if (Inc == 1)
          ICSCEV =
            SE->getMulExpr(ICSCEV, SE->getConstant(ICSCEV->getType(), Scale));
        // Iteration count SCEV minus 1
        const SCEV *ICMinus1SCEV =
          SE->getMinusSCEV(ICSCEV, SE->getConstant(ICSCEV->getType(), 1));

        Value *ICMinus1; // Iteration count minus 1
        if (isa<SCEVConstant>(ICMinus1SCEV)) {
          ICMinus1 = Expander.expandCodeFor(ICMinus1SCEV, NewIV->getType(), BI);
        } else {
          BasicBlock *Preheader = L->getLoopPreheader();
          if (!Preheader)
            Preheader = InsertPreheaderForLoop(L, this);

          ICMinus1 = Expander.expandCodeFor(ICMinus1SCEV, NewIV->getType(),
                                            Preheader->getTerminator());
        }
 
        Value *Cond = new ICmpInst(BI, CmpInst::ICMP_EQ, NewIV, ICMinus1,
                                   "exitcond");
        BI->setCondition(Cond);

        if (BI->getSuccessor(1) != Header)
          BI->swapSuccessors();
      }
    }
  }

  SimplifyInstructionsInBlock(Header, DL, TLI);
  DeleteDeadPHIs(Header, TLI);
  ++NumRerolledLoops;
  return true;
}
Example #12
MCFunction
MCFunction::createFunctionFromMC(StringRef Name, const MCDisassembler *DisAsm,
                                 const MemoryObject &Region, uint64_t Start,
                                 uint64_t End, const MCInstrAnalysis *Ana,
                                 raw_ostream &DebugOut,
                                 SmallVectorImpl<uint64_t> &Calls) {
  std::vector<MCDecodedInst> Instructions;
  std::set<uint64_t> Splits;
  Splits.insert(Start);
  uint64_t Size;

  MCFunction f(Name);

  {
  DenseSet<uint64_t> VisitedInsts;
  SmallVector<uint64_t, 16> WorkList;
  WorkList.push_back(Start);
  // Disassemble code and gather basic block split points.
  while (!WorkList.empty()) {
    uint64_t Index = WorkList.pop_back_val();
    if (VisitedInsts.find(Index) != VisitedInsts.end())
      continue; // Already visited this location.

    for (;Index < End; Index += Size) {
      VisitedInsts.insert(Index);

      MCInst Inst;
      if (DisAsm->getInstruction(Inst, Size, Region, Index, DebugOut, nulls())){
        Instructions.push_back(MCDecodedInst(Index, Size, Inst));
        if (Ana->isBranch(Inst)) {
          uint64_t targ = Ana->evaluateBranch(Inst, Index, Size);
          if (targ != -1ULL && targ == Index+Size)
            continue; // Skip nop jumps.

          // If we could determine the branch target, make a note to start a
          // new basic block there and add the target to the worklist.
          if (targ != -1ULL) {
            Splits.insert(targ);
            WorkList.push_back(targ);
            WorkList.push_back(Index+Size);
          }
          Splits.insert(Index+Size);
          break;
        } else if (Ana->isReturn(Inst)) {
          // Return instruction. This basic block ends here.
          Splits.insert(Index+Size);
          break;
        } else if (Ana->isCall(Inst)) {
          uint64_t targ = Ana->evaluateBranch(Inst, Index, Size);
          // Add the call to the call list if the destination is known.
          if (targ != -1ULL && targ != Index+Size)
            Calls.push_back(targ);
        }
      } else {
        errs().write_hex(Index) << ": warning: invalid instruction encoding\n";
        if (Size == 0)
          Size = 1; // skip illegible bytes
      }
    }
  }
  }

  // Make sure the instruction list is sorted.
  std::sort(Instructions.begin(), Instructions.end());

  // Create basic blocks.
  unsigned ii = 0, ie = Instructions.size();
  for (std::set<uint64_t>::iterator spi = Splits.begin(),
       spe = llvm::prior(Splits.end()); spi != spe; ++spi) {
    MCBasicBlock BB;
    uint64_t BlockEnd = *llvm::next(spi);
    // Add instructions to the BB.
    for (; ii != ie; ++ii) {
      if (Instructions[ii].Address < *spi ||
          Instructions[ii].Address >= BlockEnd)
        break;
      BB.addInst(Instructions[ii]);
    }
    f.addBlock(*spi, BB);
  }

  std::sort(f.Blocks.begin(), f.Blocks.end());

  // Calculate successors of each block.
  for (MCFunction::iterator i = f.begin(), e = f.end(); i != e; ++i) {
    MCBasicBlock &BB = const_cast<MCBasicBlock&>(i->second);
    if (BB.getInsts().empty()) continue;
    const MCDecodedInst &Inst = BB.getInsts().back();

    if (Ana->isBranch(Inst.Inst)) {
      uint64_t targ = Ana->evaluateBranch(Inst.Inst, Inst.Address, Inst.Size);
      if (targ == -1ULL) {
        // Indirect branch.  Bail and add all blocks of the function as
        // successors.
        for (MCFunction::iterator i = f.begin(), e = f.end(); i != e; ++i)
          BB.addSucc(i->first);
      } else if (targ != Inst.Address+Inst.Size)
        BB.addSucc(targ);
      // Conditional branches can also fall through to the next block.
      if (Ana->isConditionalBranch(Inst.Inst) && llvm::next(i) != e)
        BB.addSucc(llvm::next(i)->first);
    } else {
      // No branch. Fall through to the next block.
      if (!Ana->isReturn(Inst.Inst) && llvm::next(i) != e)
        BB.addSucc(llvm::next(i)->first);
    }
  }

  return f;
}
/// canonicalizeInputFunction - Functions like swift_retain return an
/// argument as a low-level performance optimization.  This makes it difficult
/// to reason about pointer equality though, so undo it as an initial
/// canonicalization step.  After this step, the result of a retain call is
/// no longer used, which makes pointer equality straightforward to reason
/// about.
///
/// This also does some trivial peep-hole optimizations as we go.
static bool canonicalizeInputFunction(Function &F, ARCEntryPointBuilder &B,
                                      SwiftRCIdentity *RC) {
  bool Changed = false;
  DenseSet<Value *> NativeRefs;
  DenseMap<Value *, TinyPtrVector<Instruction *>> UnknownRetains;
  DenseMap<Value *, TinyPtrVector<Instruction *>> UnknownReleases;
  for (auto &BB : F) {
    UnknownRetains.clear();
    UnknownReleases.clear();
    NativeRefs.clear();
    for (auto I = BB.begin(); I != BB.end(); ) {
      Instruction &Inst = *I++;

      switch (classifyInstruction(Inst)) {
      // These instructions should not reach here based on the pass ordering.
      // i.e. LLVMARCOpt -> LLVMContractOpt.
      case RT_RetainN:
      case RT_UnknownRetainN:
      case RT_BridgeRetainN:
      case RT_ReleaseN:
      case RT_UnknownReleaseN:
      case RT_BridgeReleaseN:
        llvm_unreachable("These are only created by LLVMARCContract !");
      case RT_Unknown:
      case RT_BridgeRelease:
      case RT_AllocObject:
      case RT_FixLifetime:
      case RT_NoMemoryAccessed:
      case RT_RetainUnowned:
      case RT_CheckUnowned:
        break;
      case RT_Retain: {
        CallInst &CI = cast<CallInst>(Inst);
        Value *ArgVal = RC->getSwiftRCIdentityRoot(CI.getArgOperand(0));
        // retain(null) is a no-op.
        if (isa<ConstantPointerNull>(ArgVal)) {
          CI.eraseFromParent();
          Changed = true;
          ++NumNoopDeleted;
          continue;
        }
        // Rewrite unknown retains into swift_retains.
        NativeRefs.insert(ArgVal);
        for (auto &X : UnknownRetains[ArgVal]) {
          B.setInsertPoint(X);
          B.createRetain(ArgVal, cast<CallInst>(X));
          X->eraseFromParent();
          ++NumUnknownRetainReleaseSRed;
          Changed = true;
        }
        UnknownRetains[ArgVal].clear();
        break;
      }
      case RT_UnknownRetain: {
        CallInst &CI = cast<CallInst>(Inst);
        Value *ArgVal = RC->getSwiftRCIdentityRoot(CI.getArgOperand(0));
        // unknownRetain(null) is a no-op.
        if (isa<ConstantPointerNull>(ArgVal)) {
          CI.eraseFromParent();
          Changed = true;
          ++NumNoopDeleted;
          continue;
        }

        // We have not yet encountered a strong retain/release of this value,
        // so keep it in the unknown retain/release list for now.  It might
        // get replaced later.
        if (NativeRefs.find(ArgVal) == NativeRefs.end()) {
          UnknownRetains[ArgVal].push_back(&CI);
        } else {
          B.setInsertPoint(&CI);
          B.createRetain(ArgVal, &CI);
          CI.eraseFromParent();
          ++NumUnknownRetainReleaseSRed;
          Changed = true;
        }
        break;
      }
      case RT_Release: {
        CallInst &CI = cast<CallInst>(Inst);
        Value *ArgVal = RC->getSwiftRCIdentityRoot(CI.getArgOperand(0));
        // release(null) is a no-op.
        if (isa<ConstantPointerNull>(ArgVal)) {
          CI.eraseFromParent();
          Changed = true;
          ++NumNoopDeleted;
          continue;
        }
        // Rewrite unknown releases into swift_releases.
        NativeRefs.insert(ArgVal);
        for (auto &X : UnknownReleases[ArgVal]) {
          B.setInsertPoint(X);
          B.createRelease(ArgVal, cast<CallInst>(X));
          X->eraseFromParent();
          ++NumUnknownRetainReleaseSRed;
          Changed = true;
        }
        UnknownReleases[ArgVal].clear();
        break;
      }
      case RT_UnknownRelease: {
        CallInst &CI = cast<CallInst>(Inst);
        Value *ArgVal = RC->getSwiftRCIdentityRoot(CI.getArgOperand(0));
        // unknownRelease(null) is a no-op.
        if (isa<ConstantPointerNull>(ArgVal)) {
          CI.eraseFromParent();
          Changed = true;
          ++NumNoopDeleted;
          continue;
        }

        // We have not yet encountered a strong retain/release of this value,
        // so keep it in the unknown retain/release list for now.  It might
        // get replaced later.
        if (NativeRefs.find(ArgVal) == NativeRefs.end()) {
          UnknownReleases[ArgVal].push_back(&CI);
        } else {
          B.setInsertPoint(&CI);
          B.createRelease(ArgVal, &CI);
          CI.eraseFromParent();
          ++NumUnknownRetainReleaseSRed;
          Changed = true;
        }
        break;
      }
      case RT_ObjCRelease: {
        CallInst &CI = cast<CallInst>(Inst);
        Value *ArgVal = RC->getSwiftRCIdentityRoot(CI.getArgOperand(0));
        // objc_release(null) is a noop, zap it.
        if (isa<ConstantPointerNull>(ArgVal)) {
          CI.eraseFromParent();
          Changed = true;
          ++NumNoopDeleted;
          continue;
        }
        break;
      }

      // These retain instructions return their argument so must be processed
      // specially.
      case RT_BridgeRetain:
      case RT_ObjCRetain: {
        // Canonicalize the retain so that nothing uses its result.
        CallInst &CI = cast<CallInst>(Inst);
        // Do not get the RC-identical value here; replaceAllUsesWith could
        // crash because the types may be different.
        Value *ArgVal = CI.getArgOperand(0);
        if (!CI.use_empty()) {
          CI.replaceAllUsesWith(ArgVal);
          Changed = true;
        }

        // {objc_retain,swift_unknownRetain}(null) is a noop, delete it.
        if (isa<ConstantPointerNull>(ArgVal)) {
          CI.eraseFromParent();
          Changed = true;
          ++NumNoopDeleted;
          continue;
        }

        break;
      }
      }
    }
  }
  return Changed;
}
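
//
// Sketch of the RT_BridgeRetain / RT_ObjCRetain canonicalization performed
// above (illustrative IR, not output captured from this pass): uses of the
// call's result are redirected to its argument, so the retain's result dies.
//
//   before:  %r = call i8* @objc_retain(i8* %p)
//            store i8* %r, i8** %slot
//
//   after:   %r = call i8* @objc_retain(i8* %p)   ; result now unused
//            store i8* %p, i8** %slot
//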
//
// Function: GetNodesReachableFromGlobals()
//
// Description:
//  This function finds all DSNodes which are reachable from globals.  It finds
//  DSNodes both within the local DSGraph as well as in the Globals graph that
//  are reachable from globals.  It does, however, filter out those DSNodes
//  which are of no interest to automatic pool allocation.
//
// Inputs:
//  G - The DSGraph for which to find DSNodes which are reachable by globals.
//      This DSGraph can either by a DSGraph associated with a function *or*
//      it can be the globals graph itself.
//
// Outputs:
//  NodesFromGlobals - A reference to a container object in which to record
//                     DSNodes reachable from globals.  DSNodes are *added* to
//                     this container; it is not cleared by this function.
//                     DSNodes from both the local and globals graph are added.
void
AllHeapNodesHeuristic::GetNodesReachableFromGlobals (DSGraph* G,
                              DenseSet<const DSNode*> &NodesFromGlobals) {
  //
  // Get the globals graph associated with this DSGraph.  If the globals graph
  // is NULL, then the graph that was passed in *is* the globals graph.
  //
  DSGraph * GlobalsGraph = G->getGlobalsGraph();
  if (!GlobalsGraph)
    GlobalsGraph = G;

  //
  // Find all DSNodes which are reachable in the globals graph.
  //
  for (DSGraph::node_iterator NI = GlobalsGraph->node_begin();
       NI != GlobalsGraph->node_end();
       ++NI) {
    NI->markReachableNodes(NodesFromGlobals);
  }

  //
  // Remove those global nodes which we know will never be pool allocated.
  //
  
  std::vector<const DSNode *> toRemove;
  for (DenseSet<const DSNode*>::iterator I = NodesFromGlobals.begin(),
         E = NodesFromGlobals.end(); I != E; ) {
    DenseSet<const DSNode*>::iterator Last = I; ++I;

    const DSNode *tmp = *Last;
    if (!(tmp->isHeapNode())) 
      toRemove.push_back (tmp);
    // Do not pool allocate nodes that are cast to an integer.  Since we do
    // not track values through integers, such nodes could be escaping.
    if (tmp->isPtrToIntNode())
      toRemove.push_back(tmp);
  }
 
  //
  // Remove all globally reachable DSNodes which do not require pools.
  //
  for (unsigned index = 0; index < toRemove.size(); ++index) {
    NodesFromGlobals.erase(toRemove[index]);
  }

  //
  // Now the fun part.  Find DSNodes in the local graph that correspond to
  // those nodes reachable in the globals graph.  Add them to the set of
  // reachable nodes, too.
  //
  if (G->getGlobalsGraph()) {
    //
    // Compute a mapping between local DSNodes and DSNodes in the globals
    // graph.
    //
    DSGraph::NodeMapTy NodeMap;
    G->computeGToGGMapping (NodeMap);

    //
    // Scan through all DSNodes in the local graph.  If a local DSNode has a
    // corresponding DSNode in the globals graph that is reachable from a 
    // global, then add the local DSNode to the set of DSNodes reachable from a
    // global.
    //
    // FIXME: A node's existence within the global DSGraph is probably
    //        sufficient evidence that it is reachable from a global.
    //

    DSGraph::node_iterator ni = G->node_begin();
    for (; ni != G->node_end(); ++ni) {
      DSNode * N = ni;
      if (NodesFromGlobals.count (NodeMap[N].getNode()))
        NodesFromGlobals.insert (N);
    }
  }
}
Example #15
/// \brief Figure out if the loop is worth full unrolling.
///
/// Complete loop unrolling can make some loads constant, and we need to know
/// if that would expose any further optimization opportunities.  This routine
/// estimates this optimization.  It computes cost of unrolled loop
/// (UnrolledCost) and dynamic cost of the original loop (RolledDynamicCost). By
/// dynamic cost we mean that we won't count costs of blocks that are known not
/// to be executed (i.e. if we have a branch in the loop and we know that at the
/// given iteration its condition would be resolved to true, we won't add up the
/// cost of the 'false'-block).
/// \returns Optional value, holding the RolledDynamicCost and UnrolledCost. If
/// the analysis failed (no benefits expected from the unrolling, or the loop is
/// too big to analyze), the returned value is None.
static Optional<EstimatedUnrollCost>
analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT,
                      ScalarEvolution &SE, const TargetTransformInfo &TTI,
                      int MaxUnrolledLoopSize) {
  // We want to be able to scale offsets by the trip count and add more offsets
  // to them without checking for overflows, and we already don't want to
  // analyze *massive* trip counts, so we force the max to be reasonably small.
  assert(UnrollMaxIterationsCountToAnalyze < (INT_MAX / 2) &&
         "The unroll iterations max is too large!");

  // Only analyze inner loops. We can't properly estimate cost of nested loops
  // and we won't visit inner loops again anyway.
  if (!L->empty())
    return None;

  // Don't simulate loops with a big or unknown trip count.
  if (!UnrollMaxIterationsCountToAnalyze || !TripCount ||
      TripCount > UnrollMaxIterationsCountToAnalyze)
    return None;

  SmallSetVector<BasicBlock *, 16> BBWorklist;
  SmallSetVector<std::pair<BasicBlock *, BasicBlock *>, 4> ExitWorklist;
  DenseMap<Value *, Constant *> SimplifiedValues;
  SmallVector<std::pair<Value *, Constant *>, 4> SimplifiedInputValues;

  // The estimated cost of the unrolled form of the loop. We try to estimate
  // this by simplifying as much as we can while computing the estimate.
  int UnrolledCost = 0;

  // We also track the estimated dynamic (that is, actually executed) cost in
  // the rolled form. This helps identify cases when the savings from unrolling
  // aren't just exposing dead control flows, but actual reduced dynamic
  // instructions due to the simplifications which we expect to occur after
  // unrolling.
  int RolledDynamicCost = 0;

  // We track the simplification of each instruction in each iteration. We use
  // this to recursively merge costs into the unrolled cost on-demand so that
  // we don't count the cost of any dead code. This is essentially a map from
  // <instruction, int> to <bool, bool>, but stored as a densely packed struct.
  DenseSet<UnrolledInstState, UnrolledInstStateKeyInfo> InstCostMap;

  // A small worklist used to accumulate cost of instructions from each
  // observable and reached root in the loop.
  SmallVector<Instruction *, 16> CostWorklist;

  // PHI-used worklist used between iterations while accumulating cost.
  SmallVector<Instruction *, 4> PHIUsedList;

  // Helper function to accumulate cost for instructions in the loop.
  auto AddCostRecursively = [&](Instruction &RootI, int Iteration) {
    assert(Iteration >= 0 && "Cannot have a negative iteration!");
    assert(CostWorklist.empty() && "Must start with an empty cost list");
    assert(PHIUsedList.empty() && "Must start with an empty phi used list");
    CostWorklist.push_back(&RootI);
    for (;; --Iteration) {
      do {
        Instruction *I = CostWorklist.pop_back_val();

        // InstCostMap only uses I and Iteration as a key, the other two values
        // don't matter here.
        auto CostIter = InstCostMap.find({I, Iteration, 0, 0});
        if (CostIter == InstCostMap.end())
          // If an input to a PHI node comes from a dead path through the loop
          // we may have no cost data for it here. What that actually means is
          // that it is free.
          continue;
        auto &Cost = *CostIter;
        if (Cost.IsCounted)
          // Already counted this instruction.
          continue;

        // Mark that we are counting the cost of this instruction now.
        Cost.IsCounted = true;

        // If this is a PHI node in the loop header, just add it to the PHI set.
        if (auto *PhiI = dyn_cast<PHINode>(I))
          if (PhiI->getParent() == L->getHeader()) {
            assert(Cost.IsFree && "Loop PHIs shouldn't be evaluated as they "
                                  "inherently simplify during unrolling.");
            if (Iteration == 0)
              continue;

            // Push the incoming value from the backedge into the PHI used list
            // if it is an in-loop instruction. We'll use this to populate the
            // cost worklist for the next iteration (as we count backwards).
            if (auto *OpI = dyn_cast<Instruction>(
                    PhiI->getIncomingValueForBlock(L->getLoopLatch())))
              if (L->contains(OpI))
                PHIUsedList.push_back(OpI);
            continue;
          }

        // First accumulate the cost of this instruction.
        if (!Cost.IsFree) {
          UnrolledCost += TTI.getUserCost(I);
          DEBUG(dbgs() << "Adding cost of instruction (iteration " << Iteration
                       << "): ");
          DEBUG(I->dump());
        }

        // We must count the cost of every operand which is not free,
        // recursively. If we reach a loop PHI node, simply add it to the set
        // to be considered on the next iteration (backwards!).
        for (Value *Op : I->operands()) {
          // Check whether this operand is free due to being a constant or
          // outside the loop.
          auto *OpI = dyn_cast<Instruction>(Op);
          if (!OpI || !L->contains(OpI))
            continue;

          // Otherwise accumulate its cost.
          CostWorklist.push_back(OpI);
        }
      } while (!CostWorklist.empty());

      if (PHIUsedList.empty())
        // We've exhausted the search.
        break;

      assert(Iteration > 0 &&
             "Cannot track PHI-used values past the first iteration!");
      CostWorklist.append(PHIUsedList.begin(), PHIUsedList.end());
      PHIUsedList.clear();
    }
  };

  // Ensure that we don't violate the loop structure invariants relied on by
  // this analysis.
  assert(L->isLoopSimplifyForm() && "Must put loop into normal form first.");
  assert(L->isLCSSAForm(DT) &&
         "Must have loops in LCSSA form to track live-out values.");

  DEBUG(dbgs() << "Starting LoopUnroll profitability analysis...\n");

  // Simulate execution of each iteration of the loop counting instructions,
  // which would be simplified.
  // Since the same load will take different values on different iterations,
  // we literally have to go through all loop's iterations.
  for (unsigned Iteration = 0; Iteration < TripCount; ++Iteration) {
    DEBUG(dbgs() << " Analyzing iteration " << Iteration << "\n");

    // Prepare for the iteration by collecting any simplified entry or backedge
    // inputs.
    for (Instruction &I : *L->getHeader()) {
      auto *PHI = dyn_cast<PHINode>(&I);
      if (!PHI)
        break;

      // The loop header PHI nodes must have exactly two inputs: one from the
      // loop preheader and one from the loop latch.
      assert(
          PHI->getNumIncomingValues() == 2 &&
          "Must have an incoming value only for the preheader and the latch.");

      Value *V = PHI->getIncomingValueForBlock(
          Iteration == 0 ? L->getLoopPreheader() : L->getLoopLatch());
      Constant *C = dyn_cast<Constant>(V);
      if (Iteration != 0 && !C)
        C = SimplifiedValues.lookup(V);
      if (C)
        SimplifiedInputValues.push_back({PHI, C});
    }

    // Now clear and re-populate the map for the next iteration.
    SimplifiedValues.clear();
    while (!SimplifiedInputValues.empty())
      SimplifiedValues.insert(SimplifiedInputValues.pop_back_val());

    UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues, SE, L);

    BBWorklist.clear();
    BBWorklist.insert(L->getHeader());
    // Note that we *must not* cache the size, this loop grows the worklist.
    for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
      BasicBlock *BB = BBWorklist[Idx];

      // Visit all instructions in the given basic block and try to simplify
      // it.  We don't change the actual IR, just count optimization
      // opportunities.
      for (Instruction &I : *BB) {
        // Track this instruction's expected baseline cost when executing the
        // rolled loop form.
        RolledDynamicCost += TTI.getUserCost(&I);

        // Visit the instruction to analyze its loop cost after unrolling,
        // and if the visitor returns true, mark the instruction as free after
        // unrolling and continue.
        bool IsFree = Analyzer.visit(I);
        bool Inserted = InstCostMap.insert({&I, (int)Iteration,
                                           (unsigned)IsFree,
                                           /*IsCounted*/ false}).second;
        (void)Inserted;
        assert(Inserted && "Cannot have a state for an unvisited instruction!");

        if (IsFree)
          continue;

        // If the instruction might have a side-effect recursively account for
        // the cost of it and all the instructions leading up to it.
        if (I.mayHaveSideEffects())
          AddCostRecursively(I, Iteration);

        // Can't properly model a cost of a call.
        // FIXME: With a proper cost model we should be able to do it.
        if(isa<CallInst>(&I))
          return None;

        // If unrolled body turns out to be too big, bail out.
        if (UnrolledCost > MaxUnrolledLoopSize) {
          DEBUG(dbgs() << "  Exceeded threshold.. exiting.\n"
                       << "  UnrolledCost: " << UnrolledCost
                       << ", MaxUnrolledLoopSize: " << MaxUnrolledLoopSize
                       << "\n");
          return None;
        }
      }

      TerminatorInst *TI = BB->getTerminator();

      // Add in the live successors by first checking whether we have a
      // terminator that may be simplified based on the values simplified by
      // this call.
      BasicBlock *KnownSucc = nullptr;
      if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
        if (BI->isConditional()) {
          if (Constant *SimpleCond =
                  SimplifiedValues.lookup(BI->getCondition())) {
            // Just take the first successor if condition is undef
            if (isa<UndefValue>(SimpleCond))
              KnownSucc = BI->getSuccessor(0);
            else if (ConstantInt *SimpleCondVal =
                         dyn_cast<ConstantInt>(SimpleCond))
              KnownSucc = BI->getSuccessor(SimpleCondVal->isZero() ? 1 : 0);
          }
        }
      } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
        if (Constant *SimpleCond =
                SimplifiedValues.lookup(SI->getCondition())) {
          // Just take the first successor if condition is undef
          if (isa<UndefValue>(SimpleCond))
            KnownSucc = SI->getSuccessor(0);
          else if (ConstantInt *SimpleCondVal =
                       dyn_cast<ConstantInt>(SimpleCond))
            KnownSucc = SI->findCaseValue(SimpleCondVal).getCaseSuccessor();
        }
      }
      if (KnownSucc) {
        if (L->contains(KnownSucc))
          BBWorklist.insert(KnownSucc);
        else
          ExitWorklist.insert({BB, KnownSucc});
        continue;
      }

      // Add BB's successors to the worklist.
      for (BasicBlock *Succ : successors(BB))
        if (L->contains(Succ))
          BBWorklist.insert(Succ);
        else
          ExitWorklist.insert({BB, Succ});
      AddCostRecursively(*TI, Iteration);
    }

    // If we found no optimization opportunities on the first iteration, we
    // won't find them on later ones either.
    if (UnrolledCost == RolledDynamicCost) {
      DEBUG(dbgs() << "  No opportunities found... exiting.\n"
                   << "  UnrolledCost: " << UnrolledCost << "\n");
      return None;
    }
  }

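  // Account for loop-carried values that escape through exit-block PHIs: each
  // such value forces its defining instructions inside the loop to execute on
  // the final iteration (TripCount - 1), so charge them to that iteration.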
  while (!ExitWorklist.empty()) {
    BasicBlock *ExitingBB, *ExitBB;
    std::tie(ExitingBB, ExitBB) = ExitWorklist.pop_back_val();

    for (Instruction &I : *ExitBB) {
      auto *PN = dyn_cast<PHINode>(&I);
      if (!PN)
        break;

      Value *Op = PN->getIncomingValueForBlock(ExitingBB);
      if (auto *OpI = dyn_cast<Instruction>(Op))
        if (L->contains(OpI))
          AddCostRecursively(*OpI, TripCount - 1);
    }
  }

  DEBUG(dbgs() << "Analysis finished:\n"
               << "UnrolledCost: " << UnrolledCost << ", "
               << "RolledDynamicCost: " << RolledDynamicCost << "\n");
  return {{UnrolledCost, RolledDynamicCost}};
}
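The pair returned above bundles the estimated size of the fully unrolled body (UnrolledCost) with the expected dynamic cost of the rolled loop (RolledDynamicCost). A minimal caller-side sketch of how that pair could drive the unrolling decision; the function name, signature, and threshold names here are illustrative assumptions, not the exact LLVM policy:

// Illustrative only: unroll when the unrolled body stays inside the size
// budget and removes a meaningful share of the rolled loop's dynamic cost.
if (Optional<EstimatedUnrollCost> Cost =
        analyzeLoopUnrollCost(L, TripCount, DT, SE, TTI, MaxUnrolledLoopSize)) {
  unsigned Saved = Cost->RolledDynamicCost - Cost->UnrolledCost;
  if (Cost->UnrolledCost < SizeThreshold &&                      // hypothetical budget
      Saved * 100 >= Cost->RolledDynamicCost * MinPercentSaved)  // hypothetical margin
    CompletelyUnroll = true;
}
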
Result::Sat AttemptSolutionSDP::attempt(const ApproximateSimplex::Solution& sol){
  const DenseSet& newBasis = sol.newBasis;
  const DenseMap<DeltaRational>& newValues = sol.newValues;

  DenseSet needsToBeAdded;
  for(DenseSet::const_iterator i = newBasis.begin(), i_end = newBasis.end(); i != i_end; ++i){
    ArithVar b = *i;
    if(!d_tableau.isBasic(b)){
      needsToBeAdded.add(b);
    }
  }
  DenseMap<DeltaRational>::const_iterator nvi = newValues.begin(), nvi_end = newValues.end();
  for(; nvi != nvi_end; ++nvi){
    ArithVar currentlyNb = *nvi;
    if(!d_tableau.isBasic(currentlyNb)){
      if(!matchesNewValue(newValues, currentlyNb)){
        const DeltaRational& newValue = newValues[currentlyNb];
        Trace("arith::updateMany")
          << "updateMany:" << currentlyNb << " "
          << d_variables.getAssignment(currentlyNb) << " to "<< newValue << endl;
        d_linEq.update(currentlyNb, newValue);
        Assert(d_variables.assignmentIsConsistent(currentlyNb));
      }
    }
  }
  d_errorSet.reduceToSignals();
  d_errorSet.setSelectionRule(VAR_ORDER);

  static int instance = 0;
  ++instance;

  if(processSignals()){
    Debug("arith::findModel") << "attemptSolution("<< instance <<") early conflict" << endl;
    d_conflictVariables.purge();
    return Result::UNSAT;
  }else if(d_errorSet.errorEmpty()){
    Debug("arith::findModel") << "attemptSolution("<< instance <<") fixed itself" << endl;
    return Result::SAT;
  }

  while(!needsToBeAdded.empty() && !d_errorSet.errorEmpty()){
    ArithVar toRemove = ARITHVAR_SENTINEL;
    ArithVar toAdd = ARITHVAR_SENTINEL;
    DenseSet::const_iterator i = needsToBeAdded.begin(), i_end = needsToBeAdded.end();
    for(; toAdd == ARITHVAR_SENTINEL && i != i_end; ++i){
      ArithVar v = *i;

      Tableau::ColIterator colIter = d_tableau.colIterator(v);
      for(; !colIter.atEnd(); ++colIter){
        const Tableau::Entry& entry = *colIter;
        Assert(entry.getColVar() == v);
        ArithVar b = d_tableau.rowIndexToBasic(entry.getRowIndex());
        if(!newBasis.isMember(b)){
          toAdd = v;

          bool favorBOverToRemove =
            (toRemove == ARITHVAR_SENTINEL) ||
            (matchesNewValue(newValues, toRemove) && !matchesNewValue(newValues, b)) ||
            (d_tableau.basicRowLength(toRemove) > d_tableau.basicRowLength(b));

          if(favorBOverToRemove){
            toRemove = b;
          }
        }
      }
    }
    Assert(toRemove != ARITHVAR_SENTINEL);
    Assert(toAdd != ARITHVAR_SENTINEL);

    Trace("arith::forceNewBasis") << toRemove << " " << toAdd << endl;
    //Message() << toRemove << " " << toAdd << endl;

    d_linEq.pivotAndUpdate(toRemove, toAdd, newValues[toRemove]);

    Trace("arith::forceNewBasis") << needsToBeAdded.size() << "to go" << endl;
    //Message() << needsToBeAdded.size() << "to go" << endl;
    needsToBeAdded.remove(toAdd);

    bool conflict = processSignals();
    if(conflict){
      d_errorSet.reduceToSignals();
      d_conflictVariables.purge();

      return Result::UNSAT;
    }
  }
  Assert( d_conflictVariables.empty() );

  if(d_errorSet.errorEmpty()){
    return Result::SAT;
  }else{
    d_errorSet.reduceToSignals();
    return Result::SAT_UNKNOWN;
  }
}
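attempt() calls a matchesNewValue() helper that is not shown in this excerpt. A hedged reconstruction of what it plausibly does, given the call sites above (the exact CVC4 signature may differ):

// Reconstruction, not verbatim CVC4 source: true when the variable's current
// assignment already equals the value proposed by the approximate solver, in
// which case no update (and no signal) is needed.
bool AttemptSolutionSDP::matchesNewValue(const DenseMap<DeltaRational>& newValues,
                                         ArithVar v) const {
  return newValues[v] == d_variables.getAssignment(v);
}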
Example #17
Error AnalysisStyle::dump() {
  auto Tpi = File.getPDBTpiStream();
  if (!Tpi)
    return Tpi.takeError();

  TypeDatabase TypeDB(Tpi->getNumTypeRecords());
  TypeDatabaseVisitor DBV(TypeDB);
  TypeVisitorCallbackPipeline Pipeline;
  HashLookupVisitor Hasher(*Tpi);
  // Add them to the database
  Pipeline.addCallbackToPipeline(DBV);
  // Store their hash values
  Pipeline.addCallbackToPipeline(Hasher);

  if (auto EC = codeview::visitTypeStream(Tpi->typeArray(), Pipeline))
    return EC;

  auto &Adjusters = Tpi->getHashAdjusters();
  DenseSet<uint32_t> AdjusterSet;
  for (const auto &Adj : Adjusters) {
    assert(AdjusterSet.find(Adj.second) == AdjusterSet.end());
    AdjusterSet.insert(Adj.second);
  }

  uint32_t Count = 0;
  outs() << "Searching for hash collisions\n";
  for (const auto &H : Hasher.Lookup) {
    if (H.second.size() <= 1)
      continue;
    ++Count;
    outs() << formatv("Hash: {0}, Count: {1} records\n", H.first,
                      H.second.size());
    for (const auto &R : H.second) {
      auto Iter = AdjusterSet.find(R.TI.getIndex());
      StringRef Prefix;
      if (Iter != AdjusterSet.end()) {
        Prefix = "[HEAD]";
        AdjusterSet.erase(Iter);
      }
      StringRef LeafName = getLeafTypeName(R.Record.Type);
      uint32_t TI = R.TI.getIndex();
      StringRef TypeName = TypeDB.getTypeName(R.TI);
      outs() << formatv("{0,-6} {1} ({2:x}) {3}\n", Prefix, LeafName, TI,
                        TypeName);
    }
  }

  outs() << "\n";
  outs() << "Dumping hash adjustment chains\n";
  for (const auto &A : Tpi->getHashAdjusters()) {
    TypeIndex TI(A.second);
    StringRef TypeName = TypeDB.getTypeName(TI);
    const CVType &HeadRecord = TypeDB.getTypeRecord(TI);
    assert(HeadRecord.Hash.hasValue());

    auto CollisionsIter = Hasher.Lookup.find(*HeadRecord.Hash);
    if (CollisionsIter == Hasher.Lookup.end())
      continue;

    const auto &Collisions = CollisionsIter->second;
    outs() << TypeName << "\n";
    outs() << formatv("    [HEAD] {0:x} {1} {2}\n", A.second,
                      getLeafTypeName(HeadRecord.Type), TypeName);
    for (const auto &Chain : Collisions) {
      if (Chain.TI == TI)
        continue;
      const CVType &TailRecord = TypeDB.getTypeRecord(Chain.TI);
      outs() << formatv("           {0:x} {1} {2}\n", Chain.TI.getIndex(),
                        getLeafTypeName(TailRecord.Type),
                        TypeDB.getTypeName(Chain.TI));
    }
  }
  outs() << formatv("There are {0} orphaned hash adjusters\n",
                    AdjusterSet.size());
  for (const auto &Adj : AdjusterSet) {
    outs() << formatv("    {0}\n", Adj);
  }

  uint32_t DistinctHashValues = Hasher.Lookup.size();
  outs() << formatv("{0}/{1} hash collisions", Count, DistinctHashValues);
  return Error::success();
}
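This dumper leans on HashLookupVisitor, whose definition is not shown here. A sketch of the shape the code above assumes; the member names mirror the uses above (Lookup, R.TI, R.Record), while the rest is an assumption rather than the verbatim LLVM definition:

// Assumed shape only, inferred from the uses above.
struct HashLookupVisitor : public codeview::TypeVisitorCallbacks {
  struct Entry {
    codeview::TypeIndex TI;  // index of the record in the TPI stream
    codeview::CVType Record; // the raw record; its leaf kind lives in .Type
  };
  // Hash value -> every record that hashed to it.  Buckets with more than
  // one entry are exactly the collisions reported above.
  llvm::DenseMap<uint32_t, std::vector<Entry>> Lookup;
};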
Example #18
//
// Method: findGlobalPoolNodes()
//
// Description:
//  This method finds DSNodes that are reachable from globals and that need a
//  pool.  The Automatic Pool Allocation transform will use the returned
//  information to build global pools for the DSNodes in question.
//
//  For efficiency, this method also determines which DSNodes should be in the
//  same pool.
//
// Outputs:
//  Nodes - The DSNodes that are both reachable from globals and which should
//          have global pools will be *added* to this container.
//
void
AllNodesHeuristic::findGlobalPoolNodes (DSNodeSet_t & Nodes) {
    // Get the globals graph for the program.
    DSGraph* GG = Graphs->getGlobalsGraph();

    //
    // Get all of the nodes reachable from globals.
    //
    DenseSet<const DSNode*> GlobalNodes;
    GetNodesReachableFromGlobals (GG, GlobalNodes);

    //
    // Create a global pool for each global DSNode.
    //
    for (DenseSet<const DSNode *>::iterator NI = GlobalNodes.begin();
            NI != GlobalNodes.end();
            ++NI) {
        const DSNode * N = *NI;
        PoolMap[N] = OnePool(N);
    }

    //
    // Now find all DSNodes belonging to function-local DSGraphs which are
    // mirrored in the globals graph.  These DSNodes require a global pool, too,
    // but must use the same pool as the one assigned to the corresponding global
    // DSNode.
    //
    for (Module::iterator F = M->begin(); F != M->end(); ++F) {
        //
        // Ignore functions that have no DSGraph.
        //
        if (!(Graphs->hasDSGraph(*F))) continue;

        //
        // Compute a mapping between local DSNodes and DSNodes in the globals
        // graph.
        //
        DSGraph* G = Graphs->getDSGraph(*F);
        DSGraph::NodeMapTy NodeMap;
        G->computeGToGGMapping (NodeMap);

        //
        // Scan through all DSNodes in the local graph.  If a local DSNode has a
        // corresponding DSNode in the globals graph that is reachable from a
        // global, then add the local DSNode to the set of DSNodes reachable from
        // a global.
        //
        DSGraph::node_iterator ni = G->node_begin();
        for (; ni != G->node_end(); ++ni) {
            DSNode * N = ni;
            DSNode * GGN = NodeMap[N].getNode();

            assert (!GGN || GlobalNodes.count (GGN));
            if (GGN && GlobalNodes.count (GGN))
                PoolMap[GGN].NodesInPool.push_back (N);
        }
    }

    //
    // Scan through all the local graphs looking for DSNodes which may be
    // reachable from a global.  These nodes may not end up in the globals
    // graph because DSA doesn't actually know what is happening to them.
    //
    // FIXME: I believe this code causes a condition in which a local DSNode is
    //        given a local pool in one function but not in other functions.
    //        Someone needs to investigate whether DSA is being consistent here,
    //        and if not, if that inconsistency is correct.
    //
#if 0
    for (Module::iterator F = M->begin(); F != M->end(); ++F) {
        if (F->isDeclaration()) continue;
        DSGraph* G = Graphs->getDSGraph(*F);
        for (DSGraph::node_iterator I = G->node_begin(), E = G->node_end();
                I != E;
                ++I) {
            DSNode * Node = I;
            if (Node->isExternalNode() || Node->isUnknownNode()) {
                GlobalNodes.insert (Node);
            }
        }
    }
#endif

    //
    // Copy the values into the output container.  Note that DenseSet has no
    // iterator traits (or whatever allows us to treat DenseSet as a generic
    // container), so we have to use a loop to copy values from the DenseSet
    // into the output container.
    //
    // Note that we do not copy local DSNodes into the output container; we
    // only copy the nodes that are in the globals graph.
    //
    for (DenseSet<const DSNode*>::iterator I = GlobalNodes.begin(),
            E = GlobalNodes.end(); I != E; ++I) {
        Nodes.insert (*I);
    }

    return;
}
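Incidentally, the trailing copy loop needs only begin() and end(), which DenseSet does provide; with C++11 the same copy can be written as a range-based for loop:

// Equivalent copy via range-for; it requires only begin()/end(), not the
// iterator traits the comment above laments.
for (const DSNode *N : GlobalNodes)
  Nodes.insert(N);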
Example #19
int main(int argc, char ** argv)
{
	std::cerr << std::fixed << std::setprecision(3);
	std::ofstream devnull("/dev/null");
	
	DB::ReadBufferFromFileDescriptor in(STDIN_FILENO);
	if (argc < 2)
	{
		std::cerr << "Usage: " << argv[0] << " num_strings" << std::endl;
		return 1;
	}
	size_t n = atoi(argv[1]);
	size_t elems_show = 1;

	using Vec = std::vector<std::string>;
	using Set = std::unordered_map<std::string, int>;
	using RefsSet = std::unordered_map<StringRef, int, StringRefHash>;
	using DenseSet = google::dense_hash_map<std::string, int>;
	using RefsDenseSet = google::dense_hash_map<StringRef, int, StringRefHash>;
	using RefsHashMap = HashMap<StringRef, int, StringRefHash>;
	Vec vec;

	vec.reserve(n);

	{
		Stopwatch watch;

		std::string s;
		for (size_t i = 0; i < n && !in.eof(); ++i)
		{
			DB::readEscapedString(s, in);
			DB::assertChar('\n', in);
			vec.push_back(s);
		}

		std::cerr << "Read and inserted into vector in " << watch.elapsedSeconds() << " sec, "
			<< vec.size() / watch.elapsedSeconds() << " rows/sec., "
			<< in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec."
			<< std::endl;
	}

	{
		DB::Arena pool;
		Stopwatch watch;
		const char * res = nullptr;

		for (Vec::iterator it = vec.begin(); it != vec.end(); ++it)
		{
			const char * tmp = pool.insert(it->data(), it->size());
			if (it == vec.begin())
				res = tmp;
		}

		std::cerr << "Inserted into pool in " << watch.elapsedSeconds() << " sec, "
			<< vec.size() / watch.elapsedSeconds() << " rows/sec., "
			<< in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec."
			<< std::endl;

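		// Touch a sample of the arena's contents so the compiler cannot
		// optimize the insertion loop away as dead code.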
		devnull.write(res, 100);
		devnull << std::endl;
	}

	{
		Set set;
		Stopwatch watch;

		for (Vec::iterator it = vec.begin(); it != vec.end(); ++it)
			set[*it] = 0;

		std::cerr << "Inserted into std::unordered_map in " << watch.elapsedSeconds() << " sec, "
			<< vec.size() / watch.elapsedSeconds() << " rows/sec., "
			<< in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec."
			<< std::endl;

		size_t i = 0;
		for (Set::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
		{
			devnull << it->first;
			devnull << std::endl;
		}
	}

	{
		RefsSet set;
		Stopwatch watch;

		for (Vec::iterator it = vec.begin(); it != vec.end(); ++it)
			set[StringRef(*it)] = 0;

		std::cerr << "Inserted refs into std::unordered_map in " << watch.elapsedSeconds() << " sec, "
			<< vec.size() / watch.elapsedSeconds() << " rows/sec., "
			<< in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec."
			<< std::endl;

		size_t i = 0;
		for (RefsSet::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
		{
			devnull.write(it->first.data, it->first.size);
			devnull << std::endl;
		}
	}

	{
		DB::Arena pool;
		RefsSet set;
		Stopwatch watch;
		
		for (Vec::iterator it = vec.begin(); it != vec.end(); ++it)
			set[StringRef(pool.insert(it->data(), it->size()), it->size())] = 0;

		std::cerr << "Inserted into pool and refs into std::unordered_map in " << watch.elapsedSeconds() << " sec, "
			<< vec.size() / watch.elapsedSeconds() << " rows/sec., "
			<< in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec."
			<< std::endl;

		size_t i = 0;
		for (RefsSet::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
		{
			devnull.write(it->first.data, it->first.size);
			devnull << std::endl;
		}
	}

	{
		DenseSet set;
		set.set_empty_key(DenseSet::key_type());
		Stopwatch watch;

		for (Vec::iterator it = vec.begin(); it != vec.end(); ++it)
			set[*it] = 0;

		std::cerr << "Inserted into google::dense_hash_map in " << watch.elapsedSeconds() << " sec, "
			<< vec.size() / watch.elapsedSeconds() << " rows/sec., "
			<< in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec."
			<< std::endl;

		size_t i = 0;
		for (DenseSet::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
		{
			devnull << it->first;
			devnull << std::endl;
		}
	}

	{
		RefsDenseSet set;
		set.set_empty_key(RefsDenseSet::key_type());
		Stopwatch watch;

		for (Vec::iterator it = vec.begin(); it != vec.end(); ++it)
			set[StringRef(it->data(), it->size())] = 0;

		std::cerr << "Inserted refs into google::dense_hash_map in " << watch.elapsedSeconds() << " sec, "
			<< vec.size() / watch.elapsedSeconds() << " rows/sec., "
			<< in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec."
			<< std::endl;

		size_t i = 0;
		for (RefsDenseSet::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
		{
			devnull.write(it->first.data, it->first.size);
			devnull << std::endl;
		}
	}

	{
		DB::Arena pool;
		RefsDenseSet set;
		set.set_empty_key(RefsDenseSet::key_type());
		Stopwatch watch;

		for (Vec::iterator it = vec.begin(); it != vec.end(); ++it)
			set[StringRef(pool.insert(it->data(), it->size()), it->size())] = 0;

		std::cerr << "Inserted into pool and refs into google::dense_hash_map in " << watch.elapsedSeconds() << " sec, "
			<< vec.size() / watch.elapsedSeconds() << " rows/sec., "
			<< in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec."
			<< std::endl;

		size_t i = 0;
		for (RefsDenseSet::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
		{
			devnull.write(it->first.data, it->first.size);
			devnull << std::endl;
		}
	}

	{
		RefsHashMap set;
		Stopwatch watch;

		for (Vec::iterator it = vec.begin(); it != vec.end(); ++it)
		{
			RefsHashMap::iterator inserted_it;
			bool inserted;
			set.emplace(StringRef(*it), inserted_it, inserted);
		}

		std::cerr << "Inserted refs into HashMap in " << watch.elapsedSeconds() << " sec, "
			<< vec.size() / watch.elapsedSeconds() << " rows/sec., "
			<< in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec."
			<< std::endl;

		size_t i = 0;
		for (RefsHashMap::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
		{
			devnull.write(it->first.data, it->first.size);
			devnull << std::endl;
		}

		//std::cerr << set.size() << ", " << set.getCollisions() << std::endl;
	}

	{
		DB::Arena pool;
		RefsHashMap set;
		Stopwatch watch;

		for (Vec::iterator it = vec.begin(); it != vec.end(); ++it)
		{
			RefsHashMap::iterator inserted_it;
			bool inserted;
			set.emplace(StringRef(pool.insert(it->data(), it->size()), it->size()), inserted_it, inserted);
		}

		std::cerr << "Inserted into pool and refs into HashMap in " << watch.elapsedSeconds() << " sec, "
			<< vec.size() / watch.elapsedSeconds() << " rows/sec., "
			<< in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec."
			<< std::endl;

		size_t i = 0;
		for (RefsHashMap::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
		{
			devnull.write(it->first.data, it->first.size);
			devnull << std::endl;
		}
	}

	return 0;
}
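Every block in main() above repeats the same time-run-report pattern. A sketch of a helper that could factor it out; bench, its parameters, and the usage shown are illustrative, not part of the original benchmark:

// Illustrative refactoring sketch (not in the original): one helper for the
// repeated time-run-report pattern used throughout main().
template <typename F>
void bench(const std::string & name, size_t rows, size_t bytes, F && body)
{
	Stopwatch watch;
	body();
	std::cerr << "Inserted into " << name << " in " << watch.elapsedSeconds() << " sec, "
		<< rows / watch.elapsedSeconds() << " rows/sec., "
		<< bytes / watch.elapsedSeconds() / 1000000 << " MB/sec."
		<< std::endl;
}

// Possible usage, mirroring the std::unordered_map block above:
//   bench("std::unordered_map", vec.size(), in.count(), [&]
//   {
//       Set set;
//       for (const auto & s : vec)
//           set[s] = 0;
//   });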