Ejemplo n.º 1
0
//
// Method: findGlobalPoolNodes()
//
// Description:
//  This method finds DSNodes that are reachable from globals and that need a
//  pool.  The Automatic Pool Allocation transform will use the returned
//  information to build global pools for the DSNodes in question.
//
//  Note that this method does not assign DSNodes to pools; it merely decides
//  which DSNodes are reachable from globals and will need a pool of global
//  scope.
//
// Outputs:
//  Nodes - The DSNodes that are both reachable from globals and which should
//          have global pools will be *added* to this container.
//
void
AllHeapNodesHeuristic::findGlobalPoolNodes (DSNodeSet_t & Nodes) {
  // Get the globals graph for the program.
  DSGraph* GG = Graphs->getGlobalsGraph();

  // Get all of the nodes reachable from globals.
  DenseSet<const DSNode*> GlobalHeapNodes;
  GetNodesReachableFromGlobals (GG, GlobalHeapNodes);
  //
  // Create a global pool for each global DSNode.
  //
  for (DenseSet<const DSNode *>::iterator NI = GlobalHeapNodes.begin();
              NI != GlobalHeapNodes.end();++NI) {
    const DSNode * N = *NI;
    PoolMap[N] = OnePool(N);
  }

  //
  // Now find all DSNodes belonging to function-local DSGraphs which are
  // mirrored in the globals graph.  These DSNodes require a global pool, too.
  //
  for (Module::iterator F = M->begin(); F != M->end(); ++F) {
    if (Graphs->hasDSGraph(*F)) {
      DSGraph* G = Graphs->getDSGraph(*F);
      DSGraph::NodeMapTy NodeMap;
      G->computeGToGGMapping (NodeMap);
      //
      // Scan through all DSNodes in the local graph.  If a local DSNode has a
      // corresponding DSNode in the globals graph that is reachable from a 
      // global, then add the local DSNode to the set of DSNodes reachable from
      // a global.
      //
      DSGraph::node_iterator ni = G->node_begin();
      for (; ni != G->node_end(); ++ni) {
        DSNode * N = ni;
        DSNode * GGN = NodeMap[N].getNode();
        
        //assert (!GGN || GlobalHeapNodes.count (GGN));
        if (GGN && GlobalHeapNodes.count (GGN))
          PoolMap[GGN].NodesInPool.push_back (N);
      }
    }
  }

  //
  // Copy the values into the output container.  Note that DenseSet has no
  // iterator traits (or whatever allows us to treat DenseSet has a generic
  // container), so we have to use a loop to copy values from the DenseSet into
  // the output container.
  //
  for (DenseSet<const DSNode*>::iterator I = GlobalHeapNodes.begin(),
         E = GlobalHeapNodes.end(); I != E; ++I) {
    Nodes.insert (*I);
  }

  return;
}
Ejemplo n.º 2
0
void AliasAnalysisChecker::collectMissingAliases(
    const DenseSet<ValuePair> &DynamicAliases,
    vector<ValuePair> &MissingAliases) {
  AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
  AliasAnalysis &BaselineAA = getAnalysis<BaselineAliasAnalysis>();

  MissingAliases.clear();
  for (DenseSet<ValuePair>::const_iterator I = DynamicAliases.begin();
       I != DynamicAliases.end(); ++I) {
    Value *V1 = I->first, *V2 = I->second;
    if (IntraProc && !DynAAUtils::IsIntraProcQuery(V1, V2)) {
      continue;
    }

    // Ignore BitCasts and PhiNodes. The reports on them are typically
    // redundant.
    if (isa<BitCastInst>(V1) || isa<BitCastInst>(V2))
      continue;
    if (isa<PHINode>(V1) || isa<PHINode>(V2))
      continue;

    if (!CheckAllPointers) {
      if (!DynAAUtils::PointerIsDereferenced(V1) ||
          !DynAAUtils::PointerIsDereferenced(V2)) {
        continue;
      }
    }

    if (BaselineAA.alias(V1, V2) != AliasAnalysis::NoAlias &&
        AA.alias(V1, V2) == AliasAnalysis::NoAlias) {
      MissingAliases.push_back(make_pair(V1, V2));
    }
  }
}
Ejemplo n.º 3
0
static void MarkNodesWhichMustBePassedIn(DenseSet<const DSNode*> &MarkedNodes,
                                         Function &F, DSGraph* G,
                                         EntryPointAnalysis* EPA) {
  // All DSNodes reachable from arguments must be passed in...
  // Unless this is an entry point to the program
  if (!EPA->isEntryPoint(&F)) {
    for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end();
            I != E; ++I) {
      DSGraph::ScalarMapTy::iterator AI = G->getScalarMap().find(I);
      if (AI != G->getScalarMap().end())
        if (DSNode * N = AI->second.getNode())
          N->markReachableNodes(MarkedNodes);
    }
  }

  // Marked the returned node as needing to be passed in.
  if (DSNode * RetNode = G->getReturnNodeFor(F).getNode())
    RetNode->markReachableNodes(MarkedNodes);

  // Calculate which DSNodes are reachable from globals.  If a node is reachable
  // from a global, we will create a global pool for it, so no argument passage
  // is required.
  DenseSet<const DSNode*> NodesFromGlobals;
  GetNodesReachableFromGlobals(G, NodesFromGlobals);

  // Remove any nodes reachable from a global.  These nodes will be put into
  // global pools, which do not require arguments to be passed in.

  for (DenseSet<const DSNode*>::iterator I = NodesFromGlobals.begin(),
          E = NodesFromGlobals.end(); I != E; ++I)
    MarkedNodes.erase(*I);
}
Ejemplo n.º 4
0
//
// Method: findGlobalPoolNodes()
//
// Description:
//  This method finds DSNodes that are reachable from globals and that need a
//  pool.  The Automatic Pool Allocation transform will use the returned
//  information to build global pools for the DSNodes in question.
//
//  Note that this method does not assign DSNodes to pools; it merely decides
//  which DSNodes are reachable from globals and will need a pool of global
//  scope.
//
// Outputs:
//  Nodes - The DSNodes that are both reachable from globals and which should
//          have global pools will be *added* to this container.
//
void
Heuristic::findGlobalPoolNodes (DSNodeSet_t & Nodes) {
  // Get the globals graph for the program.
  DSGraph* GG = Graphs->getGlobalsGraph();

  // Get all of the nodes reachable from globals.
  DenseSet<const DSNode*> GlobalHeapNodes;
  GetNodesReachableFromGlobals (GG, GlobalHeapNodes);

  //
  // Now find all DSNodes belonging to function-local DSGraphs which are
  // mirrored in the globals graph.  These DSNodes require a global pool, too.
  //
  for (Module::iterator F = M->begin(); F != M->end(); ++F) {
    if (Graphs->hasDSGraph(*F)) {
      DSGraph* G = Graphs->getDSGraph(*F);
      GetNodesReachableFromGlobals (G, GlobalHeapNodes);
    }
  }

  //
  // Copy the values into the output container.  Note that DenseSet has no
  // iterator traits (or whatever allows us to treat DenseSet has a generic
  // container), so we have to use a loop to copy values from the DenseSet into
  // the output container.
  //
  for (DenseSet<const DSNode*>::iterator I = GlobalHeapNodes.begin(),
         E = GlobalHeapNodes.end(); I != E; ++I) {
    Nodes.insert (*I);
  }

  return;
}
Ejemplo n.º 5
0
// Collects missing aliases to <MissingAliases>.
void AliasAnalysisChecker::collectMissingAliases(
    const DenseSet<ValuePair> &DynamicAliases) {
  AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
  AliasAnalysis &BaselineAA = getAnalysis<BaselineAliasAnalysis>();

  MissingAliases.clear();
  for (DenseSet<ValuePair>::const_iterator I = DynamicAliases.begin();
       I != DynamicAliases.end(); ++I) {
    Value *V1 = I->first, *V2 = I->second;
    if (IntraProc && !DynAAUtils::IsIntraProcQuery(V1, V2)) {
      continue;
    }

    if (!CheckAllPointers) {
      if (!DynAAUtils::PointerIsDereferenced(V1) ||
          !DynAAUtils::PointerIsDereferenced(V2)) {
        continue;
      }
    }

    if (BaselineAA.alias(V1, V2) != AliasAnalysis::NoAlias &&
        AA.alias(V1, V2) == AliasAnalysis::NoAlias) {
      MissingAliases.push_back(make_pair(V1, V2));
    }
  }
}
// Calculate the largest possible vregsPassed sets. These are the registers that
// can pass through an MBB live, but may not be live every time. It is assumed
// that all vregsPassed sets are empty before the call.
void MachineVerifier::calcRegsPassed() {
  // First push live-out regs to successors' vregsPassed. Remember the MBBs that
  // have any vregsPassed.
  DenseSet<const MachineBasicBlock*> todo;
  for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
       MFI != MFE; ++MFI) {
    const MachineBasicBlock &MBB(*MFI);
    BBInfo &MInfo = MBBInfoMap[&MBB];
    if (!MInfo.reachable)
      continue;
    for (MachineBasicBlock::const_succ_iterator SuI = MBB.succ_begin(),
           SuE = MBB.succ_end(); SuI != SuE; ++SuI) {
      BBInfo &SInfo = MBBInfoMap[*SuI];
      if (SInfo.addPassed(MInfo.regsLiveOut))
        todo.insert(*SuI);
    }
  }

  // Iteratively push vregsPassed to successors. This will converge to the same
  // final state regardless of DenseSet iteration order.
  while (!todo.empty()) {
    const MachineBasicBlock *MBB = *todo.begin();
    todo.erase(MBB);
    BBInfo &MInfo = MBBInfoMap[MBB];
    for (MachineBasicBlock::const_succ_iterator SuI = MBB->succ_begin(),
           SuE = MBB->succ_end(); SuI != SuE; ++SuI) {
      if (*SuI == MBB)
        continue;
      BBInfo &SInfo = MBBInfoMap[*SuI];
      if (SInfo.addPassed(MInfo.vregsPassed))
        todo.insert(*SuI);
    }
  }
}
// Calculate the set of virtual registers that must be passed through each basic
// block in order to satisfy the requirements of successor blocks. This is very
// similar to calcRegsPassed, only backwards.
void MachineVerifier::calcRegsRequired() {
  // First push live-in regs to predecessors' vregsRequired.
  DenseSet<const MachineBasicBlock*> todo;
  for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
       MFI != MFE; ++MFI) {
    const MachineBasicBlock &MBB(*MFI);
    BBInfo &MInfo = MBBInfoMap[&MBB];
    for (MachineBasicBlock::const_pred_iterator PrI = MBB.pred_begin(),
           PrE = MBB.pred_end(); PrI != PrE; ++PrI) {
      BBInfo &PInfo = MBBInfoMap[*PrI];
      if (PInfo.addRequired(MInfo.vregsLiveIn))
        todo.insert(*PrI);
    }
  }

  // Iteratively push vregsRequired to predecessors. This will converge to the
  // same final state regardless of DenseSet iteration order.
  while (!todo.empty()) {
    const MachineBasicBlock *MBB = *todo.begin();
    todo.erase(MBB);
    BBInfo &MInfo = MBBInfoMap[MBB];
    for (MachineBasicBlock::const_pred_iterator PrI = MBB->pred_begin(),
           PrE = MBB->pred_end(); PrI != PrE; ++PrI) {
      if (*PrI == MBB)
        continue;
      BBInfo &SInfo = MBBInfoMap[*PrI];
      if (SInfo.addRequired(MInfo.vregsRequired))
        todo.insert(*PrI);
    }
  }
}
Ejemplo n.º 8
0
void LazyValueInfoCache::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc,
                                    BasicBlock *NewSucc) {
  // When an edge in the graph has been threaded, values that we could not 
  // determine a value for before (i.e. were marked overdefined) may be possible
  // to solve now.  We do NOT try to proactively update these values.  Instead,
  // we clear their entries from the cache, and allow lazy updating to recompute
  // them when needed.
  
  // The updating process is fairly simple: we need to dropped cached info
  // for all values that were marked overdefined in OldSucc, and for those same
  // values in any successor of OldSucc (except NewSucc) in which they were
  // also marked overdefined.
  std::vector<BasicBlock*> worklist;
  worklist.push_back(OldSucc);
  
  DenseSet<Value*> ClearSet;
  for (DenseSet<OverDefinedPairTy>::iterator I = OverDefinedCache.begin(),
       E = OverDefinedCache.end(); I != E; ++I) {
    if (I->first == OldSucc)
      ClearSet.insert(I->second);
  }
  
  // Use a worklist to perform a depth-first search of OldSucc's successors.
  // NOTE: We do not need a visited list since any blocks we have already
  // visited will have had their overdefined markers cleared already, and we
  // thus won't loop to their successors.
  while (!worklist.empty()) {
    BasicBlock *ToUpdate = worklist.back();
    worklist.pop_back();
    
    // Skip blocks only accessible through NewSucc.
    if (ToUpdate == NewSucc) continue;
    
    bool changed = false;
    for (DenseSet<Value*>::iterator I = ClearSet.begin(), E = ClearSet.end();
         I != E; ++I) {
      // If a value was marked overdefined in OldSucc, and is here too...
      DenseSet<OverDefinedPairTy>::iterator OI =
        OverDefinedCache.find(std::make_pair(ToUpdate, *I));
      if (OI == OverDefinedCache.end()) continue;

      // Remove it from the caches.
      ValueCacheEntryTy &Entry = ValueCache[LVIValueHandle(*I, this)];
      ValueCacheEntryTy::iterator CI = Entry.find(ToUpdate);

      assert(CI != Entry.end() && "Couldn't find entry to update?");
      Entry.erase(CI);
      OverDefinedCache.erase(OI);

      // If we removed anything, then we potentially need to update 
      // blocks successors too.
      changed = true;
    }

    if (!changed) continue;
    
    worklist.insert(worklist.end(), succ_begin(ToUpdate), succ_end(ToUpdate));
  }
}
Ejemplo n.º 9
0
// Find the number of arguments we need to add to the functions.
void CSDataRando::findFunctionArgNodes(const std::vector<const Function *> &Functions) {
  std::vector<DSNodeHandle> RootNodes;
  for (const Function *F : Functions) {
    DSGraph *G = DSA->getDSGraph(*F);
    G->getFunctionArgumentsForCall(F, RootNodes);
  }

  // No additional args to pass.
  if (RootNodes.size() == 0) {
    return;
  }

  DenseSet<const DSNode*> MarkedNodes;
  for (DSNodeHandle &NH : RootNodes) {
    if (DSNode *N = NH.getNode()) {
      N->markReachableNodes(MarkedNodes);
    }
  }

  // Remove global nodes from the arg nodes. If we are using the bottom-up
  // analysis then if a node is a global node all contexts will use the global map.
  for (auto i : GlobalNodes) {
    MarkedNodes.erase(i);
  }

  // Remove any nodes that are marked do not encrypt.
  SmallVector<const DSNode*, 8> MarkedNodeWorkList;
  for (auto i : MarkedNodes) {
    if (i->isDoNotEncryptNode()) {
      MarkedNodeWorkList.push_back(i);
    }
  }
  for (auto i : MarkedNodeWorkList) {
    MarkedNodes.erase(i);
  }

  if (MarkedNodes.empty()) {
    return;
  }

  // Create a FuncInfo entry for each of the functions with the arg nodes that
  // need to be passed
  for (const Function *F : Functions) {
    FuncInfo &FI = FunctionInfo[F];
    FI.ArgNodes.insert(FI.ArgNodes.end(), MarkedNodes.begin(), MarkedNodes.end());
  }
}
Ejemplo n.º 10
0
/// FindFunctionPoolArgs - In the first pass over the program, we decide which
/// arguments will have to be added for each function, build the FunctionInfo
/// map and recording this info in the ArgNodes set.
static void FindFunctionPoolArgs(Function &F, FuncInfo& FI,
                                 EntryPointAnalysis* EPA) {
  DenseSet<const DSNode*> MarkedNodes;

  if (FI.G->node_begin() == FI.G->node_end())
    return; // No memory activity, nothing is required

  // Find DataStructure nodes which are allocated in pools non-local to the
  // current function.  This set will contain all of the DSNodes which require
  // pools to be passed in from outside of the function.
  MarkNodesWhichMustBePassedIn(MarkedNodes, F, FI.G,EPA);

  //FI.ArgNodes.insert(FI.ArgNodes.end(), MarkedNodes.begin(), MarkedNodes.end());
  //Work around DenseSet not having iterator traits
  for (DenseSet<const DSNode*>::iterator ii = MarkedNodes.begin(),
       ee = MarkedNodes.end(); ii != ee; ++ii)
    FI.ArgNodes.insert(FI.ArgNodes.end(), *ii);
}
Ejemplo n.º 11
0
//
// Method: eraseCallsTo()
//
// Description:
//  This method removes the specified function from DSCallsites within the
//  specified function.  We do not do anything with call sites that call this
//  function indirectly (for which there is not much point as we do not yet
//  know the targets of indirect function calls).
//
void
StdLibDataStructures::eraseCallsTo(Function* F) {
  typedef std::pair<DSGraph*,Function*> RemovalPair;
  DenseSet<RemovalPair> ToRemove;
  for (Value::use_iterator ii = F->use_begin(), ee = F->use_end();
       ii != ee; ++ii)
    if (CallInst* CI = dyn_cast<CallInst>(*ii)){
      if (CI->getCalledValue() == F) {
        DSGraph* Graph = getDSGraph(*CI->getParent()->getParent());
        //delete the call
        DEBUG(errs() << "Removing " << F->getName().str() << " from "
              << CI->getParent()->getParent()->getName().str() << "\n");
        ToRemove.insert(std::make_pair(Graph, F));
      }
    }else if (InvokeInst* CI = dyn_cast<InvokeInst>(*ii)){
      if (CI->getCalledValue() == F) {
        DSGraph* Graph = getDSGraph(*CI->getParent()->getParent());
        //delete the call
        DEBUG(errs() << "Removing " << F->getName().str() << " from "
              << CI->getParent()->getParent()->getName().str() << "\n");
        ToRemove.insert(std::make_pair(Graph, F));
      }
    } else if(ConstantExpr *CE = dyn_cast<ConstantExpr>(*ii)) {
      if(CE->isCast()) {
        for (Value::use_iterator ci = CE->use_begin(), ce = CE->use_end();
             ci != ce; ++ci) {
          if (CallInst* CI = dyn_cast<CallInst>(*ci)){
            if(CI->getCalledValue() == CE) {
              DSGraph* Graph = getDSGraph(*CI->getParent()->getParent());
              //delete the call
              DEBUG(errs() << "Removing " << F->getName().str() << " from "
                    << CI->getParent()->getParent()->getName().str() << "\n");
              ToRemove.insert(std::make_pair(Graph, F));
            }
          }
        }
      }
    }

  for(DenseSet<RemovalPair>::iterator I = ToRemove.begin(), E = ToRemove.end();
      I != E; ++I)
    I->first->removeFunctionCalls(*I->second);
}
Ejemplo n.º 12
0
void RTAssociate::SetupGlobalPools(Module* M, DSGraph* GG) {
  // Get the globals graph for the program.
  // DSGraph* GG = Graphs->getGlobalsGraph();

  // Get all of the nodes reachable from globals.
  DenseSet<const DSNode*> GlobalHeapNodes;
  GetNodesReachableFromGlobals(GG, GlobalHeapNodes);

  errs() << "Pool allocating " << GlobalHeapNodes.size()
          << " global nodes!\n";

  FuncInfo& FI = makeFuncInfo(0, GG);

  while (GlobalHeapNodes.size()) {
    const DSNode* D = *GlobalHeapNodes.begin();
    GlobalHeapNodes.erase(D);
    FI.PoolDescriptors[D] = CreateGlobalPool(D, M);
  }
}
Ejemplo n.º 13
0
// Reroll the provided loop with respect to the provided induction variable.
// Generally, we're looking for a loop like this:
//
// %iv = phi [ (preheader, ...), (body, %iv.next) ]
// f(%iv)
// %iv.1 = add %iv, 1                <-- a root increment
// f(%iv.1)
// %iv.2 = add %iv, 2                <-- a root increment
// f(%iv.2)
// %iv.scale_m_1 = add %iv, scale-1  <-- a root increment
// f(%iv.scale_m_1)
// ...
// %iv.next = add %iv, scale
// %cmp = icmp(%iv, ...)
// br %cmp, header, exit
//
// Notably, we do not require that f(%iv), f(%iv.1), etc. be isolated groups of
// instructions. In other words, the instructions in f(%iv), f(%iv.1), etc. can
// be intermixed with eachother. The restriction imposed by this algorithm is
// that the relative order of the isomorphic instructions in f(%iv), f(%iv.1),
// etc. be the same.
//
// First, we collect the use set of %iv, excluding the other increment roots.
// This gives us f(%iv). Then we iterate over the loop instructions (scale-1)
// times, having collected the use set of f(%iv.(i+1)), during which we:
//   - Ensure that the next unmatched instruction in f(%iv) is isomorphic to
//     the next unmatched instruction in f(%iv.(i+1)).
//   - Ensure that both matched instructions don't have any external users
//     (with the exception of last-in-chain reduction instructions).
//   - Track the (aliasing) write set, and other side effects, of all
//     instructions that belong to future iterations that come before the matched
//     instructions. If the matched instructions read from that write set, then
//     f(%iv) or f(%iv.(i+1)) has some dependency on instructions in
//     f(%iv.(j+1)) for some j > i, and we cannot reroll the loop. Similarly,
//     if any of these future instructions had side effects (could not be
//     speculatively executed), and so do the matched instructions, when we
//     cannot reorder those side-effect-producing instructions, and rerolling
//     fails.
//
// Finally, we make sure that all loop instructions are either loop increment
// roots, belong to simple latch code, parts of validated reductions, part of
// f(%iv) or part of some f(%iv.i). If all of that is true (and all reductions
// have been validated), then we reroll the loop.
bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header,
                        const SCEV *IterCount,
                        ReductionTracker &Reductions) {
  const SCEVAddRecExpr *RealIVSCEV = cast<SCEVAddRecExpr>(SE->getSCEV(IV));
  uint64_t Inc = cast<SCEVConstant>(RealIVSCEV->getOperand(1))->
                   getValue()->getZExtValue();
  // The collection of loop increment instructions.
  SmallInstructionVector LoopIncs;
  uint64_t Scale = Inc;

  // The effective induction variable, IV, is normally also the real induction
  // variable. When we're dealing with a loop like:
  //   for (int i = 0; i < 500; ++i)
  //     x[3*i] = ...;
  //     x[3*i+1] = ...;
  //     x[3*i+2] = ...;
  // then the real IV is still i, but the effective IV is (3*i).
  Instruction *RealIV = IV;
  if (Inc == 1 && !findScaleFromMul(RealIV, Scale, IV, LoopIncs))
    return false;

  assert(Scale <= MaxInc && "Scale is too large");
  assert(Scale > 1 && "Scale must be at least 2");

  // The set of increment instructions for each increment value.
  SmallVector<SmallInstructionVector, 32> Roots(Scale-1);
  SmallInstructionSet AllRoots;
  if (!collectAllRoots(L, Inc, Scale, IV, Roots, AllRoots, LoopIncs))
    return false;

  DEBUG(dbgs() << "LRR: Found all root induction increments for: " <<
                  *RealIV << "\n");

  // An array of just the possible reductions for this scale factor. When we
  // collect the set of all users of some root instructions, these reduction
  // instructions are treated as 'final' (their uses are not considered).
  // This is important because we don't want the root use set to search down
  // the reduction chain.
  SmallInstructionSet PossibleRedSet;
  SmallInstructionSet PossibleRedLastSet, PossibleRedPHISet;
  Reductions.restrictToScale(Scale, PossibleRedSet, PossibleRedPHISet,
                             PossibleRedLastSet);

  // We now need to check for equivalence of the use graph of each root with
  // that of the primary induction variable (excluding the roots). Our goal
  // here is not to solve the full graph isomorphism problem, but rather to
  // catch common cases without a lot of work. As a result, we will assume
  // that the relative order of the instructions in each unrolled iteration
  // is the same (although we will not make an assumption about how the
  // different iterations are intermixed). Note that while the order must be
  // the same, the instructions may not be in the same basic block.
  SmallInstructionSet Exclude(AllRoots);
  Exclude.insert(LoopIncs.begin(), LoopIncs.end());

  DenseSet<Instruction *> BaseUseSet;
  collectInLoopUserSet(L, IV, Exclude, PossibleRedSet, BaseUseSet);

  DenseSet<Instruction *> AllRootUses;
  std::vector<DenseSet<Instruction *> > RootUseSets(Scale-1);

  bool MatchFailed = false;
  for (unsigned i = 0; i < Scale-1 && !MatchFailed; ++i) {
    DenseSet<Instruction *> &RootUseSet = RootUseSets[i];
    collectInLoopUserSet(L, Roots[i], SmallInstructionSet(),
                         PossibleRedSet, RootUseSet);

    DEBUG(dbgs() << "LRR: base use set size: " << BaseUseSet.size() <<
                    " vs. iteration increment " << (i+1) <<
                    " use set size: " << RootUseSet.size() << "\n");

    if (BaseUseSet.size() != RootUseSet.size()) {
      MatchFailed = true;
      break;
    }

    // In addition to regular aliasing information, we need to look for
    // instructions from later (future) iterations that have side effects
    // preventing us from reordering them past other instructions with side
    // effects.
    bool FutureSideEffects = false;
    AliasSetTracker AST(*AA);

    // The map between instructions in f(%iv.(i+1)) and f(%iv).
    DenseMap<Value *, Value *> BaseMap;

    assert(L->getNumBlocks() == 1 && "Cannot handle multi-block loops");
    for (BasicBlock::iterator J1 = Header->begin(), J2 = Header->begin(),
         JE = Header->end(); J1 != JE && !MatchFailed; ++J1) {
      if (cast<Instruction>(J1) == RealIV)
        continue;
      if (cast<Instruction>(J1) == IV)
        continue;
      if (!BaseUseSet.count(J1))
        continue;
      if (PossibleRedPHISet.count(J1)) // Skip reduction PHIs.
        continue;

      while (J2 != JE && (!RootUseSet.count(J2) ||
             std::find(Roots[i].begin(), Roots[i].end(), J2) !=
               Roots[i].end())) {
        // As we iterate through the instructions, instructions that don't
        // belong to previous iterations (or the base case), must belong to
        // future iterations. We want to track the alias set of writes from
        // previous iterations.
        if (!isa<PHINode>(J2) && !BaseUseSet.count(J2) &&
            !AllRootUses.count(J2)) {
          if (J2->mayWriteToMemory())
            AST.add(J2);

          // Note: This is specifically guarded by a check on isa<PHINode>,
          // which while a valid (somewhat arbitrary) micro-optimization, is
          // needed because otherwise isSafeToSpeculativelyExecute returns
          // false on PHI nodes.
          if (!isSimpleLoadStore(J2) && !isSafeToSpeculativelyExecute(J2, DL))
            FutureSideEffects = true; 
        }

        ++J2;
      }

      if (!J1->isSameOperationAs(J2)) {
        DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 <<
                        " vs. " << *J2 << "\n");
        MatchFailed = true;
        break;
      }

      // Make sure that this instruction, which is in the use set of this
      // root instruction, does not also belong to the base set or the set of
      // some previous root instruction.
      if (BaseUseSet.count(J2) || AllRootUses.count(J2)) {
        DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 <<
                        " vs. " << *J2 << " (prev. case overlap)\n");
        MatchFailed = true;
        break;
      }

      // Make sure that we don't alias with any instruction in the alias set
      // tracker. If we do, then we depend on a future iteration, and we
      // can't reroll.
      if (J2->mayReadFromMemory()) {
        for (AliasSetTracker::iterator K = AST.begin(), KE = AST.end();
             K != KE && !MatchFailed; ++K) {
          if (K->aliasesUnknownInst(J2, *AA)) {
            DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 <<
                            " vs. " << *J2 << " (depends on future store)\n");
            MatchFailed = true;
            break;
          }
        }
      }

      // If we've past an instruction from a future iteration that may have
      // side effects, and this instruction might also, then we can't reorder
      // them, and this matching fails. As an exception, we allow the alias
      // set tracker to handle regular (simple) load/store dependencies.
      if (FutureSideEffects &&
            ((!isSimpleLoadStore(J1) && !isSafeToSpeculativelyExecute(J1)) ||
             (!isSimpleLoadStore(J2) && !isSafeToSpeculativelyExecute(J2)))) {
        DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 <<
                        " vs. " << *J2 <<
                        " (side effects prevent reordering)\n");
        MatchFailed = true;
        break;
      }

      // For instructions that are part of a reduction, if the operation is
      // associative, then don't bother matching the operands (because we
      // already know that the instructions are isomorphic, and the order
      // within the iteration does not matter). For non-associative reductions,
      // we do need to match the operands, because we need to reject
      // out-of-order instructions within an iteration!
      // For example (assume floating-point addition), we need to reject this:
      //   x += a[i]; x += b[i];
      //   x += a[i+1]; x += b[i+1];
      //   x += b[i+2]; x += a[i+2];
      bool InReduction = Reductions.isPairInSame(J1, J2);

      if (!(InReduction && J1->isAssociative())) {
        bool Swapped = false, SomeOpMatched = false;;
        for (unsigned j = 0; j < J1->getNumOperands() && !MatchFailed; ++j) {
          Value *Op2 = J2->getOperand(j);

	  // If this is part of a reduction (and the operation is not
	  // associatve), then we match all operands, but not those that are
	  // part of the reduction.
          if (InReduction)
            if (Instruction *Op2I = dyn_cast<Instruction>(Op2))
              if (Reductions.isPairInSame(J2, Op2I))
                continue;

          DenseMap<Value *, Value *>::iterator BMI = BaseMap.find(Op2);
          if (BMI != BaseMap.end())
            Op2 = BMI->second;
          else if (std::find(Roots[i].begin(), Roots[i].end(),
                             (Instruction*) Op2) != Roots[i].end())
            Op2 = IV;

          if (J1->getOperand(Swapped ? unsigned(!j) : j) != Op2) {
	    // If we've not already decided to swap the matched operands, and
	    // we've not already matched our first operand (note that we could
	    // have skipped matching the first operand because it is part of a
	    // reduction above), and the instruction is commutative, then try
	    // the swapped match.
            if (!Swapped && J1->isCommutative() && !SomeOpMatched &&
                J1->getOperand(!j) == Op2) {
              Swapped = true;
            } else {
              DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 <<
                              " vs. " << *J2 << " (operand " << j << ")\n");
              MatchFailed = true;
              break;
            }
          }

          SomeOpMatched = true;
        }
      }

      if ((!PossibleRedLastSet.count(J1) && hasUsesOutsideLoop(J1, L)) ||
          (!PossibleRedLastSet.count(J2) && hasUsesOutsideLoop(J2, L))) {
        DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 <<
                        " vs. " << *J2 << " (uses outside loop)\n");
        MatchFailed = true;
        break;
      }

      if (!MatchFailed)
        BaseMap.insert(std::pair<Value *, Value *>(J2, J1));

      AllRootUses.insert(J2);
      Reductions.recordPair(J1, J2, i+1);

      ++J2;
    }
  }

  if (MatchFailed)
    return false;

  DEBUG(dbgs() << "LRR: Matched all iteration increments for " <<
                  *RealIV << "\n");

  DenseSet<Instruction *> LoopIncUseSet;
  collectInLoopUserSet(L, LoopIncs, SmallInstructionSet(),
                       SmallInstructionSet(), LoopIncUseSet);
  DEBUG(dbgs() << "LRR: Loop increment set size: " <<
                  LoopIncUseSet.size() << "\n");

  // Make sure that all instructions in the loop have been included in some
  // use set.
  for (BasicBlock::iterator J = Header->begin(), JE = Header->end();
       J != JE; ++J) {
    if (isa<DbgInfoIntrinsic>(J))
      continue;
    if (cast<Instruction>(J) == RealIV)
      continue;
    if (cast<Instruction>(J) == IV)
      continue;
    if (BaseUseSet.count(J) || AllRootUses.count(J) ||
        (LoopIncUseSet.count(J) && (J->isTerminator() ||
                                    isSafeToSpeculativelyExecute(J, DL))))
      continue;

    if (AllRoots.count(J))
      continue;

    if (Reductions.isSelectedPHI(J))
      continue;

    DEBUG(dbgs() << "LRR: aborting reroll based on " << *RealIV <<
                    " unprocessed instruction found: " << *J << "\n");
    MatchFailed = true;
    break;
  }

  if (MatchFailed)
    return false;

  DEBUG(dbgs() << "LRR: all instructions processed from " <<
                  *RealIV << "\n");

  if (!Reductions.validateSelected())
    return false;

  // At this point, we've validated the rerolling, and we're committed to
  // making changes!

  Reductions.replaceSelected();

  // Remove instructions associated with non-base iterations.
  for (BasicBlock::reverse_iterator J = Header->rbegin();
       J != Header->rend();) {
    if (AllRootUses.count(&*J)) {
      Instruction *D = &*J;
      DEBUG(dbgs() << "LRR: removing: " << *D << "\n");
      D->eraseFromParent();
      continue;
    }

    ++J; 
  }

  // Insert the new induction variable.
  const SCEV *Start = RealIVSCEV->getStart();
  if (Inc == 1)
    Start = SE->getMulExpr(Start,
                           SE->getConstant(Start->getType(), Scale));
  const SCEVAddRecExpr *H =
    cast<SCEVAddRecExpr>(SE->getAddRecExpr(Start,
                           SE->getConstant(RealIVSCEV->getType(), 1),
                           L, SCEV::FlagAnyWrap));
  { // Limit the lifetime of SCEVExpander.
    SCEVExpander Expander(*SE, "reroll");
    Value *NewIV = Expander.expandCodeFor(H, IV->getType(), Header->begin());

    for (DenseSet<Instruction *>::iterator J = BaseUseSet.begin(),
         JE = BaseUseSet.end(); J != JE; ++J)
      (*J)->replaceUsesOfWith(IV, NewIV);

    if (BranchInst *BI = dyn_cast<BranchInst>(Header->getTerminator())) {
      if (LoopIncUseSet.count(BI)) {
        const SCEV *ICSCEV = RealIVSCEV->evaluateAtIteration(IterCount, *SE);
        if (Inc == 1)
          ICSCEV =
            SE->getMulExpr(ICSCEV, SE->getConstant(ICSCEV->getType(), Scale));
        // Iteration count SCEV minus 1
        const SCEV *ICMinus1SCEV =
          SE->getMinusSCEV(ICSCEV, SE->getConstant(ICSCEV->getType(), 1));

        Value *ICMinus1; // Iteration count minus 1
        if (isa<SCEVConstant>(ICMinus1SCEV)) {
          ICMinus1 = Expander.expandCodeFor(ICMinus1SCEV, NewIV->getType(), BI);
        } else {
          BasicBlock *Preheader = L->getLoopPreheader();
          if (!Preheader)
            Preheader = InsertPreheaderForLoop(L, this);

          ICMinus1 = Expander.expandCodeFor(ICMinus1SCEV, NewIV->getType(),
                                            Preheader->getTerminator());
        }
 
        Value *Cond = new ICmpInst(BI, CmpInst::ICMP_EQ, NewIV, ICMinus1,
                                   "exitcond");
        BI->setCondition(Cond);

        if (BI->getSuccessor(1) != Header)
          BI->swapSuccessors();
      }
    }
  }

  SimplifyInstructionsInBlock(Header, DL, TLI);
  DeleteDeadPHIs(Header, TLI);
  ++NumRerolledLoops;
  return true;
}
Ejemplo n.º 14
0
//
// Function: GetNodesReachableFromGlobals()
//
// Description:
//  This function finds all DSNodes which are reachable from globals.  It finds
//  DSNodes both within the local DSGraph as well as in the Globals graph that
//  are reachable from globals.  It does, however, filter out those DSNodes
//  which are of no interest to automatic pool allocation.
//
// Inputs:
//  G - The DSGraph for which to find DSNodes which are reachable by globals.
//      This DSGraph can either by a DSGraph associated with a function *or*
//      it can be the globals graph itself.
//
// Outputs:
//  NodesFromGlobals - A reference to a container object in which to record
//                     DSNodes reachable from globals.  DSNodes are *added* to
//                     this container; it is not cleared by this function.
//                     DSNodes from both the local and globals graph are added.
void
AllHeapNodesHeuristic::GetNodesReachableFromGlobals (DSGraph* G,
                              DenseSet<const DSNode*> &NodesFromGlobals) {
  //
  // Get the globals graph associated with this DSGraph.  If the globals graph
  // is NULL, then the graph that was passed in *is* the globals graph.
  //
  DSGraph * GlobalsGraph = G->getGlobalsGraph();
  if (!GlobalsGraph)
    GlobalsGraph = G;

  //
  // Find all DSNodes which are reachable in the globals graph.
  //
  for (DSGraph::node_iterator NI = GlobalsGraph->node_begin();
       NI != GlobalsGraph->node_end();
       ++NI) {
    NI->markReachableNodes(NodesFromGlobals);
  }

  //
  // Remove those global nodes which we know will never be pool allocated.
  //
  
  std::vector<const DSNode *> toRemove;
  for (DenseSet<const DSNode*>::iterator I = NodesFromGlobals.begin(),
         E = NodesFromGlobals.end(); I != E; ) {
    DenseSet<const DSNode*>::iterator Last = I; ++I;

    const DSNode *tmp = *Last;
    if (!(tmp->isHeapNode())) 
      toRemove.push_back (tmp);
    // Do not poolallocate nodes that are cast to Int.
    // As we do not track through ints, these could be escaping
    if (tmp->isPtrToIntNode())
      toRemove.push_back(tmp);
  }
 
  //
  // Remove all globally reachable DSNodes which do not require pools.
  //
  for (unsigned index = 0; index < toRemove.size(); ++index) {
    NodesFromGlobals.erase(toRemove[index]);
  }

  //
  // Now the fun part.  Find DSNodes in the local graph that correspond to
  // those nodes reachable in the globals graph.  Add them to the set of
  // reachable nodes, too.
  //
  if (G->getGlobalsGraph()) {
    //
    // Compute a mapping between local DSNodes and DSNodes in the globals
    // graph.
    //
    DSGraph::NodeMapTy NodeMap;
    G->computeGToGGMapping (NodeMap);

    //
    // Scan through all DSNodes in the local graph.  If a local DSNode has a
    // corresponding DSNode in the globals graph that is reachable from a 
    // global, then add the local DSNode to the set of DSNodes reachable from a
    // global.
    //
    // FIXME: A node's existance within the global DSGraph is probably
    //        sufficient evidence that it is reachable from a global.
    //

    DSGraph::node_iterator ni = G->node_begin();
    for (; ni != G->node_end(); ++ni) {
      DSNode * N = ni;
      if (NodesFromGlobals.count (NodeMap[N].getNode()))
        NodesFromGlobals.insert (N);
    }
  }
}
Ejemplo n.º 15
0
Result::Sat AttemptSolutionSDP::attempt(const ApproximateSimplex::Solution& sol){
  const DenseSet& newBasis = sol.newBasis;
  const DenseMap<DeltaRational>& newValues = sol.newValues;

  DenseSet needsToBeAdded;
  for(DenseSet::const_iterator i = newBasis.begin(), i_end = newBasis.end(); i != i_end; ++i){
    ArithVar b = *i;
    if(!d_tableau.isBasic(b)){
      needsToBeAdded.add(b);
    }
  }
  DenseMap<DeltaRational>::const_iterator nvi = newValues.begin(), nvi_end = newValues.end();
  for(; nvi != nvi_end; ++nvi){
    ArithVar currentlyNb = *nvi;
    if(!d_tableau.isBasic(currentlyNb)){
      if(!matchesNewValue(newValues, currentlyNb)){
        const DeltaRational& newValue = newValues[currentlyNb];
        Trace("arith::updateMany")
          << "updateMany:" << currentlyNb << " "
          << d_variables.getAssignment(currentlyNb) << " to "<< newValue << endl;
        d_linEq.update(currentlyNb, newValue);
        Assert(d_variables.assignmentIsConsistent(currentlyNb));
      }
    }
  }
  d_errorSet.reduceToSignals();
  d_errorSet.setSelectionRule(VAR_ORDER);

  static int instance = 0;
  ++instance;

  if(processSignals()){
    Debug("arith::findModel") << "attemptSolution("<< instance <<") early conflict" << endl;
    d_conflictVariables.purge();
    return Result::UNSAT;
  }else if(d_errorSet.errorEmpty()){
    Debug("arith::findModel") << "attemptSolution("<< instance <<") fixed itself" << endl;
    return Result::SAT;
  }

  while(!needsToBeAdded.empty() && !d_errorSet.errorEmpty()){
    ArithVar toRemove = ARITHVAR_SENTINEL;
    ArithVar toAdd = ARITHVAR_SENTINEL;
    DenseSet::const_iterator i = needsToBeAdded.begin(), i_end = needsToBeAdded.end();
    for(; toAdd == ARITHVAR_SENTINEL && i != i_end; ++i){
      ArithVar v = *i;

      Tableau::ColIterator colIter = d_tableau.colIterator(v);
      for(; !colIter.atEnd(); ++colIter){
        const Tableau::Entry& entry = *colIter;
        Assert(entry.getColVar() == v);
        ArithVar b = d_tableau.rowIndexToBasic(entry.getRowIndex());
        if(!newBasis.isMember(b)){
          toAdd = v;

          bool favorBOverToRemove =
            (toRemove == ARITHVAR_SENTINEL) ||
            (matchesNewValue(newValues, toRemove) && !matchesNewValue(newValues, b)) ||
            (d_tableau.basicRowLength(toRemove) > d_tableau.basicRowLength(b));

          if(favorBOverToRemove){
            toRemove = b;
          }
        }
      }
    }
    Assert(toRemove != ARITHVAR_SENTINEL);
    Assert(toAdd != ARITHVAR_SENTINEL);

    Trace("arith::forceNewBasis") << toRemove << " " << toAdd << endl;
    //Message() << toRemove << " " << toAdd << endl;

    d_linEq.pivotAndUpdate(toRemove, toAdd, newValues[toRemove]);

    Trace("arith::forceNewBasis") << needsToBeAdded.size() << "to go" << endl;
    //Message() << needsToBeAdded.size() << "to go" << endl;
    needsToBeAdded.remove(toAdd);

    bool conflict = processSignals();
    if(conflict){
      d_errorSet.reduceToSignals();
      d_conflictVariables.purge();

      return Result::UNSAT;
    }
  }
  Assert( d_conflictVariables.empty() );

  if(d_errorSet.errorEmpty()){
    return Result::SAT;
  }else{
    d_errorSet.reduceToSignals();
    return Result::SAT_UNKNOWN;
  }
}
Ejemplo n.º 16
0
//
// Method: findGlobalPoolNodes()
//
// Description:
//  This method finds DSNodes that are reachable from globals and that need a
//  pool.  The Automatic Pool Allocation transform will use the returned
//  information to build global pools for the DSNodes in question.
//
//  For efficiency, this method also determines which DSNodes should be in the
//  same pool.
//
// Outputs:
//  Nodes - The DSNodes that are both reachable from globals and which should
//          have global pools will be *added* to this container.
//
void
AllNodesHeuristic::findGlobalPoolNodes (DSNodeSet_t & Nodes) {
    // Get the globals graph for the program.
    DSGraph* GG = Graphs->getGlobalsGraph();

    //
    // Get all of the nodes reachable from globals.
    //
    DenseSet<const DSNode*> GlobalNodes;
    GetNodesReachableFromGlobals (GG, GlobalNodes);

    //
    // Create a global pool for each global DSNode.
    //
    for (DenseSet<const DSNode *>::iterator NI = GlobalNodes.begin();
            NI != GlobalNodes.end();
            ++NI) {
        const DSNode * N = *NI;
        PoolMap[N] = OnePool(N);
    }

    //
    // Now find all DSNodes belonging to function-local DSGraphs which are
    // mirrored in the globals graph.  These DSNodes require a global pool, too,
    // but must use the same pool as the one assigned to the corresponding global
    // DSNode.
    //
    for (Module::iterator F = M->begin(); F != M->end(); ++F) {
        //
        // Ignore functions that have no DSGraph.
        //
        if (!(Graphs->hasDSGraph(*F))) continue;

        //
        // Compute a mapping between local DSNodes and DSNodes in the globals
        // graph.
        //
        DSGraph* G = Graphs->getDSGraph(*F);
        DSGraph::NodeMapTy NodeMap;
        G->computeGToGGMapping (NodeMap);

        //
        // Scan through all DSNodes in the local graph.  If a local DSNode has a
        // corresponding DSNode in the globals graph that is reachable from a
        // global, then add the local DSNode to the set of DSNodes reachable from
        // a global.
        //
        DSGraph::node_iterator ni = G->node_begin();
        for (; ni != G->node_end(); ++ni) {
            DSNode * N = ni;
            DSNode * GGN = NodeMap[N].getNode();

            assert (!GGN || GlobalNodes.count (GGN));
            if (GGN && GlobalNodes.count (GGN))
                PoolMap[GGN].NodesInPool.push_back (N);
        }
    }

    //
    // Scan through all the local graphs looking for DSNodes which may be
    // reachable by a global.  These nodes may not end up in the globals graph
    // because of the fact that DSA doesn't actually know what is happening to
    // them.
    //
    // FIXME: I believe this code causes a condition in which a local DSNode is
    //        given a local pool in one function but not in other functions.
    //        Someone needs to investigate whether DSA is being consistent here,
    //        and if not, if that inconsistency is correct.
    //
#if 0
    for (Module::iterator F = M->begin(); F != M->end(); ++F) {
        if (F->isDeclaration()) continue;
        DSGraph* G = Graphs->getDSGraph(*F);
        for (DSGraph::node_iterator I = G->node_begin(), E = G->node_end();
                I != E;
                ++I) {
            DSNode * Node = I;
            if (Node->isExternalNode() || Node->isUnknownNode()) {
                GlobalNodes.insert (Node);
            }
        }
    }
#endif

    //
    // Copy the values into the output container.  Note that DenseSet has no
    // iterator traits (or whatever allows us to treat DenseSet has a generic
    // container), so we have to use a loop to copy values from the DenseSet into
    // the output container.
    //
    // Note that we do not copy local DSNodes into the output container; we
    // merely copy those nodes in the globals graph.
    //
    for (DenseSet<const DSNode*>::iterator I = GlobalNodes.begin(),
            E = GlobalNodes.end(); I != E; ++I) {
        Nodes.insert (*I);
    }

    return;
}
Ejemplo n.º 17
0
int main(int argc, char ** argv)
{
	std::cerr << std::fixed << std::setprecision(3);
	std::ofstream devnull("/dev/null");
	
	DB::ReadBufferFromFileDescriptor in(STDIN_FILENO);
	size_t n = atoi(argv[1]);
	size_t elems_show = 1;

	using Vec = std::vector<std::string>;
	using Set = std::unordered_map<std::string, int>;
	using RefsSet = std::unordered_map<StringRef, int, StringRefHash>;
	using DenseSet = google::dense_hash_map<std::string, int>;
	using RefsDenseSet = google::dense_hash_map<StringRef, int, StringRefHash>;
	using RefsHashMap = HashMap<StringRef, int, StringRefHash>;
	Vec vec;

	vec.reserve(n);

	{
		Stopwatch watch;

		std::string s;
		for (size_t i = 0; i < n && !in.eof(); ++i)
		{
			DB::readEscapedString(s, in);
			DB::assertChar('\n', in);
			vec.push_back(s);
		}

		std::cerr << "Read and inserted into vector in " << watch.elapsedSeconds() << " sec, "
			<< vec.size() / watch.elapsedSeconds() << " rows/sec., "
			<< in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec."
			<< std::endl;
	}

	{
		DB::Arena pool;
		Stopwatch watch;
		const char * res = nullptr;

		for (Vec::iterator it = vec.begin(); it != vec.end(); ++it)
		{
			const char * tmp = pool.insert(it->data(), it->size());
			if (it == vec.begin())
				res = tmp;
		}

		std::cerr << "Inserted into pool in " << watch.elapsedSeconds() << " sec, "
			<< vec.size() / watch.elapsedSeconds() << " rows/sec., "
			<< in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec."
			<< std::endl;

		devnull.write(res, 100);
		devnull << std::endl;
	}

	{
		Set set;
		Stopwatch watch;

		for (Vec::iterator it = vec.begin(); it != vec.end(); ++it)
			set[*it] = 0;

		std::cerr << "Inserted into std::unordered_map in " << watch.elapsedSeconds() << " sec, "
			<< vec.size() / watch.elapsedSeconds() << " rows/sec., "
			<< in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec."
			<< std::endl;

		size_t i = 0;
		for (Set::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
		{
			devnull << it->first;
			devnull << std::endl;
		}
	}

	{
		RefsSet set;
		Stopwatch watch;

		for (Vec::iterator it = vec.begin(); it != vec.end(); ++it)
			set[StringRef(*it)] = 0;

		std::cerr << "Inserted refs into std::unordered_map in " << watch.elapsedSeconds() << " sec, "
			<< vec.size() / watch.elapsedSeconds() << " rows/sec., "
			<< in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec."
			<< std::endl;

		size_t i = 0;
		for (RefsSet::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
		{
			devnull.write(it->first.data, it->first.size);
			devnull << std::endl;
		}
	}

	{
		DB::Arena pool;
		RefsSet set;
		Stopwatch watch;
		
		for (Vec::iterator it = vec.begin(); it != vec.end(); ++it)
			set[StringRef(pool.insert(it->data(), it->size()), it->size())] = 0;

		std::cerr << "Inserted into pool and refs into std::unordered_map in " << watch.elapsedSeconds() << " sec, "
			<< vec.size() / watch.elapsedSeconds() << " rows/sec., "
			<< in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec."
			<< std::endl;

		size_t i = 0;
		for (RefsSet::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
		{
			devnull.write(it->first.data, it->first.size);
			devnull << std::endl;
		}
	}

	{
		DenseSet set;
		set.set_empty_key(DenseSet::key_type());
		Stopwatch watch;

		for (Vec::iterator it = vec.begin(); it != vec.end(); ++it)
			set[*it] = 0;

		std::cerr << "Inserted into google::dense_hash_map in " << watch.elapsedSeconds() << " sec, "
			<< vec.size() / watch.elapsedSeconds() << " rows/sec., "
			<< in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec."
			<< std::endl;

		size_t i = 0;
		for (DenseSet::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
		{
			devnull << it->first;
			devnull << std::endl;
		}
	}

	{
		RefsDenseSet set;
		set.set_empty_key(RefsDenseSet::key_type());
		Stopwatch watch;

		for (Vec::iterator it = vec.begin(); it != vec.end(); ++it)
			set[StringRef(it->data(), it->size())] = 0;

		std::cerr << "Inserted refs into google::dense_hash_map in " << watch.elapsedSeconds() << " sec, "
			<< vec.size() / watch.elapsedSeconds() << " rows/sec., "
			<< in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec."
			<< std::endl;

		size_t i = 0;
		for (RefsDenseSet::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
		{
			devnull.write(it->first.data, it->first.size);
			devnull << std::endl;
		}
	}

	{
		DB::Arena pool;
		RefsDenseSet set;
		set.set_empty_key(RefsDenseSet::key_type());
		Stopwatch watch;

		for (Vec::iterator it = vec.begin(); it != vec.end(); ++it)
			set[StringRef(pool.insert(it->data(), it->size()), it->size())] = 0;

		std::cerr << "Inserted into pool and refs into google::dense_hash_map in " << watch.elapsedSeconds() << " sec, "
			<< vec.size() / watch.elapsedSeconds() << " rows/sec., "
			<< in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec."
			<< std::endl;

		size_t i = 0;
		for (RefsDenseSet::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
		{
			devnull.write(it->first.data, it->first.size);
			devnull << std::endl;
		}
	}

	{
		RefsHashMap set;
		Stopwatch watch;

		for (Vec::iterator it = vec.begin(); it != vec.end(); ++it)
		{
			RefsHashMap::iterator inserted_it;
			bool inserted;
			set.emplace(StringRef(*it), inserted_it, inserted);
		}

		std::cerr << "Inserted refs into HashMap in " << watch.elapsedSeconds() << " sec, "
			<< vec.size() / watch.elapsedSeconds() << " rows/sec., "
			<< in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec."
			<< std::endl;

		size_t i = 0;
		for (RefsHashMap::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
		{
			devnull.write(it->first.data, it->first.size);
			devnull << std::endl;
		}

		//std::cerr << set.size() << ", " << set.getCollisions() << std::endl;
	}

	{
		DB::Arena pool;
		RefsHashMap set;
		Stopwatch watch;

		for (Vec::iterator it = vec.begin(); it != vec.end(); ++it)
		{
			RefsHashMap::iterator inserted_it;
			bool inserted;
			set.emplace(StringRef(pool.insert(it->data(), it->size()), it->size()), inserted_it, inserted);
		}

		std::cerr << "Inserted into pool and refs into HashMap in " << watch.elapsedSeconds() << " sec, "
			<< vec.size() / watch.elapsedSeconds() << " rows/sec., "
			<< in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec."
			<< std::endl;

		size_t i = 0;
		for (RefsHashMap::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
		{
			devnull.write(it->first.data, it->first.size);
			devnull << std::endl;
		}
	}

	return 0;
}