Example #1
0
/*
 * Adds the suffix that begins at text position `start` as a new leaf attached
 * at the location described by `point`.
 *
 * If `point` is on a node, the leaf is hung directly from that node.
 * Otherwise `point` sits inside an edge: the edge is split by a freshly
 * obtained internal node, which becomes the parent of the new leaf. The new
 * internal node is stored in `last_internal_added`; if one was already stored,
 * link_to() is called on it with the new node first.
 */
void SuffixTree::add_suffix(SuffixTreePoint *point, string::size_type start) {
  SuffixTreeNode *parent;

  if (point->is_node()) {
    /* Already on a node: nothing to split. */
    parent = point->node_above();
  } else {
    /* In the middle of an edge: split it with a new internal node. */
    SuffixTreeNode *split = get_free_node(start, point->str_depth());
    SuffixTreeNode *upper = point->node_above();
    SuffixTreeNode *lower = point->node_below();

    /* Redirect the edge leaving `upper` so that it now ends at `split`. */
    string::size_type upper_edge_i = lower->head() + upper->str_depth();
    assert(upper_edge_i < text.size());
    assert(upper->child(text[upper_edge_i]) != NULL);
    upper->erase_child(text[upper_edge_i]);
    upper->add_child(text[upper_edge_i], split);

    /* The rest of the old edge continues below `split`. */
    string::size_type lower_edge_i = lower->head() + point->str_depth();
    assert(lower_edge_i < text.size());
    split->add_child(text[lower_edge_i], lower);

    /* Chain the previously created internal node to this one. */
    if (last_internal_added != NULL)
      last_internal_added->link_to(split);
    last_internal_added = split;

    parent = split;
  }

  /* The leaf covers everything from `start` to the end of the text. */
  SuffixTreeNode *leaf = get_free_node(start, text.size() - start);
  string::size_type leaf_edge_i = leaf->head() + parent->str_depth();
  assert(leaf_edge_i < text.size());
  parent->add_child(text[leaf_edge_i], leaf);
}
Example #2
0
/*
 * Hands out the next unused entry of `nodes_buff`, initialised with the given
 * head position and string depth, and advances `next_node` past it.
 * Asserts that the buffer still has an unused entry.
 */
SuffixTreeNode *SuffixTree::get_free_node(int path_start, int string_depth) {
  assert(next_node < nodes_buff.size());

  SuffixTreeNode &slot = nodes_buff[next_node];
  ++next_node;

  slot.set_head(path_start);
  slot.set_str_depth(string_depth);
  return &slot;
}
 void insertString(const string &s, int index) {
     indexes.push_back(index);
     if (s.length() > 0) {
         value = s[0];
         SuffixTreeNode child;
         if (children.find(value) != children.end()) {
             child = children[value];
         } else {
             children.insert(make_pair(value, child));
         }
         string remainder = s.substr(1);
         child.insertString(remainder, index);
     }
 }
Example #4
0
/*
 * Relocates `point` by following slink() of the node above it and then
 * walking back down until its string depth is exactly one less than it was
 * on entry.
 *
 * The characters used for the descent are read from `text`, starting from the
 * head of the node that was below the original point. If the final location
 * is a node and `last_internal_added` is set, link_to() is called on it with
 * that node and `last_internal_added` is cleared.
 */
void SuffixTree::suffix_jump(SuffixTreePoint *point) {
  SuffixTreePoint origin = *point;

  assert(origin.node_above()->slink() != NULL);

  /* Start exactly on the node reached through slink(). */
  point->set_node_above(origin.node_above()->slink());
  point->set_node_below(origin.node_above()->slink());
  point->set_str_depth(point->node_above()->str_depth());

  assert(point->str_depth() == origin.node_above()->str_depth()-1);

  /* Descend edge by edge until the target depth is reached. */
  while (point->str_depth() != origin.str_depth()-1) {
    string::size_type probe_i = origin.node_below()->head()+point->str_depth()+1;
    point->set_node_below(can_descend(point, text[probe_i]));

    assert(point->node_below() != NULL);

    /* Length of the edge just chosen vs. how far we still have to go. */
    int edge_span = point->node_below()->str_depth() - point->node_above()->str_depth();
    int remaining = origin.str_depth()-1 - point->node_above()->str_depth();

    if (remaining < edge_span) {
      /* The target lies inside this edge: stop part-way along it. */
      point->set_str_depth(point->str_depth()+remaining);
    } else {
      /* The whole edge fits: consume it and stand on its lower node. */
      point->set_str_depth(point->str_depth()+edge_span);
      point->set_node_above(point->node_below());
    }
  }

  /* Arrived. Resolve the pending link if we ended up on a node. */
  if (point->is_node() && last_internal_added != NULL) {
    last_internal_added->link_to(point->node_above());
    last_internal_added = NULL;
  }
}
Example #5
0
		SuffixTree(string s)
		{
			for (int i = 0; i < s.length(); ++i)
			{
				string suffix = s.substr (i);
				root.insertString (suffix, i);
			}
		}
Example #6
0
/*
* Registra todas as ocorrências do padrão no texto
*/
void SuffixTree::getMatchings(const char* pat, size_t m, SuffixTreeNode& node, int nodeHeight, Printer& printer) {
	if(node.isLeaf()) //É folha
		printer.addMatching(n-nodeHeight);
	else {
		for(int nt = node.firstChild; nt != -1; nt = nodes.at(nt).sibling){
			SuffixTreeNode& next = nodes.at(nt);
			int edgeSize = next.end - next.start + 1;
			getMatchings(pat, m, next, nodeHeight + edgeSize, printer); 
		}
	}
}
Example #7
0
/*
 * Reports whether the tree can be followed one more character, `letter`,
 * starting from `point`.
 *
 * Returns the node that the descent leads towards, or NULL when no such step
 * exists. A point sitting on the node `root->slink()` always yields `root`.
 */
SuffixTreeNode *SuffixTree::can_descend(SuffixTreePoint *point, char letter) const {
  if (point->is_node()) {
    // Sentinel boundary case
    if (point->get_node() == root->slink())
      return root;

    // Standing on a regular node: ask it for the child labelled `letter`.
    return point->get_node()->child(letter);
  }

  // Standing inside an edge: only the next character on that edge can match.
  int edge_pos = point->node_below()->head() + point->str_depth();
  assert(edge_pos > 0);
  assert((string::size_type) edge_pos < text.size());

  if (text[edge_pos] != letter)
    return NULL;
  return point->node_below();
}
Example #8
0
/// Walk the leaves of \p ST looking for repeated sequences and keep the ones
/// whose computed benefit is at least 1.
///
/// \p CandidateList and \p FunctionList are cleared on entry. For each accepted
/// sequence, one Candidate per in-tree leaf child of the visited leaf's parent
/// is appended to \p CandidateList, and one OutlinedFunction is appended to
/// \p FunctionList. A sequence whose benefit is below 1 is not stored; a
/// "NotOutliningCheaper" missed-optimization remark is emitted for it instead.
///
/// \param ST suffix tree whose LeafVector and Str are read; IsInTree flags of
/// visited nodes are cleared as a side effect.
/// \param TII queried for per-sequence outlining cost information.
/// \param Mapper its InstrList is indexed by suffix index to locate each
/// occurrence.
/// \param [out] CandidateList receives the accepted candidates.
/// \param [out] FunctionList receives one entry per accepted sequence.
///
/// \returns the largest StringLen among accepted sequences, or 0 if none was
/// accepted.
unsigned
MachineOutliner::findCandidates(SuffixTree &ST, const TargetInstrInfo &TII,
                                InstructionMapper &Mapper,
                                std::vector<Candidate> &CandidateList,
                                std::vector<OutlinedFunction> &FunctionList) {
  CandidateList.clear();
  FunctionList.clear();
  unsigned MaxLen = 0;

  // FIXME: Visit internal nodes instead of leaves.
  for (SuffixTreeNode *Leaf : ST.LeafVector) {
    assert(Leaf && "Leaves in LeafVector cannot be null!");
    if (!Leaf->IsInTree)
      continue;

    assert(Leaf->Parent && "All leaves must have parents!");
    SuffixTreeNode &Parent = *(Leaf->Parent);

    // If it doesn't appear enough, or we already outlined from it, skip it.
    if (Parent.OccurrenceCount < 2 || Parent.isRoot() || !Parent.IsInTree)
      continue;

    // Figure out if this candidate is beneficial.
    // StringLen is used below as the number of entries belonging to each
    // occurrence, both in Mapper.InstrList and in ST.Str.
    unsigned StringLen = Leaf->ConcatLen - (unsigned)Leaf->size();

    // Too short to be beneficial; skip it.
    // FIXME: This isn't necessarily true for, say, X86. If we factor in
    // instruction lengths we need more information than this.
    if (StringLen < 2)
      continue;

    // If this is a beneficial class of candidate, then every one is stored in
    // this vector.
    std::vector<Candidate> CandidatesForRepeatedSeq;

    // Describes the start and end point of each candidate. This allows the
    // target to infer some information about each occurrence of each repeated
    // sequence.
    // FIXME: CandidatesForRepeatedSeq and this should be combined.
    std::vector<
        std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>>
        RepeatedSequenceLocs;

    // Figure out the call overhead for each instance of the sequence.
    // Only children of Parent that are leaves still marked in-tree are used;
    // each one contributes one occurrence.
    for (auto &ChildPair : Parent.Children) {
      SuffixTreeNode *M = ChildPair.second;

      if (M && M->IsInTree && M->isLeaf()) {
        // Each sequence is over [StartIt, EndIt].
        MachineBasicBlock::iterator StartIt = Mapper.InstrList[M->SuffixIdx];
        MachineBasicBlock::iterator EndIt =
            Mapper.InstrList[M->SuffixIdx + StringLen - 1];

        CandidatesForRepeatedSeq.emplace_back(M->SuffixIdx, StringLen,
                                              FunctionList.size());
        RepeatedSequenceLocs.emplace_back(std::make_pair(StartIt, EndIt));

        // Never visit this leaf again.
        M->IsInTree = false;
      }
    }

    // We've found something we might want to outline.
    // Create an OutlinedFunction to store it and check if it'd be beneficial
    // to outline.
    TargetInstrInfo::MachineOutlinerInfo MInfo =
        TII.getOutlininingCandidateInfo(RepeatedSequenceLocs);
    // Copy the StringLen entries of ST.Str that start at this leaf's suffix
    // index; they are handed to the OutlinedFunction as its sequence.
    std::vector<unsigned> Seq;
    for (unsigned i = Leaf->SuffixIdx; i < Leaf->SuffixIdx + StringLen; i++)
      Seq.push_back(ST.Str[i]);
    OutlinedFunction OF(FunctionList.size(), Parent.OccurrenceCount, Seq,
                        MInfo);
    unsigned Benefit = OF.getBenefit();

    // Is it better to outline this candidate than not?
    if (Benefit < 1) {
      // Outlining this candidate would take more instructions than not
      // outlining.
      // Emit a remark explaining why we didn't outline this candidate.
      // The remark is attached to the first recorded occurrence.
      std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator> C =
          RepeatedSequenceLocs[0];
      MachineOptimizationRemarkEmitter MORE(
          *(C.first->getParent()->getParent()), nullptr);
      MachineOptimizationRemarkMissed R(DEBUG_TYPE, "NotOutliningCheaper",
                                        C.first->getDebugLoc(),
                                        C.first->getParent());
      R << "Did not outline " << NV("Length", StringLen) << " instructions"
        << " from " << NV("NumOccurrences", RepeatedSequenceLocs.size())
        << " locations."
        << " Instructions from outlining all occurrences ("
        << NV("OutliningCost", OF.getOutliningCost()) << ")"
        << " >= Unoutlined instruction count ("
        << NV("NotOutliningCost", StringLen * OF.OccurrenceCount) << ")"
        << " (Also found at: ";

      // Tell the user the other places the candidate was found.
      // Starts at 1 because occurrence 0 is the remark's own location.
      for (unsigned i = 1, e = RepeatedSequenceLocs.size(); i < e; i++) {
        R << NV((Twine("OtherStartLoc") + Twine(i)).str(),
                RepeatedSequenceLocs[i].first->getDebugLoc());
        if (i != e - 1)
          R << ", ";
      }

      R << ")";
      MORE.emit(R);

      // Move to the next candidate.
      continue;
    }

    if (StringLen > MaxLen)
      MaxLen = StringLen;

    // At this point, the candidate class is seen as beneficial. Set their
    // benefit values and save them in the candidate list.
    for (Candidate &C : CandidatesForRepeatedSeq) {
      C.Benefit = Benefit;
      C.MInfo = MInfo;
      CandidateList.push_back(C);
    }

    FunctionList.push_back(OF);

    // Move to the next function.
    // Clearing the flag makes the `!Parent.IsInTree` check above skip any
    // later leaf that shares this parent.
    Parent.IsInTree = false;
  }

  return MaxLen;
}
 // Looks `s` up starting at the root node and returns whatever index list the
 // root's getIndexes() produces for it.
 vector<int> getIndexes(const string &s) {
     vector<int> matches = root.getIndexes(s);
     return matches;
 }
 // Builds the tree by inserting every suffix of `s` into `root`, each tagged
 // with the position at which that suffix starts.
 SuffixTree(const string &s) {
     // Convert the length once so the loop compares int with int rather than
     // int with the unsigned string::size_type.
     const int length = static_cast<int>(s.length());
     for (int i = 0; i < length; i++) {
         string suffix = s.substr(i);
         root.insertString(suffix, i);
     }
 }
Example #11
0
// Searches for `str` starting at the root node and returns the list that the
// root's search() produces.
list<int> SuffixTree::search (string str)
{
	list<int> hits = root.search (str);
	return hits;
}