/*
 * Hangs a new leaf for the suffix beginning at text[start] off the position
 * described by `point`.
 *
 * When `point` sits in the middle of an edge, that edge is split first: a
 * fresh internal node takes the place of the lower node under the upper node,
 * the lower node becomes its child, and the fresh node is remembered in
 * `last_internal_added` (after linking the previously remembered node to it).
 * When `point` is exactly on a node, the leaf is attached to that node.
 */
void SuffixTree::add_suffix(SuffixTreePoint *point, string::size_type start) {
    SuffixTreeNode *attach_to = NULL;

    if (point->is_node()) {
        /* Already on a node: nothing to split. */
        attach_to = point->node_above();
    } else {
        /* We need to create a new internal node */
        SuffixTreeNode *split = get_free_node(start, point->str_depth());
        SuffixTreeNode *upper = point->node_above();
        SuffixTreeNode *lower = point->node_below();

        /* Letter under which `lower` currently hangs from `upper`. */
        const string::size_type upper_pos = lower->head() + upper->str_depth();
        assert(upper_pos < text.size());
        const char upper_letter = text[upper_pos];
        assert(upper->child(upper_letter) != NULL);

        /* Re-route that letter to the new internal node. */
        upper->erase_child(upper_letter);
        upper->add_child(upper_letter, split);

        /* The old lower node continues below the split point. */
        const string::size_type split_pos = lower->head() + point->str_depth();
        assert(split_pos < text.size());
        split->add_child(text[split_pos], lower);

        /* Chain the previously created internal node to this one. */
        if (last_internal_added != NULL) {
            last_internal_added->link_to(split);
        }
        last_internal_added = split;

        attach_to = split;
    }

    /* The leaf spells the whole suffix, i.e. text.size()-start characters. */
    SuffixTreeNode *leaf = get_free_node(start, text.size() - start);
    const string::size_type leaf_pos = leaf->head() + attach_to->str_depth();
    assert(leaf_pos < text.size());
    attach_to->add_child(text[leaf_pos], leaf);
}
/*
 * Hands out the next unused slot of `nodes_buff`, initialised with the given
 * head position and string depth.
 *
 * The buffer is not grown here: running out of slots trips the assertion.
 */
SuffixTreeNode *SuffixTree::get_free_node(int path_start, int string_depth) {
    assert(next_node < nodes_buff.size());

    SuffixTreeNode &fresh = nodes_buff[next_node];
    ++next_node;

    fresh.set_head(path_start);
    fresh.set_str_depth(string_depth);
    return &fresh;
}
void insertString(const string &s, int index) { indexes.push_back(index); if (s.length() > 0) { value = s[0]; SuffixTreeNode child; if (children.find(value) != children.end()) { child = children[value]; } else { children.insert(make_pair(value, child)); } string remainder = s.substr(1); child.insertString(remainder, index); } }
/*
 * Moves `point` from its current position to the position whose string depth
 * is one less, going through the suffix link of the node above.
 *
 * The node above must already have a suffix link (asserted). After following
 * it, the point walks back down, guided by characters read from the text at
 * the original lower node's head, until the target depth is reached. If the
 * point lands exactly on a node and an internal node is still waiting for its
 * suffix link, that link is set to the landing node and the pending pointer
 * is cleared.
 */
void SuffixTree::suffix_jump(SuffixTreePoint *point) {
    /* Snapshot of the starting position; `point` is rewritten in place. */
    SuffixTreePoint origin = *point;
    assert(origin.node_above()->slink() != NULL);

    /* Start on the suffix-link target itself. */
    point->set_node_above(origin.node_above()->slink());
    point->set_node_below(origin.node_above()->slink());
    point->set_str_depth(point->node_above()->str_depth());
    assert(point->str_depth() == origin.node_above()->str_depth() - 1);

    /* Descend until the point is exactly one character shallower than before. */
    while (point->str_depth() != origin.str_depth() - 1) {
        string::size_type probe = origin.node_below()->head() + point->str_depth() + 1;
        point->set_node_below(can_descend(point, text[probe]));
        assert(point->node_below() != NULL);

        int edge_span = point->node_below()->str_depth() - point->node_above()->str_depth();
        int still_needed = origin.str_depth() - 1 - point->node_above()->str_depth();

        if (still_needed < edge_span) {
            /* Target lies inside this edge: stop part-way down. */
            point->set_str_depth(point->str_depth() + still_needed);
        } else {
            /* The whole edge fits: step onto the lower node. */
            point->set_str_depth(point->str_depth() + edge_span);
            point->set_node_above(point->node_below());
        }
    }

    /* Now on the new spot. Any suffix links to create? */
    if (point->is_node()) {
        if (last_internal_added != NULL) {
            last_internal_added->link_to(point->node_above());
            last_internal_added = NULL;
        }
    }
}
/**
 * Builds the tree by handing every suffix of `s` to the root node, each one
 * tagged with the offset at which it starts in `s`.
 *
 * @param s  Text to index. An empty string inserts nothing.
 */
SuffixTree(string s) {
    // The index uses the string's own size type so the loop condition does
    // not compare a signed int against the unsigned length.
    for (string::size_type i = 0; i < s.length(); ++i) {
        string suffix = s.substr(i);
        root.insertString(suffix, static_cast<int>(i));
    }
}
/* * Registra todas as ocorrências do padrão no texto */ void SuffixTree::getMatchings(const char* pat, size_t m, SuffixTreeNode& node, int nodeHeight, Printer& printer) { if(node.isLeaf()) //É folha printer.addMatching(n-nodeHeight); else { for(int nt = node.firstChild; nt != -1; nt = nodes.at(nt).sibling){ SuffixTreeNode& next = nodes.at(nt); int edgeSize = next.end - next.start + 1; getMatchings(pat, m, next, nodeHeight + edgeSize, printer); } } }
/*
 * Tells whether the path can be extended with `letter` from `point`.
 *
 * Returns the node reached (or still being approached) when the step is
 * possible, and NULL otherwise.
 */
SuffixTreeNode *SuffixTree::can_descend(SuffixTreePoint *point, char letter) const {
    if (point->is_node()) {
        // Sentinel boundary case: from the node that root's suffix link
        // points to, every letter leads to the root.
        if (point->get_node() == root->slink()) {
            return root;
        }
        // Ordinary node: the answer is whatever child is filed under `letter`.
        return point->get_node()->child(letter);
    }

    // In the middle of an edge only one continuation exists: the next
    // character of the edge label, read from the text.
    int next_i = point->node_below()->head() + point->str_depth();
    assert(next_i > 0);
    assert((string::size_type) next_i < text.size());

    if (text[next_i] == letter) {
        return point->node_below();
    }
    return NULL;
}
/// Walks the leaves of \p ST looking for repeated sequences worth outlining.
///
/// For every leaf that is still in the tree and whose parent occurs at least
/// twice, one Candidate is created per leaf child of that parent. The target
/// is then asked, through \p TII, for outlining information on the collected
/// locations, and an OutlinedFunction is built from it. Classes whose benefit
/// is below 1 are reported with a missed-optimization remark and dropped; the
/// others are appended to \p CandidateList and \p FunctionList.
///
/// \param ST The suffix tree to scan; visited leaves and parents are marked
/// by clearing their IsInTree flag.
/// \param TII Target hook provider used to query outlining information.
/// \param Mapper Supplies the instruction iterator for each string index.
/// \param [out] CandidateList Cleared, then filled with accepted candidates.
/// \param [out] FunctionList Cleared, then filled with one OutlinedFunction
/// per accepted class of candidates.
///
/// \returns The length of the longest accepted repeated sequence, or 0 if
/// none was accepted.
unsigned MachineOutliner::findCandidates(
    SuffixTree &ST, const TargetInstrInfo &TII, InstructionMapper &Mapper,
    std::vector<Candidate> &CandidateList,
    std::vector<OutlinedFunction> &FunctionList) {
  CandidateList.clear();
  FunctionList.clear();
  unsigned MaxLen = 0;

  // FIXME: Visit internal nodes instead of leaves.
  for (SuffixTreeNode *Leaf : ST.LeafVector) {
    assert(Leaf && "Leaves in LeafVector cannot be null!");
    if (!Leaf->IsInTree)
      continue;

    assert(Leaf->Parent && "All leaves must have parents!");
    SuffixTreeNode &Parent = *(Leaf->Parent);

    // If it doesn't appear enough, or we already outlined from it, skip it.
    if (Parent.OccurrenceCount < 2 || Parent.isRoot() || !Parent.IsInTree)
      continue;

    // Figure out if this candidate is beneficial.
    // The repeated part is the leaf's full length minus the leaf's own edge.
    unsigned StringLen = Leaf->ConcatLen - (unsigned)Leaf->size();

    // Too short to be beneficial; skip it.
    // FIXME: This isn't necessarily true for, say, X86. If we factor in
    // instruction lengths we need more information than this.
    if (StringLen < 2)
      continue;

    // If this is a beneficial class of candidate, then every one is stored in
    // this vector.
    std::vector<Candidate> CandidatesForRepeatedSeq;

    // Describes the start and end point of each candidate. This allows the
    // target to infer some information about each occurrence of each repeated
    // sequence.
    // FIXME: CandidatesForRepeatedSeq and this should be combined.
    std::vector<
        std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>>
        RepeatedSequenceLocs;

    // Figure out the call overhead for each instance of the sequence.
    for (auto &ChildPair : Parent.Children) {
      SuffixTreeNode *M = ChildPair.second;

      if (M && M->IsInTree && M->isLeaf()) {
        // Each sequence is over [StartIt, EndIt].
        MachineBasicBlock::iterator StartIt = Mapper.InstrList[M->SuffixIdx];
        MachineBasicBlock::iterator EndIt =
            Mapper.InstrList[M->SuffixIdx + StringLen - 1];

        // The candidate refers to the function slot that will be created
        // below if this class turns out to be beneficial.
        CandidatesForRepeatedSeq.emplace_back(M->SuffixIdx, StringLen,
                                              FunctionList.size());
        RepeatedSequenceLocs.emplace_back(std::make_pair(StartIt, EndIt));

        // Never visit this leaf again.
        M->IsInTree = false;
      }
    }

    // We've found something we might want to outline.
    // Create an OutlinedFunction to store it and check if it'd be beneficial
    // to outline.
    TargetInstrInfo::MachineOutlinerInfo MInfo =
        TII.getOutlininingCandidateInfo(RepeatedSequenceLocs);
    std::vector<unsigned> Seq;
    for (unsigned i = Leaf->SuffixIdx; i < Leaf->SuffixIdx + StringLen; i++)
      Seq.push_back(ST.Str[i]);
    OutlinedFunction OF(FunctionList.size(), Parent.OccurrenceCount, Seq,
                        MInfo);
    unsigned Benefit = OF.getBenefit();

    // Is it better to outline this candidate than not?
    if (Benefit < 1) {
      // Outlining this candidate would take more instructions than not
      // outlining.
      // Emit a remark explaining why we didn't outline this candidate.
      // The first recorded location anchors the remark.
      std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator> C =
          RepeatedSequenceLocs[0];
      MachineOptimizationRemarkEmitter MORE(
          *(C.first->getParent()->getParent()), nullptr);
      MachineOptimizationRemarkMissed R(DEBUG_TYPE, "NotOutliningCheaper",
                                        C.first->getDebugLoc(),
                                        C.first->getParent());
      R << "Did not outline " << NV("Length", StringLen) << " instructions"
        << " from " << NV("NumOccurrences", RepeatedSequenceLocs.size())
        << " locations."
        << " Instructions from outlining all occurrences ("
        << NV("OutliningCost", OF.getOutliningCost()) << ")"
        << " >= Unoutlined instruction count ("
        << NV("NotOutliningCost", StringLen * OF.OccurrenceCount) << ")"
        << " (Also found at: ";

      // Tell the user the other places the candidate was found.
      for (unsigned i = 1, e = RepeatedSequenceLocs.size(); i < e; i++) {
        R << NV((Twine("OtherStartLoc") + Twine(i)).str(),
                RepeatedSequenceLocs[i].first->getDebugLoc());
        if (i != e - 1)
          R << ", ";
      }

      R << ")";
      MORE.emit(R);

      // Move to the next candidate.
      // Note: the leaves visited above stay marked as not in the tree.
      continue;
    }

    if (StringLen > MaxLen)
      MaxLen = StringLen;

    // At this point, the candidate class is seen as beneficial. Set their
    // benefit values and save them in the candidate list.
    for (Candidate &C : CandidatesForRepeatedSeq) {
      C.Benefit = Benefit;
      C.MInfo = MInfo;
      CandidateList.push_back(C);
    }

    FunctionList.push_back(OF);

    // Move to the next function.
    Parent.IsInTree = false;
  }

  return MaxLen;
}
/**
 * Looks `s` up in the tree by delegating to the root node.
 *
 * @param s  String to search for.
 * @return   Whatever index list the root node reports for `s`.
 */
vector<int> getIndexes(const string &s) {
    vector<int> found = root.getIndexes(s);
    return found;
}
SuffixTree(const string &s) { for (int i = 0; i < s.length(); i++) { string suffix = s.substr(i); root.insertString(suffix, i); } }
/*
 * Searches the tree for `str` by forwarding the query to the root node and
 * returning the list it produces.
 */
list<int> SuffixTree::search(string str) {
    list<int> matches = root.search(str);
    return matches;
}