/*
 * Inserts the first n entries of `strings` into the burst trie rooted at
 * `root`.
 *
 * Each string is followed downwards, one CharT at a time, for as long as the
 * slot for the current character holds a trie node. The string is then
 * appended to the bucket stored in the slot where the descent stopped. If the
 * character is not the end-of-string marker and the bucket now holds more
 * than Threshold strings, the bucket is replaced by the result of BurstImpl,
 * the slot is marked as a trie node, and the old bucket is deleted.
 */
static inline void
insert(TrieNode<CharT, BucketT>* root, unsigned char** strings, size_t n)
{
	for (size_t idx = 0; idx < n; ++idx) {
		unsigned char* key = strings[idx];
		TrieNode<CharT, BucketT>* cur = root;
		size_t offset = 0;
		CharT ch = get_char<CharT>(key, 0);
		// Descend through the interior nodes that already exist.
		for (; cur->is_trie(ch); ch = get_char<CharT>(key, offset)) {
			assert(not is_end(ch));
			cur = cur->get_node(ch);
			offset += sizeof(CharT);
		}
		BucketT* slot = cur->get_bucket(ch);
		assert(slot);
		slot->push_back(key);
		// Buckets reached via the end-of-string marker are never burst.
		if (not is_end(ch) and slot->size() > Threshold) {
			cur->_buckets[ch] = BurstImpl()(*slot, offset + sizeof(CharT));
			make_trie(cur->_buckets[ch]);
			delete slot;
		}
	}
}
void Trie::insert(const std::string& stringToInsert) { TrieNode* currentNode = this->root.get(); for(auto stringIt = stringToInsert.begin(); stringIt != stringToInsert.end(); ++stringIt) { char currentChar = *stringIt; bool done = false; // 2. if there is a node with value == c, set that node as current node. Repeat. for (auto it = currentNode->children.begin(); it != currentNode->children.end() && !done; ++it) { if ((*it)->value == currentChar) { currentNode = *it; done = true; } } if(done) continue; // 3. if not, insert a node and go to 2. currentNode = currentNode->addNode(currentChar); } currentNode->hasString = true; }
// Depth-first search for `word`, where '.' matches any single character.
//
// Candidates are (node, index-into-word) pairs kept on an explicit stack.
// A candidate that has consumed the whole word is returned if it is an end
// node, or if it is the last candidate left on the stack (in which case the
// returned node may not be an end node). Returns NULL when every candidate
// has been exhausted without a result.
TrieNode *searchPattern(string word) {
    typedef std::pair<TrieNode *, int> Match;

    stack<Match> pending;
    pending.push(make_pair(root, 0));
    int len = word.length();

    while (!pending.empty()) {
        Match top = pending.top();
        pending.pop();
        TrieNode *node = top.first;
        int pos = top.second;

        // Whole pattern consumed for this candidate.
        if (pos >= len) {
            if (node->isEnd() || pending.empty()) {
                return node;
            }
            continue;
        }

        char ch = word[pos];
        if (ch != '.') {
            // Literal character: follow the single matching link, if any.
            if (node->containsKey(ch)) {
                pending.push(make_pair(node->get(ch), pos + 1));
            }
            continue;
        }

        // Wildcard: every existing link is a candidate.
        for (int i = 0; i < TrieNode::R; i++) {
            if (node->links[i]) {
                pending.push(make_pair(node->links[i], pos + 1));
            }
        }
    }
    return NULL;
}
// Adds `word` to the dictionary trie, walking the word from its last
// character to its first. Characters that are neither digits nor letters are
// skipped (and counted). max_length_ is raised to the number of characters
// that were actually inserted if that is larger. The final node is marked as
// the end of a word unless nothing was inserted (node is still root_).
void Dictionary::addWord( const std::string &word )
{
    TrieNode *node = root_;
    std::string::size_type skipped = 0;

    for ( auto it = word.rbegin(); it != word.rend(); ++it ) {
        char c = *it;
        // The <cctype> classifiers have undefined behavior for negative
        // arguments, which a plain char can hold for non-ASCII bytes.
        const unsigned char uc = static_cast<unsigned char>( c );
        if ( !isdigit( uc ) && !isalpha( uc ) ) {
            ++skipped;
            continue;
        }

        TrieNode *next_node = node->contain( c );
        if ( next_node ) {
            node = next_node;
        } else {
            node = node->addNode( c );
        }
    }

    max_length_ = std::max( max_length_, word.size() - skipped );

    if ( node != root_ ) {
        node->setEndWord( true );
    }
}
void IndexData::reassignKeywordIds() { map<TrieNode *, unsigned> trieNodeIdMapper; // this->trie->reassignKeywordIds(trieNodeIdMapper); // Generating an ID mapper by iterating through the set of trie nodes whose // ids need to be reassigned // a map from temperory id to new ids, this map is used for changing forwardIndex and quadTree map<unsigned, unsigned> keywordIdMapper; for (map<TrieNode *, unsigned>::iterator iter = trieNodeIdMapper.begin(); iter != trieNodeIdMapper.end(); ++iter) { TrieNode *node = iter->first; unsigned newKeywordId = iter->second; keywordIdMapper[node->getId()] = newKeywordId; node->setId(newKeywordId); // set the new keyword Id } map<unsigned, unsigned> processedRecordIds; // keep track of records that have been converted // Now we have the ID mapper. We want to go through the trie nodes one by one. // For each of them, access its inverted list. For each record, // use the id mapper to change the integers on the forward list. changeKeywordIdsOnForwardLists(trieNodeIdMapper, keywordIdMapper, processedRecordIds); // apply the ID mapper on the keyword ids of empty leaf nodes this->trie->applyKeywordIdMapperOnEmptyLeafNodes(keywordIdMapper); }
// Returns if the word is in the trie. bool search(string word) { TrieNode* itr = root; for (int i = 0; itr != NULL && i < word.length(); ++i) { itr = itr->locateCh(word[i]); } return (itr != NULL && itr->isWordEnd()); }
// Returns if there is any word in the trie // that starts with the given prefix. bool startsWith(string prefix) { TrieNode* itr = root; for (int i = 0; itr != NULL && i < prefix.length(); ++i) { itr = itr->locateCh(prefix[i]); } return (itr != NULL); }
// Wraps every maximal run of characters of S that is covered by an
// occurrence of some entry of `words` in "<b>" ... "</b>" and returns the
// resulting string.
//
// All words are inserted into a trie. For each start position i the trie is
// walked along S to find the longest word starting at i, and the covered
// range is marked in `lookup`. A second pass emits the tags at the
// boundaries of the marked runs.
//
// NOTE(review): characters are mapped to trie slots with `S[j] - 'a'`, so
// this presumably expects lowercase a-z input only — confirm against
// TrieNode::leaves.
string boldWords(vector<string>& words, string S) {
    TrieNode trie;
    for (const auto& word : words) {
        trie.Insert(word);
    }

    const int len = static_cast<int>(S.length());
    vector<bool> lookup(S.length());
    for (int i = 0; i < len; ++i) {
        auto curr = &trie;
        // Index of the last covered character; i - 1 means "no match here".
        int k = i - 1;
        for (int j = i; j < len; ++j) {
            if (!curr->leaves[S[j] - 'a']) {
                break;
            }
            curr = curr->leaves[S[j] - 'a'];
            if (curr->isString) {
                k = j;
            }
        }
        // Empty range when nothing matched at i.
        fill(lookup.begin() + i, lookup.begin() + k + 1, true);
    }

    string result;
    for (int i = 0; i < len; ++i) {
        // Open a tag where a marked run begins.
        if (lookup[i] && (i == 0 || !lookup[i - 1])) {
            result += "<b>";
        }
        result.push_back(S[i]);
        // Close the tag where a marked run ends.
        if (lookup[i] && (i == len - 1 || !lookup[i + 1])) {
            result += "</b>";
        }
    }
    return result;
}
//////////////////////////////////////////////////////////////////////////////// // Finds s in the tree and returns the node (may not be a leaf) returns null // otherwise. TrieNode* Trie::FindSubStr( const std::string& s ) { if( root == NULL ) { printf( "ERROR in Trie::FindSubStr, root == NULL!!!!!\n" ); return NULL; } if( s.length() == 0 ) return root; TrieNode *traverseNode = root; for( unsigned int i = 0 ; i < s.length() ; i++ ){ traverseNode = traverseNode->TraverseFind( s[i] ); if( traverseNode ) { continue; } else { return NULL; } } // Look for a leaf node here and return it if no leaf node just return this // node. std::list<TrieNode*>::iterator it; for(it = traverseNode->m_children.begin() ; it != traverseNode->m_children.end() ; it++){ //found child if((*it)->m_nNodeType == TRIE_LEAF) { return (*it); } } return traverseNode; }
/*
 * Builds a skeleton trie from n/8192 strings drawn at positions chosen with
 * drand48().
 *
 * Every sampled string is followed character by character until its end
 * marker; each missing interior node on the way is created. At most 5000 new
 * nodes are created when CharT is one byte wide, 2000 otherwise; construction
 * stops as soon as that budget is used up. Returns the newly allocated root.
 */
static TrieNode<CharT, BucketT>*
random_sample(unsigned char** strings, size_t n)
{
	const size_t sample_size = n/8192;
	debug()<<__PRETTY_FUNCTION__<<" sampling "<<sample_size<<" strings\n";
	size_t budget = (sizeof(CharT) == 1) ? 5000 : 2000;
	TrieNode<CharT, BucketT>* root = new TrieNode<CharT, BucketT>;
	for (size_t taken = 0; taken < sample_size; ++taken) {
		unsigned char* key = strings[size_t(drand48()*n)];
		TrieNode<CharT, BucketT>* cur = root;
		size_t offset = 0;
		for (CharT ch = get_char<CharT>(key, offset);
		     not is_end(ch);
		     ch = get_char<CharT>(key, offset)) {
			offset += sizeof(CharT);
			cur->extend(ch+1);
			if (not cur->is_trie(ch)) {
				cur->_buckets[ch] = new TrieNode<CharT, BucketT>;
				make_trie(cur->_buckets[ch]);
				// Node budget exhausted: stop sampling altogether.
				if (--budget == 0) return root;
			}
			cur = cur->get_node(ch);
			assert(cur);
		}
	}
	return root;
}
/*
 * Builds a skeleton trie from every 8192nd string of the input (indices
 * 0, 8192, 16384, ...).
 *
 * Every sampled string is followed character by character until its end
 * marker; each missing interior node on the way is created. At most 5000 new
 * nodes are created when CharT is one byte wide, 2000 otherwise; construction
 * stops as soon as that budget is used up. Returns the newly allocated root.
 */
static TrieNode<CharT, BucketT>*
pseudo_sample(unsigned char** strings, size_t n)
{
	debug()<<__func__<<"(): sampling "<<n/8192<<" strings ...\n";
	size_t budget = (sizeof(CharT) == 1) ? 5000 : 2000;
	TrieNode<CharT, BucketT>* root = new TrieNode<CharT, BucketT>;
	for (size_t idx = 0; idx < n; idx += 8192) {
		unsigned char* key = strings[idx];
		TrieNode<CharT, BucketT>* cur = root;
		size_t offset = 0;
		for (CharT ch = get_char<CharT>(key, offset);
		     not is_end(ch);
		     ch = get_char<CharT>(key, offset)) {
			offset += sizeof(CharT);
			cur->extend(ch+1);
			if (not cur->is_trie(ch)) {
				cur->_buckets[ch] = new TrieNode<CharT, BucketT>;
				make_trie(cur->_buckets[ch]);
				// Node budget exhausted: stop sampling altogether.
				if (--budget == 0) return root;
			}
			cur = cur->get_node(ch);
			assert(cur);
		}
	}
	return root;
}
// Inserts a word into the trie. void insert(string s) { if (s.empty()) return; TrieNode* p = root; int i = 0; while (i < s.size()) { TrieNode* tmp = p->findCh(s[i]); if (nullptr == tmp) { p->addChild(s[i]); ++i; p = p->getLastChild(); } else { ++i; p = tmp; } } p->isAWord(true); return; }
// Inserts s into the trie. Each character reuses the existing child when
// there is one and adds a child otherwise. The end of the word is recorded
// as a '#' child of the last node.
void Trie::insert(string s) {
    TrieNode* curr = root;
    for (string::size_type i = 0; i < s.length(); ++i) {
        if (curr->inChildren(s[i])) {
            curr = curr->getChild(s[i]);
        } else {
            curr = curr->addChild(s[i]);
        }
    }
    // Guard the terminator the same way as the characters above, so that
    // inserting the same word again does not add a second '#' child.
    if (!curr->inChildren('#')) {
        curr->addChild('#');
    }
}
void naiveTest() { TrieNode* trie = new TrieNode(); trie->insert("HELLO",1); assert(1 == trie->find("HELLO")->terminal); assert(NULL == trie->find("HELLOB")); assert(-1 == trie->find("HELL")->terminal); delete trie; }
// Returns if there is any word in the trie // that starts with the given prefix. bool startsWith(string prefix) { TrieNode* node = root; for(auto c:prefix) { node = node->subNode(c); if(node == nullptr) return false; } return true; }
// Returns if there is any word in the trie // that starts with the given prefix. bool Trie::startsWith(string prefix) { TrieNode* curr = root; for(int i=0; i<prefix.length(); i++){ if(!curr->inChildren(prefix[i])) return false; curr = curr->getChild(prefix[i]); } return true; }
// Returns if the word is in the trie. bool Trie::search(string key) { TrieNode* curr = root; for(int i=0; i<key.length(); i++){ if(!curr->inChildren(key[i])) return false; curr = curr->getChild(key[i]); } return curr->inChildren('#'); }
// Returns if the word is in the trie. bool search(string word) { TrieNode* node = root; for(auto c:word) { node = node->subNode(c); if(node == nullptr) return false; } return node->isend; }
// Returns if the word is in the trie. bool search(string key) { TrieNode *cur = root; for(auto ch : key) { cur = cur->get_child(ch); if (cur == nullptr) return false; } return cur->is_end; }
// Returns if there is any word in the trie // that starts with the given prefix. bool startsWith(string prefix) { TrieNode *cur = root, *tmp; for (auto c: prefix) { if ((tmp = cur->find(c)) == NULL) return false; cur = tmp; } return true; }
// Returns if there is any word in the trie // that starts with the given prefix. bool startsWith(string prefix) { TrieNode *cur = root; for (auto ch : prefix) { cur = cur->get_child(ch); if (cur == nullptr) return false; } return true; }
// Inserts `word` into the trie. add_child() is called for every character
// before stepping into children[c] (presumably it is a no-op when the child
// already exists — confirm against TrieNode). The node reached after the
// last character is marked with set_word().
void insert(string word) {
    TrieNode* node = root;
    for (string::size_type pos = 0; pos < word.size(); ++pos) {
        char ch = word[pos];
        node->add_child(ch);
        node = node->children[ch];
    }
    node->set_word();
}
// Returns if the word is in the trie. bool search(string word) { TrieNode *cur = root, *tmp; for (auto c: word) { if ((tmp = cur->find(c)) == NULL) return false; cur = tmp; } return cur->find('\0') != NULL; }
// Inserts a word into the trie. void insert(string word) { TrieNode* itr = root; for (int i = 0; i < word.length(); ++i) { itr = itr->addNode(word[i]); } if (itr != root) { itr->setWordEnd(); } }
// Walks `word` through the trie and reports whether a prefix relationship
// exists with the stored data:
//   - true  if a node flagged as a word is met before `word` is consumed;
//   - false if a character has no matching child;
//   - true  if all of `word` could be followed.
bool exist_prefix(string word) {
    TrieNode* node = root;
    for (string::size_type pos = 0; pos < word.size(); ++pos) {
        if (node->is_word()) {
            return true;
        }
        char ch = word[pos];
        if (!node->exist_child(ch)) {
            return false;
        }
        node = node->children[ch];
    }
    return true;
}
// Returns if the word is in the trie. bool search(string key) { int len = key.size(); TrieNode *node = root; for(int i=0;i<len;i++){ node = node->get_childen(key[i]); if(node==NULL) return false; } return node->iswords(); }
// Reports whether the trie contains a path spelling out `prefix`: false as
// soon as subNode() yields no node for a character, true otherwise.
bool Trie::startsWith(string prefix) {
    TrieNode* node = root;
    for (string::size_type pos = 0; pos < prefix.size(); ++pos) {
        node = node->subNode(prefix[pos]);
        if (!node) {
            return false;
        }
    }
    return true;
}
// Reports whether `key` is stored in the trie: false as soon as subNode()
// yields no node for a character; otherwise whether the reached node's
// `isend` flag equals true.
bool Trie::search(string key) {
    TrieNode* node = root;
    for (string::size_type pos = 0; pos < key.size(); ++pos) {
        node = node->subNode(key[pos]);
        if (!node) {
            return false;
        }
    }
    return node->isend == true;
}
// Inserts a word into the trie. void insert(string word) { TrieNode *cur = root, *tmp; for (auto c: word) { if ((tmp = cur->find(c)) == NULL) { tmp = cur->insert(c); } cur = tmp; } cur->insert('\0'); }
// Accumulate data from children into their parent. void LocalState::TrieNode::propagate_data_upwards (void) { for (auto citer = children.begin(); citer != children.end(); citer++) { TrieNode* child = citer->second; child->propagate_data_upwards(); for (size_t i = 0; i < self_data.size(); i++) path_data[i] += child->path_data[i]; invocations += child->invocations; } }