Ejemplo n.º 1
0
// Inserts the n strings of `strings` into the structure rooted at `root`.
// For each string the code follows child nodes for as long as
// node->is_trie(c) holds, then appends the string pointer to the bucket
// selected by the current character. A bucket whose character is not the
// end marker and whose size exceeds Threshold is replaced by the result of
// BurstImpl and the old bucket is deleted.
static inline void
insert(TrieNode<CharT, BucketT>* root, unsigned char** strings, size_t n)
{
	for (size_t idx = 0; idx < n; ++idx) {
		unsigned char* key = strings[idx];
		TrieNode<CharT, BucketT>* cur = root;
		size_t offset = 0;
		CharT ch = get_char<CharT>(key, 0);
		// Walk down through the slots that are already sub-tries.
		for (; cur->is_trie(ch); ch = get_char<CharT>(key, offset)) {
			assert(not is_end(ch));
			cur = cur->get_node(ch);
			offset += sizeof(CharT);
		}
		BucketT* bucket = cur->get_bucket(ch);
		assert(bucket);
		bucket->push_back(key);
		// The end-of-string bucket is never burst, whatever its size.
		if (not is_end(ch) and bucket->size() > Threshold) {
			cur->_buckets[ch] = BurstImpl()(*bucket,
					offset+sizeof(CharT));
			make_trie(cur->_buckets[ch]);
			delete bucket;
		}
	}
}
Ejemplo n.º 2
0
void Trie::insert(const std::string& stringToInsert)
{
	TrieNode* currentNode = this->root.get();
	for(auto stringIt = stringToInsert.begin(); stringIt != stringToInsert.end(); ++stringIt)
	{
		char currentChar = *stringIt;

		bool done = false;
		// 2. if there is a node with value == c, set that node as current node. Repeat.
		for (auto it = currentNode->children.begin(); it != currentNode->children.end() && !done; ++it)
		{
			if ((*it)->value == currentChar)
			{
				currentNode = *it;
				done = true;
			}
		}

		if(done)
			continue;

		// 3. if not, insert a node and go to 2.
		currentNode = currentNode->addNode(currentChar);
	}

	currentNode->hasString = true;
}
  // Depth-first search for `word`, where '.' stands for any existing link.
  // Candidates are kept on an explicit stack as (node, index-into-word)
  // pairs. Once a candidate has consumed the whole word it is returned if
  // node->isEnd() holds or if no other candidate is left on the stack;
  // otherwise the search continues. Returns NULL when the stack runs dry.
  TrieNode *searchPattern(string word) {
    typedef std::pair<TrieNode *, int> Frame;
    stack<Frame> pending;
    pending.push(make_pair(root, 0));
    int wordLen = word.length();
    while (!pending.empty()) {
      Frame top = pending.top();
      pending.pop();
      TrieNode *cur = top.first;
      int pos = top.second;

      // Whole pattern consumed for this candidate.
      if (pos >= wordLen) {
        if (cur->isEnd() || pending.empty()) {
          return cur;
        }
        continue;
      }

      char ch = word[pos];
      if (ch != '.') {
        // Literal character: follow the single matching link, if any.
        if (cur->containsKey(ch)) {
          pending.push(make_pair(cur->get(ch), pos + 1));
        }
        continue;
      }

      // Wildcard: every non-null link is a candidate.
      for (int slot = 0; slot < TrieNode::R; slot++) {
        if (cur->links[slot]) {
          pending.push(make_pair(cur->links[slot], pos + 1));
        }
      }
    }
    return NULL;
  }
Ejemplo n.º 4
0
// Adds `word` to the trie, walking its characters from last to first.
// Characters that are neither digits nor letters are skipped and do not count
// towards max_length_. The node reached after the last kept character is
// flagged as a word end, unless it is still the root (no character was kept).
void Dictionary::addWord( const std::string &word )
{
    TrieNode *node = root_;

    // Number of characters skipped; unsigned so it matches word.size().
    std::string::size_type skipped = 0;
    for ( auto it = word.rbegin(); it != word.rend(); ++it )
    {
        char c = *it;
        // The <cctype> classifiers have undefined behaviour for negative
        // values, which a plain char can hold; classify the unsigned value.
        const unsigned char uc = static_cast<unsigned char>( c );
        if ( !isdigit(uc) && !isalpha(uc) )
        {
            ++skipped;
            continue;
        }
        TrieNode *next_node = node->contain(c);
        if ( next_node )
        {
            node = next_node;
        }
        else
        {
            node = node->addNode(c);
        }
    }

    max_length_ = std::max( max_length_, word.size() - skipped );
    if ( node != root_ )
    {
        node->setEndWord(true);
    }
}
Ejemplo n.º 5
0
void IndexData::reassignKeywordIds() {
	map<TrieNode *, unsigned> trieNodeIdMapper; //
	this->trie->reassignKeywordIds(trieNodeIdMapper);

	// Generating an ID mapper by iterating through the set of trie nodes whose
	// ids need to be reassigned
	// a map from temperory id to new ids, this map is used for changing forwardIndex and quadTree
	map<unsigned, unsigned> keywordIdMapper;
	for (map<TrieNode *, unsigned>::iterator iter = trieNodeIdMapper.begin();
			iter != trieNodeIdMapper.end(); ++iter) {
		TrieNode *node = iter->first;
		unsigned newKeywordId = iter->second;

		keywordIdMapper[node->getId()] = newKeywordId;

		node->setId(newKeywordId); // set the new keyword Id
	}

	map<unsigned, unsigned> processedRecordIds; // keep track of records that have been converted

	// Now we have the ID mapper.  We want to go through the trie nodes one by one.
	// For each of them, access its inverted list.  For each record,
	// use the id mapper to change the integers on the forward list.
	changeKeywordIdsOnForwardLists(trieNodeIdMapper, keywordIdMapper,
			processedRecordIds);

	// apply the ID mapper on the keyword ids of empty leaf nodes
	this->trie->applyKeywordIdMapperOnEmptyLeafNodes(keywordIdMapper);
}
Ejemplo n.º 6
0
 // Returns true when following `word` character by character from the root
 // ends on a node for which isWordEnd() holds.
 bool search(string word) {
     TrieNode* cursor = root;
     int pos = 0;
     while (cursor != NULL && pos < word.length()) {
         cursor = cursor->locateCh(word[pos]);
         ++pos;
     }
     if (cursor == NULL) {
         return false;
     }
     return cursor->isWordEnd();
 }
Ejemplo n.º 7
0
 // Returns true when every character of `prefix` can be followed from the
 // root, i.e. the walk never yields a NULL node.
 bool startsWith(string prefix) {
     TrieNode* cursor = root;
     for (int pos = 0; pos < prefix.length(); ++pos) {
         if (cursor == NULL) {
             return false;
         }
         cursor = cursor->locateCh(prefix[pos]);
     }
     return cursor != NULL;
 }
Ejemplo n.º 8
0
    // Wraps in <b>...</b> every maximal run of characters of S covered by an
    // occurrence of one of `words`. All words go into a trie; for each start
    // index the longest word beginning there marks its characters as bold.
    // Characters are indexed as (ch - 'a'), so lowercase input is presumably
    // expected -- TODO confirm against TrieNode.
    string boldWords(vector<string>& words, string S) {
        TrieNode trie;
        for (const auto& word : words) {
            trie.Insert(word);
        }

        const int len = static_cast<int>(S.length());
        vector<bool> bold(S.length());
        for (int start = 0; start < len; ++start) {
            auto node = &trie;
            // Index of the last character of the longest match from `start`;
            // start - 1 means "no match", which yields an empty fill range.
            int lastEnd = start - 1;
            int pos = start;
            while (pos < len && node->leaves[S[pos] - 'a']) {
                node = node->leaves[S[pos] - 'a'];
                if (node->isString) {
                    lastEnd = pos;
                }
                ++pos;
            }
            fill(bold.begin() + start, bold.begin() + lastEnd + 1, true);
        }

        string result;
        for (int i = 0; i < len; ++i) {
            // A run opens where bold starts and closes where it stops.
            const bool opens = bold[i] && (i == 0 || !bold[i - 1]);
            const bool closes = bold[i] && (i == len - 1 || !bold[i + 1]);
            if (opens) {
                result += "<b>";
            }
            result.push_back(S[i]);
            if (closes) {
                result += "</b>";
            }
        }
        return result;
    }
Ejemplo n.º 9
0
Archivo: Trie.cpp Proyecto: arpg/CVars
////////////////////////////////////////////////////////////////////////////////
// Follows s from the root one character at a time. Returns NULL if the root
// is NULL or any character cannot be followed. For an empty s the root is
// returned. Otherwise, if the node reached has a child of type TRIE_LEAF the
// first such child is returned; if not, the reached node itself is returned.
TrieNode* Trie::FindSubStr( const std::string& s )
{
    if( root == NULL ) {
        printf( "ERROR in Trie::FindSubStr, root == NULL!!!!!\n" );
        return NULL;
    }

    TrieNode *cursor = root;
    if( s.length() == 0 ) {
        return cursor;
    }

    for( std::string::const_iterator ch = s.begin() ; ch != s.end() ; ++ch ) {
        cursor = cursor->TraverseFind( *ch );
        if( cursor == NULL ) {
            return NULL;
        }
    }

    // Prefer a leaf child of the reached node when one exists.
    std::list<TrieNode*>::iterator child = cursor->m_children.begin();
    while( child != cursor->m_children.end() ) {
        if( (*child)->m_nNodeType == TRIE_LEAF ) {
            return *child;
        }
        ++child;
    }

    return cursor;
}
Ejemplo n.º 10
0
// Builds a trie skeleton from n/8192 strings picked at random (drand48) from
// `strings`. For every character of a picked string the slot is turned into
// a trie node if it is not one already. Construction stops early once the
// node budget (5000 when sizeof(CharT) == 1, otherwise 2000) is used up.
// Returns the newly allocated root.
static TrieNode<CharT, BucketT>*
random_sample(unsigned char** strings, size_t n)
{
	const size_t sample_size = n/8192;
	debug()<<__PRETTY_FUNCTION__<<" sampling "<<sample_size<<" strings\n";
	size_t nodes_left = (sizeof(CharT) == 1) ? 5000 : 2000;
	TrieNode<CharT, BucketT>* root = new TrieNode<CharT, BucketT>;
	for (size_t picked = 0; picked < sample_size; ++picked) {
		unsigned char* str = strings[size_t(drand48()*n)];
		TrieNode<CharT, BucketT>* node = root;
		for (size_t depth = 0; ; depth += sizeof(CharT)) {
			CharT c = get_char<CharT>(str, depth);
			if (is_end(c)) break;
			node->extend(c+1);
			if (not node->is_trie(c)) {
				node->_buckets[c] = new TrieNode<CharT, BucketT>;
				make_trie(node->_buckets[c]);
				// Node budget exhausted: hand back what has been built.
				if (--nodes_left == 0) return root;
			}
			node = node->get_node(c);
			assert(node);
		}
	}
	return root;
}
Ejemplo n.º 11
0
// Builds a trie skeleton from every 8192nd string of `strings` (indices 0,
// 8192, 16384, ...). For every character of a sampled string the slot is
// turned into a trie node if it is not one already. Construction stops early
// once the node budget (5000 when sizeof(CharT) == 1, otherwise 2000) is
// used up. Returns the newly allocated root.
static TrieNode<CharT, BucketT>*
pseudo_sample(unsigned char** strings, size_t n)
{
	debug()<<__func__<<"(): sampling "<<n/8192<<" strings ...\n";
	size_t nodes_left = (sizeof(CharT) == 1) ? 5000 : 2000;
	TrieNode<CharT, BucketT>* root = new TrieNode<CharT, BucketT>;
	size_t pick = 0;
	while (pick < n) {
		unsigned char* str = strings[pick];
		TrieNode<CharT, BucketT>* node = root;
		for (size_t depth = 0; ; depth += sizeof(CharT)) {
			CharT c = get_char<CharT>(str, depth);
			if (is_end(c)) break;
			node->extend(c+1);
			if (not node->is_trie(c)) {
				node->_buckets[c] = new TrieNode<CharT, BucketT>;
				make_trie(node->_buckets[c]);
				// Node budget exhausted: hand back what has been built.
				if (--nodes_left == 0) return root;
			}
			node = node->get_node(c);
			assert(node);
		}
		pick += 8192;
	}
	return root;
}
	// Inserts s into the trie; an empty string is ignored. For each
	// character the existing child is followed, or a child is added and the
	// walk continues from getLastChild(). The final node is flagged through
	// isAWord(true).
	void insert(string s) 
	{
		if (s.empty())
			return;

		TrieNode* cursor = root;
		for (int pos = 0; pos < s.size(); ++pos)
		{
			TrieNode* next = cursor->findCh(s[pos]);
			if (nullptr == next)
			{
				// No child for this character yet: create it and step into it.
				cursor->addChild(s[pos]);
				next = cursor->getLastChild();
			}
			cursor = next;
		}

		cursor->isAWord(true);
	}
Ejemplo n.º 13
0
// Inserts s into the trie: follows existing children, adds the missing ones,
// then adds a '#' child to the final node (search() looks for that child).
void Trie::insert(string s) {
    TrieNode* node = root;
    int pos = 0;
    while (pos < s.length()) {
        if (!node->inChildren(s[pos])) {
            node = node->addChild(s[pos]);
        } else {
            node = node->getChild(s[pos]);
        }
        ++pos;
    }
    node->addChild('#');
}
Ejemplo n.º 14
0
void naiveTest() {
	TrieNode* trie = new TrieNode();
	trie->insert("HELLO",1);
	assert(1 == trie->find("HELLO")->terminal);
	assert(NULL == trie->find("HELLOB"));
	assert(-1 == trie->find("HELL")->terminal);
	delete trie;
}
Ejemplo n.º 15
0
 // Returns true when every character of `prefix` can be followed from the
 // root through subNode(); false as soon as one step yields nullptr.
 bool startsWith(string prefix) {
     TrieNode* cursor = root;
     for (auto it = prefix.begin(); it != prefix.end(); ++it) {
         char ch = *it;
         cursor = cursor->subNode(ch);
         if (!cursor) {
             return false;
         }
     }
     return true;
 }
Ejemplo n.º 16
0
// Returns true when every character of `prefix` has a matching child along
// the walk from the root.
bool Trie::startsWith(string prefix) {
    TrieNode* node = root;
    const int len = prefix.length();
    int pos = 0;
    // Advance while the next character is present among the children.
    while (pos < len && node->inChildren(prefix[pos])) {
        node = node->getChild(prefix[pos]);
        ++pos;
    }
    return pos == len;
}
Ejemplo n.º 17
0
// Returns true when `key` can be followed from the root and the node reached
// has a '#' child.
bool Trie::search(string key) {
    TrieNode* node = root;
    for (int pos = 0; pos < key.length(); ++pos) {
        char ch = key[pos];
        if (node->inChildren(ch)) {
            node = node->getChild(ch);
        } else {
            return false;
        }
    }
    return node->inChildren('#');
}
Ejemplo n.º 18
0
 // Returns the `isend` flag of the node reached by following `word` from the
 // root, or false if some character cannot be followed.
 bool search(string word) {
     TrieNode* cursor = root;
     for (auto it = word.begin(); it != word.end(); ++it) {
         char ch = *it;
         cursor = cursor->subNode(ch);
         if (!cursor) {
             return false;
         }
     }
     return cursor->isend;
 }
 // Returns the `is_end` flag of the node reached by following `key` from the
 // root, or false if some character has no child.
 bool search(string key) {
     TrieNode *node = root;
     string::size_type idx = 0;
     while (idx < key.size()) {
         char ch = key[idx];
         node = node->get_child(ch);
         if (!node) {
             return false;
         }
         ++idx;
     }
     return node->is_end;
 }
Ejemplo n.º 20
0
 // Returns true when every character of `prefix` can be followed from the
 // root through find(); false as soon as find() returns NULL.
 bool startsWith(string prefix) {
     TrieNode *node = root;
     for (auto ch: prefix) {
         TrieNode *next = node->find(ch);
         if (next == NULL) {
             return false;
         }
         node = next;
     }
     return true;
 }
 // Returns true when every character of `prefix` has a child along the walk
 // from the root; false as soon as get_child() returns nullptr.
 bool startsWith(string prefix) {
     TrieNode *node = root;
     string::size_type idx = 0;
     while (idx < prefix.size()) {
         char ch = prefix[idx];
         node = node->get_child(ch);
         if (!node) {
             return false;
         }
         ++idx;
     }
     return true;
 }
Ejemplo n.º 22
0
 // Inserts `word`: for each character calls add_child() on the current node
 // and then steps into children[c]; the final node is flagged with set_word().
 void insert(string word) {
     TrieNode* node = root;
     for (string::iterator it = word.begin(); it != word.end(); ++it) {
         char c = *it;
         node->add_child(c);
         node = node->children[c];
     }
     node->set_word();
 }
Ejemplo n.º 23
0
 // Returns true when `word` can be followed from the root and the node
 // reached has a '\0' entry (the end marker written by insert()).
 bool search(string word) {
     TrieNode *node = root;
     for (auto ch: word) {
         TrieNode *next = node->find(ch);
         if (next == NULL) {
             return false;
         }
         node = next;
     }
     return node->find('\0') != NULL;
 }
Ejemplo n.º 24
0
 // Inserts `word` by calling addNode() for each character in turn, then
 // flags the final node with setWordEnd(). When the walk ends on the root
 // (e.g. for an empty word) nothing is flagged.
 void insert(string word) {
     TrieNode* cursor = root;
     int pos = 0;
     while (pos < word.length()) {
         cursor = cursor->addNode(word[pos]);
         ++pos;
     }
     if (cursor == root) {
         return;
     }
     cursor->setWordEnd();
 }
Ejemplo n.º 25
0
 // Walks `word` from the root. Returns true as soon as the current node
 // is_word() before a character is consumed, false when a character has no
 // child, and true when all characters have been consumed.
 bool exist_prefix(string word) {
     TrieNode* node = root;
     for (string::iterator it = word.begin(); it != word.end(); ++it) {
         char c = *it;
         if (node->is_word()) {
             return true;
         }
         if (node->exist_child(c)) {
             node = node->children[c];
         } else {
             return false;
         }
     }
     return true;
 }
Ejemplo n.º 26
0
    // Returns iswords() of the node reached by following `key` from the
    // root, or false if some character has no child.
    bool search(string key) {
        TrieNode *cursor = root;
        const int keyLen = key.size();
        int pos = 0;
        while (pos < keyLen) {
            cursor = cursor->get_childen(key[pos]);
            if (cursor == NULL) {
                return false;
            }
            ++pos;
        }
        return cursor->iswords();
    }
Ejemplo n.º 27
0
// Returns true when every character of `prefix` can be followed from the
// root through subNode(); false as soon as one step yields nullptr.
bool Trie::startsWith(string prefix)
{
	TrieNode* node = root;
	for (auto it = prefix.begin(); it != prefix.end(); ++it) {
		char ch = *it;
		node = node->subNode(ch);
		if (!node) {
			return false;
		}
	}
	return true;
}
Ejemplo n.º 28
0
// Returns whether the node reached by following `key` from the root has
// isend == true; false if some character cannot be followed.
bool Trie::search(string key)
{
	TrieNode* node = root;
	for (auto it = key.begin(); it != key.end(); ++it) {
		char ch = *it;
		node = node->subNode(ch);
		if (!node) {
			return false;
		}
	}
	return true == node->isend;
}
Ejemplo n.º 29
0
 // Inserts `word`: follows the existing entry for each character or inserts
 // a new one, then inserts a '\0' entry on the final node as the end marker.
 void insert(string word) {
     TrieNode *node = root;
     for (auto ch: word) {
         TrieNode *next = node->find(ch);
         if (next == NULL) {
             // Character not present yet under this node.
             next = node->insert(ch);
         }
         node = next;
     }
     node->insert('\0');
 }
Ejemplo n.º 30
0
// Accumulate data from children into their parent.
void LocalState::TrieNode::propagate_data_upwards (void)
{
  for (auto citer = children.begin(); citer != children.end(); citer++) {
    TrieNode* child = citer->second;
    child->propagate_data_upwards();
    for (size_t i = 0; i < self_data.size(); i++)
      path_data[i] += child->path_data[i];
    invocations += child->invocations;
  }
}