Ejemplo n.º 1
0
static TrieNode<CharT, BucketT>*
pseudo_sample(unsigned char** strings, size_t n)
{
	debug()<<__func__<<"(): sampling "<<n/8192<<" strings ...\n";
	size_t max_nodes = (sizeof(CharT) == 1) ? 5000 : 2000;
	TrieNode<CharT, BucketT>* root = new TrieNode<CharT, BucketT>;
	for (size_t i=0; i < n; i += 8192) {
		unsigned char* str = strings[i];
		size_t depth = 0;
		TrieNode<CharT, BucketT>* node = root;
		while (true) {
			CharT c = get_char<CharT>(str, depth);
			if (is_end(c)) break;
			depth += sizeof(CharT);
			node->extend(c+1);
			if (not node->is_trie(c)) {
				node->_buckets[c] = new TrieNode<CharT, BucketT>;
				make_trie(node->_buckets[c]);
				if (--max_nodes==0) goto finish;
			}
			node = node->get_node(c);
			assert(node);
		}
	}
finish:
	return root;
}
Ejemplo n.º 2
0
static inline void
insert(TrieNode<CharT, BucketT>* root, unsigned char** strings, size_t n)
{
	for (size_t i=0; i < n; ++i) {
		unsigned char* str = strings[i];
		size_t depth = 0;
		CharT c = get_char<CharT>(str, 0);
		TrieNode<CharT, BucketT>* node = root;
		while (node->is_trie(c)) {
			assert(not is_end(c));
			node = node->get_node(c);
			depth += sizeof(CharT);
			c = get_char<CharT>(str, depth);
		}
		BucketT* bucket = node->get_bucket(c);
		assert(bucket);
		bucket->push_back(str);
		if (is_end(c)) continue;
		if (bucket->size() > Threshold) {
			node->_buckets[c] = BurstImpl()(*bucket,
					depth+sizeof(CharT));
			make_trie(node->_buckets[c]);
			delete bucket;
		}
	}
}
Ejemplo n.º 3
0
static TrieNode<CharT, BucketT>*
random_sample(unsigned char** strings, size_t n)
{
	const size_t sample_size = n/8192;
	debug()<<__PRETTY_FUNCTION__<<" sampling "<<sample_size<<" strings\n";
	size_t max_nodes = (sizeof(CharT) == 1) ? 5000 : 2000;
	TrieNode<CharT, BucketT>* root = new TrieNode<CharT, BucketT>;
	for (size_t i=0; i < sample_size; ++i) {
		unsigned char* str = strings[size_t(drand48()*n)];
		size_t depth = 0;
		TrieNode<CharT, BucketT>* node = root;
		while (true) {
			CharT c = get_char<CharT>(str, depth);
			if (is_end(c)) break;
			depth += sizeof(CharT);
			node->extend(c+1);
			if (not node->is_trie(c)) {
				node->_buckets[c] = new TrieNode<CharT, BucketT>;
				make_trie(node->_buckets[c]);
				if (--max_nodes==0) goto finish;
			}
			node = node->get_node(c);
			assert(node);
		}
	}
finish:
	return root;
}