Ejemplo n.º 1
0
static TrieNode<CharT, BucketT>*
pseudo_sample(unsigned char** strings, size_t n)
{
	debug()<<__func__<<"(): sampling "<<n/8192<<" strings ...\n";
	size_t max_nodes = (sizeof(CharT) == 1) ? 5000 : 2000;
	TrieNode<CharT, BucketT>* root = new TrieNode<CharT, BucketT>;
	for (size_t i=0; i < n; i += 8192) {
		unsigned char* str = strings[i];
		size_t depth = 0;
		TrieNode<CharT, BucketT>* node = root;
		while (true) {
			CharT c = get_char<CharT>(str, depth);
			if (is_end(c)) break;
			depth += sizeof(CharT);
			node->extend(c+1);
			if (not node->is_trie(c)) {
				node->_buckets[c] = new TrieNode<CharT, BucketT>;
				make_trie(node->_buckets[c]);
				if (--max_nodes==0) goto finish;
			}
			node = node->get_node(c);
			assert(node);
		}
	}
finish:
	return root;
}
Ejemplo n.º 2
0
static TrieNode<CharT, BucketT>*
random_sample(unsigned char** strings, size_t n)
{
	const size_t sample_size = n/8192;
	debug()<<__PRETTY_FUNCTION__<<" sampling "<<sample_size<<" strings\n";
	size_t max_nodes = (sizeof(CharT) == 1) ? 5000 : 2000;
	TrieNode<CharT, BucketT>* root = new TrieNode<CharT, BucketT>;
	for (size_t i=0; i < sample_size; ++i) {
		unsigned char* str = strings[size_t(drand48()*n)];
		size_t depth = 0;
		TrieNode<CharT, BucketT>* node = root;
		while (true) {
			CharT c = get_char<CharT>(str, depth);
			if (is_end(c)) break;
			depth += sizeof(CharT);
			node->extend(c+1);
			if (not node->is_trie(c)) {
				node->_buckets[c] = new TrieNode<CharT, BucketT>;
				make_trie(node->_buckets[c]);
				if (--max_nodes==0) goto finish;
			}
			node = node->get_node(c);
			assert(node);
		}
	}
finish:
	return root;
}