Exemplo n.º 1
0
/*
 * Insert the first n entries of `strings` into the trie rooted at `root`.
 *
 * Each string is followed down the trie one CharT at a time for as long as
 * the current node reports the slot of the current character as a trie
 * (is_trie). The string is then appended to the bucket that get_bucket
 * returns for the character at which the descent stopped. When that
 * character is not an end marker and the bucket has grown past Threshold,
 * the bucket is handed to BurstImpl, the slot is overwritten with the
 * result and passed through make_trie, and the old bucket is deleted.
 */
static inline void
insert(TrieNode<CharT, BucketT>* root, unsigned char** strings, size_t n)
{
	for (size_t idx = 0; idx != n; ++idx) {
		unsigned char* key = strings[idx];
		TrieNode<CharT, BucketT>* cur = root;
		size_t offset = 0;
		CharT ch = get_char<CharT>(key, 0);
		// Descend through existing trie levels; the character is
		// re-read at the new offset after every step.
		for (; cur->is_trie(ch); ch = get_char<CharT>(key, offset)) {
			assert(not is_end(ch));
			cur = cur->get_node(ch);
			offset += sizeof(CharT);
		}
		BucketT* const target = cur->get_bucket(ch);
		assert(target);
		target->push_back(key);
		// Buckets reached through an end marker are never burst.
		if (not is_end(ch) and target->size() > Threshold) {
			cur->_buckets[ch] = BurstImpl()(*target,
					offset + sizeof(CharT));
			make_trie(cur->_buckets[ch]);
			delete target;
		}
	}
}
Exemplo n.º 2
0
/*
 * Build a trie skeleton from a deterministic sample of the input: the
 * strings at indices 0, 8192, 16384, ... below n.
 *
 * Every sampled string is walked one CharT at a time until its end marker.
 * At each step the current node is extended to cover the character, and a
 * fresh TrieNode is created (and passed through make_trie) for any slot
 * that is not yet a trie. Creation stops as soon as the node budget (5000
 * when sizeof(CharT) == 1, otherwise 2000) is used up.
 *
 * Returns the newly allocated root node.
 */
static TrieNode<CharT, BucketT>*
pseudo_sample(unsigned char** strings, size_t n)
{
	const size_t stride = 8192;
	debug()<<__func__<<"(): sampling "<<n/stride<<" strings ...\n";
	size_t budget = (sizeof(CharT) == 1) ? 5000 : 2000;
	TrieNode<CharT, BucketT>* const root = new TrieNode<CharT, BucketT>;
	for (size_t pos = 0; pos < n; pos += stride) {
		unsigned char* sample = strings[pos];
		TrieNode<CharT, BucketT>* cur = root;
		size_t offset = 0;
		for (CharT ch = get_char<CharT>(sample, offset);
				not is_end(ch);
				ch = get_char<CharT>(sample, offset)) {
			offset += sizeof(CharT);
			cur->extend(ch+1);
			if (not cur->is_trie(ch)) {
				cur->_buckets[ch] = new TrieNode<CharT, BucketT>;
				make_trie(cur->_buckets[ch]);
				// Node budget exhausted: hand back what we have.
				if (--budget == 0) return root;
			}
			cur = cur->get_node(ch);
			assert(cur);
		}
	}
	return root;
}
Exemplo n.º 3
0
/*
 * Build a trie skeleton from n/8192 strings picked at random positions
 * (index = drand48() * n, truncated to size_t).
 *
 * Each picked string is walked one CharT at a time until its end marker.
 * The current node is extended to cover every character seen, and a new
 * TrieNode is created (and passed through make_trie) for each slot that
 * is not yet a trie. Sampling stops once the node budget (5000 when
 * sizeof(CharT) == 1, otherwise 2000) has been consumed.
 *
 * Returns the newly allocated root node.
 */
static TrieNode<CharT, BucketT>*
random_sample(unsigned char** strings, size_t n)
{
	const size_t sample_size = n/8192;
	debug()<<__PRETTY_FUNCTION__<<" sampling "<<sample_size<<" strings\n";
	size_t nodes_left = (sizeof(CharT) == 1) ? 5000 : 2000;
	TrieNode<CharT, BucketT>* root = new TrieNode<CharT, BucketT>;
	bool exhausted = false;
	for (size_t round = 0; round < sample_size and not exhausted; ++round) {
		const size_t pick = static_cast<size_t>(drand48()*n);
		unsigned char* chosen = strings[pick];
		TrieNode<CharT, BucketT>* walker = root;
		size_t pos = 0;
		for (;;) {
			const CharT sym = get_char<CharT>(chosen, pos);
			if (is_end(sym)) break;
			pos += sizeof(CharT);
			walker->extend(sym+1);
			if (not walker->is_trie(sym)) {
				walker->_buckets[sym] = new TrieNode<CharT, BucketT>;
				make_trie(walker->_buckets[sym]);
				--nodes_left;
				if (nodes_left == 0) {
					// Budget used up: leave both loops.
					exhausted = true;
					break;
				}
			}
			walker = walker->get_node(sym);
			assert(walker);
		}
	}
	return root;
}
Exemplo n.º 4
0
	/*
	 * Burst `bucket` at `depth` with BurstSimple, then burst again (via
	 * BurstRecursive, one CharT deeper) every non-null sub-bucket whose
	 * slot index is not an end marker and whose size exceeds
	 * max(100, bucket.size()/2). Each re-burst slot is overwritten with
	 * the new node and passed through make_trie; the replaced sub-bucket
	 * is deleted.
	 *
	 * Returns the node produced by the initial BurstSimple call.
	 */
	TrieNode<CharT, BucketT>*
	operator()(const BucketT& bucket, size_t depth) const
	{
		TrieNode<CharT, BucketT>* const result =
			BurstSimple<CharT>()(bucket, depth);
		const size_t limit = std::max(size_t(100), bucket.size()/2);
		for (unsigned slot = 0; slot < result->_buckets.size(); ++slot) {
			BucketT* const child =
				static_cast<BucketT*>(result->_buckets[slot]);
			// Empty slots, end-marker slots and small buckets stay as is.
			if (child and not is_end(slot) and child->size() > limit) {
				result->_buckets[slot] =
					BurstRecursive<CharT>()(*child,
							depth + sizeof(CharT));
				delete child;
				make_trie(result->_buckets[slot]);
			}
		}
		return result;
	}
Exemplo n.º 5
0
/*
 * Interactive dictionary lookup.
 *
 * Opens the file named by argv[1], builds a trie from it with make_trie,
 * then repeatedly reads a line from stdin and looks it up with search_trie.
 * Input starting with '#' continues from the node returned by the previous
 * search; any other input restarts from the root. The loop ends when the
 * input contains "exit" or when stdin reaches EOF / a read error.
 *
 * Returns 0 on normal termination, 1 if no dictionary file could be opened.
 */
int main(int argc, char** argv) {
	// ----------------------------
	// Initialize
	// ----------------------------
	
	// Open file, print errors if appropriate. A missing argument is
	// reported like an unopenable file instead of passing NULL to fopen.
	FILE* my_dict = (argc > 1) ? fopen(argv[1], "r") : NULL;
	if(my_dict == NULL) {
		fputs("Invalid file. Terminating.\n",stderr);
		return 1;
	}
	
	// Initialize the root
	struct Node* root = make_node();
	
	// Build the trie
	root = make_trie(root, my_dict);
	
	// ----------------------------
	// User interaction
	// ----------------------------
	
	// Make a node to go through the tree
	struct Node* tracker = root;

	// Get user input, print results
	printf("Enter \"exit\" to quit.\n");
	while(1) {		
		// Prompt		
		printf("Enter Key Sequence (or \"#\" for next word):\n");

		// Make a buffer and capture user input. Stop on EOF or read
		// error: the buffer contents are not valid in that case and the
		// loop would otherwise never terminate.
		char input[MAX_WORD_LENGTH];
		if (fgets(input, MAX_WORD_LENGTH, stdin) == NULL) {
			break;
		}

		// Break if given exit command
		if (strstr(input,"exit") != NULL) {
			break;
		}

		// If user enters one or more #'s, keep old tracker so we can
		// iterate further. Otherwise, restart our tracker from the root.
		// NOTE(review): if the previous search returned NULL, a '#' input
		// passes NULL to search_trie - confirm it tolerates that.
		if (input[0] != '#') {
			tracker = root;
		}

		// Remove the trailing \n if there is one. strcspn leaves the
		// string untouched when the line was truncated or had no newline,
		// and never indexes before the start of an empty buffer.
		input[strcspn(input, "\n")] = '\0';
		
		// Flag set by search_trie on out of bounds attempts
		// (looking for too many #'s)
		int oob_flag = 0;
		
		// search for the input
		tracker = search_trie(input, tracker, &oob_flag);
		
		// Print results
		// If we got a NULL back (went too far for a word that wasn't in the trie) or
		// the word is NULL (user didn't enter anything), word not found.
		// If we ran out of words while looking for '#', no more T9onyms.
		// Else, print the word.
		if (tracker == NULL || tracker->word == NULL) {
			printf("\tNot found in current dictionary.\n");
		} else if (oob_flag) {
			printf("\tThere are no more T9onyms\n");
		} else {
			printf("\t%s\n",tracker->word);
		}
	}
	
	// ----------------------------
	// Clean up
	// ----------------------------
	
	// Free the trie
	free_node(root);
	
	// Close file to prevent any related memory leaks
	fclose(my_dict);
	
	return 0;
}
Exemplo n.º 6
0
/*
 * Interactive dictionary lookup.
 *
 * Opens the file named by argv[1], builds a trie from it with make_trie,
 * then repeatedly reads a line from stdin, looks it up with search_trie and
 * prints the text search_trie placed in the output buffer. Input starting
 * with '#' continues from the node returned by the previous search; any
 * other input restarts from the root. The loop ends when the input contains
 * "exit" or when stdin reaches EOF / a read error.
 *
 * Returns 0 on normal termination, 1 if no dictionary file could be opened.
 */
int main(int argc, char** argv) {
	// ----------------------------
	// Initialize
	// ----------------------------
	
	// Open file, print errors if appropriate. A missing argument is
	// reported like an unopenable file instead of passing NULL to fopen.
	FILE* my_dict = (argc > 1) ? fopen(argv[1], "r") : NULL;
	if(my_dict == NULL) {
		fputs("Invalid file. Terminating.\n",stderr);
		return 1;
	}
	
	// Initialize the root
	struct Node* root = make_node();
	
	// Build the trie
	root = make_trie(root, my_dict);
	
	// ----------------------------
	// User interaction
	// ----------------------------
	
	// Make a node and buffers to go through the tree
	struct Node* tracker = root;
	char input[MAX_WORD_LENGTH];
	char output[MAX_WORD_LENGTH];
	
	// Get user input, print results
	printf("Enter \"exit\" to quit.\n");
	while(1) {		
		// Prompt		
		printf("Enter Key Sequence (or \"#\" for next word):\n");

		// Stop on EOF or read error: the buffer contents are not valid
		// in that case and the loop would otherwise never terminate.
		if (fgets(input, MAX_WORD_LENGTH, stdin) == NULL) {
			break;
		}

		// If user just hit enter, skip the rest of this loop
		if (input[0] == '\n') {
			printf("\tUser did not enter anything.\n");
			continue;
		}
		
		// Break if given exit command
		if (strstr(input,"exit") != NULL) {
			break;
		}

		// If user enters one or more #'s, keep old tracker so we can
		// iterate further. Otherwise, restart our tracker from the root.
		// NOTE(review): if the previous search returned NULL, a '#' input
		// passes NULL to search_trie - confirm it tolerates that.
		if (input[0] != '#') {
			tracker = root;
		}
		
		// Remove the trailing \n if there is one. strcspn leaves the
		// string untouched when the line was truncated or had no newline,
		// and never indexes before the start of an empty buffer.
		input[strcspn(input, "\n")] = '\0';
		
		// search for the input and save our progress in tracker in case the user wants more #'s
		tracker = search_trie(input, tracker, output);
		
		printf("\t%s\n",output);
	}
	
	// ----------------------------
	// Clean up
	// ----------------------------
	
	// Free the trie
	free_node(root);
	
	// Close file to prevent any related memory leaks
	fclose(my_dict);
	
	return 0;
}