// Inserts the first `n` strings of `strings` into the burst trie rooted at
// `root`.
//
// For each string the trie is descended one CharT-sized character at a time
// for as long as the current character's slot holds a trie node. The string
// is then appended to the bucket found in the first non-trie slot. If that
// bucket does not belong to an end-of-string character and has grown beyond
// `Threshold`, it is burst: the slot is replaced by the node produced by
// `BurstImpl`, the slot is tagged with make_trie(), and the old bucket is
// deleted.
static inline void
insert(TrieNode<CharT, BucketT>* root, unsigned char** strings, size_t n)
{
    for (size_t idx = 0; idx != n; ++idx) {
        unsigned char* const key = strings[idx];
        TrieNode<CharT, BucketT>* cur = root;
        size_t offset = 0;

        // Walk down through the existing trie levels.
        CharT ch = get_char<CharT>(key, 0);
        for (; cur->is_trie(ch); ch = get_char<CharT>(key, offset)) {
            assert(not is_end(ch));
            cur = cur->get_node(ch);
            offset += sizeof(CharT);
        }

        // `ch` now selects a bucket slot in `cur`.
        BucketT* const leaf = cur->get_bucket(ch);
        assert(leaf);
        leaf->push_back(key);

        // Buckets reached via an end-of-string character are never burst.
        if (not is_end(ch) and leaf->size() > Threshold) {
            cur->_buckets[ch] = BurstImpl()(*leaf, offset + sizeof(CharT));
            make_trie(cur->_buckets[ch]);
            delete leaf;
        }
    }
}
// Builds a skeleton trie from a deterministic sample of the input: every
// 8192nd string, starting at index 0, is threaded through the trie and a new
// TrieNode is created for each character slot that is not yet a trie node.
//
// Node creation is capped: 5000 new nodes when sizeof(CharT) == 1, otherwise
// 2000. As soon as the cap is reached the trie built so far is returned.
// The caller receives the newly allocated root.
static TrieNode<CharT, BucketT>*
pseudo_sample(unsigned char** strings, size_t n)
{
    const size_t stride = 8192;
    debug()<<__func__<<"(): sampling "<<n/stride<<" strings ...\n";

    size_t nodes_left = (sizeof(CharT) == 1) ? 5000 : 2000;
    TrieNode<CharT, BucketT>* const root = new TrieNode<CharT, BucketT>;

    for (size_t pos = 0; pos < n; pos += stride) {
        unsigned char* const sample = strings[pos];
        TrieNode<CharT, BucketT>* cur = root;
        size_t offset = 0;

        // Follow the sampled string until its end-of-string character.
        for (CharT ch = get_char<CharT>(sample, offset);
             not is_end(ch);
             ch = get_char<CharT>(sample, offset)) {
            offset += sizeof(CharT);
            // Presumably grows the slot table so that index `ch` is valid.
            cur->extend(ch+1);
            if (not cur->is_trie(ch)) {
                cur->_buckets[ch] = new TrieNode<CharT, BucketT>;
                make_trie(cur->_buckets[ch]);
                nodes_left -= 1;
                if (nodes_left == 0) {
                    return root;   // node budget exhausted
                }
            }
            cur = cur->get_node(ch);
            assert(cur);
        }
    }
    return root;
}
// Builds a skeleton trie from a random sample of the input: n/8192 strings
// are picked with drand48() (with replacement) and each is threaded through
// the trie, creating a new TrieNode for every character slot that is not yet
// a trie node.
//
// Node creation is capped: 5000 new nodes when sizeof(CharT) == 1, otherwise
// 2000. As soon as the cap is reached the trie built so far is returned.
// The caller receives the newly allocated root.
static TrieNode<CharT, BucketT>*
random_sample(unsigned char** strings, size_t n)
{
    const size_t sample_size = n/8192;
    debug()<<__PRETTY_FUNCTION__<<" sampling "<<sample_size<<" strings\n";

    size_t nodes_left = (sizeof(CharT) == 1) ? 5000 : 2000;
    TrieNode<CharT, BucketT>* const root = new TrieNode<CharT, BucketT>;

    for (size_t remaining = sample_size; remaining > 0; --remaining) {
        // drand48() yields a value in [0, 1), so the index stays below n.
        unsigned char* const pick = strings[size_t(drand48()*n)];
        TrieNode<CharT, BucketT>* cur = root;
        size_t offset = 0;

        // Follow the picked string until its end-of-string character.
        for (CharT ch = get_char<CharT>(pick, offset);
             not is_end(ch);
             ch = get_char<CharT>(pick, offset)) {
            offset += sizeof(CharT);
            // Presumably grows the slot table so that index `ch` is valid.
            cur->extend(ch+1);
            if (not cur->is_trie(ch)) {
                cur->_buckets[ch] = new TrieNode<CharT, BucketT>;
                make_trie(cur->_buckets[ch]);
                nodes_left -= 1;
                if (nodes_left == 0) {
                    return root;   // node budget exhausted
                }
            }
            cur = cur->get_node(ch);
            assert(cur);
        }
    }
    return root;
}
// Bursts `bucket` into a new trie node and then recursively bursts any
// resulting sub-bucket that is still large.
//
// The first level is produced by BurstSimple. A sub-bucket is burst again
// (via BurstRecursive, one character deeper) when its slot index is not an
// end-of-string character and it holds more than max(100, bucket.size()/2)
// strings. Each re-burst slot is overwritten with the new node, the old
// sub-bucket is deleted, and the slot is tagged with make_trie().
//
// Returns the node created for this level.
TrieNode<CharT, BucketT>*
operator()(const BucketT& bucket, size_t depth) const
{
    TrieNode<CharT, BucketT>* const burst = BurstSimple<CharT>()(bucket, depth);
    const size_t limit = std::max(bucket.size()/2, size_t(100));

    for (unsigned slot = 0; slot < burst->_buckets.size(); ++slot) {
        // Slots are treated as plain buckets here; empty slots are skipped.
        BucketT* const child = static_cast<BucketT*>(burst->_buckets[slot]);
        const bool oversized =
            child and not is_end(slot) and child->size() > limit;
        if (oversized) {
            burst->_buckets[slot] =
                BurstRecursive<CharT>()(*child, depth + sizeof(CharT));
            delete child;
            make_trie(burst->_buckets[slot]);
        }
    }
    return burst;
}
int main(int argc, char** argv) { // ---------------------------- // Initialize // ---------------------------- // Open file, print errors if appropriate. FILE* my_dict = fopen(argv[1], "r"); if(my_dict == NULL) { fputs("Invalid file. Terminating.\n",stderr); return 1; } // Initialize the root struct Node* root = make_node(); // Build the trie root = make_trie(root, my_dict); // ---------------------------- // User interaction // ---------------------------- // Make a node to go through the tree struct Node* tracker = root; // Get user input, print results printf("Enter \"exit\" to quit.\n"); while(1) { // Prompt printf("Enter Key Sequence (or \"#\" for next word):\n"); // Make a buffer and capture user input. char input[MAX_WORD_LENGTH]; fgets(input, MAX_WORD_LENGTH, stdin); // Break if given exit command if (strstr(input,"exit") != NULL) { break; } // If user enters one or more #'s, keep old tracker so we can // iterate further. Otherwise, restart our tracker from the root. if (input[0] != '#') { tracker = root; } // Process the input to remove \n input[strlen(input)-1] = '\0'; //make_lower_case(input); // Make a flag for checking for out of bounds attemps (looking for too many #'s) int oob_flag = 0; int* oob_ptr = &oob_flag; // search for the input tracker = search_trie(input, tracker, oob_ptr); // Print results // If we got a NULL back (went too far for a word that wasn't in the trie) or // the word is NULL (user didn't enter anything), word not found. // If we ran out of words while looking for '#', no more T9onyms. // Else, print the word. Have a cookie. if (tracker == NULL || tracker->word == NULL) { printf("\tNot found in current dictionary.\n"); } else if (oob_flag) { printf("\tThere are no more T9onyms\n"); oob_flag = 0; } else { printf("\t%s\n",tracker->word); } } // ---------------------------- // Clean up // ---------------------------- // Free the trie free_node(root); // Close file to prevent any related memory leaks fclose(my_dict); return 0; }
int main(int argc, char** argv) { // ---------------------------- // Initialize // ---------------------------- // Open file, print errors if appropriate. FILE* my_dict = fopen(argv[1], "r"); if(my_dict == NULL) { fputs("Invalid file. Terminating.\n",stderr); return 1; } // Initialize the root struct Node* root = make_node(); // Build the trie root = make_trie(root, my_dict); // ---------------------------- // User interaction // ---------------------------- // Make a node and buffers to go through the tree struct Node* tracker = root; char input[MAX_WORD_LENGTH]; char output[MAX_WORD_LENGTH]; // Get user input, print results printf("Enter \"exit\" to quit.\n"); while(1) { // Prompt printf("Enter Key Sequence (or \"#\" for next word):\n"); fgets(input, MAX_WORD_LENGTH, stdin); // If user just hit enter, skip the rest of this loop if (input[0] == '\n') { printf("\tUser did not enter anything.\n"); continue; } // Break if given exit command if (strstr(input,"exit") != NULL) { break; } // If user enters one or more #'s, keep old tracker so we can // iterate further. Otherwise, restart our tracker from the root. if (input[0] != '#') { tracker = root; } // Process the input to remove \n input[strlen(input)-1] = '\0'; // search for the input and save our progress in tracker in case the user wants more #'s tracker = search_trie(input, tracker, output); printf("\t%s\n",output); } // ---------------------------- // Clean up // ---------------------------- // Free the trie free_node(root); // Close file to prevent any related memory leaks fclose(my_dict); return 0; }