void setup_tokenizer() { int i = 0; /* * Set up the automata that * match the various token types */ for (i = 0; i < 100; i++) t[i] = new_trie(); /* D: any digit */ add_token(t[tc++], "D+", TOK_INTEGER); /* W: any whitespace character */ add_token(t[tc++], "W+", TOK_WHITESPACE); /* A: letter; B: letter or digit */ add_token(t[tc++], "AB*", TOK_IDENT); /* operators */ add_token(t[tc++], "\\+", TOK_PLUS); add_token(t[tc++], "-", TOK_MINUS); add_token(t[tc++], "/", TOK_DIV); add_token(t[tc++], "\\*", TOK_MUL); add_token(t[tc++], "=", TOK_ASGN); /* special characters */ add_token(t[tc++], "(", TOK_LPAREN); add_token(t[tc++], ")", TOK_RPAREN); add_token(t[tc++], ";", TOK_SEMICOLON); add_token(t[tc++], ",", TOK_COMMA); add_token(t[tc++], "\n", TOK_NEWLINE); add_token(t[tc++], ":", TOK_COLON); add_token(t[tc++], "[", TOK_LBRACK); add_token(t[tc++], "]", TOK_RBRACK); add_token(t[tc++], "'", TOK_QUOT); add_token(t[tc++], "@", TOK_NAN); add_token(t[tc++], "#", TOK_MNUM); add_token(t[tc++], "\\?", TOK_MVAR); add_token(t[tc++], "|", TOK_PIPE); add_token(t[tc++], "\\$", TOK_DOLLAR); add_token(t[tc++], "\\^", TOK_EXP); add_token(t[tc++], "::", TOK_CC); add_token(t[tc++], "|'", TOK_NOTSYMBOL); add_token(t[tc++], "\\?\\?", TOK_PREVSUB); }
/*
 * Insert the word `s` into the trie rooted at `t`, creating any nodes that
 * are missing along its path, and return the word's counter after it has
 * been incremented (so the first insertion of a word yields the counter's
 * initial value plus one).
 *
 * NOTE(review): each character selects a child slot as `*s - 'a'` with no
 * range check - presumably callers only pass lowercase letters; confirm.
 */
int trie_insert(trie * t, char *s)
{
	trie *node = t;
	char *p;

	/* Walk down one level per character, growing the trie as needed. */
	for (p = s; *p; p++) {
		int slot = *p - 'a';

		if (node->next[slot] == NULL)
			node->next[slot] = new_trie();
		node = node->next[slot];
	}

	/* `node` is now the terminal node for the whole word. */
	return ++node->val;
}
/*
 * Demo of the word-counting trie: insert a few words (one of them twice),
 * print the counter returned by each insertion, look two words up, and
 * release the trie.
 */
int main(void)
{
	char *to_insert[] = { "texta", "textb", "texta" };
	char *to_search[] = { "texta", "textc" };
	int insert_count = (int)(sizeof to_insert / sizeof to_insert[0]);
	int search_count = (int)(sizeof to_search / sizeof to_search[0]);
	int k;
	trie *root = new_trie();

	for (k = 0; k < insert_count; k++) {
		int counter = trie_insert(root, to_insert[k]);

		printf("Added %s %d\n", to_insert[k], counter);
	}

	for (k = 0; k < search_count; k++)
		printf("Searched %s %d\n", to_search[k],
		       trie_search(root, to_search[k]));

	free_trie(root);
	return 0;
}
int main(int argc, char **argv) { TrieNode *root = new_trie(); trie_insert(root, "/foo/bar", "baz"); trie_insert(root, "/foo/blubb", "bla"); trie_insert(root, "/asdf/dassf/fdas", "blubb"); trie_insert(root, "/asdd/f/dsa/s", "fasd"); //dump_tree(root); iterate_trie(root, print_line, NULL); char *result = (char*)trie_search(root, "/asdf/dassf/fdas"); fprintf(stderr, "RESULT: %s\n", result); return 0; }
/** * main program. Processes all files and returns one string * for each file in strings[]. In the process, all strings are * used to construct the trie we use to get the substring with * great f value. */ int main (int argc, char **argv) { extern int optind; extern char *optarg; int opt; int i; int n_passes = MAX_PASSES; FILE *out = stdout; time_t now; time(&now); while ((opt = getopt(argc, argv, "dhPp:o:s")) != EOF) { switch(opt) { case 'h': do_usage(); exit(0); case 'P': flags |= FLAG_PROGRESS; break; case 'p': n_passes = atol(optarg); break; case 'd': flags |= FLAG_DEBUG; break; case 'o': out = fopen(optarg, "wb"); break; case 's': flags |= FLAG_PRINT_STATE; break; } /* switch */ } /* while */ if (n_passes > MAX_PASSES) n_passes = MAX_PASSES; if (n_passes < 1) n_passes = 1; if (argc > optind) { for (i = optind; i < argc; i++) process_file(argv[i]); } else process_file(stdin_name); mark = strings_n; /* beginning of macros */ /* print the strings in the begining */ if (flags & FLAG_PRINT_STATE) print_strings(); for(i = 0; i < n_passes; i++) { struct trie_node *root_trie, *max; int j; char *o; struct ref_buff *ref; if (flags & FLAG_DEBUG) { fprintf(stderr, D("PASS #%d:\n"), i); } /* if */ /* INITIALIZE THE TRIE */ assert(root_trie = new_trie()); for (j = 0; j < strings_n; j++) { const byte *s; int l; for (s = strings[j], l = strings_sz[j]; l; s++, l--) { add_string(s, l, root_trie, j); if (*s == ESCAPE) { s++; l--; } /* if */ } /* for */ if (flags & FLAG_PROGRESS) { static char *progress[] = { "\\", "|", "/", "-", }; fprintf(stderr, "\r%s %d/%d", progress[j % 4], j+1, strings_n); } /* if */ } /* for */ /* SEARCH FOR THE MOST EFFICIENT MACRO SUBSTITUTION */ max = walk_trie(root_trie, savings_calculation); /* IF NOT FOUND, FINISH */ if (max == root_trie) { if (flags & FLAG_DEBUG) { fprintf(stderr, D("MACRO NOT FOUND, FINISHING\n")); } /* if */ break; } /* if */ if (flags & FLAG_DEBUG) { /* WRITE THE MACRO FOUND */ fprintbuf(stderr, max->l, max->refs->b, D("MACRO 
FOUND: len=%d, nrep=%d, savings=%d"), max->l, max->n, savings_calculation(max)); } /* if */ /* copy the string macro as a new string. */ strings[strings_n] = buffer + bs; memcpy(strings[strings_n], max->refs->b, max->l); bs += max->l; strings_sz[strings_n] = max->l; strings_n++; /* print the substitutions to be made. */ if (flags & FLAG_DEBUG) { #define FOREACHSUBST(X) \ for (ref = max->refs; ref; ref = ref->nxt) { \ int ix = ref->ix; \ const byte *src = ref->b + max->l; \ byte *dst = (byte *)ref->b; \ const byte *end = strings[ix] + strings_sz[ix]; \ int n = end - src; \ X \ } /* for */ FOREACHSUBST( fprintf(stderr, D("SUBST: string[%d], beg_hole=0x%lx, end_hole=0x%lx, hole_sz=%ld, end=0x%lx\n"), ix, dst - strings[ix], src - strings[ix], src - dst, end - strings[ix]); ) /* FOREACHSUBST */ } /* if */ /* substitute the strings as macro calls */ FOREACHSUBST( assert((strings[ix] <= dst) && (dst + MACRO_SIZE < src) && (src <= end)); *dst++ = ESCAPE; *dst++ = i + OFFSET; /* i is the macro index */ assert(n >= 0); while (n--) *dst++ = *src++; strings_sz[ix] -= max->l - MACRO_SIZE; ) /* FOREACHSUBST */
/*
 * Compile the token pattern `tok` into the automaton rooted at `t` and mark
 * the node reached at the end of the pattern as accepting token `key`.
 *
 * Pattern syntax handled below:
 *   A    any letter or '_'
 *   B    any letter, digit or '_'
 *   D    any digit
 *   W    space or tab
 *   ?    the preceding element is optional
 *   +    the preceding element may repeat
 *   *    the preceding element is optional and may repeat
 *   \x   the character x, taken literally
 *   any other character matches itself.
 *
 * The pattern is only read, never modified, so it is walked in place; no
 * temporary copy is allocated.  If the current node becomes NULL part-way
 * through, compilation stops and no accept state is marked.
 */
void add_token(trie* t, char* tok, int key)
{
	enum { HIST_MAX = 1024 };
	int i, j;
	int c;
	trie *hist[HIST_MAX];	/* nodes passed through so far; hist[n] is the current one */
	trie *next;
	int len = (int)strlen(tok);
	int n;

	for (i = 0, n = 0; i < len && t; ++i) {
		c = tok[i];

		/*
		 * One iteration writes hist[n] and, for '+' and '*', also
		 * hist[n + 1] and hist[n + 2]; refuse patterns that would
		 * run past the end of the history array.
		 */
		if (n + 2 >= HIST_MAX) {
			fail("token pattern too long");
			return;
		}
		hist[n] = t;

		switch (c) {
		case 'A':	/* special: set of all letters */
			next = new_trie();
			for (j = 'a'; j <= 'z'; j++)
				t->map[j] = next;
			for (j = 'A'; j <= 'Z'; j++)
				t->map[j] = next;
			t->map['_'] = next;
			t = next;
			++n;
			break;

		case 'B':	/* special: set of all letters + all digits */
			next = new_trie();
			for (j = 'a'; j <= 'z'; j++)
				t->map[j] = next;
			for (j = 'A'; j <= 'Z'; j++)
				t->map[j] = next;
			for (j = '0'; j <= '9'; j++)
				t->map[j] = next;
			t->map['_'] = next;
			t = next;
			++n;
			break;

		case 'D':	/* special: set of all digits */
			next = new_trie();
			for (j = '0'; j <= '9'; j++)
				t->map[j] = next;
			t = next;
			++n;
			break;

		case 'W':	/* special: whitespace */
			next = new_trie();
			t->map[' '] = next;
			t->map['\t'] = next;
			t = next;
			++n;
			break;

		case '?':	/* optional character */
			sanity_requires(n > 0);
			/*
			 * The previous node is allowed to
			 * go straight to this node, via
			 * an epsilon link.
			 */
			add_link(hist[n - 1], t);
			break;

		case '+':	/* character can repeat any number of times */
			sanity_requires(n > 0);
			/*
			 * Make a "buffer" node to keep things
			 * cleanly separated and avoid
			 * surprises when matching e.g.
			 * 'a+b+'. Epsilon-link to it.
			 */
			add_link(t, hist[++n] = new_trie());
			t = hist[n];
			/*
			 * Next, we epsilon-link the buffer node
			 * all the way back to the node '+' compilation
			 * started with, to allow repetition.
			 */
			add_link(t, hist[n - 2]);
			/*
			 * And finally we make a clean
			 * new node for further work,
			 * and epsilon-link to it.
			 */
			add_link(t, hist[++n] = new_trie());
			t = hist[n];
			break;

		case '*':	/* optional character with repetition allowed */
			sanity_requires(n > 0);
			/*
			 * Same as '+', except that the
			 * second part makes a mutual link.
			 */
			add_link(t, hist[++n] = new_trie());
			t = hist[n];
			/* mutual link */
			add_link(t, hist[n - 2]);
			add_link(hist[n - 2], t);
			add_link(t, hist[++n] = new_trie());
			t = hist[n];
			break;

		case '\\':	/* escape to normal text */
			if (++i == len)
				fail("backslash expected char");
			c = tok[i];
			/* fallthrough */

		default:	/* normal text */
			if (!t->map[c])
				t->map[c] = new_trie();
			t = t->map[c];
			++n;
		}
	}

	/*
	 * We have reached the accept-state node;
	 * mark it as such.  The loop above stops early when the current
	 * node is NULL, so there may be nothing to mark.
	 */
	if (t)
		t->valid_token = key;
}
int main(void) { srand(time(NULL)); printf("The available commands are:\n" "I N - Inject an NxN board. This command will scan NxN letters to form a board\n" "G N - Generates a new NxN board where a letter doesn't appear more than N times\n" "P - Print the current board\n" "W N word - Insert a word into the dictionary with score N\n" "A N word - Insert a word and all prefixes in the dictionary. Non-proper prefixes get a score of 0\n" "R word - Delete a word\n" "S word - Search for a word and return its score (-1 if no such word exists)\n" "D - Dump the dictionary with the corresponding scores\n" "B - Find the best word (word with the highest score)\n" "Q - Quit\n" "> "); char **board = NULL; size_t board_dim = 0; trie = new_trie(); char op; while (scanf(" %c", &op) == 1) { if (op == 'I') { destroy_board(board, board_dim); scanf("%zu", &board_dim); board = inject_board(board_dim); } else if (op == 'P') { if (board == NULL) { printf("No board at the moment\n"); } else { print_board(board, board_dim); } } else if (op == 'G') { destroy_board(board, board_dim); scanf("%zu", &board_dim); board = generate_board(board_dim); } else if (op == 'W') { size_t word_score; scanf("%zu%s", &word_score, word_buff); insert_word(trie, word_buff, word_score); } else if (op == 'A') { size_t word_score; scanf("%zu%s", &word_score, word_buff); size_t i; for (i = 1; word_buff[i] != '\0'; i++) { char c = word_buff[i]; word_buff[i] = '\0'; insert_word(trie, word_buff, 0); word_buff[i] = c; } insert_word(trie, word_buff, word_score); } else if (op == 'R') { scanf("%s", word_buff); delete_word(trie, word_buff); } else if (op == 'S') { scanf("%s", word_buff); int s = word_score(trie, word_buff); if (s == -1) { printf("No such word: %s\n", word_buff); } else { printf("score(%s) = %d\n", word_buff, s); } } else if (op == 'D') { print_known_words(trie); } else if (op == 'B') { char *w = find_best_word(board, board_dim); if (w == NULL) { printf("No words found\n"); } else { printf("Best word: %s\n", 
w); } free(w); } else if (op == 'Q') { break; } else { fprintf(stderr, "Unrecognized operation: %c\n", op); } printf("> "); } destroy_trie(trie); destroy_board(board, board_dim); return 0; }
int main() { int error; Trie *t; Leaf l,l1; char aux[20]; char c; t = new_trie(); while (1) { printf("MENIU:\n 1. Insert\n 2. Search\n 3. Delete\n 4. Display\n 5. Load data from file\n X. Exit\n"); scanf("%c", &c); switch (tolower(c)) { case '1': { printf("Type the word: "); scanf("%s", aux); l = (Leaf)malloc(sizeof(char)*(strlen(aux) + 1)); strcpy(l, aux); if ((l1=insert(t, l, 0, &error)) != NO_LEAF) { free(l1); } else { error_handling(error); } break; } case '2': { printf("Type the word: "); scanf("%s", aux); l=search(t, aux); if (l!=NO_LEAF) { printf("The word %s exist in Trie!\n",aux); } else { printf("The word %s doesn't exist in Trie!\n",aux); } break; } case '3': { printf("Type the word: "); scanf("%s", aux); delete(t, aux); break; } case '4': { display(t); break; } case '5': { file(t, "trie.in", &error); error_handling(error); break; } case ('x'): { exit(0); } default: { break; } } fflush(stdin); } return 0; }