/*
 * Print the node `v` through dump_node() and then repeat the same for each
 * of its children, in index order, so a node is always printed before the
 * nodes below it.
 *
 * v       node to start from; it is dereferenced, so it must not be NULL
 * detail  passed unchanged to dump_node() at every level
 */
void dump_trie(Trie *v, int detail)
{
    int child = 0;

    dump_node(v, detail);

    while (child < v->nchildren) {
        dump_trie(&v->children[child], detail);
        child++;
    }
}
/*
 * Print the node stored at offset `pos` of an on-disk trie, everything below
 * it, and then the rest of its sibling chain, one node per output line.
 *
 * fp        stream handed to trie_node_read()
 * features  passed unchanged to trie_node_read()
 * lvl       nesting level; only forwarded (incremented) to the child call
 * refpos    reference position handed to trie_node_read(), which may update it
 * pos       offset of the first node to print; 0 means there is nothing to do
 * buffer    start of the scratch area holding the key bytes of the ancestors
 * bp        position inside the scratch area used for this level's key bytes
 *
 * Output line layout: the node offset zero-padded to 10 digits, a blank, a
 * '*' if trie_node_read() set `end` (a blank otherwise), one blank per
 * ancestor key byte (tabs are kept as tabs), then `len` bytes taken from bp.
 *
 * Presumably trie_node_read() stores the node's key fragment at bp, its
 * length in `len`, the first child's offset in `son` and the next sibling's
 * offset in `curpos` -- confirm against its definition.
 *
 * NOTE(review): nothing here checks bp + len against the size of the scratch
 * area; the caller has to provide a buffer large enough for the longest key.
 */
void dump_trie (FILE *fp, int features, int lvl, uint64_t refpos, uint64_t pos, unsigned char *buffer, unsigned char *bp)
{
  /* Siblings are walked in a loop rather than by tail recursion so the stack
     depth depends only on the depth of the trie, not on its width. */
  while (pos)
    {
      uint64_t son, curpos, cnt;
      int len, end;
      unsigned char *cp;

      curpos = pos;
      trie_node_read (fp, features, &refpos, &curpos, &cnt, &len, &end, &son, bp);

      /* An empty fragment ends the chain; a negative length would turn into
         a huge size_t in fwrite() below, so it stops the walk as well. */
      if (len <= 0)
        return;

      /* Print the full 64-bit offset; casting to long loses the upper half
         on platforms where long is 32 bits wide. */
      printf ("%010llu", (unsigned long long) pos);
      printf (" %s", (end ? "*" : " "));

      /* Indent by the ancestors' key bytes so fragments line up in columns. */
      for (cp = buffer; cp < bp; cp++)
        putchar (*cp == '\t' ? '\t' : ' ');

      fwrite (bp, len, 1, stdout);
      printf ("\n");

      /* Children first: their fragments go right behind this node's one. */
      dump_trie (fp, features, lvl + 1, son, son, buffer, bp + len);

      /* Then continue with whatever trie_node_read() left in curpos. */
      pos = curpos;
    }
}
/**
 * Write the trie rooted at `root` to the file `filename`.
 *
 * File layout: one `unsigned` at offset 0 holding the offset of the root
 * node's record, followed by the node records emitted by dump_trie()
 * starting at offset sizeof(unsigned).
 *
 * The header slot is first written as 0 and patched once dump_trie() has
 * recorded where the root ended up.
 *
 * @param filename  path of the file to create (truncated if it exists)
 * @param root      root node of the trie; forwarded to dump_trie()
 *
 * If the file cannot be opened, the reason is reported on stderr through
 * perror() and nothing is written.
 */
void serialize_trie(const std::string& filename, Trie* root)
{
    auto file = fopen(filename.c_str(), "w+b");
    if (file == nullptr) {
        // Without this check every call below would receive a null stream.
        perror(filename.c_str());
        return;
    }

    auto offset_map = std::map<Trie*, size_t>();

    // Placeholder for the root offset; the stream is at position 0 right
    // after being opened, so no explicit seek is needed here.
    unsigned root_adr = 0;
    fwrite(&root_adr, sizeof(unsigned), 1, file);

    // Node records start right behind the header slot.
    dump_trie(root, file, offset_map, sizeof(unsigned));

    // The on-disk format stores offsets as `unsigned`, hence the narrowing.
    root_adr = static_cast<unsigned>(offset_map[root]);
    fseek(file, 0, SEEK_SET);
    fwrite(&root_adr, sizeof(unsigned), 1, file);

    // fclose() flushes buffered data, so a failure here means a short file.
    if (fclose(file) != 0) {
        perror(filename.c_str());
    }
}
/*
 * Entry point: print the content of the trie file given with -t / --trie
 * on standard output.
 *
 * Options are parsed with getopt_long(); any other option only produces a
 * diagnostic line on stdout. Extra positional arguments, a missing --trie
 * option and an unreadable file are reported through error().
 *
 * NOTE(review): the code after each error() call assumes error() does not
 * return -- confirm against its definition.
 */
int main (int argc, char *argv[])
{
  FILE *fp;
  uint64_t pos;
  char *trie = NULL;
  int version, features;
  /* Scratch area in which dump_trie() assembles the key of the current node. */
  static unsigned char buffer[1024];

  for (;;)
    {
      int option_index = 0;
      int c;
      static struct option long_options[] = {
        {"trie", required_argument, 0, 't'},
        {0, 0, 0, 0}
      };

      c = getopt_long (argc, argv, "t:", long_options, &option_index);
      if (c == -1)
        break;

      switch (c)
        {
        case 't':
          trie = optarg;
          break;
        default:
          printf ("?? getopt returned character code 0%o ??\n", c);
        }
    }

  if (optind < argc)
    error (argv[0], "Excess argument");
  if (trie == NULL)
    error (argv[0], "Missing argument --trie");

  /* The trie file is binary data, so open it without text translation. */
  fp = fopen (trie, "rb");
  if (fp == NULL)
    /* Report the file name itself; argv[1] is the option token, not the path. */
    error (argv[0], "Unable to open file \"%s\"\n", trie);

  /* The header yields the offset of the first node plus format information. */
  pos = trie_hdr_read (fp, &version, &features);
  dump_trie (fp, features, 0, pos, pos, buffer, buffer);

  fclose (fp);
  return 0;
}
size_t dump_trie(Trie* trie, FILE* file, std::map<Trie*, size_t>& offset_map, size_t offset) { for (const auto& prefix : *trie) { // Recursive call on every child offset = dump_trie(prefix.second.get(), file, offset_map, offset); } Datas d; d.is_word = trie->is_word(); d.nb_children = trie->nb_children(); offset_map[trie] = offset; // Write the datas that will be used for recognition by the app fwrite(&d, sizeof(Datas), 1, file); offset += sizeof(Datas); if (trie->is_word()) { // If we are on a word (not just a prefix) we write the frequency of it fwrite(&(trie->freq_), sizeof(int), 1, file); offset += sizeof(int); } for (const auto& prefix : *trie) { // Write the word and the offset of each chile of the current trie fwrite(prefix.first.c_str(), prefix.first.length() + 1, 1, file); fwrite(&(offset_map[prefix.second.get()]), sizeof(unsigned), 1, file); offset += prefix.first.length() + 1 + sizeof(unsigned); } return offset; }