/* Loads all lemmatizer data from files: main automaton, prediction automaton, flexias. * prediction may be 0, if you are not going to use prediction at all. */ turglem turglem_load(const char *fname_dic_autom, const char *fname_predict_autom, const char *fname_paradigms, int *err_no, int *err_what) { turglem lem; if (err_no) *err_no = 0; if (err_what) *err_what = 0; lem = (turglem)malloc(sizeof(struct turglem_struct)); if (0 == lem) { if (err_what) *err_what = TURGLEM_ERROR_NOMEM; return 0; } if (err_what) *err_what = TURGLEM_ERROR_DICTIONARY; lem->words = MAFSA_automaton_load_from_binary_file(fname_dic_autom, err_no); if (0 == lem->words) { free(lem); return 0; } if (err_what) *err_what = TURGLEM_ERROR_PREDICTION; lem->prediction = MAFSA_automaton_load_from_binary_file(fname_predict_autom, err_no); if (0 == lem->prediction) { MAFSA_automaton_close(lem->words); free(lem); return 0; } if (err_what) *err_what = TURGLEM_ERROR_PARADIGMS; lem->paradigms = turglem_paradigms_load_from_binary_file(fname_paradigms, err_no); if (0 == lem->paradigms) { MAFSA_automaton_close(lem->words); MAFSA_automaton_close(lem->prediction); free(lem); return 0; } if (err_what) *err_what = 0; return lem; }
int main(int argc, char *argv[]) { MAFSA::daciuk<MAX_LETTER + 1> dict; MAFSA_automaton ma; int i; int rc; uint32_t ex = 0, ok = 0; uint32_t cs = 0, ss = 0; for (i = 0; urls[i]; ++i) { size_t sz; MAFSA_letter word [1024]; struct timeval tvc1; struct timeval tvc2; sz = strlen((const char *) urls[i]); memcpy(word, urls[i], sz); gettimeofday(&tvc1, NULL); dict.insert(word, sz); gettimeofday(&tvc2, NULL); cs += (tvc2.tv_sec - tvc1.tv_sec) * 1000000 + (tvc2.tv_usec - tvc1.tv_usec); } dict.save_to_file(SAVE_TO); ma = MAFSA_automaton_load_from_binary_file(SAVE_TO, NULL); for (;;) { MAFSA_letter outbuf [1024]; char urlbuf [4096], *pos; size_t outsz = 0, urlsz = 0; struct timeval tvs1; struct timeval tvs2; if (NULL == fgets(urlbuf, 4096, stdin)) { break; } if (NULL != (pos = strchr(urlbuf, '\n'))) *pos = 0; if (NULL != (pos = strchr(urlbuf, '\r'))) *pos = 0; urlsz = strlen(urlbuf); gettimeofday(&tvs1, NULL); rc = MAFSA_automaton_search(ma, (const MAFSA_letter *) urlbuf, urlsz, outbuf, 1024, &outsz); gettimeofday(&tvs2, NULL); ss += (tvs2.tv_sec - tvs1.tv_sec) * 1000000 + (tvs2.tv_usec - tvs1.tv_usec); if (-1 == rc) { ++ok; } else { char dbg [4096]; ++ex; pos = dbg; snprintf(pos, rc + 1, "%s", urlbuf); pos += rc; snprintf(pos, sizeof("\033[1;31m"), "\033[1;31m"); pos += sizeof("\033[1;31m") - 1; snprintf(pos, outsz + 1, "%s", outbuf); pos += outsz; snprintf(pos, sizeof("\033[0m"), "\033[0m"); pos += sizeof("\033[0m") - 1; snprintf(pos, urlsz - outsz - rc + 1, "%s", urlbuf + rc + outsz); printf("Filter (%03d .. %03u): %s\n", rc, (unsigned int) (rc + outsz), dbg); } } printf("\n"); printf("Result: ex = %d, ok = %d\n", ex, ok); printf("TimeComple: %6u.%06u seconds\n", cs / 1000000, cs % 1000000); printf("TimeSearch: %6u.%06u seconds\n", ss / 1000000, ss % 1000000); MAFSA_automaton_close(ma); return 0; }