/*
 * Build an FSM from a word list stored in a file, one word per line.
 *
 * The file is loaded with file_to_mem() and split in place on '\n': each
 * newline is overwritten with a terminator so that the line can be passed
 * to fsm_trie_add_word() without copying. Empty lines are skipped.
 *
 * filename: path handed to file_to_mem().
 * Returns the result of fsm_trie_done(), or NULL when file_to_mem()
 * returns NULL.
 *
 * NOTE(review): assumes file_to_mem() yields a writable, NUL-terminated
 * buffer that is released with xxfree() -- confirm against its definition.
 */
struct fsm *fsm_read_text_file(char *filename)
{
    struct fsm_trie_handle *trie;
    char *contents, *line_start, *line_end;
    int at_end = 0;

    contents = file_to_mem(filename);
    if (contents == NULL) {
        return NULL;
    }

    line_start = contents;
    trie = fsm_trie_init();

    while (!at_end) {
        /* Advance to the end of the current line. */
        line_end = line_start;
        while (*line_end != '\n' && *line_end != '\0') {
            line_end++;
        }

        if (*line_end == '\0') {
            /* Reached the end of the buffer: this is the final pass. */
            at_end = 1;
            if (line_end == line_start) {
                break;
            }
        }

        /* Cut the line out of the buffer and add it unless it is empty. */
        *line_end = '\0';
        if (*line_start != '\0') {
            fsm_trie_add_word(trie, line_start);
        }

        line_start = line_end + 1;
    }

    xxfree(contents);
    return fsm_trie_done(trie);
}
/*
 * Build an FSM from a newline-separated word list held in memory.
 *
 * buffer: NUL-terminated text, one word per line. It is never modified.
 *
 * The caller's text is const, but fsm_trie_add_word() needs each word as a
 * separately terminated string. Instead of allocating a temporary per word,
 * the whole text is duplicated once and split in place. The single
 * allocation is checked before any trie state is created, so the failure
 * path has nothing to clean up.
 *
 * Empty lines are skipped.
 *
 * Returns the result of fsm_trie_done(), or NULL if buffer is NULL or the
 * working copy cannot be allocated.
 */
struct fsm *fsm_read_text_file(const char *buffer)
{
    struct fsm_trie_handle *th;
    char *copy, *word, *end;
    size_t size;
    int lastword;

    if (buffer == NULL) {
        return NULL;
    }

    /* Private, writable copy (terminator included). */
    size = strlen(buffer) + 1;
    copy = malloc(size);
    if (copy == NULL) {
        return NULL;
    }
    memcpy(copy, buffer, size);

    th = fsm_trie_init();
    for (lastword = 0, word = copy; lastword == 0; word = end + 1) {
        /* Find the end of the current line. */
        for (end = word; *end != '\n' && *end != '\0'; end++) {
        }
        if (*end == '\0') {
            lastword = 1;
            if (end == word) {
                break;
            }
        }
        /* Terminate the word inside the copy; skip empty lines. */
        *end = '\0';
        if (end > word) {
            fsm_trie_add_word(th, word);
        }
    }

    free(copy);
    return fsm_trie_done(th);
}
/*
 * Map one token of a spaced-text line to the symbol stored in the trie:
 * a missing token (NULL) or "0" becomes the epsilon symbol, "%0" becomes a
 * literal "0", and anything else is used unchanged.
 */
static char *spacedtext_map_symbol(char *token)
{
    if (token == NULL || strcmp(token, "0") == 0) {
        return "@_EPSILON_SYMBOL_@";
    }
    if (strcmp(token, "%0") == 0) {
        return "0";
    }
    return token;
}

/*
 * Build an FSM from a "spaced text" file.
 *
 * The file is loaded with file_to_mem() and consumed entry by entry.
 * Newlines before an entry are skipped. An entry is one line, optionally
 * followed by a second non-empty line:
 *   - one line:  every token is added as an identity pair (token:token);
 *   - two lines: tokens of the first line are paired position by position
 *                with tokens of the second line; when one line runs out of
 *                tokens first, the missing side is filled with epsilon.
 * In both forms "0" denotes epsilon and "%0" denotes a literal "0".
 *
 * filename: path handed to file_to_mem().
 * Returns the result of fsm_trie_done(), or NULL when file_to_mem()
 * returns NULL.
 *
 * NOTE(review): assumes spacedtext_get_next_line() and
 * spacedtext_get_next_token() advance the cursor they are given and return
 * NULL when nothing is left -- confirm against their definitions.
 */
struct fsm *fsm_read_spaced_text_file(char *filename)
{
    struct fsm_trie_handle *trie;
    char *buffer, *cursor;
    char *upper_line, *lower_line;
    char *upper_cur, *lower_cur;
    char *in_tok, *out_tok, *mapped;

    buffer = file_to_mem(filename);
    cursor = buffer;
    if (buffer == NULL) {
        return NULL;
    }

    trie = fsm_trie_init();

    for (;;) {
        /* Skip newlines in front of the next entry. */
        while (*cursor == '\n') {
            cursor++;
        }

        upper_line = spacedtext_get_next_line(&cursor);
        if (upper_line == NULL) {
            break;
        }
        if (upper_line[0] == '\0') {
            continue;
        }

        lower_line = spacedtext_get_next_line(&cursor);

        if (lower_line == NULL || lower_line[0] == '\0') {
            /* Single-line entry: each symbol maps to itself. */
            upper_cur = upper_line;
            while ((in_tok = spacedtext_get_next_token(&upper_cur)) != NULL) {
                mapped = spacedtext_map_symbol(in_tok);
                fsm_trie_symbol(trie, mapped, mapped);
            }
        } else {
            /* Two-line entry: pair the tokens of both lines by position. */
            upper_cur = upper_line;
            lower_cur = lower_line;
            for (;;) {
                in_tok = spacedtext_get_next_token(&upper_cur);
                out_tok = spacedtext_get_next_token(&lower_cur);
                if (in_tok == NULL && out_tok == NULL) {
                    break;
                }
                in_tok = spacedtext_map_symbol(in_tok);
                out_tok = spacedtext_map_symbol(out_tok);
                fsm_trie_symbol(trie, in_tok, out_tok);
            }
        }
        fsm_trie_end_word(trie);
    }

    xxfree(buffer);
    return fsm_trie_done(trie);
}