Ejemplo n.º 1
0
Archivo: io.c Proyecto: JSefara/foma
/*
 * Build an automaton from a word list stored in a file, one word per line.
 *
 * The whole file is loaded with file_to_mem() and split in place: each
 * '\n' is overwritten with '\0' so that every line becomes a C string
 * that is handed to fsm_trie_add_word().  Empty lines are skipped.
 *
 * Returns the result of fsm_trie_done(), or NULL when file_to_mem()
 * returns NULL.
 */
struct fsm *fsm_read_text_file(char *filename) {
    struct fsm_trie_handle *trie;
    char *contents, *line, *eol;
    int at_end;

    contents = file_to_mem(filename);
    if (contents == NULL)
        return NULL;

    trie = fsm_trie_init();
    line = contents;
    at_end = 0;

    while (!at_end) {
        /* Advance to the end of the current line (newline or end of text). */
        eol = line;
        while (*eol != '\n' && *eol != '\0')
            eol++;

        if (*eol == '\0') {
            /* This is the final line; stop after handling it. */
            at_end = 1;
            if (eol == line)
                break;          /* nothing left after the last newline */
        }

        /* Terminate the line in place so it can be used as a string. */
        *eol = '\0';
        if (*line != '\0')
            fsm_trie_add_word(trie, line);

        line = eol + 1;
    }

    xxfree(contents);
    return fsm_trie_done(trie);
}
Ejemplo n.º 2
0
/*
 * Build an automaton from an in-memory word list, one word per line.
 *
 * buffer is a NUL-terminated, read-only text; lines are separated by
 * '\n' and empty lines are skipped.  Every non-empty line is copied into
 * a writable, NUL-terminated scratch buffer and passed to
 * fsm_trie_add_word().
 *
 * A single scratch buffer, sized for the longest line, is allocated up
 * front and reused for every word.  Allocating before the trie is created
 * means an out-of-memory condition can be reported without leaving a
 * half-built trie behind.
 *
 * Returns the result of fsm_trie_done(), or NULL when buffer is NULL or
 * the scratch buffer cannot be allocated.
 */
struct fsm *fsm_read_text_file(const char *buffer) {
    struct fsm_trie_handle *th;
    const char *line, *eol;
    char *word;
    size_t len, longest = 0;

    if (buffer == NULL)
        return NULL;

    /* Pass 1: find the longest line so one allocation fits every word. */
    for (line = buffer; ; line = eol + 1) {
        for (eol = line; *eol != '\n' && *eol != '\0'; eol++) {
        }
        len = (size_t)(eol - line);
        if (len > longest)
            longest = len;
        if (*eol == '\0')
            break;
    }

    word = malloc(longest + 1);
    if (word == NULL)
        return NULL;

    th = fsm_trie_init();

    /* Pass 2: hand each non-empty line to the trie as a C string. */
    for (line = buffer; ; line = eol + 1) {
        for (eol = line; *eol != '\n' && *eol != '\0'; eol++) {
        }
        len = (size_t)(eol - line);
        if (len > 0) {
            /* The input is const, so terminate a private copy instead. */
            memcpy(word, line, len);
            word[len] = '\0';
            fsm_trie_add_word(th, word);
        }
        if (*eol == '\0')
            break;
    }

    free(word);
    return fsm_trie_done(th);
}
Ejemplo n.º 3
0
Archivo: io.c Proyecto: JSefara/foma
/*
 * Map one token of a spaced-text line to the symbol stored in the trie:
 * a missing token (NULL) or "0" becomes the epsilon symbol, "%0" becomes
 * a literal "0", and anything else is used unchanged.
 */
static char *spaced_symbol_for_token(char *token) {
    if (token == NULL || strcmp(token, "0") == 0)
        return "@_EPSILON_SYMBOL_@";
    if (strcmp(token, "%0") == 0)
        return "0";
    return token;
}

/*
 * Build an automaton from a file in "spaced text" format.
 *
 * Lines are fetched with spacedtext_get_next_line() and consumed in
 * groups: a first line followed by a second one.  When the second line is
 * missing or empty, the tokens of the first line are added with the same
 * symbol on both sides.  Otherwise the tokens of the two lines are paired
 * position by position as input:output symbols, and the shorter line is
 * padded with the epsilon symbol.  Leading newline characters before each
 * group are skipped.
 *
 * Returns the result of fsm_trie_done(), or NULL when file_to_mem()
 * returns NULL.
 */
struct fsm *fsm_read_spaced_text_file(char *filename) {
    struct fsm_trie_handle *trie;
    char *contents, *cursor, *upper, *lower, *upper_pos, *lower_pos;
    char *in_tok, *out_tok, *sym;

    contents = file_to_mem(filename);
    if (contents == NULL)
        return NULL;
    cursor = contents;

    trie = fsm_trie_init();
    while (1) {
        /* Skip any newlines separating groups. */
        while (*cursor == '\n')
            cursor++;

        upper = spacedtext_get_next_line(&cursor);
        if (upper == NULL)
            break;
        if (*upper == '\0')
            continue;

        lower = spacedtext_get_next_line(&cursor);
        if (lower != NULL && *lower != '\0') {
            /* Two lines: pair the tokens up as input:output symbols. */
            upper_pos = upper;
            lower_pos = lower;
            while (1) {
                in_tok = spacedtext_get_next_token(&upper_pos);
                out_tok = spacedtext_get_next_token(&lower_pos);
                if (in_tok == NULL && out_tok == NULL)
                    break;
                in_tok = spaced_symbol_for_token(in_tok);
                out_tok = spaced_symbol_for_token(out_tok);
                fsm_trie_symbol(trie, in_tok, out_tok);
            }
        } else {
            /* Single line: each token maps to itself. */
            upper_pos = upper;
            while ((in_tok = spacedtext_get_next_token(&upper_pos)) != NULL) {
                sym = spaced_symbol_for_token(in_tok);
                fsm_trie_symbol(trie, sym, sym);
            }
        }
        fsm_trie_end_word(trie);
    }

    xxfree(contents);
    return fsm_trie_done(trie);
}