Ejemplo n.º 1
0
/*
 * Expand a string into a list of single-letter symbols, one list item
 * per input character, in input order.
 *
 *  - upper-case letters are folded to lower case;
 *  - decimal digits are replaced by the matching digit2num[] entry;
 *  - the letter "a" is emitted as "_a";
 *  - any other character is emitted as a one-character string.
 *
 * lets: NUL-terminated input string.
 * Returns the list built with cons_val()/string_val().
 */
cst_val *en_exp_letters(const char *lets)
{
    /* One-character scratch string.  string_val() is assumed to copy its
       argument -- the previous version relied on that too, since it reused
       and then freed a single heap buffer -- so no allocation is needed. */
    char sym[2];
    cst_val *r = 0;
    int i;

    sym[1] = '\0';
    for (i = 0; lets[i] != '\0'; i++)
    {
        /* <ctype.h> functions require a value representable as unsigned
           char; a plain char above 0x7f may be negative, which is
           undefined behaviour when passed directly. */
        unsigned char c = (unsigned char)lets[i];

        if (isupper(c))
            c = (unsigned char)tolower(c);
        sym[0] = (char)c;

        if ((c >= '0') && (c <= '9'))
            r = cons_val(string_val(digit2num[c - '0']), r);
        else if (c == 'a')
            r = cons_val(string_val("_a"), r);
        else
            r = cons_val(string_val(sym), r);
    }

    /* items were pushed onto the front, so flip back to input order */
    return val_reverse(r);
}
Ejemplo n.º 2
0
/*
 * Look wp up in the NULL-terminated addenda table of lexicon l.
 *
 * wp is a one-character part-of-speech tag followed by the word.  An
 * addenda entry matches when the word parts are equal and the tags agree;
 * a tag of '0' on either side is accepted against any tag.
 *
 * On the first match *found is set to TRUE and the entry's remaining
 * strings (indices 1..) are returned as a list in table order.  When
 * nothing matches, NULL is returned and *found is left untouched.
 */
static cst_val *lex_lookup_addenda(const char *wp,const cst_lexicon *l,
                                   int *found)
{
    cst_val *phones = NULL;
    int e, k;

    for (e = 0; l->addenda[e]; e++)
    {
        /* tags differ and neither side is the '0' wildcard */
        if ((wp[0] != '0') &&
            (wp[0] != l->addenda[e][0][0]) &&
            (l->addenda[e][0][0] != '0'))
            continue;

        /* compare the words, skipping the tag character on both sides */
        if (!cst_streq(wp+1,l->addenda[e][0]+1))
            continue;

        k = 1;
        while (l->addenda[e][k])
        {
            phones = cons_val(string_val(l->addenda[e][k]),phones);
            k++;
        }
        *found = TRUE;
        return val_reverse(phones);
    }

    return NULL;
}
Ejemplo n.º 3
0
/*
 * Read every token of wavelistfile into a list, in file order.
 *
 * The tokens are expected to come in pairs: if the file cannot be opened,
 * or holds an odd number of tokens, a message is written to stderr and
 * nothing is kept.
 */
cst_val *get_wavelist(const char *wavelistfile)
{
    cst_tokenstream *ts = ts_open(wavelistfile);
    cst_val *names = 0;
    const char *token;
    int count;

    if (ts == 0)
    {
        fprintf(stderr,"combine_waves: can't open \"%s\"\n",wavelistfile);
        return 0;
    }

    for (count = 0; (token = ts_get(ts)) != 0; count++)
        names = cons_val(string_val(token),names);

    if ((count % 2) != 0)
    {
        /* unmatched pair: discard everything read so far */
        fprintf(stderr,"combine_waves: doesn't have matched pairs \"%s\"\n",wavelistfile);
        delete_val(names);
        names = 0;
    }

    ts_close(ts);

    /* tokens were pushed on the front; restore file order */
    return val_reverse(names);
}
Ejemplo n.º 4
0
/*
 * Parse the command line argv[1..argc-1] against description.
 *
 * Arguments starting with '-' are options: the option table produced by
 * parse_description() gives each one a type string.  "<binary>" options
 * are stored in args as the string "true"; "<int>", "<float>" and
 * "<string>" options consume the following argument and store it in args
 * under the option name.  Unknown options and the help flags (-h, -?,
 * --help, -help) hand over to parse_usage().
 *
 * Every other argument is collected and returned, in command-line order,
 * as a list of strings.
 */
cst_val *cst_args(char **argv, int argc,
		  const char *description,
		  cst_features *args)
{
    cst_features *op_types = new_features();
    cst_val *files = NULL;
    const char *type;
    char *opt;
    int i;

    parse_description(description,op_types);

    for (i=1; i<argc; i++)
    {
        opt = argv[i];

        if (opt[0] != '-')
        {
            /* plain argument, not an option */
            files = cons_val(string_val(opt),files);
            continue;
        }

        if ((!feat_present(op_types,opt)) ||
            (cst_streq("-h",opt)) ||
            (cst_streq("-?",opt)) ||
            (cst_streq("--help",opt)) ||
            (cst_streq("-help",opt)))
        {
            parse_usage(argv[0],"","",description);
            continue;
        }

        type = feat_string(op_types,opt);
        if (cst_streq("<binary>",type))
        {
            feat_set_string(args,opt,"true");
            continue;
        }

        /* every remaining option type takes a value from the next argument */
        if (i+1 == argc)
            parse_usage(argv[0],
                        "missing argument for ",opt,
                        description);

        if (cst_streq("<int>",type))
            feat_set_int(args,opt,atoi(argv[i+1]));
        else if (cst_streq("<float>",type))
            feat_set_float(args,opt,atof(argv[i+1]));
        else if (cst_streq("<string>",type))
            feat_set_string(args,opt,argv[i+1]);
        else
            parse_usage(argv[0],
                        "unknown arg type ",type,
                        description);
        i++;    /* step over the value just consumed */
    }
    delete_features(op_types);

    return val_reverse(files);
}
Ejemplo n.º 5
0
/*
 * Return the phone list for word in lexicon l.
 *
 * The search key is the first character of pos ('0' when pos is NULL)
 * followed by word.  Sources are tried in this order:
 *   1. the lexicon's addenda, if any;
 *   2. the compiled lexicon, via lex_lookup_bsearch();
 *   3. l->lts_function, if set;
 *   4. l->lts_rule_set through lts_apply(), if set.
 * If none of them applies the result is 0.
 */
cst_val *lex_lookup(const cst_lexicon *l, const char *word, const char *pos,
                    const cst_features *feats)
{
    cst_val *phones = 0;
    int found = FALSE;
    const char *q;
    char *wp;
    int index;
    int p;

    /* one byte for the tag, one for the terminating NUL */
    wp = cst_alloc(char,cst_strlen(word)+2);
    cst_sprintf(wp,"%c%s",(pos ? pos[0] : '0'),word);

    if (l->addenda)
        phones = lex_lookup_addenda(wp,l,&found);

    if (found)
    {
        cst_free(wp);
        return phones;
    }

    index = lex_lookup_bsearch(l,wp);

    if (index < 0)
    {
        /* not in the compiled lexicon: fall back to letter-to-sound */
        if (l->lts_function)
            phones = (l->lts_function)(l,word,"",feats);
        else if (l->lts_rule_set)
            phones = lts_apply(word,
                               "",  /* more features if we had them */
                               l->lts_rule_set);
    }
    else
    {
        /* Walk l->data backwards from index-2 until a zero entry.  Items
           are pushed on the front of the list, so the final reversal puts
           them back in the order they were visited. */
        p = index-2;
        if (l->phone_hufftable)
        {
            /* each data entry selects a string of phone-table indices */
            while (l->data[p])
            {
                for (q=l->phone_hufftable[l->data[p]]; *q; q++)
                    phones = cons_val(string_val(l->phone_table[(unsigned char)*q]),
                                      phones);
                p--;
            }
        }
        else
        {
            /* uncompressed: each data entry is itself a phone-table index */
            while (l->data[p])
            {
                phones = cons_val(string_val(l->phone_table[l->data[p]]),
                                  phones);
                p--;
            }
        }
        phones = val_reverse(phones);
    }

    cst_free(wp);

    return phones;
}
Ejemplo n.º 6
0
/*
 * Build a new list holding "#", then every item of l in order, then "#".
 * The input list itself is not modified.
 */
static cst_val *add_lts_boundary_marks(const cst_val *l)
{
    const cst_val *cell = l;
    cst_val *marked = cons_val(string_val("#"),NULL);

    /* push items on the front; the list is flipped once at the end */
    while (cell)
    {
        marked = cons_val(val_car(cell),marked);
        cell = val_cdr(cell);
    }

    return val_reverse(cons_val(string_val("#"),marked));
}
Ejemplo n.º 7
0
/*
 * Expand a digit string into the words of its ordinal form.
 *
 * Commas are stripped from rawnumstring, the result is expanded with
 * en_exp_number(), and the last word of that expansion is replaced by its
 * ordinal ("one" -> ord2num[1], "hundred" -> "hundredth", ...).
 *
 * If the last word has no known ordinal the cardinal expansion is
 * returned unchanged.  Returns a list of words in speaking order.
 */
cst_val *en_exp_ordinal(const char *rawnumstring)
{
    cst_val *card, *o;
    const cst_val *t;
    const char *l;
    const char *ord = 0;
    char *numstring;
    int i,j;

    /* copy without the thousands separators; the copy can only shrink */
    numstring = cst_strdup(rawnumstring);
    for (j=i=0; rawnumstring[i] != '\0'; i++)
        if (rawnumstring[i] != ',')
        {
            numstring[j] = rawnumstring[i];
            j++;
        }
    numstring[j] = '\0';

    /* reversed so that the last spoken word is at the head of the list */
    card = val_reverse(en_exp_number(numstring));
    cst_free(numstring);

    if (!card)  /* empty expansion: nothing to convert */
        return card;

    l = val_string(val_car(card));
    for (i=0; i<10; i++)
        if (cst_streq(l,digit2num[i]))
            ord = ord2num[i];
    if (!ord)
        for (i=0; i<10; i++)
            if (cst_streq(l,digit2teen[i]))
                ord = ord2teen[i];
    if (!ord)
        for (i=0; i<10; i++)
            if (cst_streq(l,digit2enty[i]))
                ord = ord2enty[i];
    if (cst_streq(l,"hundred"))
        ord = "hundredth";
    if (cst_streq(l,"thousand"))
        ord = "thousandth";
    /* NOTE(review): assumes en_exp_number() spells this scale "million",
       in line with "thousand" and "billion" -- confirm */
    if (cst_streq(l,"million"))
        ord = "millionth";
    if (cst_streq(l,"billion"))
        ord = "billionth";

    if (!ord)
        /* dunno, so don't convert anything: undo the reversal so the
           caller gets the cardinal words back in speaking order */
        return val_reverse(card);

    /* Rebuild in speaking order: start with the ordinal, then push the
       remaining words (walked last-to-first) in front of it. */
    o = cons_val(string_val(ord),0);
    for (t=val_cdr(card); t; t=val_cdr(t))
        o = cons_val(val_car(t),o);
    delete_val(card);
    return o;
}
Ejemplo n.º 8
0
/*
 * Expand numstring into a list with one word per character, in input
 * order: digits '0'..'9' give the matching digit2num[] entry, any other
 * character gives "umpty".
 */
cst_val *en_exp_digits(const char *numstring)
{
    cst_val *words = 0;
    int i;

    for (i = 0; numstring[i] != '\0'; i++)
    {
        if ((numstring[i] < '0') || (numstring[i] > '9'))
            words = cons_val(string_val("umpty"),words);
        else
            words = cons_val(string_val(digit2num[numstring[i]-'0']),words);
    }

    /* words were pushed on the front; restore input order */
    return val_reverse(words);
}
Ejemplo n.º 9
0
/*
 * Split a UTF-8 string into a list of its characters, each returned as a
 * separate NUL-terminated string, in input order.
 *
 * The sequence length is taken from the lead byte:
 *   0x00..0x7f  one byte
 *   0x80..0xdf  two bytes (stray continuation bytes 0x80..0xbf are
 *               grouped with the following byte, as before)
 *   0xe0..0xef  three bytes
 *   0xf0..0xff  four bytes
 * A sequence cut short by the end of the string yields the bytes that are
 * present; the terminator is never stepped over.
 *
 * NOTE(review): the preprocessor lines below are kept exactly as they
 * were.  When their #if condition is false the function body is empty
 * and returns no value -- presumably intended only for static-analysis
 * builds; confirm.
 */
cst_val *cst_utf8_explode(const cst_string *utf8string)
{
    const unsigned char *xxx = (const unsigned char *)utf8string;
    cst_val *chars=NULL;
    int i, n, len;
    char utf8char[5];
#import "OpenEarsStaticAnalysisToggle.h"
#ifdef STATICANALYZEDEPENDENCIES
#define __clang_analyzer__ 1
#endif
#if !defined(__clang_analyzer__) || defined(STATICANALYZEDEPENDENCIES)
#undef __clang_analyzer__
    for (i=0; xxx[i]; i+=n)
    {
        if (xxx[i] < 0x80)       /* one byte */
            len = 1;
        else if (xxx[i] < 0xe0)  /* two bytes */
            len = 2;
        else if (xxx[i] < 0xf0)  /* three bytes */
            len = 3;
        else                     /* four bytes */
            len = 4;

        /* copy at most len bytes, stopping early at the terminator;
           n is at least 1 because xxx[i] is non-zero here */
        for (n=0; (n < len) && xxx[i+n]; n++)
            utf8char[n] = (char)xxx[i+n];
        utf8char[n] = '\0';

        chars = cons_val(string_val(utf8char),chars);
    }
    return val_reverse(chars);
#endif
}
Ejemplo n.º 10
0
/*
 * Letter-to-sound function that maps each UTF-8 character of word to
 * phones through unicode_sampa_mapping.
 *
 * Each character is converted to its hex form with cst_utf8_as_hex() and
 * looked up with cst_find_u2sampa().  Characters with no mapping are
 * reported on stdout and contribute nothing.  For a mapped character the
 * non-null entries in slots 4 down to 1 of its row are taken; the final
 * reversal keeps characters in word order and slots in that visiting
 * order.
 *
 * l, pos and feats are not used.  The resulting list is also printed to
 * stdout before being returned.
 */
cst_val *cmu_grapheme_lex_lts_function(const struct lexicon_struct *l, 
                                       const char *word, const char *pos,
                                       const cst_features *feats)
{
    cst_val *utflets;
    cst_val *phones = 0;
    const cst_val *v;
    char ord[10];
    int slot,phindex;

    /* string to utf8 chars */
    utflets = cst_utf8_explode(word);

    v = utflets;
    while (v)
    {
        cst_utf8_as_hex(val_string(val_car(v)),ord);
        phindex = cst_find_u2sampa(ord);
        if (phindex < 0)
            printf("awb_debug no sampa %s %s\n",val_string(val_car(v)),ord);
        else
        {
            /* pushed on the front here, reversed once after the loop */
            for (slot=4; slot>0; slot--)
            {
                if (unicode_sampa_mapping[phindex][slot])
                    phones = cons_val(string_val(unicode_sampa_mapping[phindex][slot]),
                                      phones);
            }
        }
        v = val_cdr(v);
    }

    phones = val_reverse(phones);
#if 1
    printf("cmu_grapheme_lex.c: word \"%s\" ",word);
    val_print(stdout,phones);
    printf("\n");
#endif

    delete_val(utflets);

    return phones;
}
Ejemplo n.º 11
0
/*
 * Load addenda entries from lexfile, one entry per line.
 *
 * Lines whose first character is '#' and lines made only of spaces are
 * skipped.  Every other line is handed to cst_lex_make_entry(); lines it
 * rejects (NULL result) are dropped.  The accepted entries are returned
 * as a list in file order.  If the file cannot be opened an error is
 * reported through cst_errmsg() and NULL is returned.
 */
cst_val *cst_lex_load_addenda(const cst_lexicon *lex, const char *lexfile)
{
    cst_tokenstream *lf;
    const cst_string *line;
    cst_val *entry;
    cst_val *loaded = NULL;
    int k;

    lf = ts_open(lexfile,"\n","","","");
    if (lf == NULL)
    {
        cst_errmsg("lex_add_addenda: cannot open lexicon file\n");
        return NULL;
    }

    while (!ts_eof(lf))
    {
        line = ts_get(lf);
        if (line[0] == '#')
            continue;  /* a comment */

        /* find the first non-space character, if any */
        k = 0;
        while (line[k] == ' ')
            k++;
        if (!line[k])
            continue;  /* a blank line */

        entry = cst_lex_make_entry(lex,line);
        if (entry)
            loaded = cons_val(entry,loaded);
    }

    ts_close(lf);
    return val_reverse(loaded);
}
Ejemplo n.º 12
0
/*
 * Split a UTF-8 string into a list of its characters, each returned as a
 * separate NUL-terminated string, in input order.
 *
 * The sequence length is taken from the lead byte:
 *   0x00..0x7f  one byte
 *   0x80..0xdf  two bytes (stray continuation bytes 0x80..0xbf are
 *               grouped with the following byte, as before)
 *   0xe0..0xef  three bytes
 *   0xf0..0xff  four bytes
 * A sequence cut short by the end of the string yields the bytes that are
 * present; the terminator is never stepped over.
 */
cst_val *cst_utf8_explode(const cst_string *utf8string)
{
    const unsigned char *xxx = (const unsigned char *)utf8string;
    cst_val *chars=NULL;
    int i, n, len;
    char utf8char[5];

    for (i=0; xxx[i]; i+=n)
    {
        if (xxx[i] < 0x80)       /* one byte */
            len = 1;
        else if (xxx[i] < 0xe0)  /* two bytes */
            len = 2;
        else if (xxx[i] < 0xf0)  /* three bytes */
            len = 3;
        else                     /* four bytes */
            len = 4;

        /* copy at most len bytes, stopping early at the terminator;
           n is at least 1 because xxx[i] is non-zero here */
        for (n=0; (n < len) && xxx[i+n]; n++)
            utf8char[n] = (char)xxx[i+n];
        utf8char[n] = '\0';

        chars = cons_val(string_val(utf8char),chars);
    }
    return val_reverse(chars);
}
Ejemplo n.º 13
0
/*
 * Apply the rewrite rules r to word.
 *
 * The word is turned into a tape of one-character strings framed by "#"
 * at both ends, passed to lts_rewrites(), and the temporary tape is
 * released.  Returns whatever lts_rewrites() produced.
 */
cst_val *lts_rewrites_word(const char *word, const cst_lts_rewrites *r)
{
    cst_val *tape, *result;
    char letter[2];
    const char *c;

    letter[1] = '\0';

    /* build the tape back to front, then flip it */
    tape = cons_val(string_val("#"),NULL);
    for (c=word; *c; c++)
    {
        letter[0] = *c;
        tape = cons_val(string_val(letter),tape);
    }
    tape = val_reverse(cons_val(string_val("#"),tape));

    result = lts_rewrites(tape,r);

    delete_val(tape);

    return result;
}
Ejemplo n.º 14
0
/*
 * Rewrite the input tape itape with the rules in r and return the output
 * tape.
 *
 * The tape is split into a left context (items already consumed, most
 * recent first) and a right context (items still to process).  While the
 * right context has more than one item left, find_rewrite_rule() picks a
 * rule for the current position; for every item in the rule's second
 * element one input item is moved from the right to the left context,
 * and the items of the rule's fourth element are appended to the output.
 * Processing stops early if no rule is found.
 */
cst_val *lts_rewrites(const cst_val *itape, const cst_lts_rewrites *r)
{
    cst_val *left = cons_val(val_car(itape),NULL);
    const cst_val *right = val_cdr(itape);
    cst_val *otape = NULL;
    const cst_val *rule;
    const cst_val *item;

    while (val_cdr(right) &&
           ((rule = find_rewrite_rule(left,right,r)) != NULL))
    {
        /* shift the tape head past the part the rule consumed */
        item = val_car(val_cdr(rule));
        while (item)
        {
            left = cons_val(val_car(right),left);
            right = val_cdr(right);
            item = val_cdr(item);
        }

        /* emit the rule's output; otape is built reversed */
        item = val_car(val_cdr(val_cdr(val_cdr(rule))));
        while (item)
        {
            otape = cons_val(val_car(item),otape);
            item = val_cdr(item);
        }
    }

    delete_val_list(left);

    return val_reverse(otape);
}
Ejemplo n.º 15
0
/*
 * Parse one textual lexicon entry into a list value.
 *
 * Accepted forms of entry:
 *     WORD : PHONE PHONE ...
 *     WORD POS : PHONE PHONE ...
 * WORD may be double-quoted (with '\\' as the escape character).  When no
 * POS is given it defaults to "nil".  A "#" token, or an empty token,
 * ends the phone list.  Phones not present in lex->phone_table are
 * reported on stdout and left out.
 *
 * Returns the list (WORD POS PHONE ...), or NULL (after printing a
 * message) when the ":" separator is not where it is expected.
 */
cst_val *cst_lex_make_entry(const cst_lexicon *lex, const cst_string *entry)
{
    cst_tokenstream *ts;
    cst_val *phones = NULL;
    cst_val *result;
    const cst_string *tok;
    cst_string *word;
    cst_string *pos;
    int k;

    ts = ts_open_string(entry,
                        cst_ts_default_whitespacesymbols,
                        "","","");

    tok = ts_get(ts);
    if (tok[0] == '"')
    {
        /* quoted headword: start over and read it as a quoted token */
        ts_close(ts);
        ts = ts_open_string(entry,
                            cst_ts_default_whitespacesymbols,
                            "","","");
        tok = ts_get_quoted_token(ts,'"','\\');
    }
    word = cst_strdup(tok);

    tok = ts_get(ts);
    if (cst_streq(":",tok))
        pos = cst_strdup("nil");    /* no part of speech given */
    else
    {
        pos = cst_strdup(tok);
        tok = ts_get(ts);
        if (!cst_streq(":",tok))    /* the separator must follow the pos */
        {
            cst_fprintf(stdout,"add_addenda: lex %s: expected \":\" in %s\n",
                        lex->name,
                        word);
            cst_free(word);
            cst_free(pos);
            ts_close(ts);
            return NULL;
        }
    }

    while (!ts_eof(ts))
    {
        tok = ts_get(ts);

        if (cst_streq("#",tok))     /* comment to end of line */
            break;
        if (cst_streq("",tok))      /* trailing ws at eoln causes this */
            break;

        /* look the phone up in the lexicon's NULL-terminated phone table */
        k = 0;
        while (lex->phone_table[k] && !cst_streq(tok,lex->phone_table[k]))
            k++;

        if (lex->phone_table[k])
            phones = cons_val(string_val(tok),phones);
        else
            cst_fprintf(stdout,"add_addenda: lex: %s word %s phone %s not in lexicon phoneset\n",
                        lex->name,
                        word,
                        tok);
    }

    result = cons_val(string_val(word),cons_val(string_val(pos),
                      val_reverse(phones)));
    cst_free(word);
    cst_free(pos);
    ts_close(ts);

    return result;
}
/*
 * Look word up in lexicon l and return its phone list; when the caller
 * gave no part of speech, also report the one stored in the lexicon.
 *
 * The search key is the first character of pos ('0' when pos is NULL)
 * followed by word.  Sources are tried in this order: the addenda (if
 * any), the compiled lexicon, l->lts_rule_set via lts_apply(), and
 * finally l->lts_function.
 *
 * return_pos: set to TRUE only when the word is found in the compiled
 *   lexicon and pos is NULL; in that case the first character of the
 *   entry's word_pos is prepended to the returned list as a one-character
 *   string.  It is left untouched in every other case.
 */
cst_val *lex_lookup_return_pos(const cst_lexicon *l, const char *word, const char *pos, int *return_pos)
{
    int index,p;
    char *wp;
    char buf[2];    /* one-character POS string; no heap needed */
    cst_val *phones = 0;
    int found = FALSE;

    /* one byte for the tag, one for the terminating NUL */
    wp = cst_alloc(char,strlen(word)+2);
    sprintf(wp,"%c%s",(pos ? pos[0] : '0'),word);

#ifdef CECUM_DEBUG
    printf ("lex_lookup_return_pos looks for %s \n", word);
#endif

    if (l->addenda)
        phones = CECUM_lex_lookup_addenda (wp,l,&found);

    if (!found)
    {
        index = CECUM_lex_lookup_bsearch (l, l->entry_index,0,l->num_entries,wp);

#ifdef CECUM_DEBUG
        printf ("lex_lookup_return_pos has found the word at pos %d \n", index);
#endif

        if (index >= 0)
        {
            for (p=l->entry_index[index].phone_index; l->phones[p]; p++)
                phones = cons_val(string_val(l->phone_table[l->phones[p]]), phones);
            phones = val_reverse(phones);
            if (!pos)
            {
                /* prepend the POS when the caller did not supply one */
                *return_pos = TRUE;
                buf[0] = l->entry_index[index].word_pos[0];
                buf[1] = '\0';
                phones = cons_val(string_val(buf), phones);
            }
        }
        else
        {
#ifdef CECUM_DEBUG
            /* the message has no conversion, so it takes no argument */
            printf ("lex_lookup_return_pos: word not found \n");
#endif

            /*
               This is the point from which the problem we were working
               on usually shows up.  If the word is not found in the
               dictionary, an index of -1 is returned.  lts_rule_set (and
               hence lts_apply) is not used, because in ifd_lex.c we set

                        ifd_lex.lts_rule_set = 0;

               Besides, for Italian there do not seem to be any rules
               compatible with lts_apply, unlike for English.

               The function pointer l->lts_function, on the other hand,
               is called.  Here it refers to italian_lts_function() (in
               ifd_lex.c), which in turn calls a series of rewrite
               routines several times (this is where the error we are
               working on occurs) -- for example ifd_mid_lts_rewrites().
             */

            if (l->lts_rule_set)
                phones = lts_apply(word,
                                   "",  /* more features if we had them */
                                   l->lts_rule_set);
            else if (l->lts_function)
                phones = (l->lts_function)(l,word,"");
        }
    }

    cst_free(wp);
    return phones;
}