Example #1
0
cst_val *en_exp_letters(const char *lets)
{
    /* Spell out lets: returns a list with one symbol per input character,
       in input order.  Upper-case letters are folded to lower case,
       decimal digits are replaced by their digit2num[] word, the letter
       "a" becomes the symbol "_a", and every other character is passed
       through as a one-character string. */
    char sym[2];
    cst_val *r = 0;
    int i;

    sym[1] = '\0';
    for (i=0; lets[i] != '\0'; i++)
    {
        /* The <ctype.h> functions are only defined for values that fit
           in an unsigned char (or EOF); a plain char holding a byte
           >= 0x80 may be negative, so go through unsigned char. */
        unsigned char c = (unsigned char)lets[i];

        if (isupper(c))
            c = (unsigned char)tolower(c);
        sym[0] = (char)c;

        if ((c >= '0') && (c <= '9'))
            r = cons_val(string_val(digit2num[c-'0']),r);
        else if (c == 'a')
            r = cons_val(string_val("_a"),r);
        else
            r = cons_val(string_val(sym),r);
    }

    /* The list was built back to front, so flip it before returning. */
    return val_reverse(r);
}
Example #2
0
cst_val *en_exp_real(const char *numstring)
{
    /* Expand a real-number token into a list of words.
         - a leading '-' or '+' gives "minus" / "plus" followed by the
           expansion of the rest of the token;
         - a token containing 'e' (or, failing that, 'E') is split there:
           the part before is expanded recursively, then the word "e",
           then the part after, expanded recursively;
         - a token containing '.' gives the integer part expanded as a
           number, the word "point", and the fraction read digit by digit;
         - anything else is expanded as a plain number.
       Returns NULL when numstring is NULL. */
    char *head;
    const char *p;
    cst_val *r;

    /* The sign branches used to test for NULL, but the remaining branches
       handed the pointer straight to strchr(); reject NULL once, here. */
    if (numstring == NULL)
        return NULL;

    if (numstring[0] == '-')
        r = cons_val(string_val("minus"),
                     en_exp_real(&numstring[1]));
    else if (numstring[0] == '+')
        r = cons_val(string_val("plus"),
                     en_exp_real(&numstring[1]));
    else if (((p=strchr(numstring,'e')) != 0) ||
             ((p=strchr(numstring,'E')) != 0))
    {
        /* head = copy of everything before the exponent marker */
        head = cst_strdup(numstring);
        head[p-numstring] = '\0';
        r = val_append(en_exp_real(head),
                       cons_val(string_val("e"),
                                en_exp_real(p+1)));
        cst_free(head);
    }
    else if ((p=strchr(numstring,'.')) != 0)
    {
        /* head = copy of everything before the decimal point */
        head = cst_strdup(numstring);
        head[p-numstring] = '\0';
        r = val_append(en_exp_number(head),
                       cons_val(string_val("point"),
                                en_exp_digits(p+1)));
        cst_free(head);
    }
    else
        r = en_exp_number(numstring);  /* I don't think you can get here */

    return r;
}
static cst_val *cmu_LANGNAME_tokentowords(cst_item *token, const char *name)
{
    /* Expand token/name into a list of words.  A token carrying a
       "phones" feature always yields the one-word list (name); otherwise
       the result is (name) for a non-empty name and NULL for an empty
       one. */

    /* printf("token_name %s name %s\n",item_name(token),name); */

    if (item_feat_present(token,"phones"))
        return cons_val(string_val(name),NULL);

#if 0
    if (item_feat_present(token,"nsw"))
	nsw = item_feat_string(token,"nsw");

    utt = item_utt(token);
    lex = val_lexicon(feat_val(utt->features,"lexicon"));
#endif

    return (cst_strlen(name) > 0) ? cons_val(string_val(name),0) : NULL;
}
Example #4
0
cst_val *lex_lookup(const cst_lexicon *l, const char *word, const char *pos,
                    const cst_features *feats)
{
    /* Look word up in lexicon l and return its phones as a list.
       The search key is one part-of-speech character (pos[0], or '0'
       when pos is NULL) followed by word.  The addenda are consulted
       first when present; if they do not match, the compiled entries
       are searched; if that fails too, the lexicon's lts_function or,
       failing that, its lts_rule_set is applied to word.  The result is
       0 when none of these produces anything. */
    cst_val *result = 0;
    int hit = FALSE;
    int entry;
    int k;
    const char *code;
    char *key;

    key = cst_alloc(char,cst_strlen(word)+2);
    cst_sprintf(key,"%c%s",(pos ? pos[0] : '0'),word);

    if (l->addenda)
        result = lex_lookup_addenda(key,l,&hit);

    if (!hit)
    {
        entry = lex_lookup_bsearch(l,key);

        if (entry < 0)
        {
            /* Not in the compiled lexicon: fall back to letter-to-sound */
            if (l->lts_function)
                result = (l->lts_function)(l,word,"",feats);
            else if (l->lts_rule_set)
                result = lts_apply(word,
                                   "",  /* more features if we had them */
                                   l->lts_rule_set);
        }
        else
        {
            /* The phone bytes are read walking backwards from entry-2
               until a zero byte is reached. */
            k = entry-2;
            while (l->data[k])
            {
                if (l->phone_hufftable)
                {
                    /* each data byte names a string of phone indices */
                    code = l->phone_hufftable[l->data[k]];
                    while (*code)
                    {
                        result = cons_val(string_val(l->phone_table[(unsigned char)*code]),
                                          result);
                        code++;
                    }
                }
                else  /* no compression -- should we still support this ? */
                {
                    result = cons_val(string_val(l->phone_table[l->data[k]]),
                                      result);
                }
                k--;
            }
            result = val_reverse(result);
        }
    }

    cst_free(key);

    return result;
}
Example #5
0
static cst_val *add_lts_boundary_marks(const cst_val *l)
{
    /* Return a new list made of "#", then the elements of l in their
       original order, then "#". */
    const cst_val *cur = l;
    cst_val *acc = cons_val(string_val("#"),NULL);

    /* Push every element on the front; the list is flipped at the end. */
    while (cur)
    {
        acc = cons_val(val_car(cur),acc);
        cur = val_cdr(cur);
    }

    return val_reverse(cons_val(string_val("#"),acc));
}
Example #6
0
cst_val *en_exp_ordinal(const char *rawnumstring)
{
    /* Return the ordinal reading of a digit string as a list of words.
       Commas in rawnumstring are ignored.  The string is expanded as a
       cardinal number and the last word of that expansion is replaced by
       its ordinal form.  When no ordinal form is known for the last word
       the cardinal expansion is returned unchanged; when the cardinal
       expansion is empty, NULL is returned. */
    cst_val *card, *o;
    const cst_val *t;
    const char *l;
    const char *ord;
    char *numstring;
    int i,j;

    /* Copy the input without its commas */
    numstring = cst_strdup(rawnumstring);
    for (j=i=0; rawnumstring[i] != '\0'; i++)
        if (rawnumstring[i] != ',')
        {
            numstring[j] = rawnumstring[i];
            j++;
        }
    numstring[j] = '\0';

    /* Keep the cardinal reversed so that its last word is at the head */
    card = val_reverse(en_exp_number(numstring));
    cst_free(numstring);

    if (card == NULL)   /* nothing was expanded, so no last word to convert */
        return NULL;

    l = val_string(val_car(card));
    ord = 0;
    for (i=0; i<10; i++)
        if (cst_streq(l,digit2num[i]))
            ord = ord2num[i];
    if (!ord)
        for (i=0; i<10; i++)
            if (cst_streq(l,digit2teen[i]))
                ord = ord2teen[i];
    if (!ord)
        for (i=0; i<10; i++)
            if (cst_streq(l,digit2enty[i]))
                ord = ord2enty[i];
    if (cst_streq(l,"hundred"))
        ord = "hundredth";
    if (cst_streq(l,"thousand"))
        ord = "thousandth";
    if (cst_streq(l,"million"))
        ord = "millionth";
    if (cst_streq(l,"billion"))
        ord = "billionth";
    if (!ord)  /* dunno, so don't convert anything */
        return val_reverse(card);  /* undo the reversal done above */

    /* Rebuild in forward order: the ordinal goes last, and the remaining
       cardinal words (currently reversed) are pushed in front of it. */
    o = cons_val(string_val(ord),0);
    for (t=val_cdr(card); t; t=val_cdr(t))
        o = cons_val(val_car(t),o);
    delete_val(card);
    return o;
}
Example #7
0
cst_val *get_wavelist(const char *wavelistfile)
{
    /* Read every token of wavelistfile into a list, kept in file order.
       The tokens are expected to come in pairs: when the file cannot be
       opened, or holds an odd number of tokens, a message is written to
       stderr and an empty list (0) is returned. */
    cst_tokenstream *stream;
    cst_val *names = 0;
    const char *tok;
    int count;

    stream = ts_open(wavelistfile);
    if (!stream)
    {
        fprintf(stderr,"combine_waves: can't open \"%s\"\n",wavelistfile);
        return 0;
    }

    for (count=0; (tok=ts_get(stream)) != 0; count++)
        names = cons_val(string_val(tok),names);

    if (count%2 != 0)
    {
        fprintf(stderr,"combine_waves: doesn't have matched pairs \"%s\"\n",wavelistfile);
        delete_val(names);
        names = 0;
    }

    ts_close(stream);

    /* tokens were pushed on the front, so flip to file order */
    return val_reverse(names);
}
Example #8
0
static cst_val *lex_lookup_addenda(const char *wp,const cst_lexicon *l,
                                   int *found)
{
    /* Search the addenda of l for the key wp (one part-of-speech
       character followed by the word).  An entry matches when the word
       parts are equal and the part-of-speech characters agree, where '0'
       on either side matches anything.  On the first match *found is set
       to TRUE and the entry's phones are returned as a list; otherwise
       NULL is returned and *found is left untouched. */
    cst_val *phones;
    int pos_ok;
    int e,k;

    for (e=0; l->addenda[e]; e++)
    {
        pos_ok = (wp[0] == '0') ||
                 (wp[0] == l->addenda[e][0][0]) ||
                 (l->addenda[e][0][0] == '0');
        if (!pos_ok)
            continue;
        if (!cst_streq(wp+1,l->addenda[e][0]+1))
            continue;

        /* element 0 is the key, the phones follow it */
        phones = NULL;
        k = 1;
        while (l->addenda[e][k])
        {
            phones = cons_val(string_val(l->addenda[e][k]),phones);
            k++;
        }
        *found = TRUE;
        return val_reverse(phones);
    }

    return NULL;
}
Example #9
0
cst_val *en_exp_digits(const char *numstring)
{
    /* Read numstring one character at a time: each decimal digit gives
       its digit2num[] word, any other character gives "umpty".  The words
       are returned in input order. */
    cst_val *words = 0;
    const char *w;
    int i;

    for (i=0; numstring[i] != '\0'; i++)
    {
        if ((numstring[i] < '0') || (numstring[i] > '9'))
            w = "umpty";
        else
            w = digit2num[numstring[i]-'0'];
        words = cons_val(string_val(w),words);
    }

    return val_reverse(words);
}
Example #10
0
cst_val *cst_args(char **argv, int argc,
                  const char *description,
                  cst_features *args)
{
    /* Parse argv[1..argc-1] against description.
       Arguments that start with '-' are options: their type is looked up
       in the table built from description, and their value is stored in
       args under the option's own name ("true" for <binary> options, the
       following argument converted as needed for <int>, <float> and
       <string> options).  Unknown options and the help flags call
       parse_usage().  All other arguments are collected, in order, into
       the returned list. */
    cst_features *op_types = new_features();
    cst_val *files = NULL;
    const char *type;
    char *opt;
    int i;

    parse_description(description,op_types);

    for (i=1; i<argc; i++)
    {
        opt = argv[i];

        if (opt[0] != '-')
        {
            /* plain argument */
            files = cons_val(string_val(opt),files);
            continue;
        }

        if ((!feat_present(op_types,opt)) ||
            (cst_streq("-h",opt)) ||
            (cst_streq("-?",opt)) ||
            (cst_streq("--help",opt)) ||
            (cst_streq("-help",opt)))
        {
            parse_usage(argv[0],"","",description);
            continue;
        }

        type = feat_string(op_types,opt);
        if (cst_streq("<binary>",type))
        {
            feat_set_string(args,opt,"true");
            continue;
        }

        /* every remaining option type takes the next argument as value */
        if (i+1 == argc)
            parse_usage(argv[0],
                        "missing argument for ",opt,
                        description);
        if (cst_streq("<int>",type))
            feat_set_int(args,opt,atoi(argv[i+1]));
        else if (cst_streq("<float>",type))
            feat_set_float(args,opt,atof(argv[i+1]));
        else if (cst_streq("<string>",type))
            feat_set_string(args,opt,argv[i+1]);
        else
            parse_usage(argv[0],
                        "unknown arg type ",type,
                        description);
        i++;  /* step over the value */
    }
    delete_features(op_types);

    return val_reverse(files);
}
Example #11
0
cst_val* ustring32_lts_apply(const ustring32_t u32,const cst_lts_rewrites *rule)
{
  /* Apply the rewrite rules to a UTF-32 string.
     Builds the list "#", one UTF-8 encoded string per code point of u32
     in order, "#", passes it to lts_rewrites() and returns that result.
     Returns NULL for an empty string.  Code points that u8_uctomb()
     cannot encode are left out of the list. */
  size_t n=ustring32_length(u32);
  if(n==0) return NULL;
  cst_val *l=cons_val(string_val("#"),NULL);
  uint8_t b[8];
  size_t i=n;
  int k;
  /* Walk from the last code point to the first so that pushing on the
     front of the list leaves it in reading order. */
  do
    {
      i--;
      /* keep one byte of b free for the terminator */
      k=u8_uctomb(b,ustring32_at(u32,i),sizeof(b)-1);
      /* A failed conversion is reported with a negative value; using it
         as an index would write outside b, so skip this code point. */
      if(k<=0)
        continue;
      b[k]='\0';
      l=cons_val(string_val((char*)b),l);
    }
  while(i);
  l=cons_val(string_val("#"),l);
  cst_val *output=lts_rewrites(l, rule);
  delete_val(l);
  return output;
}
cst_val *en_exp_id(const char *numstring)
{
    /* Read numstring the way years and ids are read, i.e. mostly in
       pairs of digits:
         - four digits ending in "00": a plain number when the second
           digit is also '0' (2000, 3000), otherwise the first pair
           followed by "hundred";
         - two digits starting with '0': "oh" plus the second digit;
         - four digits with '0' in second place, or fewer than three
           digits: a plain number;
         - an odd number of digits: the first digit on its own, then the
           rest read as an id;
         - otherwise: the first pair as a number, then the rest read as
           an id. */
    char pair[3];
    size_t len = strlen(numstring);

    if ((len == 4) &&
        (numstring[2] == '0') &&
        (numstring[3] == '0'))
    {
        if (numstring[1] == '0')
            return en_exp_number(numstring); /* 2000, 3000 */

        pair[0] = numstring[0];
        pair[1] = numstring[1];
        pair[2] = '\0';
        return val_append(en_exp_number(pair),
                          cons_val(string_val("hundred"),0));
    }

    if ((len == 2) && (numstring[0] == '0'))
        return cons_val(string_val("oh"),
                        en_exp_digits(&numstring[1]));

    if (((len == 4) && (numstring[1] == '0')) ||
        (len < 3))
        return en_exp_number(numstring);

    if (len%2 == 1)
        return cons_val(string_val(digit2num[numstring[0]-'0']),
                        en_exp_id(&numstring[1]));

    pair[0] = numstring[0];
    pair[1] = numstring[1];
    pair[2] = '\0';
    return val_append(en_exp_number(pair),en_exp_id(&numstring[2]));
}
Example #13
0
cst_val *lts_rewrites_word(const char *word, const cst_lts_rewrites *r)
{
    /* Apply the rewrite rules r to word.  The word is turned into the
       list "#", one single-character string per byte of word, "#"; that
       list is handed to lts_rewrites() and then deleted.  The result of
       lts_rewrites() is returned. */
    char letter[2];
    const char *c;
    cst_val *tape, *out;

    letter[1] = '\0';
    tape = cons_val(string_val("#"),NULL);
    for (c=word; *c; c++)
    {
        letter[0] = *c;
        tape = cons_val(string_val(letter),tape);
    }
    /* close with the end marker, then flip into reading order */
    tape = val_reverse(cons_val(string_val("#"),tape));

    out = lts_rewrites(tape,r);

    delete_val(tape);

    return out;
}
Example #14
0
cst_val *lts_rewrites(const cst_val *itape, const cst_lts_rewrites *r)
{
    /* Rewrite the symbols on itape using the rules in r and return the
       list of output symbols.
       The tape is split into a left context (starting with the first
       symbol of itape, most recent symbol first) and the remaining right
       context.  While the right context has more than one symbol left, a
       rule is looked up for the current position; processing stops when
       none is found.  For each element of the rule's second item one
       symbol is moved from the right context to the left context, and
       every element of the rule's fourth item is written to the output. */
    cst_val *left;
    const cst_val *right;
    const cst_val *rule;
    const cst_val *step;
    cst_val *out = NULL;

    left = cons_val(val_car(itape),NULL);
    right = val_cdr(itape);

    while (val_cdr(right))
    {
        rule = find_rewrite_rule(left,right,r);
        if (rule == NULL)
            break;
/*	val_print(stdout,rule);
	printf("\n"); */

        /* Shift itape head */
        step = val_car(val_cdr(rule));
        while (step)
        {
            left = cons_val(val_car(right),left);
            right = val_cdr(right);
            step = val_cdr(step);
        }

        /* Output things to otape */
        step = val_car(val_cdr(val_cdr(val_cdr(rule))));
        while (step)
        {
            out = cons_val(val_car(step),out);
            step = val_cdr(step);
        }
    }

    delete_val_list(left);

    /* output was pushed on the front, so flip it */
    return val_reverse(out);
}
Example #15
0
cst_val *cst_utf8_explode(const cst_string *utf8string)
{
    /* Return a list of the UTF-8 characters of utf8string, each as its
       own string, in input order.
       The length of each character is taken from its first byte:
       below 0xC0 one byte (ASCII, or a stray continuation byte, which is
       emitted on its own), below 0xE0 two bytes, below 0xF0 three bytes,
       otherwise four.  A sequence cut short by the end of the string is
       emitted as far as it goes; the terminating NUL is never skipped.
       NOTE(review): the preprocessor lines below come from a project
       header that is not visible here; when they exclude the loop, the
       function now returns an empty list instead of falling off the end
       without a return value. */
    const unsigned char *xxx = (const unsigned char *)utf8string;
    cst_val *chars=NULL;
    int i, j, l;
    char utf8char[5];
#import "OpenEarsStaticAnalysisToggle.h"
#ifdef STATICANALYZEDEPENDENCIES
#define __clang_analyzer__ 1
#endif
#if !defined(__clang_analyzer__) || defined(STATICANALYZEDEPENDENCIES)
#undef __clang_analyzer__
    i = 0;
    while (xxx[i])
    {
        if (xxx[i] < 0xc0)       /* one byte */
            l = 1;
        else if (xxx[i] < 0xe0)  /* two bytes */
            l = 2;
        else if (xxx[i] < 0xf0)  /* three bytes */
            l = 3;
        else                     /* four bytes */
            l = 4;

        /* Copy at most l bytes, stopping early at the end of the string
           so a truncated sequence cannot carry the index past the NUL. */
        for (j=0; (j < l) && xxx[i+j]; j++)
            utf8char[j] = (char)xxx[i+j];
        utf8char[j] = '\0';

        chars = cons_val(string_val(utf8char),chars);
        i += j;   /* j >= 1 here because xxx[i] is not NUL */
    }
#endif
    return val_reverse(chars);
}
cst_val *cmu_grapheme_lex_lts_function(const struct lexicon_struct *l, 
                                       const char *word, const char *pos,
                                       const cst_features *feats)
{
    /* Letter-to-sound function based on the unicode_sampa_mapping table.
       word is split into UTF-8 characters; each character is converted
       with cst_utf8_as_hex() and looked up with cst_find_u2sampa().  For
       a character that is found, the non-NULL strings in slots 4 down to
       1 of its table row are pushed on the front of the result; for one
       that is not found a debug line is printed.  The whole result is
       reversed at the end, printed, and returned.
       NOTE(review): with this push order and the final reversal, the
       slots of one character come out in 4..1 order -- confirm that is
       what the table layout expects. */
    cst_val *letters;
    cst_val *result = 0;
    const cst_val *cur;
    const char *letter;
    char hex[10];
    int row,slot;

    /* string to utf8 chars */
    letters = cst_utf8_explode(word);

    cur = letters;
    while (cur)
    {
        letter = val_string(val_car(cur));
        cur = val_cdr(cur);

        cst_utf8_as_hex(letter,hex);
        row = cst_find_u2sampa(hex);
        if (row < 0)
        {
            printf("awb_debug no sampa %s %s\n",letter,hex);
            continue;
        }

        for (slot=4; slot>0; slot--)
        {
            if (unicode_sampa_mapping[row][slot])
                result = cons_val(string_val(unicode_sampa_mapping[row][slot]),
                                  result);
        }
    }

    result = val_reverse(result);
#if 1
    printf("cmu_grapheme_lex.c: word \"%s\" ",word);
    val_print(stdout,result);
    printf("\n");
#endif

    delete_val(letters);

    return result;
}
Example #17
0
cst_val *cst_utf8_explode(const cst_string *utf8string)
{
    /* Return a list of the UTF-8 characters of utf8string, each as its
       own string, in input order.
       The length of each character is taken from its first byte:
       below 0xC0 one byte (ASCII, or a stray continuation byte, which is
       emitted on its own), below 0xE0 two bytes, below 0xF0 three bytes,
       otherwise four.  A sequence cut short by the end of the string is
       emitted as far as it goes; the terminating NUL is never skipped. */
    const unsigned char *xxx = (const unsigned char *)utf8string;
    cst_val *chars=NULL;
    int i, j, l;
    char utf8char[5];

    i = 0;
    while (xxx[i])
    {
        if (xxx[i] < 0xc0)       /* one byte */
            l = 1;
        else if (xxx[i] < 0xe0)  /* two bytes */
            l = 2;
        else if (xxx[i] < 0xf0)  /* three bytes */
            l = 3;
        else                     /* four bytes */
            l = 4;

        /* Copy at most l bytes, stopping early at the end of the string
           so a truncated sequence cannot carry the index past the NUL. */
        for (j=0; (j < l) && xxx[i+j]; j++)
            utf8char[j] = (char)xxx[i+j];
        utf8char[j] = '\0';

        chars = cons_val(string_val(utf8char),chars);
        i += j;   /* j >= 1 here because xxx[i] is not NUL */
    }
    return val_reverse(chars);

}
Example #18
0
cst_val *cst_lex_load_addenda(const cst_lexicon *lex, const char *lexfile)
{   /* Read lexfile one line at a time and build a list of addenda
       entries, in file order.  Lines whose first character is '#' and
       lines made only of spaces are skipped; every other line is parsed
       with cst_lex_make_entry() and kept when that succeeds.  Returns
       NULL when the file cannot be opened. */
    cst_tokenstream *lf;
    const cst_string *line;
    cst_val *entry;
    cst_val *entries = NULL;
    int k;

    lf = ts_open(lexfile,"\n","","","");
    if (lf == NULL)
    {
        cst_errmsg("lex_add_addenda: cannot open lexicon file\n");
        return NULL;
    }

    while (!ts_eof(lf))
    {
        line = ts_get(lf);
        if (line[0] == '#')
            continue;  /* a comment */

        /* find the first character that is not a space */
        k = 0;
        while (line[k] == ' ')
            k++;
        if (line[k] == '\0')
            continue;  /* a blank line */

        entry = cst_lex_make_entry(lex,line);
        if (entry)
            entries = cons_val(entry,entries);
    }

    ts_close(lf);
    return val_reverse(entries);
}
Example #19
0
cst_val *default_tokentowords(cst_item *i)
{
    /* Default expansion: a one-element list holding the item's "name"
       feature as a string. */
    const char *name = item_feat_string(i,"name");
    cst_val *word = string_val(name);

    return cons_val(word, NULL);
}
Example #20
0
const char *feat_own_string(cst_features *f,const char *n)
{
    /* Wrap n in a new string value, push it on the front of
       f->owned_strings, and return the string held by that new head
       element. */
    cst_val *held = string_val(n);

    f->owned_strings = cons_val(held,f->owned_strings);

    return val_string(val_car(f->owned_strings));
}
Example #21
0
/* Predict the phones of word with the letter-to-sound rules r.
   word  - the letters to convert; only 'a'..'z' are predicted, every
           other character is skipped
   feats - extra feature characters appended to each letter's context
   r     - rule set: context window size, per-letter model index, models
           and phone table
   Returns the list of phone names in word order (0 if none were
   produced).  "epsilon" phones are dropped and a phone name of the form
   "x-y" is split into the two phones "x" and "y". */
cst_val *lts_apply(const char *word,const char *feats,const cst_lts_rules *r)
{
    int pos;
    cst_val *phones=0;
    cst_lts_letter *fval_buff;
    cst_lts_letter *full_buff;
    cst_lts_phone phone;
    char *left, *right, *p;

    /* For feature vals for each letter */
    /* (room for a left and a right context window plus the extra feats) */
    fval_buff = cst_alloc(cst_lts_letter,
			  (r->context_window_size*2)+
			   r->context_extra_feats);
    /* Buffer with added contexts */
    /* (padding + '#' on each side of the word, plus the terminator) */
    full_buff = cst_alloc(cst_lts_letter,
			  (r->context_window_size*2)+
			  strlen(word)+1); /* TBD assumes single POS feat */
    /* Assumes l_letter is a char and context < 8 */
    /* Layout: (window-1) '0's, '#', word, '#', (window-1) '0's; the pad
       string holds only eight '0's, which is where the limit on the
       context size comes from. */
    sprintf(full_buff,"%.*s#%s#%.*s",
	    r->context_window_size-1, "00000000",
	    word,
	    r->context_window_size-1, "00000000");

    /* Do the prediction backwards so we don't need to reverse the answer */
    /* pos starts on the last letter of word and stops at the leading '#' */
    for (pos = r->context_window_size + strlen(word) - 1;
	 full_buff[pos] != '#';
	 pos--)
    {
	/* Fill the features buffer for the predictor */
	/* (window chars to the left of pos, window chars to the right of
	   pos, then feats; the letter at pos itself is not included) */
	sprintf(fval_buff,"%.*s%.*s%s",
		r->context_window_size,
		full_buff+pos-r->context_window_size,
		r->context_window_size,
		full_buff+pos+1,
		feats);
	if ((full_buff[pos] < 'a') ||
	    (full_buff[pos] > 'z'))
	{   /* English specific */
#ifdef EXCESSIVELY_CHATTY
	    cst_errmsg("lts:skipping unknown char \"%c\"\n",
		       full_buff[pos]);
#endif
	    continue;
	}
	/* Run the model selected by the current letter on its context */
	phone = apply_model(fval_buff,
			    r->letter_index[((full_buff[pos])-'a')%26],
			    r->models);
	/* delete epsilons and split dual-phones */
	if (cst_streq("epsilon",r->phone_table[phone]))
	    continue;
	else if ((p=strchr(r->phone_table[phone],'-')) != NULL)
	{
	    /* left = the text before the '-', right = the text after it */
	    left = cst_substr(r->phone_table[phone],0,
			      strlen(r->phone_table[phone])-strlen(p));
	    right = cst_substr(r->phone_table[phone],
			       (strlen(r->phone_table[phone])-strlen(p))+1,
			       (strlen(p)-1));
	    /* prepend both, keeping left in front of right */
	    phones = cons_val(string_val(left),
			      cons_val(string_val(right),phones));
	    cst_free(left);
	    cst_free(right);
	}
	else
	    phones = cons_val(string_val(r->phone_table[phone]),phones);
    }

    cst_free(full_buff);
    cst_free(fval_buff);

    return phones;
}
Example #22
0
cst_val *en_exp_number(const char *numstring)
{
    /* Expand a digit string into the words that read it as a number.
       Strings of one to three digits are handled directly; strings of
       four to twelve digits are split into a leading group and a
       remainder of three, six or nine digits joined by "thousand",
       "million" or "billion"; longer strings are read digit by digit.
       Returns NULL for an empty string and 0 for "00". */
    int num_digits = cst_strlen(numstring);
    const char *scale;
    char lead[4];
    cst_val *high;
    int split;
    int i;

    switch (num_digits)
    {
    case 0:
        return NULL;

    case 1:
        return en_exp_digits(numstring);

    case 2:
        if (numstring[0] == '0')
        {
            if (numstring[1] == '0')
                return 0;
            return cons_val(string_val(digit2num[numstring[1]-'0']),0);
        }
        if (numstring[1] == '0')
            return cons_val(string_val(digit2enty[numstring[0]-'0']),0);
        if (numstring[0] == '1')
            return cons_val(string_val(digit2teen[numstring[1]-'0']),0);
        return cons_val(string_val(digit2enty[numstring[0]-'0']),
                        en_exp_digits(numstring+1));

    case 3:
        if (numstring[0] == '0')
            return en_exp_number(numstring+1);
        return cons_val(string_val(digit2num[numstring[0]-'0']),
                        cons_val(string_val("hundred"),
                                 en_exp_number(numstring+1)));

    default:
        break;
    }

    /* Way too many digits here, to be a number */
    if (num_digits >= 13)
        return en_exp_digits(numstring);

    /* Pick the scale word and how many leading digits belong to it */
    if (num_digits < 7)
    {
        split = num_digits-3;
        scale = "thousand";
    }
    else if (num_digits < 10)
    {
        split = num_digits-6;
        scale = "million";
    }
    else
    {   /* If there are pedantic brits out there, tough!, 10^9 is a billion */
        split = num_digits-9;
        scale = "billion";
    }

    /* lead = the one to three digits in front of the scale word */
    for (i=0; i < split; i++)
        lead[i] = numstring[i];
    lead[split] = '\0';

    high = en_exp_number(lead);
    if (high == 0)  /* nothing at this scale, e.g. leading zeros */
        return en_exp_number(numstring+split);

    return val_append(high,cons_val(string_val(scale),
                                    en_exp_number(numstring+split)));
}
cst_val *lex_lookup_return_pos(const cst_lexicon *l, const char *word, const char *pos, int *return_pos)
{
    /* Look word up in lexicon l and return its phones as a list.
       The search key is one part-of-speech character (pos[0], or '0'
       when pos is NULL) followed by word.  The addenda are tried first,
       then the compiled entries.  When the word is found in the compiled
       entries and pos is NULL, *return_pos is set to TRUE and the first
       character of the entry's part of speech is put in front of the
       phones.  When the word is not found, lts_apply() is used if the
       lexicon has an lts_rule_set, otherwise its lts_function. */
    cst_val *result = 0;
    int hit = FALSE;
    int entry,k;
    char *key, *tag;

    key = cst_alloc(char,strlen(word)+2);
    tag = cst_alloc(char,3);
    sprintf(key,"%c%s",(pos ? pos[0] : '0'),word);

#ifdef CECUM_DEBUG    
    printf ("lex_lookup_return_pos looks for %s \n", word);
#endif

    if (l->addenda)
        result = CECUM_lex_lookup_addenda (key,l,&hit);

    if (!hit)
    {
        entry = CECUM_lex_lookup_bsearch (l, l->entry_index,0,l->num_entries,key);

#ifdef CECUM_DEBUG          
        printf ("lex_lookup_return_pos has found the word at pos %d \n", entry);
#endif

        if (entry < 0)
        {
#ifdef CECUM_DEBUG          
            printf ("lex_lookup_return_pos: word not found \n", entry);
#endif

            /*
               Original author's note (translated from Italian):
               this is the point from which the problem we were working
               on usually shows up.  When the word is not found in the
               dictionary an index of -1 is returned.  lts_rule_set (and
               so lts_apply) is not used, because in ifd_lex.c we set

                        ifd_lex.lts_rule_set = 0;

               Besides, for Italian there do not seem to be rules
               compatible with lts_apply, unlike for English.

               The function pointer l->lts_function, on the other hand,
               is called.  Here it is italian_lts_function() (in
               ifd_lex.c), which in turn calls a series of rewrite
               routines several times (this is where the error we are
               working on occurs); ifd_mid_lts_rewrites() is one of the
               routines called from there.
             */

            if (l->lts_rule_set)
                result = lts_apply(word,
                                   "",  /* more features if we had them */
                                   l->lts_rule_set);
            else if (l->lts_function)
                result = (l->lts_function)(l,word,"");
        }
        else
        {
            /* phone indices run forward from phone_index to a zero */
            k = l->entry_index[entry].phone_index;
            while (l->phones[k])
            {
                result = cons_val(string_val(l->phone_table[l->phones[k]]), result);
                k++;
            }
            result = val_reverse(result);

            if (!pos)
            {
                /* put the part of speech in front when the caller gave none */
                *return_pos = TRUE;
                sprintf(tag,"%c",l->entry_index[entry].word_pos[0]);
                result = cons_val(string_val(tag), result);
            }
            //val_print(stdout,result);
            //printf("\n");
        }
    }

    cst_free(key);
    cst_free(tag);
    return result;
}
Example #24
0
cst_val *cst_lex_make_entry(const cst_lexicon *lex, const cst_string *entry)
{   /* Parse one lexicon line of the form  word [pos] : phone phone ...   */
    /* and return it as the list (word pos phone ...).  The pos is "nil" */
    /* when the line has none.  Phones that are not in lex->phone_table  */
    /* are reported on stdout and left out.  Returns NULL when the ":"   */
    /* is missing after an explicit pos.  lex itself is not modified.    */
    cst_tokenstream *e;
    cst_val *phones = NULL;
    cst_val *ventry;
    const cst_string *w, *p;
    cst_string *word;
    cst_string *pos;
    int i;

    e = ts_open_string(entry,
                       cst_ts_default_whitespacesymbols,
                       "","","");

    w = ts_get(e);
    if (w[0] == '"') /* it was a quoted entry */
    {   /* so reparse it */
        ts_close(e);
        e = ts_open_string(entry,
                           cst_ts_default_whitespacesymbols,
                           "","","");
        w = ts_get_quoted_token(e,'"','\\');
    }

    /* keep our own copy of the word: w is overtaken by the next read */
    /* NOTE(review): presumably the token buffer is reused -- confirm  */
    word = cst_strdup(w);
    p = ts_get(e);
    if (!cst_streq(":",p)) /* there is a real pos */
    {
        pos = cst_strdup(p);
        p = ts_get(e);
        if (!cst_streq(":",p)) /* the pos must be followed by ":" */
        {
            cst_fprintf(stdout,"add_addenda: lex %s: expected \":\" in %s\n",
                        lex->name,
                        word);
            cst_free(word);
            cst_free(pos);
            ts_close(e);
            return NULL;
        }
    }
    else
        pos = cst_strdup("nil");

    /* Everything after the ":" is a phone, up to end of input, "#" or "" */
    while (!ts_eof(e))
    {
        p = ts_get(e);
        /* Check its a legal phone */
        /* (i ends on the matching table slot, or on the NULL end marker) */
        for (i=0; lex->phone_table[i]; i++)
        {
            if (cst_streq(p,lex->phone_table[i]))
                break;
        }
        /* the two end-of-entry tests come before the result of the scan */
        if (cst_streq("#",p)) /* comment to end of line */
            break;
        else if (cst_streq("",p)) /* trailing ws at eoln causes this */
            break;
        else if (lex->phone_table[i])
            /* Only add it if its a valid phone */
            phones = cons_val(string_val(p),phones);
        else
        {
            cst_fprintf(stdout,"add_addenda: lex: %s word %s phone %s not in lexicon phoneset\n",
                        lex->name,
                        word,
                        p);
        }
    }

    /* phones were collected back to front, so flip them into line order */
    ventry = cons_val(string_val(word),cons_val(string_val(pos),
                      val_reverse(phones)));
    cst_free(word);
    cst_free(pos);
    ts_close(e);
#if 0
    printf("entry: ");
    val_print(stdout,ventry);
    printf("\n");
#endif

    return ventry;
}