Esempio n. 1
0
/*
 * Expand a real-number token into a list of words.
 *
 * The token is examined in this order:
 *   - leading '-'  -> "minus" followed by the expansion of the rest
 *   - leading '+'  -> "plus"  followed by the expansion of the rest
 *   - contains 'e' or 'E' -> mantissa, the word "e", exponent (both parts
 *     expanded recursively with en_exp_real())
 *   - contains '.' -> integer part via en_exp_number(), the word "point",
 *     fractional part via en_exp_digits()
 *   - otherwise    -> en_exp_number() on the whole token
 *
 * numstring: NUL-terminated token text.  A NULL pointer yields NULL instead
 *            of being handed to strchr().
 * Returns the head of the word list built for the token.
 */
cst_val *en_exp_real(const char *numstring)
{
    const char *mark;   /* position of the exponent marker or decimal point */
    char *head;         /* private copy of the text before `mark` */
    cst_val *r;

    if (numstring == NULL)
        return NULL;

    if (numstring[0] == '-')
        return cons_val(string_val("minus"),
                        en_exp_real(&numstring[1]));
    if (numstring[0] == '+')
        return cons_val(string_val("plus"),
                        en_exp_real(&numstring[1]));

    if (((mark = strchr(numstring,'e')) != 0) ||
        ((mark = strchr(numstring,'E')) != 0))
    {
        /* Split at the exponent marker: "<mantissa>e<exponent>" */
        head = cst_strdup(numstring);
        head[mark - numstring] = '\0';
        r = val_append(en_exp_real(head),
                       cons_val(string_val("e"),
                                en_exp_real(mark+1)));
        cst_free(head);
        return r;
    }

    if ((mark = strchr(numstring,'.')) != 0)
    {
        /* Split at the decimal point: "<integer>.<fraction>" */
        head = cst_strdup(numstring);
        head[mark - numstring] = '\0';
        r = val_append(en_exp_number(head),
                       cons_val(string_val("point"),
                                en_exp_digits(mark+1)));
        cst_free(head);
        return r;
    }

    /* No sign, exponent or decimal point: treat it as a plain number */
    return en_exp_number(numstring);
}
Esempio n. 2
0
static void ef_set(cst_features *f,const char *fv,const char *type)
{
    /* Store one "NAME=VALUE" setting from fv in f.  When type is given it
       selects the storage ("int", "float", anything else means string);
       when type is 0 the kind is guessed by matching VALUE against
       cst_rx_int and then cst_rx_double.  A setting without '=' is
       reported on stderr and skipped. */
    const char *eq = strchr(fv,'=');
    const char *value;
    char *key;
    int as_int, as_float;

    if (eq == 0)
    {
        fprintf(stderr,
                "flite: can't find '=' in featval \"%s\", ignoring it\n",
                fv);
        return;
    }

    /* Private copy of fv cut off at the '=' gives the feature name */
    key = cst_strdup(fv);
    key[eq - fv] = '\0';
    value = eq + 1;

    if (type)
    {
        as_int = cst_streq("int",type);
        as_float = !as_int && cst_streq("float",type);
    }
    else
    {
        as_int = cst_regex_match(cst_rx_int,value);
        as_float = !as_int && cst_regex_match(cst_rx_double,value);
    }

    if (as_int)
        feat_set_int(f,key,atoi(value));
    else if (as_float)
        feat_set_float(f,key,atof(value));
    else
        feat_set_string(f,key,value);

    cst_free(key);
}
Esempio n. 3
0
/*
 * Default utterance-break test; languages may override it.
 *
 * ts:     token stream; only ts->whitespace (the whitespace read before
 *         `token`) is consulted.
 * token:  the token that was just read (the candidate first word of the next
 *         utterance).
 * tokens: relation of the tokens collected so far; its tail is the last
 *         token of the current utterance.
 *
 * Returns TRUE when an utterance boundary should be placed before `token`:
 *   - the whitespace holds at least two newlines, or
 *   - the last token's "punc" feature contains ':', '?' or '!', or
 *   - it contains '.', `token` starts with an ASCII capital, and either
 *     more than one whitespace character separates the tokens or the last
 *     token does not look like an abbreviation (it neither ends in a capital
 *     nor is shorter than four characters and starts with one).
 * Otherwise returns FALSE.
 *
 * Note: strchr() also matches the terminating NUL, so an empty `token` or an
 * empty last token counts as "capital" in these tests, exactly as before.
 */
int default_utt_break(cst_tokenstream *ts,
                      const char *token,
                      cst_relation *tokens)
{
    static const char * const capitals = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
    const char *postpunct = item_feat_string(relation_tail(tokens), "punc");
    const char *ltoken = item_name(relation_tail(tokens));
    const int ltoken_len = (int)cst_strlen(ltoken);
    int last_is_abbrev;

    /* First and last newline differ => at least two newlines */
    if (cst_strchr(ts->whitespace,'\n') != cst_strrchr(ts->whitespace,'\n'))
        return TRUE;

    if (strchr(postpunct,':') ||
        strchr(postpunct,'?') ||
        strchr(postpunct,'!'))
        return TRUE;

    /* The remaining rules all need a full stop and a capitalised next word */
    if (!strchr(postpunct,'.'))
        return FALSE;
    if (!strchr(capitals,token[0]))
        return FALSE;

    if (cst_strlen(ts->whitespace) > 1)
        return TRUE;

    /* Only look at the final character when there is one: indexing an
       empty string at length-1 would read before the buffer. */
    last_is_abbrev =
        ((ltoken_len > 0) && strchr(capitals,ltoken[ltoken_len-1])) ||
        ((ltoken_len < 4) && strchr(capitals,ltoken[0]));

    return last_is_abbrev ? FALSE : TRUE;
}
Esempio n. 4
0
/*
 * Copy the lexicon entry that starts at l->data[p] into ucword, expanding it
 * through l->entry_hufftable when the lexicon has one.
 *
 * ucword:   destination buffer.
 * max_size: size of ucword in bytes; nothing is written when it is <= 0.
 * p:        offset of the entry inside l->data.
 * l:        lexicon that owns the data and the (optional) table.
 *
 * The output is always NUL-terminated and never exceeds max_size bytes; an
 * entry that does not fit is truncated.  Returns the number of characters
 * stored, not counting the terminator.  (The uncompressed path used to copy
 * without any bound and to return 0.)
 */
static int lex_uncompress_word(char *ucword,int max_size,
                               int p,const cst_lexicon *l)
{
    const unsigned char *cword = &l->data[p];
    int i, j = 0, length;

    if (max_size <= 0)
        return 0;   /* no room even for the terminator */

    if (l->entry_hufftable == 0)
    {
        /* can have "compressed" lexicons without compression */
        for (j = 0; cword[j] && (j+1 < max_size); j++)
            ucword[j] = (char)cword[j];
    }
    else
    {
        /* Each byte of the entry selects one string of the table */
        for (i = 0; cword[i]; i++)
        {
            length = cst_strlen(l->entry_hufftable[cword[i]]);
            if (j+length+1 >= max_size)
                break;      /* next piece would not fit */
            memmove(ucword+j,l->entry_hufftable[cword[i]],length);
            j += length;
        }
    }
    ucword[j] = '\0';

    return j;
}
Esempio n. 5
0
/*
 * Tokenize the input text of utterance u into a new "Token" relation.
 *
 * The tokenizer character classes come from the utterance features
 * "text_whitespace", "text_singlecharsymbols", "text_prepunctuation" and
 * "text_postpunctuation" (NULL when a feature is absent).  Every non-empty
 * token becomes one item carrying its name, surrounding whitespace and
 * punctuation, file position and line number.  Returns u.
 */
cst_utterance *default_tokenization(cst_utterance *u)
{
    const char *input = utt_input_text(u);
    cst_relation *tok_rel = utt_relation_create(u,"Token");
    cst_tokenstream *stream;
    const char *word;
    cst_item *entry;

    stream = ts_open_string(input,
                            get_param_string(u->features,"text_whitespace",NULL),
                            get_param_string(u->features,"text_singlecharsymbols",NULL),
                            get_param_string(u->features,"text_prepunctuation",NULL),
                            get_param_string(u->features,"text_postpunctuation",NULL));

    for (;;)
    {
        if (ts_eof(stream))
            break;

        word = ts_get(stream);
        if (cst_strlen(word) == 0)
            continue;           /* empty tokens are not recorded */

        entry = relation_append(tok_rel,NULL);
        item_set_string(entry,"name",word);
        item_set_string(entry,"whitespace",stream->whitespace);
        item_set_string(entry,"prepunctuation",stream->prepunctuation);
        item_set_string(entry,"punc",stream->postpunctuation);
        item_set_int(entry,"file_pos",stream->file_pos);
        item_set_int(entry,"line_number",stream->line_number);
    }

    ts_close(stream);

    return u;
}
Esempio n. 6
0
/*
 * Shorten a string to `size` characters by replacing its middle with "...".
 *
 * name: NUL-terminated string to shorten.
 * size: maximum length of the result.
 *
 * Returns a newly allocated string:
 *   - a copy of name when it already fits in size characters;
 *   - "****" when size is below 6 (too small for head + "..." + tail);
 *   - otherwise exactly size characters: the first (size-3)/2 characters of
 *     name, "...", and the last characters of name filling the rest.
 *
 * The tail length is computed separately from the head length so that an
 * even size (odd number of kept characters) takes the real last characters
 * of name; previously the tail started one character late and its final
 * character was name's terminator, giving a result one character short.
 */
char *bard_token_shorten(const char *name,int size)
{
    char *short_string;
    int sl;
    int head, tail;
    int i;

    sl = cst_strlen(name);

    if (sl <= size)
        return cst_strdup(name);
    if (size < 6)
        return cst_strdup("****");

    short_string = cst_alloc(char,size+1);

    head = (size-3)/2;          /* characters kept from the start */
    tail = (size-3) - head;     /* characters kept from the end */

    for (i=0; i<head; i++)
        short_string[i] = name[i];
    short_string[head] = '.';
    short_string[head+1] = '.';
    short_string[head+2] = '.';
    for (i=0; i<tail; i++)
        short_string[head+3+i] = name[(sl-tail)+i];
    short_string[size] = '\0';

    return short_string;
}
Esempio n. 7
0
/*
 * Tokenize raw_data and add its tokens to a new "Token" relation of u.
 *
 * u:          utterance to extend; its features supply the tokenizer
 *             character classes ("text_whitespace",
 *             "text_singlecharsymbols", "text_prepunctuation",
 *             "text_postpunctuation").
 * raw_data:   text to tokenize.
 * attributes: feature set passed to feat_copy_into() together with each new
 *             token item's features.
 *
 * Changes from the previous version:
 *   - one item is created per non-empty token and the same item receives
 *     both the feat_copy_into() call and the name/whitespace/punctuation
 *     strings (before, a nameless item was appended on every iteration and
 *     the strings went to a second item);
 *   - the post-punctuation feature is looked up under
 *     "text_postpunctuation", the key used by the other tokenizer call
 *     sites, instead of the misspelt "text_pospunctuation";
 *   - the token stream is closed before returning.
 */
static void add_raw_data(cst_utterance *u, const char *raw_data,
                         cst_features *attributes)
{
    cst_tokenstream *ts;
    cst_relation *r;
    cst_item *t;
    const char *token;

    r = utt_relation_create(u,"Token");
    ts =
     ts_open_string(raw_data,
                    get_param_string(u->features,"text_whitespace",NULL),
                    get_param_string(u->features,"text_singlecharsymbols",NULL),
                    get_param_string(u->features,"text_prepunctuation",NULL),
                    get_param_string(u->features,"text_postpunctuation",NULL));
    while (!(ts_eof(ts)))
    {
        token = ts_get(ts);
        if (cst_strlen(token) == 0)
            continue;   /* nothing to record for an empty token */

        t = relation_append(r,NULL);
        feat_copy_into(item_feats(t),attributes);
        item_set_string(t,"name",token);
        item_set_string(t,"whitespace",ts->whitespace);
        item_set_string(t,"prepunctuation",ts->prepunctuation);
        item_set_string(t,"punc",ts->postpunctuation);
    }

    ts_close(ts);
}
/*
 * Return the list of words that expand token/name.
 *
 * The expansion is the single word `name`.  It is produced when the token
 * carries a "phones" feature (whatever the length of name) or when name is
 * non-empty; otherwise the result is NULL.
 */
static cst_val *cmu_LANGNAME_tokentowords(cst_item *token, const char *name)
{
    if (item_feat_present(token,"phones") || (cst_strlen(name) > 0))
        return cons_val(string_val(name),NULL);

    return NULL;
}
/*
 * Find the last occurrence of character c in the NUL-terminated string str.
 *
 * c is converted to unsigned char before comparing, so a negative char value
 * passed through int still matches its byte.  The terminator is part of the
 * searched range: searching for 0 returns a pointer to the terminator.
 *
 * Returns a pointer to the match, or NULL when c does not occur.
 *
 * The string is scanned forwards while remembering the latest match, so no
 * pointer is ever moved before the start of str.
 */
unsigned char *cst_strrchr(const unsigned char *str, int c)
{
    const unsigned char wanted = (unsigned char)c;
    const unsigned char *last = NULL;
    const unsigned char *p;

    for (p = str; ; p++)
    {
        if (*p == wanted)
            last = p;
        if (*p == '\0')
            break;
    }

    /* Same const-dropping return convention as the standard strrchr() */
    return (unsigned char *)last;
}
Esempio n. 10
0
/*
 * Return a newly allocated copy of str (terminator included), or NULL when
 * str is NULL.  The copy is obtained with cst_alloc().
 */
char *cst_strdup(const char *str)
{
    char *copy;
    size_t nbytes;

    if (!str)
        return NULL;

    nbytes = cst_strlen((const char *)str)+1;   /* +1 for the terminator */
    copy = cst_alloc(char,nbytes);
    memmove(copy,str,nbytes);

    return copy;
}
Esempio n. 11
0
/*
 * Expand a digit string the way years or ids are read: mostly in pairs.
 *
 * Cases, tested in this order (n = length of numstring):
 *   n == 4, "??00": "?000" goes to en_exp_number(); otherwise the first
 *                   pair as a number followed by "hundred"
 *   n == 3, "?00" with a non-zero first digit: digit + "hundred"
 *   n == 2, "00":   "zero" "zero"
 *   n == 2, "0?":   "oh" + the digit
 *   n < 3, or n == 4 with "?00?": en_exp_number() on the whole string
 *   n odd:          first digit, then the rest expanded recursively
 *   otherwise:      first pair as a number, then the rest recursively
 */
cst_val *en_exp_id(const char *numstring)
{
    const int len = (int)cst_strlen(numstring);
    char pair[3];

    if ((len == 4) && (numstring[2] == '0') && (numstring[3] == '0'))
    {
        if (numstring[1] == '0')
            return en_exp_number(numstring); /* 2000, 3000 */

        pair[0] = numstring[0];
        pair[1] = numstring[1];
        pair[2] = '\0';
        return val_append(en_exp_number(pair),
                          cons_val(string_val("hundred"),0));
    }

    if ((len == 3) &&
        (numstring[0] != '0') &&
        (numstring[1] == '0') &&
        (numstring[2] == '0'))
        return cons_val(string_val(digit2num[numstring[0]-'0']),
                        cons_val(string_val("hundred"),0));

    if ((len == 2) && (numstring[0] == '0'))
    {
        if (numstring[1] == '0')
            return cons_val(string_val("zero"),
                            cons_val(string_val("zero"),NULL));
        return cons_val(string_val("oh"),
                        en_exp_digits(&numstring[1]));
    }

    if ((len < 3) ||
        ((len == 4) && (numstring[1] == '0') && (numstring[2] == '0')))
        return en_exp_number(numstring);

    if (len%2 == 1)
        /* Odd length: peel off one digit so the remainder splits in pairs */
        return cons_val(string_val(digit2num[numstring[0]-'0']),
                        en_exp_id(&numstring[1]));

    pair[0] = numstring[0];
    pair[1] = numstring[1];
    pair[2] = '\0';
    return val_append(en_exp_number(pair),en_exp_id(&numstring[2]));
}
Esempio n. 12
0
/*
 * Build a one-line description of a relation for display.
 *
 * The relation name is read from the utterance feature
 * "print_info_relation".  The message "<relname>: name1 name2 ..." is
 * assembled from the "name" feature of each item and converted into
 * fl_tts_msg.  When the feature is absent an empty message is stored.
 * Returns u in every case.
 *
 * The text is appended at the current end of the buffer rather than by
 * formatting the buffer into itself, and every append is checked against
 * the space actually left; when the next name does not fit, " ..." is added
 * and the listing stops.
 * Assumes FL_MAX_MSG_CHARS is comfortably larger than 8 -- TODO confirm.
 */
cst_utterance *flowm_print_relation_callback(cst_utterance *u)
{
    char rst[FL_MAX_MSG_CHARS];
    const char *name;
    const char *relname;
    const char *space = "";
    cst_item *item;
    int used;       /* characters currently in rst */
    int need;       /* characters the next append would add */

    relname = get_param_string(u->features,"print_info_relation", NULL);
    if (!relname)
    {
        mbstowcs(fl_tts_msg,"",FL_MAX_MSG_CHARS);
        return u;
    }

    /* Cap the relation name so that ": " and a later " ..." always fit */
    cst_sprintf(rst,"%.*s: ",(int)(FL_MAX_MSG_CHARS-7),relname);
    used = (int)cst_strlen(rst);

    for (item=relation_head(utt_relation(u,relname));
         item; item=item_next(item))
    {
        name = item_feat_string(item,"name");
        need = (int)cst_strlen(space) + (int)cst_strlen(name);

        if (used+need+4 < FL_MAX_MSG_CHARS)
        {
            cst_sprintf(rst+used,"%s%s",space,name);
            used += need;
        }
        else
        {
            /* Room for the ellipsis is reserved by the "+4" above */
            if (used+4 < FL_MAX_MSG_CHARS)
                cst_sprintf(rst+used," ...");
            break;
        }
        space = " ";
    }
    mbstowcs(fl_tts_msg,rst,FL_MAX_MSG_CHARS);

    return u;
}
Esempio n. 13
0
/*
 * Return the ordinal reading of a digit string ("21" -> twenty first).
 *
 * rawnumstring: digits, optionally with ',' group separators, which are
 *               removed before expansion.  NULL yields NULL.
 *
 * The cardinal expansion from en_exp_number() is reversed so that its last
 * word is at the head; that word is replaced by its ordinal form (looked up
 * in the digit/teen/enty tables, or one of "hundredth", "thousandth",
 * "millionth", "billionth") and the list is rebuilt in reading order.
 *
 * When the expansion is empty, NULL is returned.  When the last word has no
 * known ordinal form, the cardinal words are returned unchanged and in
 * reading order (the list is reversed back first).
 * NOTE(review): this relies on val_reverse() reversing the list it is given
 * and returning the new head, as the existing call already assumes.
 */
cst_val *en_exp_ordinal(const char *rawnumstring)
{
    cst_val *card, *o;
    const cst_val *t;
    const char *l;
    const char *ord;
    char *numstring;
    int i,j;

    if (rawnumstring == NULL)
        return NULL;

    /* Copy without the thousands separators */
    numstring = cst_strdup(rawnumstring);
    for (j=i=0; rawnumstring[i] != '\0'; i++)
        if (rawnumstring[i] != ',')
        {
            numstring[j] = rawnumstring[i];
            j++;
        }
    numstring[j] = '\0';
    card = val_reverse(en_exp_number(numstring));
    cst_free(numstring);

    if (card == NULL)   /* e.g. "" or "00": there is no last word */
        return NULL;

    l = val_string(val_car(card));
    ord = 0;
    for (i=0; i<10; i++)
        if (cst_streq(l,digit2num[i]))
            ord = ord2num[i];
    if (!ord)
        for (i=0; i<10; i++)
            if (cst_streq(l,digit2teen[i]))
                ord = ord2teen[i];
    if (!ord)
        for (i=0; i<10; i++)
            if (cst_streq(l,digit2enty[i]))
                ord = ord2enty[i];
    if (cst_streq(l,"hundred"))
        ord = "hundredth";
    if (cst_streq(l,"thousand"))
        ord = "thousandth";
    if (cst_streq(l,"million"))
        ord = "millionth";
    if (cst_streq(l,"billion"))
        ord = "billionth";
    if (!ord)  /* dunno, so don't convert anything */
        return val_reverse(card);

    /* Consing the reversed tail onto the ordinal restores reading order */
    o = cons_val(string_val(ord),0);
    for (t=val_cdr(card); t; t=val_cdr(t))
        o = cons_val(val_car(t),o);
    delete_val(card);
    return o;
}
Esempio n. 14
0
/*
 * Move trailing post-punctuation from ts->token to ts->postpunctuation.
 *
 * Scans backwards from the terminator while characters are in the
 * TS_CHARCLASS_POSTPUNCT class; index 0 is never examined, so a token keeps
 * at least its first character.  When something was found, the tail
 * (terminator included) is copied to ts->postpunctuation and the token is
 * cut just after the last kept character.
 */
static void get_token_postpunctuation(cst_tokenstream *ts)
{
    const int end = cst_strlen(ts->token);
    int keep = end;     /* index of the last character that stays */
    int tail_bytes;

    while ((keep > 0) &&
           ((ts->token[keep] == '\0') ||
            (ts_charclass(ts->token[keep],TS_CHARCLASS_POSTPUNCT,ts))))
        keep--;

    if (keep == end)
        return;         /* empty token: nothing to split */

    tail_bytes = end-keep;
    if (tail_bytes >= ts->postp_max)
        extend_buffer(&ts->postpunctuation,&ts->postp_max);
    /* Copy postpunctuation from token */
    memmove(ts->postpunctuation,&ts->token[keep+1],tail_bytes);
    /* truncate token at postpunctuation */
    ts->token[keep+1] = '\0';
}
Esempio n. 15
0
/*
 * Concatenate the string elements of list sl into one new string.
 *
 * sl: list to join; elements for which val_stringp() is false are skipped.
 * Returns a newly allocated, NUL-terminated string (empty when sl holds no
 * strings).
 *
 * Each piece is written at the current end of the result.  The earlier form
 * formatted the buffer into itself (source and destination overlapping) and
 * rescanned the whole result for every element.
 */
cst_string *cst_implode(const cst_val *sl)
{
    const cst_val *v;
    const char *piece;
    int total = 0;      /* combined length of all string elements */
    int used = 0;       /* characters written so far */
    char *s;

    for (v=sl; v; v=val_cdr(v))
    {
        if (val_stringp(val_car(v)))
            total += cst_strlen(val_string(val_car(v)));
    }

    s = cst_alloc(cst_string,total+1);

    for (v=sl; v; v=val_cdr(v))
    {
        if (val_stringp(val_car(v)))
        {
            piece = val_string(val_car(v));
            cst_sprintf(s+used,"%s",piece);
            used += cst_strlen(piece);
        }
    }
    s[used] = '\0';

    return s;
}
Esempio n. 16
0
/*
 * Move the token stream to position pos and return the recorded position.
 *
 * For a stream with a file handle the position is whatever cst_fseek()
 * returns; for a string-backed stream pos is clamped to [0, length of the
 * string]; otherwise pos is taken as is.  current_char is reset to a space.
 * Note this doesn't preserve line_pos.
 */
int ts_set_stream_pos(cst_tokenstream *ts, int pos)
{
    int target = pos;   /* used as is unless one of the cases below applies */

    if (ts->fd)
        target = (int)cst_fseek(ts->fd,(long)pos,CST_SEEK_ABSOLUTE);
    else if (ts->string_buffer)
    {
        int limit = cst_strlen(ts->string_buffer);

        if (pos > limit)
            target = limit;
        else if (pos < 0)
            target = 0;
    }

    ts->file_pos = target;
    ts->current_char = ' ';  /* To be safe */

    return ts->file_pos;
}
Esempio n. 17
0
}

/*
 * Move the token stream to position pos and return the recorded position.
 *
 * - File-backed stream: the position is whatever cst_fseek() returns.  When
 *   FLITE_PLUS_HTS_ENGINE is defined no seek is performed, so the recorded
 *   position is left as it was (previously an uninitialised value was
 *   stored in that configuration).
 * - String-backed stream: pos is clamped to [0, length of the string].
 * - Neither: pos is taken as is.
 * current_char is reset to a space in every case.
 * Note this doesn't preserve line_pos.
 */
int ts_set_stream_pos(cst_tokenstream *ts, int pos)
{
    int new_pos, l;

    if (ts->fd)
    {
#ifdef FLITE_PLUS_HTS_ENGINE
        new_pos = (int)ts->file_pos;    /* nothing was moved */
#else
        new_pos = (int)cst_fseek(ts->fd,(long)pos,CST_SEEK_ABSOLUTE);
#endif /* !FLITE_PLUS_HTS_ENGINE */
    }
    else if (ts->string_buffer)
    {
        l = cst_strlen(ts->string_buffer);
        if (pos > l)
            new_pos = l;
        else if (pos < 0)
            new_pos = 0;
        else
            new_pos = pos;
    }
    else
        new_pos = pos;  /* not sure it can get here */
    ts->file_pos = new_pos;
    ts->current_char = ' ';  /* To be safe */

    return ts->file_pos;
}
Esempio n. 18
0
/*
 * Read the next token, treating `quote` as a string delimiter.
 *
 * ts:     token stream to read from.
 * quote:  character that opens and closes a quoted token.
 * escape: character after which the next character is taken literally, so
 *         that a quote may appear inside the token.
 *
 * Leading whitespace is skipped into ts->whitespace.  If the next character
 * is `quote`, everything up to the closing quote is collected in ts->token
 * with escape sequences resolved.  Otherwise the input is read like an
 * ordinary token (prepunctuation, symbol, postpunctuation).
 * Returns ts->token.
 *
 * Both characters are added to the stream's TS_CHARCLASS_QUOTE class and
 * stay there.  The class table is indexed through unsigned char: converting
 * a negative char straight to unsigned int would give an index far outside
 * the table.
 */
const unsigned char *ts_get_quoted_token(cst_tokenstream *ts,
                                         char quote,
                                         char escape)
{
    int l;

    /* Hmm can't change quotes within a ts */
    ts->charclass[(unsigned char)quote] |= TS_CHARCLASS_QUOTE;
    ts->charclass[(unsigned char)escape] |= TS_CHARCLASS_QUOTE;

    /* skipping whitespace */
    get_token_sub_part(ts,TS_CHARCLASS_WHITESPACE,
                       &ts->whitespace,
                       &ts->ws_max);
    ts->token_pos = ts->file_pos - 1;

    if (ts->current_char == quote)
    {   /* go until quote */
        ts_getc(ts);
        l=0;
        while (!ts_eof(ts))
        {
            get_token_sub_part_2(ts,TS_CHARCLASS_QUOTE,
                                 &ts->token,&ts->token_max);
            if (ts->current_char == escape)
            {
                /* Append the escaped character itself to the token */
                ts_getc(ts);
                l = cst_strlen(ts->token);
                if (l+1 >= ts->token_max)
                    extend_buffer(&ts->token,&ts->token_max);
                ts->token[l] = ts->current_char;
                ts->token[l+1] = '\0';
                ts_getc(ts);
            }
            else
                break;
        }
        ts_getc(ts);    /* step over the closing quote */
    }
    else /* its not quotes, like to be careful dont you */
    {    /* treat is as standard token                  */
        /* Get prepunctuation */
        get_token_sub_part(ts,TS_CHARCLASS_PREPUNCT,
                           &ts->prepunctuation,
                           &ts->prep_max);
        /* Get the symbol itself */
        /* NOTE(review): the one-character branch is taken when the current
           character is NOT in the single-char class, which looks inverted
           -- confirm against the regular token reader before changing. */
        if (!ts_charclass(ts->current_char,TS_CHARCLASS_SINGLECHAR,ts))
        {
            if (2 >= ts->token_max) extend_buffer(&ts->token,&ts->token_max);
            ts->token[0] = ts->current_char;
            ts->token[1] = '\0';
            ts_getc(ts);
        }
        else
            get_token_sub_part_2(ts,
                                 TS_CHARCLASS_WHITESPACE,    /* end class1 */
                                 &ts->token,
                                 &ts->token_max);
        /* This'll have token *plus* post punctuation in ts->token */
        /* Get postpunctuation */
        get_token_postpunctuation(ts);
    }

    return ts->token;
}
Esempio n. 19
0
/*
 * Build Segment, Syllable, Word and SylStructure relations directly from a
 * Token relation whose tokens are phone names.
 *
 * All segments hang under one word item named "phonestring".  A token "-"
 * ends the current syllable; a trailing '1' or '0' on a phone name is
 * stripped and stored as the syllable's "stress" feature.  A name that is
 * not in the utterance's phoneset is reported through cst_errmsg() and
 * cst_error().  Returns u.
 *
 * Changes from the previous version:
 *   - the stress suffix is only examined when the name is non-empty (an
 *     empty name used to be indexed at position -1);
 *   - a Segment item is appended only for a valid phone, so "-" tokens no
 *     longer leave a nameless item in the Segment relation.
 */
static cst_utterance *tokentosegs(cst_utterance *u)
{
    cst_item *t;
    cst_relation *seg, *syl, *sylstructure, *word;
    cst_item *sylitem, *sylstructureitem, *worditem, *sssyl, *segitem;
    cst_phoneset *ps;
    const char *pname;
    char *name;
    int len;

    ps = val_phoneset(utt_feat_val(u, "phoneset"));
    /* Just copy tokens into the Segment relation */
    seg = utt_relation_create(u, "Segment");
    syl = utt_relation_create(u, "Syllable");
    word = utt_relation_create(u, "Word");
    sylstructure = utt_relation_create(u, "SylStructure");
    sssyl = sylitem = worditem = sylstructureitem = 0;
    for (t = relation_head(utt_relation(u, "Token")); t; t = item_next(t))
    {
        pname = item_feat_string(t, "name");
        name = cst_strdup(pname);   /* private copy: the suffix is cut off */

        if (worditem == 0)
        {
            worditem = relation_append(word,NULL);
            item_set_string(worditem, "name", "phonestring");
            sylstructureitem = relation_append(sylstructure,worditem);
        }
        if (sylitem == 0)
        {
            sylitem = relation_append(syl,NULL);
            sssyl = item_add_daughter(sylstructureitem,sylitem);
        }

        len = cst_strlen(name);
        if ((len > 0) && (name[len-1] == '1'))
        {
            item_set_string(sssyl,"stress","1");
            name[len-1] = '\0';
        }
        else if ((len > 0) && (name[len-1] == '0'))
        {
            item_set_string(sssyl,"stress","0");
            name[len-1] = '\0';
        }

        if (cst_streq(name,"-"))
        {
            sylitem = 0;  /* syllable break */
        }
        else if (phone_id(ps, name) == -1)
        {
            cst_errmsg("Phone `%s' not in phoneset\n", pname);
            cst_error();
        }
        else
        {
            segitem = relation_append(seg, NULL);
            item_add_daughter(sssyl,segitem);
            item_set_string(segitem, "name", name);
        }

        cst_free(name);
    }

    return u;
}
Esempio n. 20
0
/*
 * Look up the pronunciation of every word of u and build the Syllable,
 * SylStructure and Segment relations from it.
 *
 * For each item of the "Word" relation the phone list comes from, in order
 * of preference:
 *   1. the "phones" feature of the word's parent in the Token relation,
 *   2. the lexicon addenda (entry found by val_assoc_string()),
 *   3. lex_lookup() with the word's "pos" feature.
 * Only the list from lex_lookup() is deleted after use.
 *
 * Each phone becomes a Segment item placed under the current syllable; a
 * trailing '1' or '0' on the phone name is stripped and remembered as the
 * stress, which is stored on the syllable when lex->syl_boundary() reports
 * its end.  Returns u.
 *
 * The stress suffix is only examined for non-empty phone names (an empty
 * name used to be indexed at position -1), and the token lookup is done once
 * per word instead of up to three times.
 */
cst_utterance *default_lexical_insertion(cst_utterance *u)
{
    cst_item *word;
    cst_relation *sylstructure,*seg,*syl;
    cst_lexicon *lex;
    const cst_val *lex_addenda;
    const cst_val *p, *wp = NULL;
    char *phone_name;
    const char *stress = "0";
    const char *pos;
    cst_val *phones;
    cst_item *ssword, *sssyl, *segitem, *sylitem, *seg_in_syl;
    cst_item *token;
    int token_has_phones;
    int last;           /* index of the final character of phone_name */

    lex = val_lexicon(feat_val(u->features,"lexicon"));
    lex_addenda = lex->lex_addenda;

    syl = utt_relation_create(u,"Syllable");
    sylstructure = utt_relation_create(u,"SylStructure");
    seg = utt_relation_create(u,"Segment");

    for (word=relation_head(utt_relation(u,"Word"));
         word; word=item_next(word))
    {
        ssword = relation_append(sylstructure,word);
        pos = ffeature_string(word,"pos");
        phones = NULL;
        wp = NULL;

        /* FIXME: need to make sure that textanalysis won't split
           tokens with explicit pronunciation (or that it will
           propagate such to words, then we can remove the path here) */
        token = item_parent(item_as(word, "Token"));
        token_has_phones = item_feat_present(token, "phones");
        if (token_has_phones)
            phones = (cst_val *) item_feat(token, "phones");
        else
        {
            wp = val_assoc_string(item_feat_string(word, "name"),lex_addenda);
            if (wp)
                phones = (cst_val *)val_cdr(val_cdr(wp));
            else
                phones = lex_lookup(lex,item_feat_string(word,"name"),pos);
        }

        for (sssyl=NULL,sylitem=NULL,p=phones; p; p=val_cdr(p))
        {
            if (sylitem == NULL)
            {
                /* First phone of a new syllable */
                sylitem = relation_append(syl,NULL);
                sssyl = item_add_daughter(ssword,sylitem);
                stress = "0";
            }
            segitem = relation_append(seg,NULL);
            phone_name = cst_strdup(val_string(val_car(p)));
            last = (int)cst_strlen(phone_name) - 1;
            if ((last >= 0) && (phone_name[last] == '1'))
            {
                stress = "1";
                phone_name[last] = '\0';
            }
            else if ((last >= 0) && (phone_name[last] == '0'))
            {
                stress = "0";
                phone_name[last] = '\0';
            }
            item_set_string(segitem,"name",phone_name);
            seg_in_syl = item_add_daughter(sssyl,segitem);
            if ((lex->syl_boundary)(seg_in_syl,val_cdr(p)))
            {
                sylitem = NULL;
                if (sssyl)
                    item_set_string(sssyl,"stress",stress);
            }
            cst_free(phone_name);
        }
        /* Lists taken from the token or the addenda are not ours to free */
        if (!token_has_phones && ! wp)
            delete_val(phones);
    }

    return u;
}
Esempio n. 21
0
/*
 * Synthesize the text in a file, one utterance at a time.
 *
 * Tokens are read from `filename` and collected in a "Token" relation until
 * an empty token is read, more than 500 tokens have accumulated, or the
 * break function reports an utterance boundary; the utterance is then passed
 * to the optional user callback, synthesized with flite_do_synth() and
 * handed to flite_process_output().
 *
 * filename: file to read.
 * voice:    supplies the tokenizer character classes and the optional
 *           "file_start_position", "utt_break" and "utt_user_callback"
 *           features.
 * outtype:  "play", "none" or "stream"; any other value is passed to
 *           cst_wave_save_riff() as the target of an initial empty wave.
 *
 * Returns the sum of the values returned by flite_process_output().
 * NOTE(review): when the file cannot be opened the function returns 1 --
 * confirm callers do not read that as a duration.
 */
float flite_file_to_speech(const char *filename, 
			   cst_voice *voice,
			   const char *outtype)
{
    cst_utterance *utt;
    cst_tokenstream *ts;
    const char *token;
    cst_item *t;
    cst_relation *tokrel;
    float durs = 0;
    int num_tokens;
    cst_wave *w;
    cst_breakfunc breakfunc = default_utt_break;
    cst_uttfunc utt_user_callback = 0;
    int fp;

    if ((ts = ts_open(filename,
	      get_param_string(voice->features,"text_whitespace",NULL),
	      get_param_string(voice->features,"text_singlecharsymbols",NULL),
	      get_param_string(voice->features,"text_prepunctuation",NULL),
	      get_param_string(voice->features,"text_postpunctuation",NULL)))
	== NULL)
    {
	cst_errmsg("failed to open file \"%s\" for reading\n",
		   filename);
	return 1;
    }
    /* Optional start offset into the file */
    fp = get_param_int(voice->features,"file_start_position",0);
    if (fp > 0)
        ts_set_stream_pos(ts,fp);

    /* The voice may replace the default break function */
    if (feat_present(voice->features,"utt_break"))
	breakfunc = val_breakfunc(feat_val(voice->features,"utt_break"));

    if (feat_present(voice->features,"utt_user_callback"))
	utt_user_callback = val_uttfunc(feat_val(voice->features,"utt_user_callback"));

    /* If its a file to write to, create and save an empty wave file */
    /* as we are going to incrementally append to it                 */
    if (!cst_streq(outtype,"play") && 
        !cst_streq(outtype,"none") &&
        !cst_streq(outtype,"stream"))
    {
	w = new_wave();
	cst_wave_resize(w,0,1);
	cst_wave_set_sample_rate(w,16000);
	cst_wave_save_riff(w,outtype);  /* an empty wave */
	delete_wave(w);
    }

    num_tokens = 0;
    utt = new_utterance();
    tokrel = utt_relation_create(utt, "Token");
    /* Keep going after end of file while tokens are still pending, so the */
    /* last utterance gets synthesized too                                 */
    while (!ts_eof(ts) || num_tokens > 0)
    {
	token = ts_get(ts);
	if ((cst_strlen(token) == 0) ||
	    (num_tokens > 500) ||  /* need an upper bound */
	    (relation_head(tokrel) && 
	     breakfunc(ts,token,tokrel)))
	{
	    /* An end of utt, so synthesize it */
            if (utt_user_callback)
                utt = (utt_user_callback)(utt);

            /* A callback that returns NULL ends the whole run */
            if (utt)
            {
                utt = flite_do_synth(utt,voice,utt_synth_tokens);
                durs += flite_process_output(utt,outtype,TRUE);
                delete_utterance(utt); utt = NULL;
            }
            else 
                break;

	    if (ts_eof(ts)) break;

	    /* The token just read opens the next utterance */
	    utt = new_utterance();
	    tokrel = utt_relation_create(utt, "Token");
	    num_tokens = 0;
	}
	num_tokens++;

	t = relation_append(tokrel, NULL);
	item_set_string(t,"name",token);
	item_set_string(t,"whitespace",ts->whitespace);
	item_set_string(t,"prepunctuation",ts->prepunctuation);
	item_set_string(t,"punc",ts->postpunctuation);
        /* Mark it at the beginning of the token */
	item_set_int(t,"file_pos",
                     ts->file_pos-(1+ /* as we are already on the next char */
                                   cst_strlen(token)+
                                   cst_strlen(ts->prepunctuation)+
                                   cst_strlen(ts->postpunctuation)));
	item_set_int(t,"line_number",ts->line_number);
    }

    /* NOTE(review): utt is NULL here after either break above -- confirm */
    /* that delete_utterance() accepts NULL                                */
    delete_utterance(utt);
    ts_close(ts);
    return durs;
}
Esempio n. 22
0
/*
 * Expand a digit string into the list of words that read it as a number.
 *
 * Strings of up to three digits are expanded directly ("00" gives an empty
 * list).  Longer strings are split into a leading group and a tail of 3, 6
 * or 9 digits; the group is expanded recursively and followed by "thousand",
 * "million" or "billion" and the expansion of the tail.  If the leading
 * group expands to nothing, only the tail is expanded.  Strings of 13 or
 * more digits are read digit by digit.
 */
cst_val *en_exp_number(const char *numstring)
{
    const int ndigits = cst_strlen(numstring);
    const char *scale;      /* word that follows the leading group */
    char head[4];           /* leading group, at most three digits */
    cst_val *head_words;
    int tail_len, k;

    switch (ndigits)
    {
    case 0:
        return NULL;
    case 1:
        return en_exp_digits(numstring);
    case 2:
        if (numstring[0] == '0')
        {
            if (numstring[1] == '0')
                return 0;
            return cons_val(string_val(digit2num[numstring[1]-'0']),0);
        }
        if (numstring[1] == '0')
            return cons_val(string_val(digit2enty[numstring[0]-'0']),0);
        if (numstring[0] == '1')
            return cons_val(string_val(digit2teen[numstring[1]-'0']),0);
        return cons_val(string_val(digit2enty[numstring[0]-'0']),
                        en_exp_digits(numstring+1));
    case 3:
        if (numstring[0] == '0')
            return en_exp_number(numstring+1);
        return cons_val(string_val(digit2num[numstring[0]-'0']),
                        cons_val(string_val("hundred"),
                                 en_exp_number(numstring+1)));
    default:
        break;
    }

    if (ndigits < 7)
    {
        tail_len = 3;
        scale = "thousand";
    }
    else if (ndigits < 10)
    {
        tail_len = 6;
        scale = "million";
    }
    else if (ndigits < 13)
    {   /* If there are pedantic brits out there, tough!, 10^9 is a billion */
        tail_len = 9;
        scale = "billion";
    }
    else  /* Way too many digits here, to be a number */
        return en_exp_digits(numstring);

    /* Copy the digits in front of the tail into their own string */
    for (k=0; k < ndigits-tail_len; k++)
        head[k] = numstring[k];
    head[k]='\0';

    head_words = en_exp_number(head);
    if (head_words == 0)  /* leading group is all zeros */
        return en_exp_number(numstring+k);

    return val_append(head_words,cons_val(string_val(scale),
                                          en_exp_number(numstring+k)));
}
Esempio n. 23
0
/*
 * Predict the phones of a word with letter-to-sound rules.
 *
 * word:  word to pronounce.
 * feats: extra feature characters appended to every letter's context.
 * r:     rule set (context window size, letter index, models, phone table
 *        and optional letter table).
 *
 * The word is padded on both sides with a boundary marker and filler
 * characters.  Letters are processed from last to first, so consing each
 * predicted phone onto the front leaves the list in reading order.
 * "epsilon" predictions are dropped and a phone name containing '-' is
 * split into two phones.  Without a letter table, characters outside
 * 'a'..'z' are skipped.  Returns the phone list (NULL when empty).
 *
 * The feature buffer is sized for what is written into it -- both context
 * windows, feats and the terminator -- on top of the previous size, which
 * had no room for the terminator once feats was as long as
 * r->context_extra_feats.
 */
cst_val *lts_apply(const char *word,const char *feats,const cst_lts_rules *r)
{
    int pos, index, i;
    cst_val *phones=0;
    cst_lts_letter *fval_buff;
    cst_lts_letter *full_buff;
    cst_lts_phone phone;
    char *left, *right, *p;
    char hash;
    char zeros[8];

    /* For feature vals for each letter */
    fval_buff = cst_alloc(cst_lts_letter,
                          (r->context_window_size*2)+
                          r->context_extra_feats+
                          cst_strlen(feats)+1);
    /* Buffer with added contexts */
    full_buff = cst_alloc(cst_lts_letter,
                          (r->context_window_size*2)+
                          cst_strlen(word)+1); /* TBD assumes single POS feat */
    if (r->letter_table)
    {
        /* zeros is not terminated; only context_window_size-1 (< 8)
           characters of it are read through the precision */
        for (i=0; i<8; i++) zeros[i] = 2;
        cst_sprintf((char *)full_buff,
                    "%.*s%c%s%c%.*s",
                    r->context_window_size-1, zeros,
                    1,
                    word,
                    1,
                    r->context_window_size-1, zeros);
        hash = 1;
    }
    else
    {
        /* Assumes l_letter is a char and context < 8 */
        cst_sprintf((char *)full_buff,
                    "%.*s#%s#%.*s",
                    r->context_window_size-1, "00000000",
                    word,
                    r->context_window_size-1, "00000000");
        hash = '#';
    }

    /* Do the prediction backwards so we don't need to reverse the answer */
    for (pos = r->context_window_size + cst_strlen(word) - 1;
         full_buff[pos] != hash;
         pos--)
    {
        /* Fill the features buffer for the predictor */
        cst_sprintf((char *)fval_buff,
                    "%.*s%.*s%s",
                    r->context_window_size,
                    full_buff+pos-r->context_window_size,
                    r->context_window_size,
                    full_buff+pos+1,
                    feats);
        if ((!r->letter_table
             && ((full_buff[pos] < 'a') || (full_buff[pos] > 'z'))))
        {
#ifdef EXCESSIVELY_CHATTY
            cst_errmsg("lts:skipping unknown char \"%c\"\n",
                       full_buff[pos]);
#endif
            continue;
        }
        if (r->letter_table)
            index = full_buff[pos] - 3;
        else
            index = (full_buff[pos]-'a')%26;
        phone = apply_model(fval_buff,
                            r->letter_index[index],
                            r->models);
        /* delete epsilons and split dual-phones */
        if (cst_streq("epsilon",r->phone_table[phone]))
            continue;
        else if ((p=strchr(r->phone_table[phone],'-')) != NULL)
        {
            left = cst_substr(r->phone_table[phone],0,
                              cst_strlen(r->phone_table[phone])-cst_strlen(p));
            right = cst_substr(r->phone_table[phone],
                               (cst_strlen(r->phone_table[phone])-cst_strlen(p))+1,
                               (cst_strlen(p)-1));
            phones = cons_val(string_val(left),
                              cons_val(string_val(right),phones));
            cst_free(left);
            cst_free(right);
        }
        else
            phones = cons_val(string_val(r->phone_table[phone]),phones);
    }

    cst_free(full_buff);
    cst_free(fval_buff);

    return phones;
}
Esempio n. 24
0
cst_file cst_url_open(const char *url)
{
    /* Always opens it for reading */
    cst_tokenstream *urlts;
    const cst_string *protocol;
    int port;
    cst_string *host;
    int fd;
    char *url_request;
    char *path;
    cst_file ofd;
    int state,n;
    char c;

    urlts = ts_open_string(url, "", ":/", "", "");

    protocol = ts_get(urlts);
    if (cst_streq(protocol,"http"))
    {
#ifdef CST_NO_SOCKETS
        ts_close(urlts);
        return NULL;
#else
        if (!cst_streq(ts_get(urlts),":") ||
            !cst_streq(ts_get(urlts),"/") ||
            !cst_streq(ts_get(urlts),"/"))
        {
            ts_close(urlts);
            return NULL;
        }
        host = cst_strdup(ts_get(urlts));
        if (cst_streq(ts_get(urlts),":"))
            port = (int)cst_atof(ts_get(urlts));
        else
            port = 80;

        /* Open port to web server */
        fd = cst_socket_open(host,port);
        if (fd < 0)
        {
            cst_free(host);
            ts_close(urlts);
            return NULL;
        }

        url_request = cst_alloc(char,cst_strlen(url)+17);
        cst_sprintf(url_request,"GET %s HTTP/1.2\n\n",url);
        n = write(fd,url_request,cst_strlen(url_request));
        cst_free(url_request);

        /* Skip http header -- until \n\n */
        state=0;
        while (state != 4)
        {
            n=read(fd,&c,1);
            if (n == 0)
            {   /* eof or link gone down */
                cst_free(host);
                ts_close(urlts);
                return NULL;
            }
            if ((state == 0) && (c == '\r'))
                state=1;
            else if ((state == 1) && (c == '\n'))
                state=2;
            else if ((state == 2) && (c == '\r'))
                state=3;
            else if ((state == 3) && (c == '\n'))
                state=4;
            /* Not sure you can get no CRs in the stream */
            else if ((state == 0) && (c == '\n'))
                state=2;
            else if ((state == 2) && (c == '\n'))
                state=4;
            else
                state = 0;
        }

        ofd = fdopen(fd,"rb");

        ts_close(urlts);
        cst_free(host);

        return ofd;
#endif
    }
Esempio n. 25
0
/*
 * Per-utterance callback: show the utterance text and file position in the
 * TTS window, or stop synthesis.
 *
 * When flowm_play_status is neither FLOWM_PLAY nor FLOWM_SKIP the utterance
 * is deleted and 0 is returned, which stops synthesis of the next
 * utterance.  Otherwise u is returned; if TTSWindow is set, the tokens
 * (with their punctuation) are joined into a message for FL_SYNTHTEXT, the
 * file position of the first token is recorded in flowm_file_pos and
 * flowm_prev_utt_pos, the percentage field FL_FILEPOS is refreshed, and a
 * pending FLOWM_SKIP is turned back into FLOWM_PLAY.
 *
 * Text is appended at the current end of the message buffer; the earlier
 * form formatted the buffer into itself, with source and destination
 * overlapping.  The size checks are unchanged.
 */
cst_utterance *flowm_utt_callback(cst_utterance *u)
{
    char rst[FL_MAX_MSG_CHARS];
    const char *tok, *prepunct, *punc;
    const char *space;
    cst_item *item;
    int used;           /* characters currently in rst */
    int extend_length;

    /* In order to stop the synthesizer if the STOP button is pressed */
    /* This stops the synthesis of the next utterance */
    if ((flowm_play_status != FLOWM_PLAY) &&
        (flowm_play_status != FLOWM_SKIP))
    {
        delete_utterance(u);
        return 0;
    }

    if (!TTSWindow)
        return u;

    rst[0] = '\0';
    used = 0;
    space = "";
    for (item=relation_head(utt_relation(u,"Token"));
         item; item=item_next(item))
    {
        tok = item_feat_string(item,"name");
        prepunct = item_feat_string(item,"prepunctuation");
        punc = item_feat_string(item,"punc");
        if (cst_streq("",space))
            /* Only do this on the first token/word */
            flowm_file_pos = item_feat_int(item,"file_pos");
        extend_length = used + 1 +
            cst_strlen(prepunct)+
            cst_strlen(punc);
        if (cst_strlen(tok)+extend_length+4 < FL_MAX_MSG_CHARS)
        {
            cst_sprintf(rst+used,"%s%s%s%s",space,prepunct,tok,punc);
            used += cst_strlen(rst+used);
        }
        else
        {
            /* Out of room: mark the cut and stop listing tokens */
            if (used+4 < FL_MAX_MSG_CHARS)
                cst_sprintf(rst+used," ...");
            break;
        }
        space = " ";
    }

    if (flowm_file_pos > flowm_prev_utt_pos[flowm_utt_pos_pos])
    {
        if ((flowm_utt_pos_pos+1) >= FLOWM_NUM_UTT_POS)
        {
            /* Filled it up, so move it down */
            /* NOTE(review): this shifts FLOWM_NUM_UTT_POS-10 entries down
               by one and then restarts at index FLOWM_NUM_UTT_POS-10 --
               confirm that is the intended window. */
            memmove(flowm_prev_utt_pos,&flowm_prev_utt_pos[1],
                    sizeof(int)*(FLOWM_NUM_UTT_POS-10));
            flowm_utt_pos_pos = (FLOWM_NUM_UTT_POS-10);
        }
        flowm_utt_pos_pos++;
        flowm_prev_utt_pos[flowm_utt_pos_pos] = flowm_file_pos;
    }

    /* Send text to TTSWindow */
    mbstowcs(fl_tts_msg,rst,FL_MAX_MSG_CHARS);
    SetDlgItemText(TTSWindow, FL_SYNTHTEXT, fl_tts_msg);

    /* Update file pos percentage in FilePos window */
    cst_sprintf(rst,"%2.3f",flowm_find_file_percentage());
    mbstowcs(fl_fp_msg,rst,FL_MAX_MSG_CHARS);
    SetDlgItemText(TTSWindow, FL_FILEPOS, fl_fp_msg);

    SystemIdleTimerReset();  /* keep alive while synthesizing */
    if (flowm_play_status == FLOWM_SKIP)
        flowm_play_status = FLOWM_PLAY;

    return u;
}
Esempio n. 26
0
/*
 * Read SSML-style marked-up text from the token stream `ts`, split it into
 * utterances, and synthesize each utterance with `voice`.
 *
 *   ts      - token stream to read from; its character classes are reset
 *             here using the voice's "text_prepunctuation" and
 *             "text_postpunctuation" parameters.
 *   voice   - voice used for synthesis.  Its optional "utt_break" and
 *             "utt_user_callback" features override the default utterance
 *             break function and install a per-utterance callback.
 *   outtype - passed through unchanged to flite_process_output().
 *
 * Returns the sum of the values returned by flite_process_output() for
 * every utterance that was synthesized.
 */
static float flite_ssml_to_speech_ts(cst_tokenstream *ts,
                                     cst_voice *voice,
                                     const char *outtype)
{
    cst_features *ssml_feats, *ssml_word_feats;
    cst_features *attributes;
    const char *token;
    char *tag;
    cst_utterance *utt;
    cst_relation *tokrel;
    int num_tokens;
    cst_breakfunc breakfunc = default_utt_break;
    cst_uttfunc utt_user_callback = 0;
    float durs = 0.0;
    cst_item *t;

    /* Both feature sets are owned by this function and released at the end */
    ssml_feats = new_features();
    ssml_word_feats = new_features();
    set_charclasses(ts,
                    " \t\n\r",
                    ssml_singlecharsymbols_general,
                    get_param_string(voice->features,"text_prepunctuation",""),
                    get_param_string(voice->features,"text_postpunctuation","")
                    );

    if (feat_present(voice->features,"utt_break"))
        breakfunc = val_breakfunc(feat_val(voice->features,"utt_break"));

    if (feat_present(voice->features,"utt_user_callback"))
        utt_user_callback = val_uttfunc(feat_val(voice->features,"utt_user_callback"));

    num_tokens = 0;
    utt = new_utterance();
    tokrel = utt_relation_create(utt, "Token");
    /* Keep going past EOF while tokens are still pending so that the */
    /* final (possibly partial) utterance gets synthesized too */
    while (!ts_eof(ts) || num_tokens > 0)
    {
        token = ts_get(ts);
        if (cst_streq("<",token))
        {   /* A tag */
            tag = cst_upcase(ts_get(ts));
            if (cst_streq("/",tag)) /* an end tag */
            {
                /* Release the upcased "/" before fetching the real tag */
                /* name, otherwise that buffer is leaked on every end tag */
                cst_free(tag);
                tag = cst_upcase(ts_get(ts));
                attributes = ssml_get_attributes(ts);
                feat_set_string(attributes,"_type","end");
            }
            else
                attributes = ssml_get_attributes(ts);
            /* NOTE(review): attributes is not freed here; presumably */
            /* ssml_apply_tag() takes ownership -- confirm */
            utt = ssml_apply_tag(tag,attributes,utt,ssml_word_feats);
            cst_free(tag);
        }
        else if (cst_streq("&",token))
        {   /* an escape sequence */
            /* skip to ; and insert value in rawdata */
        }
        else
        {
            if ((cst_strlen(token) == 0) ||
                (num_tokens > 500) ||  /* need an upper bound */
                (relation_head(tokrel) &&
                 breakfunc(ts,token,tokrel)))
            {
                /* An end of utt, so synthesize it */
                if (utt_user_callback)
                    utt = (utt_user_callback)(utt);

                if (utt)
                {
                    utt = flite_do_synth(utt,voice,utt_synth_tokens);
                    durs += flite_process_output(utt,outtype,TRUE);
                    delete_utterance(utt); utt = NULL;
                }
                else
                    break;  /* the user callback returned no utterance */

                if (ts_eof(ts)) break;

                /* Start collecting tokens for the next utterance */
                utt = new_utterance();
                tokrel = utt_relation_create(utt, "Token");
                num_tokens = 0;
            }

            num_tokens++;

            t = relation_append(tokrel, NULL);
            item_set_string(t,"name",token);
            item_set_string(t,"whitespace",ts->whitespace);
            item_set_string(t,"prepunctuation",ts->prepunctuation);
            item_set_string(t,"punc",ts->postpunctuation);
            /* Mark it at the beginning of the token */
            item_set_int(t,"file_pos",
                         ts->file_pos-(1+ /* as we are already on the next char */
                                       cst_strlen(token)+
                                       cst_strlen(ts->prepunctuation)+
                                       cst_strlen(ts->postpunctuation)));
            item_set_int(t,"line_number",ts->line_number);
        }
    }

    /* utt may be NULL here (it is reset after each synthesis) */
    delete_utterance(utt);
    /* Release the feature sets allocated at the top of the function; */
    /* previously both were leaked on every call */
    delete_features(ssml_feats);
    delete_features(ssml_word_feats);
    return durs;
}