/*
 * Allocate a token stream that is not yet attached to any input.
 *
 * whitespace, singlechars, prepunct, postpunct
 *     Character-class strings, forwarded unchanged to set_charclasses().
 *
 * The whitespace and token buffers are always allocated with
 * TS_BUFFER_SIZE elements.  The pre- and post-punctuation buffers are
 * only allocated when the matching character-class string is non-NULL
 * and non-empty; otherwise those fields (and their *_max sizes) are left
 * exactly as cst_alloc() returned them.
 *
 * Returns the new stream.
 */
static cst_tokenstream *new_tokenstream(const cst_string *whitespace,
                                        const cst_string *singlechars,
                                        const cst_string *prepunct,
                                        const cst_string *postpunct)
{
    cst_tokenstream *stream = cst_alloc(cst_tokenstream,1);
    const int want_prepunct = (prepunct != NULL) && (prepunct[0] != 0);
    const int want_postpunct = (postpunct != NULL) && (postpunct[0] != 0);

    /* No input source yet, and nothing has been read. */
    stream->fd = NULL;
    stream->string_buffer = NULL;
    stream->file_pos = 0;
    stream->line_number = 0;
    stream->token_pos = 0;

    /* Per-token scratch buffers, allocated in the same order as before. */
    stream->whitespace = cst_alloc(cst_string,TS_BUFFER_SIZE);
    stream->ws_max = TS_BUFFER_SIZE;

    if (want_prepunct)
    {
        stream->prepunctuation = cst_alloc(cst_string,TS_BUFFER_SIZE);
        stream->prep_max = TS_BUFFER_SIZE;
    }

    stream->token = cst_alloc(cst_string,TS_BUFFER_SIZE);
    stream->token_max = TS_BUFFER_SIZE;

    if (want_postpunct)
    {
        stream->postpunctuation = cst_alloc(cst_string,TS_BUFFER_SIZE);
        stream->postp_max = TS_BUFFER_SIZE;
    }

    /* Install the character classes, then reset the look-ahead character. */
    set_charclasses(stream,whitespace,singlechars,prepunct,postpunct);
    stream->current_char = 0;

    return stream;
}
Exemple #2
0
/*
 * Read the remainder of an SSML tag, up to and including the closing
 * ">", and collect what was seen into a new feature set.
 *
 * While the tag is being read the stream's single-character class is
 * switched to ssml_singlecharsymbols_inattr; it is switched back to
 * ssml_singlecharsymbols_general before returning, on both the success
 * and the error path.
 *
 * Features written:
 *   "_type"  : "startend" when a "/" token is seen, "start" when an
 *              attribute name is seen (the last one seen wins).
 *   "_name0" : the attribute name.
 *   "_val0"  : the attribute value, when the name is followed by "=".
 * NOTE(review): every attribute is stored under the same "_name0" /
 * "_val0" keys, so with several attributes only the last one survives.
 *
 * Returns the feature set, or NULL if the stream reached EOF before the
 * closing ">" (a message is printed to stderr and the partial feature
 * set is freed).
 */
static cst_features *ssml_get_attributes(cst_tokenstream *ts)
{
    cst_features *a = new_features();
    const char *name, *val;
    const char *lookahead;

    set_charclasses(ts,
                    ts->p_whitespacesymbols,
                    ssml_singlecharsymbols_inattr,
                    ts->p_prepunctuationsymbols,
                    ts->p_postpunctuationsymbols);

    name = ts_get(ts);
    while (!cst_streq(">",name))
    {
        lookahead = NULL;

        if (cst_streq(name,"/"))
            feat_set_string(a,"_type","startend");
        else
        {
            feat_set_string(a,"_type","start");
            feat_set_string(a,"_name0",name);

            /* Peek at the token after the name.  If it is not "=" it must
               not be dropped: it is the next thing to interpret (another
               name, "/" or the closing ">"). */
            lookahead = ts_get(ts);
            if (cst_streq("=",lookahead))
            {
                val = ts_get_quoted_remainder(ts);
                feat_set_string(a,"_val0",val);
                lookahead = NULL;
            }
        }

        /* The closing bracket was already read as the look-ahead token. */
        if (lookahead && cst_streq(">",lookahead))
            break;

        if (ts_eof(ts))
        {
            fprintf(stderr,"ssml: unexpected EOF\n");
            delete_features(a);
            a = NULL;
            break;
        }

        /* No ts_get() call happens between reading the look-ahead token
           and using it as the next name.
           NOTE(review): assumes a token returned by ts_get() stays valid
           until the next ts_get() call; confirm. */
        name = lookahead ? lookahead : ts_get(ts);
    }

    /* Always restore the general tokenisation rules, even on error. */
    set_charclasses(ts,
                    ts->p_whitespacesymbols,
                    ssml_singlecharsymbols_general,
                    ts->p_prepunctuationsymbols,
                    ts->p_postpunctuationsymbols);

    return a;
}
Exemple #3
0
/*
 * Read SSML-marked-up text from a token stream and synthesize it
 * utterance by utterance.
 *
 * ts       Token stream to read; its character classes are reset here
 *          using the voice's "text_prepunctuation" and
 *          "text_postpunctuation" features (empty string when absent).
 * voice    Voice to synthesize with.  Its optional "utt_break" feature
 *          replaces default_utt_break, and its optional
 *          "utt_user_callback" feature is called on each utterance just
 *          before synthesis.
 * outtype  Passed through to flite_process_output().
 *
 * Tokens are appended to the "Token" relation of the current utterance.
 * The utterance is synthesized when an empty token is read, when more
 * than 500 tokens have accumulated, or when the break function reports
 * an utterance boundary.  "<" starts a tag, which is handed to
 * ssml_apply_tag(); "&" escapes are currently consumed and ignored.
 *
 * Returns the sum of the values returned by flite_process_output().
 */
static float flite_ssml_to_speech_ts(cst_tokenstream *ts,
                                     cst_voice *voice,
                                     const char *outtype)
{
    cst_features *ssml_feats, *ssml_word_feats;
    cst_features *attributes;
    const char *token;
    const char *prepunc, *postpunc;
    char *tag;
    cst_utterance *utt;
    cst_relation *tokrel;
    int num_tokens;
    cst_breakfunc breakfunc = default_utt_break;
    cst_uttfunc utt_user_callback = 0;
    float durs = 0.0;
    cst_item *t;

    ssml_feats = new_features();
    ssml_word_feats = new_features();
    set_charclasses(ts,
                    " \t\n\r",
                    ssml_singlecharsymbols_general,
                    get_param_string(voice->features,"text_prepunctuation",""),
                    get_param_string(voice->features,"text_postpunctuation","")
                    );

    if (feat_present(voice->features,"utt_break"))
        breakfunc = val_breakfunc(feat_val(voice->features,"utt_break"));

    if (feat_present(voice->features,"utt_user_callback"))
        utt_user_callback = val_uttfunc(feat_val(voice->features,"utt_user_callback"));

    num_tokens = 0;
    utt = new_utterance();
    tokrel = utt_relation_create(utt, "Token");
    /* Keep going after EOF while tokens are still buffered, so that the
       last (unterminated) utterance gets flushed. */
    while (!ts_eof(ts) || num_tokens > 0)
    {
        token = ts_get(ts);
        if (cst_streq("<",token))
        {   /* A tag */
            int is_end_tag = 0;

            tag = cst_upcase(ts_get(ts));
            if (cst_streq("/",tag)) /* an end tag */
            {
                /* Release the upcased "/" before tag is reused for the
                   element name; it used to be leaked. */
                cst_free(tag);
                tag = cst_upcase(ts_get(ts));
                is_end_tag = 1;
            }

            attributes = ssml_get_attributes(ts);
            if (attributes == NULL)
            {
                /* EOF inside the tag: drop the incomplete tag.  The
                   stream is at EOF, so the loop either ends now or makes
                   one more pass to flush the buffered tokens. */
                cst_free(tag);
                continue;
            }
            if (is_end_tag)
                feat_set_string(attributes,"_type","end");

            utt = ssml_apply_tag(tag,attributes,utt,ssml_word_feats);
            cst_free(tag);
        }
        else if (cst_streq("&",token))
        {   /* an escape sequence */
            /* skip to ; and insert value in rawdata */
        }
        else
        {
            if ((cst_strlen(token) == 0) ||
                (num_tokens > 500) ||  /* need an upper bound */
                (relation_head(tokrel) &&
                 breakfunc(ts,token,tokrel)))
            {
                /* An end of utt, so synthesize it */
                if (utt_user_callback)
                    utt = (utt_user_callback)(utt);

                if (utt)
                {
                    utt = flite_do_synth(utt,voice,utt_synth_tokens);
                    durs += flite_process_output(utt,outtype,TRUE);
                    delete_utterance(utt); utt = NULL;
                }
                else
                    break;

                if (ts_eof(ts)) break;

                utt = new_utterance();
                tokrel = utt_relation_create(utt, "Token");
                num_tokens = 0;
            }

            num_tokens++;

            /* The punctuation buffers are only allocated when the stream
               was created with non-empty punctuation sets, so treat a
               missing buffer as "no punctuation". */
            prepunc = ts->prepunctuation ?
                (const char *)ts->prepunctuation : "";
            postpunc = ts->postpunctuation ?
                (const char *)ts->postpunctuation : "";

            t = relation_append(tokrel, NULL);
            item_set_string(t,"name",token);
            item_set_string(t,"whitespace",ts->whitespace);
            item_set_string(t,"prepunctuation",prepunc);
            item_set_string(t,"punc",postpunc);
            /* Mark it at the beginning of the token */
            item_set_int(t,"file_pos",
                         ts->file_pos-(1+ /* as we are already on the next char */
                                       cst_strlen(token)+
                                       cst_strlen(prepunc)+
                                       cst_strlen(postpunc)));
            item_set_int(t,"line_number",ts->line_number);
        }
    }

    /* utt is NULL here when the last utterance was already synthesized
       and deleted, or when the user callback returned NULL. */
    if (utt)
        delete_utterance(utt);

    /* Both feature sets are created by this function and were never
       released before.
       NOTE(review): assumes ssml_apply_tag() does not keep a reference
       to ssml_word_feats beyond the utterances deleted above; confirm. */
    delete_features(ssml_word_feats);
    delete_features(ssml_feats);

    return durs;
}