static cst_tokenstream *new_tokenstream(const cst_string *whitespace, const cst_string *singlechars, const cst_string *prepunct, const cst_string *postpunct) { /* Constructor function */ cst_tokenstream *ts = cst_alloc(cst_tokenstream,1); ts->fd = NULL; ts->file_pos = 0; ts->line_number = 0; ts->string_buffer = NULL; ts->token_pos = 0; ts->whitespace = cst_alloc(cst_string,TS_BUFFER_SIZE); ts->ws_max = TS_BUFFER_SIZE; if (prepunct && prepunct[0]) { ts->prepunctuation = cst_alloc(cst_string,TS_BUFFER_SIZE); ts->prep_max = TS_BUFFER_SIZE; } ts->token = cst_alloc(cst_string,TS_BUFFER_SIZE); ts->token_max = TS_BUFFER_SIZE; if (postpunct && postpunct[0]) { ts->postpunctuation = cst_alloc(cst_string,TS_BUFFER_SIZE); ts->postp_max = TS_BUFFER_SIZE; } set_charclasses(ts,whitespace,singlechars,prepunct,postpunct); ts->current_char = 0; return ts; }
/*
 * Read the attribute part of an SSML tag, up to and including the
 * closing ">".
 *
 * While reading, the stream's single-character symbols are switched to
 * ssml_singlecharsymbols_inattr; they are switched back to
 * ssml_singlecharsymbols_general before returning, on every path.
 *
 * The returned feature set contains:
 *   "_type"  - "start", or "startend" when a "/" token is seen;
 *   "_name0" - attribute name;
 *   "_val0"  - attribute value, when the name is followed by "=".
 * Every attribute is written to the same "_name0"/"_val0" keys, so with
 * several attributes only the last one is kept.
 *
 * Returns the feature set, or NULL (after printing a message to stderr)
 * if the stream ends before ">" is found.
 */
static cst_features *ssml_get_attributes(cst_tokenstream *ts)
{
    cst_features *a = new_features();
    const char *name, *val, *next;
    int have_name;      /* `name` already holds the next unread token */
    int failed = 0;

    set_charclasses(ts,
                    ts->p_whitespacesymbols,
                    ssml_singlecharsymbols_inattr,
                    ts->p_prepunctuationsymbols,
                    ts->p_postpunctuationsymbols);

    name = ts_get(ts);
    while (!cst_streq(">", name))
    {
        have_name = 0;

        if (cst_streq(name, "/"))
            feat_set_string(a, "_type", "startend");
        else
        {
            feat_set_string(a, "_type", "start");
            feat_set_string(a, "_name0", name);

            next = ts_get(ts);
            if (cst_streq("=", next))
            {
                val = ts_get_quoted_remainder(ts);
                feat_set_string(a, "_val0", val);
            }
            else
            {
                /* Attribute without a value: the token just read is the
                   next thing to interpret.  Dropping it would swallow a
                   closing ">" and make the loop run on into the text
                   that follows the tag. */
                name = next;
                have_name = 1;
                if (cst_streq(">", name))
                    break;
            }
        }

        if (ts_eof(ts))
        {
            fprintf(stderr,"ssml: unexpected EOF\n");
            failed = 1;
            break;
        }

        if (!have_name)
            name = ts_get(ts);
    }

    /* Restore the general tokenisation rules on success and failure alike. */
    set_charclasses(ts,
                    ts->p_whitespacesymbols,
                    ssml_singlecharsymbols_general,
                    ts->p_prepunctuationsymbols,
                    ts->p_postpunctuationsymbols);

    if (failed)
    {
        delete_features(a);
        return NULL;
    }

    return a;
}
/*
 * Read SSML from a token stream, synthesize it utterance by utterance and
 * pass each synthesized utterance to flite_process_output().
 *
 * ts      - token stream to read; its character classes are replaced here
 *           (whitespace " \t\n\r", ssml_singlecharsymbols_general, and the
 *           voice's "text_prepunctuation" / "text_postpunctuation").
 * voice   - voice used for synthesis; its optional "utt_break" and
 *           "utt_user_callback" features override the default utterance
 *           break function and install a per-utterance callback.
 * outtype - passed unchanged to flite_process_output().
 *
 * Tokens are appended to the "Token" relation of the current utterance
 * until an empty token is read, more than 500 tokens have accumulated, or
 * the break function reports an utterance boundary; the utterance is then
 * synthesized and a new one is started.  "<" starts a tag, which is parsed
 * and handed to ssml_apply_tag().  "&" tokens are currently ignored.
 *
 * Returns the sum of the values returned by flite_process_output().
 */
static float flite_ssml_to_speech_ts(cst_tokenstream *ts,
                                     cst_voice *voice,
                                     const char *outtype)
{
    cst_features *ssml_feats, *ssml_word_feats;
    cst_features *attributes;
    const char *token;
    char *tag;
    cst_utterance *utt;
    cst_relation *tokrel;
    int num_tokens;
    cst_breakfunc breakfunc = default_utt_break;
    cst_uttfunc utt_user_callback = 0;
    float durs = 0.0;
    cst_item *t;

    ssml_feats = new_features();
    ssml_word_feats = new_features();
    set_charclasses(ts,
                    " \t\n\r",
                    ssml_singlecharsymbols_general,
                    get_param_string(voice->features,"text_prepunctuation",""),
                    get_param_string(voice->features,"text_postpunctuation","")
                    );

    if (feat_present(voice->features,"utt_break"))
        breakfunc = val_breakfunc(feat_val(voice->features,"utt_break"));

    if (feat_present(voice->features,"utt_user_callback"))
        utt_user_callback = val_uttfunc(feat_val(voice->features,"utt_user_callback"));

    num_tokens = 0;
    utt = new_utterance();
    tokrel = utt_relation_create(utt, "Token");

    /* Keep going after EOF while tokens are still waiting to be spoken. */
    while (!ts_eof(ts) || num_tokens > 0)
    {
        token = ts_get(ts);
        if (cst_streq("<",token))
        {   /* A tag */
            int is_end_tag = 0;

            tag = cst_upcase(ts_get(ts));
            if (cst_streq("/",tag)) /* an end tag */
            {
                /* Release the upcased "/" before replacing it with the
                   real tag name; it was leaked previously. */
                cst_free(tag);
                tag = cst_upcase(ts_get(ts));
                is_end_tag = 1;
            }

            attributes = ssml_get_attributes(ts);
            if (attributes == NULL)
            {
                /* Input ended inside the tag: drop the incomplete tag.
                   The loop condition then either flushes the pending
                   tokens or terminates. */
                cst_free(tag);
                continue;
            }
            if (is_end_tag)
                feat_set_string(attributes,"_type","end");

            utt = ssml_apply_tag(tag,attributes,utt,ssml_word_feats);
            cst_free(tag);
        }
        else if (cst_streq("&",token))
        {   /* an escape sequence */
            /* skip to ; and insert value in rawdata */
        }
        else
        {
            if ((cst_strlen(token) == 0) ||
                (num_tokens > 500) ||  /* need an upper bound */
                (relation_head(tokrel) &&
                 breakfunc(ts,token,tokrel)))
            {
                /* An end of utt, so synthesize it */
                if (utt_user_callback)
                    utt = (utt_user_callback)(utt);

                if (utt)
                {
                    utt = flite_do_synth(utt,voice,utt_synth_tokens);
                    durs += flite_process_output(utt,outtype,TRUE);
                    delete_utterance(utt);
                    utt = NULL;
                }
                else
                    break;      /* callback returned no utterance: stop */

                if (ts_eof(ts))
                    break;

                utt = new_utterance();
                tokrel = utt_relation_create(utt, "Token");
                num_tokens = 0;
            }

            num_tokens++;

            t = relation_append(tokrel, NULL);
            item_set_string(t,"name",token);
            item_set_string(t,"whitespace",ts->whitespace);
            item_set_string(t,"prepunctuation",ts->prepunctuation);
            item_set_string(t,"punc",ts->postpunctuation);
            /* Mark it at the beginning of the token.  The punctuation
               buffers are length-checked only when present.
               NOTE(review): they can apparently be NULL when the stream
               was created with an empty punctuation class - confirm. */
            item_set_int(t,"file_pos",
                         ts->file_pos-(1+ /* as we are already on the next char */
                                       cst_strlen(token)+
                                       (ts->prepunctuation ?
                                        cst_strlen(ts->prepunctuation) : 0)+
                                       (ts->postpunctuation ?
                                        cst_strlen(ts->postpunctuation) : 0)));
            item_set_int(t,"line_number",ts->line_number);
        }
    }

    delete_utterance(utt);
    /* Release the feature sets created at the top of this function.
       NOTE(review): assumes ssml_apply_tag() does not keep a reference to
       ssml_word_feats beyond each call - confirm. */
    delete_features(ssml_word_feats);
    delete_features(ssml_feats);

    return durs;
}