static cst_utterance *cg_make_hmmstates(cst_utterance *utt)
{
    /* Build HMM state structure below the segment structure */
    cst_cg_db *cg_db;
    cst_relation *hmmstate, *segstate;
    cst_item *seg, *s, *ss;
    const char *segname;
    int sp,p;

    cg_db = val_cg_db(utt_feat_val(utt,"cg_db"));
    hmmstate = utt_relation_create(utt,"HMMstate");
    segstate = utt_relation_create(utt,"segstate");

    for (seg = utt_rel_head(utt,"Segment"); seg; seg=item_next(seg))
    {
        ss = relation_append(segstate,seg);
        segname = item_feat_string(seg,"name");
        for (p=0; cg_db->phone_states[p]; p++)
            if (cst_streq(segname,cg_db->phone_states[p][0]))
                break;
        if (cg_db->phone_states[p] == NULL)
            p = 0;  /* unknown phoneme */
        for (sp=1; cg_db->phone_states[p][sp]; sp++)
        {
            s = relation_append(hmmstate,NULL);
            item_add_daughter(ss,s);
            item_set_string(s,"name",cg_db->phone_states[p][sp]);
            item_set_int(s,"statepos",sp);
        }
    }

    return utt;
}
/* Dummy F0 modelling for phones, copied directly from us_f0_model.c */
cst_utterance *flat_prosody(cst_utterance *u)
{
    /* F0 target model */
    cst_item *s,*t;
    cst_relation *targ_rel;
    float mean, stddev;

    targ_rel = utt_relation_create(u,"Target");

    mean = get_param_float(u->features,"target_f0_mean", 100.0);
    mean *= get_param_float(u->features,"f0_shift", 1.0);
    stddev = get_param_float(u->features,"target_f0_stddev", 12.0);

    s = relation_head(utt_relation(u,"Segment"));
    t = relation_append(targ_rel,NULL);
    item_set_float(t,"pos",0.0);
    item_set_float(t,"f0",mean+stddev);

    s = relation_tail(utt_relation(u,"Segment"));
    t = relation_append(targ_rel,NULL);
    item_set_float(t,"pos",item_feat_float(s,"end"));
    item_set_float(t,"f0",mean-stddev);

    return u;
}
static int WordSylSeg(cst_utterance *u)
{
    cst_item *word;
    cst_relation *sylstructure,*seg,*syl;
    cst_val *phones;
    const cst_val *p;
    cst_item *ssword,*segitem;

    syl = utt_relation_create(u,"Syllable");
    sylstructure = utt_relation_create(u,"SylStructure");
    seg = utt_relation_create(u,"Segment");

    for (word=relation_head(utt_relation(u,"Word"));
         word; word=item_next(word))
    {
        printf("word: %s\n",item_feat_string(word,"name"));
        ssword = relation_append(sylstructure,word);
        phones = lex_lookup((cst_lexicon *)&cmu_lex,
                            item_feat_string(word,"name"),0);
        for (p=phones; p; p=val_cdr(p))
        {
            segitem = relation_append(seg,NULL);
            item_set(segitem,"name",val_car(p));
            printf("seg: %s\n",item_feat_string(segitem,"name"));
            item_add_daughter(ssword,segitem);
        }
        delete_val_list(phones);
    }

    return TRUE;
}
static void add_raw_data(cst_utterance *u, const char *raw_data,
                         cst_features *attributes)
{
    /* Add all tokens in raw_data to u */
    cst_tokenstream *ts;
    cst_relation *r;
    cst_item *t;
    const char *token;

    r = utt_relation_create(u,"Token");
    ts = ts_open_string(raw_data,
                        get_param_string(u->features,"text_whitespace",NULL),
                        get_param_string(u->features,"text_singlecharsymbols",NULL),
                        get_param_string(u->features,"text_prepunctuation",NULL),
                        get_param_string(u->features,"text_postpunctuation",NULL));
    while (!(ts_eof(ts)))
    {
        t = relation_append(r,NULL);
        feat_copy_into(item_feats(t),attributes);
        token = ts_get(ts);
        if (cst_strlen(token) > 0)
        {
            t = relation_append(r,NULL);
            item_set_string(t,"name",token);
            item_set_string(t,"whitespace",ts->whitespace);
            item_set_string(t,"prepunctuation",ts->prepunctuation);
            item_set_string(t,"punc",ts->postpunctuation);
        }
    }
}
int main(int argc, char **argv)
{
    cst_utterance *u;
    cst_relation *r;
    cst_item *item=0;
    int i;

    u = new_utterance();
    r = utt_relation_create(u,"Segment");

    /* Build a ten-item Segment relation, giving each item a name and a duration */
    for (i=0; i<10; i++)
    {
        char buff[20];
        sprintf(buff,"seg_%03d",i);
        if (i==0)
            item = relation_append(r,NULL);
        else
            item = item_append(item,NULL);
        item_set_string(item,"name",buff);
        item_set_float(item,"duration",i*0.20);
    }

    /* Traverse the relation and print each item's features */
    for (i=0,item=relation_head(utt_relation(u,"Segment"));
         item; item=item_next(item),i++)
    {
        printf("Segment %d %s %f\n",
               i,
               item_feat_string(item,"name"),
               item_feat_float(item,"duration"));
    }

    delete_utterance(u);

    return 0;
}
cst_utterance *default_tokenization(cst_utterance *u)
{
    const char *text,*token;
    cst_tokenstream *fd;
    cst_item *t;
    cst_relation *r;

    text = utt_input_text(u);
    r = utt_relation_create(u,"Token");
    fd = ts_open_string(text,
                        get_param_string(u->features,"text_whitespace",NULL),
                        get_param_string(u->features,"text_singlecharsymbols",NULL),
                        get_param_string(u->features,"text_prepunctuation",NULL),
                        get_param_string(u->features,"text_postpunctuation",NULL));

    while (!ts_eof(fd))
    {
        token = ts_get(fd);
        if (cst_strlen(token) > 0)
        {
            t = relation_append(r,NULL);
            item_set_string(t,"name",token);
            item_set_string(t,"whitespace",fd->whitespace);
            item_set_string(t,"prepunctuation",fd->prepunctuation);
            item_set_string(t,"punc",fd->postpunctuation);
            item_set_int(t,"file_pos",fd->file_pos);
            item_set_int(t,"line_number",fd->line_number);
        }
    }
    ts_close(fd);

    return u;
}
cst_utterance *default_phrasing(cst_utterance *u)
{
    cst_relation *r;
    cst_item *w, *p, *lp=NULL;
    const cst_val *v;
    cst_cart *phrasing_cart;

    r = utt_relation_create(u,"Phrase");
    phrasing_cart = val_cart(feat_val(u->features,"phrasing_cart"));

    for (p=NULL,w=relation_head(utt_relation(u,"Word")); w; w=item_next(w))
    {
        if (p == NULL)
        {
            p = relation_append(r,NULL);
            lp = p;
#ifdef FLITE_PLUS_HTS_ENGINE
            item_set_string(p,"name","BB");
#else
            item_set_string(p,"name","B");
#endif /* FLITE_PLUS_HTS_ENGINE */
        }
        item_add_daughter(p,w);
        v = cart_interpret(w,phrasing_cart);
        if (cst_streq(val_string(v),"BB"))
            p = NULL;
    }

    if (lp && item_prev(lp)) /* follow festival */
        item_set_string(lp,"name","BB");

    return u;
}
cst_utterance *default_phrasing(cst_utterance *u)
{
    cst_relation *r;
    cst_item *w, *p;
    const cst_val *v;
    cst_cart *phrasing_cart;

    r = utt_relation_create(u,"Phrase");
    phrasing_cart = val_cart(feat_val(u->features,"phrasing_cart"));

    for (p=NULL,w=relation_head(utt_relation(u,"Word")); w; w=item_next(w))
    {
        if (p == NULL)
        {
            p = relation_append(r,NULL);
            item_set_string(p,"name","BB");
        }
        item_add_daughter(p,w);
        v = cart_interpret(w,phrasing_cart);
        if (cst_streq(val_string(v),"BB"))
            p = NULL;
    }

    return u;
}
static int bbb_relation_load(cst_relation *r,const char *filename)
{
    const char *token;
    cst_item *item;
    cst_tokenstream *fd;

    fd = ts_open(filename);
    if (fd == 0)
        return 0;

    while (!ts_eof(fd))
    {
        token = ts_get(fd);
        if (cst_streq(token,""))
            continue;
        item = relation_append(r,NULL);
        item_set_string(item,"name",token);
        item_set_string(item,"whitespace",fd->whitespace);
        item_set_string(item,"prepunctuation",fd->prepunctuation);
        item_set_string(item,"punc",fd->postpunctuation);
        item_set_int(item,"file_pos",fd->file_pos);
        item_set_int(item,"line_number",fd->line_number);
    }
    ts_close(fd);

    return 1;
}
cst_utterance *default_pause_insertion(cst_utterance *u)
{
    /* Add initial silences and silence at each phrase break */
    const char *silence;
    const cst_item *w;
    cst_item *p, *s;

    silence = val_string(feat_val(u->features,"silence"));

    /* Insert initial silence */
    s = relation_head(utt_relation(u,"Segment"));
    if (s == NULL)
        s = relation_append(utt_relation(u,"Segment"),NULL);
    else
        s = item_prepend(s,NULL);
    item_set_string(s,"name",silence);

    for (p=relation_head(utt_relation(u,"Phrase")); p; p=item_next(p))
    {
        for (w = item_last_daughter(p); w; w=item_prev(w))
        {
            s = path_to_item(w,"R:SylStructure.daughtern.daughtern.R:Segment");
            if (s)
            {
                s = item_append(s,NULL);
                item_set_string(s,"name",silence);
                break;
            }
        }
    }

    return u;
}
void test_hrg(void)
{
    cst_utterance *u;
    cst_relation *r;
    cst_item *item = 0;
    int i;

    u = new_utterance();
    r = utt_relation_create(u, "Segment");

    for (i = 0; i < 10; i++)
    {
        char buff[20];
        sprintf(buff, "seg_%03d", i);
        if (i == 0)
            item = relation_append(r, NULL);
        else
            item = item_append(item, NULL);
        item_set_string(item, "name", buff);
        item_set_float(item, "duration", i * 0.20);
    }

    for (i = 0, item = relation_head(utt_relation(u, "Segment"));
         item;
         item = item_next(item), i++)
    {
        TEST_CHECK(item_feat_float(item, "duration") == correct_list[i]);
    }

    delete_utterance(u);
}
int relation_load(cst_relation *r, const char *filename)
{
    cst_tokenstream *fd;
    cst_item *item;
    const char *token=0;

    if ((fd = ts_open(filename,NULL,";","","")) == 0)
    {
        cst_errmsg("relation_load: can't open file \"%s\" for reading\n",
                   filename);
        return CST_ERROR_FORMAT;
    }

    /* Skip the label file header, which is terminated by a "#" token */
    for ( ; !ts_eof(fd); )
    {
        token = ts_get(fd);
        if (cst_streq("#",token))
            break;
    }
#import "OpenEarsStaticAnalysisToggle.h"
#ifdef STATICANALYZEDEPENDENCIES
#define __clang_analyzer__ 1
#endif
#if !defined(__clang_analyzer__) || defined(STATICANALYZEDEPENDENCIES)
#undef __clang_analyzer__
    if (!cst_streq("#",token))
#endif
    {
        cst_errmsg("relation_load: no end of header marker in \"%s\"\n",
                   filename);
        ts_close(fd);
        return CST_ERROR_FORMAT;
    }

    while (!ts_eof(fd))
    {
        token = ts_get(fd);
        if (cst_streq(token,""))
            continue;
        item = relation_append(r,NULL);
        item_set_float(item,"end",(float)cst_atof(token));
#import "OpenEarsStaticAnalysisToggle.h"
#ifdef STATICANALYZEDEPENDENCIES
#define __clang_analyzer__ 1
#endif
#if !defined(__clang_analyzer__) || defined(STATICANALYZEDEPENDENCIES)
#undef __clang_analyzer__
        token = ts_get(fd);
#endif
        token = ts_get(fd);
        item_set_string(item,"name",token);
    }
    ts_close(fd);

    return CST_OK_FORMAT;
}
static cst_utterance *cg_make_params(cst_utterance *utt)
{
    /* Put in the frame items */
    /* Historically called "mcep", but they can be arbitrary parameter vectors */
    cst_cg_db *cg_db;
    cst_relation *mcep, *mcep_link;
    cst_item *s, *mcep_parent, *mcep_frame;
    int num_frames;
    float start, end;
    float dur_stretch, tok_stretch;

    cg_db = val_cg_db(utt_feat_val(utt,"cg_db"));
    mcep = utt_relation_create(utt,"mcep");
    mcep_link = utt_relation_create(utt,"mcep_link");
    end = 0.0;
    num_frames = 0;
    dur_stretch = get_param_float(utt->features,"duration_stretch", 1.0);

    for (s = utt_rel_head(utt,"HMMstate"); s; s=item_next(s))
    {
        start = end;
        tok_stretch = ffeature_float(s,"R:segstate.parent.R:SylStructure.parent.parent.R:Token.parent.local_duration_stretch");
        if (tok_stretch == 0)
            tok_stretch = 1.0;
        end = start + (tok_stretch*dur_stretch*cg_state_duration(s,cg_db));
        item_set_float(s,"end",end);
        mcep_parent = relation_append(mcep_link, s);
        for ( ; (num_frames * cg_db->frame_advance) <= end; num_frames++ )
        {
            mcep_frame = relation_append(mcep,NULL);
            item_add_daughter(mcep_parent,mcep_frame);
            item_set_int(mcep_frame,"frame_number",num_frames);
            item_set(mcep_frame,"name",item_feat(mcep_parent,"name"));
        }
    }

    /* Copy duration up onto Segment relation */
    for (s = utt_rel_head(utt,"Segment"); s; s=item_next(s))
        item_set(s,"end",ffeature(s,"R:segstate.daughtern.end"));

    utt_set_feat_int(utt,"param_track_num_frames",num_frames);

    return utt;
}
int relation_load(cst_relation *r, const char *filename)
{
    cst_tokenstream *fd;
    cst_item *item;
    const char *token=0;

    if ((fd = ts_open(filename,NULL,";","","")) == 0)
    {
        cst_errmsg("relation_load: can't open file \"%s\" for reading\n",
                   filename);
        return CST_ERROR_FORMAT;
    }

    /* Skip the label file header, which is terminated by a "#" token */
    for ( ; !ts_eof(fd); )
    {
        token = ts_get(fd);
        if (cst_streq("#",token))
            break;
    }

    if (!cst_streq("#",token))
    {
        cst_errmsg("relation_load: no end of header marker in \"%s\"\n",
                   filename);
        ts_close(fd);
        return CST_ERROR_FORMAT;
    }

    while (!ts_eof(fd))
    {
        token = ts_get(fd);
        if (cst_streq(token,""))
            continue;
        item = relation_append(r,NULL);
        item_set_float(item,"end",(float)cst_atof(token));
        token = ts_get(fd);  /* skip the middle field */
        token = ts_get(fd);
        item_set_string(item,"name",token);
    }
    ts_close(fd);

    return CST_OK_FORMAT;
}
cst_utterance *default_textanalysis(cst_utterance *u)
{
    cst_item *t,*word;
    cst_relation *word_rel;
    cst_val *words;
    const cst_val *w;
    const cst_val *ttwv;

    word_rel = utt_relation_create(u,"Word");
    ttwv = feat_val(u->features, "tokentowords_func");

    for (t=relation_head(utt_relation(u,"Token")); t; t=item_next(t))
    {
        if (ttwv)
            words = (cst_val *)(*val_itemfunc(ttwv))(t);
        else
            words = default_tokentowords(t);

        for (w=words; w; w=val_cdr(w))
        {
            word = item_add_daughter(t,NULL);
            if (cst_val_consp(val_car(w)))
            {   /* Has extra features */
                item_set_string(word,"name",val_string(val_car(val_car(w))));
                feat_copy_into(val_features(val_cdr(val_car(w))),
                               item_feats(word));
            }
            else
                item_set_string(word,"name",val_string(val_car(w)));
            relation_append(word_rel,word);
        }
        delete_val(words);
    }

    return u;
}
cst_utterance *default_lexical_insertion(cst_utterance *u)
{
    cst_item *word;
    cst_relation *sylstructure,*seg,*syl;
    cst_lexicon *lex;
    const cst_val *lex_addenda = NULL;
    const cst_val *p, *wp = NULL;
    char *phone_name;
    char *stress = "0";
    const char *pos;
    cst_val *phones;
    cst_item *ssword, *sssyl, *segitem, *sylitem, *seg_in_syl;

    lex = val_lexicon(feat_val(u->features,"lexicon"));
    if (lex->lex_addenda)
        lex_addenda = lex->lex_addenda;

    syl = utt_relation_create(u,"Syllable");
    sylstructure = utt_relation_create(u,"SylStructure");
    seg = utt_relation_create(u,"Segment");

    for (word=relation_head(utt_relation(u,"Word"));
         word; word=item_next(word))
    {
        ssword = relation_append(sylstructure,word);
        pos = ffeature_string(word,"pos");
        phones = NULL;
        wp = NULL;

        /* printf("awb_debug word %s pos %s gpos %s\n",
               item_feat_string(word,"name"), pos,
               ffeature_string(word,"gpos")); */

        /* FIXME: need to make sure that textanalysis won't split tokens
           with explicit pronunciation (or that it will propagate such to
           words, then we can remove the path here) */
        if (item_feat_present(item_parent(item_as(word, "Token")), "phones"))
            phones = (cst_val *) item_feat(item_parent(item_as(word, "Token")), "phones");
        else
        {
            wp = val_assoc_string(item_feat_string(word, "name"),lex_addenda);
            if (wp)
                phones = (cst_val *)val_cdr(val_cdr(wp));
            else
                phones = lex_lookup(lex,item_feat_string(word,"name"),pos);
        }

        for (sssyl=NULL,sylitem=NULL,p=phones; p; p=val_cdr(p))
        {
            if (sylitem == NULL)
            {
                sylitem = relation_append(syl,NULL);
                sssyl = item_add_daughter(ssword,sylitem);
                stress = "0";
            }
            segitem = relation_append(seg,NULL);
            phone_name = cst_strdup(val_string(val_car(p)));
            if (phone_name[cst_strlen(phone_name)-1] == '1')
            {
                stress = "1";
                phone_name[cst_strlen(phone_name)-1] = '\0';
            }
            else if (phone_name[cst_strlen(phone_name)-1] == '0')
            {
                stress = "0";
                phone_name[cst_strlen(phone_name)-1] = '\0';
            }
            item_set_string(segitem,"name",phone_name);
            seg_in_syl = item_add_daughter(sssyl,segitem);
#if 0
            printf("awb_debug ph %s\n",phone_name);
#endif
            if ((lex->syl_boundary)(seg_in_syl,val_cdr(p)))
            {
#if 0
                printf("awb_debug SYL\n");
#endif
                sylitem = NULL;
                if (sssyl)
                    item_set_string(sssyl,"stress",stress);
            }
            cst_free(phone_name);
        }

        if (!item_feat_present(item_parent(item_as(word, "Token")), "phones") &&
            !wp)
            delete_val(phones);
    }

    return u;
}
float flite_file_to_speech(const char *filename,
                           cst_voice *voice,
                           const char *outtype)
{
    cst_utterance *utt;
    cst_tokenstream *ts;
    const char *token;
    cst_item *t;
    cst_relation *tokrel;
    float durs = 0;
    int num_tokens;
    cst_wave *w;
    cst_breakfunc breakfunc = default_utt_break;
    cst_uttfunc utt_user_callback = 0;
    int fp;

    if ((ts = ts_open(filename,
                      get_param_string(voice->features,"text_whitespace",NULL),
                      get_param_string(voice->features,"text_singlecharsymbols",NULL),
                      get_param_string(voice->features,"text_prepunctuation",NULL),
                      get_param_string(voice->features,"text_postpunctuation",NULL)))
        == NULL)
    {
        cst_errmsg("failed to open file \"%s\" for reading\n",filename);
        return 1;
    }
    fp = get_param_int(voice->features,"file_start_position",0);
    if (fp > 0)
        ts_set_stream_pos(ts,fp);
    if (feat_present(voice->features,"utt_break"))
        breakfunc = val_breakfunc(feat_val(voice->features,"utt_break"));

    if (feat_present(voice->features,"utt_user_callback"))
        utt_user_callback = val_uttfunc(feat_val(voice->features,"utt_user_callback"));

    /* If it's a file to write to, create and save an empty wave file, */
    /* as we are going to incrementally append to it                   */
    if (!cst_streq(outtype,"play") &&
        !cst_streq(outtype,"none") &&
        !cst_streq(outtype,"stream"))
    {
        w = new_wave();
        cst_wave_resize(w,0,1);
        cst_wave_set_sample_rate(w,16000);
        cst_wave_save_riff(w,outtype);  /* an empty wave */
        delete_wave(w);
    }

    num_tokens = 0;
    utt = new_utterance();
    tokrel = utt_relation_create(utt, "Token");
    while (!ts_eof(ts) || num_tokens > 0)
    {
        token = ts_get(ts);
        if ((cst_strlen(token) == 0) ||
            (num_tokens > 500) ||  /* need an upper bound */
            (relation_head(tokrel) &&
             breakfunc(ts,token,tokrel)))
        {
            /* An end of utt, so synthesize it */
            if (utt_user_callback)
                utt = (utt_user_callback)(utt);

            if (utt)
            {
                utt = flite_do_synth(utt,voice,utt_synth_tokens);
                durs += flite_process_output(utt,outtype,TRUE);
                delete_utterance(utt);
                utt = NULL;
            }
            else
                break;

            if (ts_eof(ts))
                break;

            utt = new_utterance();
            tokrel = utt_relation_create(utt, "Token");
            num_tokens = 0;
        }
        num_tokens++;

        t = relation_append(tokrel, NULL);
        item_set_string(t,"name",token);
        item_set_string(t,"whitespace",ts->whitespace);
        item_set_string(t,"prepunctuation",ts->prepunctuation);
        item_set_string(t,"punc",ts->postpunctuation);
        /* Mark it at the beginning of the token */
        item_set_int(t,"file_pos",
                     ts->file_pos-(1+ /* as we are already on the next char */
                                   cst_strlen(token)+
                                   cst_strlen(ts->prepunctuation)+
                                   cst_strlen(ts->postpunctuation)));
        item_set_int(t,"line_number",ts->line_number);
    }

    delete_utterance(utt);
    ts_close(ts);
    return durs;
}
cst_utterance *default_lexical_insertion(cst_utterance *u)
{
    cst_item *word;
    cst_relation *sylstructure,*seg,*syl;
    cst_lexicon *lex, *ulex = NULL;
    const cst_val *p;
    char *phone_name;
    char *stress = "0";
    cst_val *phones;
    cst_item *ssword, *sssyl, *segitem, *sylitem, *seg_in_syl;

    lex = val_lexicon(feat_val(u->features,"lexicon"));
    if (feat_present(u->features, "user_lexicon"))
        ulex = val_lexicon(feat_val(u->features, "user_lexicon"));

    syl = utt_relation_create(u,"Syllable");
    sylstructure = utt_relation_create(u,"SylStructure");
    seg = utt_relation_create(u,"Segment");

    for (word=relation_head(utt_relation(u,"Word"));
         word; word=item_next(word))
    {
        ssword = relation_append(sylstructure,word);
        phones = NULL;

        /* FIXME: need to make sure that textanalysis won't split tokens
           with explicit pronunciation (or that it will propagate such to
           words, then we can remove the path here) */
        if (item_feat_present(item_parent(item_as(word, "Token")), "phones"))
            phones = (cst_val *) item_feat(item_parent(item_as(word, "Token")), "phones");
        else
        {
            if (ulex)
                phones = lex_lookup(ulex,item_feat_string(word, "name"),0);
            if (phones == NULL)
                phones = lex_lookup(lex,item_feat_string(word,"name"),0);
        }

        for (sssyl=NULL,sylitem=NULL,p=phones; p; p=val_cdr(p))
        {
            if (sylitem == NULL)
            {
                sylitem = relation_append(syl,NULL);
                sssyl = item_add_daughter(ssword,sylitem);
                stress = "0";
            }
            segitem = relation_append(seg,NULL);
            phone_name = cst_strdup(val_string(val_car(p)));
            if (phone_name[strlen(phone_name)-1] == '1')
            {
                stress = "1";
                phone_name[strlen(phone_name)-1] = '\0';
            }
            else if (phone_name[strlen(phone_name)-1] == '0')
            {
                stress = "0";
                phone_name[strlen(phone_name)-1] = '\0';
            }
            item_set_string(segitem,"name",phone_name);
            seg_in_syl = item_add_daughter(sssyl,segitem);
            if ((lex->syl_boundary)(seg_in_syl,val_cdr(p)))
            {
                sylitem = NULL;
                if (sssyl)
                    item_set_string(sssyl,"stress",stress);
            }
            cst_free(phone_name);
        }

        if (!item_feat_present(item_parent(item_as(word, "Token")), "phones"))
            delete_val(phones);
    }

    return u;
}
static float flite_ssml_to_speech_ts(cst_tokenstream *ts,
                                     cst_voice *voice,
                                     const char *outtype)
{
    cst_features *ssml_feats, *ssml_word_feats;
    cst_features *attributes;
    const char *token;
    char *tag;
    cst_utterance *utt;
    cst_relation *tokrel;
    int num_tokens;
    cst_breakfunc breakfunc = default_utt_break;
    cst_uttfunc utt_user_callback = 0;
    float durs = 0.0;
    cst_item *t;

    ssml_feats = new_features();
    ssml_word_feats = new_features();
    set_charclasses(ts,
                    " \t\n\r",
                    ssml_singlecharsymbols_general,
                    get_param_string(voice->features,"text_prepunctuation",""),
                    get_param_string(voice->features,"text_postpunctuation",""));

    if (feat_present(voice->features,"utt_break"))
        breakfunc = val_breakfunc(feat_val(voice->features,"utt_break"));

    if (feat_present(voice->features,"utt_user_callback"))
        utt_user_callback = val_uttfunc(feat_val(voice->features,"utt_user_callback"));

    num_tokens = 0;
    utt = new_utterance();
    tokrel = utt_relation_create(utt, "Token");
    while (!ts_eof(ts) || num_tokens > 0)
    {
        token = ts_get(ts);
        if (cst_streq("<",token))
        {   /* A tag */
            tag = cst_upcase(ts_get(ts));
            if (cst_streq("/",tag)) /* an end tag */
            {
                tag = cst_upcase(ts_get(ts));
                attributes = ssml_get_attributes(ts);
                feat_set_string(attributes,"_type","end");
            }
            else
                attributes = ssml_get_attributes(ts);
            utt = ssml_apply_tag(tag,attributes,utt,ssml_word_feats);
            cst_free(tag);
        }
        else if (cst_streq("&",token))
        {   /* an escape sequence */
            /* skip to ; and insert value in rawdata */
        }
        else
        {
            if ((cst_strlen(token) == 0) ||
                (num_tokens > 500) ||  /* need an upper bound */
                (relation_head(tokrel) &&
                 breakfunc(ts,token,tokrel)))
            {
                /* An end of utt, so synthesize it */
                if (utt_user_callback)
                    utt = (utt_user_callback)(utt);

                if (utt)
                {
                    utt = flite_do_synth(utt,voice,utt_synth_tokens);
                    durs += flite_process_output(utt,outtype,TRUE);
                    delete_utterance(utt);
                    utt = NULL;
                }
                else
                    break;

                if (ts_eof(ts))
                    break;

                utt = new_utterance();
                tokrel = utt_relation_create(utt, "Token");
                num_tokens = 0;
            }
            num_tokens++;

            t = relation_append(tokrel, NULL);
            item_set_string(t,"name",token);
            item_set_string(t,"whitespace",ts->whitespace);
            item_set_string(t,"prepunctuation",ts->prepunctuation);
            item_set_string(t,"punc",ts->postpunctuation);
            /* Mark it at the beginning of the token */
            item_set_int(t,"file_pos",
                         ts->file_pos-(1+ /* as we are already on the next char */
                                       cst_strlen(token)+
                                       cst_strlen(ts->prepunctuation)+
                                       cst_strlen(ts->postpunctuation)));
            item_set_int(t,"line_number",ts->line_number);
        }
    }

    delete_utterance(utt);
    return durs;
}
float flite_file_to_speech(const char *filename,
                           cst_voice *voice,
                           const char *outtype)
{
    cst_utterance *utt;
    cst_tokenstream *ts;
    const char *token;
    cst_item *t;
    cst_relation *tokrel;
    float d, durs = 0;
    int num_tokens;
    cst_breakfunc breakfunc = default_utt_break;

    if ((ts = ts_open(filename,
                      get_param_string(voice->features,"text_whitespace",NULL),
                      get_param_string(voice->features,"text_singlecharsymbols",NULL),
                      get_param_string(voice->features,"text_prepunctuation",NULL),
                      get_param_string(voice->features,"text_postpunctuation",NULL)))
        == NULL)
    {
        cst_errmsg("failed to open file \"%s\" for reading\n",filename);
        return 1;
    }
    if (feat_present(voice->features,"utt_break"))
        breakfunc = val_breakfunc(feat_val(voice->features,"utt_break"));

    /* If it's a file to write to, delete it, as we're going to */
    /* incrementally append to it                               */
    if (!cst_streq(outtype,"play") && !cst_streq(outtype,"none"))
    {
        cst_wave *w;
        w = new_wave();
        cst_wave_resize(w,0,1);
        cst_wave_set_sample_rate(w,16000);
        cst_wave_save_riff(w,outtype);  /* an empty wave */
        delete_wave(w);
    }

    num_tokens = 0;
    utt = new_utterance();
    tokrel = utt_relation_create(utt, "Token");
    while (!ts_eof(ts) || num_tokens > 0)
    {
        token = ts_get(ts);
        if ((strlen(token) == 0) ||
            (num_tokens > 500) ||  /* need an upper bound */
            (relation_head(tokrel) &&
             breakfunc(ts,token,tokrel)))
        {
            /* An end of utt */
            d = flite_tokens_to_speech(utt,voice,outtype);
            utt = NULL;
            if (d < 0)
                goto out;
            durs += d;

            if (ts_eof(ts))
                goto out;

            utt = new_utterance();
            tokrel = utt_relation_create(utt, "Token");
            num_tokens = 0;
        }
        num_tokens++;

        t = relation_append(tokrel, NULL);
        item_set_string(t,"name",token);
        item_set_string(t,"whitespace",ts->whitespace);
        item_set_string(t,"prepunctuation",ts->prepunctuation);
        item_set_string(t,"punc",ts->postpunctuation);
        item_set_int(t,"file_pos",ts->file_pos);
        item_set_int(t,"line_number",ts->line_number);
    }

out:
    delete_utterance(utt);
    ts_close(ts);
    return durs;
}
static cst_utterance *tokentosegs(cst_utterance *u)
{
    cst_item *t;
    cst_relation *seg, *syl, *sylstructure, *word;
    cst_item *sylitem, *sylstructureitem, *worditem, *sssyl;
    cst_phoneset *ps;

    ps = val_phoneset(utt_feat_val(u, "phoneset"));
    /* Just copy tokens into the Segment relation */
    seg = utt_relation_create(u, "Segment");
    syl = utt_relation_create(u, "Syllable");
    word = utt_relation_create(u, "Word");
    sylstructure = utt_relation_create(u, "SylStructure");
    sssyl = sylitem = worditem = sylstructureitem = 0;
    for (t = relation_head(utt_relation(u, "Token")); t; t = item_next(t))
    {
        cst_item *segitem = relation_append(seg, NULL);
        char const *pname = item_feat_string(t, "name");
        char *name = cst_strdup(pname);

        if (worditem == 0)
        {
            worditem = relation_append(word,NULL);
            item_set_string(worditem, "name", "phonestring");
            sylstructureitem = relation_append(sylstructure,worditem);
        }
        if (sylitem == 0)
        {
            sylitem = relation_append(syl,NULL);
            sssyl = item_add_daughter(sylstructureitem,sylitem);
        }

        if (name[cst_strlen(name)-1] == '1')
        {
            item_set_string(sssyl,"stress","1");
            name[cst_strlen(name)-1] = '\0';
        }
        else if (name[cst_strlen(name)-1] == '0')
        {
            item_set_string(sssyl,"stress","0");
            name[cst_strlen(name)-1] = '\0';
        }

        if (cst_streq(name,"-"))
        {
            sylitem = 0;  /* syllable break */
        }
        else if (phone_id(ps, name) == -1)
        {
            cst_errmsg("Phone `%s' not in phoneset\n", pname);
            cst_error();
        }
        else
        {
            item_add_daughter(sssyl,segitem);
            item_set_string(segitem, "name", name);
        }
        cst_free(name);
    }

    return u;
}
cst_utterance *russian_lexical_insertion(cst_utterance *u)
{
    cst_item *word;
    cst_relation *sylstructure,*seg,*syl,*sylvowel,*transcription;
    const cst_val *p;
    const char *phone_name;
    cst_val *phones;
    cst_item *ssword, *sssyl, *segitem, *sylitem, *seg_in_syl, *svsyl,
        *vowel_in_syl, *tword, *seg_in_word;
    cst_item *i,*tmp;
    int num_segs;
    int total_num_segs=0;

    syl = utt_relation_create(u,"Syllable");
    sylstructure = utt_relation_create(u,"SylStructure");
    seg = utt_relation_create(u,"Segment");
    sylvowel = utt_relation_create(u,"SylVowel");
    transcription = utt_relation_create(u,"Transcription");

    for (word=relation_head(utt_relation(u,"Word")); word; word=item_next(word))
    {
        phones = word_to_phones(word);
        if (!phones)
            continue;
        num_segs = val_length(phones);
        if ((total_num_segs+num_segs) > max_num_segs)
        {
            delete_val(phones);
            break;
        }
        ssword = relation_append(sylstructure,word);
        tword = relation_append(transcription,word);
        for (sssyl=NULL,sylitem=NULL,p=phones; p; p=val_cdr(p))
        {
            if (sylitem == NULL)
            {
                sylitem = relation_append(syl,NULL);
                sssyl = item_add_daughter(ssword,sylitem);
            }
            segitem = relation_append(seg,NULL);
            phone_name = val_string(val_car(p));
            item_set_string(segitem,"name",phone_name);
            seg_in_syl = item_add_daughter(sssyl,segitem);
            seg_in_word = item_add_daughter(tword,segitem);
            if (is_vowel(phone_name))
            {
                svsyl = relation_append(sylvowel,sylitem);
                vowel_in_syl = item_add_daughter(svsyl,segitem);
            }
            if (ru_syl_boundary(seg_in_syl,val_cdr(p)))
            {
                sylitem = NULL;
                if (sssyl)
                    item_set_string(sssyl,"stress","0");
            }
        }
        assign_stress(word);
        delete_val(phones);
        total_num_segs += num_segs;
    }

    /* Remove words that received no transcription, along with their
       Token and Phrase entries */
    i = relation_head(utt_relation(u,"Word"));
    while (i)
    {
        tmp = item_next(i);
        if (item_as(i,"Transcription") == NULL)
        {
            delete_item(item_as(i,"Token"));
            delete_item(item_as(i,"Phrase"));
            delete_item(i);
        }
        i = tmp;
    }

    /* Remove phrases that are left with no daughters */
    i = relation_head(utt_relation(u,"Phrase"));
    while (i)
    {
        tmp = item_next(i);
        if (item_daughter(i) == NULL)
            delete_item(i);
        i = tmp;
    }

    return u;
}