int flite_voice_add_lex_addenda(cst_voice *v, const cst_string *lexfile) { /* Add addenda in lexfile to current voice */ cst_lexicon *lex; const cst_val *lex_addenda = NULL; cst_val *new_addenda; lex = val_lexicon(feat_val(v->features,"lexicon")); if (feat_present(v->features, "lex_addenda")) lex_addenda = feat_val(v->features, "lex_addenda"); new_addenda = cst_lex_load_addenda(lex,lexfile); #if 0 printf("\naddenda: "); val_print(stdout,new_addenda); printf("\n"); #endif new_addenda = val_append(new_addenda,(cst_val *)lex_addenda); if (lex->lex_addenda) delete_val(lex->lex_addenda); lex->lex_addenda = new_addenda; return 0; }
cst_utterance *cart_intonation(cst_utterance *u) { cst_cart *accents, *tones; cst_item *s; const cst_val *v; if (feat_present(u->features,"no_intonation_accent_model")) return u; /* not all languages have intonation models */ accents = val_cart(feat_val(u->features,"int_cart_accents")); tones = val_cart(feat_val(u->features,"int_cart_tones")); for (s=relation_head(utt_relation(u,"Syllable")); s; s=item_next(s)) { v = cart_interpret(s,accents); if (!cst_streq("NONE",val_string(v))) item_set_string(s,"accent",val_string(v)); v = cart_interpret(s,tones); if (!cst_streq("NONE",val_string(v))) item_set_string(s,"endtone",val_string(v)); DPRINTF(0,("word %s gpos %s stress %s ssyl_in %s ssyl_out %s accent %s endtone %s\n", ffeature_string(s,"R:SylStructure.parent.name"), ffeature_string(s,"R:SylStructure.parent.gpos"), ffeature_string(s,"stress"), ffeature_string(s,"ssyl_in"), ffeature_string(s,"ssyl_out"), ffeature_string(s,"accent"), ffeature_string(s,"endtone"))); } return u; }
cst_utterance *default_phrasing(cst_utterance *u) { cst_relation *r; cst_item *w, *p, *lp=NULL; const cst_val *v; cst_cart *phrasing_cart; r = utt_relation_create(u,"Phrase"); phrasing_cart = val_cart(feat_val(u->features,"phrasing_cart")); for (p=NULL,w=relation_head(utt_relation(u,"Word")); w; w=item_next(w)) { if (p == NULL) { p = relation_append(r,NULL); lp = p; #ifdef FLITE_PLUS_HTS_ENGINE item_set_string(p,"name","BB"); #else item_set_string(p,"name","B"); #endif /* FLITE_PLUS_HTS_ENGINE */ } item_add_daughter(p,w); v = cart_interpret(w,phrasing_cart); if (cst_streq(val_string(v),"BB")) p = NULL; } if (lp && item_prev(lp)) /* follow festival */ item_set_string(lp,"name","BB"); return u; }
cst_utterance *default_pause_insertion(cst_utterance *u) { /* Add initial silences and silence at each phrase break */ const char *silence; const cst_item *w; cst_item *p, *s; silence = val_string(feat_val(u->features,"silence")); /* Insert initial silence */ s = relation_head(utt_relation(u,"Segment")); if (s == NULL) s = relation_append(utt_relation(u,"Segment"),NULL); else s = item_prepend(s,NULL); item_set_string(s,"name",silence); for (p=relation_head(utt_relation(u,"Phrase")); p; p=item_next(p)) { for (w = item_last_daughter(p); w; w=item_prev(w)) { s = path_to_item(w,"R:SylStructure.daughtern.daughtern.R:Segment"); if (s) { s = item_append(s,NULL); item_set_string(s,"name",silence); break; } } } return u; }
void item_contents_set(cst_item *current, cst_item *i) { cst_item_contents *c = 0; cst_item *nn_item; if (i == 0) c = new_item_contents(current); else c = i->contents; if (c != current->contents) { item_unref_contents(current); current->contents = c; /* If this contents is already in this relation */ /* empty the other reference */ if (feat_present(current->contents->relations,current->relation->name)) { /* oops this is already in this relation */ nn_item = val_item(feat_val(current->contents->relations, current->relation->name)); feat_set(nn_item->contents->relations, current->relation->name, item_val(nn_item)); } /* Add back reference */ feat_set(current->contents->relations, current->relation->name, item_val(current)); } }
cst_utterance *default_phrasing(cst_utterance *u) { cst_relation *r; cst_item *w, *p; const cst_val *v; cst_cart *phrasing_cart; r = utt_relation_create(u,"Phrase"); phrasing_cart = val_cart(feat_val(u->features,"phrasing_cart")); for (p=NULL,w=relation_head(utt_relation(u,"Word")); w; w=item_next(w)) { if (p == NULL) { p = relation_append(r,NULL); item_set_string(p,"name","BB"); } item_add_daughter(p,w); v = cart_interpret(w,phrasing_cart); if (cst_streq(val_string(v),"BB")) p = NULL; } return u; }
static cst_val *cmu_LANGNAME_tokentowords(cst_item *token, const char *name) { /* Return list of words that expand token/name */ cst_val *r; /* printf("token_name %s name %s\n",item_name(token),name); */ if (item_feat_present(token,"phones")) return cons_val(string_val(name),NULL); #if 0 if (item_feat_present(token,"nsw")) nsw = item_feat_string(token,"nsw"); utt = item_utt(token); lex = val_lexicon(feat_val(utt->features,"lexicon")); #endif if (cst_strlen(name) > 0) r = cons_val(string_val(name),0); else r = NULL; return r; }
const cst_val *val_string_x(const char *n) { const cst_val *v; /* *BUG* This will have to be fixed soon */ if (val_string_consts == NULL) val_string_consts = new_features(); v = feat_val(val_string_consts,n); if (v) return v; else { feat_set_string(val_string_consts,n,n); return feat_val(val_string_consts,n); } }
const cst_val *get_param_val(const cst_features *f, const char *name, cst_val *def) { const cst_val *v; v = feat_val(f,name); if (v != NULL) return v; else return def; }
const char *get_param_string(const cst_features *f, const char *name, const char *def) { const cst_val *v; v = feat_val(f,name); if (v != NULL) return val_string(v); else return def; }
float get_param_float(const cst_features *f, const char *name, float def) { const cst_val *v; v = feat_val(f,name); if (v != NULL) return val_float(v); else return def; }
cst_utterance *cart_duration(cst_utterance *u) { cst_cart *dur_tree; cst_item *s; float zdur, dur_stretch, local_dur_stretch, dur; float end; dur_stats *ds; const dur_stat *dur_stat; end = 0; if (feat_present(u->features,"no_segment_duration_model")) return u; /* not all methods need segment durations */ dur_tree = val_cart(feat_val(u->features,"dur_cart")); dur_stretch = get_param_float(u->features,"duration_stretch", 1.0); ds = val_dur_stats(feat_val(u->features,"dur_stats")); for (s=relation_head(utt_relation(u,"Segment")); s; s=item_next(s)) { zdur = val_float(cart_interpret(s,dur_tree)); dur_stat = phone_dur_stat(ds,item_name(s)); local_dur_stretch = ffeature_float(s, "R:SylStructure.parent.parent." "R:Token.parent.local_duration_stretch"); if (local_dur_stretch) local_dur_stretch *= dur_stretch; else local_dur_stretch = dur_stretch; dur = local_dur_stretch * ((zdur*dur_stat->stddev)+dur_stat->mean); DPRINTF(0,("phone %s accent %s stress %s pdur %f stretch %f mean %f std %f dur %f\n", item_name(s), ffeature_string(s,"R:SylStructure.parent.accented"), ffeature_string(s,"R:SylStructure.parent.stress"), zdur, local_dur_stretch, dur_stat->mean, dur_stat->stddev, dur)); end += dur; item_set_float(s,"end",end); } return u; }
cst_utterance *apply_synth_module(cst_utterance *u, const cst_synth_module *mod) { const cst_val *v; v = feat_val(u->features, mod->hookname); if (v) return (*val_uttfunc(v))(u); if (mod->defhook) return (*mod->defhook)(u); return u; }
cst_item *item_as(const cst_item *i,const char *rname) { /* return i as relation rname or null */ const cst_val *v; if (i == NULL) return NULL; else { v = feat_val(i->contents->relations,rname); if (v != NULL) return val_item(v); else return NULL; } }
const cst_val *feat_val(const cst_features *f, const char *name) { cst_featvalpair *n; n = feat_find_featpair(f, name); if (n == NULL) { if (f && f->linked) { /* Search the linked features too if there are any */ /* We assume the linked features haven't been deleted, and */ return feat_val(f->linked, name); } else return NULL; /* its really not there at all */ } else return n->val; }
cst_utterance *default_textanalysis(cst_utterance *u) { cst_item *t,*word; cst_relation *word_rel; cst_val *words; const cst_val *w; const cst_val *ttwv; word_rel = utt_relation_create(u,"Word"); ttwv = feat_val(u->features, "tokentowords_func"); for (t=relation_head(utt_relation(u,"Token")); t; t=item_next(t)) { if (ttwv) words = (cst_val *)(*val_itemfunc(ttwv))(t); else words = default_tokentowords(t); for (w=words; w; w=val_cdr(w)) { word = item_add_daughter(t,NULL); if (cst_val_consp(val_car(w))) { /* Has extra features */ item_set_string(word,"name",val_string(val_car(val_car(w)))); feat_copy_into(val_features(val_cdr(val_car(w))), item_feats(word)); } else item_set_string(word,"name",val_string(val_car(w))); relation_append(word_rel,word); } delete_val(words); } return u; }
float flite_file_to_speech(const char *filename, cst_voice *voice, const char *outtype) { cst_utterance *utt; cst_tokenstream *ts; const char *token; cst_item *t; cst_relation *tokrel; float durs = 0; int num_tokens; cst_wave *w; cst_breakfunc breakfunc = default_utt_break; cst_uttfunc utt_user_callback = 0; int fp; if ((ts = ts_open(filename, get_param_string(voice->features,"text_whitespace",NULL), get_param_string(voice->features,"text_singlecharsymbols",NULL), get_param_string(voice->features,"text_prepunctuation",NULL), get_param_string(voice->features,"text_postpunctuation",NULL))) == NULL) { cst_errmsg("failed to open file \"%s\" for reading\n", filename); return 1; } fp = get_param_int(voice->features,"file_start_position",0); if (fp > 0) ts_set_stream_pos(ts,fp); if (feat_present(voice->features,"utt_break")) breakfunc = val_breakfunc(feat_val(voice->features,"utt_break")); if (feat_present(voice->features,"utt_user_callback")) utt_user_callback = val_uttfunc(feat_val(voice->features,"utt_user_callback")); /* If its a file to write to, create and save an empty wave file */ /* as we are going to incrementally append to it */ if (!cst_streq(outtype,"play") && !cst_streq(outtype,"none") && !cst_streq(outtype,"stream")) { w = new_wave(); cst_wave_resize(w,0,1); cst_wave_set_sample_rate(w,16000); cst_wave_save_riff(w,outtype); /* an empty wave */ delete_wave(w); } num_tokens = 0; utt = new_utterance(); tokrel = utt_relation_create(utt, "Token"); while (!ts_eof(ts) || num_tokens > 0) { token = ts_get(ts); if ((cst_strlen(token) == 0) || (num_tokens > 500) || /* need an upper bound */ (relation_head(tokrel) && breakfunc(ts,token,tokrel))) { /* An end of utt, so synthesize it */ if (utt_user_callback) utt = (utt_user_callback)(utt); if (utt) { utt = flite_do_synth(utt,voice,utt_synth_tokens); durs += flite_process_output(utt,outtype,TRUE); delete_utterance(utt); utt = NULL; } else break; if (ts_eof(ts)) break; utt = new_utterance(); tokrel = utt_relation_create(utt, "Token"); num_tokens = 0; } num_tokens++; t = relation_append(tokrel, NULL); item_set_string(t,"name",token); item_set_string(t,"whitespace",ts->whitespace); item_set_string(t,"prepunctuation",ts->prepunctuation); item_set_string(t,"punc",ts->postpunctuation); /* Mark it at the beginning of the token */ item_set_int(t,"file_pos", ts->file_pos-(1+ /* as we are already on the next char */ cst_strlen(token)+ cst_strlen(ts->prepunctuation)+ cst_strlen(ts->postpunctuation))); item_set_int(t,"line_number",ts->line_number); } delete_utterance(utt); ts_close(ts); return durs; }
const cst_phoneset *item_phoneset(const cst_item *p) { return val_phoneset(feat_val(item_utt(p)->features,"phoneset")); }
cst_utterance *default_lexical_insertion(cst_utterance *u) { cst_item *word; cst_relation *sylstructure,*seg,*syl; cst_lexicon *lex, *ulex = NULL; const cst_val *p; char *phone_name; char *stress = "0"; cst_val *phones; cst_item *ssword, *sssyl, *segitem, *sylitem, *seg_in_syl; lex = val_lexicon(feat_val(u->features,"lexicon")); if (feat_present(u->features, "user_lexicon")) ulex = val_lexicon(feat_val(u->features, "user_lexicon")); syl = utt_relation_create(u,"Syllable"); sylstructure = utt_relation_create(u,"SylStructure"); seg = utt_relation_create(u,"Segment"); for (word=relation_head(utt_relation(u,"Word")); word; word=item_next(word)) { ssword = relation_append(sylstructure,word); phones = NULL; /* FIXME: need to make sure that textanalysis won't split tokens with explicit pronunciation (or that it will propagate such to words, then we can remove the path here) */ if (item_feat_present(item_parent(item_as(word, "Token")), "phones")) phones = (cst_val *) item_feat(item_parent(item_as(word, "Token")), "phones"); else { if (ulex) phones = lex_lookup(ulex,item_feat_string(word, "name"),0); if (phones == NULL) phones = lex_lookup(lex,item_feat_string(word,"name"),0); } for (sssyl=NULL,sylitem=NULL,p=phones; p; p=val_cdr(p)) { if (sylitem == NULL) { sylitem = relation_append(syl,NULL); sssyl = item_add_daughter(ssword,sylitem); stress = "0"; } segitem = relation_append(seg,NULL); phone_name = cst_strdup(val_string(val_car(p))); if (phone_name[strlen(phone_name)-1] == '1') { stress = "1"; phone_name[strlen(phone_name)-1] = '\0'; } else if (phone_name[strlen(phone_name)-1] == '0') { stress = "0"; phone_name[strlen(phone_name)-1] = '\0'; } item_set_string(segitem,"name",phone_name); seg_in_syl = item_add_daughter(sssyl,segitem); if ((lex->syl_boundary)(seg_in_syl,val_cdr(p))) { sylitem = NULL; if (sssyl) item_set_string(sssyl,"stress",stress); } cst_free(phone_name); } if (!item_feat_present(item_parent(item_as(word, "Token")), "phones")) delete_val(phones); } return u; }
const cst_val *item_feat(const cst_item *i,const char *name) { return feat_val(item_feats(i),name); }
cst_utterance *default_lexical_insertion(cst_utterance *u) { cst_item *word; cst_relation *sylstructure,*seg,*syl; cst_lexicon *lex; const cst_val *lex_addenda = NULL; const cst_val *p, *wp = NULL; char *phone_name; char *stress = "0"; const char *pos; cst_val *phones; cst_item *ssword, *sssyl, *segitem, *sylitem, *seg_in_syl; lex = val_lexicon(feat_val(u->features,"lexicon")); if (lex->lex_addenda) lex_addenda = lex->lex_addenda; syl = utt_relation_create(u,"Syllable"); sylstructure = utt_relation_create(u,"SylStructure"); seg = utt_relation_create(u,"Segment"); for (word=relation_head(utt_relation(u,"Word")); word; word=item_next(word)) { ssword = relation_append(sylstructure,word); pos = ffeature_string(word,"pos"); phones = NULL; wp = NULL; /* printf("awb_debug word %s pos %s gpos %s\n", item_feat_string(word,"name"), pos, ffeature_string(word,"gpos")); */ /* FIXME: need to make sure that textanalysis won't split tokens with explicit pronunciation (or that it will propagate such to words, then we can remove the path here) */ if (item_feat_present(item_parent(item_as(word, "Token")), "phones")) phones = (cst_val *) item_feat(item_parent(item_as(word, "Token")), "phones"); else { wp = val_assoc_string(item_feat_string(word, "name"),lex_addenda); if (wp) phones = (cst_val *)val_cdr(val_cdr(wp)); else phones = lex_lookup(lex,item_feat_string(word,"name"),pos); } for (sssyl=NULL,sylitem=NULL,p=phones; p; p=val_cdr(p)) { if (sylitem == NULL) { sylitem = relation_append(syl,NULL); sssyl = item_add_daughter(ssword,sylitem); stress = "0"; } segitem = relation_append(seg,NULL); phone_name = cst_strdup(val_string(val_car(p))); if (phone_name[cst_strlen(phone_name)-1] == '1') { stress = "1"; phone_name[cst_strlen(phone_name)-1] = '\0'; } else if (phone_name[cst_strlen(phone_name)-1] == '0') { stress = "0"; phone_name[cst_strlen(phone_name)-1] = '\0'; } item_set_string(segitem,"name",phone_name); seg_in_syl = item_add_daughter(sssyl,segitem); #if 0 printf("awb_debug ph %s\n",phone_name); #endif if ((lex->syl_boundary)(seg_in_syl,val_cdr(p))) { #if 0 printf("awb_debug SYL\n"); #endif sylitem = NULL; if (sssyl) item_set_string(sssyl,"stress",stress); } cst_free(phone_name); } if (!item_feat_present(item_parent(item_as(word, "Token")), "phones") && ! wp) delete_val(phones); } return u; }
static float flite_ssml_to_speech_ts(cst_tokenstream *ts, cst_voice *voice, const char *outtype) { cst_features *ssml_feats, *ssml_word_feats; cst_features *attributes; const char *token; char *tag; cst_utterance *utt; cst_relation *tokrel; int num_tokens; cst_breakfunc breakfunc = default_utt_break; cst_uttfunc utt_user_callback = 0; float durs = 0.0; cst_item *t; ssml_feats = new_features(); ssml_word_feats = new_features(); set_charclasses(ts, " \t\n\r", ssml_singlecharsymbols_general, get_param_string(voice->features,"text_prepunctuation",""), get_param_string(voice->features,"text_postpunctuation","") ); if (feat_present(voice->features,"utt_break")) breakfunc = val_breakfunc(feat_val(voice->features,"utt_break")); if (feat_present(voice->features,"utt_user_callback")) utt_user_callback = val_uttfunc(feat_val(voice->features,"utt_user_callback")); num_tokens = 0; utt = new_utterance(); tokrel = utt_relation_create(utt, "Token"); while (!ts_eof(ts) || num_tokens > 0) { token = ts_get(ts); if (cst_streq("<",token)) { /* A tag */ tag = cst_upcase(ts_get(ts)); if (cst_streq("/",tag)) /* an end tag */ { tag = cst_upcase(ts_get(ts)); attributes = ssml_get_attributes(ts); feat_set_string(attributes,"_type","end"); } else attributes = ssml_get_attributes(ts); utt = ssml_apply_tag(tag,attributes,utt,ssml_word_feats); cst_free(tag); } else if (cst_streq("&",token)) { /* an escape sequence */ /* skip to ; and insert value in rawdata */ } else { if ((cst_strlen(token) == 0) || (num_tokens > 500) || /* need an upper bound */ (relation_head(tokrel) && breakfunc(ts,token,tokrel))) { /* An end of utt, so synthesize it */ if (utt_user_callback) utt = (utt_user_callback)(utt); if (utt) { utt = flite_do_synth(utt,voice,utt_synth_tokens); durs += flite_process_output(utt,outtype,TRUE); delete_utterance(utt); utt = NULL; } else break; if (ts_eof(ts)) break; utt = new_utterance(); tokrel = utt_relation_create(utt, "Token"); num_tokens = 0; } num_tokens++; t = relation_append(tokrel, NULL); item_set_string(t,"name",token); item_set_string(t,"whitespace",ts->whitespace); item_set_string(t,"prepunctuation",ts->prepunctuation); item_set_string(t,"punc",ts->postpunctuation); /* Mark it at the beginning of the token */ item_set_int(t,"file_pos", ts->file_pos-(1+ /* as we are already on the next char */ cst_strlen(token)+ cst_strlen(ts->prepunctuation)+ cst_strlen(ts->postpunctuation))); item_set_int(t,"line_number",ts->line_number); } } delete_utterance(utt); return durs; }
float feat_float(const cst_features *f, const char *name) { return val_float(feat_val(f,name)); }
const char *feat_string(const cst_features *f, const char *name) { return val_string(feat_val(f,name)); }
float flite_file_to_speech(const char *filename, cst_voice *voice, const char *outtype) { cst_utterance *utt; cst_tokenstream *ts; const char *token; cst_item *t; cst_relation *tokrel; float d, durs = 0; int num_tokens; cst_breakfunc breakfunc = default_utt_break; if ((ts = ts_open(filename, get_param_string(voice->features,"text_whitespace",NULL), get_param_string(voice->features,"text_singlecharsymbols",NULL), get_param_string(voice->features,"text_prepunctuation",NULL), get_param_string(voice->features,"text_postpunctuation",NULL))) == NULL) { cst_errmsg("failed to open file \"%s\" for reading\n", filename); return 1; } if (feat_present(voice->features,"utt_break")) breakfunc = val_breakfunc(feat_val(voice->features,"utt_break")); /* If its a file to write to delete it as we're going to */ /* incrementally append to it */ if (!cst_streq(outtype,"play") && !cst_streq(outtype,"none")) { cst_wave *w; w = new_wave(); cst_wave_resize(w,0,1); cst_wave_set_sample_rate(w,16000); cst_wave_save_riff(w,outtype); /* an empty wave */ delete_wave(w); } num_tokens = 0; utt = new_utterance(); tokrel = utt_relation_create(utt, "Token"); while (!ts_eof(ts) || num_tokens > 0) { token = ts_get(ts); if ((strlen(token) == 0) || (num_tokens > 500) || /* need an upper bound */ (relation_head(tokrel) && breakfunc(ts,token,tokrel))) { /* An end of utt */ d = flite_tokens_to_speech(utt,voice,outtype); utt = NULL; if (d < 0) goto out; durs += d; if (ts_eof(ts)) goto out; utt = new_utterance(); tokrel = utt_relation_create(utt, "Token"); num_tokens = 0; } num_tokens++; t = relation_append(tokrel, NULL); item_set_string(t,"name",token); item_set_string(t,"whitespace",ts->whitespace); item_set_string(t,"prepunctuation",ts->prepunctuation); item_set_string(t,"punc",ts->postpunctuation); item_set_int(t,"file_pos",ts->file_pos); item_set_int(t,"line_number",ts->line_number); } out: delete_utterance(utt); ts_close(ts); return durs; }