/* Dummy F0 modelling for phones, copied directly from us_f0_model.c */ cst_utterance *flat_prosody(cst_utterance *u) { /* F0 target model */ cst_item *s,*t; cst_relation *targ_rel; float mean, stddev; targ_rel = utt_relation_create(u,"Target"); mean = get_param_float(u->features,"target_f0_mean", 100.0); mean *= get_param_float(u->features,"f0_shift", 1.0); stddev = get_param_float(u->features,"target_f0_stddev", 12.0); s=relation_head(utt_relation(u,"Segment")); t = relation_append(targ_rel,NULL); item_set_float(t,"pos",0.0); item_set_float(t,"f0",mean+stddev); s=relation_tail(utt_relation(u,"Segment")); t = relation_append(targ_rel,NULL); item_set_float(t,"pos",item_feat_float(s,"end")); item_set_float(t,"f0",mean-stddev); return u; }
cst_utterance *default_pause_insertion(cst_utterance *u) { /* Add initial silences and silence at each phrase break */ const char *silence; const cst_item *w; cst_item *p, *s; silence = val_string(feat_val(u->features,"silence")); /* Insert initial silence */ s = relation_head(utt_relation(u,"Segment")); if (s == NULL) s = relation_append(utt_relation(u,"Segment"),NULL); else s = item_prepend(s,NULL); item_set_string(s,"name",silence); for (p=relation_head(utt_relation(u,"Phrase")); p; p=item_next(p)) { for (w = item_last_daughter(p); w; w=item_prev(w)) { s = path_to_item(w,"R:SylStructure.daughtern.daughtern.R:Segment"); if (s) { s = item_append(s,NULL); item_set_string(s,"name",silence); break; } } } return u; }
cst_utterance *f0_targets_to_pm(cst_utterance *utt) { cst_item *t; float pos,lpos,f0,lf0,m; double time; int pm; cst_sts_list *sts_list; cst_lpcres *target_lpcres; sts_list = val_sts_list(utt_feat_val(utt,"sts_list")); lpos = 0; lf0 = 120; /* hmm */ pm = 0; time = 0; /* First pass to count how many pms will be required */ for (t=relation_head(utt_relation(utt,"Target")); t; t=item_next(t), lf0 = f0, lpos = pos) /* changed by dhopkins */ { pos = item_feat_float(t,"pos"); f0 = item_feat_float(t,"f0"); if (time == pos) continue; m = (f0-lf0)/(pos-lpos); for ( ; time < pos; pm++) { time += 1/(lf0 + ((time-lpos)*m)); } } target_lpcres = new_lpcres(); lpcres_resize_frames(target_lpcres,pm); lpos = 0; lf0 = 120; pm = 0; time = 0; /* Second pass puts the values in */ for (t=relation_head(utt_relation(utt,"Target")); t; t=item_next(t), lf0 = f0, lpos = pos) /* changed by dhopkins */ { pos = item_feat_float(t,"pos"); f0 = item_feat_float(t,"f0"); if (time == pos) continue; m = (f0-lf0)/(pos-lpos); for ( ; time < pos; pm++) { time += 1/(lf0 + ((time-lpos)*m)); target_lpcres->times[pm] = sts_list->sample_rate * time; } } utt_set_feat(utt,"target_lpcres",lpcres_val(target_lpcres)); return utt; }
/* Flite_HTS_Engine_synthesize: synthesize speech */ HTS_Boolean Flite_HTS_Engine_synthesize(Flite_HTS_Engine * f, const char *txt, const char *wav) { int i; FILE *fp; cst_voice *v = NULL; cst_utterance *u = NULL; cst_item *s = NULL; char **label_data = NULL; int label_size = 0; if (txt == NULL) return FALSE; /* text analysis part */ v = REGISTER_VOX(NULL); if (v == NULL) return FALSE; u = flite_synth_text(txt, v); if (u == NULL) { UNREGISTER_VOX(v); return FALSE; } for (s = relation_head(utt_relation(u, "Segment")); s; s = item_next(s)) label_size++; if (label_size <= 0) { delete_utterance(u); UNREGISTER_VOX(v); return FALSE; } label_data = (char **) calloc(label_size, sizeof(char *)); for (i = 0, s = relation_head(utt_relation(u, "Segment")); s; s = item_next(s), i++) { label_data[i] = (char *) calloc(MAXBUFLEN, sizeof(char)); Flite_HTS_Engine_create_label(f, s, label_data[i]); } /* speech synthesis part */ HTS_Engine_synthesize_from_strings(&f->engine, label_data, label_size); if (wav != NULL) { fp = fopen(wav, "wb"); HTS_Engine_save_riff(&f->engine, fp); fclose(fp); } HTS_Engine_refresh(&f->engine); for (i = 0; i < label_size; i++) free(label_data[i]); free(label_data); delete_utterance(u); UNREGISTER_VOX(v); return TRUE; }
int main(int argc, char **argv) { cst_utterance *u; cst_relation *r; cst_item *item=0; int i; cmu_lex_init(); u = new_utterance(); r = utt_relation_create(u,"Word"); bbb_relation_load(r,"ttt.txt"); WordSylSeg(u); for (i=0,item=item_next(relation_head(utt_relation(u,"Segment"))); item; item=item_next(item),i++) { printf("Segment %s %s %s %s\n", ffeature_string(item,"name"), ffeature_string(item,"n.name"), ffeature_string(item,"p.name"), ffeature_string(item,"R:SylStructure.parent.name") /* ffeature_string(item,"R:SylStructure.parent.R:Word.n.name"), */ /* item_feat_float(item,"duration")); */ ); } delete_utterance(u); return 0; }
static int WordSylSeg(cst_utterance *u) { cst_item *word; cst_relation *sylstructure,*seg,*syl; cst_val *phones; const cst_val *p; cst_item *ssword,*segitem; syl = utt_relation_create(u,"Syllable"); sylstructure = utt_relation_create(u,"SylStructure"); seg = utt_relation_create(u,"Segment"); for (word=relation_head(utt_relation(u,"Word")); word; word=item_next(word)) { printf("word: %s\n",item_feat_string(word,"name")); ssword = relation_append(sylstructure,word); phones = lex_lookup((cst_lexicon *)&cmu_lex,item_feat_string(word,"name"),0); for (p=phones; p; p=val_cdr(p)) { segitem = relation_append(seg,NULL); item_set(segitem,"name",val_car(p)); printf("seg: %s\n",item_feat_string(segitem,"name")); item_add_daughter(ssword,segitem); } delete_val_list(phones); } return TRUE; }
cst_utterance *default_phrasing(cst_utterance *u) { cst_relation *r; cst_item *w, *p, *lp=NULL; const cst_val *v; cst_cart *phrasing_cart; r = utt_relation_create(u,"Phrase"); phrasing_cart = val_cart(feat_val(u->features,"phrasing_cart")); for (p=NULL,w=relation_head(utt_relation(u,"Word")); w; w=item_next(w)) { if (p == NULL) { p = relation_append(r,NULL); lp = p; #ifdef FLITE_PLUS_HTS_ENGINE item_set_string(p,"name","BB"); #else item_set_string(p,"name","B"); #endif /* FLITE_PLUS_HTS_ENGINE */ } item_add_daughter(p,w); v = cart_interpret(w,phrasing_cart); if (cst_streq(val_string(v),"BB")) p = NULL; } if (lp && item_prev(lp)) /* follow festival */ item_set_string(lp,"name","BB"); return u; }
int main(int argc, char **argv) { cst_utterance *u; cst_relation *r; cst_item *item=0; int i; u = new_utterance(); r = utt_relation_create(u,"Segment"); for (i=0; i<10; i++) { char buff[20]; sprintf(buff,"seg_%03d",i); if (i==0) item = relation_append(r,NULL); else item = item_append(item,NULL); item_set_string(item,"name",buff); item_set_float(item,"duration",i*0.20); } for (i=0,item=relation_head(utt_relation(u,"Segment")); item; item=item_next(item),i++) { printf("Segment %d %s %f\n", i, item_feat_string(item,"name"), item_feat_float(item,"duration")); } delete_utterance(u); return 0; }
cst_utterance *cart_intonation(cst_utterance *u) { cst_cart *accents, *tones; cst_item *s; const cst_val *v; if (feat_present(u->features,"no_intonation_accent_model")) return u; /* not all languages have intonation models */ accents = val_cart(feat_val(u->features,"int_cart_accents")); tones = val_cart(feat_val(u->features,"int_cart_tones")); for (s=relation_head(utt_relation(u,"Syllable")); s; s=item_next(s)) { v = cart_interpret(s,accents); if (!cst_streq("NONE",val_string(v))) item_set_string(s,"accent",val_string(v)); v = cart_interpret(s,tones); if (!cst_streq("NONE",val_string(v))) item_set_string(s,"endtone",val_string(v)); DPRINTF(0,("word %s gpos %s stress %s ssyl_in %s ssyl_out %s accent %s endtone %s\n", ffeature_string(s,"R:SylStructure.parent.name"), ffeature_string(s,"R:SylStructure.parent.gpos"), ffeature_string(s,"stress"), ffeature_string(s,"ssyl_in"), ffeature_string(s,"ssyl_out"), ffeature_string(s,"accent"), ffeature_string(s,"endtone"))); } return u; }
void test_hrg(void) { cst_utterance *u; cst_relation *r; cst_item *item = 0; int i; u = new_utterance(); r = utt_relation_create(u, "Segment"); for (i = 0; i < 10; i++) { char buff[20]; sprintf(buff, "seg_%03d", i); if (i == 0) item = relation_append(r, NULL); else item = item_append(item, NULL); item_set_string(item, "name", buff); item_set_float(item, "duration", i * 0.20); } for (i = 0, item = relation_head(utt_relation(u, "Segment")); item; item = item_next(item), i++) { TEST_CHECK(item_feat_float(item, "duration") == correct_list[i]); } delete_utterance(u); }
cst_utterance *default_phrasing(cst_utterance *u) { cst_relation *r; cst_item *w, *p; const cst_val *v; cst_cart *phrasing_cart; r = utt_relation_create(u,"Phrase"); phrasing_cart = val_cart(feat_val(u->features,"phrasing_cart")); for (p=NULL,w=relation_head(utt_relation(u,"Word")); w; w=item_next(w)) { if (p == NULL) { p = relation_append(r,NULL); item_set_string(p,"name","BB"); } item_add_daughter(p,w); v = cart_interpret(w,phrasing_cart); if (cst_streq(val_string(v),"BB")) p = NULL; } return u; }
cst_utterance *russian_postlex_function(cst_utterance *u) { const cst_item *word,*seg; const char *answer,*name,*pair; for(word=relation_head(utt_relation(u,"Transcription"));word;word=item_next(word)) { if(item_feat_present(word,"no_pl")||item_feat_present(word,"no_vr")) continue; for(seg=item_daughter(word);seg;seg=item_next(seg)) { name=item_feat_string(seg,"name"); if(cst_member_string(name,unstressed_vowels)) { answer=val_string(cart_interpret(item_as(seg,"Segment"),&ru_vowel_reduction_cart)); if(!cst_streq(answer,"N")) item_set_string(seg,"name",answer); } else { if(cst_streq(name,"ii")&& cst_streq(ffeature_string(seg,"R:Segment.p.ph_csoft"),"-")&& !(cst_streq(item_feat_string(word,"name"),"и")&& cst_streq(ffeature_string(word,"gpos"),"content"))) { item_set_string(seg,"name","yy"); } } } } for(word=relation_tail(utt_relation(u,"Transcription"));word;word=item_prev(word)) { if(item_feat_present(word,"no_pl")) continue; for(seg=item_last_daughter(word);seg;seg=item_prev(seg)) { name=item_feat_string(seg,"name"); pair=russian_vpair(name); if(pair!=NULL) { answer=val_string(cart_interpret(item_as(seg,"Segment"),&ru_vpair_cart)); if(cst_streq(answer,"Y")) item_set_string(seg,"name",pair); } } } return u; }
/* Flite_Text_Analyzer_analysis: text analysis */ void Flite_Text_Analyzer_analysis(Flite_Text_Analyzer * analyzer, const char *text) { int i; cst_item *s; Flite_Utterance *fu; if (analyzer == NULL || text == NULL) return; if (analyzer->pointer != NULL) Flite_Text_Analyzer_clear(analyzer); /* allocate */ fu = (Flite_Utterance *) malloc(sizeof(Flite_Utterance)); /* create voice */ fu->v = REGISTER_VOX(NULL); if (fu->v == NULL) { free(fu); return; } /* create utterance */ fu->u = flite_synth_text(text, fu->v); if (fu->u == NULL) { UNREGISTER_VOX(fu->v); free(fu); return; } /* count number of phonemes */ for (fu->nitem = 0, s = relation_head(utt_relation(fu->u, "Segment")); s; s = item_next(s), fu->nitem++); if (fu->nitem == 0) { delete_utterance(fu->u); UNREGISTER_VOX(fu->v); free(fu); return; } /* save informations */ fu->items = (cst_item **) malloc(sizeof(cst_item *) * fu->nitem); for (i = 0, s = relation_head(utt_relation(fu->u, "Segment")); s; s = item_next(s), i++) fu->items[i] = s; analyzer->pointer = (void *) fu; }
static void fix_ah(cst_utterance *u) { /* This should really be done in the index itself */ const cst_item *s; for (s=relation_head(utt_relation(u,"Segment")); s; s=item_next(s)) if (cst_streq(item_feat_string(s,"name"),"ah")) item_set_string(s,"name","aa"); }
/* Flite_HTS_Engine_synthesis: speech synthesis */ void Flite_HTS_Engine_synthesis(Flite_HTS_Engine * f, char *txt, FILE * wavfp) { int i; cst_voice *v = NULL; cst_utterance *u = NULL; cst_item *s = NULL; char **label_data = NULL; int label_size = 0; /* text analysis part */ v = REGISTER_VOX(NULL); if (v == NULL) return; u = flite_synth_text(txt, v); if (u == NULL) return; for (s = relation_head(utt_relation(u, "Segment")); s; s = item_next(s)) label_size++; if (label_size <= 0) return; label_data = (char **) calloc(label_size, sizeof(char *)); for (i = 0, s = relation_head(utt_relation(u, "Segment")); s; s = item_next(s), i++) { label_data[i] = (char *) calloc(MAXBUFLEN, sizeof(char)); Flite_HTS_Engine_create_label(f, s, label_data[i]); } /* speech synthesis part */ HTS_Engine_load_label_from_string_list(&f->engine, label_data, label_size); HTS_Engine_create_sstream(&f->engine); HTS_Engine_create_pstream(&f->engine); HTS_Engine_create_gstream(&f->engine); if (wavfp != NULL) HTS_Engine_save_riff(&f->engine, wavfp); HTS_Engine_refresh(&f->engine); for (i = 0; i < label_size; i++) free(label_data[i]); free(label_data); delete_utterance(u); UNREGISTER_VOX(v); }
cst_utterance *asis_to_pm(cst_utterance *utt) { /* Copy the PM structure from the units unchanged */ cst_item *u; cst_lpcres *target_lpcres; int unit_start, unit_end; int utt_pms, utt_size, i; cst_sts_list *sts_list; sts_list = val_sts_list(utt_feat_val(utt,"sts_list")); target_lpcres = new_lpcres(); /* Pass one to find the size */ utt_pms = utt_size = 0; for (u=relation_head(utt_relation(utt,"Unit")); u; u=item_next(u)) { unit_start = item_feat_int(u,"unit_start"); unit_end = item_feat_int(u,"unit_end"); utt_size += get_unit_size(sts_list,unit_start,unit_end); utt_pms += unit_end - unit_start; item_set_int(u,"target_end",utt_size); } lpcres_resize_frames(target_lpcres,utt_pms); /* Pass two to fill in the values */ utt_pms = utt_size = 0; for (u=relation_head(utt_relation(utt,"Unit")); u; u=item_next(u)) { unit_start = item_feat_int(u,"unit_start"); unit_end = item_feat_int(u,"unit_end"); for (i=unit_start; i<unit_end; i++,utt_pms++) { utt_size += get_frame_size(sts_list, i); target_lpcres->times[utt_pms] = utt_size; } } utt_set_feat(utt,"target_lpcres",lpcres_val(target_lpcres)); return utt; }
static cst_utterance *print_info(cst_utterance *u) { cst_item *item; const char *relname; relname = utt_feat_string(u,"print_info_relation"); for (item=relation_head(utt_relation(u,relname)); item; item=item_next(item)) { printf("%s ",item_feat_string(item,"name")); } printf("\n"); return u; }
cst_utterance *cart_duration(cst_utterance *u) { cst_cart *dur_tree; cst_item *s; float zdur, dur_stretch, local_dur_stretch, dur; float end; dur_stats *ds; const dur_stat *dur_stat; end = 0; if (feat_present(u->features,"no_segment_duration_model")) return u; /* not all methods need segment durations */ dur_tree = val_cart(feat_val(u->features,"dur_cart")); dur_stretch = get_param_float(u->features,"duration_stretch", 1.0); ds = val_dur_stats(feat_val(u->features,"dur_stats")); for (s=relation_head(utt_relation(u,"Segment")); s; s=item_next(s)) { zdur = val_float(cart_interpret(s,dur_tree)); dur_stat = phone_dur_stat(ds,item_name(s)); local_dur_stretch = ffeature_float(s, "R:SylStructure.parent.parent." "R:Token.parent.local_duration_stretch"); if (local_dur_stretch) local_dur_stretch *= dur_stretch; else local_dur_stretch = dur_stretch; dur = local_dur_stretch * ((zdur*dur_stat->stddev)+dur_stat->mean); DPRINTF(0,("phone %s accent %s stress %s pdur %f stretch %f mean %f std %f dur %f\n", item_name(s), ffeature_string(s,"R:SylStructure.parent.accented"), ffeature_string(s,"R:SylStructure.parent.stress"), zdur, local_dur_stretch, dur_stat->mean, dur_stat->stddev, dur)); end += dur; item_set_float(s,"end",end); } return u; }
cst_utterance *default_pos_tagger(cst_utterance *u) { cst_item *word; const cst_val *p; const cst_cart *tagger; p = get_param_val(u->features,"pos_tagger_cart",NULL); if (p == NULL) return u; tagger = val_cart(p); for (word=relation_head(utt_relation(u,"Word")); word; word=item_next(word)) { p = cart_interpret(word,tagger); item_set_string(word,"pos",val_string(p)); } return u; }
static cst_utterance *print_info(cst_utterance *u) { cst_item *item; const char *relname; relname = utt_feat_string(u,"print_info_relation"); for (item=relation_head(utt_relation(u,relname)); item; item=item_next(item)) { printf("%s ",item_feat_string(item,"name")); #if 0 if (cst_streq("+",ffeature_string(item,"ph_vc"))) printf("%s",ffeature_string(item,"R:SylStructure.parent.stress")); printf(" "); #endif } printf("\n"); return u; }
string getPhonemes( const char* sText ) { string sRet; cst_features* args = new_features(); cst_voice* v; cst_utterance* u; cst_item* s; const char* name; //const cst_val* d; flite_init(); v = register_cmu_us_no_wave( NULL ); u = flite_synth_text( sText, v ); for ( s = relation_head( utt_relation( u, "Segment" ) ); s; s = item_next( s ) ) { sRet += item_feat_string( s, "name" ); float test = item_feat_float( s, "end" ); //d = segment_duration( s ); /* If its a vowel and is stressed output stress value */ if ( ( cst_streq( "+", ffeature_string( s, "ph_vc" ) ) ) && ( cst_streq( "1", ffeature_string( s, "R:SylStructure.parent.stress" ) ) ) ) { sRet += "1"; } sRet += " "; } delete_utterance( u ); delete_features( args ); return sRet; }
cst_utterance *flowm_print_relation_callback(cst_utterance *u) { /* Say the details of a named relation for display */ char rst[FL_MAX_MSG_CHARS]; const char *name; const char *relname; cst_item *item; char *space; space = ""; relname = get_param_string(u->features,"print_info_relation", NULL); cst_sprintf(rst,"%s: ",relname); if (!relname) { mbstowcs(fl_tts_msg,"",FL_MAX_MSG_CHARS); return u; } for (item=relation_head(utt_relation(u,relname)); item; item=item_next(item)) { name = item_feat_string(item,"name"); if (cst_strlen(name)+1+4 < FL_MAX_MSG_CHARS) cst_sprintf(rst,"%s%s%s",rst,space,name); else if (cst_strlen(rst)+4 < FL_MAX_MSG_CHARS) cst_sprintf(rst,"%s ...",rst); else break; space = " "; } mbstowcs(fl_tts_msg,rst,FL_MAX_MSG_CHARS); return u; }
cst_utterance *default_textanalysis(cst_utterance *u) { cst_item *t,*word; cst_relation *word_rel; cst_val *words; const cst_val *w; const cst_val *ttwv; word_rel = utt_relation_create(u,"Word"); ttwv = feat_val(u->features, "tokentowords_func"); for (t=relation_head(utt_relation(u,"Token")); t; t=item_next(t)) { if (ttwv) words = (cst_val *)(*val_itemfunc(ttwv))(t); else words = default_tokentowords(t); for (w=words; w; w=val_cdr(w)) { word = item_add_daughter(t,NULL); if (cst_val_consp(val_car(w))) { /* Has extra features */ item_set_string(word,"name",val_string(val_car(val_car(w)))); feat_copy_into(val_features(val_cdr(val_car(w))), item_feats(word)); } else item_set_string(word,"name",val_string(val_car(w))); relation_append(word_rel,word); } delete_val(words); } return u; }
cst_utterance *russian_lexical_insertion(cst_utterance *u) { cst_item *word; cst_relation *sylstructure,*seg,*syl,*sylvowel,*transcription; const cst_val *p; const char *phone_name; cst_val *phones; cst_item *ssword, *sssyl, *segitem, *sylitem, *seg_in_syl, *svsyl, *vowel_in_syl, *tword, *seg_in_word; cst_item *i,*tmp; int num_segs; int total_num_segs=0; syl = utt_relation_create(u,"Syllable"); sylstructure = utt_relation_create(u,"SylStructure"); seg = utt_relation_create(u,"Segment"); sylvowel = utt_relation_create(u,"SylVowel"); transcription = utt_relation_create(u,"Transcription"); for (word=relation_head(utt_relation(u,"Word"));word;word=item_next(word)) { phones=word_to_phones(word); if(!phones) continue; num_segs=val_length(phones); if((total_num_segs+num_segs)>max_num_segs) { delete_val(phones); break; } ssword = relation_append(sylstructure,word); tword = relation_append(transcription,word); for (sssyl=NULL,sylitem=NULL,p=phones; p; p=val_cdr(p)) { if (sylitem == NULL) { sylitem = relation_append(syl,NULL); sssyl = item_add_daughter(ssword,sylitem); } segitem = relation_append(seg,NULL); phone_name = val_string(val_car(p)); item_set_string(segitem,"name",phone_name); seg_in_syl = item_add_daughter(sssyl,segitem); seg_in_word = item_add_daughter(tword,segitem); if(is_vowel(phone_name)) { svsyl=relation_append(sylvowel,sylitem); vowel_in_syl=item_add_daughter(svsyl,segitem); } if (ru_syl_boundary(seg_in_syl,val_cdr(p))) { sylitem = NULL; if (sssyl) item_set_string(sssyl,"stress","0"); } } assign_stress(word); delete_val(phones); total_num_segs+=num_segs; } i=relation_head(utt_relation(u,"Word")); while(i) { tmp=item_next(i); if(item_as(i,"Transcription")==NULL) { delete_item(item_as(i,"Token")); delete_item(item_as(i,"Phrase")); delete_item(i); } i=tmp; } i=relation_head(utt_relation(u,"Phrase")); while(i) { tmp=item_next(i); if(item_daughter(i)==NULL) delete_item(i); i=tmp; } return u; }
cst_utterance *flowm_utt_callback(cst_utterance *u) { char rst[FL_MAX_MSG_CHARS]; const char *tok; cst_item *item; char *space; int extend_length; /* In order to stop the synthesizer if the STOP button is pressed */ /* This stops the synthesis of the next utterance */ if ((flowm_play_status == FLOWM_PLAY) || (flowm_play_status == FLOWM_SKIP)) { if (TTSWindow) { rst[0] = '\0'; space = ""; for (item=relation_head(utt_relation(u,"Token")); item; item=item_next(item)) { tok = item_feat_string(item,"name"); if (cst_streq("",space)) /* Only do this on the first token/word */ flowm_file_pos = item_feat_int(item,"file_pos"); extend_length = cst_strlen(rst) + 1 + cst_strlen(item_feat_string(item,"prepunctuation"))+ cst_strlen(item_feat_string(item,"punc")); if (cst_strlen(tok)+extend_length+4 < FL_MAX_MSG_CHARS) cst_sprintf(rst,"%s%s%s%s%s",rst,space, item_feat_string(item,"prepunctuation"), tok, item_feat_string(item,"punc")); else { if (cst_strlen(rst)+4 < FL_MAX_MSG_CHARS) cst_sprintf(rst,"%s ...",rst); break; } space = " "; } if (flowm_file_pos > flowm_prev_utt_pos[flowm_utt_pos_pos]) { if ((flowm_utt_pos_pos+1) >= FLOWM_NUM_UTT_POS) { /* Filled it up, so move it down */ memmove(flowm_prev_utt_pos,&flowm_prev_utt_pos[1], sizeof(int)*(FLOWM_NUM_UTT_POS-10)); flowm_utt_pos_pos = (FLOWM_NUM_UTT_POS-10); } flowm_utt_pos_pos++; flowm_prev_utt_pos[flowm_utt_pos_pos] = flowm_file_pos; } /* Send text to TTSWindow */ mbstowcs(fl_tts_msg,rst,FL_MAX_MSG_CHARS); SetDlgItemText(TTSWindow, FL_SYNTHTEXT, fl_tts_msg); /* Update file pos percentage in FilePos window */ cst_sprintf(rst,"%2.3f",flowm_find_file_percentage()); mbstowcs(fl_fp_msg,rst,FL_MAX_MSG_CHARS); SetDlgItemText(TTSWindow, FL_FILEPOS, fl_fp_msg); SystemIdleTimerReset(); /* keep alive while synthesizing */ if (flowm_play_status == FLOWM_SKIP) flowm_play_status = FLOWM_PLAY; } return u; } else { delete_utterance(u); return 0; } }
static cst_utterance *tokentosegs(cst_utterance *u) { cst_item *t; cst_relation *seg, *syl, *sylstructure, *word; cst_item *sylitem, *sylstructureitem, *worditem, *sssyl; cst_phoneset *ps; ps = val_phoneset(utt_feat_val(u, "phoneset")); /* Just copy tokens into the Segment relation */ seg = utt_relation_create(u, "Segment"); syl = utt_relation_create(u, "Syllable"); word = utt_relation_create(u, "Word"); sylstructure = utt_relation_create(u, "SylStructure"); sssyl = sylitem = worditem = sylstructureitem = 0; for (t = relation_head(utt_relation(u, "Token")); t; t = item_next(t)) { cst_item *segitem = relation_append(seg, NULL); char const *pname = item_feat_string(t, "name"); char *name = cst_strdup(pname); if (worditem == 0) { worditem = relation_append(word,NULL); item_set_string(worditem, "name", "phonestring"); sylstructureitem = relation_append(sylstructure,worditem); } if (sylitem == 0) { sylitem = relation_append(syl,NULL); sssyl = item_add_daughter(sylstructureitem,sylitem); } if (name[cst_strlen(name)-1] == '1') { item_set_string(sssyl,"stress","1"); name[cst_strlen(name)-1] = '\0'; } else if (name[cst_strlen(name)-1] == '0') { item_set_string(sssyl,"stress","0"); name[cst_strlen(name)-1] = '\0'; } if (cst_streq(name,"-")) { sylitem = 0; /* syllable break */ } else if (phone_id(ps, name) == -1) { cst_errmsg("Phone `%s' not in phoneset\n", pname); cst_error(); } else { item_add_daughter(sssyl,segitem); item_set_string(segitem, "name", name); } cst_free(name); } return u; }
cst_utterance *default_lexical_insertion(cst_utterance *u) { cst_item *word; cst_relation *sylstructure,*seg,*syl; cst_lexicon *lex, *ulex = NULL; const cst_val *p; char *phone_name; char *stress = "0"; cst_val *phones; cst_item *ssword, *sssyl, *segitem, *sylitem, *seg_in_syl; lex = val_lexicon(feat_val(u->features,"lexicon")); if (feat_present(u->features, "user_lexicon")) ulex = val_lexicon(feat_val(u->features, "user_lexicon")); syl = utt_relation_create(u,"Syllable"); sylstructure = utt_relation_create(u,"SylStructure"); seg = utt_relation_create(u,"Segment"); for (word=relation_head(utt_relation(u,"Word")); word; word=item_next(word)) { ssword = relation_append(sylstructure,word); phones = NULL; /* FIXME: need to make sure that textanalysis won't split tokens with explicit pronunciation (or that it will propagate such to words, then we can remove the path here) */ if (item_feat_present(item_parent(item_as(word, "Token")), "phones")) phones = (cst_val *) item_feat(item_parent(item_as(word, "Token")), "phones"); else { if (ulex) phones = lex_lookup(ulex,item_feat_string(word, "name"),0); if (phones == NULL) phones = lex_lookup(lex,item_feat_string(word,"name"),0); } for (sssyl=NULL,sylitem=NULL,p=phones; p; p=val_cdr(p)) { if (sylitem == NULL) { sylitem = relation_append(syl,NULL); sssyl = item_add_daughter(ssword,sylitem); stress = "0"; } segitem = relation_append(seg,NULL); phone_name = cst_strdup(val_string(val_car(p))); if (phone_name[strlen(phone_name)-1] == '1') { stress = "1"; phone_name[strlen(phone_name)-1] = '\0'; } else if (phone_name[strlen(phone_name)-1] == '0') { stress = "0"; phone_name[strlen(phone_name)-1] = '\0'; } item_set_string(segitem,"name",phone_name); seg_in_syl = item_add_daughter(sssyl,segitem); if ((lex->syl_boundary)(seg_in_syl,val_cdr(p))) { sylitem = NULL; if (sssyl) item_set_string(sssyl,"stress",stress); } cst_free(phone_name); } if (!item_feat_present(item_parent(item_as(word, "Token")), "phones")) delete_val(phones); } return u; }
cst_utterance *default_lexical_insertion(cst_utterance *u) { cst_item *word; cst_relation *sylstructure,*seg,*syl; cst_lexicon *lex; const cst_val *lex_addenda = NULL; const cst_val *p, *wp = NULL; char *phone_name; char *stress = "0"; const char *pos; cst_val *phones; cst_item *ssword, *sssyl, *segitem, *sylitem, *seg_in_syl; lex = val_lexicon(feat_val(u->features,"lexicon")); if (lex->lex_addenda) lex_addenda = lex->lex_addenda; syl = utt_relation_create(u,"Syllable"); sylstructure = utt_relation_create(u,"SylStructure"); seg = utt_relation_create(u,"Segment"); for (word=relation_head(utt_relation(u,"Word")); word; word=item_next(word)) { ssword = relation_append(sylstructure,word); pos = ffeature_string(word,"pos"); phones = NULL; wp = NULL; /* printf("awb_debug word %s pos %s gpos %s\n", item_feat_string(word,"name"), pos, ffeature_string(word,"gpos")); */ /* FIXME: need to make sure that textanalysis won't split tokens with explicit pronunciation (or that it will propagate such to words, then we can remove the path here) */ if (item_feat_present(item_parent(item_as(word, "Token")), "phones")) phones = (cst_val *) item_feat(item_parent(item_as(word, "Token")), "phones"); else { wp = val_assoc_string(item_feat_string(word, "name"),lex_addenda); if (wp) phones = (cst_val *)val_cdr(val_cdr(wp)); else phones = lex_lookup(lex,item_feat_string(word,"name"),pos); } for (sssyl=NULL,sylitem=NULL,p=phones; p; p=val_cdr(p)) { if (sylitem == NULL) { sylitem = relation_append(syl,NULL); sssyl = item_add_daughter(ssword,sylitem); stress = "0"; } segitem = relation_append(seg,NULL); phone_name = cst_strdup(val_string(val_car(p))); if (phone_name[cst_strlen(phone_name)-1] == '1') { stress = "1"; phone_name[cst_strlen(phone_name)-1] = '\0'; } else if (phone_name[cst_strlen(phone_name)-1] == '0') { stress = "0"; phone_name[cst_strlen(phone_name)-1] = '\0'; } item_set_string(segitem,"name",phone_name); seg_in_syl = item_add_daughter(sssyl,segitem); #if 0 printf("awb_debug ph %s\n",phone_name); #endif if ((lex->syl_boundary)(seg_in_syl,val_cdr(p))) { #if 0 printf("awb_debug SYL\n"); #endif sylitem = NULL; if (sssyl) item_set_string(sssyl,"stress",stress); } cst_free(phone_name); } if (!item_feat_present(item_parent(item_as(word, "Token")), "phones") && ! wp) delete_val(phones); } return u; }
float flite_text_to_speech_phenome( const char* text, cst_voice* voice, const char* outtype, void* pStream ) { cst_utterance* u; float dur; float end_last = 0; float end_current = 0; float dur_current = 0; float dur_sum = 0; //feat_set_float( voice->features, "duration_stretch", 1 ); u = flite_synth_text( text, voice ); cst_item* s; string sRet; int nPhoneme = 0; for ( s = relation_head( utt_relation( u, "Segment" ) ); s; s = item_next( s ) ) { SPhenomeTiming ps; string sPhoneme = item_feat_string( s, "name" ); sRet += sPhoneme; end_current = item_feat_float( s, "end" ); dur_current = end_current - end_last; //if ( !( nPhoneme == 0 && sPhoneme == "pau" ) ) //{ dur_sum += dur_current; //} ps.fWeight = 1; /* If its a vowel and is stressed output stress value */ if ( ( cst_streq( "+", ffeature_string( s, "ph_vc" ) ) ) && ( cst_streq( "1", ffeature_string( s, "R:SylStructure.parent.stress" ) ) ) ) { sRet += "1"; ps.fWeight = 1.3; } sRet += " "; if ( pStream ) { // fade into each other ps.sName = sPhoneme; ps.fStart = end_current - dur_current; ps.fEnd = end_current; ps.fDuration = dur_current; ( ( CryMT::queue<SPhenomeTiming>* )pStream )->push( ps ); } end_last = end_current; ++nPhoneme; } dur = flite_process_output( u, outtype, FALSE ); delete_utterance( u ); return dur; }
int main(int argc, char *argv[]) { char *s,*fn; cst_voice *voice; // synthesis voice cst_utterance *utt; // current utterance cst_wave *cstwave; // synthesised wave Wave w; // HTK wave short *p; HTime sampPeriod = 625.0; int n; MemHeap mem; AudioOut a; try { if (InitHTK(argc,argv,version)<SUCCESS){ ReportErrors("Main",0); exit(-1); } if (NumArgs() !=2) { printf("SFliteTest synthstring file\n"); exit(0); } CreateHeap(&mem,"heap",MSTAK,1,0.0,10000,100000); s = GetStrArg(); fn = GetStrArg(); printf("Synth: %s -> %s\n",s,fn); // initialise Edinburgh cst lib cst_regex_init(); // setup the voice voice = register_cmu_us_kal16(NULL); // convert text to waveform utt = flite_synth_text(s,voice); if (utt==NULL) { HRError(12001,"SFliteTest: cant synthesise %s\n",s); throw ATK_Error(12001); } cstwave = utt_wave(utt); p = cstwave->samples; n = cstwave->num_samples; w = OpenWaveOutput(&mem,&sampPeriod,n); printf("%d samples created\n",n); PutWaveSample(w,n,p); if (CloseWaveOutput(w,WAV,fn)<SUCCESS){ ReportErrors("Main",0); exit(-1); } // explore structure const cst_item *it, *itlast = NULL; float x,y; int i; string lastword="0"; x = 0; for (i=1,it = relation_head(utt_relation(utt, "Segment")); it!=NULL; it = item_next(it),i++) { printf("Segment %d\n",i); y = item_feat_float(it,"end"); string ph = string(ffeature_string(it,"p.name")); string wd = string(ffeature_string(it,"R:SylStructure.parent.parent.name")); //printf("end = %f ph=%s wd=%s\n",y,ph.c_str(),wd.c_str()); if (wd != lastword){ printf("**** end of %s = %f\n",lastword.c_str(),x); lastword=wd; } x = y; } //if (itlast!=NULL) { // word = string(ffeature_string(itlast,"R:SylStructure.parent.parent.name")); // idx = text.find(word); //} return 0; } catch (ATK_Error e){ ReportErrors("ATK",e.i); } catch (HTK_Error e){ ReportErrors("HTK",e.i); } return 0; }