cst_utterance *f0_targets_to_pm(cst_utterance *utt) { cst_item *t; float pos,lpos,f0,lf0,m; double time; int pm; cst_sts_list *sts_list; cst_lpcres *target_lpcres; sts_list = val_sts_list(utt_feat_val(utt,"sts_list")); lpos = 0; lf0 = 120; /* hmm */ pm = 0; time = 0; /* First pass to count how many pms will be required */ for (t=relation_head(utt_relation(utt,"Target")); t; t=item_next(t), lf0 = f0, lpos = pos) /* changed by dhopkins */ { pos = item_feat_float(t,"pos"); f0 = item_feat_float(t,"f0"); if (time == pos) continue; m = (f0-lf0)/(pos-lpos); for ( ; time < pos; pm++) { time += 1/(lf0 + ((time-lpos)*m)); } } target_lpcres = new_lpcres(); lpcres_resize_frames(target_lpcres,pm); lpos = 0; lf0 = 120; pm = 0; time = 0; /* Second pass puts the values in */ for (t=relation_head(utt_relation(utt,"Target")); t; t=item_next(t), lf0 = f0, lpos = pos) /* changed by dhopkins */ { pos = item_feat_float(t,"pos"); f0 = item_feat_float(t,"f0"); if (time == pos) continue; m = (f0-lf0)/(pos-lpos); for ( ; time < pos; pm++) { time += 1/(lf0 + ((time-lpos)*m)); target_lpcres->times[pm] = sts_list->sample_rate * time; } } utt_set_feat(utt,"target_lpcres",lpcres_val(target_lpcres)); return utt; }
/*
 * Feature function: duration of a segment.
 *
 * seg is first viewed in the "Segment" relation via item_as().
 *   - If that yields no item, VAL_STRING_0 is returned.
 *   - If the segment has no predecessor, its own "end" feature value is
 *     returned as-is.
 *   - Otherwise a freshly built float value holding
 *     end(segment) - end(previous segment) is returned.
 */
static const cst_val *segment_duration(const cst_item *seg)
{
    const cst_item *s = item_as(seg, "Segment");
    const cst_item *before;

    if (s == NULL)
        return VAL_STRING_0;

    before = item_prev(s);
    if (before == NULL)
        return item_feat(s, "end");

    /* It should be okay to construct this as it will get dereferenced
       when the CART interpreter frees its feature cache. */
    return float_val(item_feat_float(s, "end")
                     - item_feat_float(before, "end"));
}
/* Dummy F0 modelling for phones, copied directly from us_f0_model.c */
/*
 * Builds a two-point F0 contour for utterance u.
 *
 * A "Target" relation is created holding two items:
 *   - pos 0.0               with f0 = mean + stddev
 *   - pos = "end" of the last item in the "Segment" relation
 *                           with f0 = mean - stddev
 *
 * mean is read from "target_f0_mean" (100.0 passed as the default) and
 * scaled by "f0_shift" (default 1.0); stddev comes from
 * "target_f0_stddev" (default 12.0).
 *
 * Returns u.
 */
cst_utterance *flat_prosody(cst_utterance *u)
{
    /* F0 target model */
    cst_item *last_seg, *t;
    cst_relation *targ_rel;
    float mean, stddev;

    targ_rel = utt_relation_create(u, "Target");
    mean = get_param_float(u->features, "target_f0_mean", 100.0);
    mean *= get_param_float(u->features, "f0_shift", 1.0);
    stddev = get_param_float(u->features, "target_f0_stddev", 12.0);

    /* Only the final segment is needed (for its end time); look it up once,
       before any target is appended. */
    last_seg = relation_tail(utt_relation(u, "Segment"));

    t = relation_append(targ_rel, NULL);
    item_set_float(t, "pos", 0.0);
    item_set_float(t, "f0", mean + stddev);

    t = relation_append(targ_rel, NULL);
    item_set_float(t, "pos", item_feat_float(last_seg, "end"));
    item_set_float(t, "f0", mean - stddev);

    return u;
}
/*
 * Write relation r as text, one line per item: "<end> <name> \n".
 *
 * filename "-" selects stdout; any other name is opened for writing.
 * An item without an "end" feature is written with 0.00, and an item
 * without a "name" feature is written with "_".
 *
 * Returns CST_OK_FORMAT on success, or CST_ERROR_FORMAT (after reporting
 * through cst_errmsg) when the file cannot be opened.
 */
int relation_save(cst_relation *r, const char *filename)
{
    cst_file fd;
    cst_item *item;

    if (cst_streq(filename, "-"))
    {
        fd = stdout;
    }
    else
    {
        fd = cst_fopen(filename, CST_OPEN_WRITE);
        if (fd == 0)
        {
            cst_errmsg("relation_save: can't open file \"%s\" for writing\n",
                       filename);
            return CST_ERROR_FORMAT;
        }
    }

    item = relation_head(r);
    while (item)
    {
        const char *label;

        cst_fprintf(fd, "%f ",
                    item_feat_present(item, "end")
                        ? item_feat_float(item, "end")
                        : 0.00);

        label = item_feat_present(item, "name")
                    ? item_feat_string(item, "name")
                    : "_";
        cst_fprintf(fd, "%s ", label);

        cst_fprintf(fd, "\n");
        item = item_next(item);
    }

    /* stdout is not ours to close */
    if (fd != stdout)
        cst_fclose(fd);

    return CST_OK_FORMAT;
}
void test_hrg(void) { cst_utterance *u; cst_relation *r; cst_item *item = 0; int i; u = new_utterance(); r = utt_relation_create(u, "Segment"); for (i = 0; i < 10; i++) { char buff[20]; sprintf(buff, "seg_%03d", i); if (i == 0) item = relation_append(r, NULL); else item = item_append(item, NULL); item_set_string(item, "name", buff); item_set_float(item, "duration", i * 0.20); } for (i = 0, item = relation_head(utt_relation(u, "Segment")); item; item = item_next(item), i++) { TEST_CHECK(item_feat_float(item, "duration") == correct_list[i]); } delete_utterance(u); }
/*
 * Demo: builds a ten-item "Segment" relation (names seg_000 .. seg_009,
 * "duration" = index * 0.20) and prints index, name and duration of each
 * item.  Command-line arguments are not used.  Always returns 0.
 */
int main(int argc, char **argv)
{
    cst_utterance *utt = new_utterance();
    cst_relation *segments = utt_relation_create(utt, "Segment");
    cst_item *cur = 0;
    int idx;

    for (idx = 0; idx < 10; idx++)
    {
        char label[20];

        sprintf(label, "seg_%03d", idx);

        /* The first item starts the relation; the rest chain after the
           previously created item. */
        cur = (idx == 0) ? relation_append(segments, NULL)
                         : item_append(cur, NULL);

        item_set_string(cur, "name", label);
        item_set_float(cur, "duration", idx * 0.20);
    }

    idx = 0;
    cur = relation_head(utt_relation(utt, "Segment"));
    while (cur)
    {
        printf("Segment %d %s %f\n", idx,
               item_feat_string(cur, "name"),
               item_feat_float(cur, "duration"));
        cur = item_next(cur);
        idx++;
    }

    delete_utterance(utt);

    return 0;
}
/*
 * Decide whether frame item m counts as voiced.
 *
 * The "name" and "ph_vc" features are read from the item reached through
 * R:mcep_link.parent.R:segstate.parent.  The result is:
 *   0 if that name is "pau";
 *   1 if ph_vc is "+";
 *   otherwise 1 when m's own "voicing" feature is above 0.5, else 0.
 */
static int voiced_frame(cst_item *m)
{
    const char *vc =
        ffeature_string(m, "R:mcep_link.parent.R:segstate.parent.ph_vc");
    const char *phone =
        ffeature_string(m, "R:mcep_link.parent.R:segstate.parent.name");

    /* Short-circuiting keeps the same lookup order: "voicing" is only
       consulted for non-pause phones whose ph_vc is not "+".
       Even though the voicing range is 0-10, the threshold really is 0.5. */
    return !cst_streq(phone, "pau")
           && (cst_streq("+", vc) || item_feat_float(m, "voicing") > 0.5);
}
// Synthesises sText with the cmu_us_no_wave voice and returns the names of
// the resulting "Segment" items as one string, each name followed by a
// space.  A segment whose ph_vc feature is "+" and whose
// R:SylStructure.parent.stress feature is "1" gets a "1" appended to its
// name.
//
// Returns an empty string when synthesis yields no utterance.
string getPhonemes( const char* sText )
{
    string sRet;

    flite_init();
    cst_voice* v = register_cmu_us_no_wave( NULL );
    cst_utterance* u = flite_synth_text( sText, v );
    if ( !u )
    {
        // Nothing to walk; avoid dereferencing a null utterance.
        return sRet;
    }

    for ( cst_item* s = relation_head( utt_relation( u, "Segment" ) ); s; s = item_next( s ) )
    {
        sRet += item_feat_string( s, "name" );

        /* If its a vowel and is stressed output stress value */
        if ( cst_streq( "+", ffeature_string( s, "ph_vc" ) ) &&
             cst_streq( "1", ffeature_string( s, "R:SylStructure.parent.stress" ) ) )
        {
            sRet += "1";
        }

        sRet += " ";
    }

    delete_utterance( u );

    return sRet;
}
float flite_text_to_speech_phenome( const char* text, cst_voice* voice, const char* outtype, void* pStream ) { cst_utterance* u; float dur; float end_last = 0; float end_current = 0; float dur_current = 0; float dur_sum = 0; //feat_set_float( voice->features, "duration_stretch", 1 ); u = flite_synth_text( text, voice ); cst_item* s; string sRet; int nPhoneme = 0; for ( s = relation_head( utt_relation( u, "Segment" ) ); s; s = item_next( s ) ) { SPhenomeTiming ps; string sPhoneme = item_feat_string( s, "name" ); sRet += sPhoneme; end_current = item_feat_float( s, "end" ); dur_current = end_current - end_last; //if ( !( nPhoneme == 0 && sPhoneme == "pau" ) ) //{ dur_sum += dur_current; //} ps.fWeight = 1; /* If its a vowel and is stressed output stress value */ if ( ( cst_streq( "+", ffeature_string( s, "ph_vc" ) ) ) && ( cst_streq( "1", ffeature_string( s, "R:SylStructure.parent.stress" ) ) ) ) { sRet += "1"; ps.fWeight = 1.3; } sRet += " "; if ( pStream ) { // fade into each other ps.sName = sPhoneme; ps.fStart = end_current - dur_current; ps.fEnd = end_current; ps.fDuration = dur_current; ( ( CryMT::queue<SPhenomeTiming>* )pStream )->push( ps ); } end_last = end_current; ++nPhoneme; } dur = flite_process_output( u, outtype, FALSE ); delete_utterance( u ); return dur; }
int main(int argc, char *argv[]) { char *s,*fn; cst_voice *voice; // synthesis voice cst_utterance *utt; // current utterance cst_wave *cstwave; // synthesised wave Wave w; // HTK wave short *p; HTime sampPeriod = 625.0; int n; MemHeap mem; AudioOut a; try { if (InitHTK(argc,argv,version)<SUCCESS){ ReportErrors("Main",0); exit(-1); } if (NumArgs() !=2) { printf("SFliteTest synthstring file\n"); exit(0); } CreateHeap(&mem,"heap",MSTAK,1,0.0,10000,100000); s = GetStrArg(); fn = GetStrArg(); printf("Synth: %s -> %s\n",s,fn); // initialise Edinburgh cst lib cst_regex_init(); // setup the voice voice = register_cmu_us_kal16(NULL); // convert text to waveform utt = flite_synth_text(s,voice); if (utt==NULL) { HRError(12001,"SFliteTest: cant synthesise %s\n",s); throw ATK_Error(12001); } cstwave = utt_wave(utt); p = cstwave->samples; n = cstwave->num_samples; w = OpenWaveOutput(&mem,&sampPeriod,n); printf("%d samples created\n",n); PutWaveSample(w,n,p); if (CloseWaveOutput(w,WAV,fn)<SUCCESS){ ReportErrors("Main",0); exit(-1); } // explore structure const cst_item *it, *itlast = NULL; float x,y; int i; string lastword="0"; x = 0; for (i=1,it = relation_head(utt_relation(utt, "Segment")); it!=NULL; it = item_next(it),i++) { printf("Segment %d\n",i); y = item_feat_float(it,"end"); string ph = string(ffeature_string(it,"p.name")); string wd = string(ffeature_string(it,"R:SylStructure.parent.parent.name")); //printf("end = %f ph=%s wd=%s\n",y,ph.c_str(),wd.c_str()); if (wd != lastword){ printf("**** end of %s = %f\n",lastword.c_str(),x); lastword=wd; } x = y; } //if (itlast!=NULL) { // word = string(ffeature_string(itlast,"R:SylStructure.parent.parent.name")); // idx = text.find(word); //} return 0; } catch (ATK_Error e){ ReportErrors("ATK",e.i); } catch (HTK_Error e){ ReportErrors("HTK",e.i); } return 0; }