cst_wave *flite_text_to_wave(const char *text, cst_voice *voice)
{
    cst_utterance *u;
    cst_wave *w;

    if ((u = flite_synth_text(text,voice)) == NULL)
	return NULL;

    w = copy_wave(utt_wave(u));
    delete_utterance(u);
    return w;
}
Ejemplo n.º 2
0
/* Flite_HTS_Engine_synthesize: synthesize speech */
HTS_Boolean Flite_HTS_Engine_synthesize(Flite_HTS_Engine * f, const char *txt, const char *wav)
{
   int i;
   FILE *fp;
   cst_voice *v = NULL;
   cst_utterance *u = NULL;
   cst_item *s = NULL;
   char **label_data = NULL;
   int label_size = 0;

   if (txt == NULL)
      return FALSE;

   /* text analysis part */
   v = REGISTER_VOX(NULL);
   if (v == NULL)
      return FALSE;
   u = flite_synth_text(txt, v);
   if (u == NULL) {
      UNREGISTER_VOX(v);
      return FALSE;
   }
   for (s = relation_head(utt_relation(u, "Segment")); s; s = item_next(s))
      label_size++;
   if (label_size <= 0) {
      delete_utterance(u);
      UNREGISTER_VOX(v);
      return FALSE;
   }
   label_data = (char **) calloc(label_size, sizeof(char *));
   for (i = 0, s = relation_head(utt_relation(u, "Segment")); s; s = item_next(s), i++) {
      label_data[i] = (char *) calloc(MAXBUFLEN, sizeof(char));
      Flite_HTS_Engine_create_label(f, s, label_data[i]);
   }

   /* speech synthesis part */
   HTS_Engine_synthesize_from_strings(&f->engine, label_data, label_size);
   if (wav != NULL) {
      fp = fopen(wav, "wb");
      HTS_Engine_save_riff(&f->engine, fp);
      fclose(fp);
   }
   HTS_Engine_refresh(&f->engine);

   for (i = 0; i < label_size; i++)
      free(label_data[i]);
   free(label_data);

   delete_utterance(u);
   UNREGISTER_VOX(v);

   return TRUE;
}
Ejemplo n.º 3
0
float flite_text_to_speech(const char *text,
			   cst_voice *voice,
			   const char *outtype)
{
    cst_utterance *u;
    float dur;

    u = flite_synth_text(text,voice);
    dur = flite_process_output(u,outtype,FALSE);
    delete_utterance(u);

    return dur;
}
Ejemplo n.º 4
0
/* Flite_Text_Analyzer_analysis: text analysis */
void Flite_Text_Analyzer_analysis(Flite_Text_Analyzer * analyzer, const char *text)
{
   int i;
   cst_item *s;
   Flite_Utterance *fu;

   if (analyzer == NULL || text == NULL)
      return;

   if (analyzer->pointer != NULL)
      Flite_Text_Analyzer_clear(analyzer);

   /* allocate */
   fu = (Flite_Utterance *) malloc(sizeof(Flite_Utterance));

   /* create voice */
   fu->v = REGISTER_VOX(NULL);
   if (fu->v == NULL) {
      free(fu);
      return;
   }

   /* create utterance */
   fu->u = flite_synth_text(text, fu->v);
   if (fu->u == NULL) {
      UNREGISTER_VOX(fu->v);
      free(fu);
      return;
   }

   /* count number of phonemes */
   for (fu->nitem = 0, s = relation_head(utt_relation(fu->u, "Segment")); s; s = item_next(s), fu->nitem++);
   if (fu->nitem == 0) {
      delete_utterance(fu->u);
      UNREGISTER_VOX(fu->v);
      free(fu);
      return;
   }

   /* save informations */
   fu->items = (cst_item **) malloc(sizeof(cst_item *) * fu->nitem);
   for (i = 0, s = relation_head(utt_relation(fu->u, "Segment")); s; s = item_next(s), i++)
      fu->items[i] = s;

   analyzer->pointer = (void *) fu;
}
/* Flite_HTS_Engine_synthesis: speech synthesis */
void Flite_HTS_Engine_synthesis(Flite_HTS_Engine * f, char *txt, FILE * wavfp)
{
    int i;
    cst_voice *v = NULL;
    cst_utterance *u = NULL;
    cst_item *s = NULL;
    char **label_data = NULL;
    int label_size = 0;

    /* text analysis part */
    v = REGISTER_VOX(NULL);
    if (v == NULL)
        return;
    u = flite_synth_text(txt, v);
    if (u == NULL)
        return;
    for (s = relation_head(utt_relation(u, "Segment")); s; s = item_next(s))
        label_size++;
    if (label_size <= 0)
        return;
    label_data = (char **) calloc(label_size, sizeof(char *));
    for (i = 0, s = relation_head(utt_relation(u, "Segment")); s;
            s = item_next(s), i++) {
        label_data[i] = (char *) calloc(MAXBUFLEN, sizeof(char));
        Flite_HTS_Engine_create_label(f, s, label_data[i]);
    }

    /* speech synthesis part */
    HTS_Engine_load_label_from_string_list(&f->engine, label_data, label_size);
    HTS_Engine_create_sstream(&f->engine);
    HTS_Engine_create_pstream(&f->engine);
    HTS_Engine_create_gstream(&f->engine);
    if (wavfp != NULL)
        HTS_Engine_save_riff(&f->engine, wavfp);

    HTS_Engine_refresh(&f->engine);

    for (i = 0; i < label_size; i++)
        free(label_data[i]);
    free(label_data);

    delete_utterance(u);
    UNREGISTER_VOX(v);
}
Ejemplo n.º 6
0
    string getPhonemes( const char* sText )
    {
        string sRet;

        cst_features* args = new_features();
        cst_voice* v;
        cst_utterance* u;
        cst_item* s;
        const char* name;
        //const cst_val* d;

        flite_init();
        v = register_cmu_us_no_wave( NULL );

        u = flite_synth_text( sText, v );

        for ( s = relation_head( utt_relation( u, "Segment" ) ); s; s = item_next( s ) )
        {
            sRet += item_feat_string( s, "name" );
            float test = item_feat_float( s, "end" );
            //d = segment_duration( s );

            /* If its a vowel and is stressed output stress value */
            if ( ( cst_streq( "+", ffeature_string( s, "ph_vc" ) ) ) &&
                    ( cst_streq( "1", ffeature_string( s, "R:SylStructure.parent.stress" ) ) ) )
            {
                sRet += "1";
            }

            sRet += " ";
        }

        delete_utterance( u );
        delete_features( args );

        return sRet;
    }
float flite_text_to_speech(const char *text,
			   cst_voice *voice,
			   const char *outtype)
{
    cst_utterance *u;
    cst_wave *w;
    float durs;

    u = flite_synth_text(text,voice);
    if (u == NULL)
	return -1;

    w = utt_wave(u);

    durs = (float)w->num_samples/(float)w->sample_rate;
	     
    if (cst_streq(outtype,"play"))
	play_wave(w);
    else if (!cst_streq(outtype,"none"))
	cst_wave_save_riff(w,outtype);
    delete_utterance(u);

    return durs;
}
Ejemplo n.º 8
0
static void AppEventLoop(void)
{
    Err error;
    EventType event;

    do 
    {
	/* we should wait for 100ms only so we can server the audio */
	EvtGetEvent(&event, evtWaitForever);
    
	if (SysHandleEvent(&event)) continue;
	if (MenuHandleEvent(0, &event, &error)) continue;
	if (AppHandleEvent(&event)) continue;

	FrmDispatchEvent(&event);

    	/* Serve playing */
	if (FlopPlay == true)
	{
	    if (!flite->samples)
	    {   /* Got stuff to play and we're not currently playing */
		flite->type = FlopOutputType; /* words, phones, wave, stream */
		flite->text = input;               /* text to be synthesized */
		flite_synth_text(flite);                 /* do the synthesis */
		flite->WavePosition = flite->start;
		flite->start += flite->utt_length;        /* update position */
		
		/* highlight the area being spoken */

		if (flite->type != FliteOutputTypeWave)
		    FlopPlay = false;  /* we don't do async play on words/ph */

	    }

	    if (playdata.samples && !playdata.active)
		/* stop and tidy up anything that's finished playing */
		StopPlayStream();
	    else if (flite->samples && !playdata.active)
	    {   /* create a new stream and start it playing */
		flite->PlayPosition = flite->WavePosition;
		SetupPlayStream(flite);
		SndStreamStart(playstream);
	    }
	    else if (!playdata.active)
		/* go into stop mode as there is nothing more to play */
		FlopPlay = false;   /* nothing more to play */
	}
	else
	{
	    if (flite && flite->output)
		StrPrintF(flite->output,"stopped %d",flite->PlayPosition);
	    StopPlayStream();
	    /* flush any other waveform waiting to play */
	    if (flite->samples)
	    {
	    	MemPtrFree(flite->samples);
		flite->num_samples = 0;
		flite->samples = 0;
	    }
	}
	if (flite && flite->output)
	{   /* should be within if above, but for debugging ... */
	    /* Update the output field with any new information */
	    SetField(FlopForm, FlopOutput, flite->output);
	    FrmDrawForm(FrmGetFormPtr(FlopForm));
	}
    } 
    while ((FlopStop==false) && (event.eType != appStopEvent));

    return;
}
Ejemplo n.º 9
0
    float flite_text_to_speech_phenome( const char* text,
                                        cst_voice* voice,
                                        const char* outtype,
                                        void* pStream )
    {
        cst_utterance* u;

        float dur;
        float end_last = 0;
        float end_current = 0;
        float dur_current = 0;
        float dur_sum = 0;

        //feat_set_float( voice->features, "duration_stretch", 1 );

        u = flite_synth_text( text, voice );


        cst_item* s;
        string sRet;

        int nPhoneme = 0;

        for ( s = relation_head( utt_relation( u, "Segment" ) ); s; s = item_next( s ) )
        {
            SPhenomeTiming ps;

            string sPhoneme = item_feat_string( s, "name" );
            sRet += sPhoneme;

            end_current = item_feat_float( s, "end" );
            dur_current = end_current - end_last;

            //if ( !( nPhoneme == 0 && sPhoneme == "pau" ) )
            //{
            dur_sum += dur_current;
            //}

            ps.fWeight = 1;

            /* If its a vowel and is stressed output stress value */
            if ( ( cst_streq( "+", ffeature_string( s, "ph_vc" ) ) ) &&
                    ( cst_streq( "1", ffeature_string( s, "R:SylStructure.parent.stress" ) ) ) )
            {
                sRet += "1";
                ps.fWeight = 1.3;
            }

            sRet += " ";

            if ( pStream )
            {
                // fade into each other
                ps.sName = sPhoneme;
                ps.fStart = end_current - dur_current;
                ps.fEnd = end_current;
                ps.fDuration = dur_current;

                ( ( CryMT::queue<SPhenomeTiming>* )pStream )->push( ps );
            }

            end_last = end_current;
            ++nPhoneme;
        }

        dur = flite_process_output( u, outtype, FALSE );
        delete_utterance( u );

        return dur;
    }
Ejemplo n.º 10
0
int main(int argc, char *argv[])
{
   char *s,*fn;
   cst_voice *voice;     // synthesis voice
   cst_utterance *utt;   // current utterance
   cst_wave *cstwave;       // synthesised wave
   Wave w;  // HTK wave
   short *p;
   HTime sampPeriod = 625.0;
   int n;
   MemHeap mem;
   AudioOut a;

   try {
      if (InitHTK(argc,argv,version)<SUCCESS){
         ReportErrors("Main",0); exit(-1);
      }
      if (NumArgs() !=2) {
         printf("SFliteTest synthstring file\n"); exit(0);
      }
      CreateHeap(&mem,"heap",MSTAK,1,0.0,10000,100000);
      s = GetStrArg();
      fn = GetStrArg();
      printf("Synth: %s -> %s\n",s,fn);
      // initialise Edinburgh cst lib
      cst_regex_init();
      // setup the voice
      voice = register_cmu_us_kal16(NULL);
      // convert text to waveform
      utt = flite_synth_text(s,voice);
      if (utt==NULL) {
         HRError(12001,"SFliteTest: cant synthesise %s\n",s);
         throw ATK_Error(12001);
      }
      cstwave = utt_wave(utt);
      p = cstwave->samples; n = cstwave->num_samples;
      w = OpenWaveOutput(&mem,&sampPeriod,n);
      printf("%d samples created\n",n);
      PutWaveSample(w,n,p);
      if (CloseWaveOutput(w,WAV,fn)<SUCCESS){
         ReportErrors("Main",0); exit(-1);
      }
      // explore structure
      const cst_item *it, *itlast = NULL;
      float x,y;
      int i;
      string lastword="0"; x = 0;
      for (i=1,it = relation_head(utt_relation(utt, "Segment")); it!=NULL; it = item_next(it),i++)
      {
         printf("Segment %d\n",i);
         y = item_feat_float(it,"end");
         string ph = string(ffeature_string(it,"p.name"));
         string wd = string(ffeature_string(it,"R:SylStructure.parent.parent.name"));
         //printf("end = %f ph=%s wd=%s\n",y,ph.c_str(),wd.c_str());
         if (wd != lastword){
            printf("**** end of %s = %f\n",lastword.c_str(),x);
            lastword=wd;
         }
         x = y;
      }
      //if (itlast!=NULL) {
      //   word = string(ffeature_string(itlast,"R:SylStructure.parent.parent.name"));
      //   idx = text.find(word);
      //}


      return 0;
   }



   catch (ATK_Error e){ ReportErrors("ATK",e.i); }
   catch (HTK_Error e){ ReportErrors("HTK",e.i); }
   return 0;
}