Пример #1
0
/*
 * Flite_HTS_Engine_synthesize: synthesize speech
 *
 * Runs flite_synth_text() on txt, builds one label string per item of the
 * utterance's "Segment" relation, passes the labels to
 * HTS_Engine_synthesize_from_strings() and, when wav is not NULL, saves the
 * result to that path through HTS_Engine_save_riff().
 *
 * f:   engine wrapper; f->engine is the HTS engine that is driven.
 * txt: text to synthesize (NULL is rejected).
 * wav: output file path, or NULL to skip writing a file.
 *
 * Returns TRUE on success. Returns FALSE when txt is NULL, the voice or the
 * utterance cannot be created, the utterance has no segments, a label buffer
 * cannot be allocated, or the output file cannot be opened.
 */
HTS_Boolean Flite_HTS_Engine_synthesize(Flite_HTS_Engine * f, const char *txt, const char *wav)
{
   int i;
   FILE *fp;
   cst_voice *v = NULL;
   cst_utterance *u = NULL;
   cst_item *s = NULL;
   char **label_data = NULL;
   int label_size = 0;
   HTS_Boolean result = FALSE;

   if (txt == NULL)
      return FALSE;

   /* text analysis part */
   v = REGISTER_VOX(NULL);
   if (v == NULL)
      return FALSE;
   u = flite_synth_text(txt, v);
   if (u == NULL)
      goto cleanup;
   for (s = relation_head(utt_relation(u, "Segment")); s; s = item_next(s))
      label_size++;
   if (label_size <= 0)
      goto cleanup;

   /* calloc zero-fills the table, so cleanup may free every slot safely */
   label_data = (char **) calloc(label_size, sizeof(char *));
   if (label_data == NULL)
      goto cleanup;
   for (i = 0, s = relation_head(utt_relation(u, "Segment")); s; s = item_next(s), i++) {
      label_data[i] = (char *) calloc(MAXBUFLEN, sizeof(char));
      if (label_data[i] == NULL)
         goto cleanup;
      Flite_HTS_Engine_create_label(f, s, label_data[i]);
   }

   /* speech synthesis part */
   HTS_Engine_synthesize_from_strings(&f->engine, label_data, label_size);
   result = TRUE;
   if (wav != NULL) {
      fp = fopen(wav, "wb");
      if (fp == NULL) {
         /* never hand a NULL stream to the writer or to fclose() */
         result = FALSE;
      } else {
         HTS_Engine_save_riff(&f->engine, fp);
         fclose(fp);
      }
   }
   /* refresh the engine whether or not the file could be written */
   HTS_Engine_refresh(&f->engine);

 cleanup:
   if (label_data != NULL) {
      for (i = 0; i < label_size; i++)
         free(label_data[i]);
      free(label_data);
   }
   if (u != NULL)
      delete_utterance(u);
   UNREGISTER_VOX(v);

   return result;
}
Пример #2
0
/*
 * test_hrg: builds a ten-item "Segment" relation in a fresh utterance, giving
 * item n the name "seg_NNN" and the duration n * 0.20, then checks every
 * stored duration against the matching entry of correct_list.
 */
void test_hrg(void)
{
    cst_utterance *utt = new_utterance();
    cst_relation *segments = utt_relation_create(utt, "Segment");
    cst_item *cur = 0;
    int n;

    /* The first item is appended to the relation, later ones to their
       predecessor. */
    for (n = 0; n < 10; n++)
    {
        char name[20];

        sprintf(name, "seg_%03d", n);
        cur = (n == 0) ? relation_append(segments, NULL)
                       : item_append(cur, NULL);
        item_set_string(cur, "name", name);
        item_set_float(cur, "duration", n * 0.20);
    }

    /* Re-read the relation by name and walk it from the head. */
    n = 0;
    cur = relation_head(utt_relation(utt, "Segment"));
    while (cur)
    {
        TEST_CHECK(item_feat_float(cur, "duration") == correct_list[n]);
        cur = item_next(cur);
        n++;
    }

    delete_utterance(utt);
}
Пример #3
0
/*
 * Builds a ten-item "Segment" relation (names "seg_NNN", durations n * 0.20)
 * and prints index, name and duration of each item. Always returns 0.
 */
int main(int argc, char **argv)
{
    cst_utterance *utt = new_utterance();
    cst_relation *segments = utt_relation_create(utt, "Segment");
    cst_item *cur = 0;
    int n;

    /* The first item is appended to the relation, later ones to their
       predecessor. */
    for (n = 0; n < 10; n++)
    {
        char name[20];

        sprintf(name, "seg_%03d", n);
        cur = (n == 0) ? relation_append(segments, NULL)
                       : item_append(cur, NULL);
        item_set_string(cur, "name", name);
        item_set_float(cur, "duration", n * 0.20);
    }

    /* Look the relation up by name again and dump it head to tail. */
    n = 0;
    cur = relation_head(utt_relation(utt, "Segment"));
    while (cur)
    {
        printf("Segment %d %s %f\n",
               n,
               item_feat_string(cur, "name"),
               item_feat_float(cur, "duration"));
        cur = item_next(cur);
        n++;
    }

    delete_utterance(utt);

    return 0;
}
Пример #4
0
/*
 * Creates an utterance with a "Word" relation, passes that relation to
 * bbb_relation_load() with "ttt.txt", runs WordSylSeg() on the utterance and
 * prints four feature paths ("name", "n.name", "p.name",
 * "R:SylStructure.parent.name") for each "Segment" item, starting from the
 * item that follows the relation head. Always returns 0.
 */
int main(int argc, char **argv)
{
    cst_utterance *utt;
    cst_relation *words;
    cst_item *seg = 0;
    int idx;

    cmu_lex_init();

    utt = new_utterance();
    words = utt_relation_create(utt, "Word");
    bbb_relation_load(words, "ttt.txt");
    WordSylSeg(utt);

    /* Note: the walk begins at item_next(head), so the head item itself is
       not printed. Two more columns
       ("R:SylStructure.parent.R:Word.n.name" and the "duration" float)
       were disabled in the original listing and stay disabled here. */
    idx = 0;
    seg = item_next(relation_head(utt_relation(utt, "Segment")));
    while (seg)
    {
        printf("Segment %s %s %s %s\n",
               ffeature_string(seg, "name"),
               ffeature_string(seg, "n.name"),
               ffeature_string(seg, "p.name"),
               ffeature_string(seg, "R:SylStructure.parent.name"));
        seg = item_next(seg);
        idx++;
    }

    delete_utterance(utt);

    return 0;
}
/*
 * Synthesizes text with voice and returns a copy (made with copy_wave) of
 * the utterance's wave; the utterance itself is deleted before returning.
 * Returns NULL when flite_synth_text() yields no utterance.
 */
cst_wave *flite_text_to_wave(const char *text, cst_voice *voice)
{
    cst_utterance *utt = flite_synth_text(text, voice);
    cst_wave *copy;

    if (utt == NULL)
        return NULL;

    /* Copy first: the wave is obtained from the utterance that is about to
       be deleted. */
    copy = copy_wave(utt_wave(utt));
    delete_utterance(utt);

    return copy;
}
Пример #6
0
/*
 * Synthesizes text through flite_synth_phones(), hands the utterance to
 * flite_process_output() (append flag FALSE) for the requested outtype,
 * deletes the utterance and returns the value flite_process_output() gave.
 */
float flite_phones_to_speech(const char *text,
                             cst_voice *voice,
                             const char *outtype)
{
    cst_utterance *utt = flite_synth_phones(text, voice);
    float result = flite_process_output(utt, outtype, FALSE);

    delete_utterance(utt);
    return result;
}
/*
 * Initializes u for voice with utt_init() and runs the synth function on it.
 * Returns u when synth returns non-NULL; otherwise deletes u and returns
 * NULL.
 */
static cst_utterance *flite_synth_foo(cst_utterance *u,
                                      cst_voice *voice,
                                      cst_uttfunc synth)
{
    utt_init(u, voice);

    if (synth(u) != NULL)
        return u;

    /* Synthesis failed: the utterance is released here. */
    delete_utterance(u);
    return NULL;
}
Пример #8
0
/*
 * Flite_Text_Analyzer_analysis: text analysis
 *
 * Analyzes text with flite_synth_text() and stores the result in
 * analyzer->pointer as a Flite_Utterance holding the voice, the utterance,
 * the number of "Segment" items and an array of pointers to those items.
 * Any previous result held by the analyzer is cleared first.
 *
 * On any failure (NULL arguments, allocation failure, voice or utterance
 * creation failure, or an utterance with no segments) everything acquired so
 * far is released and analyzer->pointer is left NULL.
 */
void Flite_Text_Analyzer_analysis(Flite_Text_Analyzer * analyzer, const char *text)
{
   int i;
   cst_item *s;
   Flite_Utterance *fu;

   if (analyzer == NULL || text == NULL)
      return;

   if (analyzer->pointer != NULL)
      Flite_Text_Analyzer_clear(analyzer);

   /* allocate */
   fu = (Flite_Utterance *) malloc(sizeof(Flite_Utterance));
   if (fu == NULL)
      return;
   fu->items = NULL;
   fu->nitem = 0;
   fu->u = NULL;

   /* create voice */
   fu->v = REGISTER_VOX(NULL);
   if (fu->v == NULL) {
      free(fu);
      return;
   }

   /* create utterance */
   fu->u = flite_synth_text(text, fu->v);
   if (fu->u == NULL) {
      UNREGISTER_VOX(fu->v);
      free(fu);
      return;
   }

   /* count number of phonemes */
   for (s = relation_head(utt_relation(fu->u, "Segment")); s; s = item_next(s))
      fu->nitem++;
   if (fu->nitem == 0) {
      delete_utterance(fu->u);
      UNREGISTER_VOX(fu->v);
      free(fu);
      return;
   }

   /* save information; the items stay owned by the utterance */
   fu->items = (cst_item **) malloc(sizeof(cst_item *) * fu->nitem);
   if (fu->items == NULL) {
      delete_utterance(fu->u);
      UNREGISTER_VOX(fu->v);
      free(fu);
      return;
   }
   for (i = 0, s = relation_head(utt_relation(fu->u, "Segment")); s; s = item_next(s), i++)
      fu->items[i] = s;

   analyzer->pointer = (void *) fu;
}
Пример #9
0
/*
 * Flite_Text_Analyzer_clear: finalize flite front-end
 *
 * Releases the Flite_Utterance stored in analyzer->pointer (item array,
 * utterance, voice, then the struct itself) and resets the pointer to NULL.
 * Does nothing when analyzer or analyzer->pointer is NULL.
 */
void Flite_Text_Analyzer_clear(Flite_Text_Analyzer * analyzer)
{
   Flite_Utterance *state;

   if (analyzer == NULL)
      return;

   state = (Flite_Utterance *) analyzer->pointer;
   if (state == NULL)
      return;

   /* free(NULL) is a no-op, so the item array needs no guard */
   free(state->items);
   if (state->u != NULL)
      delete_utterance(state->u);
   if (state->v != NULL)
      UNREGISTER_VOX(state->v);
   free(state);

   analyzer->pointer = NULL;
}
/*
 * Flite_HTS_Engine_synthesis: speech synthesis
 *
 * Runs flite_synth_text() on txt, builds one label string per item of the
 * utterance's "Segment" relation, loads the labels into f->engine, creates
 * the engine's sstream/pstream/gstream and, when wavfp is not NULL, writes
 * the result to it through HTS_Engine_save_riff(). The caller keeps
 * ownership of wavfp; it is not closed here.
 *
 * Returns silently, with everything acquired so far released, when txt is
 * NULL, the voice or utterance cannot be created, the utterance has no
 * segments, or a label buffer cannot be allocated.
 */
void Flite_HTS_Engine_synthesis(Flite_HTS_Engine * f, char *txt, FILE * wavfp)
{
    int i;
    cst_voice *v = NULL;
    cst_utterance *u = NULL;
    cst_item *s = NULL;
    char **label_data = NULL;
    int label_size = 0;

    if (txt == NULL)
        return;

    /* text analysis part */
    v = REGISTER_VOX(NULL);
    if (v == NULL)
        return;
    u = flite_synth_text(txt, v);
    if (u == NULL)
        goto cleanup;
    for (s = relation_head(utt_relation(u, "Segment")); s; s = item_next(s))
        label_size++;
    if (label_size <= 0)
        goto cleanup;

    /* calloc zero-fills the table, so cleanup may free every slot safely */
    label_data = (char **) calloc(label_size, sizeof(char *));
    if (label_data == NULL)
        goto cleanup;
    for (i = 0, s = relation_head(utt_relation(u, "Segment")); s;
            s = item_next(s), i++) {
        label_data[i] = (char *) calloc(MAXBUFLEN, sizeof(char));
        if (label_data[i] == NULL)
            goto cleanup;
        Flite_HTS_Engine_create_label(f, s, label_data[i]);
    }

    /* speech synthesis part */
    HTS_Engine_load_label_from_string_list(&f->engine, label_data, label_size);
    HTS_Engine_create_sstream(&f->engine);
    HTS_Engine_create_pstream(&f->engine);
    HTS_Engine_create_gstream(&f->engine);
    if (wavfp != NULL)
        HTS_Engine_save_riff(&f->engine, wavfp);

    HTS_Engine_refresh(&f->engine);

cleanup:
    /* shared exit: previously the early returns leaked the voice and the
       utterance */
    if (label_data != NULL) {
        for (i = 0; i < label_size; i++)
            free(label_data[i]);
        free(label_data);
    }
    if (u != NULL)
        delete_utterance(u);
    UNREGISTER_VOX(v);
}
/*
 * Synthesizes the token utterance u with voice via flite_synth_foo() and
 * outputs the resulting wave according to outtype:
 *   "play"  - play_wave() is called on it,
 *   "none"  - nothing is output,
 *   other   - outtype is passed to cst_wave_append_riff() as the target.
 * The utterance is deleted before returning.
 *
 * Returns num_samples / sample_rate of the wave, or -1 when synthesis fails.
 */
float flite_tokens_to_speech(cst_utterance *u,
                             cst_voice *voice,
                             const char *outtype)
{
    cst_wave *wave;
    float length;

    u = flite_synth_foo(u, voice, utt_synth_tokens);
    if (u == NULL)
        return -1;

    wave = utt_wave(u);
    length = (float)wave->num_samples / (float)wave->sample_rate;

    if (cst_streq(outtype, "play"))
    {
        play_wave(wave);
    }
    else if (!cst_streq(outtype, "none"))
    {
        cst_wave_append_riff(wave, outtype);
    }

    delete_utterance(u);

    return length;
}
Пример #12
0
    // Returns the names of the "Segment" items that flite_synth_text()
    // produces for sText, each followed by a space. A segment whose "ph_vc"
    // feature is "+" and whose "R:SylStructure.parent.stress" feature is "1"
    // gets a "1" appended to its name (stressed vowel marker).
    //
    // Returns an empty string when sText is NULL, or when the voice or the
    // utterance cannot be created.
    string getPhonemes( const char* sText )
    {
        string sRet;

        if ( sText == NULL )
        {
            return sRet;
        }

        flite_init();

        cst_voice* v = register_cmu_us_no_wave( NULL );
        if ( v == NULL )
        {
            return sRet;
        }

        cst_utterance* u = flite_synth_text( sText, v );
        if ( u == NULL )
        {
            // Nothing to walk; utt_relation() must not see a NULL utterance.
            return sRet;
        }

        for ( cst_item* s = relation_head( utt_relation( u, "Segment" ) ); s; s = item_next( s ) )
        {
            sRet += item_feat_string( s, "name" );

            /* If its a vowel and is stressed output stress value */
            if ( ( cst_streq( "+", ffeature_string( s, "ph_vc" ) ) ) &&
                    ( cst_streq( "1", ffeature_string( s, "R:SylStructure.parent.stress" ) ) ) )
            {
                sRet += "1";
            }

            sRet += " ";
        }

        delete_utterance( u );

        return sRet;
    }
Пример #13
0
/*
 * flowm_utt_callback: per-utterance callback for the Flowm UI.
 *
 * While flowm_play_status is FLOWM_PLAY or FLOWM_SKIP the utterance is
 * returned unchanged. If TTSWindow is set, the callback first:
 *   - joins the "Token" relation's prepunctuation/name/punc strings into a
 *     display message (truncated with " ..." when it would not fit in
 *     FL_MAX_MSG_CHARS),
 *   - records the first token's "file_pos" in flowm_file_pos and pushes it
 *     onto flowm_prev_utt_pos when it is beyond the current top entry,
 *   - updates the FL_SYNTHTEXT and FL_FILEPOS dialog items,
 *   - turns a FLOWM_SKIP status back into FLOWM_PLAY.
 * For any other play status the utterance is deleted and 0 is returned.
 */
cst_utterance *flowm_utt_callback(cst_utterance *u)
{
    char rst[FL_MAX_MSG_CHARS];
    const char *tok;
    const char *prepunc;
    const char *punc;
    cst_item *item;
    const char *space;
    int rst_len;
    int extend_length;
    
    /* In order to stop the synthesizer if the STOP button is pressed */
    /* This stops the synthesis of the next utterance */

    if ((flowm_play_status == FLOWM_PLAY) ||
        (flowm_play_status == FLOWM_SKIP))
    {
        if (TTSWindow)
        {
            rst[0] = '\0';
            space = "";
            for (item=relation_head(utt_relation(u,"Token")); 
                 item; item=item_next(item))
            {
                tok = item_feat_string(item,"name");
                if (cst_streq("",space))
                    /* Only do this on the first token/word */
                    flowm_file_pos = item_feat_int(item,"file_pos");
                prepunc = item_feat_string(item,"prepunctuation");
                punc = item_feat_string(item,"punc");

                rst_len = cst_strlen(rst);
                extend_length = rst_len + 1 +
                    cst_strlen(prepunc) +
                    cst_strlen(punc);
                if (cst_strlen(tok)+extend_length+4 < FL_MAX_MSG_CHARS)
                    /* Append at the end of rst: passing rst as both the
                       destination and a %s argument is undefined for the
                       sprintf family. */
                    cst_sprintf(rst+rst_len,"%s%s%s%s",space,
                                prepunc,
                                tok,
                                punc);
                else 
                {
                    if (rst_len+4 < FL_MAX_MSG_CHARS)
                        cst_sprintf(rst+rst_len," ...");
                    break;
                }
                space = " ";
            }

            if (flowm_file_pos > flowm_prev_utt_pos[flowm_utt_pos_pos])
            {
                if ((flowm_utt_pos_pos+1) >= FLOWM_NUM_UTT_POS)
                {
                    /* Filled it up, so move it down */
                    /* NOTE(review): this shifts the array by one slot but
                       copies only FLOWM_NUM_UTT_POS-10 entries and resets
                       the index to FLOWM_NUM_UTT_POS-10; confirm that this
                       is the intended window. */
                    memmove(flowm_prev_utt_pos,&flowm_prev_utt_pos[1],
                            sizeof(int)*(FLOWM_NUM_UTT_POS-10));
                    flowm_utt_pos_pos = (FLOWM_NUM_UTT_POS-10);
                }
                flowm_utt_pos_pos++;
                flowm_prev_utt_pos[flowm_utt_pos_pos] = flowm_file_pos;
            }

            /* Send text to TTSWindow */
            mbstowcs(fl_tts_msg,rst,FL_MAX_MSG_CHARS);
            SetDlgItemText(TTSWindow, FL_SYNTHTEXT, fl_tts_msg);

            /* Update file pos percentage in FilePos window */
            cst_sprintf(rst,"%2.3f",flowm_find_file_percentage());
            mbstowcs(fl_fp_msg,rst,FL_MAX_MSG_CHARS);
            SetDlgItemText(TTSWindow, FL_FILEPOS, fl_fp_msg);

            SystemIdleTimerReset();  /* keep alive while synthesizing */
            if (flowm_play_status == FLOWM_SKIP)
                flowm_play_status = FLOWM_PLAY;
        }
        return u;
    }
    else
    {
        /* Not playing: drop the utterance so it is not synthesized */
        delete_utterance(u);
        return 0;
    }
}
Пример #14
0
    float flite_text_to_speech_phenome( const char* text,
                                        cst_voice* voice,
                                        const char* outtype,
                                        void* pStream )
    {
        cst_utterance* u;

        float dur;
        float end_last = 0;
        float end_current = 0;
        float dur_current = 0;
        float dur_sum = 0;

        //feat_set_float( voice->features, "duration_stretch", 1 );

        u = flite_synth_text( text, voice );


        cst_item* s;
        string sRet;

        int nPhoneme = 0;

        for ( s = relation_head( utt_relation( u, "Segment" ) ); s; s = item_next( s ) )
        {
            SPhenomeTiming ps;

            string sPhoneme = item_feat_string( s, "name" );
            sRet += sPhoneme;

            end_current = item_feat_float( s, "end" );
            dur_current = end_current - end_last;

            //if ( !( nPhoneme == 0 && sPhoneme == "pau" ) )
            //{
            dur_sum += dur_current;
            //}

            ps.fWeight = 1;

            /* If its a vowel and is stressed output stress value */
            if ( ( cst_streq( "+", ffeature_string( s, "ph_vc" ) ) ) &&
                    ( cst_streq( "1", ffeature_string( s, "R:SylStructure.parent.stress" ) ) ) )
            {
                sRet += "1";
                ps.fWeight = 1.3;
            }

            sRet += " ";

            if ( pStream )
            {
                // fade into each other
                ps.sName = sPhoneme;
                ps.fStart = end_current - dur_current;
                ps.fEnd = end_current;
                ps.fDuration = dur_current;

                ( ( CryMT::queue<SPhenomeTiming>* )pStream )->push( ps );
            }

            end_last = end_current;
            ++nPhoneme;
        }

        dur = flite_process_output( u, outtype, FALSE );
        delete_utterance( u );

        return dur;
    }
Пример #15
0
/*
 * flite_file_to_speech: synthesize the contents of a text file.
 *
 * Opens filename as a token stream (character classes come from the voice's
 * features), groups tokens into utterances and, for each completed
 * utterance, runs the optional "utt_user_callback", synthesizes it with
 * flite_do_synth() and hands it to flite_process_output() in append mode.
 *
 * outtype: "play", "none" and "stream" are passed through as-is; any other
 *          value is used as a file name that is first written as an empty
 *          16000 Hz RIFF wave.
 *
 * Returns the sum of the values returned by flite_process_output().
 * NOTE(review): when the file cannot be opened the function returns 1,
 * which callers cannot tell apart from a real result - confirm intent.
 */
float flite_file_to_speech(const char *filename, 
			   cst_voice *voice,
			   const char *outtype)
{
    cst_utterance *utt;
    cst_tokenstream *ts;
    const char *token;
    cst_item *t;
    cst_relation *tokrel;
    float durs = 0;
    int num_tokens;
    cst_wave *w;
    cst_breakfunc breakfunc = default_utt_break;
    cst_uttfunc utt_user_callback = 0;
    int fp;

    if ((ts = ts_open(filename,
	      get_param_string(voice->features,"text_whitespace",NULL),
	      get_param_string(voice->features,"text_singlecharsymbols",NULL),
	      get_param_string(voice->features,"text_prepunctuation",NULL),
	      get_param_string(voice->features,"text_postpunctuation",NULL)))
	== NULL)
    {
	cst_errmsg("failed to open file \"%s\" for reading\n",
		   filename);
	return 1;
    }
    /* Optional start offset taken from the voice features */
    fp = get_param_int(voice->features,"file_start_position",0);
    if (fp > 0)
        ts_set_stream_pos(ts,fp);

    /* Voice features may override the utterance-break predicate ... */
    if (feat_present(voice->features,"utt_break"))
	breakfunc = val_breakfunc(feat_val(voice->features,"utt_break"));

    /* ... and supply a callback that runs on each utterance before synthesis */
    if (feat_present(voice->features,"utt_user_callback"))
	utt_user_callback = val_uttfunc(feat_val(voice->features,"utt_user_callback"));

    /* If its a file to write to, create and save an empty wave file */
    /* as we are going to incrementally append to it                 */
    if (!cst_streq(outtype,"play") && 
        !cst_streq(outtype,"none") &&
        !cst_streq(outtype,"stream"))
    {
	w = new_wave();
	cst_wave_resize(w,0,1);
	cst_wave_set_sample_rate(w,16000);
	cst_wave_save_riff(w,outtype);  /* an empty wave */
	delete_wave(w);
    }

    num_tokens = 0;
    utt = new_utterance();
    tokrel = utt_relation_create(utt, "Token");
    /* Keep going after EOF while tokens are still pending, so the last */
    /* utterance gets flushed by the empty token read at EOF            */
    while (!ts_eof(ts) || num_tokens > 0)
    {
	token = ts_get(ts);
	if ((cst_strlen(token) == 0) ||
	    (num_tokens > 500) ||  /* need an upper bound */
	    (relation_head(tokrel) && 
	     breakfunc(ts,token,tokrel)))
	{
	    /* An end of utt, so synthesize it */
            /* The callback may replace utt; a NULL result ends the loop */
            if (utt_user_callback)
                utt = (utt_user_callback)(utt);

            if (utt)
            {
                utt = flite_do_synth(utt,voice,utt_synth_tokens);
                durs += flite_process_output(utt,outtype,TRUE);
                delete_utterance(utt); utt = NULL;
            }
            else 
                break;

	    if (ts_eof(ts)) break;

	    /* Start a fresh utterance; the current token becomes its first */
	    utt = new_utterance();
	    tokrel = utt_relation_create(utt, "Token");
	    num_tokens = 0;
	}
	num_tokens++;

	t = relation_append(tokrel, NULL);
	item_set_string(t,"name",token);
	item_set_string(t,"whitespace",ts->whitespace);
	item_set_string(t,"prepunctuation",ts->prepunctuation);
	item_set_string(t,"punc",ts->postpunctuation);
        /* Mark it at the beginning of the token */
	item_set_int(t,"file_pos",
                     ts->file_pos-(1+ /* as we are already on the next char */
                                   cst_strlen(token)+
                                   cst_strlen(ts->prepunctuation)+
                                   cst_strlen(ts->postpunctuation)));
	item_set_int(t,"line_number",ts->line_number);
    }

    /* utt is NULL here when the loop ended via one of the breaks above */
    delete_utterance(utt);
    ts_close(ts);
    return durs;
}
/*
 * flite_file_to_speech: synthesize the contents of a text file.
 *
 * Reads filename as a token stream (character classes come from the voice's
 * features), collects tokens into utterances and passes each completed
 * utterance to flite_tokens_to_speech().
 *
 * outtype: "play" and "none" are passed through as-is; any other value is
 *          used as a file name that is first written as an empty 16000 Hz
 *          RIFF wave.
 *
 * Returns the sum of the non-negative values returned by
 * flite_tokens_to_speech(); processing stops at the first negative one.
 * Returns 1 when the file cannot be opened.
 */
float flite_file_to_speech(const char *filename,
                           cst_voice *voice,
                           const char *outtype)
{
    cst_utterance *cur_utt;
    cst_tokenstream *stream;
    const char *tok;
    cst_item *tok_item;
    cst_relation *tokens;
    float utt_dur;
    float total = 0;
    int pending;
    cst_breakfunc is_break = default_utt_break;

    stream = ts_open(filename,
                     get_param_string(voice->features,"text_whitespace",NULL),
                     get_param_string(voice->features,"text_singlecharsymbols",NULL),
                     get_param_string(voice->features,"text_prepunctuation",NULL),
                     get_param_string(voice->features,"text_postpunctuation",NULL));
    if (stream == NULL)
    {
        cst_errmsg("failed to open file \"%s\" for reading\n",
                   filename);
        return 1;
    }

    /* The voice may supply its own utterance-break predicate. */
    if (feat_present(voice->features,"utt_break"))
        is_break = val_breakfunc(feat_val(voice->features,"utt_break"));

    /* Output goes to a file: start it off as an empty wave, since each
       utterance is appended to it later. */
    if (!cst_streq(outtype,"play") && !cst_streq(outtype,"none"))
    {
        cst_wave *empty = new_wave();

        cst_wave_resize(empty,0,1);
        cst_wave_set_sample_rate(empty,16000);
        cst_wave_save_riff(empty,outtype);
        delete_wave(empty);
    }

    pending = 0;
    cur_utt = new_utterance();
    tokens = utt_relation_create(cur_utt, "Token");

    /* Continue past EOF while tokens are pending so the final utterance is
       flushed by the empty token read at EOF. */
    while (!ts_eof(stream) || pending > 0)
    {
        tok = ts_get(stream);

        if ((strlen(tok) == 0) ||
            (pending > 500) ||          /* hard cap on utterance length */
            (relation_head(tokens) &&
             is_break(stream,tok,tokens)))
        {
            /* End of utterance. The utterance is handed over to
               flite_tokens_to_speech(), so the local pointer is dropped
               right after the call. */
            utt_dur = flite_tokens_to_speech(cur_utt,voice,outtype);
            cur_utt = NULL;
            if (utt_dur < 0)
                break;
            total += utt_dur;

            if (ts_eof(stream))
                break;

            cur_utt = new_utterance();
            tokens = utt_relation_create(cur_utt, "Token");
            pending = 0;
        }
        pending++;

        tok_item = relation_append(tokens, NULL);
        item_set_string(tok_item,"name",tok);
        item_set_string(tok_item,"whitespace",stream->whitespace);
        item_set_string(tok_item,"prepunctuation",stream->prepunctuation);
        item_set_string(tok_item,"punc",stream->postpunctuation);
        item_set_int(tok_item,"file_pos",stream->file_pos);
        item_set_int(tok_item,"line_number",stream->line_number);
    }

    /* cur_utt is NULL here whenever the loop was left through a break. */
    delete_utterance(cur_utt);
    ts_close(stream);
    return total;
}
Пример #17
0
/*
 * flite_ssml_to_speech_ts: synthesize SSML read from an open token stream.
 *
 * Tokens are read from ts. A "<" token starts a tag: the tag name (and, for
 * end tags, the name following "/") is upcased, its attributes are read with
 * ssml_get_attributes() and the tag is applied through ssml_apply_tag().
 * "&" tokens are currently consumed and ignored. All other tokens are
 * collected into utterances which, once complete, go through the optional
 * "utt_user_callback", flite_do_synth() and flite_process_output() in
 * append mode.
 *
 * The token stream is not closed here.
 *
 * Returns the sum of the values returned by flite_process_output().
 */
static float flite_ssml_to_speech_ts(cst_tokenstream *ts,
                                     cst_voice *voice,
                                     const char *outtype)
{
    cst_features *ssml_feats, *ssml_word_feats;
    cst_features *attributes;
    const char *token;
    char *tag;
    cst_utterance *utt;
    cst_relation *tokrel;
    int num_tokens;
    cst_breakfunc breakfunc = default_utt_break;
    cst_uttfunc utt_user_callback = 0;
    float durs = 0.0;
    cst_item *t;

    ssml_feats = new_features();
    ssml_word_feats = new_features();
    set_charclasses(ts,
                    " \t\n\r",
                    ssml_singlecharsymbols_general,
                    get_param_string(voice->features,"text_prepunctuation",""),
                    get_param_string(voice->features,"text_postpunctuation","")
                    );

    if (feat_present(voice->features,"utt_break"))
        breakfunc = val_breakfunc(feat_val(voice->features,"utt_break"));

    if (feat_present(voice->features,"utt_user_callback"))
        utt_user_callback = val_uttfunc(feat_val(voice->features,"utt_user_callback"));

    num_tokens = 0;
    utt = new_utterance();
    tokrel = utt_relation_create(utt, "Token");
    /* Keep going after EOF while tokens are pending so the final utterance
       is flushed by the empty token read at EOF. */
    while (!ts_eof(ts) || num_tokens > 0)
    {
        token = ts_get(ts);
        if (cst_streq("<",token))
        {   /* A tag */
            tag = cst_upcase(ts_get(ts));
            if (cst_streq("/",tag)) /* an end tag */
            {
                /* Release the upcased "/" before tag is overwritten;
                   it was leaked previously. */
                cst_free(tag);
                tag = cst_upcase(ts_get(ts));
                attributes = ssml_get_attributes(ts);
                feat_set_string(attributes,"_type","end");
            }
            else
                attributes = ssml_get_attributes(ts);
            /* NOTE(review): ssml_apply_tag() may return a different
               utterance while tokrel still refers to the previous one's
               "Token" relation, and ownership of attributes is not visible
               here - confirm against ssml_apply_tag(). */
            utt = ssml_apply_tag(tag,attributes,utt,ssml_word_feats);
            cst_free(tag);
        }
        else if (cst_streq("&",token))
        {   /* an escape sequence */
            /* skip to ; and insert value in rawdata */
        }
        else
        {
            if ((cst_strlen(token) == 0) ||
                (num_tokens > 500) ||  /* need an upper bound */
                (relation_head(tokrel) && 
                 breakfunc(ts,token,tokrel)))
            {
                /* An end of utt, so synthesize it */
                if (utt_user_callback)
                    utt = (utt_user_callback)(utt);
                
                if (utt)
                {
                    utt = flite_do_synth(utt,voice,utt_synth_tokens);
                    durs += flite_process_output(utt,outtype,TRUE);
                    delete_utterance(utt); utt = NULL;
                }
                else 
                    break;

                if (ts_eof(ts)) break;
                
                utt = new_utterance();
                tokrel = utt_relation_create(utt, "Token");
                num_tokens = 0;
            }

            num_tokens++;

            t = relation_append(tokrel, NULL);
            item_set_string(t,"name",token);
            item_set_string(t,"whitespace",ts->whitespace);
            item_set_string(t,"prepunctuation",ts->prepunctuation);
            item_set_string(t,"punc",ts->postpunctuation);
            /* Mark it at the beginning of the token */
            item_set_int(t,"file_pos",
                         ts->file_pos-(1+ /* as we are already on the next char */
                                       cst_strlen(token)+
                                       cst_strlen(ts->prepunctuation)+
                                       cst_strlen(ts->postpunctuation)));
            item_set_int(t,"line_number",ts->line_number);
        }
    }

    /* Release the feature sets created at the top; they were leaked on
       every call previously. */
    delete_features(ssml_feats);
    delete_features(ssml_word_feats);
    /* utt is NULL here when the loop ended via one of the breaks above */
    delete_utterance(utt);
    return durs;
}