/* HTS_Engine_save_generated_speech: output generated speech
 *
 * Writes every generated sample of engine->gss to fp, one `short` at a
 * time using plain fwrite (i.e. in the host's native byte order, with no
 * header).
 *
 *   engine : engine whose generated stream set (gss) holds the speech
 *   fp     : destination stream, already opened for binary writing
 *
 * The function returns nothing; if a write fails it stops early instead of
 * continuing to write to a failed stream.  Callers that need to detect the
 * failure can inspect ferror(fp) afterwards.
 */
void HTS_Engine_save_generated_speech(HTS_Engine * engine, FILE * fp)
{
   int i;
   short temp;
   HTS_GStreamSet *gss = &engine->gss;
   /* the sample count is loop-invariant: read it once */
   const int nsample = HTS_GStreamSet_get_total_nsample(gss);

   for (i = 0; i < nsample; i++) {
      temp = HTS_GStreamSet_get_speech(gss, i);
      /* fwrite returns the number of items written; anything but 1 is a failure */
      if (fwrite(&temp, sizeof(short), 1, fp) != 1)
         break;
   }
}
/* HTS_Engine_save_riff: output RIFF format file
 *
 * Writes the generated speech of engine->gss to fp as a RIFF/WAVE file:
 * a 44-byte header followed by the samples, all emitted through
 * HTS_fwrite_little_endian.  The local variable names (data_MM_NN) give the
 * 1-based byte positions of each header field.
 *
 *   engine : engine providing the generated samples (gss) and
 *            global.sampling_rate
 *   fp     : destination stream, already opened for binary writing
 *
 * The sample count is read once so that the sizes announced in the header
 * and the number of samples actually written always agree.
 *
 * NOTE(review): the header layout assumes sizeof(int) == 4 and
 * sizeof(short) == 2, as the field positions imply -- confirm for any
 * unusual target platform.
 */
void HTS_Engine_save_riff(HTS_Engine * engine, FILE * fp)
{
   int i;
   short temp;
   HTS_GStreamSet *gss = &engine->gss;
   const int nsample = HTS_GStreamSet_get_total_nsample(gss);

   char data_01_04[] = { 'R', 'I', 'F', 'F' };
   int data_05_08 = nsample * sizeof(short) + 36;  /* bytes following this field: rest of header (36) + sample data */
   char data_09_12[] = { 'W', 'A', 'V', 'E' };
   char data_13_16[] = { 'f', 'm', 't', ' ' };
   int data_17_20 = 16;                            /* size of the 'fmt ' chunk body */
   short data_21_22 = 1;                           /* PCM */
   short data_23_24 = 1;                           /* monaural */
   int data_25_28 = engine->global.sampling_rate;
   int data_29_32 = engine->global.sampling_rate * sizeof(short);       /* bytes per second */
   short data_33_34 = sizeof(short);               /* bytes per sample frame */
   short data_35_36 = (short) (sizeof(short) * 8); /* bits per sample */
   char data_37_40[] = { 'd', 'a', 't', 'a' };
   int data_41_44 = nsample * sizeof(short);       /* size of the sample data in bytes */

   /* write header */
   HTS_fwrite_little_endian(data_01_04, sizeof(char), 4, fp);
   HTS_fwrite_little_endian(&data_05_08, sizeof(int), 1, fp);
   HTS_fwrite_little_endian(data_09_12, sizeof(char), 4, fp);
   HTS_fwrite_little_endian(data_13_16, sizeof(char), 4, fp);
   HTS_fwrite_little_endian(&data_17_20, sizeof(int), 1, fp);
   HTS_fwrite_little_endian(&data_21_22, sizeof(short), 1, fp);
   HTS_fwrite_little_endian(&data_23_24, sizeof(short), 1, fp);
   HTS_fwrite_little_endian(&data_25_28, sizeof(int), 1, fp);
   HTS_fwrite_little_endian(&data_29_32, sizeof(int), 1, fp);
   HTS_fwrite_little_endian(&data_33_34, sizeof(short), 1, fp);
   HTS_fwrite_little_endian(&data_35_36, sizeof(short), 1, fp);
   HTS_fwrite_little_endian(data_37_40, sizeof(char), 4, fp);
   HTS_fwrite_little_endian(&data_41_44, sizeof(int), 1, fp);

   /* write data */
   for (i = 0; i < nsample; i++) {
      temp = HTS_GStreamSet_get_speech(gss, i);
      HTS_fwrite_little_endian(&temp, sizeof(short), 1, fp);
   }
}
static void Run(const SUttProcessor *self, SUtterance *utt, s_erc *error) { SHTSEngineSynthUttProc104 *HTSsynth = (SHTSEngineSynthUttProc104*)self; SPlugin *audioPlugin; const SRelation *segmentRel; SAudio *audio = NULL; s_bool is_present; char **label_data = NULL; int label_size; const SItem *item; const SItem *itemItr; int counter; uint i; int frame; int state; S_CLR_ERR(error); /* we require the segment relation */ is_present = SUtteranceRelationIsPresent(utt, "Segment", error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SUtteranceRelationIsPresent\" failed")) goto quit_error; if (!is_present) { S_CTX_ERR(error, S_FAILURE, "Run", "Failed to find 'Segment' relation in utterance"); goto quit_error; } segmentRel = SUtteranceGetRelation(utt, "Segment", error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SUtteranceGetRelation\" failed")) goto quit_error; item = SRelationHead(segmentRel, error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SRelationHead\" failed")) goto quit_error; itemItr = item; label_size = 0; while (itemItr != NULL) { label_size++; itemItr = SItemNext(itemItr, error); } label_data = S_CALLOC(char*, label_size); itemItr = item; counter = 0; while (itemItr != NULL) { SObject *dFeat; const char *tmp; dFeat = SItemPathToFeatProc(itemItr, "hts_labels", error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SItemPathToFeatProc\" failed")) goto quit_error; if (dFeat == NULL) { S_CTX_ERR(error, S_FAILURE, "Run", "Failed to generate hts labels for segment item"); goto quit_error; } tmp = SObjectGetString(dFeat, error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SObjectGetString\" failed")) goto quit_error; label_data[counter++] = s_strdup(tmp, error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"s_strdup\" failed")) goto quit_error; SItemSetObject((SItem*)itemItr, "hts_label", dFeat, error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SItemSetObject\" failed")) goto quit_error; itemItr = SItemNext(itemItr, error); } 
/* speech synthesis part */ HTS_Engine_load_label_from_string_list(&(HTSsynth->engine), label_data, label_size); HTS_Engine_create_sstream(&(HTSsynth->engine)); HTS_Engine_create_pstream(&(HTSsynth->engine)); HTS_Engine_create_gstream(&(HTSsynth->engine)); itemItr = item; counter = 0; frame = 0; state = 0; while (itemItr != NULL) { int j; int duration; HTS_SStreamSet *sss = &(HTSsynth->engine.sss); const int nstate = HTS_ModelSet_get_nstate(&(HTSsynth->engine.ms)); const double rate = HTSsynth->engine.global.fperiod * 1e+7 / HTSsynth->engine.global.sampling_rate; float tmp; for (j = 0, duration = 0; j < nstate; j++) duration += HTS_SStreamSet_get_duration(sss, state++); tmp = frame * rate; SItemSetFloat((SItem*)itemItr, "start", tmp/1e+7, error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SItemSetFloat\" failed")) goto quit_error; tmp = (frame + duration) * rate; SItemSetFloat((SItem*)itemItr, "end", tmp/1e+7, error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SItemSetFloat\" failed")) goto quit_error; frame += duration; itemItr = SItemNext(itemItr, error); counter++; } /* create an audio object */ audio = S_NEW(SAudio, error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Failed to create new 'SAudio' object")) goto quit_error; /* set audio feature in utterance */ SUtteranceSetFeature(utt, "audio", S_OBJECT(audio), error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SUtteranceSetFeature\" failed")) goto quit_error; /* We need to give the utterance the audio plug-in. If we don't do * this and the voice is deleted before the utterance, then the * utterance can't do *anything* with the audio. Not even delete * it (segfault). This should be fast because it is already * loaded. * Note that this happens after the audio is set. This is because * utt features are a list implementation. 
*/ audioPlugin = s_pm_load_plugin("audio.spi", error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SUtteranceSetFeature\" failed")) goto quit_error; SUtteranceSetFeature(utt, "audio_plugin", S_OBJECT(audioPlugin), error); if (S_CHK_ERR(error, S_CONTERR, "Run", "Call to \"SUtteranceSetFeature\" failed")) { S_DELETE(audioPlugin, "Run", error); goto quit_error; } audio->sample_rate = HTSsynth->engine.global.sampling_rate; audio->num_samples = (uint32)HTS_GStreamSet_get_total_nsample(&(HTSsynth->engine).gss); audio->samples = S_MALLOC(float, audio->num_samples); if (audio->samples == NULL) { S_FTL_ERR(error, S_MEMERROR, "Run", "Failed to allocate memory for 'float' object"); goto quit_error; } /* write data */ for (i = 0; i < audio->num_samples; i++) audio->samples[i] = (float)(HTS_GStreamSet_get_speech(&(HTSsynth->engine).gss, i) * 1.0); for (counter = 0; counter < label_size; counter++) S_FREE(label_data[counter]); S_FREE(label_data); HTS_Engine_refresh(&(HTSsynth->engine)); /* all OK here */ return; /* error clean-up code */ quit_error: if (label_data != NULL) { for (counter = 0; counter < label_size; counter++) { if (label_data[counter] != NULL) S_FREE(label_data[counter]); } S_FREE(label_data); } return; }