/**
 * Allocate a new MFCC calculation instance.
 *
 * The instance is zero-filled and its pointer members are reset.  When
 * @a amconf is given, the instance refers to the analysis parameters in
 * @a amconf, an MFCC work area is created with WMP_work_new(), and the CMN
 * and spectral subtraction (frontend) settings are copied from @a amconf.
 * When @a amconf is NULL, only the default values are set.
 *
 * @param amconf [in] acoustic model configuration parameters, or NULL
 *
 * @return the newly allocated MFCC calculation instance, or NULL when the
 * MFCC work area could not be created.
 *
 * @callgraph
 * @callergraph
 *
 */
MFCCCalc *
j_mfcccalc_new(JCONF_AM *amconf)
{
  MFCCCalc *mfcc;
  MFCCWork *wrk = NULL;

  /* Create the work area BEFORE allocating the instance: this is the only
     step that can fail here, and doing it first means the error path has
     nothing to release (previously the instance was leaked). */
  if (amconf) {
    wrk = WMP_work_new(&(amconf->analysis.para));
    if (wrk == NULL) {
      jlog("ERROR: j_mfcccalc_new: failed to initialize MFCC computation\n");
      return NULL;
    }
  }

  mfcc = (MFCCCalc *)mymalloc(sizeof(MFCCCalc));
  memset(mfcc, 0, sizeof(MFCCCalc));

  /* explicit defaults (pointers are not left to rely on the memset) */
  mfcc->param = NULL;
  mfcc->rest_param = NULL;
  mfcc->frontend.ssbuf = NULL;
  mfcc->cmn.loaded = FALSE;
  mfcc->plugin_source = -1;

  if (amconf) {
    /* analysis parameters are referenced, not copied */
    mfcc->para = &(amconf->analysis.para);
    mfcc->hmm_loaded = (amconf->analysis.para_hmm.loaded == 1) ? TRUE : FALSE;
    mfcc->htk_loaded = (amconf->analysis.para_htk.loaded == 1) ? TRUE : FALSE;
    mfcc->wrk = wrk;

    /* cepstral mean normalization settings */
    mfcc->cmn.load_filename = amconf->analysis.cmnload_filename;
    mfcc->cmn.update = amconf->analysis.cmn_update;
    mfcc->cmn.save_filename = amconf->analysis.cmnsave_filename;
    mfcc->cmn.map_weight = amconf->analysis.cmn_map_weight;

    /* spectral subtraction settings */
    mfcc->frontend.ss_alpha = amconf->frontend.ss_alpha;
    mfcc->frontend.ss_floor = amconf->frontend.ss_floor;
    mfcc->frontend.sscalc = amconf->frontend.sscalc;
    mfcc->frontend.sscalc_len = amconf->frontend.sscalc_len;
    mfcc->frontend.ssload_filename = amconf->frontend.ssload_filename;
  }

  mfcc->next = NULL;

  return mfcc;
}
int main(int argc, char *argv[]) { Recog *recog; Jconf *jconf; float *ss; MFCCWork *wrk; /* create instance */ recog = j_recog_new(); jconf = j_jconf_new(); recog->jconf = jconf; /* set application-specific additional options */ j_add_option("-freq", 1, 1, "sampling freq in Hz", opt_freq); j_add_option("-len", 1, 1, "record length in msec", opt_len); j_add_option("-h", 0, 0, "display this help", opt_help); j_add_option("-help", 0, 0, "display this help", opt_help); j_add_option("--help", 0, 0, "display this help", opt_help); /* when no argument, output help and exit */ if (argc <= 1) { opt_help(jconf, NULL, 0); return 0; } /* regard last arg as filename */ if (strmatch(argv[argc-1], "-")) { stout = TRUE; } else { filename = argv[argc-1]; } /* set default as same as "-input mic" */ jconf->input.type = INPUT_WAVEFORM; jconf->input.speech_input = SP_MIC; jconf->input.device = SP_INPUT_DEFAULT; /* process config and load them */ if (j_config_load_args(jconf, argc-1, argv) == -1) { fprintf(stderr, "Error reading arguments\n"); return -1; } /* force some default values */ jconf->detect.silence_cut = 0; /* disable silence cut */ jconf->preprocess.strip_zero_sample = TRUE; /* strip zero samples */ jconf->detect.level_thres = 0; /* no VAD, record all */ /* set Julius default parameters for unspecified acoustic parameters */ apply_para(&(jconf->am_root->analysis.para), &(jconf->am_root->analysis.para_default)); /* set some values */ jconf->input.sfreq = jconf->am_root->analysis.para.smp_freq; jconf->input.period = jconf->am_root->analysis.para.smp_period; jconf->input.frameshift = jconf->am_root->analysis.para.frameshift; jconf->input.framesize = jconf->am_root->analysis.para.framesize; sfreq = jconf->am_root->analysis.para.smp_freq; /* output file check */ if (!stout) { if (access(filename, F_OK) == 0) { if (access(filename, W_OK) == 0) { fprintf(stderr, "Warning: overwriting file \"%s\"\n", filename); } else { perror("mkss"); return(1); } } } /* allocate speech store 
buffer */ samples = sfreq * slen / 1000; speech = (SP16 *)mymalloc(sizeof(SP16) * samples); /* allocate work area to compute spectrum */ wrk = WMP_work_new(&(jconf->am_root->analysis.para)); if (wrk == NULL) { jlog("ERROR: m_fusion: failed to initialize MFCC computation for SS\n"); return -1; } /* initialize input device */ if (j_adin_init(recog) == FALSE) { fprintf(stderr, "Error in initializing adin device\n"); return -1; } /* open device */ if (j_open_stream(recog, NULL) < 0) { fprintf(stderr, "Error in opening adin device\n"); } /* record mic input */ fprintf(stderr, "%dHz recording for %.2f seconds of noise\n", sfreq, (float)slen /(float)1000); speechnum = 0; adin_go(adin_callback, NULL, recog); /* close device */ adin_end(recog->adin); fprintf(stderr, "\n%d samples (%d bytes, %.1f sec) recorded\n", samples, samples * sizeof(SP16), (float)samples / (float)sfreq); /* compute SS */ fprintf(stderr, "compute SS:\n"); fprintf(stderr, " fsize : %4d samples (%.1f msec)\n", jconf->input.framesize, (float)jconf->input.framesize * 1000.0/ (float)sfreq); fprintf(stderr, " fshift: %4d samples (%.1f msec)\n", jconf->input.frameshift, (float)jconf->input.frameshift * 1000.0/ (float)sfreq); ss = new_SS_calculate(speech, samples, &sslen, wrk, &(jconf->am_root->analysis.para)); fprintf(stderr, " points: %4d\n", sslen); fprintf(stderr, "noise spectrum was measured\n"); /* open file for recording */ fprintf(stderr, "writing average noise spectrum to [%s]...", filename); if (stout) { fd = 1; } else { if ((fd = open(filename, O_CREAT | O_RDWR #ifdef O_BINARY | O_BINARY #endif , 0644)) == -1) { perror("mkss"); return(1); } } x = sslen; #ifndef WORDS_BIGENDIAN swap_bytes((char *)&x, sizeof(int), 1); #endif if (write(fd, &x, sizeof(int)) < sizeof(int)) { perror("mkss"); return(1); } #ifndef WORDS_BIGENDIAN swap_bytes((char *)ss, sizeof(float), sslen); #endif if (write(fd, ss, sslen * sizeof(float)) < sslen * sizeof(float)) { perror("mkss"); return(1); } if (!stout) { if (close(fd) < 
0) { perror("mkss"); return(1); } } fprintf(stderr, "done\n"); WMP_free(wrk); return 0; }
/**
 * @brief Combine all loaded models and settings into one engine instance.
 *
 * This function finalizes the preparation for recognition:
 *
 *  - create the required MFCC calculation instances,
 *  - create a recognition process instance for each specified LM/AM
 *    combination,
 *  - set model-specific recognition parameters,
 *  - build the tree lexicon of each process instance for the 1st pass,
 *  - prepare work area and cache area for recognition,
 *  - initialize some values / work area for frontend processing.
 *
 * After this function returns successfully, all recognition setup is done
 * and recognition can be started.
 *
 * This should be called after j_jconf_finalize() and j_load_all() have
 * completed.  The jconf (the one used with j_load_all()) must be stored in
 * recog->jconf before calling this function.
 *
 * @param recog [in] engine instance
 *
 * @return TRUE when all initialization was successfully done, or FALSE if
 * any error has occurred.
 *
 * @callgraph
 * @callergraph
 * @ingroup instance
 *
 */
boolean
j_final_fusion(Recog *recog)
{
  MFCCCalc *mc;
  JCONF_SEARCH *sc;
  PROCESS_AM *proc;
  int with_gms;

  jlog("STAT: ------\n");
  jlog("STAT: All models are ready, go for final fusion\n");

  /* [1] MFCC calculation instances, built from the AM configurations
     (only needed when the input is a waveform) */
  jlog("STAT: [1] create MFCC extraction instance(s)\n");
  if (recog->jconf->input.type == INPUT_WAVEFORM) {
    create_mfcc_calc_instances(recog);
  }

  /* [2] one recognition process instance per search configuration */
  jlog("STAT: [2] create recognition processing instance(s) with AM and LM\n");
  sc = recog->jconf->search_root;
  while (sc != NULL) {
    if (j_launch_recognition_instance(recog, sc) == FALSE) {
      return FALSE;
    }
    sc = sc->next;
  }

  /* [2.5] GMM, only when one is present */
  if (recog->gmm != NULL) {
    jlog("STAT: [2.5] create GMM instance\n");
    if (gmm_init(recog) == FALSE) {
      jlog("ERROR: m_fusion: error in initializing GMM\n");
      return FALSE;
    }
  }

  /* [3] output probability function of each AM */
  jlog("STAT: [3] initialize for acoustic HMM calculation\n");
  proc = recog->amlist;
  while (proc != NULL) {
#ifdef ENABLE_PLUGIN
    /* user-defined Gaussian pruning: fetch the three plugin entry points */
    if (proc->config->gprune_method == GPRUNE_SEL_USER) {
      proc->hmmwrk.compute_gaussset =
        (void (*)(HMMWork *, HTK_HMM_Dens **, int, int *, int))
        plugin_get_func(proc->config->gprune_plugin_source, "calcmix");
      if (proc->hmmwrk.compute_gaussset == NULL) {
        jlog("ERROR: calcmix plugin has no function \"calcmix\"\n");
        return FALSE;
      }
      proc->hmmwrk.compute_gaussset_init =
        (boolean (*)(HMMWork *))
        plugin_get_func(proc->config->gprune_plugin_source, "calcmix_init");
      if (proc->hmmwrk.compute_gaussset_init == NULL) {
        jlog("ERROR: calcmix plugin has no function \"calcmix_init\"\n");
        return FALSE;
      }
      proc->hmmwrk.compute_gaussset_free =
        (void (*)(HMMWork *))
        plugin_get_func(proc->config->gprune_plugin_source, "calcmix_free");
      if (proc->hmmwrk.compute_gaussset_free == NULL) {
        jlog("ERROR: calcmix plugin has no function \"calcmix_free\"\n");
        return FALSE;
      }
    }
#endif
    /* the GS model and its state count are passed only when a GS HMM
       file name was configured; otherwise NULL / 0 are given */
    with_gms = (proc->config->hmm_gs_filename != NULL);
    if (outprob_init(&(proc->hmmwrk), proc->hmminfo,
                     with_gms ? proc->hmm_gs : NULL,
                     with_gms ? proc->config->gs_statenum : 0,
                     proc->config->gprune_method,
                     proc->config->mixnum_thres) == FALSE) {
      return FALSE;
    }
    proc = proc->next;
  }

  /* [4] work area for input */
  jlog("STAT: [4] prepare MFCC storage(s)\n");
  if (recog->jconf->input.type == INPUT_VECTOR) {
    /* feature vector input: a single MFCC instance shared by every AM
       (and by the GMM when there is one) */
    recog->mfcclist = j_mfcccalc_new(NULL);
    recog->mfcclist->id = 1;
    proc = recog->amlist;
    while (proc != NULL) {
      proc->mfcc = recog->mfcclist;
      proc = proc->next;
    }
    if (recog->gmm) {
      recog->gmmmfcc = recog->mfcclist;
    }
  }

  /* parameter holder of each MFCC instance */
  mc = recog->mfcclist;
  while (mc != NULL) {
    mc->param = new_param();
    mc = mc->next;
  }

  /* spectral subtraction work area, for instances that compute SS */
  if (recog->jconf->input.type == INPUT_WAVEFORM) {
    for (mc = recog->mfcclist; mc != NULL; mc = mc->next) {
      if (!mc->frontend.sscalc) {
        continue;
      }
      mc->frontend.mfccwrk_ss = WMP_work_new(mc->para);
      if (mc->frontend.mfccwrk_ss == NULL) {
        jlog("ERROR: m_fusion: failed to initialize MFCC computation for SS\n");
        return FALSE;
      }
      /* the head silence used for SS must cover at least one frame */
      if (mc->frontend.sscalc_len * recog->jconf->input.sfreq / 1000 < mc->para->framesize) {
        jlog("ERROR: m_fusion: head sil length for SS (%d msec) is shorter than a frame (%d msec)\n",
             mc->frontend.sscalc_len,
             mc->para->framesize * 1000 / recog->jconf->input.sfreq);
        return FALSE;
      }
    }
  }

  /* [5] 1st pass pipeline processing, when real-time decoding is enabled */
  if (recog->jconf->decodeopt.realtime_flag) {
    jlog("STAT: [5] prepare for real-time decoding\n");
    if (recog->jconf->input.type == INPUT_WAVEFORM) {
      if (RealTimeInit(recog) == FALSE) {
        jlog("ERROR: m_fusion: failed to initialize recognition process\n");
        return FALSE;
      }
    }
  }

  /* finished! */
  jlog("STAT: All init successfully done\n\n");

#ifdef ENABLE_PLUGIN
  /* set up callback plugins, if any */
  if (plugin_exec_engine_startup(recog) == FALSE) {
    jlog("ERROR: m_fusion: failed to execute callback setup in plugin\n");
    return FALSE;
  }
#endif

  return TRUE;
}