Beispiel #1
0
/** 
 * <EN>
 * Allocate a new MFCC calculation instance
 * </EN>
 * <JA>
 * MFCC計算インスタンスを新たに割り付ける. 
 * </JA>
 * 
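 * @param amconf [in] acoustic model configuration parameters, or NULL to
 * create an instance with no analysis configuration attached
 * 
 * @return the newly allocated MFCC calculation instance, or NULL if the
 * MFCC computation work area could not be initialized.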
 *
 * @callgraph
 * @callergraph
 * 
 */
MFCCCalc *
j_mfcccalc_new(JCONF_AM *amconf)
{
  MFCCCalc *mfcc;

  /* Start from an all-zero instance, then spell out the members whose
     "unset" state matters to later code. */
  mfcc = (MFCCCalc *)mymalloc(sizeof(MFCCCalc));
  memset(mfcc, 0, sizeof(MFCCCalc));
  mfcc->param = NULL;
  mfcc->rest_param = NULL;
  mfcc->frontend.ssbuf = NULL;
  mfcc->cmn.loaded = FALSE;
  mfcc->plugin_source = -1;

  /* With no AM configuration, the instance is returned with only the
     defaults above and no analysis work area. */
  if (amconf) {
    /* the parameter set is referenced, not copied: it stays owned by amconf */
    mfcc->para = &(amconf->analysis.para);
    mfcc->hmm_loaded = (amconf->analysis.para_hmm.loaded == 1) ? TRUE : FALSE;
    mfcc->htk_loaded = (amconf->analysis.para_htk.loaded == 1) ? TRUE : FALSE;

    mfcc->wrk = WMP_work_new(mfcc->para);
    if (mfcc->wrk == NULL) {
      jlog("ERROR: j_mfcccalc_new: failed to initialize MFCC computation\n");
      /* nothing else has been allocated yet, so releasing the instance
         itself is enough to avoid leaking it on this error path */
      free(mfcc);
      return NULL;
    }

    /* CMN (cepstral mean normalization) settings */
    mfcc->cmn.load_filename = amconf->analysis.cmnload_filename;
    mfcc->cmn.update = amconf->analysis.cmn_update;
    mfcc->cmn.save_filename = amconf->analysis.cmnsave_filename;
    mfcc->cmn.map_weight = amconf->analysis.cmn_map_weight;

    /* frontend spectral subtraction (SS) settings */
    mfcc->frontend.ss_alpha = amconf->frontend.ss_alpha;
    mfcc->frontend.ss_floor = amconf->frontend.ss_floor;
    mfcc->frontend.sscalc = amconf->frontend.sscalc;
    mfcc->frontend.sscalc_len = amconf->frontend.sscalc_len;
    mfcc->frontend.ssload_filename = amconf->frontend.ssload_filename;
  }

  /* the caller is responsible for linking the instance into a list */
  mfcc->next = NULL;
  return mfcc;
}
Beispiel #2
0
/**
 * Record a stretch of input audio and write its average noise spectrum
 * to a file (or to standard output).
 *
 * The last command line argument is taken as the output file name; "-"
 * selects standard output.  The remaining arguments are handed to the
 * Julius option parser, extended with the tool-specific options "-freq",
 * "-len" and the help switches.  Input defaults to the microphone.
 *
 * Output layout: one int holding the number of spectrum points, followed
 * by that many floats.  Both are byte-swapped unless WORDS_BIGENDIAN is
 * defined (presumably so that the file is always big-endian -- confirm
 * against the reader of this format).
 *
 * @param argc [in] number of command line arguments
 * @param argv [in] command line arguments
 *
 * @return 0 on success or when only the help was shown, -1 when
 * configuration or device/work area initialization fails, 1 when the
 * output file cannot be opened, written or closed.
 */
int
main(int argc, char *argv[])
{
  Recog *recog;
  Jconf *jconf;
  float *ss;
  MFCCWork *wrk;
  long nbytes;

  /* create instance */
  recog = j_recog_new();
  jconf = j_jconf_new();
  recog->jconf = jconf;

  /* set application-specific additional options */
  j_add_option("-freq", 1, 1, "sampling freq in Hz", opt_freq);
  j_add_option("-len", 1, 1, "record length in msec", opt_len);
  j_add_option("-h", 0, 0, "display this help", opt_help);
  j_add_option("-help", 0, 0, "display this help", opt_help);
  j_add_option("--help", 0, 0, "display this help", opt_help);

  /* when no argument, output help and exit */
  if (argc <= 1) {
    opt_help(jconf, NULL, 0);
    return 0;
  }

  /* regard last arg as filename ("-" means standard output) */
  if (strmatch(argv[argc-1], "-")) {
    stout = TRUE;
  } else {
    filename = argv[argc-1];
  }

  /* set default as same as "-input mic" */
  jconf->input.type = INPUT_WAVEFORM;
  jconf->input.speech_input = SP_MIC;
  jconf->input.device = SP_INPUT_DEFAULT;
  /* process config and load them; the trailing file name is excluded */
  if (j_config_load_args(jconf, argc-1, argv) == -1) {
    fprintf(stderr, "Error reading arguments\n");
    return -1;
  }
  /* force some default values */
  jconf->detect.silence_cut  = 0; /* disable silence cut */
  jconf->preprocess.strip_zero_sample = TRUE; /* strip zero samples */
  jconf->detect.level_thres = 0;	/* no VAD, record all */
  /* set Julius default parameters for unspecified acoustic parameters */
  apply_para(&(jconf->am_root->analysis.para), &(jconf->am_root->analysis.para_default));
  /* propagate the analysis parameters to the input configuration */
  jconf->input.sfreq = jconf->am_root->analysis.para.smp_freq;
  jconf->input.period = jconf->am_root->analysis.para.smp_period;
  jconf->input.frameshift = jconf->am_root->analysis.para.frameshift;
  jconf->input.framesize = jconf->am_root->analysis.para.framesize;

  sfreq = jconf->am_root->analysis.para.smp_freq;

  /* output file check: warn before overwriting, fail early if unwritable */
  if (!stout) {
    if (access(filename, F_OK) == 0) {
      if (access(filename, W_OK) == 0) {
        fprintf(stderr, "Warning: overwriting file \"%s\"\n", filename);
      } else {
        perror("mkss");
        return(1);
      }
    }
  }

  /* allocate speech store buffer */
  samples = sfreq * slen / 1000;
  speech = (SP16 *)mymalloc(sizeof(SP16) * samples);

  /* allocate work area to compute spectrum */
  wrk = WMP_work_new(&(jconf->am_root->analysis.para));
  if (wrk == NULL) {
    jlog("ERROR: m_fusion: failed to initialize MFCC computation for SS\n");
    return -1;
  }

  /* initialize input device */
  if (j_adin_init(recog) == FALSE) {
    fprintf(stderr, "Error in initializing adin device\n");
    return -1;
  }

  /* open device; recording from a stream that failed to open is
     meaningless, so give up here */
  if (j_open_stream(recog, NULL) < 0) {
    fprintf(stderr, "Error in opening adin device\n");
    return -1;
  }

  /* record mic input */
  fprintf(stderr, "%dHz recording for %.2f seconds of noise\n", sfreq, (float)slen /(float)1000);
  speechnum = 0;
  adin_go(adin_callback, NULL, recog);

  /* close device */
  adin_end(recog->adin);
  /* the byte count is a size_t expression: convert it explicitly so that
     it matches the conversion specifier */
  fprintf(stderr, "\n%d samples (%lu bytes, %.1f sec) recorded\n", samples, (unsigned long)(samples * sizeof(SP16)), (float)samples / (float)sfreq);

  /* compute SS */
  fprintf(stderr, "compute SS:\n");
  fprintf(stderr, "  fsize : %4d samples (%.1f msec)\n", jconf->input.framesize, (float)jconf->input.framesize * 1000.0/ (float)sfreq);
  fprintf(stderr, "  fshift: %4d samples (%.1f msec)\n", jconf->input.frameshift, (float)jconf->input.frameshift * 1000.0/ (float)sfreq);

  ss = new_SS_calculate(speech, samples, &sslen, wrk, &(jconf->am_root->analysis.para));

  fprintf(stderr, "  points: %4d\n", sslen);
  fprintf(stderr, "noise spectrum was measured\n");
  
  /* open file for recording */
  /* filename is only assigned when writing to a real file, so do not hand
     it to %s in stdout mode */
  fprintf(stderr, "writing average noise spectrum to [%s]...", stout ? "stdout" : filename);
  if (stout) {
    fd = 1;
  } else {
    /* O_TRUNC: an existing, longer file must not keep stale trailing data */
    if ((fd = open(filename, O_CREAT | O_RDWR | O_TRUNC
#ifdef O_BINARY
		   | O_BINARY
#endif
		   , 0644)) == -1) {
      perror("mkss");
      return(1);
    }
  }

  /* header: number of spectrum points */
  x = sslen;
#ifndef WORDS_BIGENDIAN
  swap_bytes((char *)&x, sizeof(int), 1);
#endif
  /* Compare as signed values: with an unsigned comparison a -1 error
     return would be converted to a huge value and go unnoticed. */
  if ((long)write(fd, &x, sizeof(int)) != (long)sizeof(int)) {
    perror("mkss");
    return(1);
  }

  /* body: the spectrum itself */
#ifndef WORDS_BIGENDIAN
  swap_bytes((char *)ss, sizeof(float), sslen);
#endif
  nbytes = (long)sslen * (long)sizeof(float);
  if ((long)write(fd, ss, sslen * sizeof(float)) != nbytes) {
    perror("mkss");
    return(1);
  }

  /* standard output is left open for the runtime to flush and close */
  if (!stout) {
    if (close(fd) < 0) {
      perror("mkss");
      return(1);
    }
  }
  fprintf(stderr, "done\n");

  WMP_free(wrk);

  return 0;
}
Beispiel #3
0
/** 
 * <EN>
 * @brief  Combine all loaded models and settings into one engine instance.
 *
 * This function will finalize preparation of recognition:
 * 
 *  - create required MFCC calculation instances,
 *  - create recognition process instance for specified LM/AM combination,
 *  - set model-specific recognition parameters,
 *  - build tree lexicon for each process instance for the 1st pass,
 *  - prepare work area and cache area for recognition,
 *  - initialize some values / work area for frontend processing.
 *
 * After this function returns, all recognition setup is done and the
 * engine is ready to start recognition.
 *
 * This should be called after j_jconf_finalize() and j_load_all() have
 * completed.  You should store the jconf in recog->jconf before calling
 * this function.

 * </EN>
 * <JA>
 * @brief  全てのロードされたモデルと設定からエンジンインスタンスを
 * 最終構成する. 
 *
 * この関数は,認識準備のための最終処理を行う. 内部では,
 *
 *  - 必要な MFCC 計算インスタンスの生成
 *  - 指定された LM/AM の組からの認識処理インスタンス生成
 *  - モデルに依存する認識用パラメータの設定
 *  - 第1パス用の木構造化辞書を認識処理インスタンスごとに構築
 *  - 認識処理用ワークエリアとキャッシュエリアを確保
 *  - フロントエンド処理のためのいくつかの値とワークエリアの確保
 *
 *  を行う. この関数が終了後,エンジンインスタンス内の全てのセットアップ
 *  は終了し,認識が開始できる状態となる. 
 *
 *  この関数は,j_jconf_finalize() と j_load_all() が終わった状態で
 *  呼び出す必要がある. 呼出し前には,recog->jconf に (j_load_all でともに
 *  使用した) jconf を格納しておくこと. 
 * 
 * </JA>
 * 
 * @param recog [in] engine instance
 * 
 * @return TRUE when all initialization was successfully done, or FALSE if
 * any error has occurred.
 *
 * @callgraph
 * @callergraph
 * @ingroup instance
 * 
 */
boolean
j_final_fusion(Recog *recog)
{
  JCONF_SEARCH *search_conf;
  PROCESS_AM *amp;
  MFCCCalc *calc;
  int with_gms;

  /* The steps below run in a fixed order; each one logs its own banner
     and the first failure aborts the whole setup with FALSE. */
  jlog("STAT: ------\n");
  jlog("STAT: All models are ready, go for final fusion\n");

  /* [1] MFCC calculation instances are built from the AM configurations,
     but only when the engine computes features from waveform input */
  jlog("STAT: [1] create MFCC extraction instance(s)\n");
  if (recog->jconf->input.type == INPUT_WAVEFORM) {
    create_mfcc_calc_instances(recog);
  }

  /* [2] one recognition process instance per search configuration */
  jlog("STAT: [2] create recognition processing instance(s) with AM and LM\n");
  search_conf = recog->jconf->search_root;
  while (search_conf != NULL) {
    if (!j_launch_recognition_instance(recog, search_conf)) {
      return FALSE;
    }
    search_conf = search_conf->next;
  }

  /* [2.5] GMM, only when one is attached to the engine */
  if (recog->gmm != NULL) {
    jlog("STAT: [2.5] create GMM instance\n");
    if (!gmm_init(recog)) {
      jlog("ERROR: m_fusion: error in initializing GMM\n");
      return FALSE;
    }
  }

  /* [3] set up the output probability computation of every AM */
  jlog("STAT: [3] initialize for acoustic HMM calculation\n");
  for (amp = recog->amlist; amp != NULL; amp = amp->next) {
#ifdef ENABLE_PLUGIN
    /* a user-defined pruning method must supply all three entry points */
    if (amp->config->gprune_method == GPRUNE_SEL_USER) {
      amp->hmmwrk.compute_gaussset = (void (*)(HMMWork *, HTK_HMM_Dens **, int, int *, int)) plugin_get_func(amp->config->gprune_plugin_source, "calcmix");
      if (amp->hmmwrk.compute_gaussset == NULL) {
        jlog("ERROR: calcmix plugin has no function \"calcmix\"\n");
        return FALSE;
      }
      amp->hmmwrk.compute_gaussset_init = (boolean (*)(HMMWork *)) plugin_get_func(amp->config->gprune_plugin_source, "calcmix_init");
      if (amp->hmmwrk.compute_gaussset_init == NULL) {
        jlog("ERROR: calcmix plugin has no function \"calcmix_init\"\n");
        return FALSE;
      }
      amp->hmmwrk.compute_gaussset_free = (void (*)(HMMWork *)) plugin_get_func(amp->config->gprune_plugin_source, "calcmix_free");
      if (amp->hmmwrk.compute_gaussset_free == NULL) {
        jlog("ERROR: calcmix plugin has no function \"calcmix_free\"\n");
        return FALSE;
      }
    }
#endif
    /* the GS model and its state count are passed only when a GS HMM
       file name was configured; otherwise NULL / 0 are given */
    with_gms = (amp->config->hmm_gs_filename != NULL);
    if (!outprob_init(&(amp->hmmwrk), amp->hmminfo,
                      with_gms ? amp->hmm_gs : NULL,
                      with_gms ? amp->config->gs_statenum : 0,
                      amp->config->gprune_method, amp->config->mixnum_thres)) {
      return FALSE;
    }
  }

  /* [4] work areas for input handling */
  jlog("STAT: [4] prepare MFCC storage(s)\n");
  if (recog->jconf->input.type == INPUT_VECTOR) {
    /* feature vector input: a single configuration-less MFCC instance is
       shared by every AM (and by the GMM, if present) */
    calc = j_mfcccalc_new(NULL);
    recog->mfcclist = calc;
    calc->id = 1;
    for (amp = recog->amlist; amp != NULL; amp = amp->next) {
      amp->mfcc = recog->mfcclist;
    }
    if (recog->gmm) {
      recog->gmmmfcc = recog->mfcclist;
    }
  }

  /* every MFCC instance gets its own parameter holder */
  calc = recog->mfcclist;
  while (calc != NULL) {
    calc->param = new_param();
    calc = calc->next;
  }

  /* spectral subtraction work areas, for instances that requested SS
     estimation from the head of the input */
  if (recog->jconf->input.type == INPUT_WAVEFORM) {
    for (calc = recog->mfcclist; calc != NULL; calc = calc->next) {
      if (!calc->frontend.sscalc) {
        continue;
      }
      calc->frontend.mfccwrk_ss = WMP_work_new(calc->para);
      if (calc->frontend.mfccwrk_ss == NULL) {
        jlog("ERROR: m_fusion: failed to initialize MFCC computation for SS\n");
        return FALSE;
      }
      /* the head segment, converted from msec to samples, must cover at
         least one analysis frame */
      if (calc->frontend.sscalc_len * recog->jconf->input.sfreq / 1000 < calc->para->framesize) {
        jlog("ERROR: m_fusion: head sil length for SS (%d msec) is shorter than a frame (%d msec)\n", calc->frontend.sscalc_len, calc->para->framesize * 1000 / recog->jconf->input.sfreq);
        return FALSE;
      }
    }
  }

  /* [5] on-the-fly 1st pass: only waveform input needs extra setup */
  if (recog->jconf->decodeopt.realtime_flag) {
    jlog("STAT: [5] prepare for real-time decoding\n");
    if (recog->jconf->input.type == INPUT_WAVEFORM && !RealTimeInit(recog)) {
      jlog("ERROR: m_fusion: failed to initialize recognition process\n");
      return FALSE;
    }
  }

  jlog("STAT: All init successfully done\n\n");

#ifdef ENABLE_PLUGIN
  /* finally, let plugins run their engine startup callbacks */
  if (!plugin_exec_engine_startup(recog)) {
    jlog("ERROR: m_fusion: failed to execute callback setup in plugin\n");
    return FALSE;
  }
#endif

  return TRUE;
}