Esempio n. 1
0
/**
 * Convert a string of space-separated words into an array of word ids.
 *
 * The string is split on the space character (runs of spaces count as a
 * single separator; leading and trailing spaces are ignored).  Each token
 * is looked up in the dictionary with voca_lookup_wid().
 *
 * The input buffer is modified in place: the separator following each
 * token is overwritten with a NUL byte, exactly as strtok() would do.
 * Unlike strtok(), the scan keeps its position in a local variable, so it
 * does not disturb any strtok() scan in progress elsewhere and is safe to
 * call re-entrantly.
 *
 * @param winfo [in] word dictionary
 * @param s [in,out] string of space-separated word strings (modified);
 *                   NULL is treated as an empty string
 * @param len_return [out] number of found words; set to 0 on failure
 *
 * @return pointer to a newly allocated word list (release it with free()),
 * or NULL if any word was not found in the dictionary.
 */
WORD_ID *
new_str2wordseq(WORD_INFO *winfo, char *s, int *len_return)
{
  char *p;      /* start of the current token */
  char *next;   /* where to resume scanning, or NULL when the input is exhausted */
  int num;
  int maxnum;
  WORD_ID *wseq;

  /* give the caller a defined value even when we bail out below */
  *len_return = 0;

  maxnum = WSSTEP;
  wseq = (WORD_ID *)mymalloc(sizeof(WORD_ID) * maxnum);
  num = 0;

  next = s;
  while (next != NULL) {
    /* skip separators in front of the token */
    p = next + strspn(next, " ");
    if (*p == '\0') break;      /* nothing but separators left */

    /* find the end of the token and terminate it in place */
    next = p + strcspn(p, " ");
    if (*next != '\0') {
      *next = '\0';
      next++;
    } else {
      next = NULL;              /* token ran up to the end of the string */
    }

    /* grow the result array by WSSTEP entries when it is full */
    if (num >= maxnum) {
      maxnum += WSSTEP;
      wseq = (WORD_ID *)myrealloc(wseq, sizeof(WORD_ID) * maxnum);
    }
    if ((wseq[num] = voca_lookup_wid(p, winfo)) == WORD_INVALID) {
      /* not found */
      jlog("Error: voca_lookup: word \"%s\" not found in dict\n", p);
      free(wseq);
      return NULL;
    }
    num++;
  }

  *len_return = num;
  return(wseq);
}
Esempio n. 2
0
/**
 * @brief  Read in the word dictionary and set it up for recognition.
 *
 * The work is done in this order:
 *  -# the main dictionary file (lmconf->dictfilename) is loaded with
 *     init_voca();
 *  -# every file in lmconf->additional_dict_files is read line by line and
 *     appended; a line that fails to load stops the reading of that file,
 *     and a failure reported by voca_load_end() is fatal unless
 *     lmconf->forcedict_flag is set, in which case it is only warned about;
 *  -# every string in lmconf->additional_dict_entries is appended as a
 *     dictionary line; a failing entry is logged, and a failure reported by
 *     voca_load_end() afterwards is always fatal;
 *  -# for LM_PROB language models only: if lmconf->enable_iwspword is set,
 *     the entry lmconf->iwspentry is appended, and then the head / tail
 *     silence word ids are looked up by name and stored in the dictionary.
 *
 * When MONOTREE is defined at compile time, TRUE is passed as the fourth
 * argument of init_voca() / voca_append_htkdict() so that the dictionary is
 * left in monophone form for the 1st pass lexicon tree; otherwise FALSE is
 * passed.
 *
 * @param lmconf [in] LM configuration variables
 * @param hmminfo [in] HMM definition of each phone in dictionary, for
 * phone checking and monophone-to-triphone conversion.
 *
 * @return the newly created word dictionary structure, or NULL on failure
 * (the partially built dictionary is released before returning).
 *
 */
static WORD_INFO *
initialize_dict(JCONF_LM *lmconf, HTK_HMM_INFO *hmminfo)
{
#ifdef MONOTREE
  /* leave the dictionary monophone for the 1st pass lexicon tree */
  const int leave_monophone = TRUE;
#else
  const int leave_monophone = FALSE;
#endif
  WORD_INFO *dict;
  JCONF_LM_NAMELIST *item;
  char line[MAXLINELEN];
  FILE *fp;
  int base;   /* word count before the current batch, for the STAT messages */

  /* allocate new word dictionary */
  dict = word_info_new();

  /* main dictionary file */
  if (!init_voca(dict, lmconf->dictfilename, hmminfo, leave_monophone, lmconf->forcedict_flag)) {
    jlog("ERROR: m_fusion: failed to read dictionary, terminated\n");
    goto fail;
  }

  /* additional dictionary files */
  for (item = lmconf->additional_dict_files; item != NULL; item = item->next) {
    fp = fopen(item->name, "rb");
    if (fp == NULL) {
      jlog("ERROR: m_fusion: failed to open %s\n",item->name);
      goto fail;
    }
    base = dict->num;
    while (getl_fp(line, MAXLINELEN, fp) != NULL) {
      /* stop reading this file at the first line that cannot be loaded */
      if (voca_load_line(line, dict, hmminfo) == FALSE) break;
    }
    if (voca_load_end(dict) == FALSE) {
      if (!lmconf->forcedict_flag) {
        jlog("ERROR: m_fusion: error in reading dictionary %s\n", item->name);
        fclose(fp);
        goto fail;
      }
      jlog("Warning: m_fusion: the error words above are ignored\n");
    }
    if (fclose(fp) == -1) {
      jlog("ERROR: m_fusion: failed to close %s\n", item->name);
      goto fail;
    }
    jlog("STAT: + additional dictionary: %s (%d words)\n", item->name, dict->num - base);
  }

  /* additional entries given directly as dictionary lines */
  base = dict->num;
  for (item = lmconf->additional_dict_entries; item != NULL; item = item->next) {
    if (voca_load_line(item->name, dict, hmminfo) == FALSE) {
      /* logged only; the loop goes on to the next entry */
      jlog("ERROR: m_fusion: failed to set entry: %s\n", item->name);
    }
  }
  if (lmconf->additional_dict_entries != NULL) {
    if (voca_load_end(dict) == FALSE) {
      jlog("ERROR: m_fusion: failed to read additinoal word entry\n");
      goto fail;
    }
    jlog("STAT: + additional entries: %d words\n", dict->num - base);
  }

  /* the rest applies to LM_PROB language models only */
  if (lmconf->lmtype != LM_PROB) {
    return dict;
  }

  /* if necessary, append a IW-sp word to the dict if "-iwspword" specified */
  if (lmconf->enable_iwspword) {
    if (voca_append_htkdict(lmconf->iwspentry, dict, hmminfo, leave_monophone) == FALSE) {
      jlog("ERROR: m_fusion: failed to make IW-sp word entry \"%s\"\n", lmconf->iwspentry);
      goto fail;
    }
    jlog("STAT: 1 IW-sp word entry added\n");
  }

  /* set {head,tail}_silwid */
  dict->head_silwid = voca_lookup_wid(lmconf->head_silname, dict);
  if (dict->head_silwid == WORD_INVALID) { /* not exist */
    jlog("ERROR: m_fusion: head sil word \"%s\" not exist in voca\n", lmconf->head_silname);
    goto fail;
  }
  dict->tail_silwid = voca_lookup_wid(lmconf->tail_silname, dict);
  if (dict->tail_silwid == WORD_INVALID) { /* not exist */
    jlog("ERROR: m_fusion: tail sil word \"%s\" not exist in voca\n", lmconf->tail_silname);
    goto fail;
  }

  return dict;

 fail:
  /* common failure exit: release the partially built dictionary */
  word_info_free(dict);
  return NULL;
}