/**
 * Convert a string of space-separated words to an array of word ids.
 *
 * The input is tokenized in place: the space that terminates each word is
 * overwritten with a NUL byte, so @a s is modified and must be writable.
 * Leading, trailing and repeated spaces are skipped.  The scan position is
 * kept in a local variable instead of strtok()'s hidden static state, so
 * this function does not interfere with any other tokenization in progress.
 *
 * @param winfo [in] word dictionary
 * @param s [in,out] string of space-separated word strings (modified in place)
 * @param len_return [out] number of found words; not written on failure
 *
 * @return pointer to a newly allocated word list, or NULL if one of the
 * words is not found in the dictionary.
 */
WORD_ID *
new_str2wordseq(WORD_INFO *winfo, char *s, int *len_return)
{
  char *cur;
  char *word;
  int num;
  int maxnum;
  WORD_ID *wseq;

  maxnum = WSSTEP;
  wseq = (WORD_ID *)mymalloc(sizeof(WORD_ID) * maxnum);
  num = 0;

  cur = s;
  for (;;) {
    /* skip the separators in front of the next word */
    cur += strspn(cur, " ");
    if (*cur == '\0') break;

    /* cut the word out in place, exactly as strtok() would */
    word = cur;
    cur += strcspn(cur, " ");
    if (*cur != '\0') *cur++ = '\0';

    /* grow the result array by one step when it is full */
    if (num >= maxnum) {
      maxnum += WSSTEP;
      wseq = (WORD_ID *)myrealloc(wseq, sizeof(WORD_ID) * maxnum);
    }

    if ((wseq[num] = voca_lookup_wid(word, winfo)) == WORD_INVALID) {
      /* not found */
      jlog("Error: voca_lookup: word \"%s\" not found in dict\n", word);
      free(wseq);
      return NULL;
    }
    num++;
  }

  *len_return = num;
  return wseq;
}
/**
 * @brief Read in word dictionary from a file and setup for recognition.
 *
 * Monophone-to-triphone conversion is performed inside init_voca() while
 * the dictionary is read, so the HMM definition that will be used with
 * this LM has to be given as an argument.
 *
 * Processing order:
 *  -# the main dictionary file (lmconf->dictfilename),
 *  -# every file listed in lmconf->additional_dict_files,
 *  -# every single entry listed in lmconf->additional_dict_entries,
 *  -# for N-gram (LM_PROB) only: the pause word is appended at the end of
 *     the dictionary when "-iwspword" is enabled, then the sentence head
 *     and tail silence words are looked up and stored.
 *
 * @param lmconf [in] LM configuration variables
 * @param hmminfo [in] HMM definition of each phone in dictionary, for
 * phone checking and monophone-to-triphone conversion.
 *
 * @return the newly created word dictionary structure, or NULL on failure.
 * On failure the partially built dictionary is released before returning.
 */
static WORD_INFO *
initialize_dict(JCONF_LM *lmconf, HTK_HMM_INFO *hmminfo)
{
#ifdef MONOTREE
  /* leave winfo monophone for 1st pass lexicon tree */
  const int keep_monophone = TRUE;
#else
  const int keep_monophone = FALSE;
#endif
  WORD_INFO *dict;
  JCONF_LM_NAMELIST *item;
  char line[MAXLINELEN];
  int base;                     /* word count before the current addition */
  FILE *fp;

  /* allocate new word dictionary */
  dict = word_info_new();

  /* read in the main dictionary from file */
  if (!init_voca(dict, lmconf->dictfilename, hmminfo, keep_monophone, lmconf->forcedict_flag)) {
    jlog("ERROR: m_fusion: failed to read dictionary, terminated\n");
    goto fail;
  }

  /* load additional dictionary files */
  for (item = lmconf->additional_dict_files; item != NULL; item = item->next) {
    fp = fopen(item->name, "rb");
    if (fp == NULL) {
      jlog("ERROR: m_fusion: failed to open %s\n",item->name);
      goto fail;
    }
    base = dict->num;
    while (getl_fp(line, MAXLINELEN, fp) != NULL
           && voca_load_line(line, dict, hmminfo) != FALSE) {
      /* keep going until end of input or voca_load_line() returns FALSE */
    }
    if (voca_load_end(dict) == FALSE) {
      if (!lmconf->forcedict_flag) {
        jlog("ERROR: m_fusion: error in reading dictionary %s\n", item->name);
        fclose(fp);
        goto fail;
      }
      /* forced mode: tolerate the bad words and carry on */
      jlog("Warning: m_fusion: the error words above are ignored\n");
    }
    if (fclose(fp) == -1) {
      jlog("ERROR: m_fusion: failed to close %s\n", item->name);
      goto fail;
    }
    jlog("STAT: + additional dictionary: %s (%d words)\n", item->name, dict->num - base);
  }

  /* load additional single entries */
  if (lmconf->additional_dict_entries != NULL) {
    base = dict->num;
    for (item = lmconf->additional_dict_entries; item != NULL; item = item->next) {
      /* a rejected entry is only reported here; loading continues */
      if (voca_load_line(item->name, dict, hmminfo) == FALSE) {
        jlog("ERROR: m_fusion: failed to set entry: %s\n", item->name);
      }
    }
    if (voca_load_end(dict) == FALSE) {
      jlog("ERROR: m_fusion: failed to read additinoal word entry\n");
      goto fail;
    }
    jlog("STAT: + additional entries: %d words\n", dict->num - base);
  }

  /* the remaining setup applies to N-gram only */
  if (lmconf->lmtype != LM_PROB) {
    return dict;
  }

  /* if necessary, append a IW-sp word to the dict if "-iwspword" specified */
  if (lmconf->enable_iwspword) {
    if (voca_append_htkdict(lmconf->iwspentry, dict, hmminfo, keep_monophone) == FALSE) {
      jlog("ERROR: m_fusion: failed to make IW-sp word entry \"%s\"\n", lmconf->iwspentry);
      goto fail;
    }
    jlog("STAT: 1 IW-sp word entry added\n");
  }

  /* set {head,tail}_silwid */
  dict->head_silwid = voca_lookup_wid(lmconf->head_silname, dict);
  if (dict->head_silwid == WORD_INVALID) { /* not exist */
    jlog("ERROR: m_fusion: head sil word \"%s\" not exist in voca\n", lmconf->head_silname);
    goto fail;
  }
  dict->tail_silwid = voca_lookup_wid(lmconf->tail_silname, dict);
  if (dict->tail_silwid == WORD_INVALID) { /* not exist */
    jlog("ERROR: m_fusion: tail sil word \"%s\" not exist in voca\n", lmconf->tail_silname);
    goto fail;
  }

  return dict;

fail:
  /* common error exit: release the partially built dictionary */
  word_info_free(dict);
  return NULL;
}