/**
 * Subroutine to send the information of one recognized word (2nd pass)
 * to the module client.
 *
 * Depending on the output switches out2_word, out2_lm and out2_phone,
 * the word's output string (WORD), its dictionary name (CLASSID) and its
 * phoneme sequence (PHONE) are sent as NAME="value" pairs via module_send().
 * The phonemes are the center names of the word's logical HMMs, separated
 * by single spaces.
 *
 * @param w [in] word ID
 * @param r [in] recognition process instance; the word is looked up in
 *               its word dictionary (r->lm->winfo)
 */
static void
msock_word_out2(WORD_ID w, RecogProcess *r)
{
  static char phone[MAX_HMMNAME_LEN];
  WORD_INFO *dict = r->lm->winfo;
  int k;

  if (out2_word) {
    module_send(module_sd, " WORD=\"%s\"", dict->woutput[w]);
  }
  if (out2_lm) {
    module_send(module_sd, " CLASSID=\"%s\"", dict->wname[w]);
  }
  if (!out2_phone) {
    return;
  }

  module_send(module_sd, " PHONE=\"");
  for (k = 0; k < dict->wlen[w]; k++) {
    center_name(dict->wseq[w][k]->name, phone);
    if (k > 0) {
      /* every phone but the first is preceded by a blank */
      module_send(module_sd, " %s", phone);
    } else {
      module_send(module_sd, "%s", phone);
    }
  }
  module_send(module_sd, "\"");
}
/**
 * Construct the whole pseudo %HMM information, and also add it to the
 * logical triphone tree.
 *
 * After cdset_init(), every non-pseudo logical %HMM in the list starting at
 * hmminfo->lgstart is registered three times, in three separate passes over
 * the list, each time under a different reduced-context name:
 *
 *  -# left-context set:  "a-k" for /a-k+i/, /a-k+o/, ...
 *     (for 1st pass, word end)
 *  -# right-context set: "a+o" for /b-a+o/, /t-a+o/, ...
 *     (for 2nd pass, word beginning)
 *  -# both-context set:  "a" for all triphones with the same base phone "a"
 *     (for 1st pass, 1-phoneme word with no previous word hypothesis)
 *
 * @param hmminfo [i/o] %HMM definition data.  The generated data will also
 * be stored within this.
 *
 * @return TRUE.  As written, this function always returns TRUE; any result
 * of regist_cdset() is not examined.
 */
boolean
make_cdset(HTK_HMM_INFO *hmminfo)
{
  char setname[MAX_HMMNAME_LEN];
  HMM_Logical *cur;

  cdset_init(hmminfo);

  /* pass 1: left-context sets */
  cur = hmminfo->lgstart;
  while (cur != NULL) {
    if (!cur->is_pseudo) {
      regist_cdset(&(hmminfo->cdset_info.cdtree), cur->body.defined,
		   leftcenter_name(cur->name, setname), &(hmminfo->cdset_root));
    }
    cur = cur->next;
  }

  /* pass 2: right-context sets */
  cur = hmminfo->lgstart;
  while (cur != NULL) {
    if (!cur->is_pseudo) {
      regist_cdset(&(hmminfo->cdset_info.cdtree), cur->body.defined,
		   rightcenter_name(cur->name, setname), &(hmminfo->cdset_root));
    }
    cur = cur->next;
  }

  /* pass 3: both-context (base phone) sets */
  cur = hmminfo->lgstart;
  while (cur != NULL) {
    if (!cur->is_pseudo) {
      regist_cdset(&(hmminfo->cdset_info.cdtree), cur->body.defined,
		   center_name(cur->name, setname), &(hmminfo->cdset_root));
    }
    cur = cur->next;
  }

  /* the cdset is now completely built */
  hmminfo->cdset_info.binary_malloc = FALSE;

  return TRUE;
}
//読みを取得する std::string JuliusPlus::ConvertYomi(const WORD_INFO * wordinfo,int index) const { std::string yomi; char buf[64]; for (int j=0;j<wordinfo->wlen[index];j++) { yomi += center_name( (wordinfo->wseq[index][j]->name),buf); } return yomi; }
/**
 * @brief  Generate context-dependent phone name from base phone name
 * and its right context phone name.
 *
 * The center phone name of the right context name is appended to the
 * base phone name.  If the base name already carries a right context,
 * that context is replaced by the new one.
 *
 * - Example 1: "a" | "r" -> "a+r"
 * - Example 2: "a" | "e-k+i" -> "a+k"
 * - Example 3: "k-a" | "e" -> "k-a+e"
 * - Example 4: "k-a+e" | "b" -> "k-a+b"
 * - Example 5: "k-a+e" | "r-u+k" -> "k-a+u"
 *
 * The result is written in place.  No length is passed in, so the caller
 * has to supply a buffer large enough for the resulting name.
 *
 * @param name [i/o] string of phone name to be the base name (will be modified)
 * @param rc [in] right context phone name
 */
void
add_right_context(char name[], char *rc)
{
  char *delim = strchr(name, HMM_RC_DLIM_C);

  if (delim == NULL) {
    /* no right context yet: append the delimiter */
    strcat(name, HMM_RC_DLIM);
  } else {
    /* cut the old right context off, keeping the delimiter itself */
    delim[1] = '\0';
  }

  /* write the center name of rc directly behind the delimiter */
  center_name(rc, name + strlen(name));
}
/**
 * @brief  Generate context-dependent phone name from base phone name
 * and its left context phone name.
 *
 * The center phone name of the left context name is put in front of the
 * base phone name, joined by the left-context delimiter.  If the base name
 * already carries a left context, that context is replaced by the new one.
 *
 * The name is assembled in a static work buffer of MAX_HMMNAME_LEN bytes
 * and then copied back over @a name.  Neither the work buffer nor @a name
 * is bounds-checked here, so the caller has to make sure the resulting
 * name fits in both.
 *
 * @param name [i/o] string of phone name to be the base name (will be modified)
 * @param lc [in] left context phone name
 */
void
add_left_context(char name[], char *lc)
{
  static char joined[MAX_HMMNAME_LEN];
  char *base;

  /* base = the part of name behind an existing left context, if any */
  base = strchr(name, HMM_LC_DLIM_C);
  base = (base != NULL) ? base + 1 : name;

  /* joined = center(lc) + delimiter + base */
  center_name(lc, joined);
  strcat(joined, HMM_LC_DLIM);
  strcat(joined, base);

  /* base points into name, so copy back only after joined is complete */
  strcpy(name, joined);
}
// Print the phoneme sequence of a word sequence to stdout.
//
// For every word the center names of its logical HMMs are printed, each
// preceded by a blank; consecutive words are separated by " |".  The line
// is always terminated by a newline, also when seq is NULL (in which case
// only the newline is printed).
//
//   seq   : word IDs of the hypothesis, may be NULL
//   n     : number of words in seq
//   winfo : word dictionary used to look the words up
void cTumkwsjSink::juPutHypoPhoneme(WORD_ID *seq, int n, WORD_INFO *winfo)
{
  static char phone[MAX_HMMNAME_LEN];

  if (seq != NULL) {
    for (int wi = 0; wi < n; ++wi) {
      if (wi != 0) {
        printf(" |");
      }
      const WORD_ID word = seq[wi];
      for (int pi = 0; pi < winfo->wlen[word]; ++pi) {
        center_name(winfo->wseq[word][pi]->name, phone);
        printf(" %s", phone);
      }
    }
  }
  printf("\n");
}
/**
 * Print the phoneme sequence of a word sequence to stderr.
 *
 * For every word the center names of its logical HMMs are printed, each
 * preceded by a blank; consecutive words are separated by " |".  A newline
 * is always written at the end, also when @a seq is NULL (in which case
 * only the newline is written).
 *
 * @param seq [in] word IDs of the hypothesis, may be NULL
 * @param n [in] number of words in @a seq
 * @param winfo [in] word dictionary used to look the words up
 */
void
put_hypo_phoneme(WORD_ID *seq, int n, WORD_INFO *winfo)
{
  static char phone[MAX_HMMNAME_LEN];
  int count;
  int wi, pi;
  WORD_ID word;

  /* a NULL sequence is treated like an empty one */
  count = (seq != NULL) ? n : 0;

  for (wi = 0; wi < count; wi++) {
    if (wi != 0) {
      fprintf(stderr, " |");
    }
    word = seq[wi];
    for (pi = 0; pi < winfo->wlen[word]; pi++) {
      center_name(winfo->wseq[word][pi]->name, phone);
      fprintf(stderr, " %s", phone);
    }
  }
  fprintf(stderr, "\n");
}
/* based on julius/output_module.c */
/**
 * Print the information of one recognized word to stderr.
 *
 * Three NAME="value" pairs are written: WORD (the word's output string,
 * passed through to_utf()), CLASSID (the word's dictionary name) and PHONE
 * (the center names of the word's logical HMMs, separated by single blanks).
 *
 * @param w [in] word ID
 * @param r [in] recognition process instance; the word is looked up in
 *               its word dictionary (r->lm->winfo)
 */
void
msock_word_out1(WORD_ID w, RecogProcess *r)
{
  static char phone[MAX_HMMNAME_LEN];
  WORD_INFO *dict = r->lm->winfo;
  int k;

  fprintf(stderr, " WORD=\"%s\"", to_utf(dict->woutput[w]));
  fprintf(stderr, " CLASSID=\"%s\"", dict->wname[w]);

  fprintf(stderr, " PHONE=\"");
  for (k = 0; k < dict->wlen[w]; k++) {
    /* center_name(): see libsent/src/hmminfo/cdhmm.c */
    center_name(dict->wseq[w][k]->name, phone);
    if (k > 0) {
      /* every phone but the first is preceded by a blank */
      fprintf(stderr, " %s", phone);
    } else {
      fprintf(stderr, "%s", phone);
    }
  }
  fprintf(stderr, "\"");
}
/**
 * Write the recognition results of all live recognition processes to the
 * file-scope output stream fp.
 *
 * For each live process:
 *  - when the engine has more than one process, a "[#id name]" header is
 *    written first;
 *  - when the result status is negative, a one-line reason is written for
 *    the status codes handled below and the process is skipped;
 *  - otherwise, for every result sentence, the word output strings, the
 *    word names, the phoneme sequence, the confidence scores (only when
 *    built with CONFIDENCE_MEASURE), the score (plus AM/LM scores for
 *    LM_PROB), the grammar ID (for LM_DFA when multigram_get_all_num()
 *    reports more than one) and all attached forced-alignment results
 *    are written.
 *
 * @param recog [in] engine instance whose process list is walked
 * @param dummy [in] not used in this function
 */
static void
outfile_sentence(Recog *recog, void *dummy)
{
  static char phone[MAX_HMMNAME_LEN];
  RecogProcess *proc;
  Sentence *hyp;
  SentenceAlign *al;
  HMM_Logical *hmm;
  WORD_INFO *dict;
  WORD_ID *words;
  int nwords, nsent;
  int rank, wi, pi, unit;
  boolean multi;

  /* more than one recognition process -> label each result block */
  multi = (recog->process_list->next != NULL) ? TRUE : FALSE;

  for (proc = recog->process_list; proc != NULL; proc = proc->next) {
    if (!proc->live) continue;

    if (multi) {
      fprintf(fp, "[#%d %s]\n", proc->config->id, proc->config->name);
    }

    if (proc->result.status < 0) {
      /* no result for this process: report why and go on */
      switch (proc->result.status) {
      case J_RESULT_STATUS_REJECT_POWER:
	fprintf(fp, "<input rejected by power>\n");
	break;
      case J_RESULT_STATUS_TERMINATE:
	fprintf(fp, "<input teminated by request>\n");
	break;
      case J_RESULT_STATUS_ONLY_SILENCE:
	fprintf(fp, "<input rejected by decoder (silence input result)>\n");
	break;
      case J_RESULT_STATUS_REJECT_GMM:
	fprintf(fp, "<input rejected by GMM>\n");
	break;
      case J_RESULT_STATUS_REJECT_SHORT:
	fprintf(fp, "<input rejected by short input>\n");
	break;
      case J_RESULT_STATUS_FAIL:
	fprintf(fp, "<search failed>\n");
	break;
      }
      continue;
    }

    dict = proc->lm->winfo;
    nsent = proc->result.sentnum;

    for (rank = 0; rank < nsent; rank++) {
      hyp = &(proc->result.sent[rank]);
      words = hyp->word;
      nwords = hyp->word_num;

      /* word output strings */
      fprintf(fp, "sentence%d:", rank + 1);
      for (wi = 0; wi < nwords; wi++) {
	fprintf(fp, " %s", dict->woutput[words[wi]]);
      }
      fprintf(fp, "\n");

      /* word names */
      fprintf(fp, "wseq%d:", rank + 1);
      for (wi = 0; wi < nwords; wi++) {
	fprintf(fp, " %s", dict->wname[words[wi]]);
      }
      fprintf(fp, "\n");

      /* phoneme sequence, words separated by " |" */
      fprintf(fp, "phseq%d:", rank + 1);
      for (wi = 0; wi < nwords; wi++) {
	if (wi != 0) fprintf(fp, " |");
	for (pi = 0; pi < dict->wlen[words[wi]]; pi++) {
	  center_name(dict->wseq[words[wi]][pi]->name, phone);
	  fprintf(fp, " %s", phone);
	}
      }
      fprintf(fp, "\n");

#ifdef CONFIDENCE_MEASURE
      /* per-word confidence scores */
      fprintf(fp, "cmscore%d:", rank + 1);
      for (wi = 0; wi < nwords; wi++) {
	fprintf(fp, " %5.3f", hyp->confidence[wi]);
      }
      fprintf(fp, "\n");
#endif

      /* scores */
      fprintf(fp, "score%d: %f", rank + 1, hyp->score);
      if (proc->lmtype == LM_PROB) {
	fprintf(fp, " (AM: %f LM: %f)", hyp->score_am, hyp->score_lm);
      }
      fprintf(fp, "\n");

      /* grammar ID */
      if (proc->lmtype == LM_DFA && multigram_get_all_num(proc->lm) > 1) {
	fprintf(fp, "grammar%d: %d\n", rank + 1, hyp->gram_id);
      }

      /* output alignment results if they exist */
      for (al = hyp->align; al != NULL; al = al->next) {
	fprintf(fp, "=== begin forced alignment ===\n");
	switch (al->unittype) {
	case PER_WORD:
	  fprintf(fp, "-- word alignment --\n");
	  break;
	case PER_PHONEME:
	  fprintf(fp, "-- phoneme alignment --\n");
	  break;
	case PER_STATE:
	  fprintf(fp, "-- state alignment --\n");
	  break;
	}
	fprintf(fp, " id: from to n_score unit\n");
	fprintf(fp, " ----------------------------------------\n");

	for (unit = 0; unit < al->num; unit++) {
	  fprintf(fp, "[%4d %4d] %f ", al->begin_frame[unit], al->end_frame[unit], al->avgscore[unit]);

	  switch (al->unittype) {
	  case PER_WORD:
	    fprintf(fp, "%s\t[%s]\n", dict->wname[al->w[unit]], dict->woutput[al->w[unit]]);
	    break;

	  case PER_PHONEME:
	    hmm = al->ph[unit];
	    if (!hmm->is_pseudo) {
	      if (strmatch(hmm->name, hmm->body.defined->name)) {
		fprintf(fp, "%s\n", hmm->name);
	      } else {
		/* logical name differs from the defined HMM it maps to */
		fprintf(fp, "%s[%s]\n", hmm->name, hmm->body.defined->name);
	      }
	    } else {
	      fprintf(fp, "{%s}\n", hmm->name);
	    }
	    break;

	  case PER_STATE:
	    hmm = al->ph[unit];
	    if (!hmm->is_pseudo) {
	      if (strmatch(hmm->name, hmm->body.defined->name)) {
		fprintf(fp, "%s", hmm->name);
	      } else {
		fprintf(fp, "%s[%s]", hmm->name, hmm->body.defined->name);
	      }
	    } else {
	      fprintf(fp, "{%s}", hmm->name);
	    }
	    /* is_iwsp[] is consulted only for multipath models */
	    if (proc->am->hmminfo->multipath && al->is_iwsp[unit]) {
	      fprintf(fp, " #%d (sp)\n", al->loc[unit]);
	    } else {
	      fprintf(fp, " #%d\n", al->loc[unit]);
	    }
	    break;
	  }
	}

	fprintf(fp, "re-computed AM score: %f\n", al->allscore);
	fprintf(fp, "=== end forced alignment ===\n");
      }
    }
  }
}
// Callback for the 1st-pass recognition result.
//
// Works on the first recognition process of recog only.  Returns without
// doing anything when the sink is aborting, the process is not live, its
// result status is negative, or short-pause segmentation
// (config->successive.enabled) is on.
//
// Otherwise it
//   - prints the 1st-pass phoneme sequence when verbose_flag is set (plus
//     the logical HMM names when debug2_flag is set as well),
//   - calls juAlignPass1Keywords() for the process,
//   - fills a Kresult via fillKresult() and, when resultRecp is set, sends
//     it as an 'asrKeywordOutput' component message,
//   - prints the content of the Kresult to stdout.
//
//   recog : engine instance
//   dummy : not used in this function
void cTumkwsjSink::cbResultPass1(Recog *recog, void *dummy)
{
  static char phone[MAX_HMMNAME_LEN];
  boolean multi;
  const boolean have_progout = TRUE;

  if (isAbort()) return;

  // note: multi is computed but not read anywhere in this function
  multi = (recog->process_list->next != NULL) ? TRUE : FALSE;

  RecogProcess *proc = recog->process_list;
  if (!proc->live) return;
  if (proc->result.status < 0) return;      /* search already failed  */
  if (have_progout && proc->config->successive.enabled) return; /* short pause segmentation */

  if (proc->config->output.progout_flag) printf("\r");

  WORD_INFO *dict = proc->lm->winfo;
  WORD_ID *words = proc->result.pass1.word;
  int nwords = proc->result.pass1.word_num;
  Sentence *best = &(proc->result.pass1);

  if (verbose_flag) {
    /* phoneme sequence, words separated by " |" */
    printf("p1_phon:");
    for (int wi = 0; wi < nwords; ++wi) {
      for (int pi = 0; pi < dict->wlen[words[wi]]; ++pi) {
        center_name(dict->wseq[words[wi]][pi]->name, phone);
        printf(" %s", phone);
      }
      if (wi < nwords-1) printf(" |");
    }
    printf("\n");

    if (debug2_flag) {
      /* logical HMMs */
      printf("pass1_best_HMMseq_logical:");
      for (int wi = 0; wi < nwords; ++wi) {
        for (int pi = 0; pi < dict->wlen[words[wi]]; ++pi) {
          printf(" %s", dict->wseq[words[wi]][pi]->name);
        }
        if (wi < nwords-1) printf(" |");
      }
      printf("\n");
    }
  }

  juAlignPass1Keywords(proc, proc->am->mfcc->param);

  // create smile message:
  Kresult k;
  fillKresult(&k, words, nwords, dict, best->confidence, proc->result.num_frame, best->align);
  if (resultRecp != NULL) {
    cComponentMessage msg("asrKeywordOutput");
    msg.custData = &k;
    msg.userTime1 = turnStartSmileTimeCur;
    msg.userTime2 = turnStartSmileTimeCur + ((double)(k.turnDuration))*period;
    sendComponentMessage( resultRecp, &msg );
    SMILE_IDBG(3,"sending 'asrKeywordOutput' message to '%s'",resultRecp);
  }

  // output content of k:
  printf("-------result package (pass 1):----------\n");
  printf("numOfKw:%i\n",k.numOfKw);
  printf("turnDuration:%i\n",k.turnDuration);

  printf("keywords: ");
  for (int kc = 0; kc < (k.numOfKw); ++kc) {
    printf("%s ",k.keyword[kc]);
  }
  printf("\n");

  printf("kwConf: ");
  for (int kc = 0; kc < (k.numOfKw); ++kc) {
    printf("%5.3f ",k.kwConf[kc]);
  }
  printf("\n");

  printf("kwStartTimes: ");
  for (int kc = 0; kc < (k.numOfKw); ++kc) {
    printf("%.3f ",k.kwStartTime[kc]);
  }
  printf("\n");

  printf("-----------------------------------------\n");
  printf("\n\n");

  fflush(stdout);
}
/** * Sub function to Add a dictionary entry line to the word dictionary. * * @param buf [i/o] buffer to hold the input string, will be modified in this function * @param vnum_p [in] current number of words in @a winfo * @param linenum [in] current line number of the input * @param winfo [out] pointer to word dictionary to append the data. * @param hmminfo [in] HTK %HMM definition data. if NULL, phonemes are ignored. * @param do_conv [in] TRUE if performing triphone conversion * @param ok_flag [out] will be set to FALSE if an error occured for this input. * @param headphone [in] word head silence model name * @param tailphone [in] word tail silence model name * @param contextphone [in] silence context name to be used at head and tail * * @return FALSE if buf == "DICEND", else TRUE will be returned. */ boolean voca_load_wordlist_line(char *buf, WORD_ID *vnum_p, int linenum, WORD_INFO *winfo, HTK_HMM_INFO *hmminfo, boolean do_conv, boolean *ok_flag, char *headphone, char *tailphone, char *contextphone) { char *ptmp, *lp = NULL, *p; static char cbuf[MAX_HMMNAME_LEN]; static HMM_Logical **tmpwseq = NULL; static int tmpmaxlen; int len; HMM_Logical *tmplg; boolean pok, first; int vnum; vnum = *vnum_p; if (strmatch(buf, "DICEND")) return FALSE; /* allocate temporal work area for the first call */ if (tmpwseq == NULL) { tmpmaxlen = PHONEMELEN_STEP; tmpwseq = (HMM_Logical **)mymalloc(sizeof(HMM_Logical *) * tmpmaxlen); } /* backup whole line for debug output */ strcpy(bufbak, buf); /* Output string */ if ((ptmp = mystrtok_quote(buf, " \t\n")) == NULL) { jlog("Error: voca_load_wordlist: line %d: corrupted data:\n> %s\n", linenum, bufbak); winfo->errnum++; *ok_flag = FALSE; return TRUE; } winfo->wname[vnum] = strcpy((char *)mybmalloc2(strlen(ptmp)+1, &(winfo->mroot)), ptmp); /* reset transparent flag */ winfo->is_transparent[vnum] = FALSE; /* just move pointer to next token */ if ((ptmp = mystrtok_movetonext(NULL, " \t\n")) == NULL) { jlog("Error: voca_load_wordlist: line %d: 
corrupted data:\n> %s\n", linenum, bufbak); winfo->errnum++; *ok_flag = FALSE; return TRUE; } #ifdef CLASS_NGRAM winfo->cprob[vnum] = 0.0; /* prob = 1.0, logprob = 0.0 */ #endif if (ptmp[0] == '@') { /* class N-gram prob */ #ifdef CLASS_NGRAM /* word probability within the class (for class N-gram) */ /* format: classname @classprob wordname [output] phoneseq */ /* classname equals to wname, and wordname will be omitted */ /* format: @%f (log scale) */ /* if "@" not found or "@0", it means class == word */ if ((ptmp = mystrtok(NULL, " \t\n")) == NULL) { jlog("Error: voca_load_wordlist: line %d: corrupted data:\n> %s\n", linenum, bufbak); winfo->errnum++; *ok_flag = FALSE; return TRUE; } if (ptmp[1] == '\0') { /* space between '@' and figures */ jlog("Error: voca_load_wordlist: line %d: value after '@' missing, maybe wrong space?\n> %s\n", linenum, bufbak); winfo->errnum++; *ok_flag = FALSE; return TRUE; } winfo->cprob[vnum] = atof(&(ptmp[1])); if (winfo->cprob[vnum] != 0.0) winfo->cwnum++; /* read next word entry (just skip them) */ if ((ptmp = mystrtok(NULL, " \t\n")) == NULL) { jlog("Error: voca_load_wordlist: line %d: corrupted data:\n> %s\n", linenum,bufbak); winfo->errnum++; *ok_flag = FALSE; return TRUE; } /* move to the next word entry */ if ((ptmp = mystrtok_movetonext(NULL, " \t\n")) == NULL) { jlog("Error: voca_load_wordlist: line %d: corrupted data:\n> %s\n", linenum, bufbak); winfo->errnum++; *ok_flag = FALSE; return TRUE; } #else /* ~CLASS_NGRAM */ jlog("Error: voca_load_wordlist: line %d: cannot handle in-class word probability\n> %s\n", linenum, ptmp, bufbak); winfo->errnum++; *ok_flag = FALSE; return TRUE; #endif /* CLASS_NGRAM */ } /* OutputString */ switch(ptmp[0]) { case '[': /* ignore transparency */ ptmp = mystrtok_quotation(NULL, " \t\n", '[', ']', 0); break; case '{': /* ignore transparency */ ptmp = mystrtok_quotation(NULL, " \t\n", '{', '}', 0); break; default: /* ALLOW no entry for output */ /* same as wname is used */ ptmp = 
winfo->wname[vnum]; } if (ptmp == NULL) { jlog("Error: voca_load_htkdict: line %d: corrupted data:\n> %s\n", linenum, bufbak); winfo->errnum++; *ok_flag = FALSE; return TRUE; } winfo->woutput[vnum] = strcpy((char *)mybmalloc2(strlen(ptmp)+1, &(winfo->mroot)), ptmp); /* phoneme sequence */ if (hmminfo == NULL) { /* don't read */ winfo->wseq[vnum] = NULL; winfo->wlen[vnum] = 0; } else { len = 0; first = TRUE; pok = TRUE; for (;;) { if (do_conv) { if (first) { /* init phone cycler */ cycle_triphone(NULL); /* insert head phone at beginning of word */ if (contextphone) { if (strlen(contextphone) >= MAX_HMMNAME_LEN) { jlog("Error: voca_load_htkdict: line %d: too long phone name: %s\n", linenum, contextphone); winfo->errnum++; *ok_flag = FALSE; return TRUE; } cycle_triphone(contextphone); } else { cycle_triphone("NULL_C"); } if ((lp = mystrtok(NULL, " \t\n")) == NULL) { jlog("Error: voca_load_wordlist: line %d: word %s has no phoneme:\n> %s\n", linenum, winfo->wname[vnum], bufbak); winfo->errnum++; *ok_flag = FALSE; return TRUE; } if (strlen(lp) >= MAX_HMMNAME_LEN) { jlog("Error: voca_load_htkdict: line %d: too long phone name: %s\n", linenum, lp); winfo->errnum++; *ok_flag = FALSE; return TRUE; } p = cycle_triphone(lp); first = FALSE; } else { /* do_conv, not first */ if (lp != NULL) { /* some token processed at last loop */ lp = mystrtok(NULL, " \t\n"); if (lp != NULL) { /* token exist */ if (strlen(lp) >= MAX_HMMNAME_LEN) { jlog("Error: voca_load_htkdict: line %d: too long phone name: %s\n", linenum, lp); winfo->errnum++; *ok_flag = FALSE; return TRUE; } p = cycle_triphone(lp); } else { /* no more token, insert tail phone at end of word */ if (contextphone) { if (strlen(contextphone) >= MAX_HMMNAME_LEN) { jlog("Error: voca_load_htkdict: line %d: too long phone name: %s\n", linenum, contextphone); winfo->errnum++; *ok_flag = FALSE; return TRUE; } p = cycle_triphone(contextphone); } else { p = cycle_triphone("NULL_C"); } } } else { /* no more token at last input */ /* 
flush tone cycler */ p = cycle_triphone_flush(); } } } else { /* not do_conv */ if (first) { p = lp = headphone; first = FALSE; } else { if (lp != NULL) { /* some token processed at last loop */ p = lp = mystrtok(NULL, " \t\n"); /* if no more token, use tailphone */ if (lp == NULL) p = tailphone; } else { /* no more token at last input, exit loop */ p = NULL; } } } if (p == NULL) break; /* for headphone and tailphone, their context should not be handled */ /* and when they appear as context they should be replaced by contextphone */ if (do_conv) { center_name(p, cbuf); if (contextphone) { if (strmatch(cbuf, contextphone)) { if (len == 0) { p = headphone; } else if (lp == NULL) { p = tailphone; } } } else { if (strmatch(cbuf, "NULL_C")) { if (len == 0) { p = headphone; } else if (lp == NULL) { p = tailphone; } } else { if (strnmatch(p, "NULL_C", 6)) { if (strnmatch(&(p[strlen(p)-6]), "NULL_C", 6)) { p = cbuf; } else { p = rightcenter_name(p, cbuf); } } else if (strnmatch(&(p[strlen(p)-6]), "NULL_C", 6)) { p = leftcenter_name(p, cbuf); } } } } //printf("[[%s]]\n", p); /* both defined/pseudo phone is allowed */ tmplg = htk_hmmdata_lookup_logical(hmminfo, p); if (tmplg == NULL) { /* not found */ if (do_conv) { /* logical phone was not found */ jlog("Error: voca_load_wordlist: line %d: logical phone \"%s\" not found\n", linenum, p); snprintf(cbuf,MAX_HMMNAME_LEN,"%s", p); } else { jlog("Error: voca_load_wordlist: line %d: phone \"%s\" not found\n", linenum, p); snprintf(cbuf, MAX_HMMNAME_LEN, "%s", p); } add_to_error(winfo, cbuf); pok = FALSE; } else { /* found */ if (len >= tmpmaxlen) { /* expand wseq area by PHONEMELEN_STEP */ tmpmaxlen += PHONEMELEN_STEP; tmpwseq = (HMM_Logical **)myrealloc(tmpwseq, sizeof(HMM_Logical *) * tmpmaxlen); } /* store to temporal buffer */ tmpwseq[len] = tmplg; } len++; } if (!pok) { /* error in phoneme */ jlog("Error: voca_load_wordlist: the line content was: %s\n", bufbak); winfo->errnum++; *ok_flag = FALSE; return TRUE; } if (len == 0) 
{ jlog("Error: voca_load_wordlist: line %d: no phone specified:\n> %s\n", linenum, bufbak); winfo->errnum++; *ok_flag = FALSE; return TRUE; } /* store to winfo */ winfo->wseq[vnum] = (HMM_Logical **)mybmalloc2(sizeof(HMM_Logical *) * len, &(winfo->mroot)); memcpy(winfo->wseq[vnum], tmpwseq, sizeof(HMM_Logical *) * len); winfo->wlen[vnum] = len; winfo->wton[vnum] = 0; } vnum++; *vnum_p = vnum; return(TRUE); }