/**
 * Extract an MFCC parameter vector sequence from a speech waveform.
 *
 * If a noise spectrum file is configured (ssload_filename) it is loaded
 * once into ssbuf.  If sscalc is set, a noise spectrum is computed from
 * the first (sscalc_len * para.smp_freq / 1000) samples of each input
 * (clipped to speechlen) and replaces the previous content of ssbuf.
 * The spectrum in ssbuf is then passed to Wav2MFCC().
 *
 * @param speech [in] buffer of speech waveform
 * @param speechlen [in] length of @a speech in samples
 *
 * @return pointer to a newly allocated parameter structure holding the
 * extracted MFCC vector sequence, or NULL when the input is shorter than
 * one analysis frame.  The caller owns the returned structure.
 */
HTK_Param *
new_wav2mfcc(SP16 speech[], int speechlen)
{
  HTK_Param *param;
  int framenum;
  int i;
  int len;

  if (ssload_filename && ssbuf == NULL) {
    /* load noise spectrum for spectral subtraction from file (once) */
    if ((ssbuf = new_SS_load_from_file(ssload_filename, &sslen)) == NULL) {
      /* NOTE(review): presumably j_error() does not return -- confirm */
      j_error("Error: failed to read \"%s\"\n", ssload_filename);
    }
  }

  if (sscalc) {
    /* compute noise spectrum from head silence for each input */
    len = sscalc_len * para.smp_freq / 1000;
    if (len > speechlen) len = speechlen;
#ifdef SSDEBUG
    printf("[%d]\n", len);
#endif
    /* release the spectrum of the previous input (or the one loaded from
       file) before it gets overwritten; free(NULL) is a no-op */
    free(ssbuf);
    ssbuf = new_SS_calculate(speech, len, para, &sslen);
  }
#ifdef SSDEBUG
  for (i = 0; i < sslen; i++) {
    printf("%d: %f\n", i, ssbuf[i]);
  }
#endif

  /* Reject input shorter than one frame explicitly: the division below
     truncates toward zero, so a small negative numerator would still
     yield framenum == 1 and slip through the "framenum < 1" test. */
  if (speechlen < para.framesize) {
    j_printerr("input too short (%d samples), ignored\n", speechlen);
    return NULL;
  }

  /* calculate frame length from speech length, frame size and frame shift */
  framenum = (int)((speechlen - para.framesize) / para.frameshift) + 1;
  if (framenum < 1) {
    j_printerr("input too short (%d samples), ignored\n", speechlen);
    return NULL;
  }

  /* allocate a new parameter structure with one vector per frame */
  param = new_param();
  param->parvec = (VECT **)mymalloc(sizeof(VECT *) * framenum);
  for (i = 0; i < framenum; i++) {
    param->parvec[i] = (VECT *)mymalloc(sizeof(VECT) * para.veclen);
  }

  /* make MFCC from speech data */
  Wav2MFCC(speech, param->parvec, para, speechlen, ssbuf, sslen);

  /* set miscellaneous parameters */
  param->header.samplenum = framenum;
  param->header.wshift = para.smp_period * para.frameshift;
  param->header.sampsize = para.veclen * sizeof(VECT); /* not compressed */
  param->header.samptype = F_MFCC;
  if (para.delta) param->header.samptype |= F_DELTA;
  if (para.acc) param->header.samptype |= F_ACCL;
  if (para.energy) param->header.samptype |= F_ENERGY;
  if (para.c0) param->header.samptype |= F_ZEROTH;
  if (para.absesup) param->header.samptype |= F_ENERGY_SUP;
  if (para.cmn) param->header.samptype |= F_CEPNORM;
  param->veclen = para.veclen;
  param->samplenum = framenum;

  return param;
}
/**
 * Extract MFCC parameters from a speech waveform for every MFCC
 * calculation instance of an engine instance.
 *
 * For each instance in recog->mfcclist the parameter vectors are computed
 * by Wav2MFCC(), optionally spliced (mfcc->splice consecutive frames
 * concatenated into one vector), and stored in mfcc->param together with
 * the header fields.
 *
 * Spectral subtraction: a noise spectrum configured by file is loaded once
 * and kept in mfcc->frontend.ssbuf.  A noise spectrum computed from the
 * head of the input (frontend.sscalc) lives only for the duration of this
 * call and is released on every exit path, success or failure.
 *
 * @param speech [in] buffer of speech waveform
 * @param speechlen [in] length of @a speech in samples
 * @param recog [in] engine instance
 *
 * @return TRUE on success, FALSE when the input is too short for at least
 * one frame (after splicing), when the noise spectrum file cannot be read,
 * when parameter memory cannot be allocated, or when Wav2MFCC() fails.
 *
 * @callgraph
 * @callergraph
 */
boolean
wav2mfcc(SP16 speech[], int speechlen, Recog *recog)
{
  int framenum;
  int len;
  Value *para;
  MFCCCalc *mfcc;
  int veclen;
  int t, i;
  boolean ok = FALSE;

  /* Reject input shorter than one frame explicitly: the division below
     truncates toward zero, so a small negative numerator would still
     yield framenum == 1 and slip through the "framenum < 1" test. */
  if (speechlen < recog->jconf->input.framesize) {
    jlog("WARNING: input too short (%d samples), ignored\n", speechlen);
    return FALSE;
  }

  /* calculate frame length from speech length, frame size and frame shift */
  framenum = (int)((speechlen - recog->jconf->input.framesize) / recog->jconf->input.frameshift) + 1;
  if (framenum < 1) {
    jlog("WARNING: input too short (%d samples), ignored\n", speechlen);
    return FALSE;
  }

  /* prepare the noise spectrum of every instance */
  for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {

    if (mfcc->frontend.ssload_filename) {
      /* setup for spectral subtraction using file */
      if (mfcc->frontend.ssbuf == NULL) {
        /* load noise spectrum for spectral subtraction from file (once) */
        if ((mfcc->frontend.ssbuf = new_SS_load_from_file(mfcc->frontend.ssload_filename, &(mfcc->frontend.sslen))) == NULL) {
          jlog("ERROR: wav2mfcc: failed to read noise spectrum from file \"%s\"\n", mfcc->frontend.ssload_filename);
          /* earlier instances may already hold a computed spectrum */
          goto cleanup;
        }
      }
    }

    if (mfcc->frontend.sscalc) {
      /* compute noise spectrum from head silence for each input */
      len = mfcc->frontend.sscalc_len * recog->jconf->input.sfreq / 1000;
      if (len > speechlen) len = speechlen;
#ifdef SSDEBUG
      jlog("DEBUG: [%d]\n", len);
#endif
      /* do not lose a buffer that is about to be overwritten;
         free(NULL) is a no-op */
      free(mfcc->frontend.ssbuf);
      mfcc->frontend.ssbuf = new_SS_calculate(speech, len, &(mfcc->frontend.sslen), mfcc->frontend.mfccwrk_ss, mfcc->para);
    }

  }

  /* compute mfcc from speech file for each mfcc instances */
  for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {

    para = mfcc->para;

    /* a spliced vector concatenates mfcc->splice base vectors */
    veclen = para->veclen * mfcc->splice;

    /* splicing consumes (splice - 1) frames at the tail */
    if (framenum - (mfcc->splice - 1) < 1) {
      jlog("WARNING: input too short (%d samples), ignored\n", speechlen);
      goto cleanup;
    }

    /* malloc new param */
    param_init_content(mfcc->param);
    if (param_alloc(mfcc->param, framenum, veclen) == FALSE) {
      jlog("ERROR: failed to allocate memory for converted parameter vectors\n");
      goto cleanup;
    }

    if (mfcc->frontend.ssload_filename || mfcc->frontend.sscalc) {
      /* make link from mfccs to this buffer */
      mfcc->wrk->ssbuf = mfcc->frontend.ssbuf;
      mfcc->wrk->ssbuflen = mfcc->frontend.sslen;
      mfcc->wrk->ss_alpha = mfcc->frontend.ss_alpha;
      mfcc->wrk->ss_floor = mfcc->frontend.ss_floor;
    }

    /* make MFCC from speech data */
    if (Wav2MFCC(speech, mfcc->param->parvec, para, speechlen, mfcc->wrk, mfcc->cmn.wrk) == FALSE) {
      jlog("ERROR: failed to compute features from input speech\n");
      goto cleanup;
    }

    /* splicing: append the base vectors of the following (splice - 1)
       frames behind the base vector of frame t */
    if (mfcc->splice > 1) {
      for (t = 0; t < framenum - (mfcc->splice - 1); t++) {
        for (i = 1; i < mfcc->splice; i++) {
          memcpy(&(mfcc->param->parvec[t][para->veclen * i]), &(mfcc->param->parvec[t + i][0]), sizeof(VECT) * para->veclen);
        }
      }
    }

    /* set miscellaneous parameters */
    mfcc->param->header.samplenum = framenum - (mfcc->splice - 1);
    mfcc->param->header.wshift = para->smp_period * para->frameshift;
    mfcc->param->header.sampsize = veclen * sizeof(VECT); /* not compressed */
    mfcc->param->header.samptype = para->basetype;
    if (para->delta) mfcc->param->header.samptype |= F_DELTA;
    if (para->acc) mfcc->param->header.samptype |= F_ACCL;
    if (para->energy) mfcc->param->header.samptype |= F_ENERGY;
    if (para->c0) mfcc->param->header.samptype |= F_ZEROTH;
    if (para->absesup) mfcc->param->header.samptype |= F_ENERGY_SUP;
    if (para->cmn) mfcc->param->header.samptype |= F_CEPNORM;
    mfcc->param->veclen = veclen;
    mfcc->param->samplenum = framenum - (mfcc->splice - 1);

  }

  ok = TRUE;

 cleanup:
  /* The per-input noise spectra are valid for this call only.  Release
     them for ALL instances, so that a failure in one instance does not
     leave the buffers of the others allocated.
     NOTE(review): mfcc->wrk->ssbuf keeps the freed address until the next
     call re-links it -- confirm nothing reads it in between. */
  for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
    if (mfcc->frontend.sscalc) {
      free(mfcc->frontend.ssbuf);
      mfcc->frontend.ssbuf = NULL;
    }
  }

  return ok;
}