int main() { const char *data_filenm = "sample_data.txt"; const int gmm_num_components = 3; // Load data from file FILE *fp = fopen(data_filenm, "r"); if (fp == NULL) { printf("ERROR: File 'sample_data.txt' not found.\nRun scripts/generate_data.py to generate sample data.\n"); exit(1); } int N = 0, D = 0; size_t bytes_read, len = 0; char *line = NULL; while ((bytes_read = getline(&line, &len, fp)) != -1) { if (bytes_read > 0) N++; } rewind(fp); len = 0; getline(&line, &len, fp); char *token = strtok(line, " \n"); while (token != NULL) { D++; token = strtok(NULL, " \n"); } double *X = malloc(N*D*sizeof(double)); rewind(fp); line = NULL; len = 0; for (int t=0; t<N; t++) { getline(&line, &len, fp); token = strtok(line, " \n"); X[D*t+0] = atof(token); for (int i=1; i<D; i++) { token = strtok(NULL, " \n"); X[D*t+i] = atof(token); } } fclose(fp); // Train the SGMM GMM *gmm = gmm_new(gmm_num_components, D, "diagonal"); gmm_set_convergence_tol(gmm, 1e-6); gmm_set_regularization_value(gmm, 1e-6); gmm_set_initialization_method(gmm, "random"); struct timeval st, en; gettimeofday(&st, NULL); gmm_fit(gmm, X, N); gettimeofday(&en, NULL); printf("Time elapsed = %lf s\n", (en.tv_sec-st.tv_sec) + (1e-6)*(en.tv_usec-st.tv_usec)); gmm_print_params(gmm); double llh = gmm_score(gmm, X, N); printf("Score (LLH) = %lf\n", llh); gmm_free(gmm); // Free data free(X); return 0; }
AUD_Int32s wov_adapt_gmm_si() { AUD_Error error = AUD_ERROR_NONE; AUD_Int32s ret = 0; AUD_Int8s wavPath[256] = { 0, }; AUD_Int32s data; setbuf( stdout, NULL ); setbuf( stdin, NULL ); AUDLOG( "pls give adapt wav stream's folder path:\n" ); wavPath[0] = '\0'; data = scanf( "%s", wavPath ); AUDLOG( "adapt wav stream's folder path is: %s\n", wavPath ); // step 1: read UBM model from file void *hUbm = NULL; FILE *fpUbm = fopen( WOV_UBM_GMMMODEL_FILE, "rb" ); if ( fpUbm == NULL ) { AUDLOG( "cannot open ubm model file: [%s]\n", WOV_UBM_GMMMODEL_FILE ); return AUD_ERROR_IOFAILED; } error = gmm_import( &hUbm, fpUbm ); AUD_ASSERT( error == AUD_ERROR_NONE ); fclose( fpUbm ); fpUbm = NULL; // AUDLOG( "ubm GMM as:\n" ); // gmm_show( hUbm ); AUD_Int32s i = 0, j = 0; entry *pEntry = NULL; dir *pDir = NULL; AUD_Int32s totalWinNum = 0; pDir = openDir( (const char*)wavPath ); if ( pDir == NULL ) { AUDLOG( "cannot open folder: %s\n", wavPath ); return -1; } while ( ( pEntry = scanDir( pDir ) ) ) { AUD_Int8s keywordFile[256] = { 0, }; AUD_Summary fileSummary; AUD_Int32s sampleNum = 0; snprintf( (char*)keywordFile, 256, "%s/%s", wavPath, pEntry->name ); // AUDLOG( "%s\n", keywordFile ); ret = parseWavFromFile( keywordFile, &fileSummary ); if ( ret < 0 ) { continue; } AUD_ASSERT( fileSummary.channelNum == CHANNEL_NUM && fileSummary.bytesPerSample == BYTES_PER_SAMPLE && fileSummary.sampleRate == SAMPLE_RATE ); // request memeory for template sampleNum = fileSummary.dataChunkBytes / fileSummary.bytesPerSample; for ( j = 0; j * FRAME_STRIDE + FRAME_LEN <= sampleNum; j++ ) { ; } j = j - MFCC_DELAY; totalWinNum += j; } closeDir( pDir ); pDir = NULL; AUD_Matrix featureMatrix; featureMatrix.rows = totalWinNum; featureMatrix.cols = MFCC_FEATDIM; featureMatrix.dataType = AUD_DATATYPE_INT32S; ret = createMatrix( &featureMatrix ); AUD_ASSERT( ret == 0 ); AUD_Int32s currentRow = 0; pDir = openDir( (const char*)wavPath ); while ( ( pEntry = scanDir( pDir ) ) ) { AUD_Int8s keywordFile[256] = { 0, }; 
AUD_Summary fileSummary; AUD_Int32s sampleNum = 0; void *hMfccHandle = NULL; snprintf( (char*)keywordFile, 256, "%s/%s", wavPath, pEntry->name ); // AUDLOG( "%s\n", keywordFile ); ret = parseWavFromFile( keywordFile, &fileSummary ); if ( ret < 0 ) { continue; } AUD_ASSERT( fileSummary.channelNum == CHANNEL_NUM && fileSummary.bytesPerSample == BYTES_PER_SAMPLE && fileSummary.sampleRate == SAMPLE_RATE ); AUD_Int32s bufLen = fileSummary.dataChunkBytes; AUD_Int16s *pBuf = (AUD_Int16s*)calloc( bufLen, 1 ); AUD_ASSERT( pBuf ); sampleNum = readWavFromFile( (AUD_Int8s*)keywordFile, pBuf, bufLen ); AUD_ASSERT( sampleNum > 0 ); // pre-processing // pre-emphasis sig_preemphasis( pBuf, pBuf, sampleNum ); // calc framing number for ( j = 0; j * FRAME_STRIDE + FRAME_LEN <= sampleNum; j++ ) { ; } // XXX: select salient frames AUD_Feature feature; feature.featureMatrix.rows = j - MFCC_DELAY; feature.featureMatrix.cols = MFCC_FEATDIM; feature.featureMatrix.dataType = AUD_DATATYPE_INT32S; feature.featureMatrix.pInt32s = featureMatrix.pInt32s + currentRow * feature.featureMatrix.cols; feature.featureNorm.len = j - MFCC_DELAY; feature.featureNorm.dataType = AUD_DATATYPE_INT64S; ret = createVector( &(feature.featureNorm) ); AUD_ASSERT( ret == 0 ); error = mfcc16s32s_init( &hMfccHandle, FRAME_LEN, WINDOW_TYPE, MFCC_ORDER, FRAME_STRIDE, SAMPLE_RATE, COMPRESS_TYPE ); AUD_ASSERT( error == AUD_ERROR_NONE ); error = mfcc16s32s_calc( hMfccHandle, pBuf, sampleNum, &feature ); AUD_ASSERT( error == AUD_ERROR_NONE ); error = mfcc16s32s_deinit( &hMfccHandle ); AUD_ASSERT( error == AUD_ERROR_NONE ); free( pBuf ); pBuf = NULL; bufLen = 0; ret = destroyVector( &(feature.featureNorm) ); AUD_ASSERT( ret == 0 ); currentRow += feature.featureMatrix.rows; } closeDir( pDir ); pDir = NULL; AUD_Matrix llrMatrix; llrMatrix.rows = totalWinNum; llrMatrix.cols = gmm_getmixnum( hUbm ); llrMatrix.dataType = AUD_DATATYPE_DOUBLE; ret = createMatrix( &llrMatrix ); AUD_ASSERT( ret == 0 ); AUD_Double llr = 0.; for ( j 
= 0; j < featureMatrix.rows; j++ ) { AUD_Vector componentLLR; componentLLR.len = llrMatrix.cols; componentLLR.dataType = AUD_DATATYPE_DOUBLE; componentLLR.pDouble = llrMatrix.pDouble + j * llrMatrix.cols; llr = gmm_llr( hUbm, &(featureMatrix), j, &componentLLR ); } AUD_Vector sumLlr; sumLlr.len = llrMatrix.cols; sumLlr.dataType = AUD_DATATYPE_DOUBLE; ret = createVector( &sumLlr ); AUD_ASSERT( ret == 0 ); AUD_Double *pSumLlr = sumLlr.pDouble; for ( j = 0; j < llrMatrix.cols; j++ ) { pSumLlr[j] = llrMatrix.pDouble[j]; } for ( i = 1; i < llrMatrix.rows; i++ ) { for ( j = 0; j < llrMatrix.cols; j++ ) { pSumLlr[j] = logadd( pSumLlr[j], *(llrMatrix.pDouble + i * llrMatrix.cols + j) ); } } #if 0 AUD_Vector bestIndex; bestIndex.len = TOP_N; bestIndex.dataType = AUD_DATATYPE_INT32S; ret = createVector( &bestIndex ); AUD_ASSERT( ret == 0 ); // get top TOP_N component ret = sortVector( &sumLlr, &bestIndex ); AUD_ASSERT( ret == 0 ); #else llr = pSumLlr[0]; for ( j = 1; j < sumLlr.len; j++ ) { llr = logadd( llr, pSumLlr[j] ); } // AUDLOG( "llr: %.f\n", llr ); AUD_Vector sortIndex; sortIndex.len = sumLlr.len; sortIndex.dataType = AUD_DATATYPE_INT32S; ret = createVector( &sortIndex ); AUD_ASSERT( ret == 0 ); ret = sortVector( &sumLlr, &sortIndex ); AUD_ASSERT( ret == 0 ); int num = 0; double val = 0.; for ( i = 0; i < sortIndex.len; i++ ) { // ln( 0.001 ) ~= -7. 
val = pSumLlr[sortIndex.pInt32s[i]] - llr + 7.; // AUDLOG( "%f, \n", val ); if ( val < 0 ) { break; } num++; } // AUDLOG( "\n" ); AUD_ASSERT( num > 0 ); AUDLOG( "computed component num: %d\n", num ); num = AUD_MAX( num, TOP_N ); AUDLOG( "normalized component num: %d\n", num ); AUD_Vector bestIndex; bestIndex.len = num; bestIndex.dataType = AUD_DATATYPE_INT32S; bestIndex.pInt32s = sortIndex.pInt32s; #endif int slash = '/'; char *ptr = strrchr( (char*)wavPath, slash ); ptr++; // select imposter GMM void *hImposterGmm = NULL; AUD_Int8s imposterGmmName[256] = { 0, }; snprintf( (char*)imposterGmmName, 256, "%s-imposter", ptr ); error = gmm_select( &hImposterGmm, hUbm, &bestIndex, 0 | GMM_INVERTSELECT_MASK, imposterGmmName ); AUD_ASSERT( error == AUD_ERROR_NONE ); gmm_show( hImposterGmm ); // export gmm char imposterFile[256] = { 0 }; snprintf( imposterFile, 256, "%s/%s-imposter.gmm", WOV_IMPOSTER_GMMMODEL_DIR, ptr ); AUDLOG( "Export imposter GMM Model to: %s\n", imposterFile ); FILE *fpImposterGmm = fopen( imposterFile, "wb" ); AUD_ASSERT( fpImposterGmm ); error = gmm_export( hImposterGmm, fpImposterGmm ); AUD_ASSERT( error == AUD_ERROR_NONE ); AUDLOG( "Export imposter GMM Model File Done\n" ); fclose( fpImposterGmm ); fpImposterGmm = NULL; error = gmm_free( &hImposterGmm ); AUD_ASSERT( error == AUD_ERROR_NONE ); // select keyword GMM void *hAdaptedGmm = NULL; AUD_Int8s adaptedGmmName[256] = { 0, }; snprintf( (char*)adaptedGmmName, 256, "%s", ptr ); // AUDLOG( "%s\n", adaptedGmmName ); error = gmm_select( &hAdaptedGmm, hUbm, &bestIndex, 0, adaptedGmmName ); AUD_ASSERT( error == AUD_ERROR_NONE ); ret = destroyVector( &sumLlr ); AUD_ASSERT( ret == 0 ); ret = destroyMatrix( &llrMatrix ); AUD_ASSERT( ret == 0 ); #if 0 ret = destroyVector( &bestIndex ); AUD_ASSERT( ret == 0 ); #else ret = destroyVector( &sortIndex ); AUD_ASSERT( ret == 0 ); #endif #if 1 // adapt GMM error = gmm_adapt( hAdaptedGmm, &featureMatrix ); AUD_ASSERT( error == AUD_ERROR_NONE ); #endif gmm_show( 
hAdaptedGmm ); // export gmm char modelFile[256] = { 0 }; snprintf( modelFile, 256, "%s/%s.gmm", WOV_KEYWORD_GMMMODEL_DIR, ptr ); AUDLOG( "Export GMM Model to: %s\n", modelFile ); FILE *fpGmm = fopen( modelFile, "wb" ); AUD_ASSERT( fpGmm ); error = gmm_export( hAdaptedGmm, fpGmm ); AUD_ASSERT( error == AUD_ERROR_NONE ); AUDLOG( "Export GMM Model File Done\n" ); fclose( fpGmm ); fpGmm = NULL; ret = destroyMatrix( &featureMatrix ); AUD_ASSERT( ret == 0 ); error = gmm_free( &hAdaptedGmm ); AUD_ASSERT( error == AUD_ERROR_NONE ); error = gmm_free( &hUbm ); AUD_ASSERT( error == AUD_ERROR_NONE ); AUDLOG( "keyword model adapt2 done\n" ); return 0; }
/*
 * cudaFree replacement: hands devPtr to gmm_free() and returns whatever
 * gmm_free() yields, converted to cudaError_t.
 */
cudaError_t cudaFree(void *devPtr)
{
    cudaError_t status = gmm_free(devPtr);

    return status;
}
/*
 * Trains a keyword GMM-HMM from a single keyword recording and exports it.
 *
 * Steps, as implemented below:
 *   1. Import the garbage GMM from WOV_UBM_GMMHMMMODEL_FILE.
 *   2. Read the keyword WAV file and extract its MFCC features.
 *   3. For every frame, keep the WOV_KEYWORD_GMMMODEL_ORDER components with
 *      the highest LLR (indices in indexTable, values in llrTable).
 *   4. Cluster the frames into states with gmm_cluster().
 *   5. For every state, merge the per-frame picks into one top-N component
 *      list and select a state GMM from the garbage GMM.
 *   6. Initialise a GMM-HMM from the state GMMs and train it with
 *      gmmhmm_learn().
 *   7. Export the model under pHmmName and release all resources.
 *
 * pKeywordFile: path of the keyword WAV file (read only).
 * pHmmName:     passed to gmmhmm_export() as the export target.
 *
 * Returns 0 on success and AUD_ERROR_IOFAILED when the garbage model file
 * cannot be opened. Other failures trip AUD_ASSERT.
 */
AUD_Int32s train_keyword_hmm( const AUD_Int8s *pKeywordFile, AUD_Int8s *pHmmName )
{
    AUD_Error error = AUD_ERROR_NONE;

    // step 1: read garbage model from file
    void *hGarbageGmm = NULL;
    FILE *fpGarbage   = fopen( (char*)WOV_UBM_GMMHMMMODEL_FILE, "rb" );
    if ( fpGarbage == NULL )
    {
        AUDLOG( "cannot open gmm model file: [%s]\n", WOV_UBM_GMMHMMMODEL_FILE );
        return AUD_ERROR_IOFAILED;
    }
    error = gmm_import( &hGarbageGmm, fpGarbage );
    AUD_ASSERT( error == AUD_ERROR_NONE );
    fclose( fpGarbage );
    fpGarbage = NULL;

    // step 2: read template stream & extract MFCC feature vector
    AUD_Int32s sampleNum = 0;
    AUD_Int32s bufLen    = SAMPLE_RATE * BYTES_PER_SAMPLE * 10;
    AUD_Int16s *pBuf     = (AUD_Int16s*)calloc( bufLen, 1 );
    AUD_ASSERT( pBuf );
    AUD_Int32s ret;

    // read stream from file
    sampleNum = readWavFromFile( (AUD_Int8s*)pKeywordFile, pBuf, bufLen );
    AUD_ASSERT( sampleNum > 0 );

    AUD_Int32s i = 0, j = 0, k = 0, m = 0;

    // front end processing
    // pre-emphasis
    sig_preemphasis( pBuf, pBuf, sampleNum );

    // calc frame number
    for ( j = 0; j * FRAME_STRIDE + FRAME_LEN <= sampleNum; j++ )
    {
        ;
    }
    // The code below indexes pClusterLabel[rows - 1], so at least one
    // feature row is required.
    AUD_ASSERT( j - MFCC_DELAY > 0 );

    AUD_Feature feature;
    feature.featureMatrix.rows     = j - MFCC_DELAY;
    feature.featureMatrix.cols     = MFCC_FEATDIM;
    feature.featureMatrix.dataType = AUD_DATATYPE_INT32S;
    ret = createMatrix( &(feature.featureMatrix) );
    AUD_ASSERT( ret == 0 );

    feature.featureNorm.len      = j - MFCC_DELAY;
    feature.featureNorm.dataType = AUD_DATATYPE_INT64S;
    ret = createVector( &(feature.featureNorm) );
    AUD_ASSERT( ret == 0 );

    // init mfcc handle
    void *hMfccHandle = NULL;
    error = mfcc16s32s_init( &hMfccHandle, FRAME_LEN, WINDOW_TYPE, MFCC_ORDER, FRAME_STRIDE, SAMPLE_RATE, COMPRESS_TYPE );
    AUD_ASSERT( error == AUD_ERROR_NONE );

    // calc MFCC feature
    error = mfcc16s32s_calc( hMfccHandle, pBuf, sampleNum, &feature );
    AUD_ASSERT( error == AUD_ERROR_NONE );

    free( pBuf );
    pBuf = NULL;

    // step 3: for each feature vector, get the bestN most likelihood component indices from GMM
    AUD_Vector componentLLR;
    componentLLR.len      = gmm_getmixnum( hGarbageGmm );
    componentLLR.dataType = AUD_DATATYPE_DOUBLE;
    ret = createVector( &componentLLR );
    AUD_ASSERT( ret == 0 );

    AUD_Matrix indexTable;
    indexTable.rows     = feature.featureMatrix.rows;
    indexTable.cols     = WOV_KEYWORD_GMMMODEL_ORDER;
    indexTable.dataType = AUD_DATATYPE_INT32S;
    ret = createMatrix( &indexTable );
    AUD_ASSERT( ret == 0 );

    AUD_Matrix llrTable;
    llrTable.rows     = feature.featureMatrix.rows;
    llrTable.cols     = WOV_KEYWORD_GMMMODEL_ORDER;
    llrTable.dataType = AUD_DATATYPE_DOUBLE;
    ret = createMatrix( &llrTable );
    AUD_ASSERT( ret == 0 );

    AUD_Double totalLLR;

    for ( i = 0; i < feature.featureMatrix.rows; i++ )
    {
        totalLLR = gmm_llr( hGarbageGmm, &(feature.featureMatrix), i, &componentLLR );

#if 0
        showVector( &componentLLR );
#endif

        // sort the bestN likelihood
        AUD_Int32s *pIndex = indexTable.pInt32s + i * indexTable.cols;
        AUD_Double *pLLR   = llrTable.pDouble + i * llrTable.cols;

        // -1 marks an empty slot
        for ( j = 0; j < WOV_KEYWORD_GMMMODEL_ORDER; j++ )
        {
            pIndex[j] = -1;
            pLLR[j]   = 0.;
        }

        // insertion into a descending top-N list
        for ( j = 0; j < componentLLR.len; j++ )
        {
            for ( k = 0; k < WOV_KEYWORD_GMMMODEL_ORDER; k++ )
            {
                if ( pIndex[k] == -1 )
                {
                    pIndex[k] = j;
                    pLLR[k]   = componentLLR.pDouble[j];
                    break;
                }
                else if ( componentLLR.pDouble[j] > pLLR[k] )
                {
                    // shift the tail down by one to open slot k
                    for ( m = WOV_KEYWORD_GMMMODEL_ORDER - 1; m > k; m-- )
                    {
                        pIndex[m] = pIndex[m - 1];
                        pLLR[m]   = pLLR[m - 1];
                    }
                    pIndex[k] = j;
                    pLLR[k]   = componentLLR.pDouble[j];
                    break;
                }
            }
        }
    }

#if 0
    AUDLOG( "index table( %s, %s, %d ):\n", __FILE__, __FUNCTION__, __LINE__ );
    showMatrix( &indexTable );
    AUDLOG( "llr table( %s, %s, %d ):\n", __FILE__, __FUNCTION__, __LINE__ );
    showMatrix( &llrTable );
#endif

    ret = destroyVector( &componentLLR );
    AUD_ASSERT( ret == 0 );

    // step 4: cluster GMM
    AUD_Int32s *pClusterLabel = (AUD_Int32s*)calloc( sizeof(AUD_Int32s) * feature.featureMatrix.rows, 1 );
    AUD_ASSERT( pClusterLabel );

    error = gmm_cluster( hGarbageGmm, &indexTable, WOV_GMM_CLUSTER_THRESHOLD, pClusterLabel );
    AUD_ASSERT( error == AUD_ERROR_NONE );

    // NOTE(review): the state count is taken from the last frame's label, and
    // the loop below matches labels 0..stateNum-1 — confirm this agrees with
    // the labelling convention of gmm_cluster().
    AUD_Int32s stateNum = pClusterLabel[feature.featureMatrix.rows - 1];
    AUD_ASSERT( stateNum >= 5 );

    // step 5: select and build state GMM
    void **phKeywordGmms = (void**)calloc( sizeof(void*) * stateNum, 1 );
    AUD_ASSERT( phKeywordGmms );

    AUD_Vector indexVector;
    indexVector.len      = WOV_KEYWORD_GMMMODEL_ORDER;
    indexVector.dataType = AUD_DATATYPE_INT32S;
    ret = createVector( &indexVector );
    AUD_ASSERT( ret == 0 );

    AUD_Vector llrVector;
    llrVector.len      = WOV_KEYWORD_GMMMODEL_ORDER;
    llrVector.dataType = AUD_DATATYPE_DOUBLE;
    ret = createVector( &llrVector );
    AUD_ASSERT( ret == 0 );

    int start = 0, end = 0;
    for ( i = 0; i < stateNum; i++ )
    {
        // Reset the per-state top-N list. llrVector holds doubles, so the
        // sentinel must be written through pDouble (not pInt32s).
        for ( j = 0; j < indexVector.len; j++ )
        {
            indexVector.pInt32s[j] = -1;
            llrVector.pDouble[j]   = 1.;
        }

        // frames [start, end) carry label i
        for ( j = start; j < feature.featureMatrix.rows; j++ )
        {
            if ( pClusterLabel[j] != i )
            {
                break;
            }
        }
        end = j;

        // merge every per-frame pick of this state into one descending
        // top-N list, skipping exact duplicates
        for ( k = start * llrTable.cols; k < end * llrTable.cols; k++ )
        {
            for ( m = 0; m < indexVector.len; m++ )
            {
                if ( llrTable.pDouble[k] == llrVector.pDouble[m] &&
                     indexTable.pInt32s[k] == indexVector.pInt32s[m] )
                {
                    break;
                }
                else if ( indexVector.pInt32s[m] == -1 || llrTable.pDouble[k] > llrVector.pDouble[m] )
                {
                    for ( int n = indexVector.len - 1; n > m; n-- )
                    {
                        indexVector.pInt32s[n] = indexVector.pInt32s[n - 1];
                        llrVector.pDouble[n]   = llrVector.pDouble[n - 1];
                    }
                    indexVector.pInt32s[m] = indexTable.pInt32s[k];
                    llrVector.pDouble[m]   = llrTable.pDouble[k];
                    break;
                }
            }
        }

        AUD_Int8s gmmName[256] = { 0, };
        snprintf( (char*)gmmName, sizeof( gmmName ), "state%d", i );
        error = gmm_select( &phKeywordGmms[i], hGarbageGmm, &indexVector, 0, gmmName );
        AUD_ASSERT( error == AUD_ERROR_NONE );

        start = end;
    }

    ret = destroyMatrix( &indexTable );
    AUD_ASSERT( ret == 0 );

    ret = destroyMatrix( &llrTable );
    AUD_ASSERT( ret == 0 );

    ret = destroyVector( &indexVector );
    AUD_ASSERT( ret == 0 );

    ret = destroyVector( &llrVector );
    AUD_ASSERT( ret == 0 );

    free( pClusterLabel );
    pClusterLabel = NULL;

    // step 6: generate keyword model by Baum-Welch algorithm
    AUD_Vector pi;
    pi.len      = stateNum;
    pi.dataType = AUD_DATATYPE_DOUBLE;
    ret = createVector( &pi );
    AUD_ASSERT( ret == 0 );
    // only entry 0 is set here; presumably createVector zero-fills the rest
    // — TODO confirm
    pi.pDouble[0] = 1.0f;

    void *hKeywordHmm = NULL;
    error = gmmhmm_init( &hKeywordHmm, stateNum, &pi, phKeywordGmms );
    AUD_ASSERT( error == AUD_ERROR_NONE );

    error = gmmhmm_learn( hKeywordHmm, &feature, 1, 0.001 );
    AUD_ASSERT( error == AUD_ERROR_NONE );

    // step 7: write model to file
    error = gmmhmm_export( hKeywordHmm, pHmmName );
    AUD_ASSERT( error == AUD_ERROR_NONE );

    // clean field
    error = mfcc16s32s_deinit( &hMfccHandle );
    AUD_ASSERT( error == AUD_ERROR_NONE );

    error = gmm_free( &hGarbageGmm );
    AUD_ASSERT( error == AUD_ERROR_NONE );

    ret = destroyMatrix( &(feature.featureMatrix) );
    AUD_ASSERT( ret == 0 );

    ret = destroyVector( &(feature.featureNorm) );
    AUD_ASSERT( ret == 0 );

    ret = destroyVector( &pi );
    AUD_ASSERT( ret == 0 );

    for ( i = 0; i < stateNum; i++ )
    {
        error = gmm_free( &phKeywordGmms[i] );
        AUD_ASSERT( error == AUD_ERROR_NONE );
    }
    free( phKeywordGmms );
    phKeywordGmms = NULL;

    error = gmmhmm_free( &hKeywordHmm );
    AUD_ASSERT( error == AUD_ERROR_NONE );

    return 0;
}