Example #1
0
// Builds the audio classification pipeline and registers its tuneables.
//
// Pipeline stages, in order: FFT feature extraction, MFCC feature extraction,
// an SVM classifier, and a ClassLabelFilter post-processing stage that
// smooths the predicted labels. Three tuneables (noise level, smoothing
// duration, smoothing ratio) are registered with callbacks that push the new
// value into the already-constructed pipeline modules.
void setup() {
    stream.setLabelsForAllDimensions({"audio"});

    pipeline.addFeatureExtractionModule(
        FFT(kFftWindowSize, kFftHopSize,
            DIM, FFT::HAMMING_WINDOW, true, false));

    MFCC::Options options;
    options.sample_rate = kSampleRate;
    options.fft_size = kFftWindowSize / 2;
    options.start_freq = 300;
    options.end_freq = 3700;
    options.num_tri_filter = 26;
    options.num_cepstral_coeff = 12;
    options.lifter_param = 22;
    options.use_vad = true;
    options.noise_level = noise_level;

    pipeline.addFeatureExtractionModule(MFCC(options));

    pipeline.setClassifier(SVM());
    // GMM(16, true, false, 1, 100, 0.001));

    // In post processing, we wait for n predictions. If m out of n predictions
    // are from the same class, we declare the class as the right one.
    //
    // n = (duration * sample_rate) / frame_size
    //   where duration    = post_duration (in ms)
    //         sample_rate = kSampleRate
    //         frame_size  = kFftHopSize
    // m = n * post_ratio
    //
    // The multiplication is done before the division by 1000 (and in 64-bit)
    // so that durations that are not a whole number of seconds are not
    // truncated: dividing first would turn 500 ms into 0 predictions and
    // 1500 ms into the same count as 1000 ms.
    //
    // Declared static so the capture-less callbacks below can call it without
    // capturing anything.
    static const auto predictions_for = [](int duration_ms) -> int {
        return static_cast<int>(
            static_cast<long long>(duration_ms) * kSampleRate / 1000 / kFftHopSize);
    };

    const int num_predictions = predictions_for(post_duration);
    pipeline.addPostProcessingModule(
            ClassLabelFilter(num_predictions * post_ratio, num_predictions));

    // Called when the ratio tuneable changes: recompute m from the current n.
    auto ratio_updater = [](double new_ratio) {
        ClassLabelFilter* filter =
            dynamic_cast<ClassLabelFilter*>(pipeline.getPostProcessingModule(0));
        if (filter == nullptr) {
            return;  // Module 0 is missing or is not a ClassLabelFilter.
        }
        // Recalculate num_predictions as post_duration might have been changed
        const int num_predictions = predictions_for(post_duration);
        filter->setMinimumCount(new_ratio * num_predictions);
    };

    // Called when the duration tuneable changes: resize the buffer to the new
    // n and re-derive m so that m = n * post_ratio keeps holding.
    auto duration_updater = [](int new_duration) {
        ClassLabelFilter* filter =
            dynamic_cast<ClassLabelFilter*>(pipeline.getPostProcessingModule(0));
        if (filter == nullptr) {
            return;  // Module 0 is missing or is not a ClassLabelFilter.
        }
        // Use the value handed to the callback rather than re-reading the
        // global, so the result does not depend on when the global is updated.
        const int num_predictions = predictions_for(new_duration);
        filter->setBufferSize(num_predictions);
        filter->setMinimumCount(post_ratio * num_predictions);
    };

    // Called when the noise-level tuneable changes: forward it to the MFCC
    // module, which was added as feature extraction module 1 above.
    auto noise_updater = [](int new_noise_level) {
        MFCC *mfcc = dynamic_cast<MFCC*>(pipeline.getFeatureExtractionModule(1));
        if (mfcc == nullptr) {
            return;  // Module 1 is missing or is not an MFCC.
        }
        mfcc->setNoiseLevel(new_noise_level);
    };

    registerTuneable(noise_level, 0, 20,
                     "Noise Level",
                     "The threshold for the system to distinguish between "
                     "ambient noise and speech/sound",
                     noise_updater);

    registerTuneable(post_duration, 0, 2000,
                     "Duration",
                     "Time (in ms) that is considered as a whole "
                     "for smoothing the prediction",
                     duration_updater);

    registerTuneable(post_ratio, 0.0f, 1.0f,
                     "Ratio",
                     "The portion of time in duration that "
                     "should be from the same class",
                     ratio_updater);

    useInputStream(stream);
    useOutputStream(oStream);
    usePipeline(pipeline);
    useLeaveOneOutScoring(false);
    setGUIBufferSize(kSampleRate);
}
int main (int argc, char **argv)
{
    float mfcc_result[NUMFILTERBANK-1];
    FILE *fptr;
   
    float Min=0.0f,Max=0.0f;
    int16_t *samples = NULL;
    uint16_t numFrame = 0;
    uint16_t i,j,k;
    int32_t Val_RGB;
    uint32_t idxCoeff;
    int tmp;
    char Len;
    
    Len = strlen(argv[1]);
    for ( i=0; i < Len-4; i++)
    {
        FileOut[i]= argv[1][i];
        FileOutTxt[i]= argv[1][i];
    }
    
    FileOut[Len-4] = '.';
    FileOut[Len-3] = 'b';
    FileOut[Len-2] = 'm';
    FileOut[Len-1] = 'p';
    FileOut[Len] = '\0';

    FileOutTxt[Len-4] = '.';
    FileOutTxt[Len-3] = 't';
    FileOutTxt[Len-2] = 'x';
    FileOutTxt[Len-1] = 't';
    FileOutTxt[Len] = '\0';
    
    /* Sotarage  MFCC coeffience */
    fptr=fopen(FileOutTxt,"w");
    
    PreCalcFilterBank(FilterBank,fNorm, NUMBINHALF, NUMFILTERBANK);
    //printf("Length: %d \n", strlen(argv[1]));
    wavread(argv[1], &samples);
    printf("No. of channels: %d\n", header->num_channels);
    printf("Sample rate: %d\n", header->sample_rate);
    printf("Bit rate: %dkbps\n", header->byte_rate*8 / 1000);
    printf("Bits per sample: %d\n\n", header->bps);
    //printf("Sample 0: %d\n", samples[0]);
    //printf("Sample 1: %d\n", samples[1]);
    // Modify the header values & samples before writing the new file
    wavwrite("track2.wav", samples);

    numFrame = header->datachunk_size/(2*2*NUMBINHALF) ;   
    //printf("Num Frame:  %d \n", numFrame );
    /*
    BMP = (RGB_data **)malloc((NUMFILTERBANK-1)*sizeof(RGB_data*));
    for (i=0; i < NUMFILTERBANK-1; i++)
    {
        BMP[i] = (RGB_data *)malloc(sizeof(RGB_data)*(2*numFrame)) ; 
        if(BMP[i] == NULL)
        {
            fprintf(stderr, "out of memory\n");
            exit (0);
        }        
    } 
    */
    BMP = (RGB_data *)malloc((NUMFILTERBANK-1)*sizeof(RGB_data*)*2*numFrame);     
    //memset(BMP, 0, sizeof(BMP));    
    for (i = 0; i < 2*numFrame-1; i++) 
    {
       for (j=0; j< 2*NUMBINHALF;j++)
       {
           V[j].Re = (float)(samples[i*NUMBINHALF + j]); 
           //printf(" %d", samples[i*NUMBINHALF + j]);
           V[j].Im = (float)0.0f;
       } 
       //printf("\n");
       MFCC(V, FilterBank ,fNorm, mfcc_result);

       //printf("MFCC:");
       for(idxCoeff = 0; idxCoeff < NUMFILTERBANK -1; idxCoeff++)
       {	     
           fprintf(fptr,"%f ", mfcc_result[idxCoeff]);
           //Val_RGB = (int)(255/(63.164356+46.877232)*(mfcc_result[idxCoeff]+10));
           Val_RGB = (int)(255*(mfcc_result[idxCoeff]));  
           //Val_RGB = tmp*tmp;
           //Val_RGB = Val_RGB/255;
           if (Val_RGB < 0)  Val_RGB = 0;
           if (Val_RGB > 255)  Val_RGB = 255;

           BMP[(NUMFILTERBANK - idxCoeff-2)*2*numFrame +i].g =  (BYTE)(Val_RGB); 
           BMP[(NUMFILTERBANK - idxCoeff-2)*2*numFrame +i].b =  (BYTE)(Val_RGB);
           BMP[(NUMFILTERBANK - idxCoeff-2)*2*numFrame +i].r =  (BYTE)(Val_RGB); 
           //BMP[NUMFILTERBANK - idxCoeff-1][i].b = (BYTE)((Val_RGB & 0x00FF00)>>8);
           //BMP[NUMFILTERBANK - idxCoeff-1][i].r = (BYTE)((Val_RGB & 0xFF0000)>>16); 
           //printf(" %d  ", Val_RGB); 
           //if (mfcc_result[idxCoeff] < Min) Min=mfcc_result[idxCoeff];
           //if (mfcc_result[idxCoeff] > Max) Max=mfcc_result[idxCoeff];  
       }
       fprintf(fptr,"\r\n");
    }

    //printf("Min: %f  Max: %f \n", Min, Max);

    bmp_generator(FileOut, 2*numFrame, NUMFILTERBANK -1 ,(BYTE*) (BMP)); 

    //bmpread("record_21.bmp");

    free(header);
    free(samples);
    free(BMP);
    fclose(fptr);
    exit (0);
}