Exemplo n.º 1
0
/*
 * Sample standard deviation (n - 1 denominator) of the values in `content`.
 *
 * All arithmetic is done in double. The values are copied into a
 * vector<double> first so that neither the sum nor the centred differences
 * pass through an integer type: accumulating with an integer seed truncates
 * the sum, and storing (value - mean) in an unsigned element discards the
 * fractional part and cannot represent negative differences.
 *
 * @return the sample standard deviation, or 0.0 when `content` holds fewer
 *         than two values (the n - 1 denominator would be zero or negative)
 */
double blok::deviation(){
	const std::vector<double> values( content.begin(), content.end() );
	const std::size_t count = values.size();

	// With fewer than two samples the n - 1 denominator is not usable.
	if ( count < 2 ) {
		return 0.0;
	}

	// The 0.0 seed makes accumulate sum in double rather than int.
	const double mean = std::accumulate( values.begin(), values.end(), 0.0 ) / count;

	double squared_sum = 0.0;
	for ( std::size_t i = 0; i < count; ++i ) {
		const double diff = values[i] - mean;
		squared_sum += diff * diff;
	}

	return sqrt( squared_sum / ( count - 1 ) );
}
Exemplo n.º 2
0
/*
 * Compute the MFCC feature matrix for a buffer of raw voice samples. The
 * sample rate can be read from the wav file header.
 *
 * The signal is cut into frames of 240 samples, advancing 80 samples per
 * frame. Each frame is zero-padded to the next power of two and handed to the
 * helper pipeline (energy, pre-emphasis, Hamming window, FFT, mel filter
 * bank, cepstrum, cepstral weighting); delta computation and mean removal
 * are then applied across all frames.
 *
 * @param data        the raw sample data of the wav file
 * @param len         number of samples in data
 * @param sample_rate sample frequency in Hz (8000 or 16000 or 22050 or 44000)
 * @param mfcc_data   output; receives a newly allocated array of n rows, each
 *                    holding MFCC_DIMENSION doubles (39 according to the
 *                    original note). This function does not release it:
 *                    free() every row and then the array. Set to NULL when
 *                    no frames are produced.
 * @param frame_time  intended frame length in ms; currently ignored because
 *                    the frame size is fixed at 240 samples
 * @param step_time   intended step between neighbouring frames in ms;
 *                    currently ignored because the step is fixed at 80 samples
 *
 * @return the number of frames n, 0 when len is shorter than one frame, or
 *         -1 on a NULL argument or an allocation failure
 */
int array_to_mfcc(const double* data, const int len, int sample_rate,
                  double*** mfcc_data, double frame_time, double step_time) {
    /* (frame size / sample rate) * 1000 = frame time */
    // int frame_size = sample_rate * frame_time / 1000;
    // int move_size = sample_rate * step_time / 1000;
    const int frame_size = 240;
    const int move_size = 80;
    int i, j;
    int n; /* total number of frames */
    int fft_n; /* frame length padded up to a power of two */
    int rows = 0; /* number of mfcc rows successfully allocated */
    int ok;
    double** mfcc = NULL; /* temp data for storing mfcc */
    double* tmp_data = NULL; /* temp data for storing each frame */
    double* lo_chan = NULL;
    double* lo_wt = NULL;
    double* cep_win = NULL; /* weighted window for MFCC */
    double* fbank = NULL; /* triangular bandpass filter */

    /* The timing parameters are kept for interface compatibility only. */
    (void) frame_time;
    (void) step_time;

    if (mfcc_data == NULL) {
        return -1;
    }
    *mfcc_data = NULL;

    /*
     * Integer division truncates toward zero, so without this guard an input
     * slightly shorter than one frame would still yield n == 1 and the frame
     * copy below would read past the end of data; shorter inputs would yield
     * a zero or negative count.
     */
    if (len < frame_size) {
        return 0;
    }
    if (data == NULL) {
        return -1;
    }

    n = (len - frame_size) / move_size + 1;

    /* Smallest power of two that is >= frame_size, computed in integers. */
    fft_n = 1;
    while (fft_n < frame_size) {
        fft_n *= 2;
    }

    mfcc = (double**) calloc(n, sizeof(double*));
    tmp_data = (double*) calloc(fft_n, sizeof(double));
    lo_chan = (double*) calloc(fft_n, sizeof(double));
    lo_wt = (double*) calloc(fft_n, sizeof(double));
    cep_win = (double*) calloc(12, sizeof(double));
    fbank = (double*) calloc(27, sizeof(double));

    ok = mfcc != NULL && tmp_data != NULL && lo_chan != NULL &&
         lo_wt != NULL && cep_win != NULL && fbank != NULL;

    for (i = 0; ok && i < n; ++i) {
        mfcc[i] = (double*) calloc(MFCC_DIMENSION, sizeof(double));
        if (mfcc[i] == NULL) {
            ok = 0;
        } else {
            ++rows;
        }
    }

    if (ok) {
        gen_cep_win(cep_win, 22, 12);
        for (i = 0; i < n; ++i) {
            /* Copy one frame and zero-pad it up to the FFT length. */
            for (j = 0; j < frame_size; ++j) {
                tmp_data[j] = data[i * move_size + j];
            }
            for (j = frame_size; j < fft_n; ++j) {
                tmp_data[j] = 0.0;
            }

            /* Slot 12 holds the frame energy, taken before any filtering. */
            mfcc[i][12] = cpt_energy(tmp_data, frame_size);
            pre_emphasise(tmp_data, frame_size);
            hamming(tmp_data, frame_size);
            realft(tmp_data, fft_n);
            init_mel(lo_chan, lo_wt, sample_rate, fft_n);
            wave_to_fbank(tmp_data, fbank, lo_chan, lo_wt, fft_n);
            fbank_to_mfcc(mfcc[i], fbank, 12);
            weight_cep(mfcc[i], cep_win, 12);
        }
        get_delta_cep(mfcc, n, 13, 2);
        zero_mean(mfcc, n, 12);

        *mfcc_data = mfcc; /* ownership passes to the caller */
    } else {
        /* Allocation failed: release whatever was obtained and report it. */
        for (i = 0; i < rows; ++i) {
            free(mfcc[i]);
        }
        free(mfcc);
        n = -1;
    }

    free(tmp_data);
    free(lo_chan);
    free(lo_wt);
    free(cep_win);
    free(fbank);

    return n;
}