void MFCC::process(const MatrixXR& spectrum, MatrixXR* mfccCoeffs){ (*mfccCoeffs).resize(spectrum.rows(), _coefficientCount); for ( int i = 0; i < spectrum.rows(); i++) { LOUDIA_DEBUG("MFCC: Processing Melbands"); // Process the mel bands on the power of the spectrum _melbands.process(spectrum.row(i).array().square(), &_bands); LOUDIA_DEBUG("MFCC: Processing Log of bands"); // Apply a power to the log mel amplitudes as in: http://en.wikipedia.org/wiki/Mel_frequency_cepstral_coefficient // V. Tyagi and C. Wellekens // On desensitizing the Mel-Cepstrum to spurious spectral components for Robust Speech Recognition // in Acoustics, Speech, and Signal Processing, 2005. Proceedings. // IEEE International Conference on, vol. 1, 2005, pp. 529–532. _bands = (_bands.array() + _minSpectrum).log() / log(10.0); _bands = _bands.array().pow(_power); LOUDIA_DEBUG("MFCC: Processing DCT"); // Process the DCT _dct.process(_bands, &_coeffs); (*mfccCoeffs).row(i) = _coeffs; } LOUDIA_DEBUG("MFCC: Finished Processing"); }
void SpectralNoiseSuppression::process(const MatrixXR& spectrum, MatrixXR* noise, MatrixXR* result){ const int rows = spectrum.rows(); const int cols = spectrum.cols(); (*result).resize(rows, cols); (*result) = spectrum; //DEBUG("SPECTRALNOISESUPPRESSION: Calculate wrapped magnitude."); // Calculate the wrapped magnitude of the spectrum _g = (1.0 / (_k1 - _k0 + 1.0) * spectrum.block(0, _k0, rows, _k1 - _k0).cwise().pow(1.0/3.0).rowwise().sum()).cwise().cube(); //cout << (_g) << endl; for ( int i = 0; i < cols; i++ ) { (*result).col(i) = (((*result).col(i).cwise() * _g.cwise().inverse()).cwise() + 1.0).cwise().log(); } //cout << (*result) << endl; //DEBUG("SPECTRALNOISESUPPRESSION: Estimate spectral noise."); // Estimate spectral noise _bands.process((*result), noise); //DEBUG("SPECTRALNOISESUPPRESSION: Suppress spectral noise."); // Suppress spectral noise (*result) = ((*result) - (*noise)).cwise().clipUnder(); }
void Window::setWindow( const MatrixXR& window, bool callSetup ){ if (window.cols() != _inputSize || window.rows() != 1) { // Throw exception wrong window size } setWindowType(CUSTOM, false); _window = window; if ( callSetup ) setup(); }
void PitchSaliency::process(const MatrixXR& spectrum, MatrixXR* pitches, MatrixXR* saliencies){ const int rows = spectrum.rows(); (*pitches).resize( rows, 1 ); (*saliencies).resize( rows, 1 ); for ( int row = 0; row < rows; row++ ) { Real tLow = _tMin; Real tUp = _tMax; Real sal; Real tLowBest = tLow; Real tUpBest = tUp; Real salBest; Real period; Real deltaPeriod; while ( ( tUp - tLow ) > _tPrec ) { // Split the best block and compute new limits tLow = (tLowBest + tUpBest) / 2.0; tUp = tUpBest; tUpBest = tLow; // Compute new saliences for the new blocks period = (tLowBest + tUpBest) / 2.0; deltaPeriod = tUpBest - tLowBest; salBest = saliency(period, deltaPeriod, tLowBest, tUpBest, spectrum.row( row )); period = (tLow + tUp) / 2.0; deltaPeriod = tUp - tLow; sal = saliency(period, deltaPeriod, tLow, tUp, spectrum.row( row )); if (sal > salBest) { tLowBest = tLow; tUpBest = tUp; salBest = sal; } } period = (tLowBest + tUpBest) / 2.0; deltaPeriod = tUpBest - tLowBest; (*pitches)(row, 0) = _sampleRate / period; (*saliencies)(row, 0) = saliency(period, deltaPeriod, tLowBest, tUpBest, spectrum.row( row )); } }
void VoiceActivityDetection::process(const MatrixXR& frames, MatrixXR* vad){ const int rows = frames.rows(); vad->resize(rows, 1); for (int i=0; i < rows; i++){ // compute barkbands _barkBands.process(frames.row(0), &_bands); // copy frame into memory _memory.row(_currentMemoryPos) = _bands.row(0); _currentMemoryPos = (_currentMemoryPos + 1) % _memorySize; // compute the VAD RowXR LTSE = _memory.colwise().maxCoeff(); RowXR noise = _memory.colwise().sum() / _memorySize; (*vad)(i,0) = log10((LTSE.array().square() / noise.array().square()).sum()); } }