void PitchSaliency::process(const MatrixXR& spectrum, MatrixXR* pitches, MatrixXR* saliencies){ const int rows = spectrum.rows(); (*pitches).resize( rows, 1 ); (*saliencies).resize( rows, 1 ); for ( int row = 0; row < rows; row++ ) { Real tLow = _tMin; Real tUp = _tMax; Real sal; Real tLowBest = tLow; Real tUpBest = tUp; Real salBest; Real period; Real deltaPeriod; while ( ( tUp - tLow ) > _tPrec ) { // Split the best block and compute new limits tLow = (tLowBest + tUpBest) / 2.0; tUp = tUpBest; tUpBest = tLow; // Compute new saliences for the new blocks period = (tLowBest + tUpBest) / 2.0; deltaPeriod = tUpBest - tLowBest; salBest = saliency(period, deltaPeriod, tLowBest, tUpBest, spectrum.row( row )); period = (tLow + tUp) / 2.0; deltaPeriod = tUp - tLow; sal = saliency(period, deltaPeriod, tLow, tUp, spectrum.row( row )); if (sal > salBest) { tLowBest = tLow; tUpBest = tUp; salBest = sal; } } period = (tLowBest + tUpBest) / 2.0; deltaPeriod = tUpBest - tLowBest; (*pitches)(row, 0) = _sampleRate / period; (*saliencies)(row, 0) = saliency(period, deltaPeriod, tLowBest, tUpBest, spectrum.row( row )); } }
void MFCC::process(const MatrixXR& spectrum, MatrixXR* mfccCoeffs){ (*mfccCoeffs).resize(spectrum.rows(), _coefficientCount); for ( int i = 0; i < spectrum.rows(); i++) { LOUDIA_DEBUG("MFCC: Processing Melbands"); // Process the mel bands on the power of the spectrum _melbands.process(spectrum.row(i).array().square(), &_bands); LOUDIA_DEBUG("MFCC: Processing Log of bands"); // Apply a power to the log mel amplitudes as in: http://en.wikipedia.org/wiki/Mel_frequency_cepstral_coefficient // V. Tyagi and C. Wellekens // On desensitizing the Mel-Cepstrum to spurious spectral components for Robust Speech Recognition // in Acoustics, Speech, and Signal Processing, 2005. Proceedings. // IEEE International Conference on, vol. 1, 2005, pp. 529–532. _bands = (_bands.array() + _minSpectrum).log() / log(10.0); _bands = _bands.array().pow(_power); LOUDIA_DEBUG("MFCC: Processing DCT"); // Process the DCT _dct.process(_bands, &_coeffs); (*mfccCoeffs).row(i) = _coeffs; } LOUDIA_DEBUG("MFCC: Finished Processing"); }
void VoiceActivityDetection::process(const MatrixXR& frames, MatrixXR* vad){ const int rows = frames.rows(); vad->resize(rows, 1); for (int i=0; i < rows; i++){ // compute barkbands _barkBands.process(frames.row(0), &_bands); // copy frame into memory _memory.row(_currentMemoryPos) = _bands.row(0); _currentMemoryPos = (_currentMemoryPos + 1) % _memorySize; // compute the VAD RowXR LTSE = _memory.colwise().maxCoeff(); RowXR noise = _memory.colwise().sum() / _memorySize; (*vad)(i,0) = log10((LTSE.array().square() / noise.array().square()).sum()); } }