void MFCC::process(const MatrixXR& spectrum, MatrixXR* mfccCoeffs){ (*mfccCoeffs).resize(spectrum.rows(), _coefficientCount); for ( int i = 0; i < spectrum.rows(); i++) { LOUDIA_DEBUG("MFCC: Processing Melbands"); // Process the mel bands on the power of the spectrum _melbands.process(spectrum.row(i).array().square(), &_bands); LOUDIA_DEBUG("MFCC: Processing Log of bands"); // Apply a power to the log mel amplitudes as in: http://en.wikipedia.org/wiki/Mel_frequency_cepstral_coefficient // V. Tyagi and C. Wellekens // On desensitizing the Mel-Cepstrum to spurious spectral components for Robust Speech Recognition // in Acoustics, Speech, and Signal Processing, 2005. Proceedings. // IEEE International Conference on, vol. 1, 2005, pp. 529–532. _bands = (_bands.array() + _minSpectrum).log() / log(10.0); _bands = _bands.array().pow(_power); LOUDIA_DEBUG("MFCC: Processing DCT"); // Process the DCT _dct.process(_bands, &_coeffs); (*mfccCoeffs).row(i) = _coeffs; } LOUDIA_DEBUG("MFCC: Finished Processing"); }
void SpectralNoiseSuppression::process(const MatrixXR& spectrum, MatrixXR* noise, MatrixXR* result){ const int rows = spectrum.rows(); const int cols = spectrum.cols(); (*result).resize(rows, cols); (*result) = spectrum; //DEBUG("SPECTRALNOISESUPPRESSION: Calculate wrapped magnitude."); // Calculate the wrapped magnitude of the spectrum _g = (1.0 / (_k1 - _k0 + 1.0) * spectrum.block(0, _k0, rows, _k1 - _k0).cwise().pow(1.0/3.0).rowwise().sum()).cwise().cube(); //cout << (_g) << endl; for ( int i = 0; i < cols; i++ ) { (*result).col(i) = (((*result).col(i).cwise() * _g.cwise().inverse()).cwise() + 1.0).cwise().log(); } //cout << (*result) << endl; //DEBUG("SPECTRALNOISESUPPRESSION: Estimate spectral noise."); // Estimate spectral noise _bands.process((*result), noise); //DEBUG("SPECTRALNOISESUPPRESSION: Suppress spectral noise."); // Suppress spectral noise (*result) = ((*result) - (*noise)).cwise().clipUnder(); }
void Window::setWindow( const MatrixXR& window, bool callSetup ){ if (window.cols() != _inputSize || window.rows() != 1) { // Throw exception wrong window size } setWindowType(CUSTOM, false); _window = window; if ( callSetup ) setup(); }
/**
 * Accumulates the weighted spectral maxima around the harmonics of a period.
 *
 * For each harmonic m in [1, _numHarmonics) the bin range is
 * [round(m * _fftSize / (period + deltaPeriod / 2)),
 *  min(round(m * _fftSize / (period - deltaPeriod / 2)), cols - 1)),
 * i.e. the upper bin itself is not part of the searched block.
 * Harmonics whose range is empty are skipped.
 *
 * @param period      center period of the block being evaluated.
 * @param deltaPeriod width of the period block.
 * @param tLow        lower period limit, forwarded to harmonicWeight().
 * @param tUp         upper period limit, forwarded to harmonicWeight().
 * @param spectrum    matrix whose first row is searched for maxima.
 * @return sum over the harmonics of harmonicWeight() times the maximum
 *         of the spectrum inside the harmonic's bin range.
 */
Real PitchSaliency::saliency(Real period, Real deltaPeriod, Real tLow, Real tUp, const MatrixXR& spectrum){
  const int lastBin = spectrum.cols() - 1;

  Real total = 0.0;

  for ( int harmonic = 1; harmonic < _numHarmonics; harmonic++ ) {
    // The longest period of the block gives the lowest bin and vice versa
    const int firstBin = (int)round(harmonic * _fftSize / (period + (deltaPeriod / 2.0)));
    const int stopBin = min((int)round(harmonic * _fftSize / (period - (deltaPeriod / 2.0))), lastBin);

    // Nothing to search when the range is empty
    if (firstBin >= stopBin) continue;

    total += harmonicWeight(period, tLow, tUp, harmonic) * spectrum.block(0, firstBin, 1, stopBin - firstBin).maxCoeff();
  }

  return total;
}
void PitchSaliency::process(const MatrixXR& spectrum, MatrixXR* pitches, MatrixXR* saliencies){ const int rows = spectrum.rows(); (*pitches).resize( rows, 1 ); (*saliencies).resize( rows, 1 ); for ( int row = 0; row < rows; row++ ) { Real tLow = _tMin; Real tUp = _tMax; Real sal; Real tLowBest = tLow; Real tUpBest = tUp; Real salBest; Real period; Real deltaPeriod; while ( ( tUp - tLow ) > _tPrec ) { // Split the best block and compute new limits tLow = (tLowBest + tUpBest) / 2.0; tUp = tUpBest; tUpBest = tLow; // Compute new saliences for the new blocks period = (tLowBest + tUpBest) / 2.0; deltaPeriod = tUpBest - tLowBest; salBest = saliency(period, deltaPeriod, tLowBest, tUpBest, spectrum.row( row )); period = (tLow + tUp) / 2.0; deltaPeriod = tUp - tLow; sal = saliency(period, deltaPeriod, tLow, tUp, spectrum.row( row )); if (sal > salBest) { tLowBest = tLow; tUpBest = tUp; salBest = sal; } } period = (tLowBest + tUpBest) / 2.0; deltaPeriod = tUpBest - tLowBest; (*pitches)(row, 0) = _sampleRate / period; (*saliencies)(row, 0) = saliency(period, deltaPeriod, tLowBest, tUpBest, spectrum.row( row )); } }
void PeakCOG::process(const MatrixXC& fft, const MatrixXR& peakPos, MatrixXR* peakCog) { LOUDIA_DEBUG("PEAKCOG: Processing windowed"); const int rows = fft.rows(); const int cols = fft.cols(); const int halfCols = min((int)ceil(_fftLength / 2.0), cols); const int peakCount = peakPos.cols(); LOUDIA_DEBUG("PEAKCOG: fft.shape " << fft.rows() << "," << fft.cols()); _spectrumAbs2 = fft.block(0, 0, rows, halfCols).cwise().abs2(); LOUDIA_DEBUG("PEAKCOG: Spectrum resized rows: " << rows << " halfCols: " << halfCols); unwrap(fft.block(0, 0, rows, halfCols).cwise().angle(), &_spectrumArg); derivate(_spectrumArg, &_spectrumArgDeriv); (*peakCog).resize(rows, peakCount); (*peakCog).setZero(); for(int row = 0; row < rows; row++) { for(int i = 0; i < peakCount; i++){ if (peakPos(row, i) != -1) { int start = max(0, (int)floor(peakPos(row, i) - _bandwidth / 2)); int end = min(halfCols, (int)ceil(peakPos(row, i) + _bandwidth / 2)); if ( (end - start) > 0) { (*peakCog)(row, i) = ((-_spectrumArgDeriv).block(row, start, 1, end-start).cwise() * _spectrumAbs2.block(row, start, 1, end-start)).sum() / _spectrumAbs2.block(row, start, 1, end-start).sum(); } } } } LOUDIA_DEBUG("PEAKCOG: Finished Processing"); }
void SpectralODFPhase::phaseDeviation(const MatrixXC& spectrum, const MatrixXR& spectrumArg, MatrixXR* odfValue) { const int rows = spectrum.rows(); const int cols = spectrum.cols(); _phaseDiff = spectrumArg.block(1, 0, rows - 1, cols) - spectrumArg.block(0, 0, rows - 1, cols); _instFreq = _phaseDiff.block(1, 0, rows - 2, cols) - _phaseDiff.block(0, 0, rows - 2, cols); if (_weighted) _instFreq.array() *= spectrum.block(2, 0, rows - 2, cols).array().abs(); if (_normalize) { (*odfValue) = _instFreq.rowwise().sum().array() / (cols * spectrum.block(2, 0, rows - 2, cols).array().abs().rowwise().sum()); return; } (*odfValue) = _instFreq.rowwise().sum() / cols; return; }
void VoiceActivityDetection::process(const MatrixXR& frames, MatrixXR* vad){ const int rows = frames.rows(); vad->resize(rows, 1); for (int i=0; i < rows; i++){ // compute barkbands _barkBands.process(frames.row(0), &_bands); // copy frame into memory _memory.row(_currentMemoryPos) = _bands.row(0); _currentMemoryPos = (_currentMemoryPos + 1) % _memorySize; // compute the VAD RowXR LTSE = _memory.colwise().maxCoeff(); RowXR noise = _memory.colwise().sum() / _memorySize; (*vad)(i,0) = log10((LTSE.array().square() / noise.array().square()).sum()); } }
void BandFilter::setup(){ LOUDIA_DEBUG("BANDFILTER: Setting up..."); _filter.setChannelCount( _channelCount, false ); LOUDIA_DEBUG("BANDFILTER: Getting zpk"); // Get the lowpass z, p, k MatrixXC zeros, poles; Real gain; switch( _filterType ){ case CHEBYSHEVI: chebyshev1(_order, _passRipple, _channelCount, &zeros, &poles, &gain); break; case CHEBYSHEVII: chebyshev2(_order, _stopAttenuation, _channelCount, &zeros, &poles, &gain); break; case BUTTERWORTH: butterworth(_order, _channelCount, &zeros, &poles, &gain); break; case BESSEL: bessel(_order, _channelCount, &zeros, &poles, &gain); break; } LOUDIA_DEBUG("BANDFILTER: zeros:" << zeros ); LOUDIA_DEBUG("BANDFILTER: poles:" << poles ); LOUDIA_DEBUG("BANDFILTER: gain:" << gain ); // Convert zpk to ab coeffs MatrixXC a; MatrixXC b; zpkToCoeffs(zeros, poles, gain, &b, &a); LOUDIA_DEBUG("BANDFILTER: Calculated the coeffs"); // Since we cannot create matrices of Nx0 // we have created at least one Zero in 0 if ( zeros == MatrixXC::Zero(zeros.rows(), zeros.cols()) ){ // Now we must remove the last coefficient from b MatrixXC temp = b.block(0, 0, b.rows(), b.cols()-1); b = temp; } // Get the warped critical frequency Real fs = 2.0; Real warped = 2.0 * fs * tan( M_PI * _lowFrequency / fs ); Real warpedStop = 2.0 * fs * tan( M_PI * _highFrequency / fs ); Real warpedCenter = sqrt(warped * warpedStop); Real warpedBandwidth = warpedStop - warped; // Warpped coeffs MatrixXC wa; MatrixXC wb; LOUDIA_DEBUG("BANDFILTER: Create the band type filter from the analog prototype"); switch( _bandType ){ case LOWPASS: lowPassToLowPass(b, a, warped, &wb, &wa); break; case HIGHPASS: lowPassToHighPass(b, a, warped, &wb, &wa); break; case BANDPASS: lowPassToBandPass(b, a, warpedCenter, warpedBandwidth, &wb, &wa); break; case BANDSTOP: lowPassToBandStop(b, a, warpedCenter, warpedBandwidth, &wb, &wa); break; } LOUDIA_DEBUG("BANDFILTER: Calculated the low pass to band pass"); // Digital coeffs MatrixXR da; MatrixXR db; bilinear(wb, wa, fs, &db, &da); 
LOUDIA_DEBUG("BANDFILTER: setup the coeffs"); // Set the coefficients to the filter _filter.setA( da.transpose() ); _filter.setB( db.transpose() ); _filter.setup(); LOUDIA_DEBUG("BANDFILTER: Finished set up..."); }
void SpectralReassignment::setup(){ LOUDIA_DEBUG("SPECTRALREASSIGNMENT: Setting up..."); // Setup the window so it gets calculated and can be reused _windowAlgo.setup(); // Create the time vector LOUDIA_DEBUG("SPECTRALREASSIGNMENT: Creating time vector..."); Real timestep = 1.0 / _sampleRate; // The unit of the vectors is Time Sample fractions // So the difference between one coeff and the next is 1 // and the center of the window must be 0, so even sized windows // will have the two center coeffs to -0.5 and 0.5 // This should be a line going from [-(window_size - 1)/2 ... (window_size - 1)/2] _time.resize(_frameSize, 1); for(int i = 0; i < _time.rows(); i++){ _time(i, 0) = (i - Real(_time.rows() - 1)/2.0); } // Create the freq vector LOUDIA_DEBUG("SPECTRALREASSIGNMENT: Creating freq vector..."); // The unit of the vectors is Frequency Bin fractions // TODO: Must rethink how the frequency vector is initialized // as we did for the time vector _freq.resize(1, _fftSize); range(0, _fftSize, _fftSize, &_freq); // Calculate and set the time weighted window LOUDIA_DEBUG("SPECTRALREASSIGNMENT: Calculate time weighted window..."); MatrixXR windowInteg = _windowAlgo.window(); windowInteg = windowInteg.cwise() * _time.transpose(); _windowIntegAlgo.setWindow(windowInteg); // Calculate and set the time derivated window LOUDIA_DEBUG("SPECTRALREASSIGNMENT: Calculate time derivative window..."); MatrixXR windowDeriv = _windowAlgo.window(); for(int i = windowDeriv.cols() - 1; i > 0; i--){ windowDeriv(0, i) = (windowDeriv(0, i) - windowDeriv(0, i - 1)) / timestep; } // TODO: Check what is the initial condition for the window // Should this be 0 or just the value it was originally * dt //windowDeriv(0, 0) = 0.0; _windowDerivAlgo.setWindow(windowDeriv); // Create the necessary buffers for the windowing _window.resize(1, _frameSize); _windowInteg.resize(1, _frameSize); _windowDeriv.resize(1, _frameSize); // Create the necessary buffers for the FFT _fftAbs2.resize(1, _fftSize); 
_fftInteg.resize(1, _fftSize); _fftDeriv.resize(1, _fftSize); // Setup the algos _windowIntegAlgo.setup(); _windowDerivAlgo.setup(); _fftAlgo.setup(); LOUDIA_DEBUG("SPECTRALREASSIGNMENT: Finished set up..."); }
void PeakInterpolationComplex::process(const MatrixXC& input, const MatrixXR& peakPositions, const MatrixXR& peakMagnitudes, const MatrixXR& peakPhases, MatrixXR* peakPositionsInterp, MatrixXR* peakMagnitudesInterp, MatrixXR* peakPhasesInterp) { LOUDIA_DEBUG("PEAKINTERPOLATIONCOMPLEX: Processing"); Real leftMag, leftPhase; Real rightMag, rightPhase; Real mag, interpFactor; (*peakPositionsInterp).resize(input.rows(), peakPositions.cols()); (*peakMagnitudesInterp).resize(input.rows(), peakPositions.cols()); (*peakPhasesInterp).resize(input.rows(), peakPositions.cols()); _magnitudes = input.cwise().abs(); unwrap(input.cwise().angle(), &_phases); for ( int row = 0 ; row < _magnitudes.rows(); row++ ) { for ( int i = 0; i < peakPositions.cols(); i++ ) { // If the position is -1 do nothing since it means it is nothing if( peakPositions(row, i) == -1 ){ (*peakMagnitudesInterp)(row, i) = peakMagnitudes(row, i); (*peakPhasesInterp)(row, i) = peakPhases(row, i); (*peakPositionsInterp)(row, i) = peakPositions(row, i); } else { // Take the center magnitude in dB mag = 20.0 * log10( peakMagnitudes(row, i) ); // Take the left magnitude in dB if( peakPositions(row, i) <= 0 ){ leftMag = 20.0 * log10( _magnitudes(row, (int)peakPositions(row, i) + 1) ); } else { leftMag = 20.0 * log10( _magnitudes(row, (int)peakPositions(row, i) - 1) ); } // Take the right magnitude in dB if( peakPositions(row, i) >= _magnitudes.row(row).cols() - 1 ){ rightMag = 20.0 * log10( _magnitudes(row, (int)peakPositions(row, i) - 1) ); } else { rightMag = 20.0 * log10( _magnitudes(row, (int)peakPositions(row, i) + 1) ); } // Calculate the interpolated position (*peakPositionsInterp)(row, i) = peakPositions(row, i) + 0.5 * (leftMag - rightMag) / (leftMag - 2.0 * mag + rightMag); interpFactor = ((*peakPositionsInterp)(row, i) - peakPositions(row, i)); // Calculate the interpolated magnitude in dB (*peakMagnitudesInterp)(row, i) = mag - 0.25 * (leftMag - rightMag) * interpFactor; // Calculate the interpolated 
phase leftPhase = _phases(row, (int)floor((*peakPositionsInterp)(row, i))); rightPhase = _phases(row, (int)floor((*peakPositionsInterp)(row, i)) + 1); interpFactor = (interpFactor >= 0) ? interpFactor : interpFactor + 1; (*peakPhasesInterp)(row, i) = (leftPhase + interpFactor * (rightPhase - leftPhase)); } } } // Calculate the princarg() of the phase: remap to (-pi pi] (*peakPhasesInterp) = ((*peakPhasesInterp).cwise() != -1).select(((*peakPhasesInterp).cwise() + M_PI).cwise().modN(-2.0 * M_PI).cwise() + M_PI, (*peakPhasesInterp)); LOUDIA_DEBUG("PEAKINTERPOLATIONCOMPLEX: Finished Processing"); }