Exemple #1
0
void MFCC::process(const MatrixXR& spectrum, MatrixXR* mfccCoeffs){
  (*mfccCoeffs).resize(spectrum.rows(), _coefficientCount);
  
  for ( int i = 0; i < spectrum.rows(); i++) {
    LOUDIA_DEBUG("MFCC: Processing Melbands");
    // Process the mel bands on the power of the spectrum
    _melbands.process(spectrum.row(i).array().square(), &_bands);
        
    LOUDIA_DEBUG("MFCC: Processing Log of bands");
    // Apply a power to the log mel amplitudes as in: http://en.wikipedia.org/wiki/Mel_frequency_cepstral_coefficient
    // V. Tyagi and C. Wellekens
    // On desensitizing the Mel-Cepstrum to spurious spectral components for Robust Speech Recognition
    // in Acoustics, Speech, and Signal Processing, 2005. Proceedings. 
    // IEEE International Conference on, vol. 1, 2005, pp. 529–532.
    _bands = (_bands.array() + _minSpectrum).log() / log(10.0);
    _bands = _bands.array().pow(_power);
    
    LOUDIA_DEBUG("MFCC: Processing DCT");
    // Process the DCT
    _dct.process(_bands, &_coeffs);

    (*mfccCoeffs).row(i) = _coeffs;
  }

  LOUDIA_DEBUG("MFCC: Finished Processing");
}
void SpectralNoiseSuppression::process(const MatrixXR& spectrum, MatrixXR* noise, MatrixXR* result){
  const int rows = spectrum.rows();
  const int cols = spectrum.cols();

  (*result).resize(rows, cols);
  
  (*result) = spectrum;

  //DEBUG("SPECTRALNOISESUPPRESSION: Calculate wrapped magnitude.");
  // Calculate the wrapped magnitude of the spectrum
  _g = (1.0 / (_k1 - _k0 + 1.0) * spectrum.block(0, _k0, rows, _k1 - _k0).cwise().pow(1.0/3.0).rowwise().sum()).cwise().cube();

  //cout << (_g) << endl;
  
  for ( int i = 0; i < cols; i++ ) {
    (*result).col(i) = (((*result).col(i).cwise() * _g.cwise().inverse()).cwise() + 1.0).cwise().log();
  }
  
  //cout << (*result) << endl;
  
  //DEBUG("SPECTRALNOISESUPPRESSION: Estimate spectral noise.");
  // Estimate spectral noise
  _bands.process((*result), noise);
  
  //DEBUG("SPECTRALNOISESUPPRESSION: Suppress spectral noise.");
  // Suppress spectral noise
  (*result) = ((*result) - (*noise)).cwise().clipUnder();
}
Exemple #3
0
void Window::setWindow( const MatrixXR& window, bool callSetup ){
  if (window.cols() != _inputSize || window.rows() != 1) {
    // Throw exception wrong window size
  }

  setWindowType(CUSTOM, false);
  _window = window;

  if ( callSetup ) setup();
}
Exemple #4
0
void PitchSaliency::process(const MatrixXR& spectrum, MatrixXR* pitches, MatrixXR* saliencies){
  const int rows = spectrum.rows();

  (*pitches).resize( rows, 1 );
  (*saliencies).resize( rows, 1 );
  
  for ( int row = 0; row < rows; row++ ) {

    Real tLow = _tMin;
    Real tUp = _tMax;
    Real sal;

    Real tLowBest = tLow;
    Real tUpBest = tUp;
    Real salBest;
  
    Real period;
    Real deltaPeriod;

    while ( ( tUp - tLow ) > _tPrec ) {
      // Split the best block and compute new limits
      tLow = (tLowBest + tUpBest) / 2.0;
      tUp = tUpBest;
      tUpBest = tLow;
      
      // Compute new saliences for the new blocks
      period = (tLowBest + tUpBest) / 2.0;
      deltaPeriod = tUpBest - tLowBest;
      salBest = saliency(period, deltaPeriod, tLowBest, tUpBest, spectrum.row( row ));

      period = (tLow + tUp) / 2.0;
      deltaPeriod = tUp - tLow;
      sal = saliency(period, deltaPeriod, tLow, tUp, spectrum.row( row ));

      if (sal > salBest) {
        tLowBest = tLow;
        tUpBest = tUp;
        salBest = sal;
      }
    }

    period = (tLowBest + tUpBest) / 2.0;
    deltaPeriod = tUpBest - tLowBest;

    (*pitches)(row, 0) = _sampleRate / period;
    (*saliencies)(row, 0) = saliency(period, deltaPeriod, tLowBest, tUpBest, spectrum.row( row ));
  }
}
void VoiceActivityDetection::process(const MatrixXR& frames, MatrixXR* vad){
  const int rows = frames.rows();
  
  vad->resize(rows, 1);

  for (int i=0; i < rows; i++){
    // compute barkbands
    _barkBands.process(frames.row(0), &_bands);

    // copy frame into memory
    _memory.row(_currentMemoryPos) = _bands.row(0);

    _currentMemoryPos = (_currentMemoryPos + 1) % _memorySize;

    // compute the VAD
    RowXR LTSE = _memory.colwise().maxCoeff();
    RowXR noise = _memory.colwise().sum() / _memorySize;

    (*vad)(i,0) = log10((LTSE.array().square() / noise.array().square()).sum());
  }
}