Exemple #1
0
void PitchSaliency::process(const MatrixXR& spectrum, MatrixXR* pitches, MatrixXR* saliencies){
  const int rows = spectrum.rows();

  (*pitches).resize( rows, 1 );
  (*saliencies).resize( rows, 1 );
  
  for ( int row = 0; row < rows; row++ ) {

    Real tLow = _tMin;
    Real tUp = _tMax;
    Real sal;

    Real tLowBest = tLow;
    Real tUpBest = tUp;
    Real salBest;
  
    Real period;
    Real deltaPeriod;

    while ( ( tUp - tLow ) > _tPrec ) {
      // Split the best block and compute new limits
      tLow = (tLowBest + tUpBest) / 2.0;
      tUp = tUpBest;
      tUpBest = tLow;
      
      // Compute new saliences for the new blocks
      period = (tLowBest + tUpBest) / 2.0;
      deltaPeriod = tUpBest - tLowBest;
      salBest = saliency(period, deltaPeriod, tLowBest, tUpBest, spectrum.row( row ));

      period = (tLow + tUp) / 2.0;
      deltaPeriod = tUp - tLow;
      sal = saliency(period, deltaPeriod, tLow, tUp, spectrum.row( row ));

      if (sal > salBest) {
        tLowBest = tLow;
        tUpBest = tUp;
        salBest = sal;
      }
    }

    period = (tLowBest + tUpBest) / 2.0;
    deltaPeriod = tUpBest - tLowBest;

    (*pitches)(row, 0) = _sampleRate / period;
    (*saliencies)(row, 0) = saliency(period, deltaPeriod, tLowBest, tUpBest, spectrum.row( row ));
  }
}
Exemple #2
0
/**
 * Computes the cepstral coefficients of every row of `spectrum`.
 *
 * Per row: the element-wise squared spectrum is passed to _melbands, the
 * resulting bands are offset by _minSpectrum, converted to base-10 logarithm,
 * raised to _power, and finally passed to _dct. The _bands and _coeffs
 * members are used as scratch storage and are overwritten on every row.
 *
 * @param spectrum    input matrix, one spectrum per row
 * @param mfccCoeffs  output, resized to (spectrum.rows() x _coefficientCount);
 *                    row r receives the coefficients computed from spectrum row r
 */
void MFCC::process(const MatrixXR& spectrum, MatrixXR* mfccCoeffs){
  const int frameCount = spectrum.rows();

  mfccCoeffs->resize(frameCount, _coefficientCount);

  for ( int frame = 0; frame < frameCount; ++frame ) {
    // Mel bands are computed on the power (squared magnitude) of the spectrum
    LOUDIA_DEBUG("MFCC: Processing Melbands");
    _melbands.process(spectrum.row(frame).array().square(), &_bands);

    // Log10 of the band values, followed by a power applied to the log
    // amplitudes. See http://en.wikipedia.org/wiki/Mel_frequency_cepstral_coefficient
    // and: V. Tyagi and C. Wellekens, "On desensitizing the Mel-Cepstrum to
    // spurious spectral components for Robust Speech Recognition", in Acoustics,
    // Speech, and Signal Processing, 2005. Proceedings. IEEE International
    // Conference on, vol. 1, 2005, pp. 529-532.
    LOUDIA_DEBUG("MFCC: Processing Log of bands");
    _bands = (_bands.array() + _minSpectrum).log() / log(10.0);
    _bands = _bands.array().pow(_power);

    // DCT of the compressed bands yields the coefficients for this row
    LOUDIA_DEBUG("MFCC: Processing DCT");
    _dct.process(_bands, &_coeffs);

    mfccCoeffs->row(frame) = _coeffs;
  }

  LOUDIA_DEBUG("MFCC: Finished Processing");
}
/**
 * Computes one voice-activity value per row of `frames`.
 *
 * For each frame the bark bands are computed and written into the _memory
 * member at _currentMemoryPos, which then advances modulo _memorySize, so the
 * oldest stored entry is the next one to be overwritten. The output value is
 * log10 of the sum over bands of (LTSE^2 / noise^2), where LTSE is the
 * per-band maximum over _memory and noise is the per-band sum over _memory
 * divided by _memorySize.
 *
 * This call mutates _bands, _memory and _currentMemoryPos.
 *
 * NOTE(review): a band whose noise estimate is zero yields a non-finite
 * ratio; this is not guarded here — confirm whether callers rely on that.
 *
 * @param frames  input matrix, one frame per row
 * @param vad     output, resized to (frames.rows() x 1); row i receives the
 *                value computed after frame i has been stored in the memory
 */
void VoiceActivityDetection::process(const MatrixXR& frames, MatrixXR* vad){
  const int rows = frames.rows();
  
  vad->resize(rows, 1);

  for (int i=0; i < rows; i++){
    // compute barkbands of the i-th frame
    // (fix: this used to read frames.row(0) on every iteration, so all
    // output rows were derived from the first frame only)
    _barkBands.process(frames.row(i), &_bands);

    // copy the bands of this frame into the memory slot being replaced
    _memory.row(_currentMemoryPos) = _bands.row(0);

    // advance the write position, wrapping around at _memorySize
    _currentMemoryPos = (_currentMemoryPos + 1) % _memorySize;

    // compute the VAD: per-band maximum vs. per-band average over the memory
    RowXR LTSE = _memory.colwise().maxCoeff();
    RowXR noise = _memory.colwise().sum() / _memorySize;

    (*vad)(i,0) = log10((LTSE.array().square() / noise.array().square()).sum());
  }
}