Exemplo n.º 1
0
void HighResolutionFeatures::compute() {
  const vector<Real>& hpcp = _hpcp.get();

  const int hpcpSize = int(hpcp.size());
  const int binsPerSemitone = hpcpSize / 12;

  if (hpcpSize % 12 != 0 || hpcpSize == 0) {
    throw EssentiaException("HighResolutionFeatures: Cannot compute high-resolution features of an hpcp vector which size is not a non-zero multiple of 12");
  }

  // 1.- Equal-temperament deviation: measure of the deviation of HPCP local
  //     maxima with respect to equal-tempered bins.
  // a) Compute local maxima of HPCP vector
  //
  // should 24 be a parameter? Does this mean we are interested in 2 peaks per
  // semitone? -eaylon
  //
  // This doesn't mean 2 peaks per semitone, it means 24 peaks over the entire
  // hpcp vector. If there is a very high-resolution hpcp vector given, then
  // potentially, only the peaks in the first semitone will be detected. This
  // is OK however, because the peaks are truncated _after_ they are sorted by
  // amplitude. This means that we get the 24 largest peaks, which are assumed
  // to be the most relevant for this algorithm. As to whether it should become
  // a parameter, yes. I'll add it on the next commit. -rtoscano
  vector<Peak> peaks = detectPeaks(hpcp, parameter("maxPeaks").toInt());

  const int peaksSize = int(peaks.size());

  // b) replace the bin index by its deviation from equal-tempered bins
  for (int i=0; i<peaksSize; ++i) {
    // this could be changed by:
    Real f = peaks[i].position/ binsPerSemitone;
    Real dev = f - int(f);
    if (dev > 0.5) dev -= 1.0;
    peaks[i].position = dev;
  }

  // weight deviations by their amplitude
  Real eqTempDeviation = 0.0;
  Real totalWeights = 0.0;
  for (int i=0; i<peaksSize; ++i) {
    eqTempDeviation += abs(peaks[i].position * peaks[i].magnitude);
    totalWeights += peaks[i].magnitude;
  }

  if (totalWeights != 0.0) eqTempDeviation /= totalWeights;

  _equalTemperedDeviation.get() = eqTempDeviation;

  // 2.- NonTempered energy ratio: ratio betwen the energy on
  //     non-tempered bins and the total energy, computed from the HPCP average
  Real temperedEnergy = 0.0;
  Real totalEnergy = 0.0;
  for (int i=0; i<hpcpSize; ++i) {
    totalEnergy += hpcp[i] * hpcp[i];
    if (i % binsPerSemitone == 0) {
      temperedEnergy += hpcp[i] * hpcp[i];
    }
  }

  if (totalEnergy > 0.0) {
    _nt2tEnergyRatio.get() = 1.0 - temperedEnergy / totalEnergy;
  }
  else {
    _nt2tEnergyRatio.get() = 0.0;
  }

  // 3.- NonTempered peak energy ratio: ratio betwen the energy on
  //     non tempered peaks and the total energy, computed from the HPCP average
  Real temperedPeaksEnergy = 0.0;
  Real totalPeaksEnergy = 0.0;
  for (int i=0; i<peaksSize; ++i) {
    totalPeaksEnergy += peaks[i].magnitude * peaks[i].magnitude;
    if (peaks[i].position == 0.0) {
      temperedPeaksEnergy += peaks[i].magnitude * peaks[i].magnitude;
    }
  }

  if (totalPeaksEnergy > 0.0) {
    _nt2tPeaksEnergyRatio.get() = 1.0 - temperedPeaksEnergy / totalPeaksEnergy;
  }
  else {
    _nt2tPeaksEnergyRatio.get() = 0.0;
  }
}
Exemplo n.º 2
0
void Key::configure() {
  _slope = parameter("slope").toReal();
  _numHarmonics = parameter("numHarmonics").toInt();
  _profileType = parameter("profileType").toString();

  const char* keyNames[] = { "A", "A#", "B", "C", "C#", "D", "D#", "E", "F", "F#", "G", "G#" };
  _keys = arrayToVector<string>(keyNames);

  Real profileTypes[][12] = {
    // Diatonic
    { 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1 },
    { 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1 },

    // Krumhansl
    { 6.35, 2.23, 3.48, 2.33, 4.38, 4.09, 2.52, 5.19, 2.39, 3.66, 2.29, 2.88 },
    { 6.33, 2.68, 3.52, 5.38, 2.60, 3.53, 2.54, 4.75, 3.98, 2.69, 3.34, 3.17 },

    // A revised version of the key profiles, by David Temperley, see [2]
    { 5.0, 2.0, 3.5, 2.0, 4.5, 4.0, 2.0, 4.5, 2.0, 3.5, 1.5, 4.0 },
    { 5.0, 2.0, 3.5, 4.5, 2.0, 4.0, 2.0, 4.5, 3.5, 2.0, 1.5, 4.0 },

    // Wei Chai MIT PhD thesis
    { 81302, 320, 65719, 1916, 77469, 40928, 2223, 83997, 1218, 39853, 1579, 28908 },
    { 39853, 1579, 28908, 81302, 320, 65719, 1916, 77469, 40928, 2223, 83997, 1218 },

    // Tonic triad
    { 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0 },
    { 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0 },

    // Temperley MIREX 2005
    { 0.748, 0.060, 0.488, 0.082, 0.67, 0.46, 0.096, 0.715, 0.104, 0.366, 0.057, 0.4 },
    { 0.712, 0.084, 0.474, 0.618, 0.049, 0.46, 0.105, 0.747, 0.404, 0.067, 0.133, 0.33 },

    // Statistics THPCP over all the evaluation set
    { 0.95162, 0.20742, 0.71758, 0.22007, 0.71341, 0.48841, 0.31431, 1.00000, 0.20957, 0.53657, 0.22585, 0.55363 },
    { 0.94409, 0.21742, 0.64525, 0.63229, 0.27897, 0.57709, 0.26428, 1.0000, 0.26428, 0.30633, 0.45924, 0.35929 }

  };

#define SET_PROFILE(i) _M = arrayToVector<Real>(profileTypes[2*i]); _m = arrayToVector<Real>(profileTypes[2*i+1])

  if      (_profileType == "diatonic")      { SET_PROFILE(0); }
  else if (_profileType == "krumhansl")     { SET_PROFILE(1); }
  else if (_profileType == "temperley")     { SET_PROFILE(2); }
  else if (_profileType == "weichai")       { SET_PROFILE(3); }
  else if (_profileType == "tonictriad")    { SET_PROFILE(4); }
  else if (_profileType == "temperley2005") { SET_PROFILE(5); }
  else if (_profileType == "thpcp")         { SET_PROFILE(6); }
  else {
    throw EssentiaException("Key: Unsupported profile type: ", _profileType);
  }

  // Compute the other vectors getting into account chords:
  vector<Real> M_chords(12, (Real)0.0);
  vector<Real> m_chords(12, (Real)0.0);

  /* Under test: Purwins et al.
  for (int n=0; n<12; n++) {
    TIndex dominant = n+7;
    if ( dominant > 11)
      dominant -= 12;
    M_chords[n]= _M[n] + (1.0/3.0)*_M[dominant];
    m_chords[n]= _m[n] + (1.0/3.0)*_m[dominant];
  }
  */

  /*
  Assumptions:
    - We consider that the tonal hierarchy is kept when dealing with polyphonic sounds.
      That means that Krumhansl profiles are seen as the tonal hierarchy of
      each of the chords of the harmonic scale within a major/minor tonal contest.
    - We compute from these chord profiles the corresponding note (pitch class) profiles,
      which will be compared to HPCP values.

  Rationale:
    - Each note contribute to the different harmonics.
    - All the chords of the major/minor key are considered.

  Procedure:
    - First, profiles are initialized to 0
    - We take _M[i], n[i] as Krumhansl profiles i=1,...12 related to each of the chords
      of the major/minor key.
    - For each chord, we add its contribution to the three notes (pitch classes) of the chord.
      We use the same weight for all the notes of the chord.
    - For each note, we add its contribution to the different harmonics
  */

  /** MAJOR KEY */
  // Tonic (I)
  addMajorTriad(0, _M[0], M_chords);

  if (!parameter("useThreeChords").toBool())
  {
    // II
    addMinorTriad(2, _M[2], M_chords);
    // Only root: AddContributionHarmonics(2, _M[2], M_chords);
    // III
    addMinorTriad(4, _M[4], M_chords);
    // Only root: AddContributionHarmonics(4, _M[4], M_chords);
  }

  // Subdominant (IV)
  addMajorTriad(5, _M[5], M_chords);
  // Dominant (V)
  addMajorTriad(7, _M[7], M_chords);

  if (!parameter("useThreeChords").toBool()) {
    // VI
    addMinorTriad(9, _M[9], M_chords);
    // Only root: AddContributionHarmonics(9, _M[9], M_chords);
    // VII (5th diminished)
    addContributionHarmonics(11, _M[11], M_chords);
    addContributionHarmonics(2 , _M[11], M_chords);
    addContributionHarmonics(5 , _M[11], M_chords);
    // Only root: AddContributionHarmonics(11, _M[11], M_chords);
  }

  /** MINOR KEY */
  // Tonica I
  addMinorTriad(0, _m[0], m_chords);
  if (!parameter("useThreeChords").toBool()){
    // II (5th diminished)
    addContributionHarmonics(2, _m[2], m_chords);
    addContributionHarmonics(5, _m[2], m_chords);
    addContributionHarmonics(8, _m[2], m_chords);
    // Only root: AddContributionHarmonics(2, _m[2], m_chords);

    // III (5th augmented)
    addContributionHarmonics(3, _m[3], m_chords);
    addContributionHarmonics(7, _m[3], m_chords);
    addContributionHarmonics(11,_m[3], m_chords); // Harmonic minor scale! antes 10!!!
    // Only root: AddContributionHarmonics(3, _m[3], m_chords);
  }

  // Subdominant (IV)
  addMinorTriad(5, _m[5], m_chords);

  // Dominant (V) (harmonic minor scale)
  addMajorTriad(7, _m[7], m_chords);

  if (!parameter("useThreeChords").toBool()) {
    // VI
    addMajorTriad(8, _m[8], m_chords);
    // Only root: AddContributionHarmonics(8, _m[8], m_chords);
    // VII (diminished 5th)
    addContributionHarmonics(11, _m[8], m_chords);
    addContributionHarmonics(2, _m[8], m_chords);
    addContributionHarmonics(5, _m[8], m_chords);
    // Only root: AddContributionHarmonics(11, _m[8], m_chords);
  }

  if (parameter("usePolyphony").toBool()) {
    _M = M_chords;
    _m = m_chords;
  }

  resize(parameter("pcpSize").toInt());
}
Exemplo n.º 3
0
/**
 * Estimates key, scale and strength from the input PCP by correlating it with
 * the major, minor and "other" profiles at every circular shift and keeping
 * the best match.
 *
 * Throws EssentiaException when the PCP size is not a positive multiple of 12.
 */
void KeyEDM3::compute() {

  const vector<Real>& pcp = _pcp.get();

  int pcpsize = (int)pcp.size();

  if (pcpsize < 12 || pcpsize % 12 != 0)
    throw EssentiaException("KeyEDM3: input PCP size is not a positive multiple of 12");

  // Profiles are stored at a given resolution; rebuild them if the input
  // resolution differs.
  if (pcpsize != (int)_profile_dom.size()) {
    resize(pcpsize);
  }

  // Compute Correlation
  // Mean of the PCP
  Real mean_pcp = mean(pcp);
  Real std_pcp = 0;

  // Square root of the summed squared deviations (not divided by the size)
  for (int i=0; i<pcpsize; i++)
    std_pcp += (pcp[i] - mean_pcp) * (pcp[i] - mean_pcp);
  std_pcp = sqrt(std_pcp);

  // Overall winner
  int keyIndex = -1; // index of the first maximum
  Real max     = -1; // first maximum
  Real max2    = -1; // second maximum
  int scale    = MAJOR;

  // Per-profile winners. Note that each max2* only receives the previous best
  // value whenever a new best is found; a candidate that lies between the
  // current best and the current runner-up does not update it.
  Real maxMajor     = -1;
  Real max2Major    = -1;
  int keyIndexMajor = -1;

  Real maxMinor     = -1;
  Real max2Minor    = -1;
  int keyIndexMinor = -1;

  Real maxOther     = -1;
  Real max2Other    = -1;
  int keyIndexOther = -1;

  // calculate the correlation between the profiles and the PCP...
  // we shift the profile around to find the best match
  for (int shift=0; shift<pcpsize; shift++) {
    Real corrMajor = correlation(pcp, mean_pcp, std_pcp, _profile_doM, _mean_profile_M, _std_profile_M, shift);
    if (corrMajor > maxMajor) {
      max2Major = maxMajor;
      maxMajor = corrMajor;
      keyIndexMajor = shift;
    }

    Real corrMinor = correlation(pcp, mean_pcp, std_pcp, _profile_dom, _mean_profile_m, _std_profile_m, shift);
    if (corrMinor > maxMinor) {
      max2Minor = maxMinor;
      maxMinor = corrMinor;
      keyIndexMinor = shift;
    }

    Real corrOther = correlation(pcp, mean_pcp, std_pcp, _profile_doO, _mean_profile_O, _std_profile_O, shift);
    if (corrOther > maxOther) {
      max2Other = maxOther;
      maxOther = corrOther;
      keyIndexOther = shift;
    }
  }

  // Pick the profile with the highest correlation. Minor wins any tie it is
  // part of; a tie between major and other (both above minor) is resolved in
  // favour of major so that the three branches cover every case (previously
  // that tie matched no branch and ended in the "Could not find key" error).
  //
  // NOTE(review): keyIndexX * 12 / pcpsize is an integer division, so the
  // "+ 0.5" does not round to the nearest semitone; the shift is floored.
  // Left unchanged to keep existing results.
  if (maxMajor > maxMinor && maxMajor >= maxOther) {
    keyIndex = (int) (keyIndexMajor *  12 / pcpsize + 0.5);
    scale = MAJOR;
    max = maxMajor;
    max2 = max2Major;
  }
  else if (maxMinor >= maxMajor && maxMinor >= maxOther) {
    keyIndex = (int) (keyIndexMinor * 12 / pcpsize + 0.5);
    scale = MINOR;
    max = maxMinor;
    max2 = max2Minor;
  }
  else if (maxOther > maxMajor && maxOther > maxMinor) {
    keyIndex = (int) (keyIndexOther * 12 / pcpsize + 0.5);
    scale = OTHER;
    max = maxOther;
    max2 = max2Other;
  }

  if (keyIndex < 0) {
    throw EssentiaException("KeyEDM3: keyIndex smaller than zero. Could not find key.");
  }

  //////////////////////////////////////////////////////////////////////////////
  // Here we calculate the outputs...

  // first three outputs are key, scale and strength
  _key.get() = _keys[keyIndex];

  if (scale == MAJOR) {
    _scale.get() = "major";
  }
  else if (scale == MINOR) {
    _scale.get() = "minor";
  }
  else if (scale == OTHER) {
    // The "other" profile is reported as minor.
    _scale.get() = "minor";
  }

  _strength.get() = max;

  // this one outputs the relative difference between the maximum and the
  // second highest maximum (i.e. Compute second highest correlation peak)
  _firstToSecondRelativeStrength.get() = (max - max2) / max;
}
Exemplo n.º 4
0
void ChordsDescriptors::compute() {
  const vector<string>& chords = _chords.get();

  if (chords.empty()) {
    throw EssentiaException("ChordsDescriptors: Chords input empty");
  }

  string key = toUpper(_key.get());
  string scale = toLower(_scale.get());

  if (_scale.get() == "minor") {
    key += "m";
  }

    // Chords Histogram
  map<int, Real> chordsHist = chordsHistogram(chords);
  map<int, Real> chordsHistNorm = chordsHistogramNorm(chordsHist, key);

  vector<Real>& chordsHistNormVect = _chordsHistogram.get();
  chordsHistNormVect.resize(0); // erase anything that was in there
  for (int i=0; i<int(ARRAY_SIZE(circleOfFifth)); ++i) {
    chordsHistNormVect.push_back(chordsHistNorm[i]);
  }

  // Chords Number Rate
  Real& chordNumberRate = _chordsNumberRate.get();
  chordNumberRate = 0.0;
  for (int i=0; i<int(chordsHistNormVect.size()); ++i) {
    if (chordsHistNormVect[i] > 1.0) {
      chordNumberRate += 1.0;
    }
  }
  chordNumberRate /= (Real)chords.size();

  // Chords Changes Rate
  Real& chordChangesRate = _chordsChangesRate.get();
  chordChangesRate = 0.0;
  for (int i=1; i<int(chords.size()); ++i) {
    if (chords[i] != chords[i-1]) {
      chordChangesRate += 1.0;
    }
  }
  chordChangesRate /= (Real)chords.size();

  // Chords Key and Scale = most frequent chord
  string& chordsKey = _chordsKey.get();
  string& chordsScale = _chordsScale.get();
  chordsKey = "A";
  Real maxValue = 0.0;

  for (int i=0; i<int(ARRAY_SIZE(circleOfFifth)); ++i) {
    if (chordsHist[i] > maxValue) {
      maxValue = chordsHist[i];
      chordsKey = circleOfFifth[i];
    }
  }

  bool major = true;
  string::size_type position = chordsKey.find("m");

  if ((position == 1) || (position == 2)) {
    major = false;
  }

  if (major) {
    chordsKey = chordsKey;
    chordsScale = "major";
  }
  else {
    chordsKey = chordsKey.substr(0, position);
    chordsScale = "minor";
  }
}
Exemplo n.º 5
0
// Emits the final Leq value: the accumulated energy divided by the number of
// accumulated samples, converted with pow2db. Throws if nothing was accumulated.
void Leq::finalProduce() {
  if (_size == 0) {
    throw EssentiaException("Leq: signal is empty");
  }

  _leq.push(pow2db(_energy/_size));
}
Exemplo n.º 6
0
/**
 * Computes a novelty curve from a [frames x bands] matrix of frequency-band
 * values: a per-band novelty function is computed, the bands are combined with
 * the configured weighting, and the result is smoothed with a moving average.
 *
 * Throws EssentiaException when the input matrix is empty.
 */
void NoveltyCurve::compute() {
  const vector<vector<Real> >& frequencyBands = _frequencyBands.get();
  vector<Real>& novelty = _novelty.get();
  if (frequencyBands.empty())
    throw EssentiaException("NoveltyCurve::compute, cannot compute from an empty input matrix");

  int nFrames = (int)frequencyBands.size();
  int nBands = (int)frequencyBands[0].size();
  //vector<Real> weights = weightCurve(nBands);
  novelty.resize(nFrames-1);
  fill(novelty.begin(), novelty.end(), Real(0.0));

  vector<vector<Real> > t_frequencyBands = essentia::transpose(frequencyBands); // [bands x frames]
  vector<vector<Real> > noveltyBands(nBands);

  int meanSize = int(0.1 * _frameRate); // integral number of frames in 2*0.05 second

  // compute novelty for each sub-band
  meanSize += (meanSize % 2); // force even size // TODO: why?
  for (int bandIdx=0; bandIdx<nBands; bandIdx++) {
    noveltyBands[bandIdx] = noveltyFunction(t_frequencyBands[bandIdx], 1000, meanSize);
  }


  //sum novelty on all bands (weighted) to get a single novelty value per frame
  noveltyBands = essentia::transpose(noveltyBands); // back to [frames x bands]

  // TODO: weight curves should be pre-computed in configure() method
  if (_type == HYBRID) {
    // EAylon: By trial-&-error I found that combining weightings (flat, quadratic,
    // linear and inverse quadratic) was giving better results.
    vector<Real> aweights = weightCurve(nBands, FLAT);
    vector<Real> bweights = weightCurve(nBands, QUADRATIC);
    vector<Real> cweights = weightCurve(nBands, LINEAR);
    vector<Real> dweights = weightCurve(nBands, INVERSE_QUADRATIC);

    vector<Real> bnovelty(nFrames-1, 0.0);
    vector<Real> cnovelty(nFrames-1, 0.0);
    vector<Real> dnovelty(nFrames-1, 0.0);

    for (int frameIdx=0; frameIdx<nFrames-1; frameIdx++) { // noveltyBands is a derivative whose size is nframes-1
      for (int bandIdx=0; bandIdx<nBands; bandIdx++) {
        novelty[frameIdx] += aweights[bandIdx] * noveltyBands[frameIdx][bandIdx];
        bnovelty[frameIdx] += bweights[bandIdx] * noveltyBands[frameIdx][bandIdx];
        cnovelty[frameIdx] += cweights[bandIdx] * noveltyBands[frameIdx][bandIdx];
        dnovelty[frameIdx] += dweights[bandIdx] * noveltyBands[frameIdx][bandIdx];
      }
    }
    // The hybrid curve is the product of the four weighted sums.
    for (int frameIdx=0; frameIdx<nFrames-1; frameIdx++) {
      // TODO why multiplication instead of sum (or mean)?
      novelty[frameIdx] *= bnovelty[frameIdx];
      novelty[frameIdx] *= cnovelty[frameIdx];
      novelty[frameIdx] *= dnovelty[frameIdx];
    }
  }
  else {
    // TODO weight curve should be pre-computed in configure() method
    vector<Real> weights = weightCurve(nBands, _type);

    for (int frameIdx=0; frameIdx<nFrames-1; frameIdx++) {
      for (int bandIdx=0; bandIdx<nBands; bandIdx++) {
        novelty[frameIdx] += weights[bandIdx] * noveltyBands[frameIdx][bandIdx];
      }
    }
  }

  // smoothing
  Algorithm * mavg = AlgorithmFactory::create("MovingAverage", "size", meanSize);
  vector<Real> novelty_ma;
  try {
    mavg->input("signal").set(novelty);
    mavg->output("signal").set(novelty_ma);
    mavg->compute();
  }
  catch (...) {
    // The algorithm instance is owned by this function: release it before
    // letting the error propagate, otherwise it would leak.
    delete mavg;
    throw;
  }
  delete mavg;
  novelty.assign(novelty_ma.begin(), novelty_ma.end());
}
Exemplo n.º 7
0
/**
 * Streaming-mode entry point: reads the tags and audio properties of the
 * configured file once and pushes them on the output sources.
 *
 * Returns PASS when no filename is set or when the file has already been
 * processed since the last configuration, OK otherwise.
 * Throws EssentiaException when the file cannot be read at all and the
 * "failOnError" parameter is set.
 */
AlgorithmStatus MetadataReader::process() {
  if (_filename == "" || !_newlyConfigured) return PASS;

  TagLib::FileRef f(_filename.c_str());

  //Pool tagPool;

  if (f.isNull()) {
    // in case TagLib can't get metadata out of this file, try some basic PCM approach
    int pcmSampleRate = 0;
    int pcmChannels = 0;
    int pcmBitrate = 0;

    try {
      pcmMetadata(_filename, pcmSampleRate, pcmChannels, pcmBitrate);
    }
    catch (EssentiaException& e) {
      // Deliberately best-effort: unless failOnError is set, empty/zero
      // metadata is produced below.
      if (parameter("failOnError").toBool())
        throw EssentiaException("MetadataReader: File does not exist or does not seem to be of a supported filetype. ", e.what());
    }
    string ns = "";
    _title.push(ns);
    _artist.push(ns);
    _album.push(ns);
    _comment.push(ns);
    _genre.push(ns);
    _track.push(ns);
    _date.push(ns);
    //_tagPool.push(tagPool);
    _duration.push(0);
    _bitrate.push(pcmBitrate);
    _sampleRate.push(pcmSampleRate);
    _channels.push(pcmChannels);
  }
  else {
    TagLib::PropertyMap tags = f.file()->properties();

    _title.push(formatString(tags["TITLE"]));
    _artist.push(formatString(tags["ARTIST"]));
    _album.push(formatString(tags["ALBUM"]));
    _comment.push(formatString(tags["COMMENT"]));
    _genre.push(formatString(tags["GENRE"]));
    _track.push(formatString(tags["TRACKNUMBER"]));
    _date.push(formatString(tags["DATE"]));


    /*
    // populate tag pool
    for(PropertyMap::Iterator it = tags.begin(); it != tags.end(); ++it) {
      for(StringList::Iterator str = it->second.begin(); str != it->second.end(); ++str) {
        tagPool.add(it->first.to8Bit(true), str->to8Bit(true));
      }
    }
    _tagPool.push(tagPool);
    */

    // TagLib may open a file and still be unable to provide audio properties,
    // in which case audioProperties() is null: report zeros instead of
    // dereferencing a null pointer.
    int duration = 0;
    int bitrate = 0;
    int sampleRate = 0;
    int channels = 0;

    TagLib::AudioProperties* props = f.audioProperties();
    if (props) {
      duration   = (int)props->length();
      bitrate    = (int)props->bitrate();
      sampleRate = (int)props->sampleRate();
      channels   = (int)props->channels();
    }

    // fix for taglib incorrectly returning the bitrate for wave files
    // (filenames shorter than the extension cannot match and must not be
    // passed to substr with a wrapped-around offset)
    if (_filename.size() >= 3) {
      string ext = toLower(_filename.substr(_filename.size()-3));
      if (ext == "wav") {
        bitrate = bitrate * 1024 / 1000;
      }
    }

    _duration.push(duration);
    _bitrate.push(bitrate);
    _sampleRate.push(sampleRate);
    _channels.push(channels);
  }

  // Everything has been produced for this configuration; stop the generator.
  _newlyConfigured = false;
  shouldStop(true);
  return OK;
}
Exemplo n.º 8
0
/**
 * Decodes one audio frame from the given packet and writes it to `output` as
 * interleaved float samples.
 *
 * @param audioCtx   codec context used for decoding
 * @param output     destination buffer for the float samples
 * @param outputSize in: number of bytes available in `output`;
 *                   out: number of bytes actually written (0 if no frame)
 * @param packet     packet to decode
 * @return the value returned by avcodec_decode_audio4() (negative on error;
 *         error handling is left to the caller)
 *
 * Throws EssentiaException when the output buffer is too small or when the
 * sample format conversion did not produce all the samples.
 */
int AudioLoader::decode_audio_frame(AVCodecContext* audioCtx,
                                    float* output,
                                    int* outputSize,
                                    AVPacket* packet) {

    // _dataSize  input = number of bytes available for write in buff
    //           output = number of bytes actually written (actual: FLT data)
    //E_DEBUG(EAlgorithm, "decode_audio_frame, available bytes in buffer = " << _dataSize);
    int gotFrame = 0;
    av_frame_unref(_decodedFrame); //avcodec_get_frame_defaults(_decodedFrame);

    int len = avcodec_decode_audio4(audioCtx, _decodedFrame, &gotFrame, packet);

    if (len < 0) return len; // error handling should be done outside

    if (!gotFrame) {
        E_DEBUG(EAlgorithm, "AudioLoader: tried to decode packet but didn't get any frame...");
        *outputSize = 0;
        return len;
    }

    int inputSamples = _decodedFrame->nb_samples;
    int inputPlaneSize = av_samples_get_buffer_size(NULL, _nChannels, inputSamples,
                                                    audioCtx->sample_fmt, 1);
    int outputPlaneSize = av_samples_get_buffer_size(NULL, _nChannels, inputSamples,
                                                     AV_SAMPLE_FMT_FLT, 1);

    // av_samples_get_buffer_size() reports failures as negative values. Such
    // a value must never be used as a byte count for memcpy or the resampler,
    // so the frame is treated as carrying no data. This guard also runs
    // before the division below, which would otherwise divide by zero for an
    // invalid channel count.
    if (inputPlaneSize < 0 || outputPlaneSize < 0) {
        E_DEBUG(EAlgorithm, "AudioLoader: could not compute buffer size for decoded frame, skipping it");
        *outputSize = 0;
        return len;
    }

    // the size of the output buffer in samples
    int outputBufferSamples = *outputSize /
            (av_get_bytes_per_sample(AV_SAMPLE_FMT_FLT) * _nChannels);

    if (outputBufferSamples < inputSamples) {
        // this should never happen, throw exception here
        throw EssentiaException("AudioLoader: Insufficient buffer size for format conversion");
    }

    if (audioCtx->sample_fmt == AV_SAMPLE_FMT_FLT) {
        // TODO: no need in this check? Not many of common formats support FLT
        // no conversion needed, direct copy from our frame to output buffer
        memcpy(output, _decodedFrame->data[0], inputPlaneSize);
    }
    else {
        int samplesWritten = avresample_convert(_convertCtxAv,
                                                (uint8_t**) &output,
                                                outputPlaneSize,
                                                outputBufferSamples,
                                                (uint8_t**)_decodedFrame->data,
                                                inputPlaneSize,
                                                inputSamples);

        if (samplesWritten < inputSamples) {
            // TODO: there may be data remaining in the internal FIFO buffer
            // to get this data: call avresample_convert() with NULL input
            // Test if this happens in practice
            ostringstream msg;
            msg << "AudioLoader: Incomplete format conversion (some samples missing)"
                << " from " << av_get_sample_fmt_name(audioCtx->sample_fmt)
                << " to "   << av_get_sample_fmt_name(AV_SAMPLE_FMT_FLT);
            throw EssentiaException(msg);
        }
    }

    *outputSize = outputPlaneSize;
    return len;
}
Exemplo n.º 9
0
// Opens `filename` with libavformat, checks that it contains exactly one audio
// stream, opens the matching decoder and, when the decoder does not output
// S16 samples, sets up a sample-format converter to S16.
//
// Throws EssentiaException when the file cannot be opened, stream information
// cannot be read, the number of audio streams is not exactly one, the codec is
// unsupported or cannot be opened, or a required context cannot be created.
//
// NOTE(review): only the "could not find stream information" path closes
// _demuxCtx before throwing; the later throws leave it open. Presumably the
// caller or destructor cleans up -- confirm.
void AudioLoader::openAudioFile(const string& filename) {
    // NOTE(review): the message logs the "filename" parameter of the algorithm
    // rather than the `filename` argument; presumably they are the same.
    E_DEBUG(EAlgorithm, "AudioLoader: opening file: " << parameter("filename").toString());

    // Open file
    if (avformat_open_input(&_demuxCtx, filename.c_str(), NULL, NULL) != 0) {
        throw EssentiaException("AudioLoader: Could not open file \"", filename, "\"");
    }

    // Retrieve stream information
    int errnum;
    if ((errnum = avformat_find_stream_info(_demuxCtx, NULL)) < 0) {
        // Translate the libav error code into text when possible
        char errorstr[128];
        string error = "Unknown error";
        if (av_strerror(errnum, errorstr, 128) == 0) error = errorstr;
        avformat_close_input(&_demuxCtx);
        _demuxCtx = 0;
        throw EssentiaException("AudioLoader: Could not find stream information, error = ", error);
    }

    // Dump information about file onto standard error
    //dump_format(_demuxCtx, 0, filename.c_str(), 0);

    // Check that we have only 1 audio stream in the file
    // (_streamIdx ends up holding the index of the last audio stream seen)
    int nAudioStreams = 0;
    for (int i=0; i<(int)_demuxCtx->nb_streams; i++) {
        if (_demuxCtx->streams[i]->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
            _streamIdx = i;
            nAudioStreams++;
        }
    }
    if (nAudioStreams != 1) {
        throw EssentiaException("AudioLoader ERROR: found ", nAudioStreams, " streams in the file, expecting only one audio stream");
    }

    // Load corresponding audio codec
    _audioCtx = _demuxCtx->streams[_streamIdx]->codec;

    _audioCodec = avcodec_find_decoder(_audioCtx->codec_id);

    if (!_audioCodec) {
        throw EssentiaException("AudioLoader: Unsupported codec!");
    }

    if (avcodec_open2(_audioCtx, _audioCodec, NULL) < 0) {
        throw EssentiaException("AudioLoader: Unable to instantiate codec...");
    }

    // A converter is only needed when the decoder does not already produce S16
    if (_audioCtx->sample_fmt != AV_SAMPLE_FMT_S16) {

#if HAVE_SWRESAMPLE

        E_DEBUG(EAlgorithm, "AudioLoader: using sample format conversion from libswresample");

        // No samplerate conversion yet, only format
        int64_t layout = av_get_default_channel_layout(_audioCtx->channels);

        // Same layout and sample rate on both sides; only the sample format changes
        _convertCtx = swr_alloc_set_opts(_convertCtx,
                                         layout, AV_SAMPLE_FMT_S16,     _audioCtx->sample_rate,
                                         layout, _audioCtx->sample_fmt, _audioCtx->sample_rate,
                                         0, NULL);

        if (swr_init(_convertCtx) < 0) {
            throw EssentiaException("Could not initialize swresample context");
        }

        /*
        const char* fmt = 0;
        get_format_from_sample_fmt(&fmt, _audioCtx->sample_fmt);
        E_DEBUG(EAlgorithm, "AudioLoader: converting from " << (fmt ? fmt : "unknown") << " to S16");
        */

#else

        E_DEBUG(EAlgorithm, "AudioLoader: using sample format conversion from "
                            "deprecated audioconvert");

        // NOTE(review): the results of av_audio_convert_alloc() and av_malloc()
        // are not checked for NULL here.
        _audioConvert = av_audio_convert_alloc(AV_SAMPLE_FMT_S16, 1, _audioCtx->sample_fmt, 1, NULL, 0);

        // reserve some more space
        _buff1 = (int16_t*)av_malloc(MAX_AUDIO_FRAME_SIZE * 3);
        _buff2 = (int16_t*)av_malloc(MAX_AUDIO_FRAME_SIZE * 3);

#endif

    }
    else {
        E_DEBUG(EAlgorithm, "AudioLoader: no sample format conversion, using direct copy");
    }

    av_init_packet(&_packet);

    // The decoded-frame structure is only allocated for libavcodec versions
    // that provide the avcodec_decode_audio4() API
#if LIBAVCODEC_VERSION_INT >= AVCODEC_AUDIO_DECODE4
    _decodedFrame = avcodec_alloc_frame();
    if (!_decodedFrame) {
        throw EssentiaException("Could not allocate audio frame");
    }
#endif


    // Log which decoding function matches the libavcodec version compiled against
#if LIBAVCODEC_VERSION_INT < AVCODEC_51_28_0
    E_DEBUG(EAlgorithm, "AudioLoader: using ffmpeg avcodec_decode_audio() function");
#elif LIBAVCODEC_VERSION_INT < AVCODEC_52_47_0
    E_DEBUG(EAlgorithm, "AudioLoader: using ffmpeg avcodec_decode_audio2() function");
#elif LIBAVCODEC_VERSION_INT < AVCODEC_AUDIO_DECODE4
    E_DEBUG(EAlgorithm, "AudioLoader: using ffmpeg avcodec_decode_audio3() function");
#else
    E_DEBUG(EAlgorithm, "AudioLoader: using ffmpeg avcodec_decode_audio4() function");
#endif

}
Exemplo n.º 10
0
/**
 * Standard-mode entry point: reads the tags and audio properties of the
 * configured file and stores them in the outputs, including a pool with all
 * (optionally filtered) tags.
 *
 * Throws EssentiaException when the "filename" parameter is not configured,
 * or when the file cannot be read at all and "failOnError" is set.
 */
void MetadataReader::compute() {
  if (!parameter("filename").isConfigured()) {
    throw EssentiaException("MetadataReader: 'filename' parameter has not been configured");
  }

#ifdef _WIN32
  // TagLib needs a wide-character path on Windows: convert the UTF-8 filename.
  // A zero-initialized vector replaces the previous unchecked malloc and the
  // memset that cleared only `len` bytes instead of `len` wide characters.
  int len = MultiByteToWideChar(CP_UTF8, 0, _filename.c_str(), -1, NULL, 0);
  std::vector<wchar_t> wideName(len > 0 ? len : 1, L'\0');
  MultiByteToWideChar(CP_UTF8, 0, _filename.c_str(), -1, &wideName[0], len);
  TagLib::FileRef f(&wideName[0]);
#else
  TagLib::FileRef f(_filename.c_str());
#endif

  Pool tagPool;

  if (f.isNull()) {
    // in case TagLib can't get metadata out of this file, try some basic PCM approach
    int pcmSampleRate = 0;
    int pcmChannels = 0;
    int pcmBitrate = 0;

    try {
      pcmMetadata(_filename, pcmSampleRate, pcmChannels, pcmBitrate);
      // works only for 16bit wavs/pcm; it should output incorrect value for
      // 24bit or 32bit float files, therefore, print a warning
      E_WARNING("MetadataReader: TagLib could not get metadata for this file. The output bitrate is estimated treating the input as 16-bit PCM, and therefore may be incorrect.");
    }
    catch (EssentiaException& e) {
      // Deliberately best-effort: unless failOnError is set, empty/zero
      // metadata is produced below.
      if (parameter("failOnError").toBool())
        throw EssentiaException("MetadataReader: File does not exist or does not seem to be of a supported filetype. ", e.what());
    }

    _title.get()   = "";
    _artist.get()  = "";
    _album.get()   = "";
    _comment.get() = "";
    _genre.get()   = "";
    _track.get()   = "";
    _date.get()    = "";

    _tagPool.get()  = tagPool;

    _duration.get()   = 0;
    _bitrate.get()    = pcmBitrate;
    _sampleRate.get() = pcmSampleRate;
    _channels.get()   = pcmChannels;

    return;
  }

  TagLib::PropertyMap tags = f.file()->properties();

  _title.get()   = formatString(tags["TITLE"]);
  _artist.get()  = formatString(tags["ARTIST"]);
  _album.get()   = formatString(tags["ALBUM"]);
  _comment.get() = formatString(tags["COMMENT"]);
  _genre.get()   = formatString(tags["GENRE"]);
  _track.get()   = formatString(tags["TRACKNUMBER"]);
  _date.get()    = formatString(tags["DATE"]);

  // populate tag pool
  for(TagLib::PropertyMap::ConstIterator i = tags.begin(); i != tags.end(); ++i) {
    string key = i->first.to8Bit(true);
    // When filtering is enabled, only tags listed in _filterMetadataTags are kept
    if (!_filterMetadata || std::find(_filterMetadataTags.begin(), _filterMetadataTags.end(), key) != _filterMetadataTags.end()) {
        // remove '.' chars which are used in Pool descriptor names as a separator
        // convert to lowercase
        std::replace(key.begin(), key.end(), '.', '_');
        std::transform(key.begin(), key.end(), key.begin(), ::tolower);
        key = _tagPoolName + "." + key;

        for(TagLib::StringList::ConstIterator str = i->second.begin(); str != i->second.end(); ++str) {
          tagPool.add(key, str->to8Bit(true));
        }
    }
  }

  _tagPool.get()  = tagPool;

  // TagLib may open a file and still be unable to provide audio properties,
  // in which case audioProperties() is null: report zeros instead of
  // dereferencing a null pointer.
  TagLib::AudioProperties* props = f.audioProperties();
  if (props) {
    _duration.get()   = props->length();
    _bitrate.get()    = props->bitrate();
    _sampleRate.get() = props->sampleRate();
    _channels.get()   = props->channels();
  }
  else {
    _duration.get()   = 0;
    _bitrate.get()    = 0;
    _sampleRate.get() = 0;
    _channels.get()   = 0;
  }

  // fix for taglib incorrectly returning the bitrate for wave files
  // (filenames shorter than the extension cannot match and must not be
  // passed to substr with a wrapped-around offset)
  if (_filename.size() >= 3) {
    string ext = toLower(_filename.substr(_filename.size()-3));
    if (ext == "wav") {
      _bitrate.get() = _bitrate.get() * 1024 / 1000;
    }
  }
}
Exemplo n.º 11
0
/**
 * Gets the AVPacket stored in _packet, and decodes all the samples it can from it,
 * putting them in _buffer, the total number of bytes written being stored in _dataSize.
 *
 * Returns the value reported by decode_audio_frame() on success, or 0 when
 * decoding failed or produced no data (in which case the packet is skipped).
 * Throws EssentiaException when the legacy sample format conversion fails.
 */
int AudioLoader::decodePacket() {
    /*
    E_DEBUG(EAlgorithm, "-----------------------------------------------------");
    E_DEBUG(EAlgorithm, "decoding packet of " << _packet.size << " bytes");
    E_DEBUG(EAlgorithm, "pts: " << _packet.pts << " - dts: " << _packet.dts); //" - pos: " << pkt->pos);
    E_DEBUG(EAlgorithm, "flags: " << _packet.flags);
    E_DEBUG(EAlgorithm, "duration: " << _packet.duration);
    */

    int len = 0;

    // buff is an offset in our output buffer, it points to where we should start
    // writing the next decoded samples
    int16_t* buff = _buffer;

    // Without libswresample, decode into a scratch buffer first when a format
    // conversion is pending; the converted data is copied to _buffer below.
#if !HAVE_SWRESAMPLE
    if (_audioConvert) { buff = _buff1; }
#endif

    // _dataSize gets the size of the buffer, in bytes
    _dataSize = FFMPEG_BUFFER_SIZE*sizeof(int16_t);

    // _dataSize  input = number of bytes available for write in buff
    //           output = number of bytes actually written (actual: S16 data)
    //E_DEBUG(EAlgorithm, "decode_audio_frame, available bytes in buffer = " << _dataSize);
    len = decode_audio_frame(_audioCtx, buff, &_dataSize, &_packet);

    if (len < 0) {
        // only print error msg when file is not an mp3, because mp3 streams can have tag
        // frames (id3v2?) which libavcodec tries to read as audio anyway, and we don't want
        // to print an error message for that...
        if (_audioCtx->codec_id == CODEC_ID_MP3) {
            E_DEBUG(EAlgorithm, "AudioLoader: invalid frame, probably an mp3 tag frame, skipping it");
        }
        else {
            E_WARNING("AudioLoader: error while decoding, skipping frame");
        }
        return 0;
    }

    if (_dataSize <= 0) {
        // No data yet, get more frames
        //cout << "no data yet, get more frames" << endl;
        _dataSize = 0;
        return 0;
    }

#if !HAVE_SWRESAMPLE
    if (_audioConvert) {
        // this assumes that all audio is interleaved in the first channel
        // it works as we're only doing sample format conversion, but we
        // should be very careful
        const void* ibuf[6] = { buff };
              void* obuf[6] = { _buff2 };
        int istride[6]      = { av_get_bytes_per_sample(_audioCtx->sample_fmt) };
        int ostride[6]      = { av_get_bytes_per_sample(AV_SAMPLE_FMT_S16)     };
        int totalsamples    = _dataSize / istride[0]; // == num_samp_per_channel * num_channels

        if (av_audio_convert(_audioConvert, obuf, ostride, ibuf, istride, totalsamples) < 0) {
            ostringstream msg;
            msg << "AudioLoader: Error converting "
                << " from " << avcodec_get_sample_fmt_name(_audioCtx->sample_fmt)
                << " to "   << avcodec_get_sample_fmt_name(SAMPLE_FMT_S16);
            throw EssentiaException(msg);
        }

        // when entering the current block, dataSize contained the size in bytes
        // that the audio was taking in its native format. Now it needs to be set
        // to the size of the audio we're returning, after conversion
        _dataSize = totalsamples * av_get_bytes_per_sample(AV_SAMPLE_FMT_S16);
        memcpy(_buffer, _buff2, _dataSize);
    }
#endif

    if (len != _packet.size) {
        // FIXME: investigate why this happens and whether it is a big issue
        //        (looks like it only loses silent samples at the end of files)

        // more than 1 frame in a packet, happens a lot with flac for instance...
        // NOTE(review): `len` is the return value of decode_audio_frame(), which
        // is compared to the packet size in bytes above, so the "decoded samples"
        // label below presumably reports bytes rather than samples -- confirm.
        E_WARNING("AudioLoader: more than 1 frame in packet, dropping remaining bytes...");
        E_WARNING("at sample index: " << output("audio").totalProduced());
        E_WARNING("decoded samples: " << len);
        E_WARNING("packet size: " << _packet.size);
    }

    return len;
}
Exemplo n.º 12
0
/**
 * Decodes audio from the given packet using whichever libavcodec decoding
 * function is available for the library version we are compiled against,
 * and writes the decoded samples to output.
 *
 * audioCtx:   codec context used for decoding
 * output:     buffer receiving the decoded samples
 * outputSize: on input, number of bytes available in output;
 *             on output, number of bytes actually written (0 if no frame
 *             was obtained). It is left untouched if decoding fails with
 *             the avcodec_decode_audio4 API.
 * packet:     packet to decode
 *
 * Returns the value returned by the underlying avcodec_decode_audio* call;
 * negative values denote an error, which has to be handled by the caller.
 *
 * Throws an EssentiaException if the sample format conversion fails or if
 * the size of the decoded frame cannot be determined.
 */
int AudioLoader::decode_audio_frame(AVCodecContext* audioCtx,
                                    int16_t* output,
                                    int* outputSize,
                                    AVPacket* packet) {


#if LIBAVCODEC_VERSION_INT < AVCODEC_51_28_0

    int len = avcodec_decode_audio(audioCtx, output, outputSize,
                                 packet->data, packet->size);

#elif LIBAVCODEC_VERSION_INT < AVCODEC_52_47_0

    int len = avcodec_decode_audio2(audioCtx, output, outputSize,
                                    packet->data, packet->size);

#elif LIBAVCODEC_VERSION_INT < AVCODEC_AUDIO_DECODE4

    int len = avcodec_decode_audio3(audioCtx, output, outputSize,
                                    packet);

#else

    int gotFrame = 0;
    avcodec_get_frame_defaults(_decodedFrame);

    int len = avcodec_decode_audio4(audioCtx, _decodedFrame, &gotFrame, packet);

    if (len < 0) return len; // error handling should be done outside

    if (gotFrame) {
        int nsamples = _decodedFrame->nb_samples;
        // size in bytes of the decoded frame, in its native sample format
        int inputDataSize = av_samples_get_buffer_size(NULL, audioCtx->channels, nsamples,
                                                       audioCtx->sample_fmt, 1);

#  if HAVE_SWRESAMPLE
        if (_convertCtx) {
            // capacity of the output buffer, in samples per channel
            int outputSamples = *outputSize / (2 /*sizeof(S16)*/ * _nChannels);

            // swr_convert returns the number of samples per channel it wrote,
            // which is less than nsamples if the output buffer is too small
            int convertedSamples = swr_convert(_convertCtx,
                                               (uint8_t**) &output, outputSamples,
                                               (const uint8_t**)_decodedFrame->data, nsamples);
            if (convertedSamples < 0) {
                ostringstream msg;
                msg << "AudioLoader: Error converting"
                    << " from " << av_get_sample_fmt_name(audioCtx->sample_fmt)
                    << " to "   << av_get_sample_fmt_name(AV_SAMPLE_FMT_S16);
                throw EssentiaException(msg);
            }
            // report what has really been written, not what was asked for
            *outputSize = convertedSamples * (2 /*sizeof(S16)*/ * _nChannels);
        }
        else {
            // no conversion needed, make a direct copy
            // copy and convert data from our frame to our output audio buffer
            //E_WARNING("Should use swresample always!");
            if (inputDataSize < 0) {
                // a negative size is an error code, do not hand it to memcpy
                throw EssentiaException("AudioLoader: Could not compute the size of the decoded audio frame");
            }
            memcpy(output, _decodedFrame->data[0], inputDataSize);
            *outputSize = inputDataSize;
        }
#  else
        // direct copy, we do the sample format conversion later if needed
        if (inputDataSize < 0) {
            // a negative size is an error code, do not hand it to memcpy
            throw EssentiaException("AudioLoader: Could not compute the size of the decoded audio frame");
        }
        memcpy(output, _decodedFrame->data[0], inputDataSize);
        *outputSize = inputDataSize;
#  endif

    }
    else {
        E_DEBUG(EAlgorithm, "AudioLoader: tried to decode packet but didn't get any frame...");
        *outputSize = 0;
    }

#endif

    return len;
}
Exemplo n.º 13
0
/**
 * Reads the slicing parameters, validates them and converts the requested
 * [start, end] times into a sorted list of slices expressed in samples.
 *
 * Throws an EssentiaException if startTimes and endTimes differ in size, if
 * a slice starts after it ends, or (when times are given in seconds) if a
 * time multiplied by the sample rate does not fit in an int.
 */
void Slicer::configure() {
  _sampleRate = parameter("sampleRate").toReal();
  _startTimes = parameter("startTimes").toVectorReal();
  _endTimes = parameter("endTimes").toVectorReal();
  _timeUnits = parameter("timeUnits").toString();

  if (_startTimes.size() != _endTimes.size()) {
    throw EssentiaException("Slicer: startTimes and endTimes do not have the same number of elements");
  }

  // largest value that fits in 31 bits, i.e. in a 32-bit signed int. The
  // previous limit (0xEFFFFFFF) was larger than that, so it let through
  // values that overflowed in the int conversions below.
  const double maxSampleIndex = 0x7FFFFFFF;

  // check given times correspond to valid slices
  for (int i=0; i<int(_startTimes.size()); ++i) {
    if (_startTimes[i] > _endTimes[i]) {
      ostringstream msg;
      msg << "Slicer: Slice number " << i+1 << ": [" << _startTimes[i] << ", " << _endTimes[i] << "] is invalid because its start time is after its end time";
      throw EssentiaException(msg);
    }

    // if the time units are in seconds, we have to make sure that the
    // startTime[i]*sampleRate doesn't cause an overflow of int (31 bits)
    if (_timeUnits == "seconds" &&
        (double(_startTimes[i])*_sampleRate > maxSampleIndex ||
         double(_endTimes[i])*_sampleRate > maxSampleIndex)) {
      ostringstream msg;
      msg << "Slicer: start or end time, multiplied by the sampleRate (" <<
        _sampleRate << "Hz), is too large (greater than 31 bits): [" <<
        _startTimes[i] << "s, " << _endTimes[i] << "s]";
      throw EssentiaException(msg);
    }
  }

  _slices.clear();
  _slices.resize(_startTimes.size());

  if (_timeUnits == "samples") {
    // times are already sample indices, only drop the fractional part
    for (int i=0; i<int(_slices.size()); ++i) {
      _slices[i] = make_pair(static_cast<int>(_startTimes[i]),
                             static_cast<int>(_endTimes[i]));
    }
  }
  else {
    // times are in seconds: round the start and the length of the slice to
    // the nearest sample, the end being derived from these two
    for (int i=0; i<int(_slices.size()); ++i) {
      int s = int(_startTimes[i] * _sampleRate + 0.5);
      int e = s + int((_endTimes[i] - _startTimes[i]) * _sampleRate + 0.5);
      _slices[i] = make_pair(s, e);
    }
  }

  // set the acquireSize of the sink to the max size of the slices.
  // this will get overwritten as soon as we start processing, but is a hint
  // for automatic buffer resizing
  int maxSlice = defaultPreferredSize;
  for (int i=0; i<int(_slices.size()); ++i) {
    maxSlice = max(maxSlice, _slices[i].second - _slices[i].first);
  }

  _input.setAcquireSize(maxSlice);

  // slices are kept ordered by start sample (then by end sample)
  sort(_slices.begin(), _slices.end());

  reset();
}
Exemplo n.º 14
0
Arquivo: key.cpp Projeto: MTG/essentia
void Key::compute() {

  const vector<Real>& pcp = _pcp.get();

  int pcpsize = (int)pcp.size();
  int n = pcpsize/12;

  if (pcpsize < 12 || pcpsize % 12 != 0)
    throw EssentiaException("Key: input PCP size is not a positive multiple of 12");

  if (pcpsize != (int)_profile_dom.size()) {
    resize(pcpsize);
  }

  ///////////////////////////////////////////////////////////////////
  // compute correlation

  // Compute means
  Real mean_pcp = mean(pcp);
  Real std_pcp = 0;

  // Compute Standard Deviations
  for (int i=0; i<pcpsize; i++)
    std_pcp += (pcp[i] - mean_pcp) * (pcp[i] - mean_pcp);
  std_pcp = sqrt(std_pcp);

  // Compute correlation matrix
  int keyIndex = -1; // index of the first maximum
  Real max     = -1;     // first maximum
  Real max2    = -1;    // second maximum
  int scale    = MAJOR;  // scale

  // Compute maximum for major, minor and other.
  Real maxMajor     = -1;
  Real max2Major    = -1;
  int keyIndexMajor = -1;

  Real maxMinor     = -1;
  Real max2Minor    = -1;
  int keyIndexMinor = -1;

  Real maxOther     = -1;
  Real max2Other    = -1;
  int keyIndexOther = -1;

  // calculate the correlation between the profiles and the PCP...
  // we shift the profile around to find the best match
  for (int shift=0; shift<pcpsize; shift++) {
    /*
    // Penalization if the Tonic has not a minimum amplitude
    // max_pcp needs to be calculated...
    Real factor = pcp[i]/max_pcp;
    if (factor < 0.6) {
      corrMajor *= factor / 0.6;
      corrMinor *= factor / 0.6;
    }
    */
    Real corrMajor = correlation(pcp, mean_pcp, std_pcp, _profile_doM, _mean_profile_M, _std_profile_M, shift);
    // Compute maximum value for major keys
    if (corrMajor > maxMajor) {
      max2Major = maxMajor;
      maxMajor = corrMajor;
      keyIndexMajor = shift;
    }

    Real corrMinor = correlation(pcp, mean_pcp, std_pcp, _profile_dom, _mean_profile_m, _std_profile_m, shift);
    // Compute maximum value for minor keys
    if (corrMinor > maxMinor) {
      max2Minor = maxMinor;
      maxMinor = corrMinor;
      keyIndexMinor = shift;
    }

    Real corrOther = 0;
    if (_useMajMin) {
      corrOther = correlation(pcp, mean_pcp, std_pcp, _profile_doO, _mean_profile_O, _std_profile_O, shift);
      // Compute maximum value for other keys
      if (corrOther > maxOther) {
        max2Other = maxOther;
        maxOther = corrOther;
        keyIndexOther = shift;
      }
    }
  }


  if (maxMajor > maxMinor && maxMajor > maxOther) {
    keyIndex = (int) (keyIndexMajor *  12 / pcpsize + 0.5);
    scale = MAJOR;
    max = maxMajor;
    max2 = max2Major;
  }

  else if (maxMinor >= maxMajor && maxMinor >= maxOther) {
    keyIndex = (int) (keyIndexMinor * 12 / pcpsize + 0.5);
    scale = MINOR;
    max = maxMinor;
    max2 = max2Minor;
    }

	else if (maxOther > maxMajor && maxOther > maxMinor) {
    keyIndex = (int) (keyIndexOther * 12 / pcpsize + 0.5);
    scale = MAJMIN;
    max = maxOther;
    max2 = max2Other;
    }

  // In the case of Wei Chai algorithm, the scale is detected in a second step
  // In this point, always the major relative is detected, as it is the first
  // maximum
  if (_profileType == "weichai") {
    if (scale == MINOR)
      throw EssentiaException("Key: error in Wei Chai algorithm. Wei Chai algorithm does not support minor scales.");

    int fifth = keyIndex + 7*n;
    if (fifth > pcpsize)
      fifth -= pcpsize;
    int sixth = keyIndex + 9*n;
    if (sixth > pcpsize)
      sixth -= pcpsize;

    if (pcp[sixth] >  pcp[fifth]) {
      keyIndex = sixth;
      keyIndex = (int) (keyIndex * 12 / pcpsize + .5);
      scale = MINOR;
    }
  }

  // keyIndex = (int)(keyIndex * 12.0 / pcpsize + 0.5) % 12;

  if (keyIndex < 0) {
    throw EssentiaException("Key: keyIndex smaller than zero. Could not find key.");
  }

  //////////////////////////////////////////////////////////////////////////////
  // Here we calculate the outputs...

  // first three outputs are key, scale and strength
  _key.get() = _keys[keyIndex];

  if (scale == MAJOR) {
    _scale.get() = "major";
  }

  else if (scale == MINOR) {
    _scale.get() = "minor";
  }

  else if (scale == MAJMIN) {
    _scale.get() = "majmin";
  }

  _strength.get() = max;

  // this one outputs the relative difference between the maximum and the
  // second highest maximum (i.e. Compute second highest correlation peak)
  _firstToSecondRelativeStrength.get() = (max - max2) / max;

}
Exemplo n.º 15
0
void FadeDetection::compute() {

  const vector<Real>& rms = _rms.get();
  if (rms.empty()) {
    // throw exception as mean of empty arrays cannot be computed
    throw EssentiaException("FadeDetection: RMS array is empty");
  }
  Array2D<Real>& fade_in  = _fade_in.get();
  Array2D<Real>& fade_out = _fade_out.get();

  Real meanRms = mean(rms);
  Real thresholdHigh = _cutoffHigh * meanRms;
  Real thresholdLow = _cutoffLow * meanRms;
  int minLength = int(_minLength * _frameRate); // change minLength to samples

  // FADE-IN
  bool fade = false;
  vector<pair<int,int> > fade_in_vector;
  int fade_in_start = 0;
  int fade_in_stop;
  Real fade_in_start_value = 0.0;

  for (int i=0; i<int(rms.size()); ++i) {
    if (!fade) {
      // To get the fade-in start point
      if (rms[i] <= thresholdLow) {
        fade_in_start_value = rms[i];
        fade_in_start = i;
        fade = true;
      }
    }
    if (fade) {
      // To get the point with minimum energy as the fade-in starting point
      if (rms[i] < fade_in_start_value) {
        fade_in_start_value = rms[i];
        fade_in_start = i;
      }
      // To get the fade-in stop point
      if (rms[i] >= thresholdHigh) {
      	fade_in_stop = i;
        if ((fade_in_stop - fade_in_start) >= minLength) {
          fade_in_vector.push_back(make_pair(fade_in_start, fade_in_stop));
        }
        fade = false;
      }
    }
  }

  // convert units and push to output
  if (fade_in_vector.size() != 0) {
    fade_in = Array2D<Real>(int(fade_in_vector.size()), 2);
    for (int i=0; i<fade_in.dim1(); i++) {
      fade_in[i][0] = fade_in_vector[i].first / _frameRate;
      fade_in[i][1] = fade_in_vector[i].second / _frameRate;
    }
  }

  // FADE-OUT
  fade = false;
  vector<pair<int, int> > fade_out_vector;
  int fade_out_start;
  int fade_out_stop = 0;
  Real fade_out_stop_value = 0.0;

  for (int i=rms.size()-1; i>=0; i--) {
    if (!fade) {
      // To get the fade-out stop point
      if (rms[i] <= thresholdLow) {
        fade_out_stop_value = rms[i];
        fade_out_stop = i;
        fade = true;
      }
    }
    if (fade) {
      // To get the energy minimum for the fade-out stop point
      if (rms[i] <= fade_out_stop_value) {
        fade_out_stop_value = rms[i];
        fade_out_stop = i;
      }
      // To get the fade-out start point
      if (rms[i] >= thresholdHigh) {
      	fade_out_start = i;
        if ((fade_out_stop - fade_out_start) >= minLength) {
          fade_out_vector.push_back(make_pair(fade_out_start, fade_out_stop));
        }
      	fade = false;
      }
    }
  }

  // convert units and push to output
  if (fade_out_vector.size() != 0) {
    fade_out = Array2D<Real>(int(fade_out_vector.size()), 2);
    for (int i=0; i<fade_out.dim1(); i++) {
      fade_out[i][0] = fade_out_vector[fade_out_vector.size()-1-i].first / _frameRate;
      fade_out[i][1] = fade_out_vector[fade_out_vector.size()-1-i].second / _frameRate;
    }
  }
}
Exemplo n.º 16
0
Arquivo: key.cpp Projeto: MTG/essentia
/**
 * Reads the parameters of the algorithm and sets up the key profiles.
 *
 * The major and minor profiles (plus the "other" profile for the profile
 * types that provide one) are selected according to 'profileType'. Chord
 * based profiles are then computed from them, and replace them when
 * 'usePolyphony' is set. Finally resize() is called with 'pcpSize'.
 *
 * If 'useMajMin' is requested with a profile type that has no "other"
 * profile, it is switched off and a message is logged.
 *
 * Throws an EssentiaException if the profile type is not supported.
 */
void Key::configure() {
  _slope = parameter("slope").toReal();
  _numHarmonics = parameter("numHarmonics").toInt();
  _profileType = parameter("profileType").toString();
  _useMajMin = parameter("useMajMin").toBool();

  if (_useMajMin) {
    if (_profileType == "diatonic" || _profileType == "krumhansl"  || _profileType == "temperley" ||
        _profileType == "weichai"  || _profileType == "tonictriad" || _profileType == "temperley2005" ||
        _profileType == "thpcp"    || _profileType == "shaath"     || _profileType == "gomez" ||
        _profileType == "noland"   || _profileType == "edmm") {
      E_INFO("Key: the profile '" << _profileType << "' does not support the use of 'majmin' mode.");
      _useMajMin = false;
    }
  }
  const char* keyNames[] = { "A", "Bb", "B", "C", "C#", "D", "Eb", "E", "F", "F#", "G", "Ab" };
  _keys = arrayToVector<string>(keyNames);

  // Profiles come in pairs: the major profile followed by the minor one
  Real profileTypes[][12] = {
    // Diatonic
    { 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1 },
    { 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1 },

    // Krumhansl
    { 6.35, 2.23, 3.48, 2.33, 4.38, 4.09, 2.52, 5.19, 2.39, 3.66, 2.29, 2.88 },
    { 6.33, 2.68, 3.52, 5.38, 2.60, 3.53, 2.54, 4.75, 3.98, 2.69, 3.34, 3.17 },

    // A revised version of the key profiles, by David Temperley, see [2]
    { 5.0, 2.0, 3.5, 2.0, 4.5, 4.0, 2.0, 4.5, 2.0, 3.5, 1.5, 4.0 },
    { 5.0, 2.0, 3.5, 4.5, 2.0, 4.0, 2.0, 4.5, 3.5, 2.0, 1.5, 4.0 },

    // Wei Chai MIT PhD thesis
    { 81302, 320, 65719, 1916, 77469, 40928, 2223, 83997, 1218, 39853, 1579, 28908 },
    { 39853, 1579, 28908, 81302, 320, 65719, 1916, 77469, 40928, 2223, 83997, 1218 },

    // Tonic triad.
    { 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0 },
    { 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0 },

    // Temperley MIREX 2005
    { 0.748, 0.060, 0.488, 0.082, 0.67, 0.46, 0.096, 0.715, 0.104, 0.366, 0.057, 0.4 },
    { 0.712, 0.084, 0.474, 0.618, 0.049, 0.46, 0.105, 0.747, 0.404, 0.067, 0.133, 0.33 },

    // Statistics THPCP over all the evaluation set
    { 0.95162, 0.20742, 0.71758, 0.22007, 0.71341, 0.48841, 0.31431, 1.00000, 0.20957, 0.53657, 0.22585, 0.55363 },
    { 0.94409, 0.21742, 0.64525, 0.63229, 0.27897, 0.57709, 0.26428, 1.0000, 0.26428, 0.30633, 0.45924, 0.35929 },

    // Shaath
    { 6.6, 2.0, 3.5, 2.3, 4.6, 4.0, 2.5, 5.2, 2.4, 3.7, 2.3, 3.4 },
    { 6.5, 2.7, 3.5, 5.4, 2.6, 3.5, 2.5, 5.2, 4.0, 2.7, 4.3, 3.2 },

    // Gómez (as specified by Shaath)
    { 0.82, 0.00, 0.55, 0.00, 0.53, 0.30, 0.08, 1.00, 0.00, 0.38, 0.00, 0.47 },
    { 0.81, 0.00, 0.53, 0.54, 0.00, 0.27, 0.07, 1.00, 0.27, 0.07, 0.10, 0.36 },

    // Noland
    { 0.0629, 0.0146, 0.061, 0.0121, 0.0623, 0.0414, 0.0248, 0.0631, 0.015, 0.0521, 0.0142, 0.0478 },
    { 0.0682, 0.0138, 0.0543, 0.0519, 0.0234, 0.0544, 0.0176, 0.067, 0.0349, 0.0297, 0.0401, 0.027 },

    // edmm
    { 0.083, 0.083, 0.083, 0.083, 0.083, 0.083, 0.083, 0.083, 0.083, 0.083, 0.083, 0.083 },
    { 0.17235348, 0.04, 0.0761009,  0.12, 0.05621498, 0.08527853, 0.0497915,  0.13451001, 0.07458916, 0.05003023, 0.09187879, 0.05545106 },

    // edma
    // { 0.16519551, 0.04749026, 0.08293076, 0.06687112, 0.09994645, 0.09274123, 0.05294487, 0.13159476, 0.05218986, 0.07443653, 0.06940723, 0.0642515  },
    // { 0.17235348, 0.05336489, 0.0761009,  0.10043649, 0.05621498, 0.08527853, 0.0497915,  0.13451001, 0.07458916, 0.05003023, 0.09187879, 0.05545106 },
  };

  // Profiles come in triplets: major, minor and other
  Real profileTypesWithOther[][12] = {
    // bgate
    { 1.00  , 0.00  , 0.42  , 0.00  , 0.53  , 0.37  , 0.00  , 0.77  , 0.00  , 0.38,   0.21  , 0.30   },
    { 1.00  , 0.00  , 0.36  , 0.39  , 0.00  , 0.38  , 0.00  , 0.74  , 0.27  , 0.00  , 0.42  , 0.23   },
    { 1.00  , 0.26  , 0.35  , 0.29  , 0.44  , 0.36  , 0.21  , 0.78  , 0.26  , 0.25  , 0.32  , 0.26   },

    // braw
    { 1.0000, 0.1573, 0.4200, 0.1570, 0.5296, 0.3669, 0.1632, 0.7711, 0.1676, 0.3827, 0.2113, 0.2965 },
    { 1.0000, 0.2330, 0.3615, 0.3905, 0.2925, 0.3777, 0.1961, 0.7425, 0.2701, 0.2161, 0.4228, 0.2272 },
    { 1.0000, 0.2608, 0.3528, 0.2935, 0.4393, 0.3580, 0.2137, 0.7809, 0.2578, 0.2539, 0.3233, 0.2615 },

    // edma
    { 1.00  , 0.29  , 0.50  , 0.40  , 0.60  , 0.56  , 0.32  , 0.80  , 0.31  , 0.45  , 0.42  , 0.39   },
    { 1.00  , 0.31  , 0.44  , 0.58  , 0.33  , 0.49  , 0.29  , 0.78  , 0.43  , 0.29  , 0.53  , 0.32   },
    { 1.00  , 0.26  , 0.35  , 0.29  , 0.44  , 0.36  , 0.21  , 0.78  , 0.26  , 0.25  , 0.32  , 0.26   }
  };


// The macro argument is parenthesized so that any expression can be passed
// safely; both macros are undefined once the profiles have been selected.
#define SET_PROFILE(i) _M = arrayToVector<Real>(profileTypes[2*(i)]); _m = arrayToVector<Real>(profileTypes[2*(i)+1])
#define SET_PROFILE_OTHER(i) _M = arrayToVector<Real>(profileTypesWithOther[3*(i)]); _m = arrayToVector<Real>(profileTypesWithOther[3*(i)+1]); _O = arrayToVector<Real>(profileTypesWithOther[3*(i)+2])

  // the "other" profile stays flat (all zeros) unless the profile type sets it
  _O.assign(12, 0.);

  if      (_profileType == "diatonic")      { SET_PROFILE(0);  }
  else if (_profileType == "krumhansl")     { SET_PROFILE(1);  }
  else if (_profileType == "temperley")     { SET_PROFILE(2);  }
  else if (_profileType == "weichai")       { SET_PROFILE(3);  }
  else if (_profileType == "tonictriad")    { SET_PROFILE(4);  }
  else if (_profileType == "temperley2005") { SET_PROFILE(5);  }
  else if (_profileType == "thpcp")         { SET_PROFILE(6);  }
  else if (_profileType == "shaath")        { SET_PROFILE(7);  }
  else if (_profileType == "gomez")         { SET_PROFILE(8);  }
  else if (_profileType == "noland")        { SET_PROFILE(9);  }
  else if (_profileType == "edmm")          { SET_PROFILE(10); }
  // else if (_profileType == "edma")          { SET_PROFILE(13); }
  else if (_profileType == "bgate")         { SET_PROFILE_OTHER(0); }
  else if (_profileType == "braw")          { SET_PROFILE_OTHER(1); }
  else if (_profileType == "edma")          { SET_PROFILE_OTHER(2); }
  else {
    throw EssentiaException("Key: Unsupported profile type: ", _profileType);
  }

#undef SET_PROFILE
#undef SET_PROFILE_OTHER

  // Compute the other vectors getting into account chords:
  vector<Real> M_chords(12, (Real)0.0);
  vector<Real> m_chords(12, (Real)0.0);

  /* Under test: Purwins et al.
  for (int n=0; n<12; n++) {
    TIndex dominant = n+7;
    if ( dominant > 11)
      dominant -= 12;
    M_chords[n]= _M[n] + (1.0/3.0)*_M[dominant];
    m_chords[n]= _m[n] + (1.0/3.0)*_m[dominant];
  }
  */

  /*
  Assumptions:
    - We consider that the tonal hierarchy is kept when dealing with polyphonic sounds.
      That means that Krumhansl profiles are seen as the tonal hierarchy of
      each of the chords of the harmonic scale within a major/minor tonal context.
    - We compute from these chord profiles the corresponding note (pitch class) profiles,
      which will be compared to HPCP values.

  Rationale:
    - Each note contributes to the different harmonics.
    - All the chords of the major/minor key are considered.

  Procedure:
    - First, profiles are initialized to 0
    - We take _M[i], _m[i] as Krumhansl profiles i=1,...12 related to each of the chords
      of the major/minor key.
    - For each chord, we add its contribution to the three notes (pitch classes) of the chord.
      We use the same weight for all the notes of the chord.
    - For each note, we add its contribution to the different harmonics
  */

  // when 'useThreeChords' is set, only the tonic, subdominant and dominant
  // chords are used; otherwise the chords of all the degrees are added
  const bool useAllChords = !parameter("useThreeChords").toBool();

  /** MAJOR KEY */
  // Tonic (I)
  addMajorTriad(0, _M[0], M_chords);

  if (useAllChords) {
    // II
    addMinorTriad(2, _M[2], M_chords);
    // Only root: AddContributionHarmonics(2, _M[2], M_chords);
    // III
    addMinorTriad(4, _M[4], M_chords);
    // Only root: AddContributionHarmonics(4, _M[4], M_chords);
  }

  // Subdominant (IV)
  addMajorTriad(5, _M[5], M_chords);
  // Dominant (V)
  addMajorTriad(7, _M[7], M_chords);

  if (useAllChords) {
    // VI
    addMinorTriad(9, _M[9], M_chords);
    // Only root: AddContributionHarmonics(9, _M[9], M_chords);
    // VII (5th diminished)
    addContributionHarmonics(11, _M[11], M_chords);
    addContributionHarmonics(2 , _M[11], M_chords);
    addContributionHarmonics(5 , _M[11], M_chords);
    // Only root: AddContributionHarmonics(11, _M[11], M_chords);
  }

  /** MINOR KEY */
  // Tonic (I)
  addMinorTriad(0, _m[0], m_chords);
  if (useAllChords) {
    // II (5th diminished)
    addContributionHarmonics(2, _m[2], m_chords);
    addContributionHarmonics(5, _m[2], m_chords);
    addContributionHarmonics(8, _m[2], m_chords);
    // Only root: AddContributionHarmonics(2, _m[2], m_chords);

    // III (5th augmented)
    addContributionHarmonics(3, _m[3], m_chords);
    addContributionHarmonics(7, _m[3], m_chords);
    addContributionHarmonics(11,_m[3], m_chords); // Harmonic minor scale! this was 10 before!!!
    // Only root: AddContributionHarmonics(3, _m[3], m_chords);
  }

  // Subdominant (IV)
  addMinorTriad(5, _m[5], m_chords);

  // Dominant (V) (harmonic minor scale)
  addMajorTriad(7, _m[7], m_chords);

  if (useAllChords) {
    // VI
    addMajorTriad(8, _m[8], m_chords);
    // Only root: AddContributionHarmonics(8, _m[8], m_chords);
    // VII (diminished 5th)
    // NOTE(review): the VII chord is weighted with _m[8] whereas the major
    // key uses the weight of the chord root (_M[11]) -- confirm whether
    // _m[11] was intended here.
    addContributionHarmonics(11, _m[8], m_chords);
    addContributionHarmonics(2, _m[8], m_chords);
    addContributionHarmonics(5, _m[8], m_chords);
    // Only root: AddContributionHarmonics(11, _m[8], m_chords);
  }

  if (parameter("usePolyphony").toBool()) {
    _M = M_chords;
    _m = m_chords;
  }

  resize(parameter("pcpSize").toInt());
}
Exemplo n.º 17
0
/**
 * Resamples the tokens currently available on the input and pushes the
 * result to the output.
 *
 * Returns OK when data has been consumed and produced, NO_OUTPUT when the
 * data could not be acquired because of the output, and NO_INPUT when there
 * is not enough input yet (or nothing left at all). Once shouldStop() is
 * true, the tokens still available are processed as the end of the input.
 *
 * Throws an EssentiaException if the resampler reports an error or did not
 * consume any input.
 */
AlgorithmStatus Resample::process() {
  EXEC_DEBUG("process()");

  EXEC_DEBUG("Trying to acquire data");
  const AlgorithmStatus acquired = acquireData();

  // FIXME: are we sure this still works?
  // if status == NO_OUTPUT, we should temporarily stop the resampler,
  // return from this function so its dependencies can process the frames,
  // and reschedule the framecutter to run when all this is done.
  if (acquired == NO_OUTPUT) {
    EXEC_DEBUG("no more output available for resampling; mark it for rescheduling and return");
    //_reschedule = true;
    return NO_OUTPUT; // if the buffer is full, we need to have produced something!
  }

  if (acquired != OK) {
    // unless shouldStop is true, more audio is still to come: returning here
    // is how we wait for it
    if (!shouldStop()) return NO_INPUT;

    // there is no more audio, so we take what's left to fill in the output
    const int remaining = input("signal").available();
    EXEC_DEBUG("There are " << remaining << " available tokens");
    if (remaining == 0) return NO_INPUT;

    input("signal").setAcquireSize(remaining);
    input("signal").setReleaseSize(remaining);
    output("signal").setAcquireSize((int)(_data.src_ratio * remaining + 100 + (int)_delay));
    _data.end_of_input = 1;

    // try again, now that the sizes match what is left
    return process();
  }

  EXEC_DEBUG("data acquired");

  const vector<AudioSample>& inSamples = _signal.tokens();
  vector<AudioSample>& outSamples = _resampled.tokens();

  EXEC_DEBUG("signal size:" << inSamples.size());
  EXEC_DEBUG("resampled size:" << outSamples.size());

  // describe the input and output buffers to the resampler
  _data.data_in = const_cast<float*>(&(inSamples[0]));
  _data.input_frames = (long)inSamples.size();

  _data.data_out = &(outSamples[0]);
  _data.output_frames = (long)outSamples.size();

  if (_data.src_ratio != 1.0) {
    const int srcError = src_process(_state, &_data);

    if (srcError != 0) {
      throw EssentiaException("Resample: ", src_strerror(srcError));
    }

    if (_data.input_frames_used == 0) {
      throw EssentiaException("Resample: Internal consumption problem while resampling");
    }
  }
  else {
    // same sample rate on both sides: a plain copy is enough
    assert(_data.output_frames >= _data.input_frames);
    fastcopy(_data.data_out, _data.data_in, _data.input_frames);
    _data.input_frames_used = _data.input_frames;
    _data.output_frames_gen = _data.input_frames;
  }

  EXEC_DEBUG("input frames:" << _data.input_frames_used);
  EXEC_DEBUG("produced:" << _data.output_frames_gen);

  // keep track of the difference between the number of frames expected from
  // the ratio and the number of frames generated so far
  _delay += (Real)_data.input_frames_used*_data.src_ratio - (Real)_data.output_frames_gen;

  assert((int)outSamples.size() >= _data.output_frames_gen);
  assert((int)inSamples.size() >= _data.input_frames_used);

  // only release what has really been consumed and produced
  _signal.setReleaseSize(_data.input_frames_used);
  _resampled.setReleaseSize(_data.output_frames_gen);

  releaseData();

  EXEC_DEBUG("released");

  return OK;
}
Exemplo n.º 18
0
void AudioLoader::openAudioFile(const string& filename) {
    E_DEBUG(EAlgorithm, "AudioLoader: opening file: " << filename);

    // Open file
    int errnum;
    if ((errnum = avformat_open_input(&_demuxCtx, filename.c_str(), NULL, NULL)) != 0) {
        char errorstr[128];
        string error = "Unknown error";
        if (av_strerror(errnum, errorstr, 128) == 0) error = errorstr;
        throw EssentiaException("AudioLoader: Could not open file \"", filename, "\", error = ", error);
    }

    // Retrieve stream information
    if ((errnum = avformat_find_stream_info(_demuxCtx, NULL)) < 0) {
        char errorstr[128];
        string error = "Unknown error";
        if (av_strerror(errnum, errorstr, 128) == 0) error = errorstr;
        avformat_close_input(&_demuxCtx);
        _demuxCtx = 0;
        throw EssentiaException("AudioLoader: Could not find stream information, error = ", error);
    }

    // Dump information about file onto standard error
    //dump_format(_demuxCtx, 0, filename.c_str(), 0);

    // Check that we have only 1 audio stream in the file
    _streams.clear();
    for (int i=0; i<(int)_demuxCtx->nb_streams; i++) {
        if (_demuxCtx->streams[i]->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
            _streams.push_back(i);
        }
    }
    int nAudioStreams = _streams.size();
    
    if (nAudioStreams == 0) {
        avformat_close_input(&_demuxCtx);
        _demuxCtx = 0;
        throw EssentiaException("AudioLoader ERROR: found 0 streams in the file, expecting one or more audio streams");
    }

    if (_selectedStream >= nAudioStreams) {
        avformat_close_input(&_demuxCtx);
        _demuxCtx = 0;
        throw EssentiaException("AudioLoader ERROR: 'audioStream' parameter set to ", _selectedStream ,". It should be smaller than the audio streams count, ", nAudioStreams);
    }

    _streamIdx = _streams[_selectedStream];

    // Load corresponding audio codec
    _audioCtx = _demuxCtx->streams[_streamIdx]->codec;
    _audioCodec = avcodec_find_decoder(_audioCtx->codec_id);

    if (!_audioCodec) {
        throw EssentiaException("AudioLoader: Unsupported codec!");
    }

    if (avcodec_open2(_audioCtx, _audioCodec, NULL) < 0) {
        throw EssentiaException("AudioLoader: Unable to instantiate codec...");
    }
  
    // Configure format convertion  (no samplerate conversion yet)
    int64_t layout = av_get_default_channel_layout(_audioCtx->channels);

    /*
    const char* fmt = 0;
    get_format_from_sample_fmt(&fmt, _audioCtx->sample_fmt);
    E_DEBUG(EAlgorithm, "AudioLoader: converting from " << (fmt ? fmt : "unknown") << " to FLT");
    */

    E_DEBUG(EAlgorithm, "AudioLoader: using sample format conversion from libavresample");
    _convertCtxAv = avresample_alloc_context();
        
    av_opt_set_int(_convertCtxAv, "in_channel_layout", layout, 0);
    av_opt_set_int(_convertCtxAv, "out_channel_layout", layout, 0);
    av_opt_set_int(_convertCtxAv, "in_sample_rate", _audioCtx->sample_rate, 0);
    av_opt_set_int(_convertCtxAv, "out_sample_rate", _audioCtx->sample_rate, 0);
    av_opt_set_int(_convertCtxAv, "in_sample_fmt", _audioCtx->sample_fmt, 0);
    av_opt_set_int(_convertCtxAv, "out_sample_fmt", AV_SAMPLE_FMT_FLT, 0);

    if (avresample_open(_convertCtxAv) < 0) {
        throw EssentiaException("AudioLoader: Could not initialize avresample context");
    }

    av_init_packet(&_packet);

    _decodedFrame = av_frame_alloc();
    if (!_decodedFrame) {
        throw EssentiaException("AudioLoader: Could not allocate audio frame");
    }

    av_md5_init(_md5Encoded);
}
Exemplo n.º 19
0
// Computes a novelty curve from a [frames x bands] matrix of frequency band
// values.
//
// Steps:
//  1. a per-band novelty function is computed on the transposed input;
//  2. the per-band novelties are summed over bands four times, each time with
//     a different weighting curve (flat, quadratic, linear, inverse quadratic);
//  3. the four weighted sums are multiplied together frame by frame;
//  4. the product is smoothed with a MovingAverage of size meanSize.
//
// Throws EssentiaException if the input matrix is empty.
//
// NOTE: this overrides the configured weighting type: _type is left set to
// INVERSE_QUADRATIC when the function returns (see the TODO below).
void NoveltyCurve::compute() {
  const vector<vector<Real> >& frequencyBands = _frequencyBands.get();
  vector<Real>& novelty = _novelty.get();
  if (frequencyBands.empty())
    throw EssentiaException("NoveltyCurve::compute, cannot compute from an empty input matrix");

  int nFrames = (int)frequencyBands.size();
  int nBands = (int)frequencyBands[0].size();
  // the summation loops below index nFrames-1 entries per band: one value
  // fewer than there are input frames
  novelty.resize(nFrames-1);
  fill(novelty.begin(), novelty.end(), Real(0.0));

  vector<vector<Real> > t_frequencyBands = essentia::transpose(frequencyBands); // [bands x frames]
  vector<vector<Real> > noveltyBands(nBands);

  int meanSize = int(0.1 * _frameRate); // integral number of frames in 2*0.05 second

  // compute novelty for each sub-band
  meanSize += (meanSize % 2); // force even size // TODO: why?
  for (int bandIdx=0; bandIdx<nBands; bandIdx++) {
    // NOTE(review): the meaning of the constant 1000 is defined by
    // noveltyFunction(), which is not visible from here
    noveltyBands[bandIdx] = noveltyFunction(t_frequencyBands[bandIdx], 1000, meanSize);
  }
  /////////////////////////////////////////////////////////////////////////////
  // TODO: By trial-&-error I found that combining weightings (flat, quadratic,
  // linear and inverse quadratic) was giving better results. Should this be
  // left as is or should we allow the algorithm to work with the given
  // weightings from the configuration. This overrides the parameters, so if
  // left as is, they should be removed as well.
  /////////////////////////////////////////////////////////////////////////////
  // weightCurve() is called once per value assigned to _type, so the member is
  // reassigned before every call
  _type = FLAT;
  vector<Real> aweights = weightCurve(nBands);
  _type = QUADRATIC;
  vector<Real> bweights = weightCurve(nBands);
  _type = LINEAR;
  vector<Real> cweights = weightCurve(nBands);
  _type = INVERSE_QUADRATIC;
  vector<Real> dweights = weightCurve(nBands);

  // sum novelty on all bands (weighted) to get a single novelty value per frame
  noveltyBands = essentia::transpose(noveltyBands); // back to [frames x bands]
  vector<Real> bnovelty(nFrames-1, 0.0);
  vector<Real> cnovelty(nFrames-1, 0.0);
  vector<Real> dnovelty(nFrames-1, 0.0);
  for (int frameIdx=0; frameIdx<nFrames-1; frameIdx++) { // nFrames -1 as noveltyBands is a derivative whose size is nframes-1
    const vector<Real>& frame = noveltyBands[frameIdx];
    for (int bandIdx=0; bandIdx<nBands; bandIdx++) {
      novelty[frameIdx] += aweights[bandIdx] * frame[bandIdx];
      bnovelty[frameIdx] += bweights[bandIdx] * frame[bandIdx];
      cnovelty[frameIdx] += cweights[bandIdx] * frame[bandIdx];
      dnovelty[frameIdx] += dweights[bandIdx] * frame[bandIdx];
    }
  }

  // combine the four weighted sums by multiplying them together
  for (int frameIdx=0; frameIdx<nFrames-1; frameIdx++) {
    novelty[frameIdx] *= bnovelty[frameIdx];
    novelty[frameIdx] *= cnovelty[frameIdx];
    novelty[frameIdx] *= dnovelty[frameIdx];
  }

  // smooth the combined curve; the helper algorithm is owned by this function
  // and must be destroyed on every exit path, including when it throws
  Algorithm* mavg = AlgorithmFactory::create("MovingAverage", "size", meanSize);
  vector<Real> novelty_ma;
  try {
    mavg->input("signal").set(novelty);
    mavg->output("signal").set(novelty_ma);
    mavg->compute();
  }
  catch (...) {
    delete mavg;
    throw;
  }
  delete mavg;

  novelty.assign(novelty_ma.begin(), novelty_ma.end());
}
Exemplo n.º 20
0
// Cuts one frame of _frameSize samples out of the incoming audio stream and
// pushes it as a single token on the frames output.
//
// Bookkeeping:
//  - _startIndex is the stream position at which the next frame starts (it can
//    be negative, in which case the frame is zero-padded on the left);
//  - _streamIndex is advanced by the number of audio tokens released, in every
//    path that releases tokens and then continues cutting frames.
//
// Returns:
//  - NO_INPUT  if there is not enough audio available yet, or if the last
//              (incomplete) frame is shorter than _validFrameThreshold and
//              gets dropped;
//  - NO_OUTPUT if acquireData() reports it;
//  - OK        after a skip step, after a frame was produced, or after a
//              silent frame was dropped;
//  - PASS      after the last frame of the stream was produced.
//
// Throws EssentiaException if acquireData() returns an unexpected status.
AlgorithmStatus FrameCutter::process() {
  bool lastFrame = false;

  EXEC_DEBUG("process()");

  // if _streamIndex < _startIndex, we need to advance into the stream until we
  // arrive at _startIndex
  if (_streamIndex < _startIndex) {
    // to make sure we can skip that many, use frameSize (buffer has been resized
    // to be able to accomodate at least that many sample before starting processing)
    int skipSize = _frameSize;
    int howmuch = min(_startIndex - _streamIndex, skipSize);
    _audio.setAcquireSize(howmuch);
    _audio.setReleaseSize(howmuch);
    _frames.setAcquireSize(0);
    _frames.setReleaseSize(0);

    if (acquireData() != OK) return NO_INPUT;

    releaseData();
    _streamIndex += howmuch;

    return OK;
  }

  // need to know whether we have to zero-pad on the left: ie, _startIndex < 0
  int zeropadSize = 0;
  int acquireSize = _frameSize;
  int releaseSize = min(_hopSize, _frameSize); // in case hopsize > framesize
  int available = _audio.available();

  // we need this check anyway because we might be at the very end of the stream and try to acquire 0
  // for our last frame, which will unfortunately work, so just get rid of this case right now
  if (available == 0) return NO_INPUT;

  if (_startIndex < 0) {
    // left zero-padding and only acquire  as much as _frameSize + startIndex tokens and should release zero
    acquireSize = _frameSize + _startIndex;
    releaseSize = 0;
    zeropadSize = -_startIndex;
  }

  // if there are not enough tokens in the stream (howmuch < available):
  if (acquireSize >= available) { // has to be >= in case the size of the audio fits exactly with frameSize & hopSize
    if (!shouldStop()) return NO_INPUT; // not end of stream -> return and wait for more data to come

    acquireSize = available; // need to acquire what's left
    releaseSize = _startIndex >= 0 ? min(available, _hopSize) : 0; // cannot release more tokens than there are available
    if (_startFromZero) {
      if (_lastFrameToEndOfFile) {
        if (_startIndex >= _streamIndex+available) lastFrame = true;
      }
      else lastFrame = true;
    }
    else {
      if (_startIndex + _frameSize/2 >= _streamIndex + available) // center of frame >= end of stream
        lastFrame = true;
    }
  }

  _frames.setAcquireSize(1);
  _frames.setReleaseSize(1);
  _audio.setAcquireSize(acquireSize);
  _audio.setReleaseSize(releaseSize);

  /*
  EXEC_DEBUG("zeropadSize: " << zeropadSize
             << "\tacquireSize: " << acquireSize
             << "\treleaseSize: " << releaseSize
             << "\tavailable: " << available
             << "\tlast frame: " << lastFrame
             << "\tstartIndex: " << _startIndex
             << "\tstreamIndex: " << _streamIndex);
  */

  AlgorithmStatus status = acquireData();
  EXEC_DEBUG("data acquired (audio: " << acquireSize << " - frames: 1)");

  if (status != OK) {
    if (status == NO_INPUT) return NO_INPUT;
    if (status == NO_OUTPUT) return NO_OUTPUT;
    throw EssentiaException("FrameCutter: something weird happened.");
  }

  // some semantic description to not get mixed up between the 2 meanings
  // of a vector<Real> (which acts both as a stream of Real tokens at the
  // input and as a single vector<Real> token at the output)
  typedef vector<AudioSample> Frame;

  // get the audio input and copy it as a frame to the output
  const vector<AudioSample>& audio = _audio.tokens();
  Frame& frame = _frames.firstToken();


  frame.resize(_frameSize);

  // left zero-padding of the frame
  int idxInFrame = 0;
  for (; idxInFrame < zeropadSize; idxInFrame++) {
    frame[idxInFrame] = (Real)0.0;
  }

  // the acquired samples go right after the left zero-padding
  fastcopy(frame.begin()+idxInFrame, audio.begin(), acquireSize);
  idxInFrame += acquireSize;

  // check if the idxInFrame is below the threshold (this would only happen
  // for the last frame in the stream) and if so, don't produce data
  if (idxInFrame < _validFrameThreshold) {
    E_INFO("FrameCutter: dropping incomplete frame");

    // release inputs (advance to next frame), but not the output frame (we didn't produce anything)
    _audio.release(_audio.releaseSize());
    return NO_INPUT;
  }

  // right zero-padding on the last frame
  for (; idxInFrame < _frameSize; idxInFrame++) {
    frame[idxInFrame] = (Real)0.0;
  }

  _startIndex += _hopSize;

  if (isSilent(frame)) {
    switch (_silentFrames) {
    case DROP:
      E_INFO("FrameCutter: dropping silent frame");

      // release inputs (advance to next frame), but not the output frame (we didn't produce anything)
      _audio.release(_audio.releaseSize());
      // the released tokens have been consumed from the stream, so the stream
      // position has to follow; otherwise the next call sees
      // _streamIndex < _startIndex and skips an additional hop of audio
      _streamIndex += _audio.releaseSize();
      return OK;

    case ADD_NOISE: {
      // rebuild the input of the noise adder with the same layout as the
      // frame: the audio samples sit after the left zero-padding in both
      // buffers, so source and destination use the same offset
      vector<AudioSample> inputFrame(_frameSize, 0.0);
      fastcopy(&inputFrame[0]+zeropadSize, &frame[0]+zeropadSize, acquireSize);
      _noiseAdder->input("signal").set(inputFrame);
      _noiseAdder->output("signal").set(frame);
      _noiseAdder->compute();
      break;
    }

    // otherwise, don't do nothing...
    case KEEP:
    default:
      ;
    }
  }

  EXEC_DEBUG("produced frame; releasing");
  releaseData();
  _streamIndex += _audio.releaseSize();

  EXEC_DEBUG("released");

  if (lastFrame) return PASS;

  return OK;
}
Exemplo n.º 21
0
// Returns the reader ID stored in this sink.
// Throws EssentiaException when no source is set on the sink.
ReaderID SinkBase::id() const {
  // NOTE: a sink connected to a sourceproxy does have a _source set, yet its
  // ID is still invalid; that case is not detected here.
  if (!_source) {
    throw EssentiaException("Undefined reader ID for sink ", fullName());
  }

  return _id;
}