예제 #1
0
  /**
   * Estimates the key of the chromagram held in the workspace.
   *
   * A working copy of workspace.chroma is reduced to one octave and split
   * into segments. Each segment's chroma is summed over time and classified,
   * and every segment not classified as SILENCE adds its summed magnitude to
   * the weight of its key. The key with the greatest positive weight becomes
   * the global estimate.
   *
   * @param workspace  Supplies the chromagram; only workspace.chroma is read.
   * @param params     Segmentation and classifier settings.
   * @return Per-segment results plus the global key estimate. The estimate is
   *         SILENCE when no key gathered positive weight, or when the
   *         workspace holds no chromagram yet.
   */
  KeyDetectionResult KeyFinder::keyOfChromagram(
    Workspace& workspace,
    const Parameters& params
  ) const {

    KeyDetectionResult result;
    result.globalKeyEstimate = SILENCE;

    // No chromagram has been produced yet: report silence rather than
    // dereferencing a null pointer.
    if (!workspace.chroma) {
      return result;
    }

    // Working copy of the chromagram. It is an automatic object so that it is
    // released on every exit path, exceptions included. `ch` stays a pointer
    // because the segmenter below is called with one.
    Chromagram oneOctave(*workspace.chroma);
    Chromagram* ch = &oneOctave;
    ch->reduceToOneOctave();

    // get harmonic change signal and segment
    Segmentation segmenter;
    std::vector<unsigned int> segmentBoundaries = segmenter.getSegmentationBoundaries(ch, params);
    segmentBoundaries.push_back(ch->getHops()); // sentinel

    // get key estimates for each segment
    KeyClassifier classifier(
      params.getSimilarityMeasure(),
      params.getToneProfile(),
      params.getOffsetToC(),
      params.getCustomToneProfile()
    );

    std::vector<float> keyWeights(24); // TODO: not ideal using int cast of key_t enum. Hash?

    const unsigned int bands = ch->getBands();

    // Each pair of neighbouring boundaries delimits one segment; the sentinel
    // pushed above closes the last one.
    for (unsigned int s = 0; s + 1 < segmentBoundaries.size(); s++) {
      const unsigned int beginHop = segmentBoundaries[s];
      const unsigned int endHop   = segmentBoundaries[s + 1]; // exclusive

      KeyDetectionResultSegment segment;
      segment.firstHop = beginHop;
      // NOTE: for a zero-length segment (e.g. an empty chromagram) this
      // unsigned subtraction wraps. The stored value is kept as before, but
      // the loop below uses the exclusive bound so it cannot run away.
      segment.lastHop  = endHop - 1;

      // collapse segment's time dimension
      // NOTE(review): segment.energy is accumulated without being set here;
      // presumably the segment's constructor zeroes it. Confirm.
      std::vector<float> segmentChroma(bands, 0.0);
      for (unsigned int hop = beginHop; hop < endHop; hop++) {
        for (unsigned int band = 0; band < bands; band++) {
          float value = ch->getMagnitude(hop, band);
          segmentChroma[band] += value;
          segment.energy += value;
        }
      }
      segment.chromaVector = segmentChroma;
      segment.key = classifier.classify(segmentChroma);
      if (segment.key != SILENCE)
        keyWeights[segment.key] += segment.energy;
      result.segments.push_back(segment);
    }

    // get global key: the strictly heaviest key wins, earlier keys win ties
    float mostCommonKeyWeight = 0.0;
    for (int k = 0; k < (signed)keyWeights.size(); k++) {
      if (keyWeights[k] > mostCommonKeyWeight) {
        mostCommonKeyWeight = keyWeights[k];
        result.globalKeyEstimate = (key_t)k;
      }
    }

    return result;
  }
예제 #2
0
 /**
  * Builds a rate-of-change signal from the chromagram, one value per hop.
  *
  * Steps:
  *  1. For every hop, divide the sum of the bin magnitudes by their Euclidean
  *     norm and scale by sqrt(bins * sqrt(2)); hops with no energy give 0.
  *  2. Smooth that signal with a Gaussian window of the configured size and
  *     sigma. Samples that would fall outside the signal contribute nothing
  *     (no padding is applied).
  *  3. Take the absolute hop-to-hop difference of the smoothed signal and
  *     scale it by the constants below.
  *
  * @param ch      Chromagram to analyse.
  * @param params  Supplies the Gaussian size and sigma.
  * @return A vector with ch.getHops() entries. Entry 0 copies entry 1 when
  *         there are at least two hops; otherwise it is left at 0.
  */
 std::vector<float> CosineHcdf::getRateOfChange(const Chromagram& ch, const Parameters& params){
   const unsigned int hops = ch.getHops();
   const unsigned int bins = ch.getBins();
   const unsigned int gaussianSize = params.getHcdfGaussianSize();
   const float gaussianSigma = params.getHcdfGaussianSigma();

   // per-hop measure
   std::vector<float> cosine(hops);
   for (unsigned int hop = 0; hop < hops; hop++){
     float top = 0.0;
     float bottom = 0.0;
     for (unsigned int bin = 0; bin < bins; bin++){
       const float mag = ch.getMagnitude(hop, bin);
       top += mag;
       bottom += static_cast<double>(mag) * mag;
     }
     float value = 0.0;
     if (bottom > 0.0) // divzero
       value = top / sqrt(bottom) * sqrt(bins * sqrt(2));
     cosine[hop] = value;
   }

   // gaussian
   // The distance from the window centre is computed in floating point.
   // Subtracting the two unsigned values directly wraps around for every
   // index left of the centre, which used to zero that half of the window.
   const double centre = static_cast<double>(gaussianSize / 2);
   std::vector<float> gaussian(gaussianSize);
   for (unsigned int i = 0; i < gaussianSize; i++){
     const double offset = static_cast<double>(i) - centre;
     gaussian[i] = exp(-1 * (offset * offset / (2 * gaussianSigma * gaussianSigma)));
   }

   // convolve; the frame index is signed so that positions before the start
   // of the signal are detected explicitly instead of via unsigned wrap
   const int halfWidth = static_cast<int>(gaussianSize / 2);
   const int signalLength = static_cast<int>(hops);
   std::vector<float> smoothed(hops);
   for (unsigned int hop = 0; hop < hops; hop++){
     float conv = 0.0;
     for (unsigned int k = 0; k < gaussianSize; k++){
       const int frm = static_cast<int>(hop) - halfWidth + static_cast<int>(k);
       if (frm >= 0 && frm < signalLength){
         conv += cosine[frm] * gaussian[k];
       }
     }
     smoothed[hop] = conv;
   }

   // rate of change of hcdf signal; look at all hops except first.
   std::vector<float> rateOfChange(hops);
   for (unsigned int hop = 1; hop < hops; hop++){
     // NOTE(review): the origin of the 90.0 divisor is not documented here
     float change = (smoothed[hop] - smoothed[hop-1]) / 90.0;
     change = (change >= 0 ? change : change * -1.0);
     change = change / 0.16; // very cheeky magic number; for display purposes in KeyFinder GUI app
     rateOfChange[hop] = change;
   }

   // fudge first; only possible when a second hop exists to copy from
   if (hops > 1){
     rateOfChange[0] = rateOfChange[1];
   }
   return rateOfChange;
 }
예제 #3
0
 /**
  * Turns the whole frames currently held in workspace.buffer into a
  * chromagram and stores it in workspace.chroma, appending to any chromagram
  * already there.
  *
  * @param workspace  Holds the audio buffer, the FFT adapter (created here on
  *                   first use) and the accumulated chromagram.
  * @param params     Supplies FFT frame size, hop size and tuning settings.
  */
 void KeyFinder::chromagramOfBufferedAudio(
   Workspace& workspace,
   const Parameters& params
 ) {
   // create the FFT adapter the first time it is needed
   if (workspace.getFftAdapter() == NULL) {
     workspace.setFftAdapter(new FftAdapter(params.getFftFrameSize()));
   }

   SpectrumAnalyser analyser(workspace.buffer.getFrameRate(), params, ctFactory);
   Chromagram* fresh = analyser.chromagramOfWholeFrames(workspace.buffer, workspace.getFftAdapter());

   // tuning is only considered when there is more than one band per semitone
   const bool severalBandsPerSemitone = fresh->getBandsPerSemitone() > 1;
   if (severalBandsPerSemitone && params.getTuningMethod() == TUNING_BAND_ADAPTIVE) {
     fresh->tuningBandAdaptive(params.getDetunedBandWeight());
   } else if (severalBandsPerSemitone && params.getTuningMethod() == TUNING_HARTE) {
     fresh->tuningHarte();
   }

   // discard hopSize * hops frames from the front of the buffer
   workspace.buffer.discardFramesFromFront(params.getHopSize() * fresh->getHops());

   // either extend the existing chromagram or adopt the new one as-is
   if (workspace.chroma != NULL) {
     workspace.chroma->append(*fresh);
     delete fresh;
   } else {
     workspace.chroma = fresh;
   }
 }
예제 #4
0
  /**
   * Runs the full key-detection pipeline on a piece of audio.
   *
   * A copy of the audio is reduced to mono, downsampled and analysed into a
   * chromagram. The chromagram is reduced (tuning bins, then one octave), a
   * harmonic-change signal is derived and used to segment the track, and each
   * segment is classified. Every segment not classified as SILENCE adds its
   * summed magnitude to the weight of its key; the key with the greatest
   * positive weight is the global estimate.
   *
   * @param originalAudio  Audio to analyse; it is copied, not modified.
   * @param params         Analysis settings.
   * @return The full and one-octave chromagrams, the harmonic-change signal,
   *         the per-segment results and the global key estimate (SILENCE when
   *         no key gathered positive weight).
   */
  KeyDetectionResult KeyFinder::findKey(const AudioData& originalAudio, const Parameters& params){

    KeyDetectionResult result;

    // NOTE(review): the raw-pointer objects below are not released if any
    // call in between throws.
    AudioData* workingAudio = new AudioData(originalAudio);

    workingAudio->reduceToMono();

    Downsampler ds;
    ds.downsample(workingAudio, params.getLastFreq(), &lpfFactory);

    SpectrumAnalyser* sa = new SpectrumAnalyser(workingAudio->getFrameRate(), params, &ctFactory);

    // run spectral analysis
    Chromagram* ch = sa->chromagram(workingAudio);

    // the audio and the analyser are no longer needed once the chromagram exists
    delete workingAudio;
    delete sa;

    // reduce chromagram, keeping a copy of each stage in the result
    ch->reduceTuningBins(params);
    result.fullChromagram = Chromagram(*ch);
    ch->reduceToOneOctave(params);
    result.oneOctaveChromagram = Chromagram(*ch);

    // get harmonic change signal
    Segmentation* segmenter = Segmentation::getSegmentation(params);
    result.harmonicChangeSignal = segmenter->getRateOfChange(*ch, params);

    // get track segmentation
    std::vector<unsigned int> segmentBoundaries = segmenter->getSegments(result.harmonicChangeSignal, params);
    segmentBoundaries.push_back(ch->getHops()); // sentinel
    delete segmenter;

    // get key estimates for each segment
    KeyClassifier hc(params);
    std::vector<float> keyWeights(24); // TODO: not ideal using int cast of key_t enum. Hash?

    const unsigned int bins = ch->getBins();

    // Each pair of neighbouring boundaries delimits one segment; the sentinel
    // pushed above closes the last one.
    for (unsigned int s = 0; s + 1 < segmentBoundaries.size(); s++){
      const unsigned int beginHop = segmentBoundaries[s];
      const unsigned int endHop = segmentBoundaries[s + 1]; // exclusive

      KeyDetectionSegment segment;
      segment.firstWindow = beginHop;
      // NOTE: for a zero-length segment (e.g. an empty chromagram) this
      // unsigned subtraction wraps. The stored value is kept as before, but
      // the loop below uses the exclusive bound so it cannot run away.
      segment.lastWindow = endHop - 1;

      // collapse segment's time dimension, for a single chroma vector and a single energy value
      // NOTE(review): segment.energy is accumulated without being set here;
      // presumably the segment's constructor zeroes it. Confirm.
      std::vector<float> segmentChroma(bins);
      for (unsigned int hop = beginHop; hop < endHop; hop++) {
        for (unsigned int bin = 0; bin < bins; bin++) {
          float value = ch->getMagnitude(hop, bin);
          segmentChroma[bin] += value;
          segment.energy += value;
        }
      }
      segment.key = hc.classify(segmentChroma);
      if(segment.key != SILENCE){
        keyWeights[segment.key] += segment.energy;
      }
      result.segments.push_back(segment);
    }

    delete ch;

    // get global key: the strictly heaviest key wins, earlier keys win ties
    result.globalKeyEstimate = SILENCE;
    float mostCommonKeyWeight = 0.0;
    for (int k = 0; k < (signed)keyWeights.size(); k++){
      if(keyWeights[k] > mostCommonKeyWeight){
        mostCommonKeyWeight = keyWeights[k];
        result.globalKeyEstimate = (key_t)k;
      }
    }

    return result;

  }
예제 #5
0
 // Reports no harmonic change at all: the result has one value-initialised
 // (zero) entry per hop of the chromagram. The parameters are not used.
 std::vector<float> ArbitrarySeg::getRateOfChange(const Chromagram& ch, const Parameters& /*params*/){
   return std::vector<float>(ch.getHops());
 }
void KeyFinderWorkerThread::run(){
	if(!haveParams){
		emit failed("No parameters.");
		return;
	}
	// initialise stream and decode file into it
	AudioStream* astrm = NULL;
  AudioFileDecoder* dec = AudioFileDecoder::getDecoder(filePath.toUtf8().data());
	try{
    astrm = dec->decodeFile(filePath.toUtf8().data());
	}catch(Exception){
		delete astrm;
		delete dec;
		emit failed("Could not decode file.");
		return;
	}
	delete dec;
	emit decoded();

	// make audio stream monaural
	astrm->reduceToMono();
	emit madeMono();

	// downsample if necessary
	if(prefs.getDFactor() > 1){
		Downsampler* ds = Downsampler::getDownsampler(prefs.getDFactor(),astrm->getFrameRate(),prefs.getLastFreq());
		try{
			astrm = ds->downsample(astrm,prefs.getDFactor());
		}catch(Exception){
			delete astrm;
			delete ds;
			emit failed("Downsampler failed.");
			return;
		}
		delete ds;
		emit downsampled();
	}

	// start spectrum analysis
	SpectrumAnalyser* sa = NULL;
  Chromagram* ch = NULL;
  sa = SpectrumAnalyserFactory::getInstance()->getSpectrumAnalyser(astrm->getFrameRate(),prefs);
  ch = sa->chromagram(astrm);
  delete astrm; // note we don't delete the spectrum analyser; it stays in the centralised factory for reuse.
  ch->reduceTuningBins(prefs);
	emit producedFullChromagram(*ch);

	// reduce chromagram
	ch->reduceToOneOctave(prefs);
	emit producedOneOctaveChromagram(*ch);

	// get energy level across track to weight segments
	std::vector<float> loudness(ch->getHops());
	for(int h=0; h<ch->getHops(); h++)
		for(int b=0; b<ch->getBins(); b++)
			loudness[h] += ch->getMagnitude(h,b);

	// get harmonic change signal
	Segmentation* hcdf = Segmentation::getSegmentation(prefs);
	std::vector<double> harmonicChangeSignal = hcdf->getRateOfChange(ch,prefs);
	emit producedHarmonicChangeSignal(harmonicChangeSignal);

	// get track segmentation
  std::vector<int> changes = hcdf->getSegments(harmonicChangeSignal,prefs);
  changes.push_back(ch->getHops()); // It used to be getHops()-1. But this doesn't crash. So we like it.

	// batch output of keychange locations for Beatles experiment
	//for(int i=1; i<changes.size(); i++) // don't want the leading zero
	//	std::cout << filePath.substr(53) << "\t" << std::fixed << std::setprecision(2) << changes[i]*(prefs.getHopSize()/(44100.0/prefs.getDFactor())) << std::endl;
	// end experiment output

	// get key estimates for segments
	KeyClassifier hc(prefs);
	std::vector<int> keys(0);
	std::vector<float> keyWeights(24);
  for(int i=0; i<(signed)changes.size()-1; i++){
    std::vector<double> chroma(ch->getBins());
		for(int j=changes[i]; j<changes[i+1]; j++)
			for(int k=0; k<ch->getBins(); k++)
        chroma[k] += ch->getMagnitude(j,k);
    int key = hc.classify(chroma);
    for(int j=changes[i]; j<changes[i+1]; j++){
			keys.push_back(key);
			if(key < 24) // ignore parts that were classified as silent
				keyWeights[key] += loudness[j];
    }
	}
	keys.push_back(keys[keys.size()-1]); // put last key on again to match length of track
	delete ch;
	emit producedKeyEstimates(keys);

	// get global key
	int mostCommonKey = 24;
	float mostCommonKeyWeight = 0.0;
	for(int i=0; i<(signed)keyWeights.size(); i++){
		if(keyWeights[i] > mostCommonKeyWeight){
			mostCommonKeyWeight = keyWeights[i];
			mostCommonKey = i;
		}
	}
	emit producedGlobalKeyEstimate(mostCommonKey);
	return;
}
예제 #7
0
  /**
   * Runs the full key-detection pipeline on a piece of audio.
   *
   * A copy of the audio is reduced to mono, low-pass filtered, downsampled
   * and analysed into a chromagram. The chromagram is reduced (tuning bins,
   * then one octave), a harmonic-change signal is derived and used to segment
   * the track, and each segment is classified. Every segment not classified
   * as SILENCE adds its summed magnitude to the weight of its key; the key
   * with the greatest positive weight is the global estimate.
   *
   * @param originalAudio  Audio to analyse; it is copied, not modified.
   * @param params         Analysis settings.
   * @return The full and one-octave chromagrams, the harmonic-change signal,
   *         the per-segment results and the global key estimate (SILENCE when
   *         no key gathered positive weight).
   */
  KeyDetectionResult KeyFinder::findKey(const AudioData& originalAudio, const Parameters& params){

    KeyDetectionResult result;

    // NOTE(review): the raw-pointer objects below are not released if any
    // call in between throws.
    AudioData* workingAudio = new AudioData(originalAudio);

    workingAudio->reduceToMono();

    // TODO: there is presumably some good maths to determine filter frequencies
    float lpfCutoff = params.getLastFrequency() * 1.05;
    float dsCutoff = params.getLastFrequency() * 1.10;
    // NOTE(review): this evaluates to 0 when the frame rate is below twice
    // dsCutoff. Confirm that the filter and the downsampler accept a factor
    // of 0.
    unsigned int downsampleFactor = (int)floor( workingAudio->getFrameRate() / 2 / dsCutoff );

    // get filter
    LowPassFilter* lpf = lpfFactory.getLowPassFilter(160, workingAudio->getFrameRate(), lpfCutoff, 2048);
    // feeding in the downsampleFactor for a shortcut
    lpf->filter(workingAudio, downsampleFactor);
    // note we don't delete the LPF; it's stored in the factory for reuse

    Downsampler ds;
    ds.downsample(workingAudio, downsampleFactor);

    SpectrumAnalyser* sa = new SpectrumAnalyser(workingAudio->getFrameRate(), params, &ctFactory);

    // run spectral analysis
    Chromagram* ch = sa->chromagram(workingAudio);

    // the audio and the analyser are no longer needed once the chromagram exists
    delete workingAudio;
    delete sa;

    // reduce chromagram, keeping a copy of each stage in the result
    ch->reduceTuningBins(params);
    result.fullChromagram = Chromagram(*ch);
    ch->reduceToOneOctave(params);
    result.oneOctaveChromagram = Chromagram(*ch);

    // get harmonic change signal
    Segmentation* segmenter = Segmentation::getSegmentation(params);
    result.harmonicChangeSignal = segmenter->getRateOfChange(*ch, params);

    // get track segmentation
    std::vector<unsigned int> segmentBoundaries = segmenter->getSegments(result.harmonicChangeSignal, params);
    segmentBoundaries.push_back(ch->getHops()); // sentinel
    delete segmenter;

    // get key estimates for each segment
    KeyClassifier hc(params);
    std::vector<float> keyWeights(24); // TODO: not ideal using int cast of key_t enum. Hash?

    const unsigned int bins = ch->getBins();

    // Each pair of neighbouring boundaries delimits one segment; the sentinel
    // pushed above closes the last one.
    for (unsigned int s = 0; s + 1 < segmentBoundaries.size(); s++){
      const unsigned int beginHop = segmentBoundaries[s];
      const unsigned int endHop = segmentBoundaries[s + 1]; // exclusive

      KeyDetectionSegment segment;
      segment.firstHop = beginHop;
      // NOTE: for a zero-length segment (e.g. an empty chromagram) this
      // unsigned subtraction wraps. The stored value is kept as before, but
      // the loop below uses the exclusive bound so it cannot run away.
      segment.lastHop = endHop - 1;

      // collapse segment's time dimension
      // NOTE(review): segment.energy is accumulated without being set here;
      // presumably the segment's constructor zeroes it. Confirm.
      std::vector<float> segmentChroma(bins);
      for (unsigned int hop = beginHop; hop < endHop; hop++) {
        for (unsigned int bin = 0; bin < bins; bin++) {
          float value = ch->getMagnitude(hop, bin);
          segmentChroma[bin] += value;
          segment.energy += value;
        }
      }
      segment.key = hc.classify(segmentChroma);
      if(segment.key != SILENCE){
        keyWeights[segment.key] += segment.energy;
      }
      result.segments.push_back(segment);
    }

    delete ch;

    // get global key: the strictly heaviest key wins, earlier keys win ties
    result.globalKeyEstimate = SILENCE;
    float mostCommonKeyWeight = 0.0;
    for (int k = 0; k < (signed)keyWeights.size(); k++){
      if(keyWeights[k] > mostCommonKeyWeight){
        mostCommonKeyWeight = keyWeights[k];
        result.globalKeyEstimate = (key_t)k;
      }
    }

    return result;

  }