예제 #1
0
KeyFinderResultWrapper keyDetectionProcess(const AsyncFileObject& object){

  KeyFinderResultWrapper result;
  result.batchRow = object.batchRow;

  // initialise stream and decode file into it.
  KeyFinder::AudioData* audio = NULL;
  AudioFileDecoder* decoder = NULL;
  try{
    decoder = AudioFileDecoder::getDecoder();
  }catch(KeyFinder::Exception& e){
    delete decoder;
    result.errorMessage = QString(e.what().c_str());
    return result;
  }

  try{
    audio = decoder->decodeFile(object.filePath);
    delete decoder;
  }catch(KeyFinder::Exception& e){
    delete audio;
    delete decoder;
    result.errorMessage = QString(e.what().c_str());
    return result;
  }

  // make audio stream monaural ahead of downsample to reduce load
  audio->reduceToMono();

  // downsample if necessary
  if(object.prefs.getDFactor() > 1){
    Downsampler* ds = Downsampler::getDownsampler(object.prefs.getDFactor(),audio->getFrameRate(),object.prefs.getLastFreq());
    try{
      audio = ds->downsample(audio,object.prefs.getDFactor());
    }catch(KeyFinder::Exception& e){
      delete audio;
      delete ds;
      result.errorMessage = QString(e.what().c_str());
      return result;
    }
    delete ds;
  }

  KeyFinder::KeyFinder* kf = LibKeyFinderSingleton::getInstance()->getKeyFinder();
  result.core = kf->findKey(*audio, object.prefs.core);

  delete audio;

  return result;
}
예제 #2
0
int main(int argc, char* argv[]) {
  
  FILE*	fin;
  FILE* fout;
  SDownsamplerCtx	sDownsamplerCtx;
  SDownsamplerCtx*	pDownsamplerCtx = &sDownsamplerCtx;
  Downsampler*	pDownsampler = NULL;
  int iMaxFrameNum = 0;
  int iFileSize = 0;
  int iFrameSize = 0;
  
  memset(&sDownsamplerCtx, 0, sizeof(sDownsamplerCtx));
  
  parseCmdLine(argc, argv, pDownsamplerCtx);
  ChooseDownsampler(pDownsamplerCtx, &pDownsampler);
  
  fin = fopen((const char*)pDownsamplerCtx->pInfileName, "rb");
  if(!fin) {
    cout << "Error: can not open input file!" << endl;
    exit(1);
  }
  fseek(fin, 0L, SEEK_END);
  iFileSize = (int)ftell(fin);
  fseek(fin, 0L, SEEK_SET);
  
  fout = fopen((const char*)pDownsamplerCtx->pOutfileName, "wb");
  if(!fout) {
    cout << "Error: can not open output file!" << endl;
    exit(1);
  }
  
  //caculate max frame number
  iFrameSize = pDownsamplerCtx->iSrcWidth * pDownsamplerCtx->iSrcHeight * 1.5;
  iMaxFrameNum = iFileSize / iFrameSize;
  
  pDownsampler->InitDownsampler(pDownsamplerCtx);
  
  for(int i = 0; i < iMaxFrameNum; i++) {
    pDownsampler->ReadFrameFromFile(fin);
    pDownsampler->Processing();
    pDownsampler->WriteFrameToFile(fout);
  }
  
  //downsample end
  delete pDownsampler;
  fclose(fin);
  fclose(fout);
  
  return 0;
}
예제 #3
0
  KeyDetectionResult KeyFinder::findKey(const AudioData& originalAudio, const Parameters& params){

    KeyDetectionResult result;

    AudioData* workingAudio = new AudioData(originalAudio);

    workingAudio->reduceToMono();

    Downsampler ds;
    ds.downsample(workingAudio, params.getLastFreq(), &lpfFactory);

    SpectrumAnalyser* sa = new SpectrumAnalyser(workingAudio->getFrameRate(), params, &ctFactory);

    // run spectral analysis
    Chromagram* ch = sa->chromagram(workingAudio);

    delete workingAudio;
    delete sa;

    // reduce chromagram
    ch->reduceTuningBins(params);
    result.fullChromagram = Chromagram(*ch);
    ch->reduceToOneOctave(params);
    result.oneOctaveChromagram = Chromagram(*ch);

    // get harmonic change signal
    Segmentation* segmenter = Segmentation::getSegmentation(params);
    result.harmonicChangeSignal = segmenter->getRateOfChange(*ch, params);

    // get track segmentation
    std::vector<unsigned int> segmentBoundaries = segmenter->getSegments(result.harmonicChangeSignal, params);
    segmentBoundaries.push_back(ch->getHops()); // sentinel
    delete segmenter;

    // get key estimates for each segment
    KeyClassifier hc(params);
    std::vector<float> keyWeights(24); // TODO: not ideal using int cast of key_t enum. Hash?

    for (int s = 0; s < (signed)segmentBoundaries.size()-1; s++){
      KeyDetectionSegment segment;
      segment.firstWindow = segmentBoundaries[s];
      segment.lastWindow = segmentBoundaries[s+1] - 1;
      // collapse segment's time dimension, for a single chroma vector and a single energy value
      std::vector<float> segmentChroma(ch->getBins());
      // for each relevant hop of the chromagram
      for (unsigned int hop = segment.firstWindow; hop <= segment.lastWindow; hop++) {
        // for each bin
        for (unsigned int bin = 0; bin < ch->getBins(); bin++) {
          float value = ch->getMagnitude(hop, bin);
          segmentChroma[bin] += value;
          segment.energy += value;
        }
      }
      segment.key = hc.classify(segmentChroma);
      if(segment.key != SILENCE){
        keyWeights[segment.key] += segment.energy;
      }
      result.segments.push_back(segment);
    }

    delete ch;

    // get global key
    result.globalKeyEstimate = SILENCE;
    float mostCommonKeyWeight = 0.0;
    for (int k = 0; k < (signed)keyWeights.size(); k++){
      if(keyWeights[k] > mostCommonKeyWeight){
        mostCommonKeyWeight = keyWeights[k];
        result.globalKeyEstimate = (key_t)k;
      }
    }

    return result;

  }
void KeyFinderWorkerThread::run(){
	if(!haveParams){
		emit failed("No parameters.");
		return;
	}
	// initialise stream and decode file into it
	AudioStream* astrm = NULL;
  AudioFileDecoder* dec = AudioFileDecoder::getDecoder(filePath.toUtf8().data());
	try{
    astrm = dec->decodeFile(filePath.toUtf8().data());
	}catch(Exception){
		delete astrm;
		delete dec;
		emit failed("Could not decode file.");
		return;
	}
	delete dec;
	emit decoded();

	// make audio stream monaural
	astrm->reduceToMono();
	emit madeMono();

	// downsample if necessary
	if(prefs.getDFactor() > 1){
		Downsampler* ds = Downsampler::getDownsampler(prefs.getDFactor(),astrm->getFrameRate(),prefs.getLastFreq());
		try{
			astrm = ds->downsample(astrm,prefs.getDFactor());
		}catch(Exception){
			delete astrm;
			delete ds;
			emit failed("Downsampler failed.");
			return;
		}
		delete ds;
		emit downsampled();
	}

	// start spectrum analysis
	SpectrumAnalyser* sa = NULL;
  Chromagram* ch = NULL;
  sa = SpectrumAnalyserFactory::getInstance()->getSpectrumAnalyser(astrm->getFrameRate(),prefs);
  ch = sa->chromagram(astrm);
  delete astrm; // note we don't delete the spectrum analyser; it stays in the centralised factory for reuse.
  ch->reduceTuningBins(prefs);
	emit producedFullChromagram(*ch);

	// reduce chromagram
	ch->reduceToOneOctave(prefs);
	emit producedOneOctaveChromagram(*ch);

	// get energy level across track to weight segments
	std::vector<float> loudness(ch->getHops());
	for(int h=0; h<ch->getHops(); h++)
		for(int b=0; b<ch->getBins(); b++)
			loudness[h] += ch->getMagnitude(h,b);

	// get harmonic change signal
	Segmentation* hcdf = Segmentation::getSegmentation(prefs);
	std::vector<double> harmonicChangeSignal = hcdf->getRateOfChange(ch,prefs);
	emit producedHarmonicChangeSignal(harmonicChangeSignal);

	// get track segmentation
  std::vector<int> changes = hcdf->getSegments(harmonicChangeSignal,prefs);
  changes.push_back(ch->getHops()); // It used to be getHops()-1. But this doesn't crash. So we like it.

	// batch output of keychange locations for Beatles experiment
	//for(int i=1; i<changes.size(); i++) // don't want the leading zero
	//	std::cout << filePath.substr(53) << "\t" << std::fixed << std::setprecision(2) << changes[i]*(prefs.getHopSize()/(44100.0/prefs.getDFactor())) << std::endl;
	// end experiment output

	// get key estimates for segments
	KeyClassifier hc(prefs);
	std::vector<int> keys(0);
	std::vector<float> keyWeights(24);
  for(int i=0; i<(signed)changes.size()-1; i++){
    std::vector<double> chroma(ch->getBins());
		for(int j=changes[i]; j<changes[i+1]; j++)
			for(int k=0; k<ch->getBins(); k++)
        chroma[k] += ch->getMagnitude(j,k);
    int key = hc.classify(chroma);
    for(int j=changes[i]; j<changes[i+1]; j++){
			keys.push_back(key);
			if(key < 24) // ignore parts that were classified as silent
				keyWeights[key] += loudness[j];
    }
	}
	keys.push_back(keys[keys.size()-1]); // put last key on again to match length of track
	delete ch;
	emit producedKeyEstimates(keys);

	// get global key
	int mostCommonKey = 24;
	float mostCommonKeyWeight = 0.0;
	for(int i=0; i<(signed)keyWeights.size(); i++){
		if(keyWeights[i] > mostCommonKeyWeight){
			mostCommonKeyWeight = keyWeights[i];
			mostCommonKey = i;
		}
	}
	emit producedGlobalKeyEstimate(mostCommonKey);
	return;
}
예제 #5
0
  KeyDetectionResult KeyFinder::findKey(const AudioData& originalAudio, const Parameters& params){

    KeyDetectionResult result;

    AudioData* workingAudio = new AudioData(originalAudio);

    workingAudio->reduceToMono();

    // TODO: there is presumably some good maths to determine filter frequencies
    float lpfCutoff = params.getLastFrequency() * 1.05;
    float dsCutoff = params.getLastFrequency() * 1.10;
    unsigned int downsampleFactor = (int)floor( workingAudio->getFrameRate() / 2 / dsCutoff );

    // get filter
    LowPassFilter* lpf = lpfFactory.getLowPassFilter(160, workingAudio->getFrameRate(), lpfCutoff, 2048);
    // feeding in the downsampleFactor for a shortcut
    lpf->filter(workingAudio, downsampleFactor);
    // note we don't delete the LPF; it's stored in the factory for reuse

    Downsampler ds;
    ds.downsample(workingAudio, downsampleFactor);

    SpectrumAnalyser* sa = new SpectrumAnalyser(workingAudio->getFrameRate(), params, &ctFactory);

    // run spectral analysis
    Chromagram* ch = sa->chromagram(workingAudio);

    delete workingAudio;
    delete sa;

    // reduce chromagram
    ch->reduceTuningBins(params);
    result.fullChromagram = Chromagram(*ch);
    ch->reduceToOneOctave(params);
    result.oneOctaveChromagram = Chromagram(*ch);

    // get harmonic change signal
    Segmentation* segmenter = Segmentation::getSegmentation(params);
    result.harmonicChangeSignal = segmenter->getRateOfChange(*ch, params);

    // get track segmentation
    std::vector<unsigned int> segmentBoundaries = segmenter->getSegments(result.harmonicChangeSignal, params);
    segmentBoundaries.push_back(ch->getHops()); // sentinel
    delete segmenter;

    // get key estimates for each segment
    KeyClassifier hc(params);
    std::vector<float> keyWeights(24); // TODO: not ideal using int cast of key_t enum. Hash?

    for (int s = 0; s < (signed)segmentBoundaries.size()-1; s++){
      KeyDetectionSegment segment;
      segment.firstHop = segmentBoundaries[s];
      segment.lastHop = segmentBoundaries[s+1] - 1;
      // collapse segment's time dimension
      std::vector<float> segmentChroma(ch->getBins());
      for (unsigned int hop = segment.firstHop; hop <= segment.lastHop; hop++) {
        for (unsigned int bin = 0; bin < ch->getBins(); bin++) {
          float value = ch->getMagnitude(hop, bin);
          segmentChroma[bin] += value;
          segment.energy += value;
        }
      }
      segment.key = hc.classify(segmentChroma);
      if(segment.key != SILENCE){
        keyWeights[segment.key] += segment.energy;
      }
      result.segments.push_back(segment);
    }

    delete ch;

    // get global key
    result.globalKeyEstimate = SILENCE;
    float mostCommonKeyWeight = 0.0;
    for (int k = 0; k < (signed)keyWeights.size(); k++){
      if(keyWeights[k] > mostCommonKeyWeight){
        mostCommonKeyWeight = keyWeights[k];
        result.globalKeyEstimate = (key_t)k;
      }
    }

    return result;

  }