// default signal processing method is called 'perform' void perform(double **ins, long numins, double **outs, long numouts, long sampleframes) { // char *this_chordname; std::vector<double> chroma; chroma.resize(12); double rms, rms_tot; int q; rms = 0; rms_tot = 0; for (long channel = 0; channel < numouts; channel++) { double * in = ins[channel]; double * out = outs[channel]; for (long i=0; i < sampleframes; i++) { out[i] = in[i]; // we only want to process on channel 0 if (channel == 0) { if (currentsamples < frameSize - 1) { frame[currentsamples] = in[i]; currentsamples++; } else { currentsamples = 0; for (q = 0; q < frameSize; q++) { rms_tot += pow(frame[q],2); } rms = rms_tot / frameSize; if (rms > rms_cutoff){ c.processAudioFrame(frame); outlet_int(m_outlets[3], 1); } else { outlet_int(m_outlets[3], 0); } if (c.isReady()) { std::vector<double> chroma = c.getChromagram(); chordspotter.detectChord(chroma); currentchord = 10000 * chordspotter.rootNote + chordspotter.chord_num; chord_name = chordspotter.chord_name; const char *this_chordname = chord_name.c_str(); if (currentchord != lastchord) { outlet_anything(m_outlets[0], gensym(this_chordname), 0, NULL); outlet_int(m_outlets[1], currentchord); midilist(chordspotter.rootNote, chordspotter.chord_num); } lastchord = currentchord; } } } } } }
std::vector<float> CosineHcdf::getRateOfChange(const Chromagram& ch, const Parameters& params){ unsigned int hops = ch.getHops(); unsigned int bins = ch.getBins(); unsigned int gaussianSize = params.getHcdfGaussianSize(); float gaussianSigma = params.getHcdfGaussianSigma(); unsigned int padding = 0; // as opposed to gaussianSize/2 std::vector<float> cosine(hops+padding); for (unsigned int hop = 0; hop < hops; hop++){ float top = 0.0; float bottom = 0.0; for (unsigned int bin = 0; bin < bins; bin++){ float mag = ch.getMagnitude(hop, bin); top += mag; bottom += pow(mag,2); } float cos; if(bottom > 0.0) // divzero cos = top / sqrt(bottom) * sqrt(bins * sqrt(2)); else cos = 0.0; cosine[hop] = cos; } // gaussian std::vector<float> gaussian(gaussianSize); for (unsigned int i=0; i<gaussianSize; i++){ gaussian[i] = exp(-1 * (pow(i - gaussianSize/2 , 2) / (2 * gaussianSigma * gaussianSigma))); } std::vector<float> smoothed(hops); for (unsigned int hop = padding; hop < cosine.size(); hop++){ float conv = 0.0; for (unsigned int k=0; k<gaussianSize; k++){ int frm = hop - (gaussianSize/2) + k; if(frm >= 0 && frm < (signed)cosine.size()){ conv += cosine[frm] * gaussian[k]; } } smoothed[hop-padding] = conv; } // rate of change of hcdf signal; look at all hops except first. std::vector<float> rateOfChange(hops); for (unsigned int hop=1; hop<hops; hop++){ float change = (smoothed[hop] - smoothed[hop-1]) / 90.0; change = (change >= 0 ? change : change * -1.0); change = change / 0.16; // very cheeky magic number; for display purposes in KeyFinder GUI app rateOfChange[hop] = change; } // fudge first rateOfChange[0] = rateOfChange[1]; return rateOfChange; }
void rate(long inlet, t_symbol * s, long ac, t_atom * av) { t_atom * this_atom; long this_number; this_atom = &av[0]; this_number = atom_getlong(this_atom); if (this_number > 0) { int new_rate = this_number * 512; c.setChromaCalculationInterval(512 * this_number); } }
// Constructs the object: builds the chromagram calculator and chord detector,
// sets the chroma calculation interval to 512, sizes the sample frame to
// `frameSize`, creates `numoutlets` generic outlets and declares one signal
// input and one signal output.
Chordid(t_symbol * sym, long ac, t_atom * av)
    : c(frameSize, sampleRate),
      chordspotter()
{
    c.setChromaCalculationInterval(512);
    frame.resize(frameSize);

    m_outlets = (void **)sysmem_newptr(sizeof(void *) * numoutlets);

    // Outlets are created from the highest index down to index 0.
    for (unsigned int remaining = numoutlets; remaining > 0; remaining--) {
        m_outlets[remaining - 1] = outlet_new(this, NULL); // generic outlet
    }

    setupIO(1, 1);
    // post("object created");
}
// Estimates the key of the chromagram accumulated in `workspace`.
//
// A working copy of the chromagram is reduced to one octave and split into
// segments. Each segment's hops are summed into a single chroma vector, which
// is classified; the segment's total energy is credited to the resulting key.
// The key with the greatest accumulated energy becomes the global estimate.
//
// Returns a result whose `segments` holds one entry per non-empty segment and
// whose `globalKeyEstimate` is SILENCE when no key gathered any energy, or
// when the workspace holds no chromagram yet.
KeyDetectionResult KeyFinder::keyOfChromagram(
  Workspace& workspace,
  const Parameters& params
) const {

  KeyDetectionResult result;
  result.globalKeyEstimate = SILENCE;

  // No audio has been analysed into this workspace; there is nothing to copy.
  if (workspace.chroma == NULL) {
    return result;
  }

  // working copy of chromagram
  Chromagram* ch = new Chromagram(*workspace.chroma);
  ch->reduceToOneOctave();

  // get harmonic change signal and segment
  Segmentation segmenter;
  std::vector<unsigned int> segmentBoundaries = segmenter.getSegmentationBoundaries(ch, params);
  segmentBoundaries.push_back(ch->getHops()); // sentinel

  // get key estimates for each segment
  KeyClassifier classifier(
    params.getSimilarityMeasure(),
    params.getToneProfile(),
    params.getOffsetToC(),
    params.getCustomToneProfile()
  );

  std::vector<float> keyWeights(24); // TODO: not ideal using int cast of key_t enum. Hash?

  for (unsigned int s = 0; s + 1 < segmentBoundaries.size(); s++) {
    const unsigned int firstHop = segmentBoundaries[s];
    const unsigned int endHop = segmentBoundaries[s + 1]; // one past the last hop

    // An empty segment has no last hop; computing `endHop - 1` for endHop == 0
    // would wrap around in unsigned arithmetic and send the hop loop far past
    // the end of the chromagram.
    if (endHop <= firstHop) {
      continue;
    }

    KeyDetectionResultSegment segment;
    segment.firstHop = firstHop;
    segment.lastHop = endHop - 1;

    // collapse segment's time dimension
    std::vector<float> segmentChroma(ch->getBands(), 0.0);
    for (unsigned int hop = firstHop; hop < endHop; hop++) {
      for (unsigned int band = 0; band < ch->getBands(); band++) {
        float value = ch->getMagnitude(hop, band);
        segmentChroma[band] += value;
        segment.energy += value;
      }
    }
    segment.chromaVector = segmentChroma;
    segment.key = classifier.classify(segmentChroma);
    if (segment.key != SILENCE) {
      keyWeights[segment.key] += segment.energy;
    }
    result.segments.push_back(segment);
  }

  delete ch;

  // get global key
  float mostCommonKeyWeight = 0.0;
  for (int k = 0; k < (signed)keyWeights.size(); k++) {
    if (keyWeights[k] > mostCommonKeyWeight) {
      mostCommonKeyWeight = keyWeights[k];
      result.globalKeyEstimate = (key_t)k;
    }
  }

  return result;
}
// Analyses the audio currently buffered in `workspace` and merges the
// resulting chromagram into `workspace.chroma`.
//
// An FFT adapter is created on the workspace if it has none. After analysis,
// tuning is applied when the chromagram has more than one band per semitone
// and the configured tuning method is one of the two handled here. The
// analysed frames (hop size * number of hops) are then discarded from the
// front of the buffer. The new chromagram either becomes the workspace's
// chromagram or is appended to the existing one and freed.
void KeyFinder::chromagramOfBufferedAudio(
  Workspace& workspace,
  const Parameters& params
) {

  if (workspace.getFftAdapter() == NULL) {
    workspace.setFftAdapter(new FftAdapter(params.getFftFrameSize()));
  }

  SpectrumAnalyser analyser(workspace.buffer.getFrameRate(), params, ctFactory);
  Chromagram* fresh = analyser.chromagramOfWholeFrames(workspace.buffer, workspace.getFftAdapter());

  // deal with tuning if necessary
  const bool severalBandsPerSemitone = fresh->getBandsPerSemitone() > 1;
  if (severalBandsPerSemitone) {
    if (params.getTuningMethod() == TUNING_BAND_ADAPTIVE) {
      fresh->tuningBandAdaptive(params.getDetunedBandWeight());
    } else if (params.getTuningMethod() == TUNING_HARTE) {
      fresh->tuningHarte();
    }
  }

  workspace.buffer.discardFramesFromFront(params.getHopSize() * fresh->getHops());

  if (workspace.chroma != NULL) {
    // Existing chromagram: extend it and release the temporary.
    workspace.chroma->append(*fresh);
    delete fresh;
    return;
  }

  // First chromagram for this workspace: the workspace keeps the pointer.
  workspace.chroma = fresh;
}
// Runs the whole key-detection pipeline on `originalAudio`.
//
// A copy of the audio is reduced to mono and downsampled, then analysed into
// a chromagram. Copies of the chromagram are stored in the result after
// tuning-bin reduction and again after reduction to one octave. The
// harmonic change signal is computed and used to split the track into
// segments. Each segment is collapsed into one chroma vector and classified,
// and its energy is credited to the resulting key. The key with the most
// accumulated energy becomes `globalKeyEstimate` (SILENCE if none gathered
// any energy).
KeyDetectionResult KeyFinder::findKey(const AudioData& originalAudio, const Parameters& params){

  KeyDetectionResult result;

  AudioData* workingAudio = new AudioData(originalAudio);

  workingAudio->reduceToMono();

  Downsampler ds;
  ds.downsample(workingAudio, params.getLastFreq(), &lpfFactory);

  SpectrumAnalyser* sa = new SpectrumAnalyser(workingAudio->getFrameRate(), params, &ctFactory);

  // run spectral analysis
  Chromagram* ch = sa->chromagram(workingAudio);

  delete workingAudio;
  delete sa;

  // reduce chromagram
  ch->reduceTuningBins(params);
  result.fullChromagram = Chromagram(*ch);
  ch->reduceToOneOctave(params);
  result.oneOctaveChromagram = Chromagram(*ch);

  // get harmonic change signal
  Segmentation* segmenter = Segmentation::getSegmentation(params);
  result.harmonicChangeSignal = segmenter->getRateOfChange(*ch, params);

  // get track segmentation
  std::vector<unsigned int> segmentBoundaries = segmenter->getSegments(result.harmonicChangeSignal, params);
  segmentBoundaries.push_back(ch->getHops()); // sentinel
  delete segmenter;

  // get key estimates for each segment
  KeyClassifier hc(params);
  std::vector<float> keyWeights(24); // TODO: not ideal using int cast of key_t enum. Hash?

  for (unsigned int s = 0; s + 1 < segmentBoundaries.size(); s++){
    const unsigned int firstWindow = segmentBoundaries[s];
    const unsigned int endWindow = segmentBoundaries[s + 1]; // one past the last hop

    // An empty segment has no last window; `endWindow - 1` for endWindow == 0
    // would wrap around in unsigned arithmetic and make the hop loop run far
    // past the end of the chromagram.
    if (endWindow <= firstWindow){
      continue;
    }

    KeyDetectionSegment segment;
    segment.firstWindow = firstWindow;
    segment.lastWindow = endWindow - 1;

    // collapse segment's time dimension, for a single chroma vector and a single energy value
    std::vector<float> segmentChroma(ch->getBins());
    // for each relevant hop of the chromagram
    for (unsigned int hop = firstWindow; hop < endWindow; hop++) {
      // for each bin
      for (unsigned int bin = 0; bin < ch->getBins(); bin++) {
        float value = ch->getMagnitude(hop, bin);
        segmentChroma[bin] += value;
        segment.energy += value;
      }
    }
    segment.key = hc.classify(segmentChroma);
    if (segment.key != SILENCE){
      keyWeights[segment.key] += segment.energy;
    }
    result.segments.push_back(segment);
  }

  delete ch;

  // get global key
  result.globalKeyEstimate = SILENCE;
  float mostCommonKeyWeight = 0.0;
  for (int k = 0; k < (signed)keyWeights.size(); k++){
    if (keyWeights[k] > mostCommonKeyWeight){
      mostCommonKeyWeight = keyWeights[k];
      result.globalKeyEstimate = (key_t)k;
    }
  }

  return result;
}
// Returns a harmonic change signal with one value-initialised (zero) entry
// per hop of the chromagram; the parameters are not consulted.
std::vector<float> ArbitrarySeg::getRateOfChange(const Chromagram& ch, const Parameters& /*params*/){
  return std::vector<float>(ch.getHops());
}
void KeyFinderWorkerThread::run(){ if(!haveParams){ emit failed("No parameters."); return; } // initialise stream and decode file into it AudioStream* astrm = NULL; AudioFileDecoder* dec = AudioFileDecoder::getDecoder(filePath.toUtf8().data()); try{ astrm = dec->decodeFile(filePath.toUtf8().data()); }catch(Exception){ delete astrm; delete dec; emit failed("Could not decode file."); return; } delete dec; emit decoded(); // make audio stream monaural astrm->reduceToMono(); emit madeMono(); // downsample if necessary if(prefs.getDFactor() > 1){ Downsampler* ds = Downsampler::getDownsampler(prefs.getDFactor(),astrm->getFrameRate(),prefs.getLastFreq()); try{ astrm = ds->downsample(astrm,prefs.getDFactor()); }catch(Exception){ delete astrm; delete ds; emit failed("Downsampler failed."); return; } delete ds; emit downsampled(); } // start spectrum analysis SpectrumAnalyser* sa = NULL; Chromagram* ch = NULL; sa = SpectrumAnalyserFactory::getInstance()->getSpectrumAnalyser(astrm->getFrameRate(),prefs); ch = sa->chromagram(astrm); delete astrm; // note we don't delete the spectrum analyser; it stays in the centralised factory for reuse. ch->reduceTuningBins(prefs); emit producedFullChromagram(*ch); // reduce chromagram ch->reduceToOneOctave(prefs); emit producedOneOctaveChromagram(*ch); // get energy level across track to weight segments std::vector<float> loudness(ch->getHops()); for(int h=0; h<ch->getHops(); h++) for(int b=0; b<ch->getBins(); b++) loudness[h] += ch->getMagnitude(h,b); // get harmonic change signal Segmentation* hcdf = Segmentation::getSegmentation(prefs); std::vector<double> harmonicChangeSignal = hcdf->getRateOfChange(ch,prefs); emit producedHarmonicChangeSignal(harmonicChangeSignal); // get track segmentation std::vector<int> changes = hcdf->getSegments(harmonicChangeSignal,prefs); changes.push_back(ch->getHops()); // It used to be getHops()-1. But this doesn't crash. So we like it. 
// batch output of keychange locations for Beatles experiment //for(int i=1; i<changes.size(); i++) // don't want the leading zero // std::cout << filePath.substr(53) << "\t" << std::fixed << std::setprecision(2) << changes[i]*(prefs.getHopSize()/(44100.0/prefs.getDFactor())) << std::endl; // end experiment output // get key estimates for segments KeyClassifier hc(prefs); std::vector<int> keys(0); std::vector<float> keyWeights(24); for(int i=0; i<(signed)changes.size()-1; i++){ std::vector<double> chroma(ch->getBins()); for(int j=changes[i]; j<changes[i+1]; j++) for(int k=0; k<ch->getBins(); k++) chroma[k] += ch->getMagnitude(j,k); int key = hc.classify(chroma); for(int j=changes[i]; j<changes[i+1]; j++){ keys.push_back(key); if(key < 24) // ignore parts that were classified as silent keyWeights[key] += loudness[j]; } } keys.push_back(keys[keys.size()-1]); // put last key on again to match length of track delete ch; emit producedKeyEstimates(keys); // get global key int mostCommonKey = 24; float mostCommonKeyWeight = 0.0; for(int i=0; i<(signed)keyWeights.size(); i++){ if(keyWeights[i] > mostCommonKeyWeight){ mostCommonKeyWeight = keyWeights[i]; mostCommonKey = i; } } emit producedGlobalKeyEstimate(mostCommonKey); return; }
// Runs the whole key-detection pipeline on `originalAudio`.
//
// A copy of the audio is reduced to mono, low-pass filtered and downsampled,
// then analysed into a chromagram. Copies of the chromagram are stored in the
// result after tuning-bin reduction and again after reduction to one octave.
// The harmonic change signal is computed and used to split the track into
// segments. Each segment is collapsed into one chroma vector and classified,
// and its energy is credited to the resulting key. The key with the most
// accumulated energy becomes `globalKeyEstimate` (SILENCE if none gathered
// any energy).
KeyDetectionResult KeyFinder::findKey(const AudioData& originalAudio, const Parameters& params){

  KeyDetectionResult result;

  AudioData* workingAudio = new AudioData(originalAudio);

  workingAudio->reduceToMono();

  // TODO: there is presumably some good maths to determine filter frequencies
  float lpfCutoff = params.getLastFrequency() * 1.05;
  float dsCutoff = params.getLastFrequency() * 1.10;
  // NOTE(review): this evaluates to 0 when half the frame rate is below
  // dsCutoff; confirm that the filter and downsampler tolerate a factor of 0.
  unsigned int downsampleFactor = (int)floor( workingAudio->getFrameRate() / 2 / dsCutoff );

  // get filter
  LowPassFilter* lpf = lpfFactory.getLowPassFilter(160, workingAudio->getFrameRate(), lpfCutoff, 2048);
  // feeding in the downsampleFactor for a shortcut
  lpf->filter(workingAudio, downsampleFactor);
  // note we don't delete the LPF; it's stored in the factory for reuse

  Downsampler ds;
  ds.downsample(workingAudio, downsampleFactor);

  SpectrumAnalyser* sa = new SpectrumAnalyser(workingAudio->getFrameRate(), params, &ctFactory);

  // run spectral analysis
  Chromagram* ch = sa->chromagram(workingAudio);

  delete workingAudio;
  delete sa;

  // reduce chromagram
  ch->reduceTuningBins(params);
  result.fullChromagram = Chromagram(*ch);
  ch->reduceToOneOctave(params);
  result.oneOctaveChromagram = Chromagram(*ch);

  // get harmonic change signal
  Segmentation* segmenter = Segmentation::getSegmentation(params);
  result.harmonicChangeSignal = segmenter->getRateOfChange(*ch, params);

  // get track segmentation
  std::vector<unsigned int> segmentBoundaries = segmenter->getSegments(result.harmonicChangeSignal, params);
  segmentBoundaries.push_back(ch->getHops()); // sentinel
  delete segmenter;

  // get key estimates for each segment
  KeyClassifier hc(params);
  std::vector<float> keyWeights(24); // TODO: not ideal using int cast of key_t enum. Hash?

  for (unsigned int s = 0; s + 1 < segmentBoundaries.size(); s++){
    const unsigned int firstHop = segmentBoundaries[s];
    const unsigned int endHop = segmentBoundaries[s + 1]; // one past the last hop

    // An empty segment has no last hop; `endHop - 1` for endHop == 0 would
    // wrap around in unsigned arithmetic and make the hop loop run far past
    // the end of the chromagram.
    if (endHop <= firstHop){
      continue;
    }

    KeyDetectionSegment segment;
    segment.firstHop = firstHop;
    segment.lastHop = endHop - 1;

    // collapse segment's time dimension
    std::vector<float> segmentChroma(ch->getBins());
    for (unsigned int hop = firstHop; hop < endHop; hop++) {
      for (unsigned int bin = 0; bin < ch->getBins(); bin++) {
        float value = ch->getMagnitude(hop, bin);
        segmentChroma[bin] += value;
        segment.energy += value;
      }
    }
    segment.key = hc.classify(segmentChroma);
    if (segment.key != SILENCE){
      keyWeights[segment.key] += segment.energy;
    }
    result.segments.push_back(segment);
  }

  delete ch;

  // get global key
  result.globalKeyEstimate = SILENCE;
  float mostCommonKeyWeight = 0.0;
  for (int k = 0; k < (signed)keyWeights.size(); k++){
    if (keyWeights[k] > mostCommonKeyWeight){
      mostCommonKeyWeight = keyWeights[k];
      result.globalKeyEstimate = (key_t)k;
    }
  }

  return result;
}