void AudioPlayer::play(const AudioData& data) {
    if (_stream == NULL)
        initStream(data.format());
    if (!Pa_IsStreamActive(_stream))
        Pa_StartStream(_stream);
    std::lock_guard<std::mutex> lock(_mutex);
    _buffer.write((Byte*)data.frames(), (Byte*)data.frames() + (data.nbFrames() * data.format().frameSize()));
}
AudioData AudioResampleImpl::process(AudioComponentPtr source, int desiredSamples) const {
    AudioData result(m_to);
    if (!m_resampler)
        return result;
    if (source->format() != m_from) {
        assert(!"AudioResampler::process: incompatible source format");
        return result;
    }
    int lastReadSamples = 0;
    int resultSamplesPerChannelFact = 0;
    do {
        AudioData sourceData = source->read(
            av_rescale_rnd(desiredSamples, m_from.sampleRate(), m_to.sampleRate(), AV_ROUND_DOWN));
        if (sourceData.isEmpty())
            break;
        lastReadSamples = sourceData.numSamples();
        const auto sourceSamplesPerChannel = sourceData.numSamples();
        const auto resultSamplesPerChannel = resultSamples(sourceSamplesPerChannel);
        result.setSamples(resultSamplesPerChannel);

        boost::scoped_array<char*> preparedSourceData;
        splitAudioData(sourceData, preparedSourceData);
        boost::scoped_array<char*> preparedDestData;
        splitAudioData(result, preparedDestData);

        const uint8_t** sourcePtr = (const uint8_t**)preparedSourceData.get();
        uint8_t** destPtr = (uint8_t**)preparedDestData.get();

        resultSamplesPerChannelFact = swr_convert(m_resampler.get(),
            destPtr, static_cast<int>(resultSamplesPerChannel),
            sourcePtr, sourceSamplesPerChannel);
        if (!resultSamplesPerChannelFact) {
            qDebug() << "AudioResampler::process: not enough data to process. ("
                     << m_from.sampleRate() << "," << m_to.sampleRate()
                     << ").\t source samples" << sourceSamplesPerChannel;
        }
    } while (!resultSamplesPerChannelFact);
    result.setSamples(resultSamplesPerChannelFact);
    return result;
}
static void SendStreamAudio(DecodedStreamData* aStream, int64_t aStartTime,
                            MediaData* aData, AudioSegment* aOutput,
                            uint32_t aRate, double aVolume) {
  MOZ_ASSERT(aData);
  AudioData* audio = aData->As<AudioData>();
  // This logic has to mimic AudioSink closely to make sure we write
  // the exact same silences
  CheckedInt64 audioWrittenOffset = aStream->mAudioFramesWritten +
                                    UsecsToFrames(aStartTime, aRate);
  CheckedInt64 frameOffset = UsecsToFrames(audio->mTime, aRate);

  if (!audioWrittenOffset.isValid() || !frameOffset.isValid() ||
      // ignore packet that we've already processed
      frameOffset.value() + audio->mFrames <= audioWrittenOffset.value()) {
    return;
  }

  if (audioWrittenOffset.value() < frameOffset.value()) {
    int64_t silentFrames = frameOffset.value() - audioWrittenOffset.value();
    // Write silence to catch up
    AudioSegment silence;
    silence.InsertNullDataAtStart(silentFrames);
    aStream->mAudioFramesWritten += silentFrames;
    audioWrittenOffset += silentFrames;
    aOutput->AppendFrom(&silence);
  }

  MOZ_ASSERT(audioWrittenOffset.value() >= frameOffset.value());

  int64_t offset = audioWrittenOffset.value() - frameOffset.value();
  size_t framesToWrite = audio->mFrames - offset;

  audio->EnsureAudioBuffer();
  nsRefPtr<SharedBuffer> buffer = audio->mAudioBuffer;
  AudioDataValue* bufferData = static_cast<AudioDataValue*>(buffer->Data());
  nsAutoTArray<const AudioDataValue*, 2> channels;
  for (uint32_t i = 0; i < audio->mChannels; ++i) {
    channels.AppendElement(bufferData + i * audio->mFrames + offset);
  }
  aOutput->AppendFrames(buffer.forget(), channels, framesToWrite);
  aStream->mAudioFramesWritten += framesToWrite;
  aOutput->ApplyVolume(aVolume);

  aStream->mNextAudioTime = audio->GetEndTime();
}
static void SendStreamAudio(DecodedStreamData* aStream, int64_t aStartTime,
                            MediaData* aData, AudioSegment* aOutput,
                            uint32_t aRate, const PrincipalHandle& aPrincipalHandle) {
  // The amount of audio frames that is used to fuzz rounding errors.
  static const int64_t AUDIO_FUZZ_FRAMES = 1;

  MOZ_ASSERT(aData);
  AudioData* audio = aData->As<AudioData>();
  // This logic has to mimic AudioSink closely to make sure we write
  // the exact same silences
  CheckedInt64 audioWrittenOffset = aStream->mAudioFramesWritten +
                                    UsecsToFrames(aStartTime, aRate);
  CheckedInt64 frameOffset = UsecsToFrames(audio->mTime, aRate);

  if (!audioWrittenOffset.isValid() || !frameOffset.isValid() ||
      // ignore packet that we've already processed
      audio->GetEndTime() <= aStream->mNextAudioTime) {
    return;
  }

  if (audioWrittenOffset.value() + AUDIO_FUZZ_FRAMES < frameOffset.value()) {
    int64_t silentFrames = frameOffset.value() - audioWrittenOffset.value();
    // Write silence to catch up
    AudioSegment silence;
    silence.InsertNullDataAtStart(silentFrames);
    aStream->mAudioFramesWritten += silentFrames;
    audioWrittenOffset += silentFrames;
    aOutput->AppendFrom(&silence);
  }

  // Always write the whole sample without truncation to be consistent with
  // DecodedAudioDataSink::PlayFromAudioQueue()
  audio->EnsureAudioBuffer();
  RefPtr<SharedBuffer> buffer = audio->mAudioBuffer;
  AudioDataValue* bufferData = static_cast<AudioDataValue*>(buffer->Data());
  AutoTArray<const AudioDataValue*, 2> channels;
  for (uint32_t i = 0; i < audio->mChannels; ++i) {
    channels.AppendElement(bufferData + i * audio->mFrames);
  }
  aOutput->AppendFrames(buffer.forget(), channels, audio->mFrames, aPrincipalHandle);
  aStream->mAudioFramesWritten += audio->mFrames;
  aStream->mNextAudioTime = audio->GetEndTime();
}
Chromagram SpectrumAnalyser::chromagramOfFirstFrame(const AudioData& audio) const {
  if (audio.getChannels() != 1)
    throw Exception("Audio must be monophonic to be analysed");
  Chromagram c(1, octaves, bandsPerSemitone);
  unsigned int fftFrameSize = fft->getFrameSize();
  for (unsigned int sample = 0; sample < fftFrameSize; sample++) {
    fft->setInput(sample, audio.getSample(sample) * temporalWindow[sample]);
  }
  fft->execute();
  std::vector<float> cv = ct->chromaVector(fft);
  for (unsigned int band = 0; band < c.getBands(); band++) {
    c.setMagnitude(0, band, cv[band]);
  }
  return c;
}
void AudioResampleImpl::splitAudioData(AudioData & data, boost::scoped_array<char*> & split) const {
    auto dataFormat = data.format();
    if (dataFormat.isPlanar()) {
        /// For a planar format the data has to be presented as one array per channel
        const int numChannels = dataFormat.channelCount();
        split.reset(new char*[numChannels]);
        split_ref(data.begin(), data.end(), data.numBytes() / numChannels, split.get());
    } else {
        /// Interleaved data goes into a single array
        split.reset(new char*[1]);
        split[0] = data.data();
    }
}
void KeyFinder::preprocess(AudioData& workingAudio, Workspace& workspace, const Parameters& params) {
  workingAudio.reduceToMono();
  // TODO: there is presumably some good maths to determine filter frequencies.
  // For now, this approximates original experiment values for default params.
  float lpfCutoff = params.getLastFrequency() * 1.012;
  float dsCutoff = params.getLastFrequency() * 1.10;
  unsigned int downsampleFactor = (int) floor(workingAudio.getFrameRate() / 2 / dsCutoff);
  // get filter
  const LowPassFilter* lpf = lpfFactory.getLowPassFilter(160, workingAudio.getFrameRate(), lpfCutoff, 2048);
  lpf->filter(workingAudio, workspace, downsampleFactor); // downsampleFactor shortcut
  // note we don't delete the LPF; it's stored in the factory for reuse
  workingAudio.downsample(downsampleFactor);
}
void FileUploader::uploadMedia(const FullMsgId &msgId, const ReadyLocalMedia &media) {
    if (media.type == PreparePhoto) {
        App::feedPhoto(media.photo, media.photoThumbs);
    } else if (media.type == PrepareDocument) {
        DocumentData *document;
        if (media.photoThumbs.isEmpty()) {
            document = App::feedDocument(media.document);
        } else {
            document = App::feedDocument(media.document, media.photoThumbs.begin().value());
        }
        document->status = FileUploading;
        if (!media.file.isEmpty()) {
            document->setLocation(FileLocation(StorageFilePartial, media.file));
        }
    } else if (media.type == PrepareAudio) {
        AudioData *audio = App::feedAudio(media.audio);
        audio->status = FileUploading;
        audio->setData(media.data);
    }
    queue.insert(msgId, File(media));
    sendNext();
}
static void* clientCallback(void* arg) {
    ClientData* data = static_cast<ClientData*>(arg);
    std::string host = "127.0.0.2";
    if (!data->isServer)
        host = "127.0.0.1";
    try {
        ISocket* client = new Socket();
        if (client->run(host)) {
            IAudioIO* audioIO = new PortAudio;
            while (client->isConnected()) {
                // std::string tosend = "recorded.. from " + data->login;
                audioIO->startRecord(RECORD_TIME);
                AudioData* data = audioIO->getRecorded();
                client->send(data->getData(), data->getSize());
                // sleep(1);
            }
            delete audioIO;
        }
        delete client;
        data->clientThread->exit();
    } catch (ISocket::Exception& e) {
        std::cerr << e.what() << std::endl;
    } catch (IAudioIO::Exception& e) {
        std::cerr << e.what() << std::endl;
    }
    return NULL;
}
void FileUploader::upload(const FullMsgId &msgId, const FileLoadResultPtr &file) {
    if (file->type == PreparePhoto) {
        PhotoData *photo = App::feedPhoto(file->photo, file->photoThumbs);
        photo->uploadingData = new PhotoData::UploadingData(file->partssize);
    } else if (file->type == PrepareDocument) {
        DocumentData *document;
        if (file->thumb.isNull()) {
            document = App::feedDocument(file->document);
        } else {
            document = App::feedDocument(file->document, file->thumb);
        }
        document->status = FileUploading;
        if (!file->filepath.isEmpty()) {
            document->setLocation(FileLocation(StorageFilePartial, file->filepath));
        }
    } else if (file->type == PrepareAudio) {
        AudioData *audio = App::feedAudio(file->audio);
        audio->status = FileUploading;
        audio->setData(file->content);
    }
    queue.insert(msgId, File(file));
    sendNext();
}
void LowPassFilterPrivate::filter(AudioData& audio, Workspace& workspace, unsigned int shortcutFactor) const {
  if (audio.getChannels() > 1) {
    throw Exception("Monophonic audio only");
  }
  std::vector<double>* buffer = workspace.lpfBuffer;
  if (buffer == NULL) {
    workspace.lpfBuffer = new std::vector<double>(impulseLength, 0.0);
    buffer = workspace.lpfBuffer;
  } else {
    // clear delay buffer
    std::vector<double>::iterator bufferIterator = buffer->begin();
    while (bufferIterator < buffer->end()) {
      *bufferIterator = 0.0;
      std::advance(bufferIterator, 1);
    }
  }
  std::vector<double>::iterator bufferFront = buffer->begin();
  std::vector<double>::iterator bufferBack;
  std::vector<double>::iterator bufferTemp;

  unsigned int sampleCount = audio.getSampleCount();
  audio.resetIterators();
  double sum;

  // for each frame (running off the end of the sample stream by delay)
  for (unsigned int inSample = 0; inSample < sampleCount + delay; inSample++) {
    // shuffle old samples along delay buffer
    bufferBack = bufferFront;
    std::advance(bufferFront, 1);
    if (bufferFront == buffer->end()) {
      bufferFront = buffer->begin();
    }
    // load new sample into back of delay buffer
    if (audio.readIteratorWithinUpperBound()) {
      *bufferBack = audio.getSampleAtReadIterator() / gain;
      audio.advanceReadIterator();
    } else {
      *bufferBack = 0.0; // zero pad once we're past the end of the file
    }
    // start doing the maths once the delay has passed
    int outSample = (signed)inSample - (signed)delay;
    if (outSample < 0) {
      continue;
    }
    // and, if shortcut != 1, only do the maths for the useful samples
    // (this is mathematically dodgy, but it's faster and it usually works)
    if (outSample % shortcutFactor > 0) {
      continue;
    }
    sum = 0.0;
    bufferTemp = bufferFront;
    std::vector<double>::const_iterator coefficientIterator = coefficients.begin();
    while (coefficientIterator < coefficients.end()) {
      sum += *coefficientIterator * *bufferTemp;
      std::advance(coefficientIterator, 1);
      std::advance(bufferTemp, 1);
      if (bufferTemp == buffer->end()) {
        bufferTemp = buffer->begin();
      }
    }
    audio.setSampleAtWriteIterator(sum);
    audio.advanceWriteIterator(shortcutFactor);
  }
}
// Decodes the data in a WAV file and saves it into an AudioData class
AudioData * coder::decode(string filename) {
    fstream audioFile;
    audioFile.open(filename, ios_base::in | ios_base::binary);

    // File does not exist
    if (audioFile.fail()) {
        return NULL;
    }

    AudioData * data = NULL;
    if (audioFile.is_open()) {
        data = new AudioData;

        // Read RIFF
        int smallBuffer;
        audioFile.read((char *)&smallBuffer, 4);
        data->setRIFF((char *)&smallBuffer);

        // Read file size
        audioFile.read((char *)&smallBuffer, 4);
        data->setFileSize(smallBuffer);

        // Read file type
        audioFile.read((char *)&smallBuffer, 4);
        data->setFileType((char *)&smallBuffer);

        // Read format
        audioFile.read((char *)&smallBuffer, 4);
        data->setFormat((char *)&smallBuffer);

        // Read format info size
        unsigned int infoSize = 0;
        audioFile.read((char *)&infoSize, 4);
        data->setFormatInfoSize(infoSize);

        // Read format info
        char * largeBuffer = (char *)malloc(sizeof(char) * infoSize); // remember to free on close
        memset(largeBuffer, 0x00, infoSize);
        audioFile.read(largeBuffer, infoSize);
        data->setFormatInfo(largeBuffer);

        // Read data chunk
        audioFile.read((char *)&smallBuffer, 4);
        data->setDataChunk((char *)&smallBuffer);

        // Read data size
        unsigned int dataSize = 0;
        audioFile.read((char *)&dataSize, 4);
        data->setDataSize(dataSize);

        // Read audio data
        largeBuffer = (char *)malloc(sizeof(char) * dataSize);
        memset(largeBuffer, 0x00, dataSize);
        audioFile.read(largeBuffer, dataSize);

        // Separate audio data into multiple channels
        short ** channelData = seperateChannels((short *)largeBuffer, data->getChannels(), data->getNumberOfSamples());
        data->setChannelData(channelData);

        free(largeBuffer);
        audioFile.close();
    }
    return data;
}
void FileUploader::partLoaded(const MTPBool &result, mtpRequestId requestId) {
    QMap<mtpRequestId, int32>::iterator j = docRequestsSent.end();
    QMap<mtpRequestId, QByteArray>::iterator i = requestsSent.find(requestId);
    if (i == requestsSent.cend()) {
        j = docRequestsSent.find(requestId);
    }
    if (i != requestsSent.cend() || j != docRequestsSent.cend()) {
        if (mtpIsFalse(result)) { // failed to upload current file
            currentFailed();
            return;
        } else {
            QMap<mtpRequestId, int32>::iterator dcIt = dcMap.find(requestId);
            if (dcIt == dcMap.cend()) { // must not happen
                currentFailed();
                return;
            }
            int32 dc = dcIt.value();
            dcMap.erase(dcIt);

            int32 sentPartSize = 0;
            Queue::const_iterator k = queue.constFind(uploading);
            if (i != requestsSent.cend()) {
                sentPartSize = i.value().size();
                requestsSent.erase(i);
            } else {
                sentPartSize = k->docPartSize;
                docRequestsSent.erase(j);
            }
            sentSize -= sentPartSize;
            sentSizes[dc] -= sentPartSize;
            if (k->type() == PreparePhoto) {
                k->fileSentSize += sentPartSize;
                PhotoData *photo = App::photo(k->id());
                if (photo->uploading() && k->file) {
                    photo->uploadingData->size = k->file->partssize;
                    photo->uploadingData->offset = k->fileSentSize;
                }
                emit photoProgress(k.key());
            } else if (k->type() == PrepareDocument) {
                DocumentData *doc = App::document(k->id());
                if (doc->uploading()) {
                    doc->uploadOffset = (k->docSentParts - docRequestsSent.size()) * k->docPartSize;
                    if (doc->uploadOffset > doc->size) {
                        doc->uploadOffset = doc->size;
                    }
                }
                emit documentProgress(k.key());
            } else if (k->type() == PrepareAudio) {
                AudioData *audio = App::audio(k->id());
                if (audio->uploading()) {
                    audio->uploadOffset = (k->docSentParts - docRequestsSent.size()) * k->docPartSize;
                    if (audio->uploadOffset > audio->size) {
                        audio->uploadOffset = audio->size;
                    }
                }
                emit audioProgress(k.key());
            }
        }
    }
    sendNext();
}
Status IPPAudioDecoder::Run() {
    m_bPlaying = true; // [neuromos] Flag to check whether we are currently inside the Encoder loop.

    if (m_nAudioSliceLen == 0)
        ASSERT(FALSE);

    AudioData in;
    AudioData out;
    out.Alloc(m_nAudioSliceLen);
    AudioData* p_dataIn = &in;
    AudioData* p_dataOut = &out;
    Status ret = UMC_OK;
    Ipp32s len;

    // p_dataIn->Init();
    p_dataIn->Alloc(m_nAudioSliceLen);

    decodedSize = 0;
    mFramesDecoded = 0;

    for (;;) {
        if (p_dataIn) {
            if (UMC_OK != GetInputData(p_dataIn))
                break;
            // if (UMC_OK == GetInputData(p_dataIn))
            //     p_dataIn->SetTime((mFramesEncoded+1) / pEncoderParams->info.framerate);
            // else
            //     p_dataIn = NULL;
        }

        int nDataInSize = p_dataIn->GetDataSize();
        ret = pCodec->GetFrame(p_dataIn, p_dataOut);
        p_dataIn->MoveDataPointer(-nDataInSize);

        if (ret != UMC_OK && ret != UMC_ERR_NOT_ENOUGH_DATA && ret != UMC_ERR_END_OF_STREAM) {
            m_bPlaying = false; // [neuromos] Flag to check whether we are currently inside the Encoder loop.
            return UMC_ERR_FAILED;
        }

        len = (Ipp32s)p_dataOut->GetDataSize();
        decodedSize += len;

        if (UMC_OK != PutOutputData(p_dataOut)) {
            m_bPlaying = false; // [neuromos] Flag to check whether we are currently inside the Encoder loop.
            return UMC_ERR_FAILED;
        }

        if (p_dataIn) {
            mFramesDecoded++;
        } else {
            if (!len || ret == UMC_ERR_END_OF_STREAM)
                break; // EOF
        }
    }

    PutOutputData(NULL); // means EOF
    pCodec->Close();

    m_bPlaying = false; // [neuromos] Flag to check whether we are currently inside the Encoder loop.
    return UMC_OK;
}
// This method only supports PCM/uncompressed format, with a single fmt
// chunk followed by a single data chunk
AudioData* loadWaveFile(int fd) {
    int srate = 0;
    int channels = 0;
    if (fd == -1)
        return 0;
    unsigned char hdr[36];
    if (!readBytes(fd, hdr, 36)) {
        close(fd);
        return 0;
    }
    if (hdr[0] != 'R' || hdr[1] != 'I' || hdr[2] != 'F' || hdr[3] != 'F') {
        close(fd);
        return 0;
    }
    // Note: bytes 4 thru 7 contain the file size - 8 bytes
    if (hdr[8] != 'W' || hdr[9] != 'A' || hdr[10] != 'V' || hdr[11] != 'E') {
        close(fd);
        return 0;
    }
    if (hdr[12] != 'f' || hdr[13] != 'm' || hdr[14] != 't' || hdr[15] != ' ') {
        close(fd);
        return 0;
    }
    long extraBytes = hdr[16] + (hdr[17] << 8) + (hdr[18] << 16) + (hdr[19] << 24) - 16;
    int compression = hdr[20] + (hdr[21] << 8);
    // Type 1 is PCM/Uncompressed
    if (compression != 1) {
        close(fd);
        return 0;
    }
    channels = hdr[22] + (hdr[23] << 8);
    // Only mono or stereo PCM is supported in this example
    if (channels < 1 || channels > 2) {
        close(fd);
        return 0;
    }
    // Samples per second, independent of number of channels
    srate = hdr[24] + (hdr[25] << 8) + (hdr[26] << 16) + (hdr[27] << 24);
    // Bytes 28-31 contain the "average bytes per second", unneeded here
    // Bytes 32-33 contain the number of bytes per sample (includes channels)
    // Bytes 34-35 contain the number of bits per single sample
    int bits = hdr[34] + (hdr[35] << 8);
    // Supporting other sample depths will require conversion
    if (bits != 16) {
        close(fd);
        return 0;
    }
    // Skip past extra bytes, if any
    unsigned char extraskip[extraBytes];
    if (!readBytes(fd, extraskip, extraBytes)) {
        close(fd);
        return 0;
    }
    // Start reading the next frame. Only supported frame is the data block
    unsigned char b[8];
    if (!readBytes(fd, b, 8)) {
        close(fd);
        return 0;
    }
    // Do we have a fact block?
    if (b[0] == 'f' && b[1] == 'a' && b[2] == 'c' && b[3] == 't') {
        // Skip the fact block
        unsigned char factskip[12];
        if (!readBytes(fd, factskip, 12)) {
            close(fd);
            return 0;
        }
        // Read the next frame
        if (!readBytes(fd, b, 8)) {
            close(fd);
            return 0;
        }
    }
    // Now look for the data block
    if (b[0] != 'd' || b[1] != 'a' || b[2] != 't' || b[3] != 'a') {
        close(fd);
        return 0;
    }
    // this will be 0 if ffmpeg is used
    // since it can't seek the stream to write this value
    // so we ignore this value and just read to the end if it is 0
    long bytes = b[4] + (b[5] << 8) + (b[6] << 16) + (b[7] << 24);
    // No need to read the whole file, just the first 135 seconds
    long bytesPerSecond = srate * 2 * channels;
    if (bytes == 0) // maximum data is 2 gigabytes; getting a puid won't work with bigger files
        bytes = 2 * 1000 * 1000 * 1000;
    // Now we read parts of the file until eof or `bytes` is reached;
    // bytesPerSecond is used as the buffer size
    int readSize = bytesPerSecond;
    unsigned char* samples = NULL;
    int size = 0;
    while (size < bytes) {
        samples = (unsigned char*)realloc(samples, size + readSize);
        int n = read(fd, samples + size, readSize);
        if (n < 0) {
            free(samples); // allocated with realloc, so free() rather than delete[]
            close(fd);
            return 0;
        }
        size += n;
        if (n == 0)
            break;
    }
    close(fd);
    long ms = (size / 2) / (srate / 1000);
    if (channels == 2)
        ms /= 2;
    AudioData *data = new AudioData();
    data->setData(samples, OFA_LITTLE_ENDIAN, size / 2, srate, channels == 2 ? 1 : 0, ms, "wav");
    return data;
}
KeyDetectionResult KeyFinder::findKey(const AudioData& originalAudio, const Parameters& params) {
  KeyDetectionResult result;
  AudioData* workingAudio = new AudioData(originalAudio);
  workingAudio->reduceToMono();
  Downsampler ds;
  ds.downsample(workingAudio, params.getLastFreq(), &lpfFactory);
  SpectrumAnalyser* sa = new SpectrumAnalyser(workingAudio->getFrameRate(), params, &ctFactory);
  // run spectral analysis
  Chromagram* ch = sa->chromagram(workingAudio);
  delete workingAudio;
  delete sa;
  // reduce chromagram
  ch->reduceTuningBins(params);
  result.fullChromagram = Chromagram(*ch);
  ch->reduceToOneOctave(params);
  result.oneOctaveChromagram = Chromagram(*ch);
  // get harmonic change signal
  Segmentation* segmenter = Segmentation::getSegmentation(params);
  result.harmonicChangeSignal = segmenter->getRateOfChange(*ch, params);
  // get track segmentation
  std::vector<unsigned int> segmentBoundaries = segmenter->getSegments(result.harmonicChangeSignal, params);
  segmentBoundaries.push_back(ch->getHops()); // sentinel
  delete segmenter;
  // get key estimates for each segment
  KeyClassifier hc(params);
  std::vector<float> keyWeights(24); // TODO: not ideal using int cast of key_t enum. Hash?
  for (int s = 0; s < (signed)segmentBoundaries.size() - 1; s++) {
    KeyDetectionSegment segment;
    segment.firstWindow = segmentBoundaries[s];
    segment.lastWindow = segmentBoundaries[s + 1] - 1;
    // collapse segment's time dimension, for a single chroma vector and a single energy value
    std::vector<float> segmentChroma(ch->getBins());
    // for each relevant hop of the chromagram
    for (unsigned int hop = segment.firstWindow; hop <= segment.lastWindow; hop++) {
      // for each bin
      for (unsigned int bin = 0; bin < ch->getBins(); bin++) {
        float value = ch->getMagnitude(hop, bin);
        segmentChroma[bin] += value;
        segment.energy += value;
      }
    }
    segment.key = hc.classify(segmentChroma);
    if (segment.key != SILENCE) {
      keyWeights[segment.key] += segment.energy;
    }
    result.segments.push_back(segment);
  }
  delete ch;
  // get global key
  result.globalKeyEstimate = SILENCE;
  float mostCommonKeyWeight = 0.0;
  for (int k = 0; k < (signed)keyWeights.size(); k++) {
    if (keyWeights[k] > mostCommonKeyWeight) {
      mostCommonKeyWeight = keyWeights[k];
      result.globalKeyEstimate = (key_t)k;
    }
  }
  return result;
}
KeyDetectionResult KeyFinder::findKey(const AudioData& originalAudio, const Parameters& params) {
  KeyDetectionResult result;
  AudioData* workingAudio = new AudioData(originalAudio);
  workingAudio->reduceToMono();
  // TODO: there is presumably some good maths to determine filter frequencies
  float lpfCutoff = params.getLastFrequency() * 1.05;
  float dsCutoff = params.getLastFrequency() * 1.10;
  unsigned int downsampleFactor = (int)floor(workingAudio->getFrameRate() / 2 / dsCutoff);
  // get filter
  LowPassFilter* lpf = lpfFactory.getLowPassFilter(160, workingAudio->getFrameRate(), lpfCutoff, 2048);
  // feeding in the downsampleFactor for a shortcut
  lpf->filter(workingAudio, downsampleFactor);
  // note we don't delete the LPF; it's stored in the factory for reuse
  Downsampler ds;
  ds.downsample(workingAudio, downsampleFactor);
  SpectrumAnalyser* sa = new SpectrumAnalyser(workingAudio->getFrameRate(), params, &ctFactory);
  // run spectral analysis
  Chromagram* ch = sa->chromagram(workingAudio);
  delete workingAudio;
  delete sa;
  // reduce chromagram
  ch->reduceTuningBins(params);
  result.fullChromagram = Chromagram(*ch);
  ch->reduceToOneOctave(params);
  result.oneOctaveChromagram = Chromagram(*ch);
  // get harmonic change signal
  Segmentation* segmenter = Segmentation::getSegmentation(params);
  result.harmonicChangeSignal = segmenter->getRateOfChange(*ch, params);
  // get track segmentation
  std::vector<unsigned int> segmentBoundaries = segmenter->getSegments(result.harmonicChangeSignal, params);
  segmentBoundaries.push_back(ch->getHops()); // sentinel
  delete segmenter;
  // get key estimates for each segment
  KeyClassifier hc(params);
  std::vector<float> keyWeights(24); // TODO: not ideal using int cast of key_t enum. Hash?
  for (int s = 0; s < (signed)segmentBoundaries.size() - 1; s++) {
    KeyDetectionSegment segment;
    segment.firstHop = segmentBoundaries[s];
    segment.lastHop = segmentBoundaries[s + 1] - 1;
    // collapse segment's time dimension
    std::vector<float> segmentChroma(ch->getBins());
    for (unsigned int hop = segment.firstHop; hop <= segment.lastHop; hop++) {
      for (unsigned int bin = 0; bin < ch->getBins(); bin++) {
        float value = ch->getMagnitude(hop, bin);
        segmentChroma[bin] += value;
        segment.energy += value;
      }
    }
    segment.key = hc.classify(segmentChroma);
    if (segment.key != SILENCE) {
      keyWeights[segment.key] += segment.energy;
    }
    result.segments.push_back(segment);
  }
  delete ch;
  // get global key
  result.globalKeyEstimate = SILENCE;
  float mostCommonKeyWeight = 0.0;
  for (int k = 0; k < (signed)keyWeights.size(); k++) {
    if (keyWeights[k] > mostCommonKeyWeight) {
      mostCommonKeyWeight = keyWeights[k];
      result.globalKeyEstimate = (key_t)k;
    }
  }
  return result;
}