/**
 * Reads frames from a stream into a caller-supplied buffer.
 *
 * The arguments are validated before the request is forwarded:
 *   - a null @p buffer yields AAUDIO_ERROR_NULL,
 *   - a negative @p numFrames yields AAUDIO_ERROR_ILLEGAL_ARGUMENT,
 *   - a @p numFrames of zero returns 0 without calling into the stream.
 *
 * @param stream             handle that is converted to the internal AudioStream
 * @param buffer             destination for the frames; must not be null
 * @param numFrames          number of frames requested; must not be negative
 * @param timeoutNanoseconds passed through unchanged to AudioStream::read()
 * @return one of the values listed above, otherwise whatever AudioStream::read() returns
 */
AAUDIO_API aaudio_result_t AAudioStream_read(AAudioStream* stream,
                                             void *buffer,
                                             int32_t numFrames,
                                             int64_t timeoutNanoseconds)
{
    // Resolve the handle up front, exactly as before, so the order of calls is unchanged.
    AudioStream *const target = convertAAudioStreamToAudioStream(stream);

    if (nullptr == buffer) {
        return AAUDIO_ERROR_NULL;
    }

    if (numFrames <= 0) {
        // Zero frames is a valid no-op request; anything below zero is rejected.
        return (numFrames == 0) ? 0 : AAUDIO_ERROR_ILLEGAL_ARGUMENT;
    }

    return target->read(buffer, numFrames, timeoutNanoseconds);
}
int main(int argc, char *argv[]) { init(argc, argv); AudioFile a(input1.c_str()); AudioFile b(input2.c_str()); if (channel >= a.getNumberOfChannels() || channel >= b.getNumberOfChannels()) { ls << log_error() << "Error, channel not available" << endl; return 1; } //cross correlate x seconds at a time. uint32_t samplesPrBlock = a.getSampleRate() * secondsPrBlock; ls << log_debug() << "samples a: " << samplesPrBlock << endl; ls << log_debug() << "samples b: " << b.getSampleRate() * secondsPrBlock << endl; AudioStream aStream = a.getStream(channel); AudioStream bStream = b.getStream(channel); bool done = false; bool first = true; bool success = true; size_t blockFailure = 0; double blockFailureVal = 0.0; int64_t blockFailureOffset = 0; double firstMaxVal = 0.0; int64_t firstOffset = 0; double minimumVal = 2; for (size_t block = 1; !done; ++block) { vector<int16_t> aSamples, bSamples; vector<uint64_t> aSquarePrefixSum, bSquarePrefixSum; vector<complex<double> > result; aStream.read(samplesPrBlock, aSamples); bStream.read(samplesPrBlock, bSamples); if (aSamples.size() == 0 && bSamples.size() == 0) { // reached the end of both of the samples if (verbose) { cout << "finished: reached end of both samples" << endl; } break; } if (padShortBlock) { // If this correlation involves 'short' files, shorter than one // block, pad the blocks with silence - this will only pad the last // block, so that for short files, shorter than one block, the // correlation is performed, rather than terminating the loop early // in the following break, leaving minimumVal as 2, and indicating // success. Because we're looping until the end of both samples, // this will pad out a shorter sample with silence (which will // probably yield a correlation failure) aSamples.resize(samplesPrBlock, 0); bSamples.resize(samplesPrBlock, 0); } if (aSamples.size() < samplesPrBlock/2 || bSamples.size() < samplesPrBlock) { // not enough samples for another reliable check. 
if (verbose) { cout << "finished: not enough samples for another reliable check" << endl; } break; } prefixSquareSum(aSamples, aSquarePrefixSum); prefixSquareSum(bSamples, bSquarePrefixSum); // we count the average of absolute values // if the average is close to 0, then we decide it is silence size_t absSumA = 0; size_t absSumB = 0; bool silence = false; for (size_t i = 0; i < aSamples.size(); ++i) { absSumA += (aSamples[i]>=0)?aSamples[i]:-aSamples[i]; } for (size_t i = 0; i < bSamples.size(); ++i) { absSumB += (bSamples[i]>=0)?bSamples[i]:-bSamples[i]; } double avgA = static_cast<double>(absSumA)/aSamples.size(); double avgB = static_cast<double>(absSumB)/bSamples.size(); if (avgA <= 5.0 && avgB <= 5.0) { silence = true; } else if (avgA <= 5.0 || avgB <= 5.0) { success = false; } bool compare = !silence; int64_t maxIdx = -1; double maxVal = -1.0; if (compare) { proxyFFT<int16_t, double> aFFT(aSamples); proxyFFT<int16_t, double> bFFT(bSamples); pair<int64_t, double> tmp = compareBlock(aFFT, bFFT, aSquarePrefixSum, bSquarePrefixSum); maxIdx = tmp.first; maxVal = tmp.second; tmp = compareBlock(bFFT, aFFT, bSquarePrefixSum, aSquarePrefixSum); if (tmp.second > maxVal + 1e-6) { maxIdx = -tmp.first; maxVal = tmp.second; } if (first) { first = false; firstMaxVal = maxVal; firstOffset = maxIdx; minimumVal = maxVal; if (firstMaxVal < threshold) { if (verbose) { cout << "failed: threshold crossed in first block (" << firstMaxVal << ")" << endl; } success = false; blockFailure = block; blockFailureVal = maxVal; blockFailureOffset = maxIdx; } } else { int64_t offsetDistance = abs(maxIdx - firstOffset); bool offsetDistanceExceeded = offsetDistance > 500; bool thresholdCrossed = maxVal < threshold; if (offsetDistanceExceeded || thresholdCrossed) { if (verbose && offsetDistanceExceeded) { cout << "failed: offset distance exceeded (" << offsetDistance << ")" << endl; } if (verbose && thresholdCrossed) { cout << "failed: threshold crossed (" << maxVal << ")" << endl; } // check 
to see that the offset between blocks is not too large. success = false; blockFailure = block; blockFailureVal = maxVal; blockFailureOffset = maxIdx; } if (maxVal < minimumVal) { minimumVal = maxVal; } } } if (aSamples.size() < samplesPrBlock || bSamples.size() < samplesPrBlock) { // we don't check the last block. Is this ok? if (verbose) { cout << "finished: at last block" << endl; } done = true; } if (verbose) { if (compare) { cout << "block " << block << ": " << maxVal << " " << maxIdx << endl; } else if (silence) { cout << "block " << block << ": " << "silence" << endl; } } } if (success) { cout << "Success" << endl; cout << "Offset: " << firstOffset << endl; cout << "Similarity: " << minimumVal << endl; return 0; } else { cout << "Failure" << endl; cout << "Block: " << blockFailure << endl; cout << "Value in block: " << blockFailureVal << endl; cout << "Offset in block: " << blockFailureOffset << " (normal: " << firstOffset << ")" << endl; return 1; /* cout << "block " << blockFailure << ":" << endl; cout << "Time: " << getTimestampFromSeconds(blockFailure*5-5) << " - " << getTimestampFromSeconds(blockFailure*5) << " did not match properly" << endl; */ } }
/** * The playback thread code * \internal */ void *narrator_thread(void *narrator) { //Narrator *n = static_cast<Narrator *>(narrator); Narrator *n = (Narrator*)narrator; int queueitems; // Set initial values to 0 so that they get updated when thread gets play signal float gain = 0; float tempo = 0; float pitch = 0; PortAudio portaudio; Filter filter; Narrator::threadState state = n->getState(); LOG4CXX_INFO(narratorLog, "Starting playback thread"); do { queueitems = n->numPlaylistItems(); if(queueitems == 0) { // Wait a little before calling callback long waitms = portaudio.getRemainingms(); if(waitms != 0) { LOG4CXX_DEBUG(narratorLog, "Waiting " << waitms << " ms for playback to finish"); while(waitms > 0 && queueitems == 0) { usleep(100000); queueitems = n->numPlaylistItems(); waitms -= 100; } } // Break if we during the pause got some more queued items to play if(queueitems == 0) { if(state != Narrator::DEAD) n->audioFinishedPlaying(); n->setState(Narrator::WAIT); LOG4CXX_INFO(narratorLog, "Narrator in WAIT state"); portaudio.stop(); while(queueitems == 0) { state = n->getState(); if(state == Narrator::EXIT) break; usleep(10000); queueitems = n->numPlaylistItems(); } } LOG4CXX_INFO(narratorLog, "Narrator starting playback"); } if(state == Narrator::EXIT) break; n->setState(Narrator::PLAY); n->bResetFlag = false; Narrator::PlaylistItem pi; pthread_mutex_lock(n->narratorMutex); if(n->mPlaylist.size() > 0) { pi = n->mPlaylist.front(); n->mPlaylist.pop(); } else { LOG4CXX_ERROR(narratorLog, "Narrator started playback thread without playlistitems"); pthread_mutex_unlock(n->narratorMutex); continue; } string lang = n->mLanguage; pthread_mutex_unlock(n->narratorMutex); // If trying to play a file, open it if(pi.mClass == "file") { LOG4CXX_DEBUG(narratorLog, "Playing file: " << pi.mIdentifier); AudioStream *audioStream; std::string fileExtension = getFileExtension(pi.mIdentifier); if (fileExtension == "ogg") { audioStream = new OggStream; } else if (fileExtension == 
"mp3") { audioStream = new Mp3Stream; } else { LOG4CXX_ERROR(narratorLog, "extension '" << fileExtension << "' not supported"); continue; } if(!audioStream->open(pi.mIdentifier)) { LOG4CXX_ERROR(narratorLog, "error opening audio stream: " << pi.mIdentifier); audioStream->close(); continue; } if (portaudio.getRate() != audioStream->getRate()) { long waitms = portaudio.getRemainingms(); if (waitms != 0) { LOG4CXX_DEBUG(narratorLog, "Waiting for current playback to finish"); while (waitms > 0) { usleep(100000); waitms -= 100; } } } if(!portaudio.open(audioStream->getRate(), audioStream->getChannels())) { LOG4CXX_ERROR(narratorLog, "error initializing portaudio, (rate: " << audioStream->getRate() << " channels: " << audioStream->getChannels() << ")"); continue; } if(!filter.open(audioStream->getRate(), audioStream->getChannels())) { LOG4CXX_ERROR(narratorLog, "error initializing filter"); continue; } LOG4CXX_DEBUG(narratorLog, "Audio stream has " << audioStream->getChannels() << " channel(s) and rate " << audioStream->getRate() << " Hz"); int inSamples = 0; soundtouch::SAMPLETYPE* buffer = new soundtouch::SAMPLETYPE[audioStream->getChannels()*BUFFERSIZE]; //buffer = (short*)malloc(sizeof(short) * 2 * BUFFERSIZE); // long totalSamplesRead = 0; do { // change gain, tempo and pitch adjustGainTempoPitch(n, filter, gain, tempo, pitch); // read some stuff from the audio stream inSamples = audioStream->read(buffer, BUFFERSIZE/**audioStream->getChannels()*/); LOG4CXX_TRACE(narratorLog, "got " << inSamples << " samples"); //printf("Read %d samples from audio stream\n", inSamples); if(inSamples != 0) { filter.write(buffer, inSamples); // One sample contains data for all channels here writeSamplesToPortaudio( n, portaudio, filter, buffer ); } else { LOG4CXX_INFO(narratorLog, "Flushing soundtouch buffer"); filter.flush(); } state = n->getState(); } while (inSamples != 0 && state == Narrator::PLAY && !n->bResetFlag); if(buffer != NULL) delete [] (buffer); audioStream->close(); 
delete audioStream; } // Else try opening from database else { vector <MessageAudio> vAudioQueue; // Get a list of MessageAudio objects to play Message *m = pi.mMessage; if(m==NULL){ LOG4CXX_ERROR(narratorLog, "Message was null"); } m->setLanguage(lang); m->load(pi.mIdentifier, pi.mClass); if(!m->compile() || !m->hasAudio()) { LOG4CXX_ERROR(narratorLog, "Narrator translation not found: could not find audio for '" << pi.mIdentifier << "'"); } else { vAudioQueue = m->getAudioQueue(); } // Play what we got if(vAudioQueue.size() > 0) { vector <MessageAudio>::iterator audio; audio = vAudioQueue.begin(); do { LOG4CXX_INFO(narratorLog, "Saying: " << audio->getText()); AudioStream *audioStream; std::string encoding = ((MessageAudio&)*audio).getEncoding(); if (encoding == "ogg") { audioStream = new OggStream; } else if (encoding == "mp3") { audioStream = new Mp3Stream; } else { LOG4CXX_ERROR(narratorLog, "encoding '" << encoding << "' not supported"); audio++; continue; } if(!audioStream->open(*audio)) { LOG4CXX_ERROR(narratorLog, "error opening audio stream"); audioStream->close(); break; } if (portaudio.getRate() != audioStream->getRate()) { long waitms = portaudio.getRemainingms(); if (waitms != 0) { LOG4CXX_DEBUG(narratorLog, "Waiting for current playback to finish"); while (waitms > 0) { usleep(100000); waitms -= 100; } } } if(!portaudio.open(audioStream->getRate(), audioStream->getChannels())) { LOG4CXX_ERROR(narratorLog, "error initializing portaudio"); break; } if(!filter.open(audioStream->getRate(), audioStream->getChannels())) { LOG4CXX_ERROR(narratorLog, "error initializing filter"); break; } int inSamples = 0; soundtouch::SAMPLETYPE* buffer = new soundtouch::SAMPLETYPE[audioStream->getChannels()*BUFFERSIZE]; do { // change gain, tempo and pitch adjustGainTempoPitch(n, filter, gain, tempo, pitch); // read some stuff from the audio stream inSamples = audioStream->read(buffer, BUFFERSIZE*audioStream->getChannels()); if(inSamples != 0) { filter.write(buffer, 
inSamples); writeSamplesToPortaudio( n, portaudio, filter, buffer ); } else { LOG4CXX_INFO(narratorLog, "Flushing soundtouch buffer"); filter.flush(); } state = n->getState(); } while (inSamples != 0 && state == Narrator::PLAY && !n->bResetFlag); if(buffer != NULL) delete [] (buffer); audioStream->close(); audio++; } while(audio != vAudioQueue.end() && state == Narrator::PLAY && !n->bResetFlag); } //Cleanup message object delete(pi.mMessage); } // Abort stream? if(n->bResetFlag) { n->bResetFlag = false; portaudio.stop(); filter.clear(); } } while(state != Narrator::EXIT); LOG4CXX_INFO(narratorLog, "Shutting down playbackthread"); pthread_exit(NULL); return NULL; }
void match(AudioFile &needle, AudioFile &haystack, std::vector<pair<size_t, double> > &results) { std::vector<short> small; std::vector<short> large; std::vector<int64_t> smallPrefixSum; std::vector<int64_t> largePrefixSum; needle.getSamplesForChannel(0, small); prefixSquareSum(small, smallPrefixSum); proxyFFT<short, double> smallFFT(small); smallFFT.transform(); size_t largeTotalSize = haystack.getNumberOfSamplesPrChannel(); // vector<int64_t> maxSamplesBegin(largeTotalSize/small.size()); // vector<int64_t> maxSamplesEnd(largeTotalSize/small.size()); vector<Record> maxSamplesBegin(largeTotalSize/small.size()); vector<Record> maxSamplesEnd(largeTotalSize/small.size()); size_t stillToRead = largeTotalSize; AudioStream hayStream = haystack.getStream(0); size_t pieces = 13; for (int j = 0; ; ++j) { hayStream.read(pieces*small.size(), large); prefixSquareSum(large, largePrefixSum); size_t numberOfParts = large.size()/small.size(); size_t idxAdd = j*pieces; // Progress information std::cout << '\r' << setw(8) << ((largeTotalSize-stillToRead)+0.0)/largeTotalSize*100 << " %"; std::cout.flush(); stillToRead -= large.size(); for (size_t ii = 0; ii < numberOfParts*small.size(); ii += small.size()) { //do stuff.. 
proxyFFT<short, double> largeFFT(large.begin()+ii, large.begin()+ii+small.size()); vector<complex<double> > outBegin; vector<complex<double> > outEnd; //std::cout << "TEST1" << std::endl; cross_correlation(largeFFT, smallFFT, outBegin); cross_correlation(smallFFT, largeFFT, outEnd); size_t maxSampleBegin = 0; double maxNormFactorBegin = computeNormFactor(smallPrefixSum, largePrefixSum, smallPrefixSum.begin(), smallPrefixSum.end(), largePrefixSum.begin()+ii, largePrefixSum.begin()+small.size()+ii); for (size_t i = 0 ; i < outBegin.size(); ++i) { double normFactor = computeNormFactor(smallPrefixSum, largePrefixSum, smallPrefixSum.begin(), smallPrefixSum.end()-i, largePrefixSum.begin()+i+ii, largePrefixSum.begin()+ii+small.size()); if (outBegin[maxSampleBegin].real()/maxNormFactorBegin < outBegin[i].real()/normFactor) { maxSampleBegin = i; maxNormFactorBegin = normFactor; } } //std::cout << "TEST2" << std::endl; size_t maxSampleEnd = 0; double maxNormFactorEnd = computeNormFactor(smallPrefixSum, largePrefixSum, smallPrefixSum.begin(), smallPrefixSum.end(), largePrefixSum.begin()+ii, largePrefixSum.begin()+small.size()+ii); for (size_t i = 0 ; i < outEnd.size(); ++i) { double normFactor = computeNormFactor(smallPrefixSum, largePrefixSum, smallPrefixSum.begin()+i, smallPrefixSum.end(), largePrefixSum.begin()+ii, largePrefixSum.begin()-i+ii+small.size()); if (outEnd[maxSampleEnd].real()/maxNormFactorEnd < outEnd[i].real()/normFactor) { maxSampleEnd = i; maxNormFactorEnd = normFactor; } } maxSamplesBegin[ii/small.size()+idxAdd] = Record(maxNormFactorBegin, outBegin[maxSampleBegin].real(), small.size() - maxSampleBegin); maxSamplesEnd[ii/small.size()+idxAdd] = Record(maxNormFactorEnd, outEnd[maxSampleEnd].real(), small.size() - maxSampleEnd); } if (numberOfParts != pieces) break; } std::cout << '\r' << setw(8) << 100 << "%" << std::endl; // // FIXME: special case. // // small size does not divide large size // // => last piece is not analysed. // // fix this. 
for (size_t i = 0; i < maxSamplesBegin.size()-1; ++i) { double val = (maxSamplesBegin[i].cv + maxSamplesEnd[i+1].cv)/(maxSamplesBegin[i].nf + maxSamplesEnd[i+1].nf); if (val > 0.3) { // arbitrary magic number. Seems to work well. size_t length = maxSamplesBegin[i].s + maxSamplesEnd[i+1].s; if (length <= small.size() && length >= THRESHHOLD*small.size()) { // length must be appropriate results.push_back(make_pair((i+1)*small.size()-maxSamplesBegin[i].s, val)); } } } }