Example #1
    void AudioPlayer::play(const AudioData& data) {
	// Lazily create the PortAudio stream on first use, and (re)start it
	// if it is not currently running.
	if (_stream == nullptr)
	    initStream(data.format());
	if (!Pa_IsStreamActive(_stream))
	    Pa_StartStream(_stream);
	// Only the shared buffer write needs the mutex; the stream calls
	// above run unlocked.
	std::lock_guard<std::mutex> lock(_mutex);
	_buffer.write((Byte*)data.frames(),
		      (Byte*)data.frames() + data.nbFrames() * data.format().frameSize());
    }
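Here play() relies on initStream() to open a stream matching data's format. A plausible body, sketched against the plain PortAudio C API (the AudioFormat accessors channelCount() and sampleRate(), and the paFloat32 sample format, are assumptions for illustration, not the project's actual code):

void AudioPlayer::initStream(const AudioFormat& fmt) {
    // Output-only stream; a null callback selects PortAudio's blocking API.
    Pa_OpenDefaultStream(&_stream,
                         0,                   // no input channels
                         fmt.channelCount(),  // assumed accessor
                         paFloat32,           // assumed sample format
                         fmt.sampleRate(),    // assumed accessor
                         paFramesPerBufferUnspecified,
                         nullptr, nullptr);   // no callback, no user data
}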
Example #2
AudioData AudioResampleImpl::process(AudioComponentPtr source, int desiredSamples) const
{
	AudioData result(m_to);

	if (!m_resampler)
		return result;

	if (source->format() != m_from)
	{
		assert(!"AudioResampler::process: incompatible source format");
		return result;
	}

	int lastReadSamples = 0;
	int resultSamplesPerChannelFact = 0;
	do
	{
		// Ask the source for roughly the number of input samples needed
		// to produce desiredSamples at the target rate.
		AudioData sourceData = source->read(
			av_rescale_rnd(desiredSamples,
			               m_from.sampleRate(),
			               m_to.sampleRate(),
			               AV_ROUND_DOWN));

		if (sourceData.isEmpty())
			break;

		lastReadSamples = sourceData.numSamples();

		const auto sourceSamplesPerChannel = sourceData.numSamples();
		const auto resultSamplesPerChannel = resultSamples(sourceSamplesPerChannel);
		result.setSamples(resultSamplesPerChannel);

		boost::scoped_array<char*> preparedSourceData;
		splitAudioData(sourceData, preparedSourceData);

		boost::scoped_array<char*> preparedDestData;
		splitAudioData(result, preparedDestData);

		const uint8_t** sourcePtr = (const uint8_t**)preparedSourceData.get();
		uint8_t** destPtr = (uint8_t**)preparedDestData.get();

		resultSamplesPerChannelFact = swr_convert(m_resampler.get(), destPtr,
			static_cast<int>(resultSamplesPerChannel),
			sourcePtr, sourceSamplesPerChannel);

		if (!resultSamplesPerChannelFact)
		{
			qDebug() << "AudioResampler::process: not enough data to process. ("
				<< m_from.sampleRate() << "," << m_to.sampleRate() << ").\t source samples" << sourceSamplesPerChannel;
		}

	} while (!resultSamplesPerChannelFact);

	result.setSamples(resultSamplesPerChannelFact);

	return result;
}
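The key step in the loop is av_rescale_rnd(desiredSamples, m_from.sampleRate(), m_to.sampleRate(), AV_ROUND_DOWN), which computes desiredSamples * fromRate / toRate, i.e. how many input samples are needed to yield the requested output count. A minimal sketch of the same bookkeeping against libswresample directly (error handling elided; the SwrContext is assumed to already be configured for the two formats):

extern "C" {
#include <libswresample/swresample.h>
#include <libavutil/mathematics.h>
}

// Returns the number of output samples actually produced. This can be zero
// while the resampler buffers input internally, which is exactly why
// process() above retries in a do/while loop.
int convertChunk(SwrContext* swr, uint8_t** dst, int dstSamples,
                 const uint8_t** src, int srcRate, int dstRate)
{
    int srcSamples = (int)av_rescale_rnd(dstSamples, srcRate, dstRate,
                                         AV_ROUND_DOWN);
    return swr_convert(swr, dst, dstSamples, src, srcSamples);
}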
Example #3
static void
SendStreamAudio(DecodedStreamData* aStream, int64_t aStartTime,
                MediaData* aData, AudioSegment* aOutput,
                uint32_t aRate, double aVolume)
{
  MOZ_ASSERT(aData);
  AudioData* audio = aData->As<AudioData>();
  // This logic has to mimic AudioSink closely to make sure we write
  // the exact same silences
  CheckedInt64 audioWrittenOffset = aStream->mAudioFramesWritten +
                                    UsecsToFrames(aStartTime, aRate);
  CheckedInt64 frameOffset = UsecsToFrames(audio->mTime, aRate);

  if (!audioWrittenOffset.isValid() ||
      !frameOffset.isValid() ||
      // ignore packet that we've already processed
      frameOffset.value() + audio->mFrames <= audioWrittenOffset.value()) {
    return;
  }

  if (audioWrittenOffset.value() < frameOffset.value()) {
    int64_t silentFrames = frameOffset.value() - audioWrittenOffset.value();
    // Write silence to catch up
    AudioSegment silence;
    silence.InsertNullDataAtStart(silentFrames);
    aStream->mAudioFramesWritten += silentFrames;
    audioWrittenOffset += silentFrames;
    aOutput->AppendFrom(&silence);
  }

  MOZ_ASSERT(audioWrittenOffset.value() >= frameOffset.value());

  int64_t offset = audioWrittenOffset.value() - frameOffset.value();
  size_t framesToWrite = audio->mFrames - offset;

  audio->EnsureAudioBuffer();
  nsRefPtr<SharedBuffer> buffer = audio->mAudioBuffer;
  AudioDataValue* bufferData = static_cast<AudioDataValue*>(buffer->Data());
  nsAutoTArray<const AudioDataValue*, 2> channels;
  for (uint32_t i = 0; i < audio->mChannels; ++i) {
    channels.AppendElement(bufferData + i * audio->mFrames + offset);
  }
  aOutput->AppendFrames(buffer.forget(), channels, framesToWrite);
  aStream->mAudioFramesWritten += framesToWrite;
  aOutput->ApplyVolume(aVolume);

  aStream->mNextAudioTime = audio->GetEndTime();
}
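The offset bookkeeping is easiest to follow with concrete (hypothetical) numbers:

// At aRate = 48000 Hz, a packet with mTime = 20000 us starts at frame
// offset UsecsToFrames(20000, 48000) = 960. If the stream has written
// only 480 frames so far, 960 - 480 = 480 frames of silence are appended
// first, so the packet's samples stay aligned with their timestamp.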
Example #4
static void
SendStreamAudio(DecodedStreamData* aStream, int64_t aStartTime,
                MediaData* aData, AudioSegment* aOutput, uint32_t aRate,
                const PrincipalHandle& aPrincipalHandle)
{
  // The amount of audio frames that is used to fuzz rounding errors.
  static const int64_t AUDIO_FUZZ_FRAMES = 1;

  MOZ_ASSERT(aData);
  AudioData* audio = aData->As<AudioData>();
  // This logic has to mimic AudioSink closely to make sure we write
  // the exact same silences
  CheckedInt64 audioWrittenOffset = aStream->mAudioFramesWritten +
                                    UsecsToFrames(aStartTime, aRate);
  CheckedInt64 frameOffset = UsecsToFrames(audio->mTime, aRate);

  if (!audioWrittenOffset.isValid() ||
      !frameOffset.isValid() ||
      // ignore packet that we've already processed
      audio->GetEndTime() <= aStream->mNextAudioTime) {
    return;
  }

  if (audioWrittenOffset.value() + AUDIO_FUZZ_FRAMES < frameOffset.value()) {
    int64_t silentFrames = frameOffset.value() - audioWrittenOffset.value();
    // Write silence to catch up
    AudioSegment silence;
    silence.InsertNullDataAtStart(silentFrames);
    aStream->mAudioFramesWritten += silentFrames;
    audioWrittenOffset += silentFrames;
    aOutput->AppendFrom(&silence);
  }

  // Always write the whole sample without truncation to be consistent with
  // DecodedAudioDataSink::PlayFromAudioQueue()
  audio->EnsureAudioBuffer();
  RefPtr<SharedBuffer> buffer = audio->mAudioBuffer;
  AudioDataValue* bufferData = static_cast<AudioDataValue*>(buffer->Data());
  AutoTArray<const AudioDataValue*, 2> channels;
  for (uint32_t i = 0; i < audio->mChannels; ++i) {
    channels.AppendElement(bufferData + i * audio->mFrames);
  }
  aOutput->AppendFrames(buffer.forget(), channels, audio->mFrames, aPrincipalHandle);
  aStream->mAudioFramesWritten += audio->mFrames;

  aStream->mNextAudioTime = audio->GetEndTime();
}
Example #5
Chromagram SpectrumAnalyser::chromagramOfFirstFrame(
  const AudioData& audio
) const {
  if (audio.getChannels() != 1)
    throw Exception("Audio must be monophonic to be analysed");
  Chromagram c(1, octaves, bandsPerSemitone);
  unsigned int fftFrameSize = fft->getFrameSize();
  for (unsigned int sample = 0; sample < fftFrameSize; sample++) {
    fft->setInput(sample, audio.getSample(sample) * temporalWindow[sample]);
  }
  fft->execute();
  std::vector<float> cv = ct->chromaVector(fft);
  for (unsigned int band = 0; band < c.getBands(); band++) {
    c.setMagnitude(0, band, cv[band]);
  }
  return c;
}
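temporalWindow is multiplied in sample-by-sample before the FFT to reduce spectral leakage. A plausible construction is a Hann window of the FFT frame size (an assumption for illustration; the library may use a different window shape):

#include <cmath>
#include <vector>

std::vector<float> makeHannWindow(unsigned int n) {
    const float pi = 3.14159265f;
    std::vector<float> w(n);
    for (unsigned int i = 0; i < n; ++i)
        w[i] = 0.5f * (1.0f - std::cos(2.0f * pi * i / (n - 1)));
    return w;
}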
Example #6
void AudioResampleImpl::splitAudioData(AudioData & data, boost::scoped_array<char*> & split) const
{
	auto dataFormat = data.format();

	if (dataFormat.isPlanar())
	{
		/// For a planar format the data must be split into one array per channel
		const int numChannels = dataFormat.channelCount();
		split.reset(new char*[numChannels]);
		split_ref(data.begin(), data.end(), data.numBytes() / numChannels, split.get());
	}
	else
	{
		/// Interleaved data fits in a single array
		split.reset(new char*[1]);
		split[0] = data.data();
	}
}
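The two branches reflect the two sample layouts: interleaved data stores frames as L0 R0 L1 R1 ..., so one pointer covers everything, while planar data stores each channel contiguously, so the resampler needs one pointer per channel. A minimal sketch of the planar split, assuming channel planes sit back to back (which the numBytes() / numChannels chunking above implies):

void splitPlanar(char* base, int numChannels, size_t bytesPerChannel,
                 char** out)
{
    // out[ch] points at the start of channel ch's contiguous plane.
    for (int ch = 0; ch < numChannels; ++ch)
        out[ch] = base + ch * bytesPerChannel;
}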
Example #7
  void KeyFinder::preprocess(
    AudioData& workingAudio,
    Workspace& workspace,
    const Parameters& params
  ) {
    workingAudio.reduceToMono();

    // TODO: there is presumably some good maths to determine filter frequencies.
    // For now, this approximates original experiment values for default params.
    float lpfCutoff = params.getLastFrequency() * 1.012;
    float dsCutoff = params.getLastFrequency() * 1.10;
    unsigned int downsampleFactor = (int) floor(workingAudio.getFrameRate() / 2 / dsCutoff);

    // get filter
    const LowPassFilter* lpf = lpfFactory.getLowPassFilter(160, workingAudio.getFrameRate(), lpfCutoff, 2048);
    lpf->filter(workingAudio, workspace, downsampleFactor); // downsampleFactor shortcut
    // note we don't delete the LPF; it's stored in the factory for reuse

    workingAudio.downsample(downsampleFactor);
  }
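The factor is chosen so that the downsampled rate's Nyquist frequency still clears the cutoffs. A worked check with hypothetical numbers:

#include <cassert>
#include <cmath>

int main() {
    const float frameRate = 44100.0f;         // hypothetical input rate
    const float lastFreq  = 1760.0f;          // hypothetical top frequency
    const float dsCutoff  = lastFreq * 1.10f; // 1936 Hz
    const unsigned factor =
        (unsigned)std::floor(frameRate / 2 / dsCutoff);
    assert(factor == 11);
    // Post-downsample rate: 44100 / 11 ~ 4009 Hz, whose Nyquist
    // (~2004 Hz) is still above both cutoffs.
    return 0;
}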
Example #8
void FileUploader::uploadMedia(const FullMsgId &msgId, const ReadyLocalMedia &media) {
	if (media.type == PreparePhoto) {
		App::feedPhoto(media.photo, media.photoThumbs);
	} else if (media.type == PrepareDocument) {
		DocumentData *document;
		if (media.photoThumbs.isEmpty()) {
			document = App::feedDocument(media.document);
		} else {
			document = App::feedDocument(media.document, media.photoThumbs.begin().value());
		}
		document->status = FileUploading;
		if (!media.file.isEmpty()) {
			document->setLocation(FileLocation(StorageFilePartial, media.file));
		}
	} else if (media.type == PrepareAudio) {
		AudioData *audio = App::feedAudio(media.audio);
		audio->status = FileUploading;
		audio->setData(media.data);
	}
	queue.insert(msgId, File(media));
	sendNext();
}
Example #9
static void* clientCallback(void* arg)
{
  ClientData* data = static_cast<ClientData*>(arg);
  // The server side connects to 127.0.0.2, the client side to 127.0.0.1.
  std::string host = data->isServer ? "127.0.0.2" : "127.0.0.1";

  try
    {
      ISocket* client = new Socket();

      if (client->run(host))
        {
          IAudioIO* audioIO = new PortAudio;

          while (client->isConnected())
            {
              // Record a chunk, then ship the raw bytes to the peer.
              audioIO->startRecord(RECORD_TIME);
              AudioData* audio = audioIO->getRecorded();

              client->send(audio->getData(), audio->getSize());
            }
          delete audioIO;
        }
      delete client;
      data->clientThread->exit();
    }
  catch (ISocket::Exception& e)
    {
      std::cerr << e.what() << std::endl;
    }
  catch (IAudioIO::Exception& e)
    {
      std::cerr << e.what() << std::endl;
    }
  return NULL;
}
Example #10
void FileUploader::upload(const FullMsgId &msgId, const FileLoadResultPtr &file) {
	if (file->type == PreparePhoto) {
		PhotoData *photo = App::feedPhoto(file->photo, file->photoThumbs);
		photo->uploadingData = new PhotoData::UploadingData(file->partssize);
	} else if (file->type == PrepareDocument) {
		DocumentData *document;
		if (file->thumb.isNull()) {
			document = App::feedDocument(file->document);
		} else {
			document = App::feedDocument(file->document, file->thumb);
		}
		document->status = FileUploading;
		if (!file->filepath.isEmpty()) {
			document->setLocation(FileLocation(StorageFilePartial, file->filepath));
		}
	} else if (file->type == PrepareAudio) {
		AudioData *audio = App::feedAudio(file->audio);
		audio->status = FileUploading;
		audio->setData(file->content);
	}
	queue.insert(msgId, File(file));
	sendNext();
}
Example #11
  void LowPassFilterPrivate::filter(AudioData& audio, Workspace& workspace, unsigned int shortcutFactor) const {

    if (audio.getChannels() > 1) {
      throw Exception("Monophonic audio only");
    }

    std::vector<double>* buffer = workspace.lpfBuffer;

    if (buffer == NULL) {
      workspace.lpfBuffer = new std::vector<double>(impulseLength, 0.0);
      buffer = workspace.lpfBuffer;
    } else {
      // clear delay buffer
      std::vector<double>::iterator bufferIterator = buffer->begin();
      while (bufferIterator < buffer->end()) {
        *bufferIterator = 0.0;
        std::advance(bufferIterator, 1);
      }
    }

    std::vector<double>::iterator bufferFront = buffer->begin();
    std::vector<double>::iterator bufferBack;
    std::vector<double>::iterator bufferTemp;

    unsigned int sampleCount = audio.getSampleCount();
    audio.resetIterators();

    double sum;
    // for each frame (running off the end of the sample stream by delay)
    for (unsigned int inSample = 0; inSample < sampleCount + delay; inSample++) {
      // shuffle old samples along delay buffer
      bufferBack = bufferFront;
      std::advance(bufferFront, 1);
      if (bufferFront == buffer->end()) {
        bufferFront = buffer->begin();
      }

      // load new sample into back of delay buffer
      if (audio.readIteratorWithinUpperBound()) {
        *bufferBack = audio.getSampleAtReadIterator() / gain;
        audio.advanceReadIterator();
      } else {
        *bufferBack = 0.0; // zero pad once we're past the end of the file
      }
      // start doing the maths once the delay has passed
      int outSample = (signed)inSample - (signed)delay;
      if (outSample < 0) {
        continue;
      }
      // and, if shortcut != 1, only do the maths for the useful samples (this is mathematically dodgy, but it's faster and it usually works)
      if (outSample % shortcutFactor > 0) {
        continue;
      }
      sum = 0.0;
      bufferTemp = bufferFront;
      std::vector<double>::const_iterator coefficientIterator = coefficients.begin();
      while (coefficientIterator < coefficients.end()) {
        sum += *coefficientIterator * *bufferTemp;
        std::advance(coefficientIterator, 1);
        std::advance(bufferTemp, 1);
        if (bufferTemp == buffer->end()) {
          bufferTemp = buffer->begin();
        }
      }
      audio.setSampleAtWriteIterator(sum);
      audio.advanceWriteIterator(shortcutFactor);
    }
  }
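Stripped of the circular-buffer bookkeeping, the inner while loop evaluates a direct-form FIR convolution:

// y[n] = (1 / gain) * sum over k of coefficients[k] * x[n - k],
// computed only for every shortcutFactor-th output sample; the `delay`
// offset compensates for the filter's group delay so the output stays
// time-aligned with the input.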
Example #12
//Decodes the data in a WAV file and saves it into an AudioData class
AudioData * coder::decode(string filename){
    fstream audioFile;
    audioFile.open(filename, ios_base::in | ios_base::binary);

    //File does not exist
    if(audioFile.fail()){
        return NULL;
    }

    AudioData * data = NULL;
    if(audioFile.is_open()){
        data = new AudioData;

        //Read RIFF
        int smallBuffer;
        audioFile.read((char *)&smallBuffer, 4);
        data->setRIFF((char *)&smallBuffer);

        //Read file size
        audioFile.read((char *)&smallBuffer, 4);
        data->setFileSize(smallBuffer);

        //Read file type
        audioFile.read((char *)&smallBuffer, 4);
        data->setFileType((char *)&smallBuffer);

        //Read format
        audioFile.read((char *)&smallBuffer, 4);
        data->setFormat((char *)&smallBuffer);

        //Read format info size
        unsigned int infoSize = 0;
        audioFile.read((char *)&infoSize, 4);
        data->setFormatInfoSize(infoSize);

        //Read format info
        char * largeBuffer = (char *)malloc(sizeof(char) * infoSize); //remember to free on close
        memset(largeBuffer, 0x00, infoSize);
        audioFile.read(largeBuffer, infoSize);
        data->setFormatInfo(largeBuffer);

        //Read data chunk
        audioFile.read((char *)&smallBuffer, 4);
        data->setDataChunk((char *)&smallBuffer);

        //Read data size
        unsigned int dataSize = 0;
        audioFile.read((char *)&dataSize, 4);
        data->setDataSize(dataSize);

        //Read audio data
        largeBuffer = (char *)malloc(sizeof(char) * dataSize);
        memset(largeBuffer, 0x00, dataSize);
        audioFile.read(largeBuffer, dataSize);

        //Separate audio data into multiple channels
        short ** channelData = seperateChannels((short *)largeBuffer, data->getChannels(), data->getNumberOfSamples());
        data->setChannelData(channelData);
        free(largeBuffer);

        audioFile.close();
    }

    return data;
}
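For reference, the fixed-size fields the sequential 4-byte reads above walk through, written as a packed struct in on-disk (little-endian) order. This mirrors the canonical RIFF/WAVE layout and is not a type defined by this codebase:

#include <cstdint>

#pragma pack(push, 1)
struct WavHeaderPrefix {
    char     riff[4];   // "RIFF"
    uint32_t fileSize;  // total file size minus 8
    char     wave[4];   // "WAVE"
    char     fmt[4];    // "fmt "
    uint32_t fmtSize;   // size of the format info that follows
    // ... fmtSize bytes of format info, then a "data" chunk header
};
#pragma pack(pop)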
Example #13
void FileUploader::partLoaded(const MTPBool &result, mtpRequestId requestId) {
	QMap<mtpRequestId, int32>::iterator j = docRequestsSent.end();
	QMap<mtpRequestId, QByteArray>::iterator i = requestsSent.find(requestId);
	if (i == requestsSent.cend()) {
		j = docRequestsSent.find(requestId);
	}
	if (i != requestsSent.cend() || j != docRequestsSent.cend()) {
		if (mtpIsFalse(result)) { // failed to upload current file
			currentFailed();
			return;
		} else {
			QMap<mtpRequestId, int32>::iterator dcIt = dcMap.find(requestId);
			if (dcIt == dcMap.cend()) { // must not happen
				currentFailed();
				return;
			}
			int32 dc = dcIt.value();
			dcMap.erase(dcIt);

			int32 sentPartSize = 0;
			Queue::const_iterator k = queue.constFind(uploading);
			if (i != requestsSent.cend()) {
				sentPartSize = i.value().size();
				requestsSent.erase(i);
			} else {
				sentPartSize = k->docPartSize;
				docRequestsSent.erase(j);
			}
			sentSize -= sentPartSize;
			sentSizes[dc] -= sentPartSize;
			if (k->type() == PreparePhoto) {
				k->fileSentSize += sentPartSize;
				PhotoData *photo = App::photo(k->id());
				if (photo->uploading() && k->file) {
					photo->uploadingData->size = k->file->partssize;
					photo->uploadingData->offset = k->fileSentSize;
				}
				emit photoProgress(k.key());
			} else if (k->type() == PrepareDocument) {
				DocumentData *doc = App::document(k->id());
				if (doc->uploading()) {
					doc->uploadOffset = (k->docSentParts - docRequestsSent.size()) * k->docPartSize;
					if (doc->uploadOffset > doc->size) {
						doc->uploadOffset = doc->size;
					}
				}
				emit documentProgress(k.key());
			} else if (k->type() == PrepareAudio) {
				AudioData *audio = App::audio(k->id());
				if (audio->uploading()) {
					audio->uploadOffset = (k->docSentParts - docRequestsSent.size()) * k->docPartSize;
					if (audio->uploadOffset > audio->size) {
						audio->uploadOffset = audio->size;
					}
				}
				emit audioProgress(k.key());
			}
		}
	}

	sendNext();
}
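One reading of the uploadOffset formula, with hypothetical numbers (assuming docSentParts counts parts handed to the network and docRequestsSent holds the still-unacknowledged ones):

// docPartSize = 32768 bytes, docSentParts = 10, and 3 requests still
// in flight: uploadOffset = (10 - 3) * 32768 = 229376 bytes confirmed,
// then clamped to the document size in case the final part was short.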
Example #14
Status IPPAudioDecoder::Run()
{
	m_bPlaying = true; // [neuromos] Flag for checking whether we are currently inside the encoder loop.

	if (m_nAudioSliceLen == 0)
		ASSERT(FALSE);

	AudioData	in;
	AudioData	out; out.Alloc(m_nAudioSliceLen);
	AudioData*	p_dataIn  = &in;
	AudioData*	p_dataOut = &out;
	Status		ret = UMC_OK;
	Ipp32s		len;

//	p_dataIn->Init();

	p_dataIn->Alloc(m_nAudioSliceLen);

	decodedSize = 0;

	mFramesDecoded = 0;

	for (;;)
	{
		if (p_dataIn)
		{
			if (UMC_OK != GetInputData(p_dataIn))
				break;
//			if (UMC_OK == GetInputData(p_dataIn))
//				p_dataIn->SetTime((mFramesEncoded+1) / pEncoderParams->info.framerate);
//			else
//				p_dataIn = NULL;
		}

		int	nDataInSize = p_dataIn->GetDataSize();
		ret = pCodec->GetFrame(p_dataIn, p_dataOut);
		p_dataIn->MoveDataPointer(-nDataInSize);

		if (ret != UMC_OK && ret != UMC_ERR_NOT_ENOUGH_DATA && ret != UMC_ERR_END_OF_STREAM)
		{
			m_bPlaying = false; // [neuromos] Flag for checking whether we are currently inside the encoder loop.
			return UMC_ERR_FAILED;
		}

		len = (Ipp32s) p_dataOut->GetDataSize();
		decodedSize += len;

		if (UMC_OK != PutOutputData(p_dataOut))
		{
			m_bPlaying = false; // [neuromos] Flag for checking whether we are currently inside the encoder loop.
			return UMC_ERR_FAILED;
		}

		if (p_dataIn)
		{
			mFramesDecoded++;
		}
		else
		{
			if (! len || ret == UMC_ERR_END_OF_STREAM) break; // EOF
		}
	}

	PutOutputData(NULL); // means EOF

	pCodec->Close();

	m_bPlaying = false; // [neuromos] Flag for checking whether we are currently inside the encoder loop.

	return UMC_OK;
}
Example #15
// This method only supports PCM/uncompressed format, with a single fmt
// chunk followed by a single data chunk
AudioData* loadWaveFile(int fd) {

    int srate = 0;
    int channels = 0;

    if (fd == -1)
        return 0;

    unsigned char hdr[36];
    if (!readBytes(fd, hdr, 36)) {
        close(fd);
        return 0;
    }
    if (hdr[0] != 'R' || hdr[1] != 'I' || hdr[2] != 'F' || hdr[3] != 'F') {
        close(fd);
        return 0;
    }
    // Note: bytes 4 thru 7 contain the file size - 8 bytes
    if (hdr[8] != 'W' || hdr[9] != 'A' || hdr[10] != 'V' || hdr[11] != 'E') {
        close(fd);
        return 0;
    }
    if (hdr[12] != 'f' || hdr[13] != 'm' || hdr[14] != 't' || hdr[15] != ' ') {
        close(fd);
        return 0;
    }

    long extraBytes = hdr[16] + (hdr[17] << 8) + (hdr[18] << 16) + (hdr[19] << 24) - 16;
    int compression = hdr[20] + (hdr[21] << 8);
    // Type 1 is PCM/Uncompressed
    if (compression != 1) {
        close(fd);
        return 0;
    }
    channels = hdr[22] + (hdr[23] << 8);
    // Only mono or stereo PCM is supported in this example
    if (channels < 1 || channels > 2) {
        close(fd);
        return 0;
    }
    // Samples per second, independent of number of channels
    srate = hdr[24] + (hdr[25] << 8) + (hdr[26] << 16) + (hdr[27] << 24);
    // Bytes 28-31 contain the "average bytes per second", unneeded here
    // Bytes 32-33 contain the number of bytes per sample (includes channels)
    // Bytes 34-35 contain the number of bits per single sample
    int bits = hdr[34] + (hdr[35] << 8);
    // Supporting other sample depths would require conversion
    if (bits != 16) {
        close(fd);
        return 0;
    }

    // Skip past extra bytes, if any
    unsigned char extraskip[extraBytes];
    if (!readBytes(fd, extraskip, extraBytes)) {
        close(fd);
        return 0;
    }

    // Start reading the next frame.  Only supported frame is the data block
    unsigned char b[8];
    if (!readBytes(fd, b, 8)) {
        close(fd);
        return 0;
    }
    // Do we have a fact block?
    if (b[0] == 'f' && b[1] == 'a' && b[2] == 'c' && b[3] == 't') {
        // Skip the fact block
        unsigned char factskip[12];
        if (!readBytes(fd, factskip, 12)) {
            close(fd);
            return 0;
        }

        // Read the next frame
        if (!readBytes(fd, b, 8)) {
            close(fd);
            return 0;
        }
    }

    // Now look for the data block
    if (b[0] != 'd' || b[1] != 'a' || b[2] != 't' || b[3] != 'a') {
        close(fd);
        return 0;
    }

    // This will be 0 if ffmpeg is used, since it can't seek the stream to
    // write the value; in that case we ignore it and just read to the end
    long bytes = b[4] + (b[5] << 8) + (b[6] << 16) + (b[7] << 24);

    // No need to read the whole file, just the first 135 seconds
    long bytesPerSecond = srate * 2 * channels;

    if (bytes == 0)
        // Maximum data is 2 gigabytes; getting a puid won't work with bigger files
        bytes = 2 * 1000 * 1000 * 1000;

    // Now we read parts of the file until eof or `bytes` is reached;
    // bytesPerSecond is used as the buffer size
    int readSize = bytesPerSecond;

    unsigned char* samples = NULL;
    int size = 0;
    while (size < bytes) {
        samples = (unsigned char*)realloc(samples, size + readSize);
        int n = read(fd, samples + size, readSize);

        if (n < 0) {
            free(samples); // allocated with realloc, so free(), not delete[]
            close(fd);
            return 0;
        }
        size += n;
        if (n == 0)
            break;
    }
    close(fd);

    long ms = (size / 2) / (srate / 1000);
    if (channels == 2) ms /= 2;

    AudioData* data = new AudioData();

    data->setData(samples, OFA_LITTLE_ENDIAN, size / 2, srate,
                  channels == 2 ? 1 : 0, ms, "wav");

    return data;
}
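The repeated hdr[i] + (hdr[i+1] << 8) + ... expressions assemble little-endian integers byte by byte. Factored into helpers for clarity (illustration only, not part of the original file):

static int readLE16(const unsigned char* p) {
    return p[0] | (p[1] << 8);
}

static long readLE32(const unsigned char* p) {
    return (long)p[0] | ((long)p[1] << 8)
         | ((long)p[2] << 16) | ((long)p[3] << 24);
}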
Example #16
  KeyDetectionResult KeyFinder::findKey(const AudioData& originalAudio, const Parameters& params){

    KeyDetectionResult result;

    AudioData* workingAudio = new AudioData(originalAudio);

    workingAudio->reduceToMono();

    Downsampler ds;
    ds.downsample(workingAudio, params.getLastFreq(), &lpfFactory);

    SpectrumAnalyser* sa = new SpectrumAnalyser(workingAudio->getFrameRate(), params, &ctFactory);

    // run spectral analysis
    Chromagram* ch = sa->chromagram(workingAudio);

    delete workingAudio;
    delete sa;

    // reduce chromagram
    ch->reduceTuningBins(params);
    result.fullChromagram = Chromagram(*ch);
    ch->reduceToOneOctave(params);
    result.oneOctaveChromagram = Chromagram(*ch);

    // get harmonic change signal
    Segmentation* segmenter = Segmentation::getSegmentation(params);
    result.harmonicChangeSignal = segmenter->getRateOfChange(*ch, params);

    // get track segmentation
    std::vector<unsigned int> segmentBoundaries = segmenter->getSegments(result.harmonicChangeSignal, params);
    segmentBoundaries.push_back(ch->getHops()); // sentinel
    delete segmenter;

    // get key estimates for each segment
    KeyClassifier hc(params);
    std::vector<float> keyWeights(24); // TODO: not ideal using int cast of key_t enum. Hash?

    for (int s = 0; s < (signed)segmentBoundaries.size()-1; s++){
      KeyDetectionSegment segment;
      segment.firstWindow = segmentBoundaries[s];
      segment.lastWindow = segmentBoundaries[s+1] - 1;
      // collapse segment's time dimension, for a single chroma vector and a single energy value
      std::vector<float> segmentChroma(ch->getBins());
      // for each relevant hop of the chromagram
      for (unsigned int hop = segment.firstWindow; hop <= segment.lastWindow; hop++) {
        // for each bin
        for (unsigned int bin = 0; bin < ch->getBins(); bin++) {
          float value = ch->getMagnitude(hop, bin);
          segmentChroma[bin] += value;
          segment.energy += value;
        }
      }
      segment.key = hc.classify(segmentChroma);
      if(segment.key != SILENCE){
        keyWeights[segment.key] += segment.energy;
      }
      result.segments.push_back(segment);
    }

    delete ch;

    // get global key
    result.globalKeyEstimate = SILENCE;
    float mostCommonKeyWeight = 0.0;
    for (int k = 0; k < (signed)keyWeights.size(); k++){
      if(keyWeights[k] > mostCommonKeyWeight){
        mostCommonKeyWeight = keyWeights[k];
        result.globalKeyEstimate = (key_t)k;
      }
    }

    return result;

  }
Example #17
  KeyDetectionResult KeyFinder::findKey(const AudioData& originalAudio, const Parameters& params){

    KeyDetectionResult result;

    AudioData* workingAudio = new AudioData(originalAudio);

    workingAudio->reduceToMono();

    // TODO: there is presumably some good maths to determine filter frequencies
    float lpfCutoff = params.getLastFrequency() * 1.05;
    float dsCutoff = params.getLastFrequency() * 1.10;
    unsigned int downsampleFactor = (int)floor( workingAudio->getFrameRate() / 2 / dsCutoff );

    // get filter
    LowPassFilter* lpf = lpfFactory.getLowPassFilter(160, workingAudio->getFrameRate(), lpfCutoff, 2048);
    // feeding in the downsampleFactor for a shortcut
    lpf->filter(workingAudio, downsampleFactor);
    // note we don't delete the LPF; it's stored in the factory for reuse

    Downsampler ds;
    ds.downsample(workingAudio, downsampleFactor);

    SpectrumAnalyser* sa = new SpectrumAnalyser(workingAudio->getFrameRate(), params, &ctFactory);

    // run spectral analysis
    Chromagram* ch = sa->chromagram(workingAudio);

    delete workingAudio;
    delete sa;

    // reduce chromagram
    ch->reduceTuningBins(params);
    result.fullChromagram = Chromagram(*ch);
    ch->reduceToOneOctave(params);
    result.oneOctaveChromagram = Chromagram(*ch);

    // get harmonic change signal
    Segmentation* segmenter = Segmentation::getSegmentation(params);
    result.harmonicChangeSignal = segmenter->getRateOfChange(*ch, params);

    // get track segmentation
    std::vector<unsigned int> segmentBoundaries = segmenter->getSegments(result.harmonicChangeSignal, params);
    segmentBoundaries.push_back(ch->getHops()); // sentinel
    delete segmenter;

    // get key estimates for each segment
    KeyClassifier hc(params);
    std::vector<float> keyWeights(24); // TODO: not ideal using int cast of key_t enum. Hash?

    for (int s = 0; s < (signed)segmentBoundaries.size()-1; s++){
      KeyDetectionSegment segment;
      segment.firstHop = segmentBoundaries[s];
      segment.lastHop = segmentBoundaries[s+1] - 1;
      // collapse segment's time dimension
      std::vector<float> segmentChroma(ch->getBins());
      for (unsigned int hop = segment.firstHop; hop <= segment.lastHop; hop++) {
        for (unsigned int bin = 0; bin < ch->getBins(); bin++) {
          float value = ch->getMagnitude(hop, bin);
          segmentChroma[bin] += value;
          segment.energy += value;
        }
      }
      segment.key = hc.classify(segmentChroma);
      if(segment.key != SILENCE){
        keyWeights[segment.key] += segment.energy;
      }
      result.segments.push_back(segment);
    }

    delete ch;

    // get global key
    result.globalKeyEstimate = SILENCE;
    float mostCommonKeyWeight = 0.0;
    for (int k = 0; k < (signed)keyWeights.size(); k++){
      if(keyWeights[k] > mostCommonKeyWeight){
        mostCommonKeyWeight = keyWeights[k];
        result.globalKeyEstimate = (key_t)k;
      }
    }

    return result;

  }