// Copies aInput into aBlock starting at frame aOffsetInBlock. Source samples
// of type T are converted to float and scaled by aInput.mVolume. A null input
// chunk, or any channel whose source pointer is null, produces zeros in the
// corresponding output channel.
static void
CopyChunkToBlock(AudioChunk& aInput, AudioBlock *aBlock, uint32_t aOffsetInBlock)
{
  const uint32_t outChannelCount = aBlock->ChannelCount();
  AutoTArray<const T*,2> sources;

  if (!aInput.IsNull()) {
    const nsTArray<const T*>& chunkChannels = aInput.ChannelData<T>();
    sources.SetLength(chunkChannels.Length());
    PodCopy(sources.Elements(), chunkChannels.Elements(), sources.Length());
    if (sources.Length() != outChannelCount) {
      // Up-mixing is the only adjustment needed: aBlock's channel count has
      // been chosen to be a superset of the channel count of every chunk.
      AudioChannelsUpMix(&sources, outChannelCount, static_cast<T*>(nullptr));
    }
  } else {
    // No input data: one null source pointer per output channel.
    sources.SetLength(outChannelCount);
    PodZero(sources.Elements(), outChannelCount);
  }

  for (uint32_t ch = 0; ch < outChannelCount; ++ch) {
    float* dest = aBlock->ChannelFloatsForWrite(ch) + aOffsetInBlock;
    const T* src = sources[ch];
    if (!src) {
      // Nothing to read for this channel; write silence.
      PodZero(dest, aInput.GetDuration());
      continue;
    }
    ConvertAudioSamplesWithScale(src, dest, aInput.GetDuration(),
                                 aInput.mVolume);
  }
}
// Read audio data in aChunk, resample them if needed, // and then send the result to OMX input buffer (or buffers if one buffer is not enough). // aSamplesRead will be the number of samples that have been read from aChunk. BufferState ReadChunk(AudioChunk& aChunk, size_t* aSamplesRead) { size_t chunkSamples = aChunk.GetDuration(); size_t bytesToCopy = chunkSamples * mOMXAEncoder.mResamplingRatio * mOMXAEncoder.mChannels * sizeof(AudioDataValue); size_t bytesCopied = 0; if (bytesToCopy <= AvailableSize()) { if (aChunk.IsNull()) { bytesCopied = SendSilenceToBuffer(chunkSamples); } else { bytesCopied = SendChunkToBuffer(aChunk, chunkSamples); } UpdateAfterSendChunk(chunkSamples, bytesCopied, aSamplesRead); } else { // Interleave data to a temporary buffer. nsAutoTArray<AudioDataValue, 9600> pcm; pcm.SetLength(bytesToCopy); AudioDataValue* interleavedSource = pcm.Elements(); AudioTrackEncoder::InterleaveTrackData(aChunk, chunkSamples, mOMXAEncoder.mChannels, interleavedSource); // When the data size of chunk is larger than the buffer capacity, // we split it into sub-chunks to fill up buffers. size_t subChunkSamples = 0; while(GetNextSubChunk(bytesToCopy, subChunkSamples)) { // To avoid enqueueing an empty buffer, we follow the order that // clear up buffer first, then create one, send data to it in the end. if (!IsEmpty()) { // Submit the filled-up buffer and request a new buffer. status_t result = Enqueue(mOMXAEncoder.mTimestamp, mInputFlags & ~OMXCodecWrapper::BUFFER_EOS); if (result != OK) { return BUFFER_FAIL; } result = Dequeue(); if (result == -EAGAIN) { return WAIT_FOR_NEW_BUFFER; } if (result != OK) { return BUFFER_FAIL; } } if (aChunk.IsNull()) { bytesCopied = SendSilenceToBuffer(subChunkSamples); } else { bytesCopied = SendInterleavedSubChunkToBuffer(interleavedSource, subChunkSamples); } UpdateAfterSendChunk(subChunkSamples, bytesCopied, aSamplesRead); // Move to the position where samples are not yet send to the buffer. 
interleavedSource += subChunkSamples * mOMXAEncoder.mChannels; } } return BUFFER_OK; }
/** * Copies the data in aInput to aOffsetInBlock within aBlock. All samples must * be float. Both chunks must have the same number of channels (or else * aInput is null). aBlock must have been allocated with AllocateInputBlock. */ static void CopyChunkToBlock(const AudioChunk& aInput, AudioChunk *aBlock, uint32_t aOffsetInBlock) { uint32_t d = aInput.GetDuration(); for (uint32_t i = 0; i < aBlock->mChannelData.Length(); ++i) { float* out = static_cast<float*>(const_cast<void*>(aBlock->mChannelData[i])) + aOffsetInBlock; if (aInput.IsNull()) { PodZero(out, d); } else { const float* in = static_cast<const float*>(aInput.mChannelData[i]); ConvertAudioSamplesWithScale(in, out, d, aInput.mVolume); } } }
/** * Copies the data in aInput to aOffsetInBlock within aBlock. * aBlock must have been allocated with AllocateInputBlock and have a channel * count that's a superset of the channels in aInput. */ static void CopyChunkToBlock(const AudioChunk& aInput, AudioChunk *aBlock, uint32_t aOffsetInBlock) { uint32_t blockChannels = aBlock->ChannelCount(); nsAutoTArray<const void*,2> channels; if (aInput.IsNull()) { channels.SetLength(blockChannels); PodZero(channels.Elements(), blockChannels); } else { channels.SetLength(aInput.ChannelCount()); PodCopy(channels.Elements(), aInput.mChannelData.Elements(), channels.Length()); if (channels.Length() != blockChannels) { // We only need to upmix here because aBlock's channel count has been // chosen to be a superset of the channel count of every chunk. AudioChannelsUpMix(&channels, blockChannels, nullptr); } } uint32_t duration = aInput.GetDuration(); for (uint32_t c = 0; c < blockChannels; ++c) { float* outputData = static_cast<float*>(const_cast<void*>(aBlock->mChannelData[c])) + aOffsetInBlock; if (channels[c]) { switch (aInput.mBufferFormat) { case AUDIO_FORMAT_FLOAT32: ConvertAudioSamplesWithScale( static_cast<const float*>(channels[c]), outputData, duration, aInput.mVolume); break; case AUDIO_FORMAT_S16: ConvertAudioSamplesWithScale( static_cast<const int16_t*>(channels[c]), outputData, duration, aInput.mVolume); break; default: NS_ERROR("Unhandled format"); } } else { PodZero(outputData, duration); } } }
// The MediaStreamGraph guarantees that this is actually one block, for // AudioNodeStreams. void AudioNodeStream::ProduceOutput(GraphTime aFrom, GraphTime aTo) { StreamBuffer::Track* track = EnsureTrack(); AudioChunk outputChunk; AudioSegment* segment = track->Get<AudioSegment>(); outputChunk.SetNull(0); if (mInCycle) { // XXX DelayNode not supported yet so just produce silence outputChunk.SetNull(WEBAUDIO_BLOCK_SIZE); } else { AudioChunk tmpChunk; AudioChunk* inputChunk = ObtainInputBlock(&tmpChunk); bool finished = false; mEngine->ProduceAudioBlock(this, *inputChunk, &outputChunk, &finished); if (finished) { FinishOutput(); } } mLastChunk = outputChunk; if (mKind == MediaStreamGraph::EXTERNAL_STREAM) { segment->AppendAndConsumeChunk(&outputChunk); } else { segment->AppendNullData(outputChunk.GetDuration()); } for (uint32_t j = 0; j < mListeners.Length(); ++j) { MediaStreamListener* l = mListeners[j]; AudioChunk copyChunk = outputChunk; AudioSegment tmpSegment; tmpSegment.AppendAndConsumeChunk(©Chunk); l->NotifyQueuedTrackChanges(Graph(), AUDIO_NODE_STREAM_TRACK_ID, IdealAudioRate(), segment->GetDuration(), 0, tmpSegment); } }
// Pulls all queued raw audio out of mRawSegment, submits it to the Vorbis
// analysis buffer in planar (non-interleaved) layout and collects the encoded
// output through GetEncodedFrames(aData).
//
// Returns NS_OK when data was submitted, when the end of stream was signalled
// to the encoder, or when that had already happened on an earlier call.
// Returns NS_ERROR_FAILURE when the encoder was canceled or encoding is
// already complete.
nsresult
VorbisTrackEncoder::GetEncodedTrack(EncodedFrameContainer& aData)
{
  // End of stream has already been handed to the encoder; nothing left to do.
  if (mEosSetInEncoder) {
    return NS_OK;
  }

  PROFILER_LABEL("VorbisTrackEncoder", "GetEncodedTrack",
    js::ProfileEntry::Category::OTHER);

  nsAutoPtr<AudioSegment> sourceSegment(new AudioSegment());
  {
    // Move all the samples from mRawSegment to sourceSegment. The monitor is
    // held only inside this scope.
    ReentrantMonitorAutoEnter mon(mReentrantMonitor);

    // Keep waiting until we are canceled, have at least one packet's worth of
    // raw data, or the stream has ended.
    while (!(mCanceled ||
             mRawSegment.GetDuration() >= GetPacketDuration() ||
             mEndOfStream)) {
      mon.Wait();
    }
    VORBISLOG("GetEncodedTrack passes wait, duration is %lld\n",
      mRawSegment.GetDuration());

    if (mCanceled || mEncodingComplete) {
      return NS_ERROR_FAILURE;
    }

    sourceSegment->AppendFrom(&mRawSegment);
  }

  const bool streamDrained =
    mEndOfStream && (sourceSegment->GetDuration() == 0) && !mEosSetInEncoder;
  if (streamDrained) {
    mEncodingComplete = true;
    mEosSetInEncoder = true;
    VORBISLOG("[Vorbis] Done encoding.");
    // Submitting zero frames tells the library the stream is over.
    vorbis_analysis_wrote(&mVorbisDsp, 0);
    GetEncodedFrames(aData);
    return NS_OK;
  }

  // Start encoding data.
  AudioSegment::ChunkIterator iter(*sourceSegment);

  AudioDataValue **vorbisBuffer =
    vorbis_analysis_buffer(&mVorbisDsp,
                           static_cast<int>(sourceSegment->GetDuration()));

  int framesCopied = 0;
  AutoTArray<AudioDataValue, 9600> interleavedPcm;
  AutoTArray<AudioDataValue, 9600> nonInterleavedPcm;
  interleavedPcm.SetLength(sourceSegment->GetDuration() * mChannels);
  nonInterleavedPcm.SetLength(sourceSegment->GetDuration() * mChannels);

  // Gather every chunk into one interleaved buffer, back to back.
  for (; !iter.IsEnded(); iter.Next()) {
    AudioChunk chunk = *iter;
    int frameToCopy = chunk.GetDuration();
    AudioDataValue* writePos =
      interleavedPcm.Elements() + framesCopied * mChannels;

    if (chunk.IsNull()) {
      // Empty data: write silence.
      memset(writePos, 0, frameToCopy * mChannels * sizeof(AudioDataValue));
    } else {
      InterleaveTrackData(chunk, frameToCopy, mChannels, writePos);
    }

    framesCopied += frameToCopy;
  }

  // De-interleave the interleavedPcm.
  DeInterleaveTrackData(interleavedPcm.Elements(), framesCopied, mChannels,
                        nonInterleavedPcm.Elements());

  // Copy each channel's plane from nonInterleavedPcm into the vorbis buffer.
  for (uint8_t i = 0; i < mChannels; ++i) {
    const AudioDataValue* plane =
      nonInterleavedPcm.Elements() + framesCopied * i;
    memcpy(vorbisBuffer[i], plane, framesCopied * sizeof(AudioDataValue));
  }

  // The vorbis buffer now holds all of the data in non-interleaved form.
  // Tell the library how much we actually submitted.
  vorbis_analysis_wrote(&mVorbisDsp, framesCopied);
  VORBISLOG("vorbis_analysis_wrote framesCopied %d\n", framesCopied);
  GetEncodedFrames(aData);

  return NS_OK;
}
// Encodes one Opus packet from the queued raw audio and appends it to aData.
//
// The call blocks until the encoder is initialized and until enough raw data
// for one packet is available (or the stream ended / was canceled). The
// fetched frames are interleaved, resampled when mResampler is set, zero
// padded at end of stream, and then passed to the Opus encoder.
//
// Returns NS_OK when the Opus encoder reported success. Returns
// NS_ERROR_FAILURE when the encoder was canceled, encoding had already
// completed, or the Opus encoder returned a negative error code (the frame is
// still appended to aData in that last case, with empty frame data).
nsresult
OpusTrackEncoder::GetEncodedTrack(EncodedFrameContainer& aData)
{
  PROFILER_LABEL("OpusTrackEncoder", "GetEncodedTrack",
    js::ProfileEntry::Category::OTHER);
  {
    ReentrantMonitorAutoEnter mon(mReentrantMonitor);
    // Wait until initialized or cancelled.
    while (!mCanceled && !mInitialized) {
      mReentrantMonitor.Wait();
    }
    if (mCanceled || mEncodingComplete) {
      return NS_ERROR_FAILURE;
    }
  }

  // calculation below depends on the truth that mInitialized is true.
  MOZ_ASSERT(mInitialized);

  // re-sampled frames left last time which didn't fit into an Opus packet
  // duration.
  const int framesLeft = mResampledLeftover.Length() / mChannels;
  // When framesLeft is 0, (GetPacketDuration() - framesLeft) is a multiple
  // of kOpusSamplingRate. There is not precision loss in the integer division
  // in computing framesToFetch. If frameLeft > 0, we need to add 1 to
  // framesToFetch to ensure there will be at least n frames after re-sampling.
  const int frameRoundUp = framesLeft ? 1 : 0;

  MOZ_ASSERT(GetPacketDuration() >= framesLeft);
  // Try to fetch m frames such that there will be n frames
  // where (n + frameLeft) >= GetPacketDuration() after re-sampling.
  const int framesToFetch = !mResampler ? GetPacketDuration()
    : (GetPacketDuration() - framesLeft) * mSamplingRate / kOpusSamplingRate
      + frameRoundUp;
  {
    // Move all the samples from mRawSegment to mSourceSegment. We only hold
    // the monitor in this block.
    ReentrantMonitorAutoEnter mon(mReentrantMonitor);

    // Wait until enough raw data, end of stream or cancelled.
    while (!mCanceled &&
           mRawSegment.GetDuration() + mSourceSegment.GetDuration() <
             framesToFetch &&
           !mEndOfStream) {
      mReentrantMonitor.Wait();
    }

    if (mCanceled || mEncodingComplete) {
      return NS_ERROR_FAILURE;
    }

    mSourceSegment.AppendFrom(&mRawSegment);

    // Pad |mLookahead| samples to the end of source stream to prevent lost of
    // original data, the pcm duration will be calculated at rate 48K later.
    if (mEndOfStream && !mEosSetInEncoder) {
      mEosSetInEncoder = true;
      mSourceSegment.AppendNullData(mLookahead);
    }
  }

  // Start encoding data.
  //
  // The buffer first receives up to framesToFetch source frames and later
  // the GetPacketDuration() frames handed to the encoder, so it must be large
  // enough for both. framesToFetch exceeds GetPacketDuration() when
  // resampling from a rate above kOpusSamplingRate.
  nsAutoTArray<AudioDataValue, 9600> pcm;
  pcm.SetLength(std::max<int>(GetPacketDuration(), framesToFetch) * mChannels);
  AudioSegment::ChunkIterator iter(mSourceSegment);
  int frameCopied = 0;
  while (!iter.IsEnded() && frameCopied < framesToFetch) {
    AudioChunk chunk = *iter;

    // Chunk to the required frame size.
    int frameToCopy = chunk.GetDuration();
    if (frameCopied + frameToCopy > framesToFetch) {
      frameToCopy = framesToFetch - frameCopied;
    }

    if (!chunk.IsNull()) {
      // Append the interleaved data to the end of pcm buffer.
      AudioTrackEncoder::InterleaveTrackData(chunk, frameToCopy, mChannels,
        pcm.Elements() + frameCopied * mChannels);
    } else {
      // Null chunk: write silence.
      memset(pcm.Elements() + frameCopied * mChannels, 0,
             frameToCopy * mChannels * sizeof(AudioDataValue));
    }

    frameCopied += frameToCopy;
    iter.Next();
  }

  RefPtr<EncodedFrame> audiodata = new EncodedFrame();
  audiodata->SetFrameType(EncodedFrame::OPUS_AUDIO_FRAME);
  int framesInPCM = frameCopied;
  if (mResampler) {
    nsAutoTArray<AudioDataValue, 9600> resamplingDest;
    // We want to consume all the input data, so we slightly oversize the
    // resampled data buffer so we can fit the output data in. We cannot really
    // predict the output frame count at each call.
    uint32_t outframes = frameCopied * kOpusSamplingRate / mSamplingRate + 1;
    uint32_t inframes = frameCopied;

    resamplingDest.SetLength(outframes * mChannels);

    // Same sample-type switch as the encode call further down.
#ifdef MOZ_SAMPLE_TYPE_S16
    short* in = reinterpret_cast<short*>(pcm.Elements());
    short* out = reinterpret_cast<short*>(resamplingDest.Elements());
    speex_resampler_process_interleaved_int(mResampler, in, &inframes,
                                            out, &outframes);
#else
    float* in = reinterpret_cast<float*>(pcm.Elements());
    float* out = reinterpret_cast<float*>(resamplingDest.Elements());
    speex_resampler_process_interleaved_float(mResampler, in, &inframes,
                                              out, &outframes);
#endif

    // Rebuild pcm as: leftover from the previous call, followed by as many
    // freshly resampled frames as fit into one packet.
    MOZ_ASSERT(pcm.Length() >= mResampledLeftover.Length());
    PodCopy(pcm.Elements(), mResampledLeftover.Elements(),
            mResampledLeftover.Length());

    uint32_t outframesToCopy = std::min(outframes,
      static_cast<uint32_t>(GetPacketDuration() - framesLeft));

    MOZ_ASSERT(pcm.Length() - mResampledLeftover.Length() >=
               outframesToCopy * mChannels);
    PodCopy(pcm.Elements() + mResampledLeftover.Length(),
            resamplingDest.Elements(), outframesToCopy * mChannels);

    // Whatever did not fit is kept for the next call.
    int frameLeftover = outframes - outframesToCopy;
    mResampledLeftover.SetLength(frameLeftover * mChannels);
    PodCopy(mResampledLeftover.Elements(),
            resamplingDest.Elements() + outframesToCopy * mChannels,
            mResampledLeftover.Length());

    // This is always at 48000Hz.
    framesInPCM = framesLeft + outframesToCopy;
    audiodata->SetDuration(framesInPCM);
  } else {
    // The ogg time stamping and pre-skip is always timed at 48000.
    audiodata->SetDuration(frameCopied * (kOpusSamplingRate / mSamplingRate));
  }

  // Remove the raw data which has been pulled to pcm buffer.
  // The value of frameCopied should equal to (or smaller than, if eos)
  // GetPacketDuration().
  mSourceSegment.RemoveLeading(frameCopied);

  // Has reached the end of input stream and all queued data has pulled for
  // encoding.
  if (mSourceSegment.GetDuration() == 0 && mEndOfStream) {
    mEncodingComplete = true;
    LOG("[Opus] Done encoding.");
  }

  MOZ_ASSERT(mEndOfStream || framesInPCM == GetPacketDuration());

  // Append null data to pcm buffer if the leftover data is not enough for
  // opus encoder.
  if (framesInPCM < GetPacketDuration() && mEndOfStream) {
    PodZero(pcm.Elements() + framesInPCM * mChannels,
            (GetPacketDuration() - framesInPCM) * mChannels);
  }

  nsTArray<uint8_t> frameData;
  // Encode the data with Opus Encoder.
  frameData.SetLength(MAX_DATA_BYTES);
  // result is returned as opus error code if it is negative.
  int result = 0;
#ifdef MOZ_SAMPLE_TYPE_S16
  const opus_int16* pcmBuf = static_cast<opus_int16*>(pcm.Elements());
  result = opus_encode(mEncoder, pcmBuf, GetPacketDuration(),
                       frameData.Elements(), MAX_DATA_BYTES);
#else
  const float* pcmBuf = static_cast<float*>(pcm.Elements());
  result = opus_encode_float(mEncoder, pcmBuf, GetPacketDuration(),
                             frameData.Elements(), MAX_DATA_BYTES);
#endif
  // Trim the frame to the number of bytes written (none on error).
  frameData.SetLength(result >= 0 ? result : 0);

  if (result < 0) {
    LOG("[Opus] Fail to encode data! Result: %s.", opus_strerror(result));
  }
  if (mEncodingComplete) {
    // No further input will arrive; release the resampler and its leftover.
    if (mResampler) {
      speex_resampler_destroy(mResampler);
      mResampler = nullptr;
    }
    mResampledLeftover.SetLength(0);
  }

  audiodata->SwapInFrameData(frameData);
  aData.AppendEncodedFrame(audiodata);
  return result >= 0 ? NS_OK : NS_ERROR_FAILURE;
}