// Shut down the audio encode path: drain the encoder, push the last
// frames to the sinks, stop the codec, and release the staging buffer.
// Order matters: the flush must happen before StopEncoder() or the
// tail samples buffered inside the codec are lost.
void CAudioEncoder::DoStopAudio() {
  // flush remaining output from audio encoder
  // and forward it to sinks
  EncodeSamples(NULL, 0, m_audioSrcChannels);
  ForwardEncodedAudioFrames();

  StopEncoder();

  // CHECK_AND_FREE frees the buffer only if non-NULL and NULLs it out.
  CHECK_AND_FREE(m_audioPreEncodingBuffer);

  // Final statistics for this encoding profile.
  debug_message("Audio profile %s stats", GetProfileName());
  debug_message(" encoded samples: "U64, m_audioDstSampleNumber);
  debug_message(" encoded frames: %u", m_audioDstFrameNumber);
}
// Encode and forward one destination-sized frame of digital silence.
// Used by ProcessAudioFrame() to pad the output stream when the source
// clock has fallen behind the destination clock (A/V sync catch-up).
void CAudioEncoder::AddSilenceFrame(void)
{
  // One destination frame's worth of zeroed 16-bit PCM.
  int bytes = DstSamplesToBytes(m_audioDstSamplesPerFrame);
  uint8_t *pSilenceData = (uint8_t *)Malloc(bytes);
  memset(pSilenceData, 0, bytes);

  bool rc = EncodeSamples(
                          (int16_t*)pSilenceData,
                          m_audioDstSamplesPerFrame,
                          m_audioDstChannels);
  if (rc) {
    ForwardEncodedAudioFrames();
  } else {
    debug_message("failed to encode audio");
  }

  // BUG FIX: the failure path previously returned before this free(),
  // leaking one frame's worth of PCM per failed encode.
  free(pSilenceData);
}
void CMediaSource::DoStopAudio() { if (m_audioEncoder) { // flush remaining output from audio encoder // and forward it to sinks m_audioEncoder->EncodeSamples(NULL, 0, m_audioSrcChannels); ForwardEncodedAudioFrames(); m_audioEncoder->Stop(); delete m_audioEncoder; m_audioEncoder = NULL; } free(m_audioPreEncodingBuffer); m_audioPreEncodingBuffer = NULL; m_sourceAudio = false; }
// Feed one raw PCM frame through the encoder, handling (in order):
//   1. channel conversion (mono<->stereo) if source/dest channel counts differ,
//   2. sample-rate conversion (into m_audioPreEncodingBuffer),
//   3. reframing when source and destination frames hold different sample
//      counts (e.g. MP3 1152 vs AAC 1024 samples/frame),
//   4. A/V sync: inserting silence frames when the source runs ahead,
//      dropping frames when the destination runs ahead,
//   5. encoding and forwarding each complete destination-sized frame.
void CAudioEncoder::ProcessAudioFrame(CMediaFrame *pFrame)
{
  const u_int8_t* frameData = (const uint8_t *)pFrame->GetData();
  u_int32_t frameDataLength = pFrame->GetDataLength();
  Timestamp srcFrameTimestamp = pFrame->GetTimestamp();;
  bool pcmMalloced = false;   // true when pcmData points at a Malloc'd conversion buffer
  bool pcmBuffered;           // true when samples accumulate in m_audioPreEncodingBuffer
  const u_int8_t* pcmData = frameData;
  u_int32_t pcmDataLength = frameDataLength;
  uint32_t audioSrcSamplesPerFrame = SrcBytesToSamples(frameDataLength);
  // Ticks already sitting in the pre-encoding buffer; the frame timestamp
  // is pulled back by this amount so it refers to the buffer's head.
  Duration subtractDuration = 0;

  /*************************************************************************
   * First convert input samples to format we need them to be in
   *************************************************************************/
  if (m_audioSrcChannels != m_audioDstChannels) {
    // Convert the channels if they don't match
    // we either double the channel info, or combine
    // the left and right
    uint32_t samples = SrcBytesToSamples(frameDataLength);
    uint32_t dstLength = DstSamplesToBytes(samples);
    pcmData = (u_int8_t *)Malloc(dstLength);
    pcmDataLength = dstLength;
    pcmMalloced = true;

    int16_t *src = (int16_t *)frameData;
    int16_t *dst = (int16_t *)pcmData;
    if (m_audioSrcChannels == 1) {
      // 1 channel to 2: duplicate each sample into left and right
      for (uint32_t ix = 0; ix < samples; ix++) {
        *dst++ = *src;
        *dst++ = *src++;
      }
    } else {
      // 2 channels to 1: average the pair, clamping to int16 range
      for (uint32_t ix = 0; ix < samples; ix++) {
        int32_t sum = *src++;
        sum += *src++;
        sum /= 2;
        if (sum < -32768) sum = -32768;
        else if (sum > 32767) sum = 32767;
        *dst++ = sum & 0xffff;
      }
    }
  }

  // resample audio, if necessary
  if (m_audioSrcSampleRate != m_audioDstSampleRate) {
    subtractDuration =
      DstSamplesToTicks(DstBytesToSamples(m_audioPreEncodingBufferLength));
    ResampleAudio(pcmData, pcmDataLength);
    // resampled data is now available in m_audioPreEncodingBuffer
    pcmBuffered = true;
  } else if (audioSrcSamplesPerFrame != m_audioDstSamplesPerFrame) {
    // reframe audio, if necessary
    // e.g. MP3 is 1152 samples/frame, AAC is 1024 samples/frame
    // add samples to end of m_audioPreEncodingBuffer
    // InitAudio() ensures that buffer is large enough
    if (m_audioPreEncodingBuffer == NULL) {
      // realloc(NULL, n) behaves as malloc(n) — lazy first allocation
      m_audioPreEncodingBuffer =
        (u_int8_t*)realloc(m_audioPreEncodingBuffer,
                           m_audioPreEncodingBufferMaxLength);
    }
    subtractDuration =
      DstSamplesToTicks(DstBytesToSamples(m_audioPreEncodingBufferLength));
    memcpy(
           &m_audioPreEncodingBuffer[m_audioPreEncodingBufferLength],
           pcmData,
           pcmDataLength);
    m_audioPreEncodingBufferLength += pcmDataLength;
    pcmBuffered = true;
  } else {
    // default case - just use what we're passed
    pcmBuffered = false;
  }

  // Rewind the timestamp to the head of any previously buffered samples.
  srcFrameTimestamp -= subtractDuration;

  /************************************************************************
   * Loop while we have enough samples
   ************************************************************************/
  Duration frametime = DstSamplesToTicks(m_audioDstSamplesPerFrame);

  if (m_audioDstFrameNumber == 0)
    debug_message("%s:frametime "U64, Profile()->GetName(), frametime);

  while (1) {
    /*
     * Record starting timestamps
     */
    if (m_audioSrcFrameNumber == 0) {
      /*
       * we use m_audioStartTimestamp to determine audio output start time
       */
      m_audioStartTimestamp = srcFrameTimestamp;
#ifdef DEBUG_AUDIO_SYNC
      if (Profile()->GetBoolValue(CFG_AUDIO_DEBUG))
        debug_message("%s:m_audioStartTimestamp = "U64,
                      Profile()->GetName(), m_audioStartTimestamp);
#endif
    }

    if (m_audioDstFrameNumber == 0) {
      // we wait until we see the first encoded frame.
      // this is because encoders usually buffer the first few
      // raw audio frames fed to them, and this number varies
      // from one encoder to another
      // We use this value to determine if we need to drop due to
      // a bad input frequency
      m_audioEncodingStartTimestamp = srcFrameTimestamp;
    }

    // we calculate audioSrcElapsedDuration by taking the current frame's
    // timestamp and subtracting the audioEncodingStartTimestamp (and NOT
    // the audioStartTimestamp).
    // this way, we just need to compare audioSrcElapsedDuration with
    // audioDstElapsedDuration (which should match in the ideal case),
    // and we don't have to compensate for the lag introduced by the initial
    // buffering of source frames in the encoder, which may vary from
    // one encoder to another
    m_audioSrcElapsedDuration = srcFrameTimestamp - m_audioEncodingStartTimestamp;
    m_audioSrcFrameNumber++;

    if (pcmBuffered) {
      u_int32_t samplesAvailable =
        DstBytesToSamples(m_audioPreEncodingBufferLength);
      // The conversion buffer's contents were already copied/consumed into
      // m_audioPreEncodingBuffer above, so it can be released now.
      if (pcmMalloced) {
        free((void *)pcmData);
        pcmMalloced = false;
      }
#ifdef DEBUG_AUDIO_SYNC
      if (Profile()->GetBoolValue(CFG_AUDIO_DEBUG))
        debug_message("%s: samples %u need %u",
                      Profile()->GetName(), samplesAvailable,
                      m_audioDstSamplesPerFrame);
#endif
      // not enough samples collected yet to call encode or forward
      // we moved the data above.
      if (samplesAvailable < m_audioDstSamplesPerFrame) {
        return;
      }
      // setup for encode/forward: consume one frame from the buffer head
      pcmData = &m_audioPreEncodingBuffer[0];
      pcmDataLength = DstSamplesToBytes(m_audioDstSamplesPerFrame);
    }

#ifdef DEBUG_AUDIO_SYNC
    if (Profile()->GetBoolValue(CFG_AUDIO_DEBUG))
      debug_message("%s:srcDuration="U64" dstDuration "U64" "D64,
                    Profile()->GetName(),
                    m_audioSrcElapsedDuration,
                    m_audioDstElapsedDuration,
                    m_audioDstElapsedDuration - m_audioSrcElapsedDuration);
#endif

    /*
     * Check if we can encode, or if we have to add/drop frames
     * First check is to see if the source frequency is greater than the
     * theory frequency.
     */
    if (m_audioSrcElapsedDuration + frametime >= m_audioDstElapsedDuration) {
      // source gets ahead of destination
      // We tolerate a difference of 3 frames since A/V sync is usually
      // noticeable after that. This way we give the encoder a chance to pick
      // up
      if (m_audioSrcElapsedDuration > (3 * frametime) + m_audioDstElapsedDuration) {
        // Pad with silence, aiming to land within 1 frame of the source
        // (hence the 2 * frametime target rather than 3).
        int j = (int) (DstTicksToSamples(m_audioSrcElapsedDuration
                                         + (2 * frametime)
                                         - m_audioDstElapsedDuration)
                       / m_audioDstSamplesPerFrame);
        debug_message("%s: Adding %d silence frames",
                      Profile()->GetName(), j);
        for (int k=0; k<j; k++)
          AddSilenceFrame();
      }
#ifdef DEBUG_SYNC
      debug_message("%s:encoding", Profile()->GetName());
#endif
      /*
       * Actually encode and forward the frames
       */
      bool rc = EncodeSamples(
                              (int16_t*)pcmData,
                              m_audioDstSamplesPerFrame,
                              m_audioDstChannels);
      if (!rc) {
        debug_message("failed to encode audio");
      }
      ForwardEncodedAudioFrames();
    } else {
      // destination gets ahead of source
      // This has been observed as a result of clock frequency drift between
      // the sound card oscillator and the system mainbord oscillator
      // Example: If the sound card oscillator has a 'real' frequency that
      // is slightly larger than the 'rated' frequency, and we are sampling
      // at 32kHz, then the 32000 samples acquired from the sound card
      // 'actually' occupy a duration of slightly less than a second.
      //
      // The clock drift is usually fraction of a Hz and takes a long
      // time (~ 20-30 minutes) before we are off by one frame duration
      debug_message("%s:audio: dropping frame, SrcElapsedDuration="U64" DstElapsedDuration="U64" "U64,
                    Profile()->GetName(),
                    m_audioSrcElapsedDuration, m_audioDstElapsedDuration,
                    frametime);
      // don't return - drop through to remove frame
    }
    if (pcmMalloced) {
      free((void *)pcmData);
    }
    if (pcmBuffered) {
      /*
       * This means we're storing data, either from resampling, or if the
       * sample numbers do not match. We will remove the encoded samples,
       * and increment the srcFrameTimestamp
       */
      m_audioPreEncodingBufferLength -= pcmDataLength;
      // memmove (not memcpy): source and destination overlap
      memmove(
              &m_audioPreEncodingBuffer[0],
              &m_audioPreEncodingBuffer[pcmDataLength],
              m_audioPreEncodingBufferLength);
      subtractDuration = 0;
      srcFrameTimestamp += frametime;
    } else {
      // no data in buffer (default case).
      return;
    }
  }
}
void CMediaSource::ProcessAudioFrame( u_int8_t* frameData, u_int32_t frameDataLength, Timestamp srcFrameTimestamp) { if (m_audioSrcFrameNumber == 0) { if (!m_sourceVideo || m_videoSrcFrameNumber == 0) { m_encodingStartTimestamp = GetTimestamp(); } m_audioStartTimestamp = srcFrameTimestamp; #ifdef DEBUG_AUDIO_SYNC debug_message("m_audioStartTimestamp = "U64, m_audioStartTimestamp); #endif } if (m_audioDstFrameNumber == 0) { // we wait until we see the first encoded frame. // this is because encoders usually buffer the first few // raw audio frames fed to them, and this number varies // from one encoder to another m_audioEncodingStartTimestamp = srcFrameTimestamp; } // we calculate audioSrcElapsedDuration by taking the current frame's // timestamp and subtracting the audioEncodingStartTimestamp (and NOT // the audioStartTimestamp). // this way, we just need to compare audioSrcElapsedDuration with // audioDstElapsedDuration (which should match in the ideal case), // and we don't have to compensate for the lag introduced by the initial // buffering of source frames in the encoder, which may vary from // one encoder to another m_audioSrcElapsedDuration = srcFrameTimestamp - m_audioEncodingStartTimestamp; m_audioSrcFrameNumber++; #if 0 // not needed if (resync) { // flush preEncodingBuffer m_audioPreEncodingBufferLength = 0; // change dst sample numbers to account for gap m_audioDstSampleNumber = m_audioDstRawSampleNumber = DstTicksToSamples(m_audioSrcElapsedDuration); error_message("Received resync"); } #endif bool pcmMalloced = false; bool pcmBuffered; u_int8_t* pcmData = frameData; u_int32_t pcmDataLength = frameDataLength; if (m_audioSrcChannels != m_audioDstChannels) { // Convert the channels if they don't match // we either double the channel info, or combine // the left and right uint32_t samples = SrcBytesToSamples(frameDataLength); uint32_t dstLength = DstSamplesToBytes(samples); pcmData = (u_int8_t *)Malloc(dstLength); pcmDataLength = dstLength; pcmMalloced = 
true; int16_t *src = (int16_t *)frameData; int16_t *dst = (int16_t *)pcmData; if (m_audioSrcChannels == 1) { // 1 channel to 2 for (uint32_t ix = 0; ix < samples; ix++) { *dst++ = *src; *dst++ = *src++; } } else { // 2 channels to 1 for (uint32_t ix = 0; ix < samples; ix++) { int32_t sum = *src++; sum += *src++; sum /= 2; if (sum < -32768) sum = -32768; else if (sum > 32767) sum = 32767; *dst++ = sum & 0xffff; } } } // resample audio, if necessary if (m_audioSrcSampleRate != m_audioDstSampleRate) { ResampleAudio(pcmData, pcmDataLength); // resampled data is now available in m_audioPreEncodingBuffer pcmBuffered = true; } else if (m_audioSrcSamplesPerFrame != m_audioDstSamplesPerFrame) { // reframe audio, if necessary // e.g. MP3 is 1152 samples/frame, AAC is 1024 samples/frame // add samples to end of m_audioBuffer // InitAudio() ensures that buffer is large enough memcpy( &m_audioPreEncodingBuffer[m_audioPreEncodingBufferLength], pcmData, pcmDataLength); m_audioPreEncodingBufferLength += pcmDataLength; pcmBuffered = true; } else { pcmBuffered = false; } // LATER restructure so as get rid of this label, and goto below pcmBufferCheck: if (pcmBuffered) { u_int32_t samplesAvailable = DstBytesToSamples(m_audioPreEncodingBufferLength); // not enough samples collected yet to call encode or forward if (samplesAvailable < m_audioDstSamplesPerFrame) { return; } if (pcmMalloced) { free(pcmData); pcmMalloced = false; } // setup for encode/forward pcmData = &m_audioPreEncodingBuffer[0]; pcmDataLength = DstSamplesToBytes(m_audioDstSamplesPerFrame); } // encode audio frame if (m_pConfig->m_audioEncode) { Duration frametime = DstSamplesToTicks(DstBytesToSamples(frameDataLength)); #ifdef DEBUG_AUDIO_SYNC debug_message("asrc# %d srcDuration="U64" dst# %d dstDuration "U64, m_audioSrcFrameNumber, m_audioSrcElapsedDuration, m_audioDstFrameNumber, m_audioDstElapsedDuration); #endif // destination gets ahead of source // This has been observed as a result of clock frequency drift between 
// the sound card oscillator and the system mainbord oscillator // Example: If the sound card oscillator has a 'real' frequency that // is slightly larger than the 'rated' frequency, and we are sampling // at 32kHz, then the 32000 samples acquired from the sound card // 'actually' occupy a duration of slightly less than a second. // // The clock drift is usually fraction of a Hz and takes a long // time (~ 20-30 minutes) before we are off by one frame duration if (m_audioSrcElapsedDuration + frametime < m_audioDstElapsedDuration) { debug_message("audio: dropping frame, SrcElapsedDuration="U64" DstElapsedDuration="U64, m_audioSrcElapsedDuration, m_audioDstElapsedDuration); return; } // source gets ahead of destination // We tolerate a difference of 3 frames since A/V sync is usually // noticeable after that. This way we give the encoder a chance to pick up if (m_audioSrcElapsedDuration > (3 * frametime) + m_audioDstElapsedDuration) { int j = (int) (DstTicksToSamples(m_audioSrcElapsedDuration + (2 * frametime) - m_audioDstElapsedDuration) / m_audioDstSamplesPerFrame); debug_message("audio: Adding %d silence frames", j); for (int k=0; k<j; k++) AddSilenceFrame(); } //Timestamp encodingStartTimestamp = GetTimestamp(); bool rc = m_audioEncoder->EncodeSamples( (int16_t*)pcmData, m_audioDstSamplesPerFrame, m_audioDstChannels); if (!rc) { debug_message("failed to encode audio"); return; } // Disabled since we are not taking into account audio drift anymore /* Duration encodingTime = (GetTimestamp() - encodingStartTimestamp); if (m_sourceRealTime && m_videoSource) { Duration drift; if (frametime <= encodingTime) { drift = encodingTime - frametime; m_videoSource->AddEncodingDrift(drift); } } */ ForwardEncodedAudioFrames(); } //Forward PCM Frames to Feeder Sink if ((m_pConfig->GetBoolValue(CONFIG_FEEDER_SINK_ENABLE) && frameDataLength > 0)) { // make a copy of the pcm data if needed u_int8_t* FwdedData; FwdedData = (u_int8_t*)Malloc(frameDataLength); memcpy(FwdedData, 
frameData, frameDataLength); CMediaFrame* pFrame = new CMediaFrame( RAWPCMAUDIOFRAME, FwdedData, frameDataLength, srcFrameTimestamp, 0, m_audioDstSampleRate); ForwardFrame(pFrame); } // if desired, forward raw audio to sinks if (m_pConfig->SourceRawAudio() && pcmDataLength > 0) { // make a copy of the pcm data if needed u_int8_t* pcmForwardedData; if (!pcmMalloced) { pcmForwardedData = (u_int8_t*)Malloc(pcmDataLength); memcpy(pcmForwardedData, pcmData, pcmDataLength); } else { pcmForwardedData = pcmData; pcmMalloced = false; } #ifndef WORDS_BIGENDIAN // swap byte ordering so we have big endian to write into // the file. uint16_t *pdata = (uint16_t *)pcmForwardedData; for (uint32_t ix = 0; ix < pcmDataLength; ix += sizeof(uint16_t),pdata++) { uint16_t swap = *pdata; *pdata = B2N_16(swap); } #endif CMediaFrame* pFrame = new CMediaFrame( PCMAUDIOFRAME, pcmForwardedData, pcmDataLength, m_audioStartTimestamp + DstSamplesToTicks(m_audioDstRawSampleNumber), DstBytesToSamples(pcmDataLength), m_audioDstSampleRate); ForwardFrame(pFrame); m_audioDstRawSampleNumber += SrcBytesToSamples(pcmDataLength); m_audioDstRawFrameNumber++; } if (pcmMalloced) { free(pcmData); } if (pcmBuffered) { m_audioPreEncodingBufferLength -= pcmDataLength; memcpy( &m_audioPreEncodingBuffer[0], &m_audioPreEncodingBuffer[pcmDataLength], m_audioPreEncodingBufferLength); goto pcmBufferCheck; } }