void CAudioEncoder::ResampleAudio( const u_int8_t* frameData, u_int32_t frameDataLength) { uint32_t samplesIn; uint32_t samplesInConsumed; uint32_t outBufferSamplesLeft; uint32_t outBufferSamplesWritten; uint32_t chan_offset; samplesIn = DstBytesToSamples(frameDataLength); // so far, record the pre length while (samplesIn > 0) { outBufferSamplesLeft = DstBytesToSamples(m_audioPreEncodingBufferMaxLength - m_audioPreEncodingBufferLength); if (outBufferSamplesLeft * 2 <= samplesIn && samplesIn > 0) { m_audioPreEncodingBufferMaxLength *= 2; m_audioPreEncodingBuffer = (u_int8_t*)realloc(m_audioPreEncodingBuffer, m_audioPreEncodingBufferMaxLength); } for (uint8_t chan_ix = 0; chan_ix < m_audioDstChannels; chan_ix++) { samplesInConsumed = samplesIn; outBufferSamplesWritten = outBufferSamplesLeft; chan_offset = chan_ix * (DstSamplesToBytes(1)); #ifdef DEBUG_AUDIO_RESAMPLER error_message("%s:resample - chans %d %d, samples %d left %d", Profile()->GetName(), m_audioDstChannels, chan_ix, samplesIn, outBufferSamplesLeft); #endif if (st_resample_flow(m_audioResample[chan_ix], (int16_t *)(frameData + chan_offset), (int16_t *)(&m_audioPreEncodingBuffer[m_audioPreEncodingBufferLength + chan_offset]), &samplesInConsumed, &outBufferSamplesWritten, m_audioDstChannels) < 0) { error_message("%s:resample failed", Profile()->GetName()); } #ifdef DEBUG_AUDIO_RESAMPLER debug_message("%s:Chan %d consumed %d wrote %d", Profile()->GetName(), chan_ix, samplesInConsumed, outBufferSamplesWritten); #endif } if (outBufferSamplesLeft < outBufferSamplesWritten) { error_message("%s:Written past end of buffer", Profile()->GetName()); } samplesIn -= samplesInConsumed; outBufferSamplesLeft -= outBufferSamplesWritten; m_audioPreEncodingBufferLength += DstSamplesToBytes(outBufferSamplesWritten); // If we have no room for new output data, and more to process, // give us a bunch more room... } // end while we still have input samples }
// Encode and forward one frame's worth of zeroed (silent) samples.
// Used by the sync logic to pad the output when the source falls behind.
void CAudioEncoder::AddSilenceFrame (void)
{
  int bytes = DstSamplesToBytes(m_audioDstSamplesPerFrame);
  uint8_t *pSilenceData = (uint8_t *)Malloc(bytes);
  memset(pSilenceData, 0, bytes);

  bool rc = EncodeSamples(
			  (int16_t*)pSilenceData,
			  m_audioDstSamplesPerFrame,
			  m_audioDstChannels);
  if (!rc) {
    debug_message("failed to encode audio");
    // FIX: original returned here without freeing pSilenceData, leaking
    // one frame's worth of memory on every failed encode.
    free(pSilenceData);
    return;
  }
  ForwardEncodedAudioFrames();
  free(pSilenceData);
}
// Reset per-run counters and configure destination audio parameters from
// the profile; sets up per-channel resamplers when src and dst sample
// rates differ, and (re)allocates the pre-encoding staging buffer.
void CAudioEncoder::Initialize (void)
{
  // called from derived classes init function from the start function
  // in the media flow
  m_audioSrcFrameNumber = 0;
  m_audioDstFrameNumber = 0;
  m_audioDstSampleNumber = 0;
  m_audioSrcElapsedDuration = 0;
  m_audioDstElapsedDuration = 0;

  // destination parameters are from the audio profile
  m_audioDstType = GetFrameType();
  m_audioDstSampleRate = m_pConfig->GetIntegerValue(CFG_AUDIO_SAMPLE_RATE);
  m_audioDstChannels = m_pConfig->GetIntegerValue(CFG_AUDIO_CHANNELS);
  m_audioDstSamplesPerFrame = GetSamplesPerFrame();

  // if we need to resample
  if (m_audioDstSampleRate != m_audioSrcSampleRate) {
    // create a resampler for each audio destination channel -
    // we will combine the channels before resampling
    // NOTE(review): malloc/st_resample_start results are not checked for
    // failure here - confirm callers tolerate a later NULL dereference
    // or add handling (this function has no error return path).
    m_audioResample = (resample_t *)malloc(sizeof(resample_t) *
					   m_audioDstChannels);
    for (int ix = 0; ix < m_audioDstChannels; ix++) {
      m_audioResample[ix] = st_resample_start(m_audioSrcSampleRate,
					      m_audioDstSampleRate);
    }
  }

  // this calculation doesn't take into consideration the resampling
  // size of the src.  4 times might not be enough - we need most likely
  // 2 times the max of the src samples and the dest samples
  // NOTE(review): realloc on m_audioPreEncodingBuffer assumes the member
  // is NULL or a valid heap pointer on entry - presumably set in the
  // constructor; verify.
  m_audioPreEncodingBufferLength = 0;
  m_audioPreEncodingBufferMaxLength =
    4 * DstSamplesToBytes(m_audioDstSamplesPerFrame);
  m_audioPreEncodingBuffer = (u_int8_t*)realloc(
						m_audioPreEncodingBuffer,
						m_audioPreEncodingBufferMaxLength);
}
// Main per-frame audio path: normalize the incoming PCM to the destination
// format (channel count, sample rate, samples-per-frame), then loop encoding
// full destination frames, inserting silence or dropping frames to keep
// source and destination elapsed durations in sync.
//
// Buffering state:
//   pcmMalloced - pcmData points at a heap copy we must free
//   pcmBuffered - data is staged in m_audioPreEncodingBuffer (resample or
//                 reframe case) rather than consumed directly from pFrame
void CAudioEncoder::ProcessAudioFrame (CMediaFrame *pFrame)
{
  const u_int8_t* frameData = (const uint8_t *)pFrame->GetData();
  u_int32_t frameDataLength = pFrame->GetDataLength();
  Timestamp srcFrameTimestamp = pFrame->GetTimestamp();;
  bool pcmMalloced = false;
  bool pcmBuffered;
  const u_int8_t* pcmData = frameData;
  u_int32_t pcmDataLength = frameDataLength;
  uint32_t audioSrcSamplesPerFrame = SrcBytesToSamples(frameDataLength);
  // duration already sitting in the pre-encoding buffer; subtracted from the
  // source timestamp so timestamps refer to the head of the buffered data
  Duration subtractDuration = 0;

  /*************************************************************************
   * First convert input samples to format we need them to be in
   *************************************************************************/
  if (m_audioSrcChannels != m_audioDstChannels) {
    // Convert the channels if they don't match
    // we either double the channel info, or combine
    // the left and right
    uint32_t samples = SrcBytesToSamples(frameDataLength);
    uint32_t dstLength = DstSamplesToBytes(samples);
    pcmData = (u_int8_t *)Malloc(dstLength);
    pcmDataLength = dstLength;
    pcmMalloced = true;

    int16_t *src = (int16_t *)frameData;
    int16_t *dst = (int16_t *)pcmData;
    if (m_audioSrcChannels == 1) {
      // 1 channel to 2 - duplicate each mono sample into left and right
      for (uint32_t ix = 0; ix < samples; ix++) {
	*dst++ = *src;
	*dst++ = *src++;
      }
    } else {
      // 2 channels to 1 - average the pair, clamping to int16 range
      for (uint32_t ix = 0; ix < samples; ix++) {
	int32_t sum = *src++;
	sum += *src++;
	sum /= 2;
	if (sum < -32768) sum = -32768;
	else if (sum > 32767) sum = 32767;
	*dst++ = sum & 0xffff;
      }
    }
  }

  // resample audio, if necessary
  if (m_audioSrcSampleRate != m_audioDstSampleRate) {
    subtractDuration =
      DstSamplesToTicks(DstBytesToSamples(m_audioPreEncodingBufferLength));
    ResampleAudio(pcmData, pcmDataLength);
    // resampled data is now available in m_audioPreEncodingBuffer
    pcmBuffered = true;
  } else if (audioSrcSamplesPerFrame != m_audioDstSamplesPerFrame) {
    // reframe audio, if necessary
    // e.g. MP3 is 1152 samples/frame, AAC is 1024 samples/frame
    // add samples to end of m_audioPreEncodingBuffer
    // InitAudio() ensures that buffer is large enough
    if (m_audioPreEncodingBuffer == NULL) {
      // lazy allocation in case Initialize() was skipped for this path
      m_audioPreEncodingBuffer =
	(u_int8_t*)realloc(m_audioPreEncodingBuffer,
			   m_audioPreEncodingBufferMaxLength);
    }
    subtractDuration =
      DstSamplesToTicks(DstBytesToSamples(m_audioPreEncodingBufferLength));
    memcpy(
	   &m_audioPreEncodingBuffer[m_audioPreEncodingBufferLength],
	   pcmData,
	   pcmDataLength);
    m_audioPreEncodingBufferLength += pcmDataLength;
    pcmBuffered = true;
  } else {
    // default case - just use what we're passed
    pcmBuffered = false;
  }

  // rewind the timestamp to the start of the buffered data
  srcFrameTimestamp -= subtractDuration;

  /************************************************************************
   * Loop while we have enough samples
   ************************************************************************/
  Duration frametime = DstSamplesToTicks(m_audioDstSamplesPerFrame);

  if (m_audioDstFrameNumber == 0)
    debug_message("%s:frametime "U64, Profile()->GetName(), frametime);

  while (1) {
    /*
     * Record starting timestamps
     */
    if (m_audioSrcFrameNumber == 0) {
      /*
       * we use m_audioStartTimestamp to determine audio output start time
       */
      m_audioStartTimestamp = srcFrameTimestamp;
#ifdef DEBUG_AUDIO_SYNC
      if (Profile()->GetBoolValue(CFG_AUDIO_DEBUG))
	debug_message("%s:m_audioStartTimestamp = "U64,
		      Profile()->GetName(), m_audioStartTimestamp);
#endif
    }

    if (m_audioDstFrameNumber == 0) {
      // we wait until we see the first encoded frame.
      // this is because encoders usually buffer the first few
      // raw audio frames fed to them, and this number varies
      // from one encoder to another
      // We use this value to determine if we need to drop due to
      // a bad input frequency
      m_audioEncodingStartTimestamp = srcFrameTimestamp;
    }

    // we calculate audioSrcElapsedDuration by taking the current frame's
    // timestamp and subtracting the audioEncodingStartTimestamp (and NOT
    // the audioStartTimestamp).
    // this way, we just need to compare audioSrcElapsedDuration with
    // audioDstElapsedDuration (which should match in the ideal case),
    // and we don't have to compensate for the lag introduced by the initial
    // buffering of source frames in the encoder, which may vary from
    // one encoder to another
    m_audioSrcElapsedDuration =
      srcFrameTimestamp - m_audioEncodingStartTimestamp;
    m_audioSrcFrameNumber++;

    if (pcmBuffered) {
      u_int32_t samplesAvailable =
	DstBytesToSamples(m_audioPreEncodingBufferLength);
      // input has been copied into the staging buffer; the heap copy
      // (if any) is no longer needed
      if (pcmMalloced) {
	free((void *)pcmData);
	pcmMalloced = false;
      }
#ifdef DEBUG_AUDIO_SYNC
      if (Profile()->GetBoolValue(CFG_AUDIO_DEBUG))
	debug_message("%s: samples %u need %u",
		      Profile()->GetName(), samplesAvailable,
		      m_audioDstSamplesPerFrame);
#endif
      // not enough samples collected yet to call encode or forward
      // we moved the data above.
      if (samplesAvailable < m_audioDstSamplesPerFrame) {
	return;
      }
      // setup for encode/forward - encode one frame from the buffer head
      pcmData = &m_audioPreEncodingBuffer[0];
      pcmDataLength = DstSamplesToBytes(m_audioDstSamplesPerFrame);
    }

#ifdef DEBUG_AUDIO_SYNC
    if (Profile()->GetBoolValue(CFG_AUDIO_DEBUG))
      debug_message("%s:srcDuration="U64" dstDuration "U64" "D64,
		    Profile()->GetName(),
		    m_audioSrcElapsedDuration,
		    m_audioDstElapsedDuration,
		    m_audioDstElapsedDuration - m_audioSrcElapsedDuration);
#endif

    /*
     * Check if we can encode, or if we have to add/drop frames
     * First check is to see if the source frequency is greater than the
     * theory frequency.
     */
    if (m_audioSrcElapsedDuration + frametime >= m_audioDstElapsedDuration) {
      // source gets ahead of destination
      // We tolerate a difference of 3 frames since A/V sync is usually
      // noticeable after that.  This way we give the encoder a chance to pick
      // up
      if (m_audioSrcElapsedDuration >
	  (3 * frametime) + m_audioDstElapsedDuration) {
	// pad with enough silence frames to bring dst within 1 frame of src
	int j = (int) (DstTicksToSamples(m_audioSrcElapsedDuration
					 + (2 * frametime)
					 - m_audioDstElapsedDuration)
		       / m_audioDstSamplesPerFrame);
	debug_message("%s: Adding %d silence frames",
		      Profile()->GetName(), j);
	for (int k=0; k<j; k++)
	  AddSilenceFrame();
      }
#ifdef DEBUG_SYNC
      debug_message("%s:encoding", Profile()->GetName());
#endif
      /*
       * Actually encode and forward the frames
       */
      bool rc = EncodeSamples(
			      (int16_t*)pcmData,
			      m_audioDstSamplesPerFrame,
			      m_audioDstChannels);
      if (!rc) {
	debug_message("failed to encode audio");
      }

      ForwardEncodedAudioFrames();
    } else {
      // destination gets ahead of source
      // This has been observed as a result of clock frequency drift between
      // the sound card oscillator and the system mainbord oscillator
      // Example: If the sound card oscillator has a 'real' frequency that
      // is slightly larger than the 'rated' frequency, and we are sampling
      // at 32kHz, then the 32000 samples acquired from the sound card
      // 'actually' occupy a duration of slightly less than a second.
      //
      // The clock drift is usually fraction of a Hz and takes a long
      // time (~ 20-30 minutes) before we are off by one frame duration
      debug_message("%s:audio: dropping frame, SrcElapsedDuration="U64" DstElapsedDuration="U64" "U64,
		    Profile()->GetName(),
		    m_audioSrcElapsedDuration,
		    m_audioDstElapsedDuration,
		    frametime);
      // don't return - drop through to remove frame
    }
    if (pcmMalloced) {
      free((void *)pcmData);
    }
    if (pcmBuffered) {
      /*
       * This means we're storing data, either from resampling, or if the
       * sample numbers do not match.  We will remove the encoded samples,
       * and increment the srcFrameTimestamp
       */
      // compact the staging buffer; regions overlap, hence memmove
      m_audioPreEncodingBufferLength -= pcmDataLength;
      memmove(
	      &m_audioPreEncodingBuffer[0],
	      &m_audioPreEncodingBuffer[pcmDataLength],
	      m_audioPreEncodingBufferLength);
      subtractDuration = 0;
      srcFrameTimestamp += frametime;
    } else {
      // no data in buffer (default case).
      return;
    }
  }
}
void CMediaSource::ProcessAudioFrame( u_int8_t* frameData, u_int32_t frameDataLength, Timestamp srcFrameTimestamp) { if (m_audioSrcFrameNumber == 0) { if (!m_sourceVideo || m_videoSrcFrameNumber == 0) { m_encodingStartTimestamp = GetTimestamp(); } m_audioStartTimestamp = srcFrameTimestamp; #ifdef DEBUG_AUDIO_SYNC debug_message("m_audioStartTimestamp = "U64, m_audioStartTimestamp); #endif } if (m_audioDstFrameNumber == 0) { // we wait until we see the first encoded frame. // this is because encoders usually buffer the first few // raw audio frames fed to them, and this number varies // from one encoder to another m_audioEncodingStartTimestamp = srcFrameTimestamp; } // we calculate audioSrcElapsedDuration by taking the current frame's // timestamp and subtracting the audioEncodingStartTimestamp (and NOT // the audioStartTimestamp). // this way, we just need to compare audioSrcElapsedDuration with // audioDstElapsedDuration (which should match in the ideal case), // and we don't have to compensate for the lag introduced by the initial // buffering of source frames in the encoder, which may vary from // one encoder to another m_audioSrcElapsedDuration = srcFrameTimestamp - m_audioEncodingStartTimestamp; m_audioSrcFrameNumber++; #if 0 // not needed if (resync) { // flush preEncodingBuffer m_audioPreEncodingBufferLength = 0; // change dst sample numbers to account for gap m_audioDstSampleNumber = m_audioDstRawSampleNumber = DstTicksToSamples(m_audioSrcElapsedDuration); error_message("Received resync"); } #endif bool pcmMalloced = false; bool pcmBuffered; u_int8_t* pcmData = frameData; u_int32_t pcmDataLength = frameDataLength; if (m_audioSrcChannels != m_audioDstChannels) { // Convert the channels if they don't match // we either double the channel info, or combine // the left and right uint32_t samples = SrcBytesToSamples(frameDataLength); uint32_t dstLength = DstSamplesToBytes(samples); pcmData = (u_int8_t *)Malloc(dstLength); pcmDataLength = dstLength; pcmMalloced = 
true; int16_t *src = (int16_t *)frameData; int16_t *dst = (int16_t *)pcmData; if (m_audioSrcChannels == 1) { // 1 channel to 2 for (uint32_t ix = 0; ix < samples; ix++) { *dst++ = *src; *dst++ = *src++; } } else { // 2 channels to 1 for (uint32_t ix = 0; ix < samples; ix++) { int32_t sum = *src++; sum += *src++; sum /= 2; if (sum < -32768) sum = -32768; else if (sum > 32767) sum = 32767; *dst++ = sum & 0xffff; } } } // resample audio, if necessary if (m_audioSrcSampleRate != m_audioDstSampleRate) { ResampleAudio(pcmData, pcmDataLength); // resampled data is now available in m_audioPreEncodingBuffer pcmBuffered = true; } else if (m_audioSrcSamplesPerFrame != m_audioDstSamplesPerFrame) { // reframe audio, if necessary // e.g. MP3 is 1152 samples/frame, AAC is 1024 samples/frame // add samples to end of m_audioBuffer // InitAudio() ensures that buffer is large enough memcpy( &m_audioPreEncodingBuffer[m_audioPreEncodingBufferLength], pcmData, pcmDataLength); m_audioPreEncodingBufferLength += pcmDataLength; pcmBuffered = true; } else { pcmBuffered = false; } // LATER restructure so as get rid of this label, and goto below pcmBufferCheck: if (pcmBuffered) { u_int32_t samplesAvailable = DstBytesToSamples(m_audioPreEncodingBufferLength); // not enough samples collected yet to call encode or forward if (samplesAvailable < m_audioDstSamplesPerFrame) { return; } if (pcmMalloced) { free(pcmData); pcmMalloced = false; } // setup for encode/forward pcmData = &m_audioPreEncodingBuffer[0]; pcmDataLength = DstSamplesToBytes(m_audioDstSamplesPerFrame); } // encode audio frame if (m_pConfig->m_audioEncode) { Duration frametime = DstSamplesToTicks(DstBytesToSamples(frameDataLength)); #ifdef DEBUG_AUDIO_SYNC debug_message("asrc# %d srcDuration="U64" dst# %d dstDuration "U64, m_audioSrcFrameNumber, m_audioSrcElapsedDuration, m_audioDstFrameNumber, m_audioDstElapsedDuration); #endif // destination gets ahead of source // This has been observed as a result of clock frequency drift between 
// the sound card oscillator and the system mainbord oscillator // Example: If the sound card oscillator has a 'real' frequency that // is slightly larger than the 'rated' frequency, and we are sampling // at 32kHz, then the 32000 samples acquired from the sound card // 'actually' occupy a duration of slightly less than a second. // // The clock drift is usually fraction of a Hz and takes a long // time (~ 20-30 minutes) before we are off by one frame duration if (m_audioSrcElapsedDuration + frametime < m_audioDstElapsedDuration) { debug_message("audio: dropping frame, SrcElapsedDuration="U64" DstElapsedDuration="U64, m_audioSrcElapsedDuration, m_audioDstElapsedDuration); return; } // source gets ahead of destination // We tolerate a difference of 3 frames since A/V sync is usually // noticeable after that. This way we give the encoder a chance to pick up if (m_audioSrcElapsedDuration > (3 * frametime) + m_audioDstElapsedDuration) { int j = (int) (DstTicksToSamples(m_audioSrcElapsedDuration + (2 * frametime) - m_audioDstElapsedDuration) / m_audioDstSamplesPerFrame); debug_message("audio: Adding %d silence frames", j); for (int k=0; k<j; k++) AddSilenceFrame(); } //Timestamp encodingStartTimestamp = GetTimestamp(); bool rc = m_audioEncoder->EncodeSamples( (int16_t*)pcmData, m_audioDstSamplesPerFrame, m_audioDstChannels); if (!rc) { debug_message("failed to encode audio"); return; } // Disabled since we are not taking into account audio drift anymore /* Duration encodingTime = (GetTimestamp() - encodingStartTimestamp); if (m_sourceRealTime && m_videoSource) { Duration drift; if (frametime <= encodingTime) { drift = encodingTime - frametime; m_videoSource->AddEncodingDrift(drift); } } */ ForwardEncodedAudioFrames(); } //Forward PCM Frames to Feeder Sink if ((m_pConfig->GetBoolValue(CONFIG_FEEDER_SINK_ENABLE) && frameDataLength > 0)) { // make a copy of the pcm data if needed u_int8_t* FwdedData; FwdedData = (u_int8_t*)Malloc(frameDataLength); memcpy(FwdedData, 
frameData, frameDataLength); CMediaFrame* pFrame = new CMediaFrame( RAWPCMAUDIOFRAME, FwdedData, frameDataLength, srcFrameTimestamp, 0, m_audioDstSampleRate); ForwardFrame(pFrame); } // if desired, forward raw audio to sinks if (m_pConfig->SourceRawAudio() && pcmDataLength > 0) { // make a copy of the pcm data if needed u_int8_t* pcmForwardedData; if (!pcmMalloced) { pcmForwardedData = (u_int8_t*)Malloc(pcmDataLength); memcpy(pcmForwardedData, pcmData, pcmDataLength); } else { pcmForwardedData = pcmData; pcmMalloced = false; } #ifndef WORDS_BIGENDIAN // swap byte ordering so we have big endian to write into // the file. uint16_t *pdata = (uint16_t *)pcmForwardedData; for (uint32_t ix = 0; ix < pcmDataLength; ix += sizeof(uint16_t),pdata++) { uint16_t swap = *pdata; *pdata = B2N_16(swap); } #endif CMediaFrame* pFrame = new CMediaFrame( PCMAUDIOFRAME, pcmForwardedData, pcmDataLength, m_audioStartTimestamp + DstSamplesToTicks(m_audioDstRawSampleNumber), DstBytesToSamples(pcmDataLength), m_audioDstSampleRate); ForwardFrame(pFrame); m_audioDstRawSampleNumber += SrcBytesToSamples(pcmDataLength); m_audioDstRawFrameNumber++; } if (pcmMalloced) { free(pcmData); } if (pcmBuffered) { m_audioPreEncodingBufferLength -= pcmDataLength; memcpy( &m_audioPreEncodingBuffer[0], &m_audioPreEncodingBuffer[pcmDataLength], m_audioPreEncodingBufferLength); goto pcmBufferCheck; } }
// Configure the audio source parameters, (re)create the audio encoder from
// the configured encoder name, and set up per-channel resamplers plus the
// pre-encoding staging buffer.
// Returns false if the encoder cannot be created/initialized or the staging
// buffer cannot be allocated.
bool CMediaSource::SetAudioSrc(
			       MediaType srcType,
			       u_int8_t srcChannels,
			       u_int32_t srcSampleRate)
{
  // audio source info
  m_audioSrcType = srcType;
  m_audioSrcChannels = srcChannels;
  m_audioSrcSampleRate = srcSampleRate;
  m_audioSrcSamplesPerFrame = 0;	// unknown, presumed variable

  // init audio encoder
  delete m_audioEncoder;

  m_audioEncoder = AudioEncoderCreate(
				      m_pConfig->GetStringValue(CONFIG_AUDIO_ENCODER));
  if (m_audioEncoder == NULL) {
    return false;
  }

  if (!m_audioEncoder->Init(m_pConfig, m_sourceRealTime)) {
    delete m_audioEncoder;
    m_audioEncoder = NULL;
    return false;
  }

  m_audioDstType = m_audioEncoder->GetFrameType();
  m_audioDstSamplesPerFrame = m_audioEncoder->GetSamplesPerFrame();

  // if we need to resample
  if (m_audioDstSampleRate != m_audioSrcSampleRate) {
    // create a resampler for each audio destination channel -
    // we will combine the channels before resampling
    m_audioResample = (resample_t *)malloc(sizeof(resample_t) *
					   m_audioDstChannels);
    // FIX: original loop condition was `ix <= m_audioDstChannels`, which
    // wrote one resample_t past the end of the malloc'd array (heap
    // overflow); CAudioEncoder::Initialize uses the correct `<` bound
    for (int ix = 0; ix < m_audioDstChannels; ix++) {
      m_audioResample[ix] = st_resample_start(m_audioSrcSampleRate,
					      m_audioDstSampleRate);
    }
  }

  // this calculation doesn't take into consideration the resampling
  // size of the src.  4 times might not be enough - we need most likely
  // 2 times the max of the src samples and the dest samples
  m_audioPreEncodingBufferLength = 0;
  m_audioPreEncodingBufferMaxLength =
    4 * DstSamplesToBytes(m_audioDstSamplesPerFrame);

  m_audioPreEncodingBuffer = (u_int8_t*)realloc(
						m_audioPreEncodingBuffer,
						m_audioPreEncodingBufferMaxLength);

  if (m_audioPreEncodingBuffer == NULL) {
    delete m_audioEncoder;
    m_audioEncoder = NULL;
    return false;
  }

  return true;
}