void COSSAudioSource::ProcessAudio(void) { #ifdef SNDCTL_DSP_GETERROR audio_errinfo errinfo; if (m_audioSrcFrameNumber == 0) { ioctl(m_audioDevice, SNDCTL_DSP_GETERROR, &errinfo); } else { ioctl(m_audioDevice, SNDCTL_DSP_GETERROR, &errinfo); if (errinfo.rec_overruns > 0) { debug_message("overrun error found in audio - adding "U64" samples", SrcBytesToSamples(errinfo.rec_ptradjust)); close(m_audioDevice); InitDevice(); m_audioSrcSampleNumber = 0; } } #endif if (m_audioSrcFrameNumber == 0) { // Pull the trigger and start the audio input int enablebits; ioctl(m_audioDevice, SNDCTL_DSP_GETTRIGGER, &enablebits); enablebits |= PCM_ENABLE_INPUT; ioctl(m_audioDevice, SNDCTL_DSP_SETTRIGGER, &enablebits); } // for efficiency, process 1 second before returning to check for commands for (int pass = 0; pass < m_maxPasses; pass++) { audio_buf_info info; int rc = ioctl(m_audioDevice, SNDCTL_DSP_GETISPACE, &info); Timestamp currentTime = GetTimestamp(); if (rc<0) { error_message("Failed to query OSS GETISPACE"); info.bytes = 0; } uint32_t bytesRead = read(m_audioDevice, m_pcmFrameBuffer, m_pcmFrameSize); if (bytesRead < m_pcmFrameSize) { debug_message("bad audio read"); continue; } Timestamp timestamp; if (info.bytes == m_audioOssMaxBufferSize) { // means the audio buffer is full, and not capturing // we want to make the timestamp based on the previous one // When we hit this case, we start using the m_timestampOverflowArray // This will give us a timestamp for when the array is full. // // In other words, if we have a full audio buffer (ie: it's not loading // any more), we start storing the current timestamp into the array. 
// This will let us "catch up", and have a somewhat accurate timestamp // when we loop around // // wmay - I'm not convinced that this actually works - if the buffer // cleans up, we'll ignore m_timestampOverflowArray if (m_timestampOverflowArray != NULL && m_timestampOverflowArray[m_timestampOverflowArrayIndex] != 0) { timestamp = m_timestampOverflowArray[m_timestampOverflowArrayIndex]; } else { timestamp = m_prevTimestamp + SrcSamplesToTicks(m_audioSrcSamplesPerFrame); } if (m_timestampOverflowArray != NULL) m_timestampOverflowArray[m_timestampOverflowArrayIndex] = currentTime; debug_message("audio buffer full !"); } else { // buffer is not full - so, we make the timestamp based on the number // of bytes in the buffer that we read. timestamp = currentTime - SrcSamplesToTicks(SrcBytesToSamples(info.bytes)); if (m_timestampOverflowArray != NULL) m_timestampOverflowArray[m_timestampOverflowArrayIndex] = 0; } #ifdef DEBUG_TIMESTAMPS debug_message("info.bytes=%d t="U64" timestamp="U64" delta="U64, info.bytes, currentTime, timestamp, timestamp - m_prevTimestamp); #endif m_prevTimestamp = timestamp; if (m_timestampOverflowArray != NULL) { m_timestampOverflowArrayIndex = (m_timestampOverflowArrayIndex + 1) % m_audioOssMaxBufferFrames; } ProcessAudioFrame(m_pcmFrameBuffer, m_pcmFrameSize, timestamp); } }
// Capture loop for the ALSA audio source.
//
// Runs up to m_maxPasses iterations (or until m_stop_thread is set):
// each pass reads one encoder-sized PCM frame from the ALSA device,
// derives a capture timestamp from the device's available-frame count,
// and forwards the frame (ownership of the malloc'd buffer passes to
// the CMediaFrame).
void CALSAAudioSource::ProcessAudio(void)
{
  int err;

  if (m_audioSrcFrameNumber == 0) {
    // Start the device
    if ((err = snd_pcm_start(m_pcmHandle)) < 0) {
      error_message("Couldn't start the PCM device: %s", snd_strerror(err));
    }
  }

  snd_pcm_status_t *status;
  // alloca-based: status lives on this stack frame, no free needed
  snd_pcm_status_alloca(&status);

  // for efficiency, process 1 second before returning to check for commands
  for (int pass = 0; pass < m_maxPasses && m_stop_thread == false; pass++) {

    // Buffer ownership is transferred to the CMediaFrame below (or freed
    // on a short read).
    u_int8_t* pcmFrameBuffer;
    pcmFrameBuffer = (u_int8_t*)malloc(m_pcmFrameSize);

    // The alsa frames is not the same as the pcm frames used to feed the encoder
    // Calculate how many alsa frames is neccesary to read to fill one pcm frame
    // (assumes 16-bit samples: one ALSA frame = channels * 2 bytes)
    snd_pcm_uframes_t num_frames =
      m_pcmFrameSize / (m_audioSrcChannels * sizeof(u_int16_t));

    // Check how many bytes there is to read in the buffer, it will be used
    // to calculate timestamp
    snd_pcm_status(m_pcmHandle, status);
    unsigned long avail_bytes =
      snd_pcm_status_get_avail(status) * (m_audioSrcChannels * sizeof(u_int16_t));

    Timestamp currentTime = GetTimestamp();
    Timestamp timestamp;

    // Read num_frames frames from the PCM device
    // pointed to by pcm_handle to buffer capdata.
    // Returns the number of frames actually read.
    // TODO On certain alsa configurations, e.g. when using dsnoop with low
    // sample rate, the period gets too small. What to do about that?
    snd_pcm_sframes_t framesRead;
    if((framesRead = snd_pcm_readi(m_pcmHandle, pcmFrameBuffer, num_frames)) == -EPIPE) {
      // -EPIPE = overrun; recover the stream so the next read can work.
      // NOTE(review): after recovery this pass still falls through and
      // forwards pcmFrameBuffer even though no data was read into it --
      // looks like a bug, confirm intended behavior.
      snd_pcm_prepare(m_pcmHandle);
      // Buffer Overrun. This means the audio buffer is full, and not capturing
      // we want to make the timestamp based on the previous one
      // When we hit this case, we start using the m_timestampOverflowArray
      // This will give us a timestamp for when the array is full.
      //
      // In other words, if we have a full audio buffer (ie: it's not loading
      // any more), we start storing the current timestamp into the array.
      // This will let us "catch up", and have a somewhat accurate timestamp
      // when we loop around
      //
      // wmay - I'm not convinced that this actually works - if the buffer
      // cleans up, we'll ignore m_timestampOverflowArray
      if (m_timestampOverflowArray != NULL &&
	  m_timestampOverflowArray[m_timestampOverflowArrayIndex] != 0) {
	timestamp = m_timestampOverflowArray[m_timestampOverflowArrayIndex];
      } else {
	// NOTE(review): avail_bytes is a *byte* count but is passed to
	// SrcSamplesToTicks() without a SrcBytesToSamples() conversion
	// (compare the non-overrun branch below) -- suspected bug, verify.
	timestamp = m_prevTimestamp + SrcSamplesToTicks(avail_bytes);
      }

      if (m_timestampOverflowArray != NULL)
	m_timestampOverflowArray[m_timestampOverflowArrayIndex] = currentTime;

      debug_message("audio buffer full !");
    } else {
      if (framesRead < (snd_pcm_sframes_t) num_frames) {
	// Short (or failed) read - drop this pass and try again.
	error_message("Bad audio read. Expected %li frames, got %li",
		      num_frames, framesRead);
	free(pcmFrameBuffer);
	continue;
      }
      // buffer is not full - so, we make the timestamp based on the number
      // of bytes in the buffer that we read.
      timestamp = currentTime - SrcSamplesToTicks(SrcBytesToSamples(avail_bytes));
      if (m_timestampOverflowArray != NULL)
	m_timestampOverflowArray[m_timestampOverflowArrayIndex] = 0;
    }

    //debug_message("alsa read");
#ifdef DEBUG_TIMESTAMPS
    debug_message("avail_bytes=%lu t="U64" timestamp="U64" delta="U64,
		  avail_bytes, currentTime, timestamp,
		  timestamp - m_prevTimestamp);
#endif

    m_prevTimestamp = timestamp;
    if (m_timestampOverflowArray != NULL) {
      m_timestampOverflowArrayIndex =
	(m_timestampOverflowArrayIndex + 1) % m_audioMaxBufferFrames;
    }

#ifdef DEBUG_TIMESTAMPS
    debug_message("pcm forward "U64" %u", timestamp, m_pcmFrameSize);
#endif

    if (m_audioSrcFrameNumber == 0 && m_videoSource != NULL) {
      // Sync the video stream to the first audio frame.
      m_videoSource->RequestKeyFrame(timestamp);
    }
    m_audioSrcFrameNumber++;

    // CMediaFrame takes ownership of pcmFrameBuffer.
    CMediaFrame *frame = new CMediaFrame(PCMAUDIOFRAME, pcmFrameBuffer,
					 m_pcmFrameSize, timestamp);
    ForwardFrame(frame);
  }
}
// Convert one raw PCM frame to the encoder's format (channels, sample
// rate, frame size), keep the encoder's output clock in sync with the
// source clock (inserting silence or dropping frames as needed), and
// encode/forward as many destination-sized frames as the buffered
// samples allow.
void CAudioEncoder::ProcessAudioFrame(CMediaFrame *pFrame)
{
  const u_int8_t* frameData = (const uint8_t *)pFrame->GetData();
  u_int32_t frameDataLength = pFrame->GetDataLength();
  Timestamp srcFrameTimestamp = pFrame->GetTimestamp();;
  bool pcmMalloced = false;   // true while pcmData points at a Malloc'd conversion buffer
  bool pcmBuffered;           // true when samples accumulate in m_audioPreEncodingBuffer
  const u_int8_t* pcmData = frameData;
  u_int32_t pcmDataLength = frameDataLength;
  uint32_t audioSrcSamplesPerFrame = SrcBytesToSamples(frameDataLength);
  // Ticks already sitting in the pre-encoding buffer; subtracted from the
  // incoming timestamp so it describes the *front* of the buffered data.
  Duration subtractDuration = 0;

  /*************************************************************************
   * First convert input samples to format we need them to be in
   *************************************************************************/
  if (m_audioSrcChannels != m_audioDstChannels) {
    // Convert the channels if they don't match
    // we either double the channel info, or combine
    // the left and right
    uint32_t samples = SrcBytesToSamples(frameDataLength);
    uint32_t dstLength = DstSamplesToBytes(samples);
    pcmData = (u_int8_t *)Malloc(dstLength);
    pcmDataLength = dstLength;
    pcmMalloced = true;

    int16_t *src = (int16_t *)frameData;
    int16_t *dst = (int16_t *)pcmData;
    if (m_audioSrcChannels == 1) {
      // 1 channel to 2 - duplicate each mono sample into L and R
      for (uint32_t ix = 0; ix < samples; ix++) {
	*dst++ = *src;
	*dst++ = *src++;
      }
    } else {
      // 2 channels to 1 - average L and R, clamped to int16 range
      for (uint32_t ix = 0; ix < samples; ix++) {
	int32_t sum = *src++;
	sum += *src++;
	sum /= 2;
	if (sum < -32768) sum = -32768;
	else if (sum > 32767) sum = 32767;
	*dst++ = sum & 0xffff;
      }
    }
  }

  // resample audio, if necessary
  if (m_audioSrcSampleRate != m_audioDstSampleRate) {
    subtractDuration =
      DstSamplesToTicks(DstBytesToSamples(m_audioPreEncodingBufferLength));
    ResampleAudio(pcmData, pcmDataLength);
    // resampled data is now available in m_audioPreEncodingBuffer
    pcmBuffered = true;
  } else if (audioSrcSamplesPerFrame != m_audioDstSamplesPerFrame) {
    // reframe audio, if necessary
    // e.g. MP3 is 1152 samples/frame, AAC is 1024 samples/frame
    // add samples to end of m_audioPreEncodingBuffer
    // InitAudio() ensures that buffer is large enough
    if (m_audioPreEncodingBuffer == NULL) {
      // realloc(NULL, n) acts as malloc(n)
      m_audioPreEncodingBuffer =
	(u_int8_t*)realloc(m_audioPreEncodingBuffer,
			   m_audioPreEncodingBufferMaxLength);
    }
    subtractDuration =
      DstSamplesToTicks(DstBytesToSamples(m_audioPreEncodingBufferLength));
    memcpy(&m_audioPreEncodingBuffer[m_audioPreEncodingBufferLength],
	   pcmData, pcmDataLength);
    m_audioPreEncodingBufferLength += pcmDataLength;
    pcmBuffered = true;
  } else {
    // default case - just use what we're passed
    pcmBuffered = false;
  }

  // Back-date the timestamp to the start of the data already buffered.
  srcFrameTimestamp -= subtractDuration;

  /************************************************************************
   * Loop while we have enough samples
   ************************************************************************/
  Duration frametime = DstSamplesToTicks(m_audioDstSamplesPerFrame);
  if (m_audioDstFrameNumber == 0)
    debug_message("%s:frametime "U64, Profile()->GetName(), frametime);

  while (1) {
    /*
     * Record starting timestamps
     */
    if (m_audioSrcFrameNumber == 0) {
      /*
       * we use m_audioStartTimestamp to determine audio output start time
       */
      m_audioStartTimestamp = srcFrameTimestamp;
#ifdef DEBUG_AUDIO_SYNC
      if (Profile()->GetBoolValue(CFG_AUDIO_DEBUG))
	debug_message("%s:m_audioStartTimestamp = "U64,
		      Profile()->GetName(), m_audioStartTimestamp);
#endif
    }

    if (m_audioDstFrameNumber == 0) {
      // we wait until we see the first encoded frame.
      // this is because encoders usually buffer the first few
      // raw audio frames fed to them, and this number varies
      // from one encoder to another
      // We use this value to determine if we need to drop due to
      // a bad input frequency
      m_audioEncodingStartTimestamp = srcFrameTimestamp;
    }

    // we calculate audioSrcElapsedDuration by taking the current frame's
    // timestamp and subtracting the audioEncodingStartTimestamp (and NOT
    // the audioStartTimestamp).
    // this way, we just need to compare audioSrcElapsedDuration with
    // audioDstElapsedDuration (which should match in the ideal case),
    // and we don't have to compensate for the lag introduced by the initial
    // buffering of source frames in the encoder, which may vary from
    // one encoder to another
    m_audioSrcElapsedDuration = srcFrameTimestamp - m_audioEncodingStartTimestamp;
    m_audioSrcFrameNumber++;

    if (pcmBuffered) {
      u_int32_t samplesAvailable =
	DstBytesToSamples(m_audioPreEncodingBufferLength);
      // Conversion buffer was copied into m_audioPreEncodingBuffer above;
      // free it now (first iteration only).
      if (pcmMalloced) {
	free((void *)pcmData);
	pcmMalloced = false;
      }
#ifdef DEBUG_AUDIO_SYNC
      if (Profile()->GetBoolValue(CFG_AUDIO_DEBUG))
	debug_message("%s: samples %u need %u",
		      Profile()->GetName(), samplesAvailable,
		      m_audioDstSamplesPerFrame);
#endif
      // not enough samples collected yet to call encode or forward
      // we moved the data above.
      if (samplesAvailable < m_audioDstSamplesPerFrame) {
	return;
      }
      // setup for encode/forward - encode directly from the front of the
      // pre-encoding buffer
      pcmData = &m_audioPreEncodingBuffer[0];
      pcmDataLength = DstSamplesToBytes(m_audioDstSamplesPerFrame);
    }

#ifdef DEBUG_AUDIO_SYNC
    if (Profile()->GetBoolValue(CFG_AUDIO_DEBUG))
      debug_message("%s:srcDuration="U64" dstDuration "U64" "D64,
		    Profile()->GetName(),
		    m_audioSrcElapsedDuration,
		    m_audioDstElapsedDuration,
		    m_audioDstElapsedDuration - m_audioSrcElapsedDuration);
#endif

    /*
     * Check if we can encode, or if we have to add/drop frames
     * First check is to see if the source frequency is greater than the
     * theory frequency.
     */
    if (m_audioSrcElapsedDuration + frametime >= m_audioDstElapsedDuration) {
      // source gets ahead of destination
      // We tolerate a difference of 3 frames since A/V sync is usually
      // noticeable after that. This way we give the encoder a chance to pick
      // up
      if (m_audioSrcElapsedDuration > (3 * frametime) + m_audioDstElapsedDuration) {
	int j = (int) (DstTicksToSamples(m_audioSrcElapsedDuration
					 + (2 * frametime)
					 - m_audioDstElapsedDuration)
		       / m_audioDstSamplesPerFrame);
	debug_message("%s: Adding %d silence frames",
		      Profile()->GetName(), j);
	for (int k=0; k<j; k++)
	  AddSilenceFrame();
      }
#ifdef DEBUG_SYNC
      debug_message("%s:encoding", Profile()->GetName());
#endif
      /*
       * Actually encode and forward the frames
       */
      bool rc = EncodeSamples((int16_t*)pcmData,
			      m_audioDstSamplesPerFrame,
			      m_audioDstChannels);
      if (!rc) {
	debug_message("failed to encode audio");
      }
      ForwardEncodedAudioFrames();
    } else {
      // destination gets ahead of source
      // This has been observed as a result of clock frequency drift between
      // the sound card oscillator and the system mainbord oscillator
      // Example: If the sound card oscillator has a 'real' frequency that
      // is slightly larger than the 'rated' frequency, and we are sampling
      // at 32kHz, then the 32000 samples acquired from the sound card
      // 'actually' occupy a duration of slightly less than a second.
      //
      // The clock drift is usually fraction of a Hz and takes a long
      // time (~ 20-30 minutes) before we are off by one frame duration
      debug_message("%s:audio: dropping frame, SrcElapsedDuration="U64" DstElapsedDuration="U64" "U64,
		    Profile()->GetName(),
		    m_audioSrcElapsedDuration,
		    m_audioDstElapsedDuration,
		    frametime);
      // don't return - drop through to remove frame
    }

    if (pcmMalloced) {
      free((void *)pcmData);
    }

    if (pcmBuffered) {
      /*
       * This means we're storing data, either from resampling, or if the
       * sample numbers do not match. We will remove the encoded samples,
       * and increment the srcFrameTimestamp
       */
      m_audioPreEncodingBufferLength -= pcmDataLength;
      // memmove: source and destination overlap
      memmove(&m_audioPreEncodingBuffer[0],
	      &m_audioPreEncodingBuffer[pcmDataLength],
	      m_audioPreEncodingBufferLength);
      subtractDuration = 0;
      srcFrameTimestamp += frametime;
    } else {
      // no data in buffer (default case).
      return;
    }
  }
}
void CMediaSource::ProcessAudioFrame( u_int8_t* frameData, u_int32_t frameDataLength, Timestamp srcFrameTimestamp) { if (m_audioSrcFrameNumber == 0) { if (!m_sourceVideo || m_videoSrcFrameNumber == 0) { m_encodingStartTimestamp = GetTimestamp(); } m_audioStartTimestamp = srcFrameTimestamp; #ifdef DEBUG_AUDIO_SYNC debug_message("m_audioStartTimestamp = "U64, m_audioStartTimestamp); #endif } if (m_audioDstFrameNumber == 0) { // we wait until we see the first encoded frame. // this is because encoders usually buffer the first few // raw audio frames fed to them, and this number varies // from one encoder to another m_audioEncodingStartTimestamp = srcFrameTimestamp; } // we calculate audioSrcElapsedDuration by taking the current frame's // timestamp and subtracting the audioEncodingStartTimestamp (and NOT // the audioStartTimestamp). // this way, we just need to compare audioSrcElapsedDuration with // audioDstElapsedDuration (which should match in the ideal case), // and we don't have to compensate for the lag introduced by the initial // buffering of source frames in the encoder, which may vary from // one encoder to another m_audioSrcElapsedDuration = srcFrameTimestamp - m_audioEncodingStartTimestamp; m_audioSrcFrameNumber++; #if 0 // not needed if (resync) { // flush preEncodingBuffer m_audioPreEncodingBufferLength = 0; // change dst sample numbers to account for gap m_audioDstSampleNumber = m_audioDstRawSampleNumber = DstTicksToSamples(m_audioSrcElapsedDuration); error_message("Received resync"); } #endif bool pcmMalloced = false; bool pcmBuffered; u_int8_t* pcmData = frameData; u_int32_t pcmDataLength = frameDataLength; if (m_audioSrcChannels != m_audioDstChannels) { // Convert the channels if they don't match // we either double the channel info, or combine // the left and right uint32_t samples = SrcBytesToSamples(frameDataLength); uint32_t dstLength = DstSamplesToBytes(samples); pcmData = (u_int8_t *)Malloc(dstLength); pcmDataLength = dstLength; pcmMalloced = 
true; int16_t *src = (int16_t *)frameData; int16_t *dst = (int16_t *)pcmData; if (m_audioSrcChannels == 1) { // 1 channel to 2 for (uint32_t ix = 0; ix < samples; ix++) { *dst++ = *src; *dst++ = *src++; } } else { // 2 channels to 1 for (uint32_t ix = 0; ix < samples; ix++) { int32_t sum = *src++; sum += *src++; sum /= 2; if (sum < -32768) sum = -32768; else if (sum > 32767) sum = 32767; *dst++ = sum & 0xffff; } } } // resample audio, if necessary if (m_audioSrcSampleRate != m_audioDstSampleRate) { ResampleAudio(pcmData, pcmDataLength); // resampled data is now available in m_audioPreEncodingBuffer pcmBuffered = true; } else if (m_audioSrcSamplesPerFrame != m_audioDstSamplesPerFrame) { // reframe audio, if necessary // e.g. MP3 is 1152 samples/frame, AAC is 1024 samples/frame // add samples to end of m_audioBuffer // InitAudio() ensures that buffer is large enough memcpy( &m_audioPreEncodingBuffer[m_audioPreEncodingBufferLength], pcmData, pcmDataLength); m_audioPreEncodingBufferLength += pcmDataLength; pcmBuffered = true; } else { pcmBuffered = false; } // LATER restructure so as get rid of this label, and goto below pcmBufferCheck: if (pcmBuffered) { u_int32_t samplesAvailable = DstBytesToSamples(m_audioPreEncodingBufferLength); // not enough samples collected yet to call encode or forward if (samplesAvailable < m_audioDstSamplesPerFrame) { return; } if (pcmMalloced) { free(pcmData); pcmMalloced = false; } // setup for encode/forward pcmData = &m_audioPreEncodingBuffer[0]; pcmDataLength = DstSamplesToBytes(m_audioDstSamplesPerFrame); } // encode audio frame if (m_pConfig->m_audioEncode) { Duration frametime = DstSamplesToTicks(DstBytesToSamples(frameDataLength)); #ifdef DEBUG_AUDIO_SYNC debug_message("asrc# %d srcDuration="U64" dst# %d dstDuration "U64, m_audioSrcFrameNumber, m_audioSrcElapsedDuration, m_audioDstFrameNumber, m_audioDstElapsedDuration); #endif // destination gets ahead of source // This has been observed as a result of clock frequency drift between 
// the sound card oscillator and the system mainbord oscillator // Example: If the sound card oscillator has a 'real' frequency that // is slightly larger than the 'rated' frequency, and we are sampling // at 32kHz, then the 32000 samples acquired from the sound card // 'actually' occupy a duration of slightly less than a second. // // The clock drift is usually fraction of a Hz and takes a long // time (~ 20-30 minutes) before we are off by one frame duration if (m_audioSrcElapsedDuration + frametime < m_audioDstElapsedDuration) { debug_message("audio: dropping frame, SrcElapsedDuration="U64" DstElapsedDuration="U64, m_audioSrcElapsedDuration, m_audioDstElapsedDuration); return; } // source gets ahead of destination // We tolerate a difference of 3 frames since A/V sync is usually // noticeable after that. This way we give the encoder a chance to pick up if (m_audioSrcElapsedDuration > (3 * frametime) + m_audioDstElapsedDuration) { int j = (int) (DstTicksToSamples(m_audioSrcElapsedDuration + (2 * frametime) - m_audioDstElapsedDuration) / m_audioDstSamplesPerFrame); debug_message("audio: Adding %d silence frames", j); for (int k=0; k<j; k++) AddSilenceFrame(); } //Timestamp encodingStartTimestamp = GetTimestamp(); bool rc = m_audioEncoder->EncodeSamples( (int16_t*)pcmData, m_audioDstSamplesPerFrame, m_audioDstChannels); if (!rc) { debug_message("failed to encode audio"); return; } // Disabled since we are not taking into account audio drift anymore /* Duration encodingTime = (GetTimestamp() - encodingStartTimestamp); if (m_sourceRealTime && m_videoSource) { Duration drift; if (frametime <= encodingTime) { drift = encodingTime - frametime; m_videoSource->AddEncodingDrift(drift); } } */ ForwardEncodedAudioFrames(); } //Forward PCM Frames to Feeder Sink if ((m_pConfig->GetBoolValue(CONFIG_FEEDER_SINK_ENABLE) && frameDataLength > 0)) { // make a copy of the pcm data if needed u_int8_t* FwdedData; FwdedData = (u_int8_t*)Malloc(frameDataLength); memcpy(FwdedData, 
frameData, frameDataLength); CMediaFrame* pFrame = new CMediaFrame( RAWPCMAUDIOFRAME, FwdedData, frameDataLength, srcFrameTimestamp, 0, m_audioDstSampleRate); ForwardFrame(pFrame); } // if desired, forward raw audio to sinks if (m_pConfig->SourceRawAudio() && pcmDataLength > 0) { // make a copy of the pcm data if needed u_int8_t* pcmForwardedData; if (!pcmMalloced) { pcmForwardedData = (u_int8_t*)Malloc(pcmDataLength); memcpy(pcmForwardedData, pcmData, pcmDataLength); } else { pcmForwardedData = pcmData; pcmMalloced = false; } #ifndef WORDS_BIGENDIAN // swap byte ordering so we have big endian to write into // the file. uint16_t *pdata = (uint16_t *)pcmForwardedData; for (uint32_t ix = 0; ix < pcmDataLength; ix += sizeof(uint16_t),pdata++) { uint16_t swap = *pdata; *pdata = B2N_16(swap); } #endif CMediaFrame* pFrame = new CMediaFrame( PCMAUDIOFRAME, pcmForwardedData, pcmDataLength, m_audioStartTimestamp + DstSamplesToTicks(m_audioDstRawSampleNumber), DstBytesToSamples(pcmDataLength), m_audioDstSampleRate); ForwardFrame(pFrame); m_audioDstRawSampleNumber += SrcBytesToSamples(pcmDataLength); m_audioDstRawFrameNumber++; } if (pcmMalloced) { free(pcmData); } if (pcmBuffered) { m_audioPreEncodingBufferLength -= pcmDataLength; memcpy( &m_audioPreEncodingBuffer[0], &m_audioPreEncodingBuffer[pcmDataLength], m_audioPreEncodingBufferLength); goto pcmBufferCheck; } }