void CAudioEncoder::DoStopAudio()
{
  // flush remaining output from audio encoder
  // and forward it to sinks
  
  EncodeSamples(NULL, 0, m_audioSrcChannels);

  ForwardEncodedAudioFrames();

  StopEncoder();

  CHECK_AND_FREE(m_audioPreEncodingBuffer);
  debug_message("Audio profile %s stats", GetProfileName());
  debug_message(" encoded samples: "U64, m_audioDstSampleNumber);
  debug_message(" encoded frames: %u", m_audioDstFrameNumber);
}
/*
 * Encode one destination-sized frame of silence (all-zero samples) and
 * forward the encoder output.
 *
 * If encoding fails, a debug message is logged and nothing is forwarded.
 * The temporary silence buffer is released on every path.
 */
void CAudioEncoder::AddSilenceFrame(void)
{
  int bytes = DstSamplesToBytes(m_audioDstSamplesPerFrame);
  uint8_t *pSilenceData = (uint8_t *)Malloc(bytes);
  memset(pSilenceData, 0, bytes);

  bool rc = EncodeSamples(
                          (int16_t*)pSilenceData,
                          m_audioDstSamplesPerFrame,
                          m_audioDstChannels);
  if (rc) {
    ForwardEncodedAudioFrames();
  } else {
    debug_message("failed to encode audio");
  }

  // Single release point: previously the failure path returned early and
  // leaked the silence buffer.
  free(pSilenceData);
}
Example #3
0
/*
 * Shut down the audio path of this media source.
 *
 * When an encoder is present it is flushed, its pending output is
 * forwarded, and it is stopped and destroyed. The pre-encoding buffer is
 * then released and the source is marked as no longer providing audio.
 */
void CMediaSource::DoStopAudio()
{
  if (m_audioEncoder != NULL) {
    // A NULL sample pointer with a zero count asks the encoder to flush
    // what it still holds; that output is then handed on to the sinks.
    m_audioEncoder->EncodeSamples(NULL, 0, m_audioSrcChannels);
    ForwardEncodedAudioFrames();

    // Tear the encoder down and forget it.
    m_audioEncoder->Stop();
    delete m_audioEncoder;
    m_audioEncoder = NULL;
  }

  // free(NULL) is a no-op, so no guard is needed here.
  free(m_audioPreEncodingBuffer);
  m_audioPreEncodingBuffer = NULL;

  m_sourceAudio = false;
}
/*
 * Feed one raw PCM source frame to the encoder.
 *
 * Processing steps:
 *   1. If the source and destination channel counts differ, convert the
 *      16-bit samples (mono is duplicated to stereo, stereo is averaged
 *      down to mono) into a temporary buffer.
 *   2. If the sample rates differ, resample into m_audioPreEncodingBuffer;
 *      otherwise, if the samples-per-frame differ, append the samples to
 *      m_audioPreEncodingBuffer so they can be re-framed.
 *   3. Loop, one destination frame at a time: compare the elapsed source
 *      and destination durations, insert silence frames when the source
 *      is far ahead, drop the frame when the destination is ahead, and
 *      otherwise encode and forward it.
 *
 * pFrame - source frame; its data, data length and timestamp are read.
 */
void CAudioEncoder::ProcessAudioFrame(CMediaFrame *pFrame)
{
  const u_int8_t* frameData = (const uint8_t *)pFrame->GetData();
  u_int32_t frameDataLength = pFrame->GetDataLength();
  Timestamp srcFrameTimestamp = pFrame->GetTimestamp();

  bool pcmMalloced = false;   // true while pcmData is a buffer we own
  bool pcmBuffered = false;   // true when samples go through
                              // m_audioPreEncodingBuffer
  const u_int8_t* pcmData = frameData;
  u_int32_t pcmDataLength = frameDataLength;
  uint32_t audioSrcSamplesPerFrame = SrcBytesToSamples(frameDataLength);
  Duration subtractDuration = 0;

  /*************************************************************************
   * First convert input samples to format we need them to be in
   *************************************************************************/
  if (m_audioSrcChannels != m_audioDstChannels) {
    // Convert the channels if they don't match
    // we either double the channel info, or combine
    // the left and right
    uint32_t samples = SrcBytesToSamples(frameDataLength);
    uint32_t dstLength = DstSamplesToBytes(samples);
    pcmData = (u_int8_t *)Malloc(dstLength);
    pcmDataLength = dstLength;
    pcmMalloced = true;

    int16_t *src = (int16_t *)frameData;
    int16_t *dst = (int16_t *)pcmData;
    if (m_audioSrcChannels == 1) {
      // 1 channel to 2
      for (uint32_t ix = 0; ix < samples; ix++) {
        *dst++ = *src;
        *dst++ = *src++;
      }
    } else {
      // 2 channels to 1
      for (uint32_t ix = 0; ix < samples; ix++) {
        int32_t sum = *src++;
        sum += *src++;
        sum /= 2;
        if (sum < -32768) sum = -32768;
        else if (sum > 32767) sum = 32767;
        *dst++ = sum & 0xffff;
      }
    }
  }

  // resample audio, if necessary
  if (m_audioSrcSampleRate != m_audioDstSampleRate) {
    // samples already waiting in the buffer precede this frame, so the
    // first frame taken from the buffer starts that much earlier
    subtractDuration =
      DstSamplesToTicks(DstBytesToSamples(m_audioPreEncodingBufferLength));

    ResampleAudio(pcmData, pcmDataLength);

    // resampled data is now available in m_audioPreEncodingBuffer
    pcmBuffered = true;

  } else if (audioSrcSamplesPerFrame != m_audioDstSamplesPerFrame) {
    // reframe audio, if necessary
    // e.g. MP3 is 1152 samples/frame, AAC is 1024 samples/frame

    // add samples to end of m_audioPreEncodingBuffer
    // InitAudio() ensures that buffer is large enough
    if (m_audioPreEncodingBuffer == NULL) {
      m_audioPreEncodingBuffer =
        (u_int8_t*)realloc(m_audioPreEncodingBuffer,
                           m_audioPreEncodingBufferMaxLength);
      if (m_audioPreEncodingBuffer == NULL) {
        // allocation failed: copying into a NULL buffer would crash, so
        // give up on this frame instead
        debug_message("%s: failed to allocate audio pre-encoding buffer",
                      Profile()->GetName());
        if (pcmMalloced) {
          free((void *)pcmData);
        }
        return;
      }
    }
    subtractDuration =
      DstSamplesToTicks(DstBytesToSamples(m_audioPreEncodingBufferLength));
    memcpy(
           &m_audioPreEncodingBuffer[m_audioPreEncodingBufferLength],
           pcmData,
           pcmDataLength);

    m_audioPreEncodingBufferLength += pcmDataLength;

    pcmBuffered = true;

  } else {
    // default case - just use what we're passed
    pcmBuffered = false;
  }

  srcFrameTimestamp -= subtractDuration;

  /************************************************************************
   * Loop while we have enough samples
   ************************************************************************/
  Duration frametime = DstSamplesToTicks(m_audioDstSamplesPerFrame);
  // A space is required between a string literal and a format macro such
  // as U64/D64: since C++11 an adjacent identifier is parsed as a
  // user-defined literal suffix.
  if (m_audioDstFrameNumber == 0)
    debug_message("%s:frametime " U64, Profile()->GetName(), frametime);
  while (1) {

    /*
     * Record starting timestamps
     */
    if (m_audioSrcFrameNumber == 0) {
      /*
       * we use m_audioStartTimestamp to determine audio output start time
       */
      m_audioStartTimestamp = srcFrameTimestamp;
#ifdef DEBUG_AUDIO_SYNC
      if (Profile()->GetBoolValue(CFG_AUDIO_DEBUG))
        debug_message("%s:m_audioStartTimestamp = " U64,
                      Profile()->GetName(), m_audioStartTimestamp);
#endif
    }

    if (m_audioDstFrameNumber == 0) {
      // we wait until we see the first encoded frame.
      // this is because encoders usually buffer the first few
      // raw audio frames fed to them, and this number varies
      // from one encoder to another
      // We use this value to determine if we need to drop due to
      // a bad input frequency
      m_audioEncodingStartTimestamp = srcFrameTimestamp;
    }

    // we calculate audioSrcElapsedDuration by taking the current frame's
    // timestamp and subtracting the audioEncodingStartTimestamp (and NOT
    // the audioStartTimestamp).
    // this way, we just need to compare audioSrcElapsedDuration with
    // audioDstElapsedDuration (which should match in the ideal case),
    // and we don't have to compensate for the lag introduced by the initial
    // buffering of source frames in the encoder, which may vary from
    // one encoder to another
    m_audioSrcElapsedDuration =
      srcFrameTimestamp - m_audioEncodingStartTimestamp;
    m_audioSrcFrameNumber++;


    if (pcmBuffered) {
      u_int32_t samplesAvailable =
        DstBytesToSamples(m_audioPreEncodingBufferLength);

      // the converted copy has already been moved into the pre-encoding
      // buffer, so release it before any early return below
      if (pcmMalloced) {
        free((void *)pcmData);
        pcmMalloced = false;
      }
#ifdef DEBUG_AUDIO_SYNC
      if (Profile()->GetBoolValue(CFG_AUDIO_DEBUG))
        debug_message("%s: samples %u need %u",
                      Profile()->GetName(),
                      samplesAvailable, m_audioDstSamplesPerFrame);
#endif
      // not enough samples collected yet to call encode or forward
      // we moved the data above.
      if (samplesAvailable < m_audioDstSamplesPerFrame) {
        return;
      }
      // setup for encode/forward
      pcmData = &m_audioPreEncodingBuffer[0];
      pcmDataLength = DstSamplesToBytes(m_audioDstSamplesPerFrame);
    }


#ifdef DEBUG_AUDIO_SYNC
    if (Profile()->GetBoolValue(CFG_AUDIO_DEBUG))
      debug_message("%s:srcDuration=" U64 " dstDuration " U64 " " D64,
                    Profile()->GetName(),
                    m_audioSrcElapsedDuration,
                    m_audioDstElapsedDuration,
                    m_audioDstElapsedDuration - m_audioSrcElapsedDuration);
#endif

    /*
     * Check if we can encode, or if we have to add/drop frames
     * First check is to see if the source frequency is greater than the
     * theory frequency.
     */
    if (m_audioSrcElapsedDuration + frametime >= m_audioDstElapsedDuration) {

      // source gets ahead of destination
      // We tolerate a difference of 3 frames since A/V sync is usually
      // noticeable after that. This way we give the encoder a chance to pick
      // up
      if (m_audioSrcElapsedDuration >
          (3 * frametime) + m_audioDstElapsedDuration) {
        int j = (int) (DstTicksToSamples(m_audioSrcElapsedDuration
                                         + (2 * frametime)
                                         - m_audioDstElapsedDuration)
                       / m_audioDstSamplesPerFrame);
        debug_message("%s: Adding %d silence frames",
                      Profile()->GetName(), j);
        for (int k=0; k<j; k++)
          AddSilenceFrame();
      }

#ifdef DEBUG_SYNC
      debug_message("%s:encoding", Profile()->GetName());
#endif
      /*
       * Actually encode and forward the frames
       */
      bool rc = EncodeSamples(
                              (int16_t*)pcmData,
                              m_audioDstSamplesPerFrame,
                              m_audioDstChannels);

      if (!rc) {
        debug_message("failed to encode audio");
      }

      ForwardEncodedAudioFrames();
    } else {
      // destination gets ahead of source
      // This has been observed as a result of clock frequency drift between
      // the sound card oscillator and the system mainbord oscillator
      // Example: If the sound card oscillator has a 'real' frequency that
      // is slightly larger than the 'rated' frequency, and we are sampling
      // at 32kHz, then the 32000 samples acquired from the sound card
      // 'actually' occupy a duration of slightly less than a second.
      //
      // The clock drift is usually fraction of a Hz and takes a long
      // time (~ 20-30 minutes) before we are off by one frame duration

      debug_message("%s:audio: dropping frame, SrcElapsedDuration=" U64 " DstElapsedDuration=" U64 " " U64,
                    Profile()->GetName(),
                    m_audioSrcElapsedDuration, m_audioDstElapsedDuration,
                    frametime);
      // don't return - drop through to remove frame
    }

    if (pcmMalloced) {
      free((void *)pcmData);
    }
    if (pcmBuffered) {
      /*
       * This means we're storing data, either from resampling, or if the
       * sample numbers do not match.  We will remove the encoded samples,
       * and increment the srcFrameTimestamp
       */
      m_audioPreEncodingBufferLength -= pcmDataLength;
      // regions overlap, hence memmove rather than memcpy
      memmove(
              &m_audioPreEncodingBuffer[0],
              &m_audioPreEncodingBuffer[pcmDataLength],
              m_audioPreEncodingBufferLength);
      subtractDuration = 0;
      srcFrameTimestamp += frametime;
    } else {
      // no data in buffer (default case).
      return;
    }
  }
}
Example #5
0
void CMediaSource::ProcessAudioFrame(
				     u_int8_t* frameData,
				     u_int32_t frameDataLength,
				     Timestamp srcFrameTimestamp)
{
  if (m_audioSrcFrameNumber == 0) {
    if (!m_sourceVideo || m_videoSrcFrameNumber == 0) {
      m_encodingStartTimestamp = GetTimestamp();
    }
    m_audioStartTimestamp = srcFrameTimestamp;
#ifdef DEBUG_AUDIO_SYNC
    debug_message("m_audioStartTimestamp = "U64, m_audioStartTimestamp);
#endif
  }

  if (m_audioDstFrameNumber == 0) {
    // we wait until we see the first encoded frame.
    // this is because encoders usually buffer the first few
    // raw audio frames fed to them, and this number varies
    // from one encoder to another
    m_audioEncodingStartTimestamp = srcFrameTimestamp;
  }

  // we calculate audioSrcElapsedDuration by taking the current frame's
  // timestamp and subtracting the audioEncodingStartTimestamp (and NOT
  // the audioStartTimestamp).
  // this way, we just need to compare audioSrcElapsedDuration with 
  // audioDstElapsedDuration (which should match in the ideal case),
  // and we don't have to compensate for the lag introduced by the initial
  // buffering of source frames in the encoder, which may vary from
  // one encoder to another
  m_audioSrcElapsedDuration = srcFrameTimestamp - m_audioEncodingStartTimestamp;
  m_audioSrcFrameNumber++;

#if 0 
  // not needed
  if (resync) {
    // flush preEncodingBuffer
    m_audioPreEncodingBufferLength = 0;

    // change dst sample numbers to account for gap
    m_audioDstSampleNumber = m_audioDstRawSampleNumber
      = DstTicksToSamples(m_audioSrcElapsedDuration);
    error_message("Received resync");
  }
#endif

  bool pcmMalloced = false;
  bool pcmBuffered;
  u_int8_t* pcmData = frameData;
  u_int32_t pcmDataLength = frameDataLength;

  if (m_audioSrcChannels != m_audioDstChannels) {
    // Convert the channels if they don't match
    // we either double the channel info, or combine
    // the left and right
    uint32_t samples = SrcBytesToSamples(frameDataLength);
    uint32_t dstLength = DstSamplesToBytes(samples);
    pcmData = (u_int8_t *)Malloc(dstLength);
    pcmDataLength = dstLength;
    pcmMalloced = true;

    int16_t *src = (int16_t *)frameData;
    int16_t *dst = (int16_t *)pcmData;
    if (m_audioSrcChannels == 1) {
      // 1 channel to 2
      for (uint32_t ix = 0; ix < samples; ix++) {
	*dst++ = *src;
	*dst++ = *src++;
      }
    } else {
      // 2 channels to 1
      for (uint32_t ix = 0; ix < samples; ix++) {
	int32_t sum = *src++;
	sum += *src++;
	sum /= 2;
	if (sum < -32768) sum = -32768;
	else if (sum > 32767) sum = 32767;
	*dst++ = sum & 0xffff;
      }
    }
  }

  // resample audio, if necessary
  if (m_audioSrcSampleRate != m_audioDstSampleRate) {
    ResampleAudio(pcmData, pcmDataLength);

    // resampled data is now available in m_audioPreEncodingBuffer
    pcmBuffered = true;

  } else if (m_audioSrcSamplesPerFrame != m_audioDstSamplesPerFrame) {
    // reframe audio, if necessary
    // e.g. MP3 is 1152 samples/frame, AAC is 1024 samples/frame

    // add samples to end of m_audioBuffer
    // InitAudio() ensures that buffer is large enough
    memcpy(
	   &m_audioPreEncodingBuffer[m_audioPreEncodingBufferLength],
	   pcmData,
	   pcmDataLength);

    m_audioPreEncodingBufferLength += pcmDataLength;

    pcmBuffered = true;

  } else {
    pcmBuffered = false;
  }

  // LATER restructure so as get rid of this label, and goto below
 pcmBufferCheck:

  if (pcmBuffered) {
    u_int32_t samplesAvailable =
      DstBytesToSamples(m_audioPreEncodingBufferLength);

    // not enough samples collected yet to call encode or forward
    if (samplesAvailable < m_audioDstSamplesPerFrame) {
      return;
    }
    if (pcmMalloced) {
      free(pcmData);
      pcmMalloced = false;
    }

    // setup for encode/forward
    pcmData = &m_audioPreEncodingBuffer[0];
    pcmDataLength = DstSamplesToBytes(m_audioDstSamplesPerFrame);
  }


  // encode audio frame
  if (m_pConfig->m_audioEncode) {
    Duration frametime = DstSamplesToTicks(DstBytesToSamples(frameDataLength));

#ifdef DEBUG_AUDIO_SYNC
    debug_message("asrc# %d srcDuration="U64" dst# %d dstDuration "U64,
                  m_audioSrcFrameNumber, m_audioSrcElapsedDuration,
                  m_audioDstFrameNumber, m_audioDstElapsedDuration);
#endif

    // destination gets ahead of source
    // This has been observed as a result of clock frequency drift between
    // the sound card oscillator and the system mainbord oscillator
    // Example: If the sound card oscillator has a 'real' frequency that
    // is slightly larger than the 'rated' frequency, and we are sampling
    // at 32kHz, then the 32000 samples acquired from the sound card
    // 'actually' occupy a duration of slightly less than a second.
    // 
    // The clock drift is usually fraction of a Hz and takes a long
    // time (~ 20-30 minutes) before we are off by one frame duration

    if (m_audioSrcElapsedDuration + frametime < m_audioDstElapsedDuration) {
      debug_message("audio: dropping frame, SrcElapsedDuration="U64" DstElapsedDuration="U64,
                    m_audioSrcElapsedDuration, m_audioDstElapsedDuration);
      return;
    }

    // source gets ahead of destination
    // We tolerate a difference of 3 frames since A/V sync is usually
    // noticeable after that. This way we give the encoder a chance to pick up
    if (m_audioSrcElapsedDuration > (3 * frametime) + m_audioDstElapsedDuration) {
      int j = (int) (DstTicksToSamples(m_audioSrcElapsedDuration
                                       + (2 * frametime)
                                       - m_audioDstElapsedDuration)
                     / m_audioDstSamplesPerFrame);
      debug_message("audio: Adding %d silence frames", j);
      for (int k=0; k<j; k++)
        AddSilenceFrame();
    }

    //Timestamp encodingStartTimestamp = GetTimestamp();

    bool rc = m_audioEncoder->EncodeSamples(
                                            (int16_t*)pcmData,
                                            m_audioDstSamplesPerFrame,
                                            m_audioDstChannels);

    if (!rc) {
      debug_message("failed to encode audio");
      return;
    }

    // Disabled since we are not taking into account audio drift anymore
    /*
    Duration encodingTime =  (GetTimestamp() - encodingStartTimestamp);
    if (m_sourceRealTime && m_videoSource) {
      Duration drift;
      if (frametime <= encodingTime) {
        drift = encodingTime - frametime;
        m_videoSource->AddEncodingDrift(drift);
      }
    }
    */

    ForwardEncodedAudioFrames();

  }

  //Forward PCM Frames to Feeder Sink
  if ((m_pConfig->GetBoolValue(CONFIG_FEEDER_SINK_ENABLE) &&
       frameDataLength > 0)) {
    // make a copy of the pcm data if needed
    u_int8_t* FwdedData;

	FwdedData = (u_int8_t*)Malloc(frameDataLength);
	memcpy(FwdedData, frameData, frameDataLength);

    CMediaFrame* pFrame =
      new CMediaFrame(
		      RAWPCMAUDIOFRAME,
		      FwdedData,
		      frameDataLength,
		      srcFrameTimestamp,
		      0,
		      m_audioDstSampleRate);

   ForwardFrame(pFrame);
  }
  
  // if desired, forward raw audio to sinks
  if (m_pConfig->SourceRawAudio() && pcmDataLength > 0) {

    // make a copy of the pcm data if needed
    u_int8_t* pcmForwardedData;

    if (!pcmMalloced) {
      pcmForwardedData = (u_int8_t*)Malloc(pcmDataLength);
      memcpy(pcmForwardedData, pcmData, pcmDataLength);
    } else {
      pcmForwardedData = pcmData;
      pcmMalloced = false;
    }
#ifndef WORDS_BIGENDIAN
    // swap byte ordering so we have big endian to write into
    // the file.
    uint16_t *pdata = (uint16_t *)pcmForwardedData;
    for (uint32_t ix = 0; 
	 ix < pcmDataLength; 
	 ix += sizeof(uint16_t),pdata++) {
      uint16_t swap = *pdata;
      *pdata = B2N_16(swap);
    }
#endif

    CMediaFrame* pFrame =
      new CMediaFrame(
		      PCMAUDIOFRAME, 
		      pcmForwardedData, 
		      pcmDataLength,
		      m_audioStartTimestamp 
		      + DstSamplesToTicks(m_audioDstRawSampleNumber),
		      DstBytesToSamples(pcmDataLength),
		      m_audioDstSampleRate);
    ForwardFrame(pFrame);

    m_audioDstRawSampleNumber += SrcBytesToSamples(pcmDataLength);
    m_audioDstRawFrameNumber++;
  }

  if (pcmMalloced) {
    free(pcmData);
  }

  if (pcmBuffered) {
    m_audioPreEncodingBufferLength -= pcmDataLength;
    memcpy(
	   &m_audioPreEncodingBuffer[0],
	   &m_audioPreEncodingBuffer[pcmDataLength],
	   m_audioPreEncodingBufferLength);

    goto pcmBufferCheck;
  }

}