// JACK process callback, registered with the JACK server and invoked from its
// real-time thread. 'arg' is the JACKInput instance. No blocking or heavy work
// is done here: samples are packed into a message queue and a separate
// (non-realtime) thread processes them. Always returns 0 (success), even when
// the queue is full — in that case the block is dropped and a hole marker is
// emitted later.
int JACKInput::ProcessCallback(jack_nframes_t nframes, void* arg) {
	JACKInput *input = (JACKInput*) arg;

	// deal with holes
	// If an earlier callback had to drop a block (queue full), send a hole
	// marker first so the consumer thread knows that samples were lost.
	if(input->m_jackthread_hole) {
		char *message = input->m_message_queue.PrepareWriteMessage(sizeof(enum_eventtype));
		if(message == NULL)
			return 0; // queue still full, retry on the next callback
		*((enum_eventtype*) message) = EVENTTYPE_HOLE;
		input->m_message_queue.WriteMessage();
		input->m_jackthread_hole = false;
	}

	// This function is called from a real-time thread, so it's not a good idea to do actual work here.
	// The data is moved to a queue, and a second thread will do the work as usual.
	//TODO// if nframes is small, then combine multiple blocks into one?
	// Message layout: [enum_eventtype][Event_Data][interleaved float samples].
	char *message = input->m_message_queue.PrepareWriteMessage(sizeof(enum_eventtype) + sizeof(Event_Data) + nframes * input->m_channels * sizeof(float));
	if(message == NULL) {
		// No room in the queue: drop this block and remember to emit a hole
		// marker on a later callback.
		input->m_jackthread_hole = true;
		return 0;
	}
	*((enum_eventtype*) message) = EVENTTYPE_DATA;
	message += sizeof(enum_eventtype);
	// Timestamp the *first* sample of the block: current time minus the
	// duration of the block at the JACK sample rate (microseconds).
	((Event_Data*) message)->m_timestamp = hrt_time_micro() - (int64_t) nframes * (int64_t) 1000000 / (int64_t) input->m_jackthread_sample_rate;
	((Event_Data*) message)->m_sample_rate = input->m_jackthread_sample_rate;
	((Event_Data*) message)->m_sample_count = nframes;
	message += sizeof(Event_Data);
	// JACK provides one buffer per port (planar); interleave them into the
	// message payload (channel p goes to offset p with stride m_channels).
	for(unsigned int p = 0; p < input->m_channels; ++p) {
		SampleCopy(nframes, (float*) jack_port_get_buffer(input->m_jack_ports[p], nframes), 1, (float*) message + p, input->m_channels);
	}
	input->m_message_queue.WriteMessage();

	return 0;
}
// Moves buffered audio samples into fixed-size encoder frames for the segment
// [segment_start_time, segment_stop_time]. Samples that arrive before the
// segment start are dropped (only while m_segment_audio_can_drop is still
// set); after the first frame is emitted, the sample position is assumed to
// advance exactly with lock->m_audio_samples. 'lock' is the locked shared
// synchronizer state; times are in microseconds.
void Synchronizer::FlushAudioBuffer(Synchronizer::SharedData* lock, int64_t segment_start_time, int64_t segment_stop_time) {
	// Upper bound on how many samples may still be read for this segment,
	// derived from the segment duration minus what was already consumed.
	double sample_length = (double) (segment_stop_time - lock->m_segment_audio_start_time) * 1.0e-6;
	int64_t samples_max = (int64_t) ceil(sample_length * (double) m_output_format->m_audio_sample_rate) - lock->m_segment_audio_samples_read;
	if(lock->m_audio_buffer.GetSize() > 0) {

		// Normally, the correct way to calculate the position of the first sample would be:
		//  int64_t timestamp = lock->m_segment_audio_start_time + (int64_t) round((double) lock->m_segment_audio_samples_read / (double) m_audio_sample_rate * 1.0e6);
		//  int64_t pos = (int64_t) round((double) (lock->m_time_offset + (timestamp - segment_start_time)) * 1.0e-6 * (double) m_audio_sample_rate);
		// Simplified:
		//  int64_t pos = (int64_t) round((double) (lock->m_time_offset + (lock->m_segment_audio_start_time - segment_start_time)) * 1.0e-6 * (double) m_audio_sample_rate)
		//                + lock->m_segment_audio_samples_read;
		// The first part of the expression is constant, so it only has to be calculated at the start of the segment. After that the increase in position is always
		// equal to the number of samples written. Samples are only dropped at the start of the segment, so actually
		// the position doesn't have to be calculated anymore after that, since it is assumed to be equal to lock->m_audio_samples.
		if(lock->m_segment_audio_can_drop) {

			// calculate the offset of the first sample
			int64_t pos = (int64_t) round((double) (lock->m_time_offset + (lock->m_segment_audio_start_time - segment_start_time)) * 1.0e-6 * (double) m_output_format->m_audio_sample_rate)
						  + lock->m_segment_audio_samples_read;

			// drop samples that are too early
			if(pos < lock->m_audio_samples) {
				int64_t n = std::min(lock->m_audio_samples - pos, (int64_t) lock->m_audio_buffer.GetSize() / m_output_format->m_audio_channels);
				lock->m_audio_buffer.Pop(n * m_output_format->m_audio_channels);
				lock->m_segment_audio_samples_read += n;
			}

		}

		// Number of whole samples we are allowed to emit right now: limited by
		// both the segment length and what is actually in the buffer.
		int64_t samples_left = std::min(samples_max, (int64_t) lock->m_audio_buffer.GetSize() / m_output_format->m_audio_channels);

		// add new block to sync diagram
		if(m_sync_diagram != NULL && samples_left > 0) {
			double t = (double) lock->m_audio_samples / (double) m_output_format->m_audio_sample_rate;
			m_sync_diagram->AddBlock(3, t, t + (double) samples_left / (double) m_output_format->m_audio_sample_rate, QColor(0, 255, 0));
		}

		// send the samples to the encoder
		while(samples_left > 0) {

			// Once any samples have been committed, dropping at segment start
			// is no longer allowed (position is tracked by m_audio_samples).
			lock->m_segment_audio_can_drop = false;

			// copy samples until either the partial frame is full or there are no samples left
			//TODO// do direct copy/conversion to new audio frame?
			int64_t n = std::min((int64_t) (m_output_format->m_audio_frame_size - lock->m_partial_audio_frame_samples), samples_left);
			lock->m_audio_buffer.Pop(lock->m_partial_audio_frame.GetData() + lock->m_partial_audio_frame_samples * m_output_format->m_audio_channels, n * m_output_format->m_audio_channels);
			lock->m_segment_audio_samples_read += n;
			lock->m_partial_audio_frame_samples += n;
			lock->m_audio_samples += n;
			samples_left -= n;

			// is the partial frame full?
			if(lock->m_partial_audio_frame_samples == m_output_format->m_audio_frame_size) {

				// allocate a frame
				// Planar formats need one plane per channel; packed formats use a single plane.
#if SSR_USE_AVUTIL_PLANAR_SAMPLE_FMT
				unsigned int planes = (m_output_format->m_audio_sample_format == AV_SAMPLE_FMT_S16P ||
									   m_output_format->m_audio_sample_format == AV_SAMPLE_FMT_FLTP)? m_output_format->m_audio_channels : 1;
#else
				unsigned int planes = 1;
#endif
				std::unique_ptr<AVFrameWrapper> audio_frame = CreateAudioFrame(m_output_format->m_audio_channels, m_output_format->m_audio_sample_rate, m_output_format->m_audio_frame_size, planes, m_output_format->m_audio_sample_format);
				// pts counts samples since the start of the recording
				audio_frame->GetFrame()->pts = lock->m_audio_samples;

				// copy/convert the samples
				// The internal buffer is always interleaved float; convert to
				// the encoder's sample format (packed or planar, s16 or float).
				switch(m_output_format->m_audio_sample_format) {
					case AV_SAMPLE_FMT_S16: {
						float *data_in = (float*) lock->m_partial_audio_frame.GetData();
						int16_t *data_out = (int16_t*) audio_frame->GetFrame()->data[0];
						SampleCopy(m_output_format->m_audio_frame_size * m_output_format->m_audio_channels, data_in, 1, data_out, 1);
						break;
					}
					case AV_SAMPLE_FMT_FLT: {
						float *data_in = (float*) lock->m_partial_audio_frame.GetData();
						float *data_out = (float*) audio_frame->GetFrame()->data[0];
						memcpy(data_out, data_in, m_output_format->m_audio_frame_size * m_output_format->m_audio_channels * sizeof(float));
						break;
					}
#if SSR_USE_AVUTIL_PLANAR_SAMPLE_FMT
					case AV_SAMPLE_FMT_S16P: {
						// de-interleave: read channel p with stride 'planes', write contiguously into plane p
						for(unsigned int p = 0; p < planes; ++p) {
							float *data_in = (float*) lock->m_partial_audio_frame.GetData() + p;
							int16_t *data_out = (int16_t*) audio_frame->GetFrame()->data[p];
							SampleCopy(m_output_format->m_audio_frame_size, data_in, planes, data_out, 1);
						}
						break;
					}
					case AV_SAMPLE_FMT_FLTP: {
						for(unsigned int p = 0; p < planes; ++p) {
							float *data_in = (float*) lock->m_partial_audio_frame.GetData() + p;
							float *data_out = (float*) audio_frame->GetFrame()->data[p];
							SampleCopy(m_output_format->m_audio_frame_size, data_in, planes, data_out, 1);
						}
						break;
					}
#endif
					default: {
						assert(false); // unsupported encoder sample format
						break;
					}
				}
				lock->m_partial_audio_frame_samples = 0;

				//Logger::LogInfo("[Synchronizer::FlushAudioBuffer] Encoded audio frame [" + QString::number(lock->m_partial_audio_frame->pts) + "].");
				m_output_manager->AddAudioFrame(std::move(audio_frame));
			}

		}

	}
}
void Resampler::Resample(unsigned int in_channels, unsigned int in_sample_rate, AVSampleFormat in_format, unsigned int in_sample_count, const uint8_t* in_data, unsigned int out_channels, unsigned int out_sample_rate, AVSampleFormat out_format, unsigned int* out_sample_count, const uint8_t** out_data) { Q_ASSERT(in_channels > 0 && out_channels > 0); Q_ASSERT(in_sample_rate > 0 && out_sample_rate > 0); Q_ASSERT(in_channels == out_channels); Q_ASSERT(out_format == AV_SAMPLE_FMT_FLT); if(m_started) { Q_ASSERT(m_out_channels == out_channels); Q_ASSERT(m_out_sample_rate == out_sample_rate); } else { m_started = true; m_in_sample_rate = 0; // trigger creation of new resampler m_out_channels = out_channels; m_out_sample_rate = out_sample_rate; } // prepare output samples unsigned int out_pos = 0; m_out_data.alloc(16 * 1024); // do we need a new resampler? if(in_sample_rate != m_in_sample_rate) { // delete old resampler if(m_soxr != NULL) { // flush resampler for( ; ; ) { size_t out_done; soxr_error_t error = soxr_process(m_soxr, NULL, 0, NULL, m_out_data.data() + out_pos * out_channels * sizeof(float), m_out_data.size() / (out_channels * sizeof(float)) - out_pos, &out_done); if(error != NULL) { Logger::LogError("[Resampler::Resample] " + QObject::tr("Error: Flushing resampler failed! Reason: %s").arg(soxr_strerror(error))); throw SoxrException(); } out_pos += out_done; if(out_pos < m_out_data.size() / (out_channels * sizeof(float))) break; m_out_data.realloc(m_out_data.size() * 2); } soxr_delete(m_soxr); m_soxr = NULL; } m_in_sample_rate = in_sample_rate; // do we really need a resampler? 
if(m_in_sample_rate != m_out_sample_rate) { Logger::LogInfo("[Resampler::Resampler] " + QObject::tr("Resampling from %1 to %2.").arg(m_in_sample_rate).arg(m_out_sample_rate)); soxr_error_t error; soxr_quality_spec_t quality = soxr_quality_spec(SOXR_MQ, 0); m_soxr = soxr_create((double) m_in_sample_rate, (double) m_out_sample_rate, out_channels, &error, NULL, &quality, NULL); if(m_soxr == NULL || error != NULL) { m_in_sample_rate = 0; Logger::LogError("[Resampler::Resampler] " + QObject::tr("Error: Can't create resampler! Reason: %s").arg(soxr_strerror(error))); throw SoxrException(); } } else { Logger::LogInfo("[Resampler::Resampler] " + QObject::tr("Resampling not needed.")); } } // prepare input samples uint8_t *in_data_float; unsigned int in_pos = 0; if(in_format == AV_SAMPLE_FMT_FLT) { in_data_float = (uint8_t*) in_data; } else if(in_format == AV_SAMPLE_FMT_S16) { m_in_data.alloc(in_sample_count * out_channels * sizeof(float)); SampleCopy(in_sample_count * out_channels, (const int16_t*) in_data, 1, (float*) m_in_data.data(), 1); in_data_float = (uint8_t*) m_in_data.data(); } else { Q_ASSERT(false); // unsupported input format return; } // no resampling needed? 
if(m_in_sample_rate == m_out_sample_rate) { if(out_pos == 0) { *out_sample_count = in_sample_count; *out_data = in_data_float; } else { m_out_data.realloc((out_pos + in_sample_count) * out_channels * sizeof(float)); memcpy(m_out_data.data() + out_pos * out_channels * sizeof(float), in_data_float, in_sample_count * out_channels * sizeof(float)); *out_sample_count = out_pos + in_sample_count; *out_data = m_out_data.data(); } return; } // resample for( ; ; ) { size_t in_done, out_done; soxr_error_t error = soxr_process(m_soxr, in_data_float + in_pos * out_channels * sizeof(float), in_sample_count - in_pos, &in_done, m_out_data.data() + out_pos * out_channels * sizeof(float), m_out_data.size() / (out_channels * sizeof(float)) - out_pos, &out_done); if(error != NULL) { Logger::LogError("[Resampler::Resample] " + QObject::tr("Error: Resampling failed!")); throw SoxrException(); } in_pos += in_done; out_pos += out_done; if(in_pos == in_sample_count) break; m_out_data.realloc(m_out_data.size() * 2); } *out_sample_count = out_pos; *out_data = m_out_data.data(); }
// Receives captured audio samples, applies timestamp sanitation, drift
// correction and resampling, then pushes the result into the shared audio
// buffer for the encoder. 'timestamp' is in microseconds and refers to the
// first sample of the block. Takes the audio lock first, then the shared
// lock (locking order matters — do not reorder).
void Synchronizer::ReadAudioSamples(unsigned int channels, unsigned int sample_rate, AVSampleFormat format, unsigned int sample_count, const uint8_t* data, int64_t timestamp) {
	assert(m_audio_encoder != NULL);
	assert(channels == m_audio_channels); // remixing isn't supported yet

	// sanity check
	if(sample_count == 0)
		return;

	// add new block to sync diagram
	if(m_sync_diagram != NULL)
		m_sync_diagram->AddBlock(1, (double) timestamp * 1.0e-6, (double) timestamp * 1.0e-6 + (double) sample_count / (double) sample_rate, QColor(0, 255, 0));

	AudioLock audiolock(&m_audio_data);

	// check the timestamp
	// Clamp non-monotonic timestamps; only warn when the jump backwards is
	// larger than 10ms (smaller jitter is expected and harmless).
	if(timestamp < audiolock->m_last_timestamp) {
		if(timestamp < audiolock->m_last_timestamp - 10000)
			Logger::LogWarning("[Synchronizer::ReadAudioSamples] " + Logger::tr("Warning: Received audio samples with non-monotonic timestamp."));
		timestamp = audiolock->m_last_timestamp;
	}

	// update the timestamps
	int64_t previous_timestamp;
	if(audiolock->m_first_timestamp == AV_NOPTS_VALUE) {
		audiolock->m_first_timestamp = timestamp;
		previous_timestamp = timestamp;
	} else {
		previous_timestamp = audiolock->m_last_timestamp;
	}
	audiolock->m_last_timestamp = timestamp;

	// calculate drift
	// Drift = (time represented by samples written so far, incl. resampler
	// latency) minus (wall-clock time elapsed since the first sample), in
	// seconds. Positive drift = audio is ahead of the clock.
	double current_drift = ((double) audiolock->m_samples_written + audiolock->m_fast_resampler->GetOutputLatency()) / (double) m_audio_sample_rate
						   - (double) (timestamp - audiolock->m_first_timestamp) * 1.0e-6;

	// if there are too many audio samples, drop the frame (unlikely)
	if(current_drift > DRIFT_ERROR_THRESHOLD) {
		Logger::LogWarning("[Synchronizer::ReadAudioSamples] " + Logger::tr("Warning: Too many audio samples, dropping samples to keep the audio in sync with the video."));
		return;
	}

	// if there are not enough audio samples, insert zeros
	unsigned int sample_count_out = 0;
	if(current_drift < -DRIFT_ERROR_THRESHOLD || audiolock->m_insert_zeros) {
		if(!audiolock->m_insert_zeros)
			Logger::LogWarning("[Synchronizer::ReadAudioSamples] " + Logger::tr("Warning: Not enough audio samples, inserting silence to keep the audio in sync with the video."));
		audiolock->m_insert_zeros = false;

		// insert zeros
		// n = number of silent input samples needed to cancel the negative drift.
		unsigned int n = std::max(0, (int) round(-current_drift * (double) sample_rate));
		audiolock->m_temp_input_buffer.Alloc(n * m_audio_channels);
		std::fill_n(audiolock->m_temp_input_buffer.GetData(), n * m_audio_channels, 0.0f);
		sample_count_out = audiolock->m_fast_resampler->Resample((double) sample_rate / (double) m_audio_sample_rate, 1.0,
																 audiolock->m_temp_input_buffer.GetData(), n, &audiolock->m_temp_output_buffer, sample_count_out);

		// recalculate drift
		current_drift = ((double) (audiolock->m_samples_written + sample_count_out) + audiolock->m_fast_resampler->GetOutputLatency()) / (double) m_audio_sample_rate
						- (double) (timestamp - audiolock->m_first_timestamp) * 1.0e-6;
	}

	// do drift correction
	// The point of drift correction is to keep video and audio in sync even when the clocks are not running at exactly the same speed.
	// This can happen because the sample rate of the sound card is not always 100% accurate. Even a 0.1% error will result in audio that is
	// seconds too early or too late at the end of a one hour video. This problem doesn't occur on all computers though (I'm not sure why).
	// Another cause of desynchronization is problems/glitches with PulseAudio (e.g. jumps in time when switching between sources).
	// PI-controller-style correction: m_average_drift is the integral term,
	// DRIFT_CORRECTION_P * current_drift is the proportional term; both are
	// clamped and the result scales the resampling ratio below.
	double dt = fmin((double) (timestamp - previous_timestamp) * 1.0e-6, DRIFT_MAX_BLOCK);
	audiolock->m_average_drift = clamp(audiolock->m_average_drift + DRIFT_CORRECTION_I * current_drift * dt, -0.5, 0.5);
	if(audiolock->m_average_drift < -0.02 && audiolock->m_warn_desync) {
		audiolock->m_warn_desync = false; // warn only once per recording
		Logger::LogWarning("[Synchronizer::ReadAudioSamples] " + Logger::tr("Warning: Audio input is more than 2% too slow!"));
	}
	if(audiolock->m_average_drift > 0.02 && audiolock->m_warn_desync) {
		audiolock->m_warn_desync = false;
		Logger::LogWarning("[Synchronizer::ReadAudioSamples] " + Logger::tr("Warning: Audio input is more than 2% too fast!"));
	}
	double length = (double) sample_count / (double) sample_rate;
	double drift_correction = clamp(DRIFT_CORRECTION_P * current_drift + audiolock->m_average_drift, -0.5, 0.5) * fmin(1.0, DRIFT_MAX_BLOCK / length);

	//qDebug() << "current_drift" << current_drift << "average_drift" << audiolock->m_average_drift << "drift_correction" << drift_correction;

	// convert the samples
	// The resampler consumes interleaved float; convert s16 input if needed.
	const float *data_float;
	if(format == AV_SAMPLE_FMT_FLT) {
		data_float = (const float*) data;
	} else if(format == AV_SAMPLE_FMT_S16) {
		audiolock->m_temp_input_buffer.Alloc(sample_count * m_audio_channels);
		data_float = audiolock->m_temp_input_buffer.GetData();
		SampleCopy(sample_count * m_audio_channels, (const int16_t*) data, 1, audiolock->m_temp_input_buffer.GetData(), 1);
	} else {
		Logger::LogError("[Synchronizer::ReadAudioSamples] " + Logger::tr("Error: Audio sample format is not supported!"));
		throw ResamplerException();
	}

	// resample
	// The drift correction slightly stretches/compresses the audio via the
	// second ratio argument; output is appended after any inserted silence.
	sample_count_out = audiolock->m_fast_resampler->Resample((double) sample_rate / (double) m_audio_sample_rate, 1.0 / (1.0 - drift_correction),
															 data_float, sample_count, &audiolock->m_temp_output_buffer, sample_count_out);
	audiolock->m_samples_written += sample_count_out;

	SharedLock lock(&m_shared_data);

	// avoid memory problems by limiting the audio buffer size
	if(lock->m_audio_buffer.GetSize() / m_audio_channels >= MAX_AUDIO_SAMPLES_BUFFERED) {
		if(lock->m_segment_video_started) {
			Logger::LogWarning("[Synchronizer::ReadAudioSamples] " + Logger::tr("Warning: Audio buffer overflow, starting new segment to keep the audio in sync with the video "
																				"(some video and/or audio may be lost). The video input seems to be too slow."));
			NewSegment(lock.get());
		} else {
			// If the video hasn't started yet, it makes more sense to drop the oldest samples.
			// Shifting the start time like this isn't completely accurate, but this shouldn't happen often anyway.
			// The number of samples dropped is calculated so that the buffer will be 90% full after this.
			size_t n = lock->m_audio_buffer.GetSize() / m_audio_channels - MAX_AUDIO_SAMPLES_BUFFERED * 9 / 10;
			lock->m_audio_buffer.Pop(n * m_audio_channels);
			lock->m_segment_audio_start_time += (int64_t) round((double) n / (double) m_audio_sample_rate * 1.0e6);
		}
	}

	// start audio
	if(!lock->m_segment_audio_started) {
		lock->m_segment_audio_started = true;
		lock->m_segment_audio_start_time = timestamp;
		lock->m_segment_audio_stop_time = timestamp;
	}

	// store the samples
	lock->m_audio_buffer.Push(audiolock->m_temp_output_buffer.GetData(), sample_count_out * m_audio_channels);

	// increase segment stop time
	double new_sample_length = (double) (lock->m_segment_audio_samples_read + lock->m_audio_buffer.GetSize() / m_audio_channels) / (double) m_audio_sample_rate;
	lock->m_segment_audio_stop_time = lock->m_segment_audio_start_time + (int64_t) round(new_sample_length * 1.0e6);
}