static int opus_decode_frame(OpusStreamContext *s, const uint8_t *data, int size)
{
    int samples    = s->packet.frame_duration;
    int redundancy = 0;
    int redundancy_size, redundancy_pos;
    int ret, i, consumed;
    int delayed_samples = s->delayed_samples;

    ret = opus_rc_init(&s->rc, data, size);
    if (ret < 0)
        return ret;

    /* decode the silk frame */
    if (s->packet.mode == OPUS_MODE_SILK || s->packet.mode == OPUS_MODE_HYBRID) {
        if (!avresample_is_open(s->avr)) {
            ret = opus_init_resample(s);
            if (ret < 0)
                return ret;
        }

        samples = ff_silk_decode_superframe(s->silk, &s->rc, s->silk_output,
                                            FFMIN(s->packet.bandwidth, OPUS_BANDWIDTH_WIDEBAND),
                                            s->packet.stereo + 1,
                                            silk_frame_duration_ms[s->packet.config]);
        if (samples < 0) {
            av_log(s->avctx, AV_LOG_ERROR, "Error decoding a SILK frame.\n");
            return samples;
        }
        samples = avresample_convert(s->avr, (uint8_t**)s->out, s->out_size,
                                     s->packet.frame_duration,
                                     (uint8_t**)s->silk_output,
                                     sizeof(s->silk_buf[0]), samples);
        if (samples < 0) {
            av_log(s->avctx, AV_LOG_ERROR, "Error resampling SILK data.\n");
            return samples;
        }
        s->delayed_samples += s->packet.frame_duration - samples;
    } else
        ff_silk_flush(s->silk);

    // decode redundancy information
    consumed = opus_rc_tell(&s->rc);
    if (s->packet.mode == OPUS_MODE_HYBRID && consumed + 37 <= size * 8)
        redundancy = opus_rc_p2model(&s->rc, 12);
    else if (s->packet.mode == OPUS_MODE_SILK && consumed + 17 <= size * 8)
        redundancy = 1;

    if (redundancy) {
        redundancy_pos = opus_rc_p2model(&s->rc, 1);

        if (s->packet.mode == OPUS_MODE_HYBRID)
            redundancy_size = opus_rc_unimodel(&s->rc, 256) + 2;
        else
            redundancy_size = size - (consumed + 7) / 8;
        size -= redundancy_size;
        if (size < 0) {
            av_log(s->avctx, AV_LOG_ERROR, "Invalid redundancy frame size.\n");
            return AVERROR_INVALIDDATA;
        }

        if (redundancy_pos) {
            ret = opus_decode_redundancy(s, data + size, redundancy_size);
            if (ret < 0)
                return ret;
            ff_celt_flush(s->celt);
        }
    }

    /* decode the CELT frame */
    if (s->packet.mode == OPUS_MODE_CELT || s->packet.mode == OPUS_MODE_HYBRID) {
        float *out_tmp[2] = { s->out[0], s->out[1] };
        float **dst = (s->packet.mode == OPUS_MODE_CELT) ? out_tmp : s->celt_output;
        int celt_output_samples = samples;
        int delay_samples = av_audio_fifo_size(s->celt_delay);

        if (delay_samples) {
            if (s->packet.mode == OPUS_MODE_HYBRID) {
                av_audio_fifo_read(s->celt_delay, (void**)s->celt_output, delay_samples);

                for (i = 0; i < s->output_channels; i++) {
                    s->fdsp->vector_fmac_scalar(out_tmp[i], s->celt_output[i], 1.0,
                                                delay_samples);
                    out_tmp[i] += delay_samples;
                }
                celt_output_samples -= delay_samples;
            } else {
                av_log(s->avctx, AV_LOG_WARNING,
                       "Spurious CELT delay samples present.\n");
                av_audio_fifo_drain(s->celt_delay, delay_samples);
                if (s->avctx->err_recognition & AV_EF_EXPLODE)
                    return AVERROR_BUG;
            }
        }

        opus_raw_init(&s->rc, data + size, size);

        ret = ff_celt_decode_frame(s->celt, &s->rc, dst,
                                   s->packet.stereo + 1,
                                   s->packet.frame_duration,
                                   (s->packet.mode == OPUS_MODE_HYBRID) ? 17 : 0,
                                   celt_band_end[s->packet.bandwidth]);
        if (ret < 0)
            return ret;

        if (s->packet.mode == OPUS_MODE_HYBRID) {
            int celt_delay = s->packet.frame_duration - celt_output_samples;
            void *delaybuf[2] = { s->celt_output[0] + celt_output_samples,
                                  s->celt_output[1] + celt_output_samples };

            for (i = 0; i < s->output_channels; i++) {
                s->fdsp->vector_fmac_scalar(out_tmp[i],
                                            s->celt_output[i], 1.0,
                                            celt_output_samples);
            }

            ret = av_audio_fifo_write(s->celt_delay, delaybuf, celt_delay);
            if (ret < 0)
                return ret;
        }
    } else
        ff_celt_flush(s->celt);

    if (s->redundancy_idx) {
        for (i = 0; i < s->output_channels; i++)
            opus_fade(s->out[i], s->out[i],
                      s->redundancy_output[i] + 120 + s->redundancy_idx,
                      ff_celt_window2 + s->redundancy_idx, 120 - s->redundancy_idx);
        s->redundancy_idx = 0;
    }
    if (redundancy) {
        if (!redundancy_pos) {
            ff_celt_flush(s->celt);
            ret = opus_decode_redundancy(s, data + size, redundancy_size);
            if (ret < 0)
                return ret;

            for (i = 0; i < s->output_channels; i++) {
                opus_fade(s->out[i] + samples - 120 + delayed_samples,
                          s->out[i] + samples - 120 + delayed_samples,
                          s->redundancy_output[i] + 120,
                          ff_celt_window2, 120 - delayed_samples);
                if (delayed_samples)
                    s->redundancy_idx = 120 - delayed_samples;
            }
        } else {
            for (i = 0; i < s->output_channels; i++) {
                memcpy(s->out[i] + delayed_samples, s->redundancy_output[i], 120 * sizeof(float));
                opus_fade(s->out[i] + 120 + delayed_samples,
                          s->redundancy_output[i] + 120,
                          s->out[i] + 120 + delayed_samples,
                          ff_celt_window2, 120);
            }
        }
    }

    return samples;
}
int avresample_read(AVAudioResampleContext *avr, uint8_t **output, int nb_samples)
{
    if (!output)
        return av_audio_fifo_drain(avr->out_fifo, nb_samples);
    return av_audio_fifo_read(avr->out_fifo, (void **)output, nb_samples);
}
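Every snippet in this section leans on the same AVAudioFifo primitive: the producer writes whatever sample count it happens to have, the consumer reads or drains in its own preferred chunk size, and the FIFO grows automatically on write. A minimal standalone sketch of that lifecycle follows; the planar-float format, stereo layout, and 1024-sample chunk are arbitrary choices for illustration, not values taken from the functions above.

#include <libavutil/audio_fifo.h>
#include <libavutil/error.h>
#include <libavutil/samplefmt.h>

/* Sketch only: write samples in arbitrary chunks, read them back in
 * fixed 1024-sample chunks. Stereo planar float is an assumed format. */
static int fifo_demo(uint8_t **input_planes, int in_samples)
{
    const int chunk = 1024; /* arbitrary consumer chunk size */
    float left[1024], right[1024];
    void *out[2] = { left, right };
    int ret = 0;

    /* The initial size is only a hint; av_audio_fifo_write() grows the FIFO. */
    AVAudioFifo *fifo = av_audio_fifo_alloc(AV_SAMPLE_FMT_FLTP, 2, 1);
    if (!fifo)
        return AVERROR(ENOMEM);

    if (av_audio_fifo_write(fifo, (void **)input_planes, in_samples) < in_samples) {
        ret = AVERROR_EXIT;
        goto end;
    }

    while (av_audio_fifo_size(fifo) >= chunk) {
        if (av_audio_fifo_read(fifo, out, chunk) < chunk) {
            ret = AVERROR_EXIT;
            goto end;
        }
        /* ... process one fixed-size chunk here ... */
    }

    /* Leftover samples can be kept for the next call or discarded: */
    av_audio_fifo_drain(fifo, av_audio_fifo_size(fifo));

end:
    av_audio_fifo_free(fifo);
    return ret;
}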
static int activate(AVFilterContext *ctx)
{
    SidechainCompressContext *s = ctx->priv;
    AVFrame *out = NULL, *in[2] = { NULL };
    int ret, i, nb_samples;
    double *dst;

    FF_FILTER_FORWARD_STATUS_BACK_ALL(ctx->outputs[0], ctx);
    if ((ret = ff_inlink_consume_frame(ctx->inputs[0], &in[0])) > 0) {
        av_audio_fifo_write(s->fifo[0], (void **)in[0]->extended_data,
                            in[0]->nb_samples);
        av_frame_free(&in[0]);
    }
    if (ret < 0)
        return ret;
    if ((ret = ff_inlink_consume_frame(ctx->inputs[1], &in[1])) > 0) {
        av_audio_fifo_write(s->fifo[1], (void **)in[1]->extended_data,
                            in[1]->nb_samples);
        av_frame_free(&in[1]);
    }
    if (ret < 0)
        return ret;

    nb_samples = FFMIN(av_audio_fifo_size(s->fifo[0]), av_audio_fifo_size(s->fifo[1]));
    if (nb_samples) {
        out = ff_get_audio_buffer(ctx->outputs[0], nb_samples);
        if (!out)
            return AVERROR(ENOMEM);
        for (i = 0; i < 2; i++) {
            in[i] = ff_get_audio_buffer(ctx->inputs[i], nb_samples);
            if (!in[i]) {
                av_frame_free(&in[0]);
                av_frame_free(&in[1]);
                av_frame_free(&out);
                return AVERROR(ENOMEM);
            }
            av_audio_fifo_read(s->fifo[i], (void **)in[i]->data, nb_samples);
        }

        dst = (double *)out->data[0];
        out->pts = s->pts;
        s->pts += nb_samples;

        compressor(s, (double *)in[0]->data[0], dst,
                   (double *)in[1]->data[0], nb_samples,
                   s->level_in, s->level_sc,
                   ctx->inputs[0], ctx->inputs[1]);

        av_frame_free(&in[0]);
        av_frame_free(&in[1]);

        ret = ff_filter_frame(ctx->outputs[0], out);
        if (ret < 0)
            return ret;
    }
    FF_FILTER_FORWARD_STATUS(ctx->inputs[0], ctx->outputs[0]);
    FF_FILTER_FORWARD_STATUS(ctx->inputs[1], ctx->outputs[0]);
    if (ff_outlink_frame_wanted(ctx->outputs[0])) {
        if (!av_audio_fifo_size(s->fifo[0]))
            ff_inlink_request_frame(ctx->inputs[0]);
        if (!av_audio_fifo_size(s->fifo[1]))
            ff_inlink_request_frame(ctx->inputs[1]);
    }
    return 0;
}
static int opus_decode_packet(AVCodecContext *avctx, void *data,
                              int *got_frame_ptr, AVPacket *avpkt)
{
    OpusContext *c      = avctx->priv_data;
    AVFrame *frame      = data;
    const uint8_t *buf  = avpkt->data;
    int buf_size        = avpkt->size;
    int coded_samples   = 0;
    int decoded_samples = INT_MAX;
    int delayed_samples = 0;
    int i, ret;

    /* calculate the number of delayed samples */
    for (i = 0; i < c->nb_streams; i++) {
        OpusStreamContext *s = &c->streams[i];
        s->out[0] = s->out[1] = NULL;
        delayed_samples = FFMAX(delayed_samples,
                                s->delayed_samples + av_audio_fifo_size(c->sync_buffers[i]));
    }

    /* decode the header of the first sub-packet to find out the sample count */
    if (buf) {
        OpusPacket *pkt = &c->streams[0].packet;

        ret = ff_opus_parse_packet(pkt, buf, buf_size, c->nb_streams > 1);
        if (ret < 0) {
            av_log(avctx, AV_LOG_ERROR, "Error parsing the packet header.\n");
            return ret;
        }

        coded_samples += pkt->frame_count * pkt->frame_duration;
        c->streams[0].silk_samplerate = get_silk_samplerate(pkt->config);
    }

    frame->nb_samples = coded_samples + delayed_samples;

    /* no input or buffered data => nothing to do */
    if (!frame->nb_samples) {
        *got_frame_ptr = 0;
        return 0;
    }

    /* setup the data buffers */
    ret = ff_get_buffer(avctx, frame, 0);
    if (ret < 0)
        return ret;
    frame->nb_samples = 0;

    memset(c->out, 0, c->nb_streams * 2 * sizeof(*c->out));
    for (i = 0; i < avctx->channels; i++) {
        ChannelMap *map = &c->channel_maps[i];
        if (!map->copy)
            c->out[2 * map->stream_idx + map->channel_idx] = (float*)frame->extended_data[i];
    }

    /* read the data from the sync buffers */
    for (i = 0; i < c->nb_streams; i++) {
        float **out   = c->out + 2 * i;
        int sync_size = av_audio_fifo_size(c->sync_buffers[i]);

        float sync_dummy[32];
        int out_dummy = (!out[0]) | ((!out[1]) << 1);

        if (!out[0])
            out[0] = sync_dummy;
        if (!out[1])
            out[1] = sync_dummy;
        if (out_dummy && sync_size > FF_ARRAY_ELEMS(sync_dummy))
            return AVERROR_BUG;

        ret = av_audio_fifo_read(c->sync_buffers[i], (void**)out, sync_size);
        if (ret < 0)
            return ret;

        if (out_dummy & 1)
            out[0] = NULL;
        else
            out[0] += ret;
        if (out_dummy & 2)
            out[1] = NULL;
        else
            out[1] += ret;

        c->out_size[i] = frame->linesize[0] - ret * sizeof(float);
    }

    /* decode each sub-packet */
    for (i = 0; i < c->nb_streams; i++) {
        OpusStreamContext *s = &c->streams[i];

        if (i && buf) {
            ret = ff_opus_parse_packet(&s->packet, buf, buf_size, i != c->nb_streams - 1);
            if (ret < 0) {
                av_log(avctx, AV_LOG_ERROR, "Error parsing the packet header.\n");
                return ret;
            }
            if (coded_samples != s->packet.frame_count * s->packet.frame_duration) {
                av_log(avctx, AV_LOG_ERROR,
                       "Mismatching coded sample count in substream %d.\n", i);
                return AVERROR_INVALIDDATA;
            }
            s->silk_samplerate = get_silk_samplerate(s->packet.config);
        }

        ret = opus_decode_subpacket(&c->streams[i], buf,
                                    s->packet.data_size, c->out + 2 * i,
                                    c->out_size[i], coded_samples);
        if (ret < 0)
            return ret;
        c->decoded_samples[i] = ret;
        decoded_samples       = FFMIN(decoded_samples, ret);

        buf      += s->packet.packet_size;
        buf_size -= s->packet.packet_size;
    }

    /* buffer the extra samples */
    for (i = 0; i < c->nb_streams; i++) {
        int buffer_samples = c->decoded_samples[i] - decoded_samples;
        if (buffer_samples) {
            float *buf[2] = { c->out[2 * i + 0] ? c->out[2 * i + 0] : (float*)frame->extended_data[0],
                              c->out[2 * i + 1] ? c->out[2 * i + 1] : (float*)frame->extended_data[0] };
            buf[0] += decoded_samples;
            buf[1] += decoded_samples;
            ret = av_audio_fifo_write(c->sync_buffers[i], (void**)buf, buffer_samples);
            if (ret < 0)
                return ret;
        }
    }

    for (i = 0; i < avctx->channels; i++) {
        ChannelMap *map = &c->channel_maps[i];

        /* handle copied channels */
        if (map->copy) {
            memcpy(frame->extended_data[i],
                   frame->extended_data[map->copy_idx],
                   frame->linesize[0]);
        } else if (map->silence) {
            memset(frame->extended_data[i], 0, frame->linesize[0]);
        }

        if (c->gain_i && decoded_samples > 0) {
            c->fdsp->vector_fmul_scalar((float*)frame->extended_data[i],
                                        (float*)frame->extended_data[i],
                                        c->gain, FFALIGN(decoded_samples, 8));
        }
    }

    frame->nb_samples = decoded_samples;
    *got_frame_ptr    = !!decoded_samples;

    return avpkt->size;
}
int main(int argc, char* argv[])
{
    AVFormatContext *ifmt_ctx = NULL;
    AVFormatContext *ifmt_ctx_a = NULL;
    AVFormatContext *ofmt_ctx;
    AVInputFormat* ifmt;
    AVStream* video_st;
    AVStream* audio_st;
    AVCodecContext* pCodecCtx;
    AVCodecContext* pCodecCtx_a;
    AVCodec* pCodec;
    AVCodec* pCodec_a;
    AVPacket *dec_pkt, enc_pkt;
    AVPacket *dec_pkt_a, enc_pkt_a;
    AVFrame *pframe, *pFrameYUV;
    struct SwsContext *img_convert_ctx;
    struct SwrContext *aud_convert_ctx;
    char capture_name[80] = { 0 };
    char device_name[80] = { 0 };
    char device_name_a[80] = { 0 };
    int framecnt = 0;
    int nb_samples = 0;
    int videoindex;
    int audioindex;
    int i;
    int ret;
    HANDLE hThread;
    const char* out_path = "rtmp://localhost/live/livestream";
    int dec_got_frame, enc_got_frame;
    int dec_got_frame_a, enc_got_frame_a;
    int aud_next_pts = 0;
    int vid_next_pts = 0;
    int encode_video = 1, encode_audio = 1;
    AVRational time_base_q = { 1, AV_TIME_BASE };

    av_register_all();
    // Register devices
    avdevice_register_all();
    avformat_network_init();
#if USEFILTER
    // Register filters
    avfilter_register_all();
    buffersrc = avfilter_get_by_name("buffer");
    buffersink = avfilter_get_by_name("buffersink");
#endif

    // Show DirectShow devices
    show_dshow_device();

    printf("\nChoose video capture device: ");
    if (gets(capture_name) == 0) {
        printf("Error in gets()\n");
        return -1;
    }
    sprintf(device_name, "video=%s", capture_name);

    printf("\nChoose audio capture device: ");
    if (gets(capture_name) == 0) {
        printf("Error in gets()\n");
        return -1;
    }
    sprintf(device_name_a, "audio=%s", capture_name);

    //wchar_t *cam = L"video=Integrated Camera";
    //wchar_t *cam = L"video=YY伴侣";
    //char *device_name_utf8 = dup_wchar_to_utf8(cam);
    //wchar_t *cam_a = L"audio=麦克风阵列 (Realtek High Definition Audio)";
    //char *device_name_utf8_a = dup_wchar_to_utf8(cam_a);

    ifmt = av_find_input_format("dshow");

    // Set device params.
    // If rtbufsize is not set, error messages are printed to the console, but the
    // stream can still usually be watched or recorded correctly. Setting rtbufsize
    // suppresses those messages, but a larger rtbufsize adds latency.
    AVDictionary *device_param = 0;
    //av_dict_set(&device_param, "rtbufsize", "10M", 0);

    // Open your own video device by name
    if (avformat_open_input(&ifmt_ctx, device_name, ifmt, &device_param) != 0) {
        printf("Couldn't open input video stream.\n");
        return -1;
    }
    // Open your own audio device by name
    if (avformat_open_input(&ifmt_ctx_a, device_name_a, ifmt, &device_param) != 0) {
        printf("Couldn't open input audio stream.\n");
        return -1;
    }

    // Input video initialization
    if (avformat_find_stream_info(ifmt_ctx, NULL) < 0) {
        printf("Couldn't find video stream information.\n");
        return -1;
    }
    videoindex = -1;
    for (i = 0; i < ifmt_ctx->nb_streams; i++)
        if (ifmt_ctx->streams[i]->codec->codec_type == AVMEDIA_TYPE_VIDEO) {
            videoindex = i;
            break;
        }
    if (videoindex == -1) {
        printf("Couldn't find a video stream.\n");
        return -1;
    }
    if (avcodec_open2(ifmt_ctx->streams[videoindex]->codec,
                      avcodec_find_decoder(ifmt_ctx->streams[videoindex]->codec->codec_id),
                      NULL) < 0) {
        printf("Could not open video codec.\n");
        return -1;
    }

    // Input audio initialization
    if (avformat_find_stream_info(ifmt_ctx_a, NULL) < 0) {
        printf("Couldn't find audio stream information.\n");
        return -1;
    }
    audioindex = -1;
    for (i = 0; i < ifmt_ctx_a->nb_streams; i++)
        if (ifmt_ctx_a->streams[i]->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
            audioindex = i;
            break;
        }
    if (audioindex == -1) {
        printf("Couldn't find an audio stream.\n");
        return -1;
    }
    if (avcodec_open2(ifmt_ctx_a->streams[audioindex]->codec,
                      avcodec_find_decoder(ifmt_ctx_a->streams[audioindex]->codec->codec_id),
                      NULL) < 0) {
        printf("Could not open audio codec.\n");
        return -1;
    }

    // Output initialization
    avformat_alloc_output_context2(&ofmt_ctx, NULL, "flv", out_path);

    // Output video encoder initialization
    pCodec = avcodec_find_encoder(AV_CODEC_ID_H264);
    if (!pCodec) {
        printf("Can not find output video encoder!\n");
        return -1;
    }
    pCodecCtx = avcodec_alloc_context3(pCodec);
    pCodecCtx->pix_fmt = PIX_FMT_YUV420P;
    pCodecCtx->width = ifmt_ctx->streams[videoindex]->codec->width;
    pCodecCtx->height = ifmt_ctx->streams[videoindex]->codec->height;
    pCodecCtx->time_base.num = 1;
    pCodecCtx->time_base.den = 25;
    pCodecCtx->bit_rate = 300000;
    pCodecCtx->gop_size = 250;
    /* Some formats want stream headers to be separate. */
    if (ofmt_ctx->oformat->flags & AVFMT_GLOBALHEADER)
        pCodecCtx->flags |= CODEC_FLAG_GLOBAL_HEADER;

    // H.264 codec params
    //pCodecCtx->me_range = 16;
    //pCodecCtx->max_qdiff = 4;
    //pCodecCtx->qcompress = 0.6;
    pCodecCtx->qmin = 10;
    pCodecCtx->qmax = 51;
    // Optional params
    pCodecCtx->max_b_frames = 0;

    // Set H.264 preset and tune
    AVDictionary *param = 0;
    av_dict_set(&param, "preset", "fast", 0);
    av_dict_set(&param, "tune", "zerolatency", 0);

    if (avcodec_open2(pCodecCtx, pCodec, &param) < 0) {
        printf("Failed to open output video encoder!\n");
        return -1;
    }

    // Add a new stream to the output; must be called by the user before
    // avformat_write_header() for muxing
    video_st = avformat_new_stream(ofmt_ctx, pCodec);
    if (video_st == NULL) {
        return -1;
    }
    video_st->time_base.num = 1;
    video_st->time_base.den = 25;
    video_st->codec = pCodecCtx;

    // Output audio encoder initialization
    pCodec_a = avcodec_find_encoder(AV_CODEC_ID_AAC);
    if (!pCodec_a) {
        printf("Can not find output audio encoder!\n");
        return -1;
    }
    pCodecCtx_a = avcodec_alloc_context3(pCodec_a);
    pCodecCtx_a->channels = 2;
    pCodecCtx_a->channel_layout = av_get_default_channel_layout(2);
    pCodecCtx_a->sample_rate = ifmt_ctx_a->streams[audioindex]->codec->sample_rate;
    pCodecCtx_a->sample_fmt = pCodec_a->sample_fmts[0];
    pCodecCtx_a->bit_rate = 32000;
    pCodecCtx_a->time_base.num = 1;
    pCodecCtx_a->time_base.den = pCodecCtx_a->sample_rate;
    /** Allow the use of the experimental AAC encoder */
    pCodecCtx_a->strict_std_compliance = FF_COMPLIANCE_EXPERIMENTAL;
    /* Some formats want stream headers to be separate. */
    if (ofmt_ctx->oformat->flags & AVFMT_GLOBALHEADER)
        pCodecCtx_a->flags |= CODEC_FLAG_GLOBAL_HEADER;
    if (avcodec_open2(pCodecCtx_a, pCodec_a, NULL) < 0) {
        printf("Failed to open output audio encoder!\n");
        return -1;
    }

    // Add a new stream to the output; must be called by the user before
    // avformat_write_header() for muxing
    audio_st = avformat_new_stream(ofmt_ctx, pCodec_a);
    if (audio_st == NULL) {
        return -1;
    }
    audio_st->time_base.num = 1;
    audio_st->time_base.den = pCodecCtx_a->sample_rate;
    audio_st->codec = pCodecCtx_a;

    // Open the output URL; set before avformat_write_header() for muxing
    if (avio_open(&ofmt_ctx->pb, out_path, AVIO_FLAG_READ_WRITE) < 0) {
        printf("Failed to open output file!\n");
        return -1;
    }

    // Show some information
    av_dump_format(ofmt_ctx, 0, out_path, 1);

    // Write the file header
    avformat_write_header(ofmt_ctx, NULL);

    // Prepare before decode and encode
    dec_pkt = (AVPacket *)av_malloc(sizeof(AVPacket));

#if USEFILTER
#else
    // Camera data may have a pixel format of RGB or something else; convert it to YUV420P
    img_convert_ctx = sws_getContext(ifmt_ctx->streams[videoindex]->codec->width,
                                     ifmt_ctx->streams[videoindex]->codec->height,
                                     ifmt_ctx->streams[videoindex]->codec->pix_fmt,
                                     pCodecCtx->width, pCodecCtx->height,
                                     PIX_FMT_YUV420P, SWS_BICUBIC, NULL, NULL, NULL);

    // Initialize the resampler to be able to convert audio sample formats
    aud_convert_ctx = swr_alloc_set_opts(NULL,
                                         av_get_default_channel_layout(pCodecCtx_a->channels),
                                         pCodecCtx_a->sample_fmt,
                                         pCodecCtx_a->sample_rate,
                                         av_get_default_channel_layout(ifmt_ctx_a->streams[audioindex]->codec->channels),
                                         ifmt_ctx_a->streams[audioindex]->codec->sample_fmt,
                                         ifmt_ctx_a->streams[audioindex]->codec->sample_rate,
                                         0, NULL);
    /**
     * Perform a sanity check so that the number of converted samples is
     * not greater than the number of samples to be converted.
     * If the sample rates differ, this case has to be handled differently.
     */
    //av_assert0(pCodecCtx_a->sample_rate == ifmt_ctx_a->streams[audioindex]->codec->sample_rate);
    swr_init(aud_convert_ctx);
#endif

    // Initialize the buffer that stores YUV frames to be encoded
    pFrameYUV = av_frame_alloc();
    uint8_t *out_buffer = (uint8_t *)av_malloc(avpicture_get_size(PIX_FMT_YUV420P, pCodecCtx->width, pCodecCtx->height));
    avpicture_fill((AVPicture *)pFrameYUV, out_buffer, PIX_FMT_YUV420P, pCodecCtx->width, pCodecCtx->height);

    // Initialize the FIFO buffer that stores audio samples to be encoded
    AVAudioFifo *fifo = NULL;
    fifo = av_audio_fifo_alloc(pCodecCtx_a->sample_fmt, pCodecCtx_a->channels, 1);

    // Initialize the buffer that stores converted samples to be encoded
    uint8_t **converted_input_samples = NULL;
    /**
     * Allocate as many pointers as there are audio channels.
     * Each pointer will later point to the audio samples of the corresponding
     * channels (although it may be NULL for interleaved formats).
     */
    if (!(converted_input_samples = (uint8_t**)calloc(pCodecCtx_a->channels,
                                                      sizeof(*converted_input_samples)))) {
        printf("Could not allocate converted input sample pointers\n");
        return AVERROR(ENOMEM);
    }

    printf("\n --------call started----------\n");
#if USEFILTER
    printf("\n Press a different number for a different filter:");
    printf("\n 1->Mirror");
    printf("\n 2->Add Watermark");
    printf("\n 3->Negate");
    printf("\n 4->Draw Edge");
    printf("\n 5->Split Into 4");
    printf("\n 6->Vintage");
    printf("\n Press 0 to remove filter\n");
#endif
    printf("\nPress enter to stop...\n");
    hThread = CreateThread(
        NULL,             // default security attributes
        0,                // use default stack size
        MyThreadFunction, // thread function name
        NULL,             // argument to thread function
        0,                // use default creation flags
        NULL);            // returns the thread identifier

    // Start decoding and encoding
    int64_t start_time = av_gettime();
    while (encode_video || encode_audio) {
        if (encode_video &&
            (!encode_audio || av_compare_ts(vid_next_pts, time_base_q,
                                            aud_next_pts, time_base_q) <= 0)) {
            if ((ret = av_read_frame(ifmt_ctx, dec_pkt)) >= 0) {
                if (exit_thread)
                    break;

                av_log(NULL, AV_LOG_DEBUG, "Going to reencode the frame\n");
                pframe = av_frame_alloc();
                if (!pframe) {
                    ret = AVERROR(ENOMEM);
                    return ret;
                }
                ret = avcodec_decode_video2(ifmt_ctx->streams[dec_pkt->stream_index]->codec,
                                            pframe, &dec_got_frame, dec_pkt);
                if (ret < 0) {
                    av_frame_free(&pframe);
                    av_log(NULL, AV_LOG_ERROR, "Decoding failed\n");
                    break;
                }
                if (dec_got_frame) {
#if USEFILTER
                    pframe->pts = av_frame_get_best_effort_timestamp(pframe);

                    if (filter_change)
                        apply_filters(ifmt_ctx);
                    filter_change = 0;

                    /* push the decoded frame into the filtergraph */
                    if (av_buffersrc_add_frame(buffersrc_ctx, pframe) < 0) {
                        printf("Error while feeding the filtergraph\n");
                        break;
                    }
                    picref = av_frame_alloc();

                    /* pull filtered pictures from the filtergraph */
                    while (1) {
                        ret = av_buffersink_get_frame_flags(buffersink_ctx, picref, 0);
                        if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
                            break;
                        if (ret < 0)
                            return ret;

                        if (picref) {
                            img_convert_ctx = sws_getContext(picref->width, picref->height,
                                                             (AVPixelFormat)picref->format,
                                                             pCodecCtx->width, pCodecCtx->height,
                                                             AV_PIX_FMT_YUV420P, SWS_BICUBIC,
                                                             NULL, NULL, NULL);
                            sws_scale(img_convert_ctx, (const uint8_t* const*)picref->data,
                                      picref->linesize, 0, pCodecCtx->height,
                                      pFrameYUV->data, pFrameYUV->linesize);
                            sws_freeContext(img_convert_ctx);
                            pFrameYUV->width = picref->width;
                            pFrameYUV->height = picref->height;
                            pFrameYUV->format = PIX_FMT_YUV420P;
#else
                    sws_scale(img_convert_ctx, (const uint8_t* const*)pframe->data,
                              pframe->linesize, 0, pCodecCtx->height,
                              pFrameYUV->data, pFrameYUV->linesize);
                    pFrameYUV->width = pframe->width;
                    pFrameYUV->height = pframe->height;
                    pFrameYUV->format = PIX_FMT_YUV420P;
#endif
                    enc_pkt.data = NULL;
                    enc_pkt.size = 0;
                    av_init_packet(&enc_pkt);
                    ret = avcodec_encode_video2(pCodecCtx, &enc_pkt, pFrameYUV, &enc_got_frame);
                    av_frame_free(&pframe);
                    if (enc_got_frame == 1) {
                        //printf("Succeed to encode frame: %5d\tsize:%5d\n", framecnt, enc_pkt.size);
                        framecnt++;
                        enc_pkt.stream_index = video_st->index;

                        // Write PTS
                        AVRational time_base = ofmt_ctx->streams[0]->time_base; //{ 1, 1000 };
                        AVRational r_framerate1 = ifmt_ctx->streams[videoindex]->r_frame_rate; //{ 50, 2 };
                        // Duration between 2 frames (us)
                        int64_t calc_duration = (double)(AV_TIME_BASE)*(1 / av_q2d(r_framerate1)); // internal timestamp
                        // Parameters
                        //enc_pkt.pts = (double)(framecnt*calc_duration)*(double)(av_q2d(time_base_q)) / (double)(av_q2d(time_base));
                        enc_pkt.pts = av_rescale_q(framecnt*calc_duration, time_base_q, time_base);
                        enc_pkt.dts = enc_pkt.pts;
                        enc_pkt.duration = av_rescale_q(calc_duration, time_base_q, time_base); //(double)(calc_duration)*(double)(av_q2d(time_base_q)) / (double)(av_q2d(time_base));
                        enc_pkt.pos = -1;
                        //printf("video pts : %d\n", enc_pkt.pts);

                        vid_next_pts = framecnt*calc_duration; // general timebase

                        // Delay
                        int64_t pts_time = av_rescale_q(enc_pkt.pts, time_base, time_base_q);
                        int64_t now_time = av_gettime() - start_time;
                        if ((pts_time > now_time) &&
                            ((vid_next_pts + pts_time - now_time) < aud_next_pts))
                            av_usleep(pts_time - now_time);

                        ret = av_interleaved_write_frame(ofmt_ctx, &enc_pkt);
                        av_free_packet(&enc_pkt);
                    }
#if USEFILTER
                            av_frame_unref(picref);
                        }
                    }
#endif
                }
                else {
                    av_frame_free(&pframe);
                }
                av_free_packet(dec_pkt);
            }
            else if (ret == AVERROR_EOF)
                encode_video = 0;
            else {
                printf("Could not read video frame\n");
                return ret;
            }
        }
        else {
            // Audio transcoding here
            const int output_frame_size = pCodecCtx_a->frame_size;

            if (exit_thread)
                break;

            /**
             * Make sure that there is one frame worth of samples in the FIFO
             * buffer so that the encoder can do its work.
             * Since the decoder's and the encoder's frame size may differ, we
             * need the FIFO buffer to store as many frames worth of input samples
             * as it takes to make up at least one frame worth of output samples.
             */
            while (av_audio_fifo_size(fifo) < output_frame_size) {
                /**
                 * Decode one frame worth of audio samples, convert it to the
                 * output sample format and put it into the FIFO buffer.
                 */
                AVFrame *input_frame = av_frame_alloc();
                if (!input_frame) {
                    ret = AVERROR(ENOMEM);
                    return ret;
                }

                /** Decode one frame worth of audio samples. */
                /** Packet used for temporary storage. */
                AVPacket input_packet;
                av_init_packet(&input_packet);
                input_packet.data = NULL;
                input_packet.size = 0;

                /** Read one audio frame from the input file into a temporary packet. */
                if ((ret = av_read_frame(ifmt_ctx_a, &input_packet)) < 0) {
                    /** If we are at the end of the file, flush the decoder below. */
                    if (ret == AVERROR_EOF) {
                        encode_audio = 0;
                    }
                    else {
                        printf("Could not read audio frame\n");
                        return ret;
                    }
                }

                /**
                 * Decode the audio frame stored in the temporary packet.
                 * The input audio stream decoder is used to do this.
                 * If we are at the end of the file, pass an empty packet to the decoder
                 * to flush it.
                 */
                if ((ret = avcodec_decode_audio4(ifmt_ctx_a->streams[audioindex]->codec, input_frame,
                                                 &dec_got_frame_a, &input_packet)) < 0) {
                    printf("Could not decode audio frame\n");
                    return ret;
                }
                av_packet_unref(&input_packet);

                /** If there is decoded data, convert and store it. */
                if (dec_got_frame_a) {
                    /**
                     * Allocate memory for the samples of all channels in one consecutive
                     * block for convenience.
                     */
                    if ((ret = av_samples_alloc(converted_input_samples, NULL,
                                                pCodecCtx_a->channels,
                                                input_frame->nb_samples,
                                                pCodecCtx_a->sample_fmt, 0)) < 0) {
                        printf("Could not allocate converted input samples\n");
                        av_freep(&(*converted_input_samples)[0]);
                        free(*converted_input_samples);
                        return ret;
                    }

                    /**
                     * Convert the input samples to the desired output sample format.
                     * This requires a temporary storage provided by converted_input_samples.
                     */
                    /** Convert the samples using the resampler. */
                    if ((ret = swr_convert(aud_convert_ctx,
                                           converted_input_samples, input_frame->nb_samples,
                                           (const uint8_t**)input_frame->extended_data,
                                           input_frame->nb_samples)) < 0) {
                        printf("Could not convert input samples\n");
                        return ret;
                    }

                    /** Add the converted input samples to the FIFO buffer for later processing. */
                    /**
                     * Make the FIFO as large as it needs to be to hold both
                     * the old and the new samples.
                     */
                    if ((ret = av_audio_fifo_realloc(fifo, av_audio_fifo_size(fifo) + input_frame->nb_samples)) < 0) {
                        printf("Could not reallocate FIFO\n");
                        return ret;
                    }

                    /** Store the new samples in the FIFO buffer. */
                    if (av_audio_fifo_write(fifo, (void **)converted_input_samples,
                                            input_frame->nb_samples) < input_frame->nb_samples) {
                        printf("Could not write data to FIFO\n");
                        return AVERROR_EXIT;
                    }
                }
            }

            /**
             * If we have enough samples for the encoder, we encode them.
             * At the end of the file, we pass the remaining samples to
             * the encoder.
             */
            if (av_audio_fifo_size(fifo) >= output_frame_size) {
                /**
                 * Take one frame worth of audio samples from the FIFO buffer,
                 * encode it and write it to the output file.
                 */
                /** Temporary storage for the output samples of the frame written to the file. */
                AVFrame *output_frame = av_frame_alloc();
                if (!output_frame) {
                    ret = AVERROR(ENOMEM);
                    return ret;
                }
                /**
                 * Use the maximum number of possible samples per frame.
                 * If there are fewer samples in the FIFO buffer than the maximum
                 * possible frame size, use that number. Otherwise, use the maximum
                 * possible frame size.
                 */
                const int frame_size = FFMIN(av_audio_fifo_size(fifo), pCodecCtx_a->frame_size);

                /** Initialize temporary storage for one output frame. */
                /**
                 * Set the frame's parameters, especially its size and format.
                 * av_frame_get_buffer needs this to allocate memory for the
                 * audio samples of the frame.
                 * Default channel layouts based on the number of channels
                 * are assumed for simplicity.
                 */
                output_frame->nb_samples = frame_size;
                output_frame->channel_layout = pCodecCtx_a->channel_layout;
                output_frame->format = pCodecCtx_a->sample_fmt;
                output_frame->sample_rate = pCodecCtx_a->sample_rate;

                /**
                 * Allocate the samples of the created frame. This call will make
                 * sure that the audio frame can hold as many samples as specified.
                 */
                if ((ret = av_frame_get_buffer(output_frame, 0)) < 0) {
                    printf("Could not allocate output frame samples\n");
                    av_frame_free(&output_frame);
                    return ret;
                }

                /**
                 * Read as many samples from the FIFO buffer as required to fill the frame.
                 * The samples are stored in the frame temporarily.
                 */
                if (av_audio_fifo_read(fifo, (void **)output_frame->data, frame_size) < frame_size) {
                    printf("Could not read data from FIFO\n");
                    return AVERROR_EXIT;
                }

                /** Encode one frame worth of audio samples. */
                /** Packet used for temporary storage. */
                AVPacket output_packet;
                av_init_packet(&output_packet);
                output_packet.data = NULL;
                output_packet.size = 0;

                /** Set a timestamp based on the sample rate for the container. */
                if (output_frame) {
                    nb_samples += output_frame->nb_samples;
                }

                /**
                 * Encode the audio frame and store it in the temporary packet.
                 * The output audio stream encoder is used to do this.
                 */
                if ((ret = avcodec_encode_audio2(pCodecCtx_a, &output_packet,
                                                 output_frame, &enc_got_frame_a)) < 0) {
                    printf("Could not encode frame\n");
                    av_packet_unref(&output_packet);
                    return ret;
                }

                /** Write one audio frame from the temporary packet to the output file. */
                if (enc_got_frame_a) {
                    output_packet.stream_index = 1;

                    AVRational time_base = ofmt_ctx->streams[1]->time_base;
                    AVRational r_framerate1 = { ifmt_ctx_a->streams[audioindex]->codec->sample_rate, 1 }; // { 44100, 1 };
                    int64_t calc_duration = (double)(AV_TIME_BASE)*(1 / av_q2d(r_framerate1)); // internal timestamp

                    output_packet.pts = av_rescale_q(nb_samples*calc_duration, time_base_q, time_base);
                    output_packet.dts = output_packet.pts;
                    output_packet.duration = output_frame->nb_samples;
                    //printf("audio pts : %d\n", output_packet.pts);
                    aud_next_pts = nb_samples*calc_duration;

                    int64_t pts_time = av_rescale_q(output_packet.pts, time_base, time_base_q);
                    int64_t now_time = av_gettime() - start_time;
                    if ((pts_time > now_time) &&
                        ((aud_next_pts + pts_time - now_time) < vid_next_pts))
                        av_usleep(pts_time - now_time);

                    if ((ret = av_interleaved_write_frame(ofmt_ctx, &output_packet)) < 0) {
                        printf("Could not write frame\n");
                        av_packet_unref(&output_packet);
                        return ret;
                    }

                    av_packet_unref(&output_packet);
                }
                av_frame_free(&output_frame);
            }
        }
    }

    // Flush the encoders
    ret = flush_encoder(ifmt_ctx, ofmt_ctx, 0, framecnt);
    if (ret < 0) {
        printf("Flushing video encoder failed\n");
        return -1;
    }
    ret = flush_encoder_a(ifmt_ctx_a, ofmt_ctx, 1, nb_samples);
    if (ret < 0) {
        printf("Flushing audio encoder failed\n");
        return -1;
    }

    // Write the file trailer
    av_write_trailer(ofmt_ctx);

cleanup:
    // Clean up
#if USEFILTER
    if (filter_graph)
        avfilter_graph_free(&filter_graph);
#endif
    if (video_st)
        avcodec_close(video_st->codec);
    if (audio_st)
        avcodec_close(audio_st->codec);
    av_free(out_buffer);
    if (converted_input_samples) {
        av_freep(&converted_input_samples[0]);
        //free(converted_input_samples);
    }
    if (fifo)
        av_audio_fifo_free(fifo);
    avio_close(ofmt_ctx->pb);
    avformat_free_context(ifmt_ctx);
    avformat_free_context(ifmt_ctx_a);
    avformat_free_context(ofmt_ctx);
    CloseHandle(hThread);
    return 0;
}
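The capture example above is written against avcodec_decode_video2()/avcodec_decode_audio4()/avcodec_encode_video2()/avcodec_encode_audio2(), all deprecated since FFmpeg 3.1 and since removed. If it were rebuilt against a current FFmpeg, each decode call would map onto the send/receive API. A minimal sketch of that mapping, where dec_ctx, pkt and frame stand in for the example's decoder context, dec_pkt and pframe:

#include <libavcodec/avcodec.h>

/* Sketch only: one packet in, zero or more frames out (FFmpeg >= 3.1).
 * Passing pkt == NULL enters flush mode, replacing the old empty-packet idiom. */
static int decode_one_packet(AVCodecContext *dec_ctx, AVPacket *pkt, AVFrame *frame)
{
    int ret = avcodec_send_packet(dec_ctx, pkt);
    if (ret < 0)
        return ret;

    while ((ret = avcodec_receive_frame(dec_ctx, frame)) >= 0) {
        /* ... scale/resample/encode the frame as the example does ... */
        av_frame_unref(frame);
    }
    /* EAGAIN: the decoder wants more input; EOF: fully flushed. */
    if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
        return 0;
    return ret;
}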
static int opus_decode_frame(OpusStreamContext *s, const uint8_t *data, int size)
{
    int samples    = s->packet.frame_duration;
    int redundancy = 0;
    int redundancy_size, redundancy_pos;
    int ret, i, consumed;
    int delayed_samples = s->delayed_samples;

    ret = ff_opus_rc_dec_init(&s->rc, data, size);
    if (ret < 0)
        return ret;

    /* decode the silk frame */
    if (s->packet.mode == OPUS_MODE_SILK || s->packet.mode == OPUS_MODE_HYBRID) {
#if CONFIG_SWRESAMPLE
        if (!swr_is_initialized(s->swr)) {
#elif CONFIG_AVRESAMPLE
        if (!avresample_is_open(s->avr)) {
#endif
            ret = opus_init_resample(s);
            if (ret < 0)
                return ret;
        }

        samples = ff_silk_decode_superframe(s->silk, &s->rc, s->silk_output,
                                            FFMIN(s->packet.bandwidth, OPUS_BANDWIDTH_WIDEBAND),
                                            s->packet.stereo + 1,
                                            silk_frame_duration_ms[s->packet.config]);
        if (samples < 0) {
            av_log(s->avctx, AV_LOG_ERROR, "Error decoding a SILK frame.\n");
            return samples;
        }
#if CONFIG_SWRESAMPLE
        samples = swr_convert(s->swr,
                              (uint8_t**)s->out, s->packet.frame_duration,
                              (const uint8_t**)s->silk_output, samples);
#elif CONFIG_AVRESAMPLE
        samples = avresample_convert(s->avr, (uint8_t**)s->out, s->out_size,
                                     s->packet.frame_duration,
                                     (uint8_t**)s->silk_output,
                                     sizeof(s->silk_buf[0]), samples);
#endif
        if (samples < 0) {
            av_log(s->avctx, AV_LOG_ERROR, "Error resampling SILK data.\n");
            return samples;
        }
        av_assert2((samples & 7) == 0);
        s->delayed_samples += s->packet.frame_duration - samples;
    } else
        ff_silk_flush(s->silk);

    // decode redundancy information
    consumed = opus_rc_tell(&s->rc);
    if (s->packet.mode == OPUS_MODE_HYBRID && consumed + 37 <= size * 8)
        redundancy = ff_opus_rc_dec_log(&s->rc, 12);
    else if (s->packet.mode == OPUS_MODE_SILK && consumed + 17 <= size * 8)
        redundancy = 1;

    if (redundancy) {
        redundancy_pos = ff_opus_rc_dec_log(&s->rc, 1);

        if (s->packet.mode == OPUS_MODE_HYBRID)
            redundancy_size = ff_opus_rc_dec_uint(&s->rc, 256) + 2;
        else
            redundancy_size = size - (consumed + 7) / 8;
        size -= redundancy_size;
        if (size < 0) {
            av_log(s->avctx, AV_LOG_ERROR, "Invalid redundancy frame size.\n");
            return AVERROR_INVALIDDATA;
        }

        if (redundancy_pos) {
            ret = opus_decode_redundancy(s, data + size, redundancy_size);
            if (ret < 0)
                return ret;
            ff_celt_flush(s->celt);
        }
    }

    /* decode the CELT frame */
    if (s->packet.mode == OPUS_MODE_CELT || s->packet.mode == OPUS_MODE_HYBRID) {
        float *out_tmp[2] = { s->out[0], s->out[1] };
        float **dst = (s->packet.mode == OPUS_MODE_CELT) ? out_tmp : s->celt_output;
        int celt_output_samples = samples;
        int delay_samples = av_audio_fifo_size(s->celt_delay);

        if (delay_samples) {
            if (s->packet.mode == OPUS_MODE_HYBRID) {
                av_audio_fifo_read(s->celt_delay, (void**)s->celt_output, delay_samples);

                for (i = 0; i < s->output_channels; i++) {
                    s->fdsp->vector_fmac_scalar(out_tmp[i], s->celt_output[i], 1.0,
                                                delay_samples);
                    out_tmp[i] += delay_samples;
                }
                celt_output_samples -= delay_samples;
            } else {
                av_log(s->avctx, AV_LOG_WARNING,
                       "Spurious CELT delay samples present.\n");
                av_audio_fifo_drain(s->celt_delay, delay_samples);
                if (s->avctx->err_recognition & AV_EF_EXPLODE)
                    return AVERROR_BUG;
            }
        }

        ff_opus_rc_dec_raw_init(&s->rc, data + size, size);

        ret = ff_celt_decode_frame(s->celt, &s->rc, dst,
                                   s->packet.stereo + 1,
                                   s->packet.frame_duration,
                                   (s->packet.mode == OPUS_MODE_HYBRID) ? 17 : 0,
                                   ff_celt_band_end[s->packet.bandwidth]);
        if (ret < 0)
            return ret;

        if (s->packet.mode == OPUS_MODE_HYBRID) {
            int celt_delay = s->packet.frame_duration - celt_output_samples;
            void *delaybuf[2] = { s->celt_output[0] + celt_output_samples,
                                  s->celt_output[1] + celt_output_samples };

            for (i = 0; i < s->output_channels; i++) {
                s->fdsp->vector_fmac_scalar(out_tmp[i],
                                            s->celt_output[i], 1.0,
                                            celt_output_samples);
            }

            ret = av_audio_fifo_write(s->celt_delay, delaybuf, celt_delay);
            if (ret < 0)
                return ret;
        }
    } else
        ff_celt_flush(s->celt);

    if (s->redundancy_idx) {
        for (i = 0; i < s->output_channels; i++)
            opus_fade(s->out[i], s->out[i],
                      s->redundancy_output[i] + 120 + s->redundancy_idx,
                      ff_celt_window2 + s->redundancy_idx, 120 - s->redundancy_idx);
        s->redundancy_idx = 0;
    }
    if (redundancy) {
        if (!redundancy_pos) {
            ff_celt_flush(s->celt);
            ret = opus_decode_redundancy(s, data + size, redundancy_size);
            if (ret < 0)
                return ret;

            for (i = 0; i < s->output_channels; i++) {
                opus_fade(s->out[i] + samples - 120 + delayed_samples,
                          s->out[i] + samples - 120 + delayed_samples,
                          s->redundancy_output[i] + 120,
                          ff_celt_window2, 120 - delayed_samples);
                if (delayed_samples)
                    s->redundancy_idx = 120 - delayed_samples;
            }
        } else {
            for (i = 0; i < s->output_channels; i++) {
                memcpy(s->out[i] + delayed_samples, s->redundancy_output[i], 120 * sizeof(float));
                opus_fade(s->out[i] + 120 + delayed_samples,
                          s->redundancy_output[i] + 120,
                          s->out[i] + 120 + delayed_samples,
                          ff_celt_window2, 120);
            }
        }
    }

    return samples;
}

static int opus_decode_subpacket(OpusStreamContext *s,
                                 const uint8_t *buf, int buf_size,
                                 float **out, int out_size,
                                 int nb_samples)
{
    int output_samples = 0;
    int flush_needed   = 0;
    int i, j, ret;

    s->out[0]   = out[0];
    s->out[1]   = out[1];
    s->out_size = out_size;

    /* check if we need to flush the resampler */
#if CONFIG_SWRESAMPLE
    if (swr_is_initialized(s->swr)) {
        if (buf) {
            int64_t cur_samplerate;
            av_opt_get_int(s->swr, "in_sample_rate", 0, &cur_samplerate);
            flush_needed = (s->packet.mode == OPUS_MODE_CELT) || (cur_samplerate != s->silk_samplerate);
        } else {
            flush_needed = !!s->delayed_samples;
        }
    }
#elif CONFIG_AVRESAMPLE
    if (avresample_is_open(s->avr)) {
        if (buf) {
            int64_t cur_samplerate;
            av_opt_get_int(s->avr, "in_sample_rate", 0, &cur_samplerate);
            flush_needed = (s->packet.mode == OPUS_MODE_CELT) || (cur_samplerate != s->silk_samplerate);
        } else {
            flush_needed = !!s->delayed_samples;
        }
    }
#endif

    if (!buf && !flush_needed)
        return 0;

    /* use dummy output buffers if the channel is not mapped to anything */
    if (!s->out[0] ||
        (s->output_channels == 2 && !s->out[1])) {
        av_fast_malloc(&s->out_dummy, &s->out_dummy_allocated_size, s->out_size);
        if (!s->out_dummy)
            return AVERROR(ENOMEM);
        if (!s->out[0])
            s->out[0] = s->out_dummy;
        if (!s->out[1])
            s->out[1] = s->out_dummy;
    }

    /* flush the resampler if necessary */
    if (flush_needed) {
        ret = opus_flush_resample(s, s->delayed_samples);
        if (ret < 0) {
            av_log(s->avctx, AV_LOG_ERROR, "Error flushing the resampler.\n");
            return ret;
        }
#if CONFIG_SWRESAMPLE
        swr_close(s->swr);
#elif CONFIG_AVRESAMPLE
        avresample_close(s->avr);
#endif
        output_samples += s->delayed_samples;
        s->delayed_samples = 0;

        if (!buf)
            goto finish;
    }

    /* decode all the frames in the packet */
    for (i = 0; i < s->packet.frame_count; i++) {
        int size = s->packet.frame_size[i];
        int samples = opus_decode_frame(s, buf + s->packet.frame_offset[i], size);

        if (samples < 0) {
            av_log(s->avctx, AV_LOG_ERROR, "Error decoding an Opus frame.\n");
            if (s->avctx->err_recognition & AV_EF_EXPLODE)
                return samples;

            for (j = 0; j < s->output_channels; j++)
                memset(s->out[j], 0, s->packet.frame_duration * sizeof(float));
            samples = s->packet.frame_duration;
        }
        output_samples += samples;

        for (j = 0; j < s->output_channels; j++)
            s->out[j] += samples;
        s->out_size -= samples * sizeof(float);
    }

finish:
    s->out[0] = s->out[1] = NULL;
    s->out_size = 0;

    return output_samples;
}
/**
 * Read samples from the input FIFOs, mix, and write to the output link.
 */
static int output_frame(AVFilterLink *outlink)
{
    AVFilterContext *ctx = outlink->src;
    MixContext      *s = ctx->priv;
    AVFrame *out_buf, *in_buf;
    int nb_samples, ns, i;

    if (s->input_state[0] & INPUT_ON) {
        /* first input live: use the corresponding frame size */
        nb_samples = frame_list_next_frame_size(s->frame_list);
        for (i = 1; i < s->nb_inputs; i++) {
            if (s->input_state[i] & INPUT_ON) {
                ns = av_audio_fifo_size(s->fifos[i]);
                if (ns < nb_samples) {
                    if (!(s->input_state[i] & INPUT_EOF))
                        /* unclosed input with not enough samples */
                        return 0;
                    /* closed input to drain */
                    nb_samples = ns;
                }
            }
        }
    } else {
        /* first input closed: use the available samples */
        nb_samples = INT_MAX;
        for (i = 1; i < s->nb_inputs; i++) {
            if (s->input_state[i] & INPUT_ON) {
                ns = av_audio_fifo_size(s->fifos[i]);
                nb_samples = FFMIN(nb_samples, ns);
            }
        }
        if (nb_samples == INT_MAX) {
            ff_outlink_set_status(outlink, AVERROR_EOF, s->next_pts);
            return 0;
        }
    }

    s->next_pts = frame_list_next_pts(s->frame_list);
    frame_list_remove_samples(s->frame_list, nb_samples);

    calculate_scales(s, nb_samples);

    if (nb_samples == 0)
        return 0;

    out_buf = ff_get_audio_buffer(outlink, nb_samples);
    if (!out_buf)
        return AVERROR(ENOMEM);

    in_buf = ff_get_audio_buffer(outlink, nb_samples);
    if (!in_buf) {
        av_frame_free(&out_buf);
        return AVERROR(ENOMEM);
    }

    for (i = 0; i < s->nb_inputs; i++) {
        if (s->input_state[i] & INPUT_ON) {
            int planes, plane_size, p;

            av_audio_fifo_read(s->fifos[i], (void **)in_buf->extended_data,
                               nb_samples);

            planes     = s->planar ? s->nb_channels : 1;
            plane_size = nb_samples * (s->planar ? 1 : s->nb_channels);
            plane_size = FFALIGN(plane_size, 16);

            if (out_buf->format == AV_SAMPLE_FMT_FLT ||
                out_buf->format == AV_SAMPLE_FMT_FLTP) {
                for (p = 0; p < planes; p++) {
                    s->fdsp->vector_fmac_scalar((float *)out_buf->extended_data[p],
                                                (float *) in_buf->extended_data[p],
                                                s->input_scale[i], plane_size);
                }
            } else {
                for (p = 0; p < planes; p++) {
                    s->fdsp->vector_dmac_scalar((double *)out_buf->extended_data[p],
                                                (double *) in_buf->extended_data[p],
                                                s->input_scale[i], plane_size);
                }
            }
        }
    }
    av_frame_free(&in_buf);

    out_buf->pts = s->next_pts;
    if (s->next_pts != AV_NOPTS_VALUE)
        s->next_pts += nb_samples;

    return ff_filter_frame(outlink, out_buf);
}
/**
 * `buffer` is a float array laid out as:
 *
 *   float buffer[nsamples * nchannels];
 */
bool AV::addAudioFrame(unsigned char* buffer, int nsamples, int nchannels) {
  if(!use_audio) {
    printf("Cannot add audio stream, we're not using audio.\n");
    return false;
  }

  AVCodecContext* c = ct.as->codec;

  // BUFFER HANDLING
  int samples_stored = av_audio_fifo_write(ct.afifo, (void**)&buffer, nsamples);
  if(samples_stored != nsamples) {
    return false;
  }
  int nstored = av_audio_fifo_size(ct.afifo);
  if(nstored < c->frame_size) {
    return false;
  }

  AVPacket packet = {0}; // data and size must be '0' (allocation is done for you :> )
  AVFrame* frame = avcodec_alloc_frame();
  int got_packet = 0;

  av_init_packet(&packet);
  packet.data = NULL;
  packet.size = 0;

  int use_nsamples = c->frame_size;
  frame->nb_samples = use_nsamples; // <-- important, must be set before avcodec_fill_audio_frame

  // GET DATA FROM BUFFER
  int num_bytes = av_samples_get_buffer_size(NULL, c->channels, use_nsamples, c->sample_fmt, 0);
  uint8_t* my_buffer = (uint8_t*)av_malloc(num_bytes);
  uint8_t** my_ptr = &my_buffer;
  int nread = av_audio_fifo_read(ct.afifo, (void**)my_ptr, use_nsamples);
  if(nread != use_nsamples) {
    printf("We only read: %d but we wanted to read %d samples.\n", nread, use_nsamples);
    av_free(my_buffer);
    return false;
  }

  // FILL
  int fill_result = avcodec_fill_audio_frame(
    frame,
    c->channels,
    c->sample_fmt,
    (uint8_t*)my_buffer,
    num_bytes,
    1
  );
  if(fill_result != 0) {
    char buf[1024];
    av_strerror(fill_result, buf, 1024);
    printf("av error: %s\n", buf);
    av_free(my_buffer);
    return false;
  }

  // ENCODE
  int64_t now = av_gettime();
  AVRational my_time_base = (AVRational){1, 1000000};
  AVRational stream_time_base = ct.as->time_base;        // stream time base
  AVRational codec_time_base = ct.as->codec->time_base;  // codec time base
  int64_t now_frame_pts = av_rescale_q(now, my_time_base, codec_time_base);

  if(frame->pts == AV_NOPTS_VALUE) {
    frame->pts = ct.acounter;
  }
  ct.acounter = frame->pts + use_nsamples;
  printf("frame->nb_samples: %d, counter: %d\n", frame->nb_samples, ct.acounter);

  int enc_result = avcodec_encode_audio2(c, &packet, frame, &got_packet);
  packet.stream_index = ct.as->index;
  if(!got_packet) {
    av_free(my_buffer);
    return false;
  }
  if(enc_result < 0) {
    char buf[1024];
    av_strerror(enc_result, buf, 1024);
    printf("av error: %s\n", buf);
  }

  // CORRECT THE PTS, FROM AUDIO_CODEC.time_base TO STREAM.time_base
  packet.pts = av_rescale_q(packet.pts, codec_time_base, stream_time_base);
  packet.dts = av_rescale_q(packet.dts, codec_time_base, stream_time_base);
  //packet.duration = av_rescale_q(packet.duration, codec_time_base, stream_time_base);
  //packet.dts = packet.pts; // just a wild guess
  packet.duration = 0;

  /*
  printf("Audio: stream: %d\n", packet.stream_index);
  printf("Audio: ct.acounter: %d\n", ct.acounter);
  printf("Audio: packet.duration: %d\n", packet.duration);
  printf("Audio: stream.time_base, num=%d, den=%d\n", stream_time_base.num, stream_time_base.den);
  printf("Audio: codec.time_base, num=%d, den=%d\n", codec_time_base.num, codec_time_base.den);
  printf("Audio: coded_frame.pts: %lld\n", ct.as->codec->coded_frame->pts);
  printf("Audio: packet.pts: %lld\n", packet.pts);
  printf("-------------------\n");
  */

  // WRITE
  if(av_interleaved_write_frame(ct.c, &packet) != 0) {
    printf("Cannot write audio frame.\n");
    av_free(my_buffer);
    return false;
  }

  av_free(my_buffer);
  return true;
}
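The same accumulate-in-a-FIFO, encode-in-frame_size-chunks pattern as addAudioFrame(), but written against the current send/receive encode API instead of the long-deprecated avcodec_alloc_frame()/avcodec_encode_audio2() pair. A hedged sketch, not a drop-in replacement: enc_ctx is assumed to be an opened audio encoder, fifo an AVAudioFifo holding samples already in the encoder's sample format, and the pre-5.1 channel_layout field is used to match the rest of this section.

#include <libavcodec/avcodec.h>
#include <libavutil/audio_fifo.h>
#include <libavutil/error.h>

/* Sketch only: drain full encoder frames from an AVAudioFifo and encode
 * them with the send/receive API (FFmpeg >= 3.1). Muxing is left out. */
static int encode_from_fifo(AVCodecContext *enc_ctx, AVAudioFifo *fifo,
                            int64_t *next_pts)
{
    while (av_audio_fifo_size(fifo) >= enc_ctx->frame_size) {
        AVFrame *frame = av_frame_alloc();
        AVPacket *pkt  = av_packet_alloc();
        int ret = 0;

        if (!frame || !pkt)
            ret = AVERROR(ENOMEM);

        if (ret >= 0) {
            frame->nb_samples     = enc_ctx->frame_size;
            frame->format         = enc_ctx->sample_fmt;
            frame->channel_layout = enc_ctx->channel_layout;
            frame->sample_rate    = enc_ctx->sample_rate;
            ret = av_frame_get_buffer(frame, 0);
        }

        if (ret >= 0 &&
            av_audio_fifo_read(fifo, (void **)frame->data,
                               frame->nb_samples) < frame->nb_samples)
            ret = AVERROR_EXIT;

        if (ret >= 0) {
            frame->pts = *next_pts;      /* in 1/sample_rate units */
            *next_pts += frame->nb_samples;
            ret = avcodec_send_frame(enc_ctx, frame);
        }

        while (ret >= 0) {
            ret = avcodec_receive_packet(enc_ctx, pkt);
            if (ret >= 0) {
                /* ... rescale pkt->pts/dts and mux the packet here ... */
                av_packet_unref(pkt);
            }
        }
        if (ret == AVERROR(EAGAIN))      /* encoder wants more input */
            ret = 0;

        av_frame_free(&frame);
        av_packet_free(&pkt);
        if (ret < 0)
            return ret;
    }
    return 0;
}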