status_t
AVFormatReader::Stream::Init(int32 virtualIndex)
{
	TRACE("AVFormatReader::Stream::Init(%ld)\n", virtualIndex);

	status_t ret = StreamBase::Init(virtualIndex);
	if (ret != B_OK)
		return ret;

	// Get a pointer to the AVCodecContext for the stream at streamIndex.
	AVCodecContext* codecContext = fStream->codec;

#if 0
// stippi: Here I was experimenting with whether some fields of the
// AVCodecContext change (or get filled in at all) once the AVCodec is
// opened.
class CodecOpener {
public:
	CodecOpener(AVCodecContext* context)
	{
		fCodecContext = context;
		AVCodec* codec = avcodec_find_decoder(context->codec_id);
		fCodecOpen = avcodec_open(context, codec) >= 0;
		if (!fCodecOpen)
			TRACE("  failed to open the codec!\n");
	}
	~CodecOpener()
	{
		if (fCodecOpen)
			avcodec_close(fCodecContext);
	}
private:
	AVCodecContext*		fCodecContext;
	bool				fCodecOpen;
} codecOpener(codecContext);
#endif

	// initialize the media_format for this stream
	media_format* format = &fFormat;
	memset(format, 0, sizeof(media_format));

	media_format_description description;

	// Set format family and type depending on codec_type of the stream.
	switch (codecContext->codec_type) {
		case AVMEDIA_TYPE_AUDIO:
			if ((codecContext->codec_id >= CODEC_ID_PCM_S16LE)
				&& (codecContext->codec_id <= CODEC_ID_PCM_U8)) {
				TRACE("  raw audio\n");
				format->type = B_MEDIA_RAW_AUDIO;
				description.family = B_ANY_FORMAT_FAMILY;
				// This will then apparently be handled by the (built into
				// BMediaTrack) RawDecoder.
			} else {
				TRACE("  encoded audio\n");
				format->type = B_MEDIA_ENCODED_AUDIO;
				description.family = B_MISC_FORMAT_FAMILY;
				description.u.misc.file_format = 'ffmp';
			}
			break;
		case AVMEDIA_TYPE_VIDEO:
			TRACE("  encoded video\n");
			format->type = B_MEDIA_ENCODED_VIDEO;
			description.family = B_MISC_FORMAT_FAMILY;
			description.u.misc.file_format = 'ffmp';
			break;
		default:
			TRACE("  unknown type\n");
			format->type = B_MEDIA_UNKNOWN_TYPE;
			return B_ERROR;
			break;
	}

	if (format->type == B_MEDIA_RAW_AUDIO) {
		// We cannot describe all raw-audio formats; some are unsupported.
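		// NOTE: The codec_id range check above (CODEC_ID_PCM_S16LE through
		// CODEC_ID_PCM_U8) relies on these PCM codec IDs being declared
		// contiguously in FFmpeg's CodecID enumeration; the cases below
		// cover exactly that range.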
		switch (codecContext->codec_id) {
			case CODEC_ID_PCM_S16LE:
				format->u.raw_audio.format
					= media_raw_audio_format::B_AUDIO_SHORT;
				format->u.raw_audio.byte_order
					= B_MEDIA_LITTLE_ENDIAN;
				break;
			case CODEC_ID_PCM_S16BE:
				format->u.raw_audio.format
					= media_raw_audio_format::B_AUDIO_SHORT;
				format->u.raw_audio.byte_order
					= B_MEDIA_BIG_ENDIAN;
				break;
			case CODEC_ID_PCM_U16LE:
//				format->u.raw_audio.format
//					= media_raw_audio_format::B_AUDIO_USHORT;
//				format->u.raw_audio.byte_order
//					= B_MEDIA_LITTLE_ENDIAN;
				return B_NOT_SUPPORTED;
				break;
			case CODEC_ID_PCM_U16BE:
//				format->u.raw_audio.format
//					= media_raw_audio_format::B_AUDIO_USHORT;
//				format->u.raw_audio.byte_order
//					= B_MEDIA_BIG_ENDIAN;
				return B_NOT_SUPPORTED;
				break;
			case CODEC_ID_PCM_S8:
				format->u.raw_audio.format
					= media_raw_audio_format::B_AUDIO_CHAR;
				break;
			case CODEC_ID_PCM_U8:
				format->u.raw_audio.format
					= media_raw_audio_format::B_AUDIO_UCHAR;
				break;
			default:
				return B_NOT_SUPPORTED;
				break;
		}
	} else {
		if (description.family == B_MISC_FORMAT_FAMILY)
			description.u.misc.codec = codecContext->codec_id;

		BMediaFormats formats;
		status_t status = formats.GetFormatFor(description, format);
		if (status < B_OK)
			TRACE("  formats.GetFormatFor() error: %s\n", strerror(status));

		format->user_data_type = B_CODEC_TYPE_INFO;
		*(uint32*)format->user_data = codecContext->codec_tag;
		format->user_data[4] = 0;
	}

	format->require_flags = 0;
	format->deny_flags = B_MEDIA_MAUI_UNDEFINED_FLAGS;

	switch (format->type) {
		case B_MEDIA_RAW_AUDIO:
			format->u.raw_audio.frame_rate = (float)codecContext->sample_rate;
			format->u.raw_audio.channel_count = codecContext->channels;
			format->u.raw_audio.channel_mask = codecContext->channel_layout;
			format->u.raw_audio.byte_order
				= avformat_to_beos_byte_order(codecContext->sample_fmt);
			format->u.raw_audio.format
				= avformat_to_beos_format(codecContext->sample_fmt);
			format->u.raw_audio.buffer_size = 0;

			// Read one packet and mark it for later re-use. (So our first
			// GetNextChunk() call does not read another packet.)
			if (_NextPacket(true) == B_OK) {
				TRACE("  successfully determined audio buffer size: %d\n",
					fPacket.size);
				format->u.raw_audio.buffer_size = fPacket.size;
			}
			break;

		case B_MEDIA_ENCODED_AUDIO:
			format->u.encoded_audio.bit_rate = codecContext->bit_rate;
			format->u.encoded_audio.frame_size = codecContext->frame_size;
			// Fill in some info about possible output format
			format->u.encoded_audio.output
				= media_multi_audio_format::wildcard;
			format->u.encoded_audio.output.frame_rate
				= (float)codecContext->sample_rate;
			// Channel layout bits match in Be API and FFmpeg.
			format->u.encoded_audio.output.channel_count
				= codecContext->channels;
			format->u.encoded_audio.multi_info.channel_mask
				= codecContext->channel_layout;
			format->u.encoded_audio.output.byte_order
				= avformat_to_beos_byte_order(codecContext->sample_fmt);
			format->u.encoded_audio.output.format
				= avformat_to_beos_format(codecContext->sample_fmt);
			if (codecContext->block_align > 0) {
				format->u.encoded_audio.output.buffer_size
					= codecContext->block_align;
			} else {
				format->u.encoded_audio.output.buffer_size
					= codecContext->frame_size * codecContext->channels
						* (format->u.encoded_audio.output.format
							& media_raw_audio_format::B_AUDIO_SIZE_MASK);
			}
			break;

		case B_MEDIA_ENCODED_VIDEO:
// TODO: Specifying any of these seems to throw off the format matching
// later on.
//			format->u.encoded_video.avg_bit_rate = codecContext->bit_rate;
//			format->u.encoded_video.max_bit_rate = codecContext->bit_rate
//				+ codecContext->bit_rate_tolerance;

//			format->u.encoded_video.encoding
//				= media_encoded_video_format::B_ANY;

//			format->u.encoded_video.frame_size = 1;
//			format->u.encoded_video.forward_history = 0;
//			format->u.encoded_video.backward_history = 0;

			format->u.encoded_video.output.field_rate = FrameRate();
			format->u.encoded_video.output.interlace = 1;

			format->u.encoded_video.output.first_active = 0;
			format->u.encoded_video.output.last_active
				= codecContext->height - 1;
				// TODO: Maybe libavformat actually provides that info
				// somewhere...
			format->u.encoded_video.output.orientation
				= B_VIDEO_TOP_LEFT_RIGHT;

			// Calculate the display aspect ratio
			AVRational displayAspectRatio;
			if (codecContext->sample_aspect_ratio.num != 0) {
				av_reduce(&displayAspectRatio.num, &displayAspectRatio.den,
					codecContext->width
						* codecContext->sample_aspect_ratio.num,
					codecContext->height
						* codecContext->sample_aspect_ratio.den,
					1024 * 1024);
				TRACE("  pixel aspect ratio: %d/%d, "
					"display aspect ratio: %d/%d\n",
					codecContext->sample_aspect_ratio.num,
					codecContext->sample_aspect_ratio.den,
					displayAspectRatio.num, displayAspectRatio.den);
			} else {
				av_reduce(&displayAspectRatio.num, &displayAspectRatio.den,
					codecContext->width, codecContext->height, 1024 * 1024);
				TRACE("  no display aspect ratio (%d/%d)\n",
					displayAspectRatio.num, displayAspectRatio.den);
			}
			format->u.encoded_video.output.pixel_width_aspect
				= displayAspectRatio.num;
			format->u.encoded_video.output.pixel_height_aspect
				= displayAspectRatio.den;

			format->u.encoded_video.output.display.format
				= pixfmt_to_colorspace(codecContext->pix_fmt);
			format->u.encoded_video.output.display.line_width
				= codecContext->width;
			format->u.encoded_video.output.display.line_count
				= codecContext->height;
			TRACE("  width/height: %d/%d\n", codecContext->width,
				codecContext->height);
			format->u.encoded_video.output.display.bytes_per_row = 0;
			format->u.encoded_video.output.display.pixel_offset = 0;
			format->u.encoded_video.output.display.line_offset = 0;
			format->u.encoded_video.output.display.flags = 0; // TODO

			break;

		default:
			// This is an unknown format to us.
			break;
	}

	// Add the meta data, if any
	if (codecContext->extradata_size > 0) {
		format->SetMetaData(codecContext->extradata,
			codecContext->extradata_size);
		TRACE("  extradata: %p\n", format->MetaData());
	}

	TRACE("  extradata_size: %d\n", codecContext->extradata_size);
//	TRACE("  intra_matrix: %p\n", codecContext->intra_matrix);
//	TRACE("  inter_matrix: %p\n", codecContext->inter_matrix);
//	TRACE("  get_buffer(): %p\n", codecContext->get_buffer);
//	TRACE("  release_buffer(): %p\n", codecContext->release_buffer);

#ifdef TRACE_AVFORMAT_READER
	char formatString[512];
	if (string_for_format(*format, formatString, sizeof(formatString)))
		TRACE("  format: %s\n", formatString);

	uint32 encoding = format->Encoding();
	TRACE("  encoding '%.4s'\n", (char*)&encoding);
#endif

	return B_OK;
}
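// A worked example of the display aspect ratio computation in Init() above
// (purely illustrative; the function name and numbers are not taken from
// this file): a PAL DVD stream is typically 720x576 with a sample (pixel)
// aspect ratio of 64/45, and av_reduce() then recovers the expected 16/9
// display aspect ratio.
#if 0
static void
example_display_aspect_ratio()
{
	AVRational dar;
	// 720 * 64 = 46080, 576 * 45 = 25920; reduced this is 16/9.
	av_reduce(&dar.num, &dar.den, 720 * 64, 576 * 45, 1024 * 1024);
	// Without a sample aspect ratio, square pixels are assumed and the
	// frame dimensions reduce directly, e.g. 1920/1080 -> 16/9.
	av_reduce(&dar.num, &dar.den, 1920, 1080, 1024 * 1024);
}
#endif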
status_t
StreamBase::Seek(uint32 flags, int64* frame, bigtime_t* time)
{
	BAutolock _(fStreamLock);

	if (fContext == NULL || fStream == NULL)
		return B_NO_INIT;

	TRACE_SEEK("StreamBase::Seek(%ld,%s%s%s%s, %lld, "
		"%lld)\n", VirtualIndex(),
		(flags & B_MEDIA_SEEK_TO_FRAME) ? " B_MEDIA_SEEK_TO_FRAME" : "",
		(flags & B_MEDIA_SEEK_TO_TIME) ? " B_MEDIA_SEEK_TO_TIME" : "",
		(flags & B_MEDIA_SEEK_CLOSEST_BACKWARD)
			? " B_MEDIA_SEEK_CLOSEST_BACKWARD" : "",
		(flags & B_MEDIA_SEEK_CLOSEST_FORWARD)
			? " B_MEDIA_SEEK_CLOSEST_FORWARD" : "",
		*frame, *time);

	double frameRate = FrameRate();
	if ((flags & B_MEDIA_SEEK_TO_FRAME) != 0) {
		// Seeking is always based on time, so initialize the time from the
		// frame when the client seeks based on a frame.
		*time = (bigtime_t)(*frame * 1000000.0 / frameRate + 0.5);
	}

	int64_t timeStamp = *time;

	int searchFlags = AVSEEK_FLAG_BACKWARD;
	if ((flags & B_MEDIA_SEEK_CLOSEST_FORWARD) != 0)
		searchFlags = 0;

	if (fSeekByBytes) {
		searchFlags |= AVSEEK_FLAG_BYTE;

		BAutolock _(fSourceLock);
		int64_t fileSize;
		if (fSource->GetSize(&fileSize) != B_OK)
			return B_NOT_SUPPORTED;
		int64_t duration = Duration();
		if (duration == 0)
			return B_NOT_SUPPORTED;

		timeStamp = int64_t(fileSize * ((double)timeStamp / duration));
		if ((flags & B_MEDIA_SEEK_CLOSEST_BACKWARD) != 0) {
			timeStamp -= 65536;
			if (timeStamp < 0)
				timeStamp = 0;
		}

		bool seekAgain = true;
		bool seekForward = true;
		bigtime_t lastFoundTime = -1;
		int64_t closestTimeStampBackwards = -1;
		while (seekAgain) {
			if (avformat_seek_file(fContext, -1, INT64_MIN, timeStamp,
					INT64_MAX, searchFlags) < 0) {
				TRACE("  avformat_seek_file() (by bytes) failed.\n");
				return B_ERROR;
			}
			seekAgain = false;

			// Our last packet is toast in any case. Read the next one so we
			// know where we really sought.
			fReusePacket = false;
			if (_NextPacket(true) == B_OK) {
				while (fPacket.pts == kNoPTSValue) {
					fReusePacket = false;
					if (_NextPacket(true) != B_OK)
						return B_ERROR;
				}
				if (fPacket.pos >= 0)
					timeStamp = fPacket.pos;
				bigtime_t foundTime
					= _ConvertFromStreamTimeBase(fPacket.pts);
				if (foundTime != lastFoundTime) {
					lastFoundTime = foundTime;
					if (foundTime > *time) {
						if (closestTimeStampBackwards >= 0) {
							timeStamp = closestTimeStampBackwards;
							seekAgain = true;
							seekForward = false;
							continue;
						}
						int64_t diff = int64_t(fileSize
							* ((double)(foundTime - *time)
								/ (2 * duration)));
						if (diff < 8192)
							break;
						timeStamp -= diff;
						TRACE_SEEK("  need to seek back (%lld) (time: %.2f "
							"-> %.2f)\n", timeStamp, *time / 1000000.0,
							foundTime / 1000000.0);
						if (timeStamp < 0)
							foundTime = 0;
						else {
							seekAgain = true;
							continue;
						}
					} else if (seekForward && foundTime < *time - 100000) {
						closestTimeStampBackwards = timeStamp;
						int64_t diff = int64_t(fileSize
							* ((double)(*time - foundTime)
								/ (2 * duration)));
						if (diff < 8192)
							break;
						timeStamp += diff;
						TRACE_SEEK("  need to seek forward (%lld) (time: "
							"%.2f -> %.2f)\n", timeStamp, *time / 1000000.0,
							foundTime / 1000000.0);
						if (timeStamp > duration)
							foundTime = duration;
						else {
							seekAgain = true;
							continue;
						}
					}
				}
				TRACE_SEEK("  found time: %lld -> %lld (%.2f)\n", *time,
					foundTime, foundTime / 1000000.0);
				*time = foundTime;
				*frame = (uint64)(*time * frameRate / 1000000LL + 0.5);
				TRACE_SEEK("  seeked frame: %lld\n", *frame);
			} else {
				TRACE_SEEK("  _NextPacket() failed!\n");
				return B_ERROR;
			}
		}
	} else {
		// We may not get a PTS from the next packet after seeking, so
		// we try to get an expected time from the index.
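		// av_index_search_timestamp() returns the position of the index
		// entry at or before the given stream timestamp when
		// AVSEEK_FLAG_BACKWARD is set (at or after it otherwise), or a
		// negative value when no suitable entry exists.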
		int64_t streamTimeStamp = _ConvertToStreamTimeBase(*time);
		int index = av_index_search_timestamp(fStream, streamTimeStamp,
			searchFlags);
		if (index < 0) {
			TRACE("  av_index_search_timestamp() failed\n");
		} else {
			if (index > 0) {
				const AVIndexEntry& entry = fStream->index_entries[index];
				streamTimeStamp = entry.timestamp;
			} else {
				// Some demuxers use the first index entry to store some
				// other information, like the total playing time for
				// example. Assume the timestamp of the first entry is
				// always 0.
				// TODO: Handle start-time offset?
				streamTimeStamp = 0;
			}
			bigtime_t foundTime
				= _ConvertFromStreamTimeBase(streamTimeStamp);
			bigtime_t timeDiff = foundTime > *time
				? foundTime - *time : *time - foundTime;

			if (timeDiff > 1000000
				&& (fStreamBuildsIndexWhileReading
					|| index == fStream->nb_index_entries - 1)) {
				// If the stream is building the index on the fly while
				// parsing it, we only have entries in the index for
				// positions already decoded, i.e. we cannot seek into the
				// future. In that case, just assume that we can seek where
				// we want and leave time/frame unmodified. Since
				// successfully seeking one time will generate index entries
				// for the sought-to position, we need to remember this in
				// fStreamBuildsIndexWhileReading, since when seeking back
				// there will be later index entries, but we still want to
				// ignore the found entry.
				fStreamBuildsIndexWhileReading = true;
				TRACE_SEEK("  Not trusting generic index entry. "
					"(Current count: %d)\n", fStream->nb_index_entries);
			} else {
				// If we found a reasonable time, write it into *time.
				// After seeking, we will try to read the sought time from
				// the next packet. If the packet has no PTS value, we may
				// still have a more accurate time from the index lookup.
				*time = foundTime;
			}
		}

		if (avformat_seek_file(fContext, -1, INT64_MIN, timeStamp,
				INT64_MAX, searchFlags) < 0) {
			TRACE("  avformat_seek_file() failed.\n");
			// Try to fall back to av_seek_frame()
			timeStamp = _ConvertToStreamTimeBase(timeStamp);
			if (av_seek_frame(fContext, fStream->index, timeStamp,
					searchFlags) < 0) {
				TRACE("  av_seek_frame() failed as well.\n");
				// Fall back to seeking to the beginning by bytes
				timeStamp = 0;
				if (av_seek_frame(fContext, fStream->index, timeStamp,
						AVSEEK_FLAG_BYTE) < 0) {
					TRACE("  av_seek_frame() by bytes failed as "
						"well.\n");
					// Do not propagate the error in any case. We fail if
					// we can't read another packet.
				} else
					*time = 0;
			}
		}

		// Our last packet is toast in any case. Read the next one so
		// we know where we really sought.
		bigtime_t foundTime = *time;

		fReusePacket = false;
		if (_NextPacket(true) == B_OK) {
			if (fPacket.pts != kNoPTSValue)
				foundTime = _ConvertFromStreamTimeBase(fPacket.pts);
			else
				TRACE_SEEK("  no PTS in packet after seeking\n");
		} else
			TRACE_SEEK("  _NextPacket() failed!\n");

		*time = foundTime;
		TRACE_SEEK("  sought time: %.2fs\n", *time / 1000000.0);
		*frame = (uint64)(*time * frameRate / 1000000.0 + 0.5);
		TRACE_SEEK("  sought frame: %lld\n", *frame);
	}

	return B_OK;
}
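// The byte-seek path in Seek() above is essentially an interpolation search
// over the file: assuming a roughly constant bit rate, the target byte
// offset is first estimated as fileSize * (targetTime / duration) and then
// refined after each probe by half of the linear error estimate,
// fileSize * (timeError / (2 * duration)), until the correction drops below
// 8192 bytes. A minimal sketch of the initial estimator (the helper name is
// hypothetical, not part of this file):
#if 0
static int64_t
estimate_byte_offset(int64_t fileSize, bigtime_t targetTime,
	bigtime_t duration)
{
	// Linear interpolation between file position and media time.
	return int64_t(fileSize * ((double)targetTime / duration));
}
#endif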
status_t
StreamBase::GetNextChunk(const void** chunkBuffer,
	size_t* chunkSize, media_header* mediaHeader)
{
	BAutolock _(fStreamLock);

	TRACE_PACKET("StreamBase::GetNextChunk()\n");

	// Get the last stream DTS before reading the next packet, since
	// then it points to that one.
	int64 lastStreamDTS = fStream->cur_dts;

	status_t ret = _NextPacket(false);
	if (ret != B_OK) {
		*chunkBuffer = NULL;
		*chunkSize = 0;
		return ret;
	}

	// NOTE: AVPacket has a field called "convergence_duration", for which
	// the documentation is quite interesting. It sounds like it could be
	// used to know the time until the next I-Frame in streams that don't
	// let you know the position of keyframes in another way (like through
	// the index).

	// According to the libavformat documentation, fPacket is valid until
	// the next call to av_read_frame(). This is what we want and we can
	// share the memory with the least overhead.
	*chunkBuffer = fPacket.data;
	*chunkSize = fPacket.size;

	if (mediaHeader != NULL) {
		mediaHeader->type = fFormat.type;
		mediaHeader->buffer = 0;
		mediaHeader->destination = -1;
		mediaHeader->time_source = -1;
		mediaHeader->size_used = fPacket.size;

		// FFmpeg recommends using the decoding time stamps as the primary
		// source for presentation time stamps, especially for video formats
		// that use frame reordering. Moreover, this ensures that the
		// returned start times form a monotonically increasing time series
		// (even for videos that contain B-frames).
		// \see http://git.videolan.org/?p=ffmpeg.git;a=blob;f=libavformat/avformat.h;h=1e8a6294890d580cd9ebc684eaf4ce57c8413bd8;hb=9153b33a742c4e2a85ff6230aea0e75f5a8b26c2#l1623
		bigtime_t presentationTimeStamp;
		if (fPacket.dts != kNoPTSValue)
			presentationTimeStamp = fPacket.dts;
		else if (fPacket.pts != kNoPTSValue)
			presentationTimeStamp = fPacket.pts;
		else
			presentationTimeStamp = lastStreamDTS;

		mediaHeader->start_time
			= _ConvertFromStreamTimeBase(presentationTimeStamp);
		mediaHeader->file_pos = fPacket.pos;
		mediaHeader->data_offset = 0;
		switch (mediaHeader->type) {
			case B_MEDIA_RAW_AUDIO:
				break;
			case B_MEDIA_ENCODED_AUDIO:
				mediaHeader->u.encoded_audio.buffer_flags
					= (fPacket.flags & AV_PKT_FLAG_KEY)
						? B_MEDIA_KEY_FRAME : 0;
				break;
			case B_MEDIA_RAW_VIDEO:
				mediaHeader->u.raw_video.line_count
					= fFormat.u.raw_video.display.line_count;
				break;
			case B_MEDIA_ENCODED_VIDEO:
				mediaHeader->u.encoded_video.field_flags
					= (fPacket.flags & AV_PKT_FLAG_KEY)
						? B_MEDIA_KEY_FRAME : 0;
				mediaHeader->u.encoded_video.line_count
					= fFormat.u.encoded_video.output.display.line_count;
				break;
			default:
				break;
		}
	}

//	static bigtime_t pts[2];
//	static bigtime_t lastPrintTime = system_time();
//	static BLocker printLock;
//	if (fStream->index < 2) {
//		if (fPacket.pts != kNoPTSValue)
//			pts[fStream->index] = _ConvertFromStreamTimeBase(fPacket.pts);
//		printLock.Lock();
//		bigtime_t now = system_time();
//		if (now - lastPrintTime > 1000000) {
//			printf("PTS: %.4f/%.4f, diff: %.4f\r", pts[0] / 1000000.0,
//				pts[1] / 1000000.0, (pts[0] - pts[1]) / 1000000.0);
//			fflush(stdout);
//			lastPrintTime = now;
//		}
//		printLock.Unlock();
//	}

	return B_OK;
}
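// For reference, _ConvertFromStreamTimeBase() maps FFmpeg's per-stream tick
// unit into microseconds via the stream's time_base rational. A minimal
// sketch of such a conversion, assuming no stream start-time offset (which
// the actual implementation may need to account for); the function name is
// illustrative, not part of this file:
#if 0
static bigtime_t
example_convert_from_stream_time_base(const AVStream* stream, int64_t ticks)
{
	// ticks * (num / den) seconds, scaled to microseconds
	return (bigtime_t)(1000000LL * ticks * stream->time_base.num
		/ stream->time_base.den);
}
#endif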