core/vidl/vidl_ffmpeg_istream_v2.txx
Go to the documentation of this file.
00001 // This is core/vidl/vidl_ffmpeg_istream_v2.txx
00002 #ifndef vidl_ffmpeg_istream_v2_txx_
00003 #define vidl_ffmpeg_istream_v2_txx_
00004 #include "vidl_ffmpeg_istream.h"
00005 //:
00006 // \file
00007 // \author Matt Leota
00008 // \author Amitha Perera
00009 // \date   26 Dec 2007
00010 //
00011 // Update implementation based on ffmpeg svn -r11322
00012 // and libswscale svn -r25485.
00013 
00014 //-----------------------------------------------------------------------------
00015 
00016 #include "vidl_ffmpeg_init.h"
00017 #include "vidl_frame.h"
00018 #include "vidl_ffmpeg_convert.h"
00019 
00020 #include <vcl_string.h>
00021 #include <vcl_iostream.h>
00022 
00023 extern "C" {
00024 #if FFMPEG_IN_SEVERAL_DIRECTORIES
00025 #include <libavcodec/avcodec.h>
00026 #include <libavformat/avformat.h>
00027 #include <libswscale/swscale.h>
00028 #else
00029 #include <ffmpeg/avcodec.h>
00030 #include <ffmpeg/avformat.h>
00031 #include <ffmpeg/swscale.h>
00032 #endif
00033 }
00034 
00035 //--------------------------------------------------------------------------------
00036 
00037 struct vidl_ffmpeg_istream::pimpl
00038 {
00039   pimpl()
00040   : fmt_cxt_( NULL ),
00041     vid_index_( -1 ),
00042     vid_str_( NULL ),
00043     last_dts( 0 ),
00044     frame_( NULL ),
00045     num_frames_( -2 ), // sentinel value to indicate not yet computed
00046     sws_context_( NULL ),
00047     cur_frame_( NULL ),
00048     deinterlace_( false ),
00049     frame_number_offset_( 0 )
00050   {
00051   }
00052 
00053   AVFormatContext* fmt_cxt_;
00054   int vid_index_;
00055   AVStream* vid_str_;
00056 
00057   //: Decode time of last frame.
00058   int64_t last_dts;
00059 
00060   //: Start time of the stream, to offset the dts when computing the frame number.
00061   int64_t start_time;
00062 
00063   //: The last successfully read frame.
00064   //
00065   // If frame_->data[0] is not NULL, then the frame corresponds to
00066   // the codec state, so that codec.width and so on apply to the
00067   // frame data.
00068   AVFrame* frame_;
00069 
00070   //: number of counted frames
00071   int num_frames_;
00072 
00073   //: A software scaling context
00074   //
00075   // This is the context used for the software scaling and colour
00076   // conversion routines. Since the conversion is likely to be the
00077   // same for each frame, we save the context to avoid re-creating it
00078   // every time.
00079   SwsContext* sws_context_;
00080 
00081   //: A contiguous memory buffer to store the current image data
00082   vil_memory_chunk_sptr contig_memory_;
00083 
00084   //: The last successfully decoded frame.
00085   mutable vidl_frame_sptr cur_frame_;
00086 
00087   //: Apply deinterlacing on the frames?
00088   bool deinterlace_;
00089 
00090   //: Some codec/file format combinations need a frame number offset.
00091   // These codecs have a delay between reading packets and generating frames.
00092   unsigned frame_number_offset_;
00093 };
00094 
00095 
00096 //--------------------------------------------------------------------------------
00097 
00098 //: Constructor
00099 vidl_ffmpeg_istream::
00100 vidl_ffmpeg_istream()
00101   : is_( new vidl_ffmpeg_istream::pimpl )
00102 {
00103   vidl_ffmpeg_init();
00104 }
00105 
00106 
00107 //: Constructor - from a filename
00108 vidl_ffmpeg_istream::
00109 vidl_ffmpeg_istream(const vcl_string& filename)
00110   : is_( new vidl_ffmpeg_istream::pimpl )
00111 {
00112   vidl_ffmpeg_init();
00113   open(filename);
00114 }
00115 
00116 
00117 //: Destructor
00118 vidl_ffmpeg_istream::
00119 ~vidl_ffmpeg_istream()
00120 {
00121   close();
00122   delete is_;
00123 }
00124 
00125 //: Open a new stream using a filename
00126 bool
00127 vidl_ffmpeg_istream::
00128 open(const vcl_string& filename)
00129 {
00130   // Close any currently opened file
00131   close();
00132 
00133   // Open the file
00134   int err;
00135   if ( ( err = av_open_input_file( &is_->fmt_cxt_, filename.c_str(), NULL, 0, NULL ) ) != 0 ) {
00136     return false;
00137   }
00138 
00139   // Get the stream information by reading a bit of the file
00140   if ( av_find_stream_info( is_->fmt_cxt_ ) < 0 ) {
00141     return false;
00142   }
00143 
00144   // Find a video stream. Use the first one we find.
00145   is_->vid_index_ = -1;
00146   for ( unsigned i = 0; i < is_->fmt_cxt_->nb_streams; ++i ) {
00147     AVCodecContext *enc = is_->fmt_cxt_->streams[i]->codec;
00148     if ( enc->codec_type == CODEC_TYPE_VIDEO ) {
00149       is_->vid_index_ = i;
00150       break;
00151     }
00152   }
00153   if ( is_->vid_index_ == -1 ) {
00154     return false;
00155   }
00156 
00157   dump_format( is_->fmt_cxt_, 0, filename.c_str(), 0 );
00158   AVCodecContext *enc = is_->fmt_cxt_->streams[is_->vid_index_]->codec;
00159 
00160   // Open the stream
00161   AVCodec* codec = avcodec_find_decoder(enc->codec_id);
00162   if ( !codec || avcodec_open( enc, codec ) < 0 ) {
00163     return false;
00164   }
00165 
00166   is_->vid_str_ = is_->fmt_cxt_->streams[ is_->vid_index_ ];
00167   is_->frame_ = avcodec_alloc_frame();
00168 
00169   if ( is_->vid_str_->start_time == int64_t(1)<<63 ) {
00170     is_->start_time = 0;
00171   } else {
00172     is_->start_time = is_->vid_str_->start_time;
00173   }
00174 
00175 
00176   // The MPEG 2 codec has a latency of 1 frame when encoded in an AVI
00177   // stream, so the dts of the last packet (stored in last_dts) is
00178   // actually the next frame's dts.
00179   if ( is_->vid_str_->codec->codec_id == CODEC_ID_MPEG2VIDEO &&
00180        vcl_string("avi") == is_->fmt_cxt_->iformat->name ) {
00181     is_->frame_number_offset_ = 1;
00182   }
00183 
00184 
00185   return true;
00186 }
00187 
00188 
00189 //: Close the stream
00190 void
00191 vidl_ffmpeg_istream::
00192 close()
00193 {
00194   if ( is_->frame_ ) {
00195     av_freep( &is_->frame_ );
00196   }
00197 
00198   is_->num_frames_ = -2;
00199   is_->contig_memory_ = 0;
00200   is_->vid_index_ = -1;
00201   if ( is_->vid_str_ ) {
00202     avcodec_close( is_->vid_str_->codec );
00203     is_->vid_str_ = 0;
00204   }
00205   if ( is_->fmt_cxt_ ) {
00206     av_close_input_file( is_->fmt_cxt_ );
00207     is_->fmt_cxt_ = 0;
00208   }
00209 }
00210 
00211 
00212 //: Return true if the stream is open for reading
00213 bool
00214 vidl_ffmpeg_istream::
00215 is_open() const
00216 {
00217   return ! ! is_->frame_;
00218 }
00219 
00220 
00221 //: Return true if the stream is in a valid state
00222 bool
00223 vidl_ffmpeg_istream::
00224 is_valid() const
00225 {
00226   return is_open() && is_->frame_->data[0] != 0;
00227 }
00228 
00229 
00230 //: Return true if the stream support seeking
00231 bool
00232 vidl_ffmpeg_istream::
00233 is_seekable() const
00234 {
00235   return true;
00236 }
00237 
00238 
00239 //: Return the number of frames if known
00240 //  returns -1 for non-seekable streams
00241 int
00242 vidl_ffmpeg_istream::num_frames() const
00243 {
00244   // to get an accurate frame count, quickly run through the entire
00245   // video.  We'll only do this if the user hasn't read any frames,
00246   // because we have no guarantee that we can successfully seek back
00247   // to anywhere but the beginning.  There is logic in advance() to
00248   // ensure this.
00249   vidl_ffmpeg_istream* mutable_this = const_cast<vidl_ffmpeg_istream*>(this);
00250   if ( mutable_this->is_->num_frames_ == -2 ) {
00251     mutable_this->is_->num_frames_ = 0;
00252     while (mutable_this->advance()) {
00253       ++mutable_this->is_->num_frames_;
00254     }
00255     av_seek_frame( mutable_this->is_->fmt_cxt_,
00256                    mutable_this->is_->vid_index_,
00257                    0,
00258                    AVSEEK_FLAG_BACKWARD );
00259   }
00260 
00261   return is_->num_frames_;
00262 }
00263 
00264 
00265 //: Return the current frame number
00266 unsigned int
00267 vidl_ffmpeg_istream::
00268 frame_number() const
00269 {
00270   // Quick return if the stream isn't open.
00271   if ( !is_valid() ) {
00272     return static_cast<unsigned int>(-1);
00273   }
00274 
00275   return ((is_->last_dts - is_->start_time)
00276           * is_->vid_str_->r_frame_rate.num / is_->vid_str_->r_frame_rate.den
00277           * is_->vid_str_->time_base.num + is_->vid_str_->time_base.den/2)
00278          / is_->vid_str_->time_base.den
00279          - int(is_->frame_number_offset_);
00280 }
00281 
00282 
00283 //: Return the width of each frame
00284 unsigned int
00285 vidl_ffmpeg_istream
00286 ::width() const
00287 {
00288   // Quick return if the stream isn't open.
00289   if ( !is_open() ) {
00290     return 0;
00291   }
00292 
00293   return is_->fmt_cxt_->streams[is_->vid_index_]->codec->width;
00294 }
00295 
00296 
00297 //: Return the height of each frame
00298 unsigned int
00299 vidl_ffmpeg_istream
00300 ::height() const
00301 {
00302   // Quick return if the stream isn't open.
00303   if ( !is_open() ) {
00304     return 0;
00305   }
00306 
00307   return is_->fmt_cxt_->streams[is_->vid_index_]->codec->height;
00308 }
00309 
00310 
00311 //: Return the pixel format
00312 vidl_pixel_format
00313 vidl_ffmpeg_istream
00314 ::format() const
00315 {
00316   // Quick return if the stream isn't open.
00317   if ( !is_open() ) {
00318     return VIDL_PIXEL_FORMAT_UNKNOWN;
00319   }
00320 
00321   AVCodecContext* enc = is_->fmt_cxt_->streams[is_->vid_index_]->codec;
00322   vidl_pixel_format fmt = vidl_pixel_format_from_ffmpeg(enc->pix_fmt);
00323   if (fmt == VIDL_PIXEL_FORMAT_UNKNOWN)
00324     return VIDL_PIXEL_FORMAT_RGB_24;
00325   return fmt;
00326 }
00327 
00328 
00329 //: Return the frame rate (0.0 if unspecified)
00330 double
00331 vidl_ffmpeg_istream
00332 ::frame_rate() const
00333 {
00334   // Quick return if the stream isn't open.
00335   if ( !is_open() ) {
00336     return 0.0;
00337   }
00338 
00339   return static_cast<double>(is_->vid_str_->r_frame_rate.num) / is_->vid_str_->r_frame_rate.den;
00340 }
00341 
00342 
00343 //: Return the duration in seconds (0.0 if unknown)
00344 double
00345 vidl_ffmpeg_istream
00346 ::duration() const
00347 {
00348   // Quick return if the stream isn't open.
00349   if ( !is_open() ) {
00350     return 0.0;
00351   }
00352   return static_cast<double>(is_->vid_str_->time_base.num)/is_->vid_str_->time_base.den
00353          * static_cast<double>(is_->vid_str_->duration);
00354 }
00355 
00356 
00357 //: Advance to the next frame (but don't acquire an image)
00358 bool
00359 vidl_ffmpeg_istream::
00360 advance()
00361 {
00362   // Quick return if the file isn't open.
00363   if ( !is_open() ) {
00364     return false;
00365   }
00366 
00367   // See the comment in num_frames().  This is to make sure that once
00368   // we start reading frames, we'll never try to march to the end to
00369   // figure out how many frames there are.
00370   if ( is_->num_frames_ == -2 ) {
00371     is_->num_frames_ = -1;
00372   }
00373 
00374   AVCodecContext* codec = is_->fmt_cxt_->streams[is_->vid_index_]->codec;
00375 
00376   AVPacket pkt;
00377   int got_picture = 0;
00378 
00379   while ( got_picture == 0 ) {
00380     if ( av_read_frame( is_->fmt_cxt_, &pkt ) < 0 ) {
00381       break;
00382     }
00383     is_->last_dts = pkt.dts;
00384 
00385     // Make sure that the packet is from the actual video stream.
00386     if (pkt.stream_index==is_->vid_index_)
00387     {
00388       if ( avcodec_decode_video( codec,
00389                                  is_->frame_, &got_picture,
00390                                  pkt.data, pkt.size ) < 0 ) {
00391         vcl_cerr << "vidl_ffmpeg_istream: Error decoding packet!\n";
00392         return false;
00393       }
00394     }
00395     av_free_packet( &pkt );
00396   }
00397 
00398   // From ffmpeg apiexample.c: some codecs, such as MPEG, transmit the
00399   // I and P frame with a latency of one frame. You must do the
00400   // following to have a chance to get the last frame of the video.
00401   if ( !got_picture ) {
00402     if ( avcodec_decode_video( codec,
00403                                is_->frame_, &got_picture,
00404                                NULL, 0 ) >= 0 ) {
00405       is_->last_dts += int64_t(is_->vid_str_->time_base.den) * is_->vid_str_->r_frame_rate.den
00406         / is_->vid_str_->time_base.num / is_->vid_str_->r_frame_rate.num;
00407     }
00408   }
00409 
00410   // The cached frame is out of date, whether we managed to get a new
00411   // frame or not.
00412   if (is_->cur_frame_)
00413     is_->cur_frame_->invalidate();
00414   is_->cur_frame_ = 0;
00415 
00416   if ( ! got_picture ) {
00417     is_->frame_->data[0] = NULL;
00418   }
00419 
00420   return got_picture != 0;
00421 }
00422 
00423 
00424 //: Read the next frame from the stream
00425 vidl_frame_sptr
00426 vidl_ffmpeg_istream::read_frame()
00427 {
00428   if (advance())
00429     return current_frame();
00430   return NULL;
00431 }
00432 
00433 
00434 //: Return the current frame in the stream
00435 vidl_frame_sptr
00436 vidl_ffmpeg_istream::current_frame()
00437 {
00438   // Quick return if the stream isn't valid
00439   if ( !is_valid() ) {
00440     return NULL;
00441   }
00442   AVCodecContext* enc = is_->fmt_cxt_->streams[is_->vid_index_]->codec;
00443   // If we have not already converted this frame, try to convert it
00444   if ( !is_->cur_frame_ && is_->frame_->data[0] != 0 )
00445   {
00446     int width = enc->width;
00447     int height = enc->height;
00448 
00449     // Deinterlace if requested
00450     if ( is_->deinterlace_ ) {
00451       avpicture_deinterlace( (AVPicture*)is_->frame_, (AVPicture*)is_->frame_,
00452                              enc->pix_fmt, width, height );
00453     }
00454 
00455     // If the pixel format is not recognized by vidl then convert the data into RGB_24
00456     vidl_pixel_format fmt = vidl_pixel_format_from_ffmpeg(enc->pix_fmt);
00457     if (fmt == VIDL_PIXEL_FORMAT_UNKNOWN)
00458     {
00459       int size = width*height*3;
00460       if (!is_->contig_memory_)
00461         is_->contig_memory_ = new vil_memory_chunk(size, VIL_PIXEL_FORMAT_BYTE);
00462       else
00463         is_->contig_memory_->set_size(size, VIL_PIXEL_FORMAT_BYTE);
00464 
00465       // Reuse the previous context if we can.
00466       is_->sws_context_ = sws_getCachedContext(
00467         is_->sws_context_,
00468         width, height, enc->pix_fmt,
00469         width, height, PIX_FMT_RGB24,
00470         SWS_BILINEAR,
00471         NULL, NULL, NULL );
00472 
00473       if ( is_->sws_context_ == NULL ) {
00474         vcl_cerr << "vidl_ffmpeg_istream: couldn't create conversion context\n";
00475         return vidl_frame_sptr();
00476       }
00477 
00478       AVPicture rgb_frame;
00479       avpicture_fill(&rgb_frame, (uint8_t*)is_->contig_memory_->data(), PIX_FMT_RGB24, width, height);
00480 
00481       sws_scale( is_->sws_context_,
00482                  is_->frame_->data, is_->frame_->linesize,
00483                  0, height,
00484                  rgb_frame.data, rgb_frame.linesize );
00485 
00486       is_->cur_frame_ = new vidl_shared_frame(is_->contig_memory_->data(),width,height,
00487                                               VIDL_PIXEL_FORMAT_RGB_24);
00488     }
00489     else
00490     {
00491       // Test for contiguous memory.  Sometimes FFMPEG uses scanline buffers larger
00492       // than the image width.  The extra memory is used in optimized decoding routines.
00493       // This leads to a segmented image buffer, not supported by vidl.
00494       AVPicture test_frame;
00495       avpicture_fill(&test_frame, is_->frame_->data[0], enc->pix_fmt, width, height);
00496       if (test_frame.data[1] == is_->frame_->data[1] &&
00497           test_frame.data[2] == is_->frame_->data[2] &&
00498           test_frame.linesize[0] == is_->frame_->linesize[0] &&
00499           test_frame.linesize[1] == is_->frame_->linesize[1] &&
00500           test_frame.linesize[2] == is_->frame_->linesize[2] )
00501       {
00502         is_->cur_frame_ = new vidl_shared_frame(is_->frame_->data[0], width, height, fmt);
00503       }
00504       // Copy the image into contiguous memory.
00505       else
00506       {
00507         if (!is_->contig_memory_) {
00508           int size = avpicture_get_size( enc->pix_fmt, width, height );
00509           is_->contig_memory_ = new vil_memory_chunk(size, VIL_PIXEL_FORMAT_BYTE);
00510         }
00511         avpicture_fill(&test_frame, (uint8_t*)is_->contig_memory_->data(), enc->pix_fmt, width, height);
00512         av_picture_copy(&test_frame, (AVPicture*)is_->frame_, enc->pix_fmt, width, height);
00513         // use a shared frame because the vil_memory_chunk is reused for each frame
00514         is_->cur_frame_ = new vidl_shared_frame(is_->contig_memory_->data(),width,height,fmt);
00515       }
00516     }
00517   }
00518 
00519   return is_->cur_frame_;
00520 }
00521 
00522 
00523 //: Seek to the given frame number
00524 // \returns true if successful
00525 bool
00526 vidl_ffmpeg_istream::
00527 seek_frame(unsigned int frame)
00528 {
00529   // Quick return if the stream isn't open.
00530   if ( !is_open() ) {
00531     return false;
00532   }
00533 
00534   // We rely on the initial cast to make sure all the operations happen in int64.
00535   int64_t req_timestamp =
00536     int64_t(frame + is_->frame_number_offset_)
00537     * is_->vid_str_->time_base.den
00538     * is_->vid_str_->r_frame_rate.den
00539     / is_->vid_str_->time_base.num
00540     / is_->vid_str_->r_frame_rate.num
00541     + is_->start_time;
00542 
00543   // Seek to a keyframe before the timestamp that we want.
00544   int seek = av_seek_frame( is_->fmt_cxt_, is_->vid_index_, req_timestamp, AVSEEK_FLAG_BACKWARD );
00545 
00546   if ( seek < 0 )
00547     return false;
00548 
00549   avcodec_flush_buffers( is_->vid_str_->codec );
00550 
00551   // We got to a key frame. Forward until we get to the frame we want.
00552   while ( true )
00553   {
00554     if ( ! advance() ) {
00555       return false;
00556     }
00557     if ( is_->last_dts >= req_timestamp ) {
00558       if ( is_->last_dts > req_timestamp ) {
00559         vcl_cerr << "Warning: seek went into the future!\n";
00560         return false;
00561       }
00562       return true;
00563     }
00564   }
00565 }
00566 
00567 #endif // vidl_ffmpeg_istream_v2_txx_