FFmpeg Audio/Video Decoding
FFmpeg is a library for audio/video decoding. Its decoding flow breaks down into the following steps (a minimal skeleton combining them follows the list):
av_register_all(): register all components
avformat_open_input(): open the input video file
avformat_find_stream_info(): retrieve the stream information
avcodec_find_decoder(): find the matching decoder
avcodec_open2(): open the decoder
avcodec_decode_video2(): decode one frame of data
avcodec_close(): close the decoder
avformat_close_input(): close the input video
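Putting the steps together, a minimal decode skeleton might look like the following. This is a sketch with error handling trimmed to the essentials, and it targets the same pre-4.0 API used throughout this post:

extern "C" {
#include "libavformat/avformat.h"
#include "libavcodec/avcodec.h"
}

int decodeSkeleton(const char* path)
{
    av_register_all();                                    // register all components
    AVFormatContext* fmt = avformat_alloc_context();
    if (avformat_open_input(&fmt, path, NULL, NULL) != 0) // open the input file
        return -1;
    if (avformat_find_stream_info(fmt, NULL) < 0)         // read stream information
        return -1;
    int video = -1;
    for (unsigned int i = 0; i < fmt->nb_streams; ++i) {  // locate the video stream
        if (fmt->streams[i]->codec->codec_type == AVMEDIA_TYPE_VIDEO)
            video = i;
    }
    if (video < 0)
        return -1;
    AVCodecContext* cc = fmt->streams[video]->codec;
    AVCodec* dec = avcodec_find_decoder(cc->codec_id);    // find the matching decoder
    if (dec == NULL || avcodec_open2(cc, dec, NULL) < 0)  // open the decoder
        return -1;
    AVFrame* frame = av_frame_alloc();
    AVPacket pkt;
    int gotPic = 0;
    while (av_read_frame(fmt, &pkt) >= 0) {               // demux one packet at a time
        if (pkt.stream_index == video)
            avcodec_decode_video2(cc, frame, &gotPic, &pkt); // decode one frame
        av_free_packet(&pkt);
    }
    av_frame_free(&frame);
    avcodec_close(cc);                                    // close the decoder
    avformat_close_input(&fmt);                           // close the input
    return 0;
}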
FFmpeg Data Structures
Since FFmpeg is written in C, the key to using it is understanding its struct types.
The data structures involved in decoding relate to one another as follows:
AVFormatContext stores the overall information about the media file; its most important members are the AVStream* array and the AVInputFormat. Each AVStream describes one video or audio stream, i.e. it carries an AVCodecContext, and each stream's codec information maps to a decoder, an AVCodec. Finally, AVFrame holds one frame of decoded data.
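In code, the chain looks like this (a sketch, assuming an AVFormatContext* fmt that has already been opened; index 0 is used just for illustration):

AVStream* st = fmt->streams[0];                     // one elementary stream
AVCodecContext* cc = st->codec;                     // its codec parameters (old API)
AVCodec* dec = avcodec_find_decoder(cc->codec_id);  // the matching decoder
AVFrame* frame = av_frame_alloc();                  // will hold one decoded frame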
Below, selected excerpts from the source are annotated:
AVFormatContext
The definition is shown below (only an excerpt; see the source for the full struct):
typedef struct AVFormatContext {
/**
* A class for logging and @ref avoptions. Set by avformat_alloc_context().
* Exports (de)muxer private options if they exist.
*/
const AVClass *av_class;
/**
* The input container format.
*
* Demuxing only, set by avformat_open_input().
*/
struct AVInputFormat *iformat;
/**
* The output container format.
*
* Muxing only, must be set by the caller before avformat_write_header().
*/
struct AVOutputFormat *oformat;
/**
* Format private data. This is an AVOptions-enabled struct
* if and only if iformat/oformat.priv_class is not NULL.
*
* - muxing: set by avformat_write_header()
* - demuxing: set by avformat_open_input()
*/
void *priv_data;
/**
* I/O context.
*
* - demuxing: either set by the user before avformat_open_input() (then
* the user must close it manually) or set by avformat_open_input().
* - muxing: set by the user before avformat_write_header(). The caller must
* take care of closing / freeing the IO context.
*
* Do NOT set this field if AVFMT_NOFILE flag is set in
* iformat/oformat.flags. In such a case, the (de)muxer will handle
* I/O in some other way and this field will be NULL.
*/
AVIOContext *pb;
/* stream info */
int ctx_flags; /**< Format-specific flags, see AVFMTCTX_xx */
/**
* Number of elements in AVFormatContext.streams.
*
* Set by avformat_new_stream(), must not be modified by any other code.
*/
//total number of video/audio streams
unsigned int nb_streams;
/**
* A list of all streams in the file. New streams are created with
* avformat_new_stream().
*
* - demuxing: streams are created by libavformat in avformat_open_input().
* If AVFMTCTX_NOHEADER is set in ctx_flags, then new streams may also
* appear in av_read_frame().
* - muxing: streams are created by the user before avformat_write_header().
*
* Freed by libavformat in avformat_free_context().
*/
//the input video/audio streams, kept in an array of AVStream*
AVStream **streams;
/**
* input or output filename
*
* - demuxing: set by avformat_open_input()
* - muxing: may be set by the caller before avformat_write_header()
*/
char filename[1024];
/**
* Position of the first frame of the component, in
* AV_TIME_BASE fractional seconds. NEVER set this value directly:
* It is deduced from the AVStream values.
*
* Demuxing only, set by libavformat.
*/
//start time of the media
int64_t start_time;
/**
* Duration of the stream, in AV_TIME_BASE fractional
* seconds. Only set this value if you know none of the individual stream
* durations and also do not set any of them. This is deduced from the
* AVStream values if not set.
*
* Demuxing only, set by libavformat.
*/
//duration of the media
int64_t duration;
/**
* Total stream bitrate in bit/s, 0 if not
* available. Never set it directly if the file_size and the
* duration are known as FFmpeg can compute it automatically.
*/
//bit rate of the media
int bit_rate;
//audio-related fields
/**
* Audio preload in microseconds.
* Note, not all formats support this and unpredictable things may happen if it is used when not supported.
* - encoding: Set by user via AVOptions (NO direct access)
* - decoding: unused
*/
int audio_preload;
/**
* Max chunk time in microseconds.
* Note, not all formats support this and unpredictable things may happen if it is used when not supported.
* - encoding: Set by user via AVOptions (NO direct access)
* - decoding: unused
*/
int max_chunk_duration;
/**
* Max chunk size in bytes
* Note, not all formats support this and unpredictable things may happen if it is used when not supported.
* - encoding: Set by user via AVOptions (NO direct access)
* - decoding: unused
*/
int max_chunk_size;
/**
* forces the use of wallclock timestamps as pts/dts of packets
* This has undefined results in the presence of B frames.
* - encoding: unused
* - decoding: Set by user via AVOptions (NO direct access)
*/
int use_wallclock_as_timestamps;
/**
* Avoid negative timestamps during muxing.
* 0 -> allow negative timestamps
* 1 -> avoid negative timestamps
* -1 -> choose automatically (default)
* Note, this only works when interleave_packet_per_dts is in use.
* - encoding: Set by user via AVOptions (NO direct access)
* - decoding: unused
*/
int avoid_negative_ts;
} AVFormatContext;
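To see these fields in action, here is a small sketch that prints the container-level information, assuming an AVFormatContext* fmt opened with avformat_open_input(). start_time and duration are in AV_TIME_BASE units (microseconds):

// convert AV_TIME_BASE ticks to seconds; start_time may be unset
double startSec = (fmt->start_time == AV_NOPTS_VALUE)
                ? 0.0 : fmt->start_time / (double)AV_TIME_BASE;
double durSec = fmt->duration / (double)AV_TIME_BASE;
printf("start=%.3fs duration=%.3fs bitrate=%d bit/s streams=%u\n",
       startSec, durSec, fmt->bit_rate, fmt->nb_streams);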
AVInputFormat
typedef struct AVInputFormat {
/**
* A comma separated list of short names for the format. New names
* may be appended with a minor bump.
*/
//short name(s) of the container format
const char *name;
/**
* Descriptive name for the format, meant to be more human-readable
* than name. You should use the NULL_IF_CONFIG_SMALL() macro
* to define it.
*/
//long, human-readable name of the container format
const char *long_name;
/**
* Can use flags: AVFMT_NOFILE, AVFMT_NEEDNUMBER, AVFMT_SHOW_IDS,
* AVFMT_GENERIC_INDEX, AVFMT_TS_DISCONT, AVFMT_NOBINSEARCH,
* AVFMT_NOGENSEARCH, AVFMT_NO_BYTE_SEEK, AVFMT_SEEK_TO_PTS.
*/
int flags;
/**
* If extensions are defined, then no probe is done. You should
* usually not use extension format guessing because it is not
* reliable enough
*/
//file extensions used by the container format
const char *extensions;
const struct AVCodecTag * const *codec_tag;
const AVClass *priv_class; ///< AVClass for the private context
/*****************************************************************
* No fields below this line are part of the public API. They
* may not be used outside of libavformat and can be changed and
* removed at will.
* New public fields should be added right above.
*****************************************************************
*/
} AVInputFormat;
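After avformat_open_input() succeeds, fmt->iformat points at the detected container format, so these fields are easy to inspect (same fmt as above; the example values in the comments are illustrative):

printf("container: %s (%s), extensions: %s\n",
       fmt->iformat->name,       // e.g. "mov,mp4,m4a,3gp,3g2,mj2"
       fmt->iformat->long_name,  // e.g. "QuickTime / MOV"
       fmt->iformat->extensions ? fmt->iformat->extensions : "(none)");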
AVStream
typedef struct AVStream {
int index; /**< stream index in AVFormatContext */
/**
* Format-specific stream ID.
* decoding: set by libavformat
* encoding: set by the user, replaced by libavformat if left unset
*/
int id;//format-specific stream id
/**
* Codec context associated with this stream. Allocated and freed by
* libavformat.
*
* - decoding: The demuxer exports codec information stored in the headers
* here.
* - encoding: The user sets codec information, the muxer writes it to the
* output. Mandatory fields as specified in AVCodecContext
* documentation must be set even if this AVCodecContext is
* not actually used for encoding.
*/
AVCodecContext *codec;//the AVCodecContext for this stream
void *priv_data;
/**
* encoding: pts generation when outputting stream
*/
struct AVFrac pts;
/**
* This is the fundamental unit of time (in seconds) in terms
* of which frame timestamps are represented.
*
* decoding: set by libavformat
* encoding: set by libavformat in avformat_write_header. The muxer may use the
* user-provided value of @ref AVCodecContext.time_base "codec->time_base"
* as a hint.
*/
AVRational time_base;//time base
/**
* Decoding: pts of the first frame of the stream in presentation order, in stream time base.
* Only set this if you are absolutely 100% sure that the value you set
* it to really is the pts of the first frame.
* This may be undefined (AV_NOPTS_VALUE).
* @note The ASF header does NOT contain a correct start_time the ASF
* demuxer must NOT set this.
*/
int64_t start_time;//time of the first frame, in time_base units
/**
* Decoding: duration of the stream, in stream time base.
* If a source file does not specify a duration, but does specify
* a bitrate, this value will be estimated from bitrate and file size.
*/
int64_t duration;//stream duration, in time_base units
int64_t nb_frames; ///< number of frames in this stream if known or 0
} AVStream;
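The most frequently used field here is time_base: stream timestamps are counted in time_base ticks, and av_q2d() converts the rational to a double. A sketch, assuming an AVStream* st and a decoded AVFrame* frame (pkt_pts is the old-API pts field on AVFrame):

double secPerTick = av_q2d(st->time_base);      // seconds per time_base tick
double durationSec = st->duration * secPerTick; // stream length in seconds
double frameSec = frame->pkt_pts * secPerTick;  // display time of this frame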
FFmpeg Code
Building on the information above, I wrote a small class that decodes a video into YUV data.
FFmpeg.h
#pragma once
#include<string>
#define __STDC_CONSTANT_MACROS
#ifdef __cplusplus
extern "C"
{
#endif
#include "libavcodec/avcodec.h"
#include "libavformat/avformat.h"
#include "libswscale/swscale.h"
#ifdef __cplusplus
}
#endif
#include<memory>
enum class INITERRO {
INIT_ERRO_NO_ERRO,
INIT_ERRO_ALLOC_FAILED,
INIT_ERRO_OPEN_FAILED,
INIT_ERRO_FIND_FAILED,
INIT_ERRO_NO_VIDEO,
INIT_ERRO_NO_AUDIO,
INIT_ERRO_DECODE_FAILED,
INIT_ERRO_OPEN2_FAILED
};
class FFmpeg
{
public:
FFmpeg();
FFmpeg(std::string path);
INITERRO initFFmpeg(const std::string& path="");
std::shared_ptr<uint8_t> getFrameYUV(int& isVideo); // isVideo out-param: 1 = video frame, 0 = non-video packet, -1 = no frame decoded, -2 = end of file
int getWidth();
int getHeight();
~FFmpeg();
private:
std::string filePath;
int videoIndex;
int audioIndex;
AVFormatContext* pFormatCtx;
AVCodecContext* pCodercCtxVideo;
AVCodecContext* pCodercCtxAudio;
AVCodec* pCodercVideo;
AVFrame* pFrame;
AVFrame* pFrameYUV;
AVPacket* pPacket;
uint8_t* outBuffer; // backing store for pFrameYUV's planes, freed in the destructor
struct SwsContext* imgConvertCtx;
};
FFmpeg.cpp
#include "FFmpeg.h"
#include<iostream>
FFmpeg::FFmpeg()
: videoIndex(-1), audioIndex(-1), pFormatCtx(nullptr),
pCodercCtxVideo(nullptr), pCodercCtxAudio(nullptr), pCodercVideo(nullptr),
pFrame(nullptr), pFrameYUV(nullptr), pPacket(nullptr),
outBuffer(nullptr), imgConvertCtx(nullptr)
{
}
FFmpeg::FFmpeg(std::string path) : FFmpeg() // delegate, then remember the path
{
this->filePath = path;
}
INITERRO FFmpeg::initFFmpeg(const std::string& path)
{
if (path.empty() && this->filePath.empty()) {
return INITERRO::INIT_ERRO_OPEN_FAILED;
}
//prefer the path passed in as an argument
std::string pathTemp = path.empty() ? this->filePath : path;
//global initialization
av_register_all();
avformat_network_init();
this->pFormatCtx = avformat_alloc_context();
//open_input
if (avformat_open_input(&this->pFormatCtx, pathTemp.c_str(), NULL,NULL) != 0) {
return INITERRO::INIT_ERRO_OPEN_FAILED;
}
//find_stream_information
if (avformat_find_stream_info(this->pFormatCtx, NULL) < 0) {
return INITERRO::INIT_ERRO_FIND_FAILED;
}
//find_decoder
videoIndex = -1;
audioIndex = -1;
for (unsigned int i = 0; i < this->pFormatCtx->nb_streams; ++i) {
if (this->pFormatCtx->streams[i]->codec->codec_type == AVMEDIA_TYPE_VIDEO) {
videoIndex = i;
}
if (this->pFormatCtx->streams[i]->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
audioIndex = i;
}
}
if (videoIndex == -1) {
return INITERRO::INIT_ERRO_NO_VIDEO; // the rest of the setup needs a video stream
}
pCodercCtxVideo = pFormatCtx->streams[videoIndex]->codec;
pCodercVideo = avcodec_find_decoder(pCodercCtxVideo->codec_id);
if (pCodercVideo == NULL) {
printf("Codec not found.\n");
return INITERRO::INIT_ERRO_DECODE_FAILED;
}
if (audioIndex != -1) {
pCodercCtxAudio = pFormatCtx->streams[audioIndex]->codec;
//audio decoding is not handled yet
}
//open2
if (avcodec_open2(pCodercCtxVideo, pCodercVideo, NULL) < 0) {
return INITERRO::INIT_ERRO_OPEN2_FAILED;
}
//allocate frames and the YUV conversion buffer
pFrame = av_frame_alloc();
pFrameYUV = av_frame_alloc();
outBuffer = (uint8_t*)av_malloc(avpicture_get_size(AV_PIX_FMT_YUV420P, pCodercCtxVideo->width, pCodercCtxVideo->height));
avpicture_fill((AVPicture*)pFrameYUV, outBuffer, AV_PIX_FMT_YUV420P, pCodercCtxVideo->width, pCodercCtxVideo->height);
pPacket = (AVPacket*)av_malloc(sizeof(AVPacket));
//Output Info-----------------------------
/*printf("--------------- File Information ----------------\n");
av_dump_format(pFormatCtx, 0, pathTemp.c_str(), 0);
printf("-------------------------------------------------\n");*/
imgConvertCtx = sws_getContext(pCodercCtxVideo->width, pCodercCtxVideo->height, pCodercCtxVideo->pix_fmt,
pCodercCtxVideo->width, pCodercCtxVideo->height, AV_PIX_FMT_YUV420P, SWS_BICUBIC, NULL, NULL, NULL);
return INITERRO::INIT_ERRO_NO_ERRO;
}
std::shared_ptr<uint8_t> FFmpeg::getFrameYUV(int& isVideo)
{
int y_size = pCodercCtxVideo->width * pCodercCtxVideo->height;
std::shared_ptr<uint8_t> bufferFram(new uint8_t[y_size * 3 / 2], std::default_delete<uint8_t[]>()); // array deleter, since the buffer is new[]-allocated
int ret = -1;
int gotPic = -1;
if (av_read_frame(pFormatCtx, pPacket) < 0) {
isVideo = -2; // end of file (or read error)
return bufferFram;
}
if (pPacket->stream_index == videoIndex) {
ret = avcodec_decode_video2(pCodercCtxVideo, pFrame, &gotPic, pPacket);
if (gotPic) {
sws_scale(imgConvertCtx, (const uint8_t* const*)pFrame->data, pFrame->linesize, 0, pCodercCtxVideo->height,
pFrameYUV->data, pFrameYUV->linesize);
// in YUV420P the U and V planes are each 1/4 the size of Y (half the width, half the height)
memcpy(bufferFram.get(), pFrameYUV->data[0], y_size);
memcpy(bufferFram.get() + y_size, pFrameYUV->data[1], y_size / 4);
memcpy(bufferFram.get() + y_size + y_size / 4, pFrameYUV->data[2], y_size / 4);
isVideo = 1;
}
else {
isVideo = -1;
}
}
else {
isVideo = 0;
}
av_free_packet(pPacket);
return bufferFram;
}
int FFmpeg::getWidth()
{
return this->pCodercCtxVideo->width;
}
int FFmpeg::getHeight()
{
return this->pCodercCtxVideo->height;
}
FFmpeg::~FFmpeg()
{
// guard every release so destruction is safe even if initFFmpeg() was never called
if (imgConvertCtx) sws_freeContext(imgConvertCtx);
if (pFrameYUV) av_frame_free(&pFrameYUV);
if (pFrame) av_frame_free(&pFrame);
if (outBuffer) av_freep(&outBuffer);
if (pPacket) av_freep(&pPacket);
if (pCodercCtxVideo) avcodec_close(pCodercCtxVideo);
if (pCodercCtxAudio) avcodec_close(pCodercCtxAudio);
if (pFormatCtx) avformat_close_input(&pFormatCtx);
}
getFrameYUV() ultimately returns a smart pointer to an array holding one frame's YUV data; feeding those frames to an SDL player is all that is needed to play the video.
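A minimal sketch of driving the class: decode every frame and dump the raw YUV to a file (the file names here are illustrative):

FFmpeg ff;
if (ff.initFFmpeg("test.mp4") == INITERRO::INIT_ERRO_NO_ERRO) {
    FILE* out = fopen("out.yuv", "wb");
    int ySize = ff.getWidth() * ff.getHeight();
    int isVideo = -1;
    while (true) {
        auto buf = ff.getFrameYUV(isVideo);
        if (isVideo == -2) break;                   // end of file
        if (isVideo == 1)                           // got one decoded video frame
            fwrite(buf.get(), 1, ySize * 3 / 2, out);
    }
    fclose(out);
}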
Playing YUV Video with SDL
SDL (Simple DirectMedia Layer) essentially wraps the messy low-level audio/video plumbing and makes audio/video handling much simpler.
The SDL display flow is: SDL_Init() -> SDL_CreateWindow() -> SDL_CreateRenderer() -> SDL_CreateTexture(), then per frame: SDL_UpdateTexture() -> SDL_RenderCopy() -> SDL_RenderPresent().
I am still learning SDL and will dig into it another time; the sample code below simply follows this flow (a compressed sketch of it comes right after this paragraph).
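For reference, the whole flow compressed into one function. getNextYUVFrame() is a hypothetical frame source standing in for the FFmpeg class above:

#include "sdl/SDL.h"
uint8_t* getNextYUVFrame(); // hypothetical: next YUV420P frame, or NULL at end of stream
int playLoop(int w, int h)
{
    SDL_Init(SDL_INIT_VIDEO);
    SDL_Window* win = SDL_CreateWindow("yuv", SDL_WINDOWPOS_UNDEFINED,
        SDL_WINDOWPOS_UNDEFINED, w, h, SDL_WINDOW_RESIZABLE);
    SDL_Renderer* ren = SDL_CreateRenderer(win, -1, 0);
    SDL_Texture* tex = SDL_CreateTexture(ren, SDL_PIXELFORMAT_IYUV,
        SDL_TEXTUREACCESS_STREAMING, w, h);
    while (uint8_t* yuv = getNextYUVFrame()) {
        SDL_UpdateTexture(tex, NULL, yuv, w); // pitch = width of the Y plane
        SDL_RenderClear(ren);
        SDL_RenderCopy(ren, tex, NULL, NULL);
        SDL_RenderPresent(ren);
        SDL_Delay(40);                        // ~25 fps
    }
    SDL_Quit();
    return 0;
}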
SDLPlayer.h
#pragma once
#include<string>
#ifdef __cplusplus
extern "C"
{
#endif
#include"sdl/SDL.h"
#ifdef __cplusplus
}
#endif
//Refresh Event
#define REFRESH_EVENT (SDL_USEREVENT + 1)
//Break
#define BREAK_EVENT (SDL_USEREVENT + 2)
class SDLPlayer
{
public:
SDLPlayer();
int initPlayer(void* winID=NULL);
void setHeight(int height);
void setWidth(int width);
void setPlayerTitle(std::string title);
int playYUV(uint8_t* buffer, SDL_Rect sdlRect, int delayTime = 40);
int playYUV(uint8_t* buffer, int delayTime = 40);
void pause();
void start();
void quit();
static int refreshThread(void* opaque);
~SDLPlayer();
private:
int windowH;
int windowW;
int height;
int width;
std::string title;
//SDL WINDOW
SDL_Window* window;
SDL_Texture* sdlTexture;
SDL_Renderer* sdlRanderer;
//event
SDL_Event event;
int threadExit;
int threadPause;
int delayTime;
};
SDLPlayer.cpp
#include "SDLPlayer.h"
SDLPlayer::SDLPlayer():
height(420),
width(640),
title("SDL player"),
window(nullptr),
sdlTexture(nullptr),
sdlRanderer(nullptr),
threadExit(0),
threadPause(0),
delayTime(40)
{
}
int SDLPlayer::initPlayer(void* winID)
{
if (SDL_Init(SDL_INIT_VIDEO | SDL_INIT_AUDIO | SDL_INIT_TIMER)) {
printf("Could not initialize SDL - %s\n", SDL_GetError());
return -1;
}
windowW = this->width;
windowH = this->height;
if (winID != NULL) {
window = SDL_CreateWindowFrom(winID);
}
else {
window = SDL_CreateWindow(title.c_str(), SDL_WINDOWPOS_UNDEFINED, SDL_WINDOWPOS_UNDEFINED,
windowW, windowH, SDL_WINDOW_OPENGL | SDL_WINDOW_RESIZABLE);
}
if (window == nullptr) {
printf("SDL: could not create window - exiting:%s\n", SDL_GetError());
return -1;
}
sdlRanderer = SDL_CreateRenderer(window, -1, 0);
SDL_ShowWindow(window);
Uint32 pixformat = 0;
pixformat = SDL_PIXELFORMAT_IYUV;
sdlTexture = SDL_CreateTexture(sdlRanderer, pixformat,
SDL_TEXTUREACCESS_STREAMING, this->width, this->height);
//spawn the refresh thread
SDL_CreateThread(SDLPlayer::refreshThread, NULL, this);
return 1;
}
void SDLPlayer::setHeight(int _height)
{
height = _height;
}
void SDLPlayer::setWidth(int _width)
{
this->width = _width;
}
void SDLPlayer::setPlayerTitle(std::string _title)
{
title = _title;
}
int SDLPlayer::playYUV(uint8_t* buffer, SDL_Rect sdlRect, int delayTime)
{
SDL_WaitEvent(&event);
if (event.type == REFRESH_EVENT) {
SDL_UpdateTexture(sdlTexture, NULL, buffer, this->width);
SDL_RenderClear(sdlRanderer);
SDL_RenderCopy(sdlRanderer, sdlTexture, NULL, &sdlRect);
SDL_RenderPresent(sdlRanderer);
//update the inter-frame delay used by the refresh thread (default 40 ms)
this->delayTime = delayTime;
}
else if (event.type == SDL_QUIT) {
this->threadExit = 1;
}
else if (event.type == BREAK_EVENT) {
return -1;
}
else if (event.type == SDL_WINDOWEVENT) {
//If Resize
SDL_GetWindowSize(window, &windowW, &windowH);
}
return 0;
}
int SDLPlayer::playYUV(uint8_t* buffer,int delayTime)
{
SDL_WaitEvent(&event);
if (event.type == REFRESH_EVENT) {
SDL_Rect sdlRect;
SDL_UpdateTexture(sdlTexture, NULL, buffer, this->width);
sdlRect.x = 0;
sdlRect.y = 0;
sdlRect.w = windowW;
sdlRect.h = windowH;
SDL_RenderClear(sdlRanderer);
SDL_RenderCopy(sdlRanderer, sdlTexture, NULL, &sdlRect);
SDL_RenderPresent(sdlRanderer);
//update the inter-frame delay used by the refresh thread (default 40 ms)
this->delayTime = delayTime;
}
else if (event.type == SDL_QUIT) {
this->threadExit = 1;
}
else if (event.type == SDL_WINDOWEVENT) {
//If Resize
SDL_GetWindowSize(window, &windowW, &windowH);
}
else if (event.type == BREAK_EVENT) {
return -1;
}
return 0;
}
void SDLPlayer::pause()
{
if (this->threadPause == 1) {
this->threadPause = 0;
}
else {
this->threadPause = 1;
}
}
void SDLPlayer::start()
{
this->threadPause = 0;
}
void SDLPlayer::quit()
{
this->threadExit = 1;
}
int SDLPlayer::refreshThread(void* opaque)
{
SDLPlayer* sdl = (SDLPlayer*)opaque;
while (!sdl->threadExit)
{
if (!sdl->threadPause) {
SDL_Event _event;
_event.type = REFRESH_EVENT;
SDL_PushEvent(&_event);
}
SDL_Delay(sdl->delayTime); // sleep while paused too, so the loop does not spin at 100% CPU
}
SDL_Event event;
event.type = BREAK_EVENT;
SDL_PushEvent(&event);
return 0;
}
SDLPlayer::~SDLPlayer()
{
if (sdlTexture) SDL_DestroyTexture(sdlTexture);
if (sdlRanderer) SDL_DestroyRenderer(sdlRanderer);
SDL_DestroyWindow(window);
SDL_Quit();
}
SDL + FFmpeg are enough for a working video player; a standalone sketch of gluing the two classes together follows.
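This is essentially what PlayThread::run() below does, minus Qt (the file name is illustrative):

#include "FFmpeg.h"
#include "SDLPlayer.h"
int main(int, char**)
{
    FFmpeg ff;
    if (ff.initFFmpeg("Titanic.ts") != INITERRO::INIT_ERRO_NO_ERRO) return -1;
    SDLPlayer player;
    player.setWidth(ff.getWidth());
    player.setHeight(ff.getHeight());
    player.initPlayer();               // NULL winID -> standalone SDL window
    int isVideo = -1;
    while (true) {
        auto buf = ff.getFrameYUV(isVideo);
        if (isVideo == -2) break;      // demuxing finished
        if (isVideo == 1 && player.playYUV(buf.get()) == -1) break;
    }
    return 0;
}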
Embedding SDL in Qt
playThread = new PlayThread;
ui.setupUi(this);
//core: hand the Qt widget's native window handle to SDL
playThread->setWindow((void*)ui.labelPlay->winId());
putenv(winID); // presumably sets the legacy SDL1-style SDL_WINDOWID variable; SDL2's SDL_CreateWindowFrom() makes this unnecessary
connect(ui.pushButtonPlay, &QPushButton::clicked, this, &FFmpegPlayer::play);
connect(ui.pushButtonPause, &QPushButton::clicked, this, &FFmpegPlayer::pause);
connect(ui.pushButtonClose, &QPushButton::clicked, this, &FFmpegPlayer::playerExit);
connect(ui.pushButtonOpen, &QPushButton::clicked, this, &FFmpegPlayer::openFile);
Qt Thread Function
PlayThread.h
#pragma once
#include <qthread.h>
#include"FFmpeg.h"
#include"SDLPlayer.h"
#include<QString>
class PlayThread :
public QThread
{
public:
PlayThread();
~PlayThread();
void setWindow(void*);
virtual void run();
void pause();
void stop();
void setFilePath(QString filePath);
int isStop = 0;
private:
QString filePath;
void* winID;
SDLPlayer sdlPlayer;
};
PlayThread.cpp
#include "PlayThread.h"
PlayThread::PlayThread():filePath("Titanic.ts"),winID(NULL)
{
}
PlayThread::~PlayThread()
{
}
void PlayThread::setWindow(void* winID)
{
this->winID = winID;
}
void PlayThread::run()
{
FFmpeg ffmpeg;
auto r = ffmpeg.initFFmpeg(this->filePath.toStdString());
if (r != INITERRO::INIT_ERRO_NO_ERRO) {
return;
}
//SDL player
sdlPlayer.setWidth(ffmpeg.getWidth());
sdlPlayer.setHeight(ffmpeg.getHeight());
sdlPlayer.setPlayerTitle("myplayer");
sdlPlayer.initPlayer(this->winID);
int isVideo = -1;
while (true) // fetch and play inside the loop, so the first decoded frame is not dropped
{
auto buffer = ffmpeg.getFrameYUV(isVideo);
if (isVideo == -2) {
break; // av_read_frame() reported end of stream
}
if (isVideo == 1) {
auto r = sdlPlayer.playYUV(buffer.get());
if (r == -1) {
break;
}
}
}
}
void PlayThread::pause()
{
sdlPlayer.pause();
}
void PlayThread::stop()
{
sdlPlayer.quit();
}
void PlayThread::setFilePath(QString filePath)
{
this->filePath = filePath;
}
Results
For now only the video plays; the next step is audio/video synchronization. Comments and suggestions are welcome.