FFMPEG学习----分离视音频里的PCM数据

/**
*  参考于：http://blog.csdn.net/leixiaohua1020/article/details/46890259
*/
#include <stdio.h>
#include <string.h>

extern "C"
{
#include "libavcodec/avcodec.h"
#include "libavformat/avformat.h"
#include "libswresample/swresample.h"
};

#pragma comment(lib, "avcodec.lib")
#pragma comment(lib, "avformat.lib")
#pragma comment(lib, "swresample.lib")
#pragma comment(lib, "avutil.lib")

// Size in bytes of the buffer that receives converted PCM:
// 1 second of 48 kHz, 32-bit (4-byte) audio = 48000 * 4 = 192000 bytes.
#define MAX_AUDIO_FRAME_SIZE 192000

int main(int argc, char* argv[])
{
	AVFormatContext		*pFormatCtx = NULL;
	AVCodecContext		*pCodecCtx = NULL;
	AVCodec				*pCodec = NULL;
	AVPacket			packet;
	AVFrame				*pAudioFrame = NULL;
	uint8_t				*buffer = NULL;
	struct SwrContext	*audio_convert_ctx = NULL;
	int					got_picture;
	int					audioIndex;


	char filepath[1024] = "";
	printf("Usage: program.exe *.mp3
");
	if (argc == 2)
	{
		strcpy(filepath, argv[1]);
	}
	else
	{
		printf("Could not find a audio file
");
		return -1;
	}

	FILE *fp_pcm = fopen("output.pcm", "wb+");
	if (fp_pcm == NULL)
	{
		printf("FILE open error");
		return -1;
	}


	av_register_all();

	if (avformat_open_input(&pFormatCtx, filepath, NULL, NULL) != 0)
	{
		printf("Couldn't open an input stream.
");
		return -1;
	}
	if (avformat_find_stream_info(pFormatCtx, NULL) < 0)
	{
		printf("Couldn't find stream information.
");
		return -1;
	}
	audioIndex = -1;
	for (int i = 0; i < pFormatCtx->nb_streams; i++)
	{
		if (pFormatCtx->streams[i]->codec->codec_type == AVMEDIA_TYPE_AUDIO)
		{
			audioIndex = i;
			break;
		}
	}

	if (audioIndex == -1)
	{
		printf("Couldn't find a audio stream.
");
		return -1;
	}

	pCodecCtx = pFormatCtx->streams[audioIndex]->codec;
	pCodec = avcodec_find_decoder(pCodecCtx->codec_id);
	if (pCodec == NULL)
	{
		printf("Codec not found.
");
		return -1;
	}
	if (avcodec_open2(pCodecCtx, pCodec, NULL) < 0)
	{
		printf("Could not open codec.
");
		return -1;
	}

	pAudioFrame = av_frame_alloc();
	if (pAudioFrame == NULL)
	{
		printf("Could not alloc AVFrame
");
		return -1;
	}
	
	//音频输出参数
	uint64_t out_channel_layout = AV_CH_LAYOUT_STEREO;//声道格式
	AVSampleFormat out_sample_fmt = AV_SAMPLE_FMT_S16;//采样格式
	int out_nb_samples = pCodecCtx->frame_size;//nb_samples: AAC-1024 MP3-1152 
	int out_sample_rate = 44100;//采样率
	int out_nb_channels = av_get_channel_layout_nb_channels(out_channel_layout);//根据声道格式返回声道个数
	int out_buffer_size = av_samples_get_buffer_size(NULL, out_nb_channels, out_nb_samples, out_sample_fmt, 1);


	buffer = (uint8_t *)av_malloc(MAX_AUDIO_FRAME_SIZE);
	
	/**
	* 函数声明：struct SwrContext *swr_alloc(void);
	* Allocate SwrContext.
	*
	* If you use this function you will need to set the parameters (manually or
	* with swr_alloc_set_opts()) before calling swr_init().
	*
	* @see swr_alloc_set_opts(), swr_init(), swr_free()
	* @return NULL on error, allocated context otherwise
	*/
	
	audio_convert_ctx = swr_alloc();
	if (audio_convert_ctx == NULL)
	{
		printf("Could not allocate SwrContext
");
		return -1;
	}

	/**
	* 函数声明：struct SwrContext *swr_alloc_set_opts(
	* struct SwrContext *s,int64_t out_ch_layout, enum AVSampleFormat out_sample_fmt, int out_sample_rate,
	* int64_t  in_ch_layout, enum AVSampleFormat  in_sample_fmt, int  in_sample_rate,
	* int log_offset, void *log_ctx);
	*
	* Allocate SwrContext if needed and set/reset common parameters.
	*
	* This function does not require s to be allocated with swr_alloc(). On the
	* other hand, swr_alloc() can use swr_alloc_set_opts() to set the parameters
	* on the allocated context.
	*
	* @param s               existing Swr context if available, or NULL if not
	* @param out_ch_layout   output channel layout (AV_CH_LAYOUT_*)
	* @param out_sample_fmt  output sample format (AV_SAMPLE_FMT_*).
	* @param out_sample_rate output sample rate (frequency in Hz)
	* @param in_ch_layout    input channel layout (AV_CH_LAYOUT_*)
	* @param in_sample_fmt   input sample format (AV_SAMPLE_FMT_*).
	* @param in_sample_rate  input sample rate (frequency in Hz)
	* @param log_offset      logging level offset
	* @param log_ctx         parent logging context, can be NULL
	*
	* @see swr_init(), swr_free()
	* @return NULL on error, allocated context otherwise
	*/

	/*
	int64_t in_channel_layout = av_get_default_channel_layout(pCodecCtx->channels);//根据声道数返回默认输入声道格式
	swr_alloc_set_opts(audio_convert_ctx, out_channel_layout, out_sample_fmt, out_sample_rate,
		in_channel_layout, pCodecCtx->sample_fmt, pCodecCtx->sample_rate, 0, NULL);
	*/

	swr_alloc_set_opts(audio_convert_ctx, out_channel_layout, out_sample_fmt,out_sample_rate, 
		pCodecCtx->channel_layout, pCodecCtx->sample_fmt, pCodecCtx->sample_rate, 0, NULL);
		

	/**
	* 函数声明：int swr_init(struct SwrContext *s);
	* Initialize context after user parameters have been set.
	* @note The context must be configured using the AVOption API.
	*
	* @see av_opt_set_int()
	* @see av_opt_set_dict()
	*
	* @param[in,out]   s Swr context to initialize
	* @return AVERROR error code in case of failure.
	*/
	swr_init(audio_convert_ctx);
	
	int	index = 0;//计数器
	while (av_read_frame(pFormatCtx, &packet) >= 0)
	{
		if (packet.stream_index == audioIndex)
		{
			if (avcodec_decode_audio4(pCodecCtx, pAudioFrame, &got_picture, &packet) < 0)
			{
				printf("Error in decoding audio frame.
");
				return -1;
			}
			if (got_picture)
			{
				/** Convert audio.
				* 函数声明：int swr_convert(struct SwrContext *s, uint8_t **out, int out_count,
				*                           const uint8_t **in, int in_count);
				* in and in_count can be set to 0 to flush the last few samples out at the
				* end.
				*
				* If more input is provided than output space, then the input will be buffered.
				* You can avoid this buffering by using swr_get_out_samples() to retrieve an
				* upper bound on the required number of output samples for the given number of
				* input samples. Conversion will run directly without copying whenever possible.
				*
				* @param s         allocated Swr context, with parameters set
				* @param out       output buffers, only the first one need be set in case of packed audio
				* @param out_count amount of space available for output in samples per channel
				* @param in        input buffers, only the first one need to be set in case of packed audio
				* @param in_count  number of input samples available in one channel
				*
				* @return number of samples output per channel, negative value on error
				*/
				
				swr_convert(audio_convert_ctx, &buffer, MAX_AUDIO_FRAME_SIZE, (const uint8_t **)pAudioFrame->data, pAudioFrame->nb_samples);
				printf("index:%5d	 pts:%lld	 packet size:%d
", index, packet.pts, packet.size);
				//Write PCM  
				fwrite(buffer, 1, out_buffer_size, fp_pcm);
				index++;
			}
		}
		av_free_packet(&packet);
	}

	fclose(fp_pcm);
	swr_free(&audio_convert_ctx);
	av_free(buffer);
	av_frame_free(&pAudioFrame);
	avcodec_close(pCodecCtx);
	avformat_close_input(&pFormatCtx);

	return 0;
}

与解析视频里的YUV/RGB（http://blog.csdn.net/x_iya/article/details/52248929）相同的是，解析出音频的AVFrame同样需要转换。

由于ffmpeg最新版本（从2.1开始貌似）使用avcodec_decode_audio4函数来解码音频，但解码得到的数据类型通常为float 32bit（4字节），而播放器播放的格式一般为S16（signed 16bit），就需要对解码得到的数据进行转换。然而，ffmpeg已经帮我们做好了，只需调用API就可以了，这个函数就是：swr_convert

输出：

使用Audacity打开（注意参数）

问题：

1.有些格式的视频不符合标准，获得的pCodecCtx->frame_size为0

雷老师，我使用wmv格式的视频进行测试，结果不能得到正确的pcm文件，文件大小始终为0。发现是out_nb_samples = pCodecCtx->frame_size出了问题：其中pCodecCtx->frame_size为0，导致av_samples_get_buffer_size算出的大小是负数。问过有经验的组长，他告诉我有些格式的视频不符合标准，不能从文件头中获取到信息，要在读入一帧后获取，也就是在ret = avcodec_decode_audio4(pCodecCtx, pFrame, &got_picture, packet);之后，从pFrame中获取信息（例如pFrame->nb_samples）。
发现错误，重新修改程序，就能得到数据了。

2.对于采样率为48000Hz的视频，解析为采样率为44100Hz的pcm时出现杂音，将out_sample_rate设置为48000则没有问题：

int out_sample_rate = 48000;

原因：改变采样率后，每帧实际输出的采样数不再等于frame_size，应按swr_convert的返回值计算写入的字节数，而不是每次固定写入out_buffer_size字节。

Keep it simple!

作者：N3verL4nd

出处：http://www.cnblogs.com/lgh1992314/

知识共享，欢迎转载。