一个基于JRTPLIB的轻量级RTSP客户端(myRTSPClient)——解码篇：（二）用ffmpeg解码音频

其实这篇的内容和（一）用ffmpeg解码视频基本是一样的，重点还是给ffmpeg指定callback函数，而这个函数是从RTSP服务端那里获取音频数据的。
这里，解码音频的示例代码之所以比解码视频的略微复杂，主要是因为用ffmpeg解码音频本身就比解码视频复杂一些（例如解码后的音频帧还需要重采样成声卡支持的格式），具体可以参见ffmpeg解码音频示例以及官网示例代码。
具体内容将不再赘述，源码如下：
  1 extern "C"
  2 {
  3 #include <libavcodec/avcodec.h>
  4 #include <libavformat/avformat.h>
  5 #include <libavformat/avio.h>
  6 #include <libswscale/swscale.h>
  7 #include <libswresample/swresample.h>
  8 }
  9 
 10 #include <SDL.h>
 11 #include <SDL_thread.h>
 12 
 13 #ifdef __MINGW32__
 14 #undef main /* Prevents SDL from overriding main() */
 15 #endif
 16 
 17 #include <stdio.h>
 18 #include <assert.h>
 19 #include <sys/types.h>
 20 #include <sys/stat.h>
 21 #include <fcntl.h>
 22 
 23 
 24 // compatibility with newer API
 25 #if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(55,28,1)
 26 #define av_frame_alloc avcodec_alloc_frame
 27 #define av_frame_free avcodec_free_frame
 28 #endif
 29 
 30 #define SDL_AUDIO_BUFFER_SIZE 1024
 31 #define MAX_AUDIO_FRAME_SIZE 192000
 32 
 33 #include <signal.h>
 34 #include "rtspClient.h"
 35 #include <iostream>
 36 #include <string>
 37 
 38 using std::cout;
 39 using std::endl;
 40 
 41 int rtspClientRequest(RtspClient * Client, string url);
 42 int fill_iobuffer(void * opaque, uint8_t * buf, int bufsize);
 43 
 44 typedef struct AudioParams {
 45     int freq;
 46     int channels;
 47     int64_t channel_layout;
 48     enum AVSampleFormat fmt;
 49     int frame_size;
 50     int bytes_per_sec;
 51 } AudioParams;
 52 int sample_rate, nb_channels;
 53 int64_t channel_layout;
 54 AudioParams audio_hw_params_tgt;
 55 AudioParams audio_hw_params_src;
 56 
 57 int resample(AVFrame * af, uint8_t * audio_buf, int * audio_buf_size);
 58 
 59 struct SwrContext * swr_ctx = NULL;
 60 
 61 int resample(AVFrame * af, uint8_t * audio_buf, int * audio_buf_size)
 62 {
 63     int data_size = 0;
 64     int resampled_data_size = 0;
 65     int64_t dec_channel_layout;
 66     data_size = av_samples_get_buffer_size(NULL, 
 67             av_frame_get_channels(af),
 68             af->nb_samples,
 69             (AVSampleFormat)af->format,
 70             1);
 71 
 72     dec_channel_layout =
 73         (af->channel_layout && av_frame_get_channels(af) == av_get_channel_layout_nb_channels(af->channel_layout)) ?
 74         af->channel_layout : av_get_default_channel_layout(av_frame_get_channels(af));
 75     if(     af->format              != audio_hw_params_src.fmt                 ||
 76             af->sample_rate     != audio_hw_params_src.freq              ||
 77             dec_channel_layout     != audio_hw_params_src.channel_layout     ||
 78             !swr_ctx) {
 79         swr_free(&swr_ctx);
 80         swr_ctx = swr_alloc_set_opts(NULL, 
 81                                         audio_hw_params_tgt.channel_layout, (AVSampleFormat)audio_hw_params_tgt.fmt, audio_hw_params_tgt.freq, 
 82                                         dec_channel_layout, (AVSampleFormat)af->format, af->sample_rate, 
 83                                         0, NULL);
 84         if (!swr_ctx || swr_init(swr_ctx) < 0) {
 85             av_log(NULL, AV_LOG_ERROR,
 86                    "Cannot create sample rate converter for conversion of %d Hz %s %d channels to %d Hz %s %d channels!
",
 87                     af->sample_rate, av_get_sample_fmt_name((AVSampleFormat)af->format), av_frame_get_channels(af),
 88                     audio_hw_params_tgt.freq, av_get_sample_fmt_name(audio_hw_params_tgt.fmt), audio_hw_params_tgt.channels);
 89             swr_free(&swr_ctx);
 90             return -1;
 91         }
 92         printf("swr_init
");
 93         audio_hw_params_src.channels = av_frame_get_channels(af);
 94         audio_hw_params_src.fmt = (AVSampleFormat)af->format;
 95         audio_hw_params_src.freq = af->sample_rate;
 96     }
 97 
 98     if (swr_ctx) {
 99         const uint8_t **in = (const uint8_t **)af->extended_data;
100         uint8_t **out = &audio_buf;
101         int out_count = (int64_t)af->nb_samples * audio_hw_params_tgt.freq / af->sample_rate + 256;
102         int out_size  = av_samples_get_buffer_size(NULL, audio_hw_params_tgt.channels, out_count, audio_hw_params_tgt.fmt, 0);
103         int len2;
104         if (out_size < 0) {
105             av_log(NULL, AV_LOG_ERROR, "av_samples_get_buffer_size() failed
");
106             return -1;
107         }
108         av_fast_malloc(&audio_buf, (unsigned int*)audio_buf_size, out_size);
109         if (!audio_buf)
110             return AVERROR(ENOMEM);
111         len2 = swr_convert(swr_ctx, out, out_count, in, af->nb_samples);
112         if (len2 < 0) {
113             av_log(NULL, AV_LOG_ERROR, "swr_convert() failed
");
114             return -1;
115         }
116         if (len2 == out_count) {
117             av_log(NULL, AV_LOG_WARNING, "audio buffer is probably too small
");
118             if (swr_init(swr_ctx) < 0)
119                 swr_free(&swr_ctx);
120         }
121         resampled_data_size = len2 * audio_hw_params_tgt.channels * av_get_bytes_per_sample(audio_hw_params_tgt.fmt);
122     } else {
123         audio_buf = af->data[0];
124         resampled_data_size = data_size;
125     }
126 
127     return resampled_data_size;
128 }
129 
/*
 * Handler installed for SIGINT and SIGTERM: terminates the process with
 * exit status 123.
 * NOTE(review): exit() is called from signal context here — confirm this is
 * acceptable for the target platform.
 */
static void sigterm_handler(int sig)
{
    (void)sig; /* the signal number is not needed */
    exit(123);
}
134 
135 typedef struct PacketQueue {
136   AVPacketList *first_pkt, *last_pkt;
137   int nb_packets;
138   int size;
139   SDL_mutex *mutex;
140   SDL_cond *cond;
141 } PacketQueue;
142 
143 PacketQueue audioq;
144 
145 int quit = 0;
146 
147 void packet_queue_init(PacketQueue *q) {
148   memset(q, 0, sizeof(PacketQueue));
149   q->mutex = SDL_CreateMutex();
150   q->cond = SDL_CreateCond();
151 }
152 
153 int packet_queue_put(PacketQueue *q, AVPacket *pkt) {
154 
155   AVPacketList *pkt1;
156   if(av_dup_packet(pkt) < 0) {
157     return -1;
158   }
159   pkt1 = (AVPacketList *)av_malloc(sizeof(AVPacketList));
160   if (!pkt1)
161     return -1;
162   pkt1->pkt = *pkt;
163   pkt1->next = NULL;
164   
165   
166   SDL_LockMutex(q->mutex);
167   
168   if (!q->last_pkt)
169     q->first_pkt = pkt1;
170   else
171     q->last_pkt->next = pkt1;
172   q->last_pkt = pkt1;
173   q->nb_packets++;
174   q->size += pkt1->pkt.size;
175   SDL_CondSignal(q->cond);
176   
177   SDL_UnlockMutex(q->mutex);
178   return 0;
179 }
180 
181 int packet_queue_put_nullpacket(PacketQueue *q, int stream_index)
182 {
183     AVPacket pkt1, *pkt = &pkt1;
184     av_init_packet(pkt);
185     pkt->data = NULL;
186     pkt->size = 0; 
187     pkt->stream_index = stream_index;
188     return packet_queue_put(q, pkt);
189 }
190 
191 static int packet_queue_get(PacketQueue *q, AVPacket *pkt, int block)
192 {
193   AVPacketList *pkt1;
194   int ret;
195   
196   SDL_LockMutex(q->mutex);
197   
198   for(;;) {
199     
200     if(quit) {
201       ret = -1;
202       break;
203     }
204 
205     pkt1 = q->first_pkt;
206     if (pkt1) {
207       q->first_pkt = pkt1->next;
208       if (!q->first_pkt)
209     q->last_pkt = NULL;
210       q->nb_packets--;
211       q->size -= pkt1->pkt.size;
212       *pkt = pkt1->pkt;
213       av_free(pkt1);
214       ret = 1;
215       break;
216     } else if (!block) {
217       ret = 0;
218       break;
219     } else {
220       SDL_CondWait(q->cond, q->mutex);
221     }
222   }
223   SDL_UnlockMutex(q->mutex);
224   return ret;
225 }
226 
227 AVFrame frame;
228 int audio_decode_frame(AVCodecContext *aCodecCtx, uint8_t *audio_buf, int buf_size) {
229 
230     static AVPacket pkt;
231     static uint8_t *audio_pkt_data = NULL;
232     static int audio_pkt_size = 0;
233 
234     int len1, data_size = 0;
235 
236     for(;;) {
237         while(audio_pkt_size > 0) {
238             int got_frame = 0;
239             len1 = avcodec_decode_audio4(aCodecCtx, &frame, &got_frame, &pkt);
240             if(len1 < 0) {
241                 /* if error, skip frame */
242                 audio_pkt_size = 0;
243                 break;
244             }
245             audio_pkt_data += len1;
246             audio_pkt_size -= len1;
247             data_size = 0;
248             if(got_frame) {
249                 data_size = resample(&frame, audio_buf, &buf_size);
250                 // data_size = av_samples_get_buffer_size(NULL, 
251                 //         aCodecCtx->channels,
252                 //         frame.nb_samples,
253                 //         aCodecCtx->sample_fmt,
254                 //         1);
255                 assert(data_size <= buf_size);
256                 // memcpy(audio_buf, frame.data[0], data_size);
257             }
258             if(data_size <= 0) {
259                 /* No data yet, get more frames */
260                 continue;
261             }
262             // memcpy(audio_buf, frame.data[0], data_size);
263 
264             /* We have data, return it and come back for more later */
265             return data_size;
266         }
267         if(pkt.data)
268             av_free_packet(&pkt);
269 
270         if(quit) {
271             return -1;
272         }
273 
274         if(packet_queue_get(&audioq, &pkt, 1) < 0) {
275             return -1;
276         }
277         audio_pkt_data = pkt.data;
278         audio_pkt_size = pkt.size;
279     }
280 }
281 
282 void audio_callback(void *userdata, Uint8 *stream, int len) {
283 
284   AVCodecContext *aCodecCtx = (AVCodecContext *)userdata;
285   int len1, audio_size;
286 
287   static uint8_t audio_buf[(MAX_AUDIO_FRAME_SIZE * 3) / 2];
288   static unsigned int audio_buf_size = 0;
289   static unsigned int audio_buf_index = 0;
290 
291   while(len > 0) {
292       if(audio_buf_index >= audio_buf_size) {
293           /* We have already sent all our data; get more */
294           audio_size = audio_decode_frame(aCodecCtx, audio_buf, sizeof(audio_buf));
295           if(audio_size < 0) {
296               /* If error, output silence */
297               audio_buf_size = 1024; // arbitrary?
298               memset(audio_buf, 0, audio_buf_size);
299           } else {
300               audio_buf_size = audio_size;
301           }
302           audio_buf_index = 0;
303       }
304       len1 = audio_buf_size - audio_buf_index;
305       if(len1 > len)
306           len1 = len;
307       memcpy(stream, (uint8_t *)audio_buf + audio_buf_index, len1);
308       len -= len1;
309       stream += len1;
310       audio_buf_index += len1;
311   }
312 }
313 
314 int main(int argc, char *argv[]) {
315 
316   AVFormatContext *pFormatCtx = NULL;
317   int             i, audioStream;
318   AVPacket        packet;
319   
320   AVCodecContext  *aCodecCtxOrig = NULL;
321   AVCodecContext  *aCodecCtx = NULL;
322   AVCodec         *aCodec = NULL;
323 
324   SDL_Event       event;
325   SDL_AudioSpec   wanted_spec, spec;
326 
327   AVInputFormat *piFmt = NULL;
328   RtspClient Client;
329 
330   signal(SIGINT , sigterm_handler); /* Interrupt (ANSI).    */
331   signal(SIGTERM, sigterm_handler); /* Termination (ANSI).  */
332 
333   if(argc != 2) {
334       cout << "Usage: " << argv[0] << " <URL>" << endl;
335       cout << "For example: " << endl;
336       cout << argv[0] << " rtsp://127.0.0.1/ansersion" << endl;
337       return 1;
338   }
339   rtspClientRequest(&Client, argv[1]);
340   // Register all formats and codecs
341   av_register_all();
342   
343   if(SDL_Init(SDL_INIT_AUDIO)) {
344     fprintf(stderr, "Could not initialize SDL - %s
", SDL_GetError());
345     exit(1);
346   }
347 
348   // // Open video file
349   // if(avformat_open_input(&pFormatCtx, argv[1], NULL, NULL)!=0)
350   //   return -1; // Couldn't open file
351   
352   pFormatCtx = NULL;
353   pFormatCtx = avformat_alloc_context();
354   unsigned char * iobuffer = (unsigned char *)av_malloc(32768);
355   AVIOContext * avio = avio_alloc_context(iobuffer, 32768, 0, &Client, fill_iobuffer, NULL, NULL);
356   pFormatCtx->pb = avio;
357 
358   if(!avio) {
359       printf("avio_alloc_context error!!!
");
360       return -1;
361   }
362 
363   if(av_probe_input_buffer(avio, &piFmt, "", NULL, 0, 0) < 0) {
364       printf("av_probe_input_buffer error!
");
365       return -1;
366   } else {
367       printf("probe success
");
368       printf("format: %s[%s]
", piFmt->name, piFmt->long_name);
369   }
370 
371   int err = avformat_open_input(&pFormatCtx, "nothing", NULL, NULL);
372   if(err) {
373       printf("avformat_open_input error: %d
", err);
374       return -1;
375   }
376   // Retrieve stream information
377   if(avformat_find_stream_info(pFormatCtx, NULL)<0)
378     return -1; // Couldn't find stream information
379   
380   // Dump information about file onto standard error
381   // av_dump_format(pFormatCtx, 0, argv[1], 0);
382   av_dump_format(pFormatCtx, 0, "", 0);
383     
384   // Find the first video stream
385   audioStream=-1;
386   for(i=0; i<pFormatCtx->nb_streams; i++) {
387     if(pFormatCtx->streams[i]->codec->codec_type==AVMEDIA_TYPE_AUDIO &&
388        audioStream < 0) {
389       audioStream=i;
390     }
391   }
392   // if(videoStream==-1)
393   //   return -1; // Didn't find a video stream
394   if(audioStream==-1)
395     return -1;
396    
397   aCodecCtxOrig=pFormatCtx->streams[audioStream]->codec;
398   aCodec = avcodec_find_decoder(aCodecCtxOrig->codec_id);
399   if(!aCodec) {
400     fprintf(stderr, "Unsupported codec!
");
401     return -1;
402   }
403 
404   // Copy context
405   aCodecCtx = avcodec_alloc_context3(aCodec);
406   if(avcodec_copy_context(aCodecCtx, aCodecCtxOrig) != 0) {
407     fprintf(stderr, "Couldn't copy codec context");
408     return -1; // Error copying codec context
409   }
410 
411   avcodec_open2(aCodecCtx, aCodec, NULL);
412 
413   sample_rate = aCodecCtx->sample_rate;
414   nb_channels = aCodecCtx->channels;
415   channel_layout = aCodecCtx->channel_layout;
416 
417   // printf("channel_layout=%" PRId64 "
", channel_layout);
418   printf("channel_layout=%lld
", channel_layout);
419   printf("nb_channels=%d
", nb_channels);
420   printf("freq=%d
", sample_rate);
421 
422   if (!channel_layout || nb_channels != av_get_channel_layout_nb_channels(channel_layout)) {
423       channel_layout = av_get_default_channel_layout(nb_channels);
424       channel_layout &= ~AV_CH_LAYOUT_STEREO_DOWNMIX;
425       printf("correction
");
426   }
427 
428   // Set audio settings from codec info
429   wanted_spec.freq = sample_rate;
430   wanted_spec.format = AUDIO_S16SYS;
431   wanted_spec.channels = nb_channels;
432   wanted_spec.silence = 0;
433   wanted_spec.samples = SDL_AUDIO_BUFFER_SIZE;
434   wanted_spec.callback = audio_callback;
435   wanted_spec.userdata = aCodecCtx;
436   
437   if(SDL_OpenAudio(&wanted_spec, &spec) < 0) {
438     fprintf(stderr, "SDL_OpenAudio: %s
", SDL_GetError());
439     return -1;
440   }
441   printf("freq: %d	channels: %d
", spec.freq, spec.channels);
442 
443   audio_hw_params_tgt.fmt = AV_SAMPLE_FMT_S16;
444   audio_hw_params_tgt.freq = spec.freq;
445   audio_hw_params_tgt.channel_layout = channel_layout;
446   audio_hw_params_tgt.channels =  spec.channels;
447   audio_hw_params_tgt.frame_size = av_samples_get_buffer_size(NULL, audio_hw_params_tgt.channels, 1, audio_hw_params_tgt.fmt, 1);
448   audio_hw_params_tgt.bytes_per_sec = av_samples_get_buffer_size(NULL, audio_hw_params_tgt.channels, audio_hw_params_tgt.freq, audio_hw_params_tgt.fmt, 1);
449   if (audio_hw_params_tgt.bytes_per_sec <= 0 || audio_hw_params_tgt.frame_size <= 0) {
450       printf("size error
");
451       return -1;
452   }
453   audio_hw_params_src = audio_hw_params_tgt;
454 
455   // audio_st = pFormatCtx->streams[index]
456   packet_queue_init(&audioq);
457   SDL_PauseAudio(0);
458 
459   // Read frames and save first five frames to disk
460   i=0;
461   int ret = 1;
462   // while(av_read_frame(pFormatCtx, &packet)>=0) {
463   while(ret >= 0) {
464       ret = av_read_frame(pFormatCtx, &packet);
465 
466       if(ret < 0) {
467           /* av_read_frame may get error when RTP data are blocked due to the network busy */
468           if(ret == AVERROR_EOF || avio_feof(pFormatCtx->pb)) {
469               packet_queue_put_nullpacket(&audioq, audioStream);
470               printf("continue ret=%d
", ret);
471               ret = 0;
472               continue;
473           }
474           printf("ret=%d
", ret);
475           break;
476       }
477       printf("av_read_frame
");
478       if(packet.stream_index==audioStream) {
479           packet_queue_put(&audioq, &packet);
480       } else {
481           av_free_packet(&packet);
482       }
483       // Free the packet that was allocated by av_read_frame
484       SDL_PollEvent(&event);
485       switch(event.type) {
486           case SDL_QUIT:
487               printf("SDL_QUIT
");
488               quit = 1;
489               SDL_Quit();
490               exit(0);
491               break;
492           default:
493               printf("SDL_Default
");
494               break;
495       }
496 
497   }
498 
499   while(1) SDL_Delay(1000);
500   
501   // Close the codecs
502   avcodec_close(aCodecCtxOrig);
503   avcodec_close(aCodecCtx);
504   
505   // Close the video file
506   avformat_close_input(&pFormatCtx);
507   
508   return 0;
509 }
510 
511 int rtspClientRequest(RtspClient * Client, string url)
512 {
513     if(!Client) return -1;
514 
515     // cout << "Start play " << url << endl;
516     string RtspUri(url);
517     // string RtspUri("rtsp://192.168.81.145/ansersion");
518 
519     /* Set up rtsp server resource URI */
520     Client->SetURI(RtspUri);
521     
522     /* Send DESCRIBE command to server */
523     Client->DoDESCRIBE();
524 
525     /* Parse SDP message after sending DESCRIBE command */
526     Client->ParseSDP();
527 
528     /* Send SETUP command to set up all 'audio' and 'video' 
529      * sessions which SDP refers. */
530     Client->DoSETUP();
531 
532     /* Send PLAY command to play only 'video' sessions.*/
533     Client->DoPLAY("audio");
534 
535     return 0;
536 }
537 
538 int fill_iobuffer(void * opaque, uint8_t * buf, int bufsize) {
539     size_t size = 0;
540     if(!opaque) return -1;
541     RtspClient * Client = (RtspClient *)opaque;
542     if(!Client->GetMediaData("audio", buf, &size, bufsize)) size = 0;
543     printf("fill_iobuffer size: %u
", size);
544     return size;
545 }
注：
1，兼容myRtspClient-1.2.1及以上版本，且仅支持接收mp2,mp3音频；
2，音频解码原理可参见：http://www.cnblogs.com/ansersion/p/5265033.html；
3，示例源码编译需要SDL和ffmpeg，具体可参见解码视频的附录二；
4，博主的编译环境为 x86_64 架构的 Ubuntu 16.04，仅供参考。
myRtspClient-1.2.3
ffmpeg-2.8.5
下载源码以及Makefile
编译、配置和运行同上一篇：用ffmpeg解码视频