Conclusion

  • The captured PCM plays back fine, but the AAC produced by the encoder would not play.
  • The encoder is FFmpeg's AAC encoder, which only accepts the AV_SAMPLE_FMT_FLTP sample format, while the data captured on Windows is AV_SAMPLE_FMT_S16, so the samples have to be resampled first.
  • The AAC packets coming out of the encoder carry no ADTS header, so after each encoded frame a 7-byte ADTS header is written by hand (the ADTS header records the sample rate, the encoding profile and the channel count); see the first sketch after this list.
  • Each call to av_read_frame(fmt_ctx, pkt) returns 88200 bytes of PCM, while the encoder consumes 4096 bytes (one 1024-sample frame) at a time, so the captured data has to be split into encoder-sized chunks after capture; see the second sketch after this list.
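
For reference, the ADTS fix from the third bullet can be isolated into a small helper. The sketch below (write_adts_header is an illustrative name, not an identifier from the program further down) assumes AAC-LC at 44100 Hz in stereo, i.e. profile index 1, sampling-frequency index 4 and channel configuration 2. Note that the 13-bit frame-length field must count the 7-byte header itself; forgetting the "+ 7" is a common reason for an unplayable .aac file.

#include <stdint.h>

// Minimal ADTS header writer (assumes AAC-LC, 44100 Hz, stereo, no CRC).
// aac_len is the size of the raw AAC payload that follows the header.
static void write_adts_header(uint8_t header[7], int aac_len)
{
    const int profile   = 1;            // ADTS profile field: audio object type - 1 (AAC-LC = 2 -> 1)
    const int freq_idx  = 4;            // sampling-frequency index: 4 = 44100 Hz
    const int chan_cfg  = 2;            // channel configuration: 2 = stereo
    const int frame_len = aac_len + 7;  // the length field covers header + payload

    header[0] = 0xFF;                                                  // syncword (high 8 bits)
    header[1] = 0xF1;                                                  // syncword, MPEG-4, layer 0, no CRC
    header[2] = (uint8_t)((profile << 6) | (freq_idx << 2) | (chan_cfg >> 2));
    header[3] = (uint8_t)(((chan_cfg & 0x3) << 6) | ((frame_len >> 11) & 0x3));
    header[4] = (uint8_t)((frame_len >> 3) & 0xFF);
    header[5] = (uint8_t)(((frame_len & 0x7) << 5) | 0x1F);            // frame length low bits + buffer fullness
    header[6] = 0xFC;                                                  // buffer fullness cont., 1 raw data block
}

In the full program below the same seven bytes are written in front of every packet returned by avcodec_receive_packet.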
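
The numbers in the last bullet work out as follows: 88200 bytes of S16 stereo are 22050 sample-frames (0.5 s at 44.1 kHz), while one AAC frame needs 1024 sample-frames, i.e. 4096 bytes, so each capture packet yields 21 full encoder frames plus a 2184-byte tail that has to be carried into the next packet. Below is a simplified sketch of that carry-over logic; split_packet, chunk_ready_cb and CHUNK_SIZE are illustrative names only, the actual program later in this post does the same thing inline.

#include <stdint.h>
#include <string.h>

#define CHUNK_SIZE 4096  // bytes per encoder frame: 1024 samples x 2 channels x 2 bytes (S16)

// Hypothetical callback: invoked once per full CHUNK_SIZE bytes (resampling and encoding would go here).
typedef void (*chunk_ready_cb)(const uint8_t* chunk);

// Feed one captured packet into a fixed-size chunker; 'carry' bytes left over from the
// previous packet sit at the front of 'chunk' and are completed first.
static void split_packet(const uint8_t* data, int size,
                         uint8_t* chunk, int* carry, chunk_ready_cb on_chunk)
{
    int used = 0;
    while (used < size) {
        int need = CHUNK_SIZE - *carry;          // bytes still missing in the current chunk
        if (need > size - used) {                // not enough left: stash the tail and stop
            memcpy(chunk + *carry, data + used, size - used);
            *carry += size - used;
            return;
        }
        memcpy(chunk + *carry, data + used, need);
        used += need;
        *carry = 0;
        on_chunk(chunk);                         // one full 4096-byte chunk is ready
    }
}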

Code

#include <iostream>
//#include <WinBase.h>
#define __STDC_CONSTANT_MACROS
extern "C"
{
#include "libavutil/avutil.h"
#include "libavcodec/avcodec.h"
#include "libavdevice/avdevice.h"
#include "libswscale/swscale.h"
#include "libswresample/swresample.h"
#include "libavutil/samplefmt.h"
#include <stdio.h>
#include <strsafe.h>
#include <string.h>
}
#ifdef _MSC_VER
#include <tchar.h>
#include <dshow.h>
#include <atlcomcli.h>
#pragma comment(lib, "Strmiids.lib")
#endif
using namespace std;
// Convert a string between two Windows code pages (here: ANSI -> UTF-8 for the device name).
void Convert(const char* strIn, char* strOut, int sourceCodepage, int targetCodepage)
{
    int unicodeLen = MultiByteToWideChar(sourceCodepage, 0, strIn, -1, NULL, 0);
    wchar_t* pUnicode = new wchar_t[unicodeLen + 1];
    memset(pUnicode, 0, (unicodeLen + 1) * sizeof(wchar_t));
    MultiByteToWideChar(sourceCodepage, 0, strIn, -1, (LPWSTR)pUnicode, unicodeLen);
    int targetLen = WideCharToMultiByte(targetCodepage, 0, (LPWSTR)pUnicode, -1, NULL, 0, NULL, NULL);
    BYTE* pTargetData = new BYTE[targetLen + 1];
    memset(pTargetData, 0, targetLen + 1);
    WideCharToMultiByte(targetCodepage, 0, (LPWSTR)pUnicode, -1, (char*)pTargetData, targetLen, NULL, NULL);
    strcpy(strOut, (const char*)pTargetData);
    delete[] pUnicode;
    delete[] pTargetData;
}
// Query the FriendlyName of the first DirectShow audio capture device.
void Get_Capture_Audio_Devices_Info(char* name)
{
#ifdef _MSC_VER
    CoInitialize(NULL);
    CComPtr<ICreateDevEnum> pCreateDevEnum;
    HRESULT hr = CoCreateInstance(CLSID_SystemDeviceEnum, NULL, CLSCTX_INPROC_SERVER, IID_ICreateDevEnum, (void**)&pCreateDevEnum);
    CComPtr<IEnumMoniker> pEm;
    hr = pCreateDevEnum->CreateClassEnumerator(CLSID_AudioInputDeviceCategory, &pEm, 0);
    if (hr != NOERROR) {
        return;
    }
    pEm->Reset();
    ULONG cFetched;
    IMoniker* pM = NULL;
    while (hr = pEm->Next(1, &pM, &cFetched), hr == S_OK)
    {
        IPropertyBag* pBag = 0;
        hr = pM->BindToStorage(0, 0, IID_IPropertyBag, (void**)&pBag);
        if (SUCCEEDED(hr))
        {
            VARIANT var;
            var.vt = VT_BSTR;
            hr = pBag->Read(L"FriendlyName", &var, NULL); // other properties (description, ...) can be read the same way
            if (hr == NOERROR)
            {
                // get the device name (converted to the local ANSI code page)
                WideCharToMultiByte(CP_ACP, 0, var.bstrVal, -1, name, 128, "", NULL);
                SysFreeString(var.bstrVal);
            }
            pBag->Release();
        }
        pM->Release();
    }
    pCreateDevEnum = NULL;
    pEm = NULL;
#else
    memcpy(name, "default", strlen("default") + 1);
#endif
}
// Check whether the encoder supports a given sample format (sample size).
static int check_sample_fmt(const AVCodec* codec, enum AVSampleFormat sample_fmt)
{
    const enum AVSampleFormat* p = codec->sample_fmts;
    while (*p != AV_SAMPLE_FMT_NONE) {
        printf("*p :%s\n", av_get_sample_fmt_name(*p));
        if (*p == sample_fmt)
            return 1;
        p++;
    }
    return 0;
}
// Pick a sample rate from the encoder's supported list (the one closest to 44100).
static int select_sample_rate(const AVCodec* codec)
{
    const int* p;
    int best_samplerate = 0;
    if (!codec->supported_samplerates)
        return 44100;
    p = codec->supported_samplerates;
    while (*p) {
        if (!best_samplerate || abs(44100 - *p) < abs(44100 - best_samplerate))
            best_samplerate = *p;
        p++;
    }
    return best_samplerate;
}
/* select layout with the highest channel count */
static uint64_t select_channel_layout(const AVCodec* codec)
{
    const uint64_t* p;
    uint64_t best_ch_layout = 0;
    int best_nb_channels = 0;
    if (!codec->channel_layouts)
        return AV_CH_LAYOUT_STEREO;
    p = codec->channel_layouts;
    while (*p) {
        int nb_channels = av_get_channel_layout_nb_channels(*p);
        if (nb_channels > best_nb_channels) {
            best_ch_layout = *p;
            best_nb_channels = nb_channels;
        }
        p++;
    }
    return best_ch_layout;
}
int main()
{
    AVFormatContext* fmt_ctx = NULL;
    AVDictionary* options = NULL;
    int ret = 0;
    char errors[1024] = { 0 };
    char device_name[256] = { 0 };
    char file_name[256] = "collection.aac";
    char file_name_pcm[256] = "pltf_collection.pcm";
    char name[128] = { 0 };
    char name_utf8[128] = { 0 };
    FILE* out_file = NULL;
    /* audio resampling parameters */ // playback command for the resampled PCM: ffplay -ar 44100 -f f32le -channels 2 -i collection.pcm
    AVPacket* pkt = NULL;
    int dst_rate = 44100, src_rate = 44100;
    uint8_t** src_data = NULL, ** dst_data = NULL;
    int64_t src_ch_layout = AV_CH_LAYOUT_STEREO, dst_ch_layout = AV_CH_LAYOUT_STEREO;
    enum AVSampleFormat src_sample_fmt = AV_SAMPLE_FMT_S16, dst_sample_fmt = AV_SAMPLE_FMT_FLTP;
    int src_nb_channels = 0, dst_nb_channels = 0;
    int src_linesize, dst_linesize;
    int src_nb_samples = 1024, dst_nb_samples, max_dst_nb_samples;
    int dst_bufsize = 0;
    /* audio encoding parameters */
    int got_output;
    AVCodec* codec;                       // encoder
    AVCodecContext* codec_context = NULL; // encoder context
    AVFrame* frame;                       // raw (uncompressed) frame
    AVPacket enpkt;                       // encoded packet
    av_register_all();
    avdevice_register_all();
    AVInputFormat* in_format = av_find_input_format("dshow");
    if (in_format == NULL)
    {
        printf("av_find_input_format error\n");
    }
    // The device name must be converted to UTF-8; without this avformat_open_input fails with "Immediate exit requested"
    Get_Capture_Audio_Devices_Info(name);
    Convert(name, name_utf8, CP_ACP, CP_UTF8);
    sprintf(device_name, "audio=%s", name_utf8);
    printf("device_name:%s\n", device_name);
    if ((ret = avformat_open_input(&fmt_ctx, device_name, in_format, NULL)) != 0)
    {
        av_strerror(ret, errors, 1024);
        printf("Failed to open audio device, [%s][%d]\n", errors, ret);
        return -1;
    }
    pkt = av_packet_alloc(); // pkt holds the captured PCM data
    av_init_packet(pkt);
    out_file = fopen(file_name, "wb+");
    if (!out_file)
    {
        printf("Failed to open output file %s\n", file_name);
        return -1;
    }
    /* audio resampling setup ---- start ---- */
    SwrContext* swr_ctx = NULL;
    // swr_alloc_set_opts parameters:
    // 1. an existing SwrContext to reuse, or NULL to allocate a new one
    // 2. output channel layout (which speaker sits where)
    // 3. output sample format
    // 4. output sample rate
    // 5. input channel layout
    // 6. input sample format
    // 7. input sample rate
    // the last two are for logging; 0 and NULL are fine
    swr_ctx = swr_alloc_set_opts(NULL,
        dst_ch_layout, dst_sample_fmt, dst_rate,
        src_ch_layout, src_sample_fmt, src_rate,
        0, NULL);
    if (!swr_ctx)
    {
        return -1;
    }
    /* initialize the resampling context */
    if ((ret = swr_init(swr_ctx)) < 0)
    {
        fprintf(stderr, "Failed to initialize the resampling context\n");
        return -1;
    }
    // allocate the input (source) buffer
    src_nb_channels = av_get_channel_layout_nb_channels(src_ch_layout);
    printf("src_nb_channels:%d\n", src_nb_channels);
    ret = av_samples_alloc_array_and_samples(
        &src_data,       // source buffer array
        &src_linesize,   // size of one plane in bytes
        src_nb_channels, // number of channels
        src_nb_samples,  // number of samples per channel
        src_sample_fmt,  // sample format
        0);
    if (ret < 0) {
        fprintf(stderr, "Could not allocate source samples\n");
        return -1;
    }
    printf("src_linesize:%d\n", src_linesize);
    // allocate the output (destination) buffer
    max_dst_nb_samples = dst_nb_samples = av_rescale_rnd(src_nb_samples, dst_rate, src_rate, AV_ROUND_UP);
    dst_nb_channels = av_get_channel_layout_nb_channels(dst_ch_layout);
    printf("max_dst_nb_samples:%d dst_nb_samples:%d\n", max_dst_nb_samples, dst_nb_samples);
    ret = av_samples_alloc_array_and_samples(
        &dst_data,       // destination buffer array
        &dst_linesize,   // size of one plane in bytes
        dst_nb_channels, // number of channels
        dst_nb_samples,  // number of samples per channel
        dst_sample_fmt,  // sample format
        0);
    if (ret < 0)
    {
        fprintf(stderr, "Could not allocate destination samples\n");
        return -1;
    }
    printf("src_linesize:%d dst_linesize:%d \n", src_linesize, dst_linesize);
    /* audio resampling setup ---- end ---- */
    /* audio encoder setup ------ start ----- */
    // 1. find the AAC encoder
    codec = avcodec_find_encoder_by_name("aac");
    if (!codec)
    {
        fprintf(stderr, "Codec not found\n");
        return -1;
    }
    // 2. allocate the encoder context
    codec_context = avcodec_alloc_context3(codec);
    if (!codec_context)
    {
        fprintf(stderr, "Could not allocate audio codec context\n");
        exit(1);
    }
    // 3. configure the encoder context
    codec_context->profile = FF_PROFILE_AAC_LOW; // AAC-LC, matching the profile written into the ADTS header below
    codec_context->bit_rate = 128000;
    codec_context->sample_fmt = dst_sample_fmt;
    if (!check_sample_fmt(codec, codec_context->sample_fmt))
    {
        fprintf(stderr, "Encoder does not support sample format %s",
            av_get_sample_fmt_name(codec_context->sample_fmt));
        return -1;
    }
    // Set the sample rate; a helper could pick one, but a literal value works too.
    codec_context->sample_rate = dst_rate; //select_sample_rate(codec);
    // channel_layout describes the channel order/placement; the channel count is derived
    // from it (it could also be hard-coded).
    codec_context->channel_layout = select_channel_layout(codec);
    codec_context->channels = av_get_channel_layout_nb_channels(codec_context->channel_layout);
    //codec_context->frame_size = dst_nb_samples + 1024;
    printf("codec_context->channels:%d \n", codec_context->channels);
    // 4. open the encoder
    if (avcodec_open2(codec_context, codec, NULL) < 0)
    {
        fprintf(stderr, "Could not open codec\n");
        exit(1);
    }
    int encode_nb_sample = codec_context->frame_size; // for AAC this is 1024 samples per channel
    printf("codec_context->frame_size:%d\n", codec_context->frame_size);
    // 5. allocate the raw frame
    frame = av_frame_alloc();
    if (!frame)
    {
        fprintf(stderr, "Could not allocate audio frame\n");
        exit(1);
    }
    // 6. set the frame parameters
    printf("dst_nb_samples:%d\n", dst_nb_samples);
    frame->nb_samples = encode_nb_sample;
    // frame format and channel layout
    printf("codec_context->sample_fmt:%s\n", av_get_sample_fmt_name(codec_context->sample_fmt));
    frame->format = codec_context->sample_fmt;
    printf("codec_context->channel_layout:%lld\n", (long long)codec_context->channel_layout);
    frame->channel_layout = codec_context->channel_layout;
    frame->sample_rate = dst_rate;
    /* allocate the data buffers */
    ret = av_frame_get_buffer(frame, 0);
    if (ret < 0)
    {
        fprintf(stderr, "Could not allocate audio data buffers\n");
        exit(1);
    }
    printf("frame->linesize[0]:%d\n", frame->linesize[0]);
    // copy parameters and write the container header (only needed when muxing into a container)
    //AVStream* st = avformat_new_stream(fmt_ctx, codec);
    //avcodec_parameters_from_context(st->codecpar, codec_context);
    //avformat_write_header(fmt_ctx, NULL);
    /* audio encoder setup ------ end ------ */
    av_dump_format(fmt_ctx, 0, device_name, 0);
    while (!av_read_frame(fmt_ctx, pkt))
    {
        printf("%d\n", pkt->size);
        /* audio resampling start */
        /* compute destination number of samples */
        dst_nb_samples = av_rescale_rnd(swr_get_delay(swr_ctx, src_rate) +
            src_nb_samples, dst_rate, src_rate, AV_ROUND_UP);
        printf("dst_nb_samples:%d \n", dst_nb_samples);
        if (dst_nb_samples > max_dst_nb_samples)
        {
            av_freep(&dst_data[0]);
            ret = av_samples_alloc(dst_data, &dst_linesize, dst_nb_channels,
                dst_nb_samples, dst_sample_fmt, 1);
            if (ret < 0)
                break;
            max_dst_nb_samples = dst_nb_samples;
        }
        printf("Size of collected data %d\n", pkt->size);
        // src_data is an array of plane buffers; interleaved S16 input only uses plane 0.
        // Split the captured packet into chunks of src_linesize (4096) bytes; a partial
        // tail is kept in src_data[0] (offset bytes) and completed by the next packet.
        static int offset = 0;
        int use_size = 0;
        while (use_size < pkt->size)
        {
            if (use_size + src_linesize + offset > pkt->size)
            {
                // not enough data left for a full chunk: buffer the tail and wait for the next packet
                offset = pkt->size - use_size;
                memset(src_data[0], 0, src_linesize);
                memcpy(src_data[0], pkt->data + use_size, offset);
                break;
            }
            if (offset)
            {
                // complete the chunk started by the previous packet
                printf("use_size:%d src_linesize-offset:%d pkt->size:%d\n", use_size, src_linesize - offset, pkt->size);
                memcpy(src_data[0] + offset, pkt->data + use_size, src_linesize - offset);
                use_size += (src_linesize - offset);
                offset = 0;
            }
            else
            {
                memset(src_data[0], 0, src_linesize);
                printf("use_size:%d pkt->size:%d src_linesize:%d\n", use_size, pkt->size, src_linesize);
                memcpy(src_data[0], pkt->data + use_size, src_linesize);
                use_size += src_linesize;
            }
            printf("dst_nb_samples:%d src_nb_samples:%d\n", dst_nb_samples, src_nb_samples);
            // resample one S16 chunk directly into the FLTP planes of the encoder frame
            ret = av_frame_make_writable(frame); // the encoder may still hold a reference to the old buffers
            if (ret < 0)
                break;
            //ret = swr_convert(swr_ctx, dst_data, dst_nb_samples, (const uint8_t**)src_data, src_nb_samples);
            ret = swr_convert(swr_ctx, frame->data, frame->nb_samples, (const uint8_t**)src_data, src_nb_samples);
            if (ret < 0)
            {
                fprintf(stderr, "Error while converting\n");
                return -1;
            }
            //dst_bufsize = av_samples_get_buffer_size(&dst_linesize, dst_nb_channels,
            //                                         ret, dst_sample_fmt, 0);
            //if (dst_bufsize < 0)
            //{
            //    fprintf(stderr, "Could not get sample buffer size\n");
            //    return -1;
            //}
            //fwrite(dst_data[0], 1, dst_bufsize, out_file); // write the resampled PCM instead of AAC
            //fwrite(dst_data[1], 1, dst_bufsize, out_file);
            //fflush(out_file);
            printf("dst_bufsize:%d frame->linesize[0]:%d\n", dst_bufsize, frame->linesize[0]);
            /* audio resampling end */
            /* audio encoding: send the resampled frame and drain all available packets */
            av_init_packet(&enpkt);
            enpkt.data = NULL;
            enpkt.size = 0;
            //memcpy(frame->data[0], dst_data[0], dst_bufsize / 2);
            //memcpy(frame->data[1], dst_data[0] + dst_bufsize / 2, dst_bufsize / 2);
            ret = avcodec_send_frame(codec_context, frame);
            while (ret >= 0)
            {
                ret = avcodec_receive_packet(codec_context, &enpkt);
                if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
                {
                    break; // the encoder needs more input before it can emit another packet
                }
                else if (ret < 0)
                {
                    printf("avcodec_receive_packet error! [%d]\n", ret);
                    return -1;
                }
                else
                {
                    printf("enpkt.size:%d\n", enpkt.size);
                    // Prepend a 7-byte ADTS header so the raw AAC stream becomes playable.
                    unsigned char aac_buffer[7] = { 0 };
                    printf("codec_context->profile:%d\n", codec_context->profile);
                    int profile = codec_context->profile; // 1 = AAC LC (ADTS profile = audio object type - 1)
                    int freqIdx = 4;                      // sampling-frequency index 4 = 44100 Hz
                    int chanCfg = 2;                      // channel configuration 2 = stereo
                    int frame_len = enpkt.size + 7;       // the ADTS length field covers the header itself
                    aac_buffer[0] = 0xFF;                 // syncword, MPEG-4, layer 0, no CRC
                    aac_buffer[1] = 0xF1;
                    aac_buffer[2] = (unsigned char)(((profile & 0x3) << 6) + ((freqIdx & 0xF) << 2) + (chanCfg >> 2));
                    aac_buffer[3] = (unsigned char)(((chanCfg & 0x3) << 6) + ((frame_len >> 11) & 0x3));
                    aac_buffer[4] = (unsigned char)((frame_len >> 3) & 0xFF);
                    aac_buffer[5] = (unsigned char)(((frame_len & 0x7) << 5) + 0x1F);
                    aac_buffer[6] = 0xFC;
                    fwrite(aac_buffer, 1, 7, out_file);
                    fwrite(enpkt.data, 1, enpkt.size, out_file);
                    fflush(out_file);
                    av_packet_unref(&enpkt);
                }
            }
        }
  421. printf("dst_nb_samples:%d src_nb_samples:%d frame->nb_samples:%d\n", dst_nb_samples, src_nb_samples, frame->nb_samples);
  422. #if 0
  423. ret = swr_convert(swr_ctx, dst_data, dst_nb_samples, (const uint8_t**)src_data, src_nb_samples);
  424. if (ret < 0)
  425. {
  426. fprintf(stderr, "Error while converting\n");
  427. return -1;
  428. }
  429. dst_bufsize = av_samples_get_buffer_size(&dst_linesize, dst_nb_channels,
  430. ret, dst_sample_fmt, 1);
  431. if (dst_bufsize < 0)
  432. {
  433. fprintf(stderr, "Could not get sample buffer size\n");
  434. return -1;
  435. }
  436. /*音频重采样 end*/
  437. /*音频编码 start*/
  438. av_init_packet(&enpkt);
  439. enpkt.data = NULL;
  440. enpkt.size = 0;
  441. ret = avcodec_send_frame();
  442. while (ret >= 0)
  443. {
  444. ret = avcodec_receive_packet(codec_context, &enpkt);
  445. if (ret < 0)
  446. {
  447. printf("avcodec_receive_packet error!\n");
  448. return -1;
  449. }
  450. ret = av_write_frame(fmt_ctx, &enpkt);
  451. av_packet_unref(&enpkt);
  452. }
  453. av_write_trailer(fmt_ctx);
  454. /*音频编码 end*/
  455. printf("enpkt.size:%d\n", enpkt.size);
  456. //fwrite(dst_data[0], 1, dst_bufsize, out_file);
  457. //fflush(out_file)
  458. #endif
  459. ;
  460. av_packet_unref(pkt);
  461. }
    // release resources
    av_frame_free(&frame);
    avcodec_free_context(&codec_context);
    swr_free(&swr_ctx);
    av_packet_free(&pkt);
    avformat_close_input(&fmt_ctx);
    fclose(out_file);
    return 0;
}
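
A quick way to verify the result is to play the ADTS stream directly, e.g. ffplay collection.aac; ffprobe collection.aac should then report an aac (LC) stream at 44100 Hz, stereo. If the header's profile, sampling-frequency index or channel configuration does not match what the encoder actually produced, many players will refuse the file or decode it as noise.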