什么是音频重采样

将音频三元组(采样频率、采样位数和通道数)的值转换成另外一组值
例如:将44100/16/2 转成 48000/16/2

为什么要重采样

  • 从设备采集的音频数据和编码器要求的不一致
  • 扬声器要求的音频数据与要播放的音频数据不一致

    1. 采集的数据(PCM) --(参数与编码器要求不一致时需要重采样)--> 编码数据 ------ 解码数据(PCM) --(参数与扬声器要求不一致时需要重采样)--> 扬声器播放

如何知道是否需要进行重采样

  • 要了解音频设备的参数
    • 不同的平台都有设备管理,通过设备管理可以知道
  • 查看ffmpeg源码(最方便的),基本包含了所有的编解码器的实现

重采样的步骤

  • 创建重采样的上下文
  • 设置参数
  • 初始化重采样
  • 进行重采样

API

swr_alloc_set_opts 设置参数,返回上下文
swr_init
swr_convert
swr_free

  1. #include <iostream>
  2. #define __STDC_CONSTANT_MACROS
  3. extern "C"
  4. {
  5. #include "libavutil/avutil.h"
  6. #include "libavdevice/avdevice.h"
  7. #include "libswscale/swscale.h"
  8. #include "libswresample/swresample.h"
  9. #include "libavutil\samplefmt.h"
  10. #include <stdio.h>
  11. #include <strsafe.h>
  12. #include <string.h>
  13. }
  14. #ifdef _MSC_VER
  15. #include <tchar.h>
  16. #include <dshow.h>
  17. #include <atlcomcli.h>
  18. #pragma comment(lib, "Strmiids.lib")
  19. #endif
  20. using namespace std;
  21. void Convert(const char* strIn, char* strOut, int sourceCodepage, int targetCodepage)
  22. {
  23. //LPCTSTR
  24. LPCTSTR pStr = (LPCTSTR)strIn;
  25. int len = lstrlen(pStr);
  26. int unicodeLen = MultiByteToWideChar(sourceCodepage, 0, strIn, -1, NULL, 0);
  27. wchar_t* pUnicode = NULL;
  28. pUnicode = new wchar_t[unicodeLen + 1];
  29. memset(pUnicode, 0, (unicodeLen + 1) * sizeof(wchar_t));
  30. MultiByteToWideChar(sourceCodepage, 0, strIn, -1, (LPWSTR)pUnicode, unicodeLen);
  31. BYTE* pTargetData = NULL;
  32. int targetLen = WideCharToMultiByte(targetCodepage, 0, (LPWSTR)pUnicode, -1, (char*)pTargetData, 0, NULL, NULL);
  33. pTargetData = new BYTE[targetLen + 1];
  34. memset(pTargetData, 0, targetLen + 1);
  35. WideCharToMultiByte(targetCodepage, 0, (LPWSTR)pUnicode, -1, (char*)pTargetData, targetLen, NULL, NULL);
  36. lstrcpy((LPSTR)strOut, (LPCSTR)pTargetData);
  37. delete pUnicode;
  38. delete pTargetData;
  39. }
  /*
   * Look up the "FriendlyName" of a DirectShow audio capture device.
   *
   * name - output buffer of at least 128 bytes (the size is hard-coded in the
   *        conversion call below). Receives the name encoded in the system
   *        ANSI code page (CP_ACP). It is left untouched when enumeration
   *        fails or no device is found.
   *
   * Every enumerated device overwrites `name`, so when several capture devices
   * exist the last one enumerated is the one reported.
   *
   * On non-MSVC builds the function just writes "default".
   */
  void Get_Capture_Audio_Devices_Info(char* name)
  {
      if (name == NULL)
          return;
  #ifdef _MSC_VER
      HRESULT hrInit = CoInitialize(NULL);
      {
          // Inner scope: the CComPtr objects must release their interfaces
          // before CoUninitialize() runs.
          CComPtr<ICreateDevEnum> pCreateDevEnum;
          CComPtr<IEnumMoniker> pEm;

          HRESULT hr = CoCreateInstance(CLSID_SystemDeviceEnum, NULL, CLSCTX_INPROC_SERVER,
                                        IID_ICreateDevEnum, (void**)&pCreateDevEnum);
          if (SUCCEEDED(hr) && pCreateDevEnum)
          {
              hr = pCreateDevEnum->CreateClassEnumerator(CLSID_AudioInputDeviceCategory, &pEm, 0);
          }

          // Only S_OK yields a usable enumerator; anything else is treated as "no device".
          if (hr == S_OK && pEm)
          {
              pEm->Reset();
              ULONG cFetched = 0;
              IMoniker* pM = NULL;
              while (pEm->Next(1, &pM, &cFetched) == S_OK)
              {
                  IPropertyBag* pBag = NULL;
                  hr = pM->BindToStorage(0, 0, IID_IPropertyBag, (void**)&pBag);
                  if (SUCCEEDED(hr))
                  {
                      VARIANT var;
                      VariantInit(&var);
                      // Other properties (description, ...) can be read the same way.
                      hr = pBag->Read(L"FriendlyName", &var, NULL);
                      if (hr == NOERROR && var.vt == VT_BSTR && var.bstrVal != NULL)
                      {
                          // Store the device name as an ANSI string.
                          WideCharToMultiByte(CP_ACP, 0, var.bstrVal, -1, name, 128, "", NULL);
                          // Guarantee termination even if the name did not fit.
                          name[127] = '\0';
                      }
                      VariantClear(&var); // frees the BSTR, if any
                      pBag->Release();
                  }
                  pM->Release();
                  pM = NULL;
              }
          }
      }
      // Balance CoInitialize() only when it actually succeeded (S_OK or S_FALSE).
      if (SUCCEEDED(hrInit))
          CoUninitialize();
  #else
      memcpy(name, "default", strlen("default") + 1);
  #endif
  }
  79. int main()
  80. {
  81. int ret = 0;
  82. AVFormatContext* fmt_ctx = NULL;
  83. AVDictionary* options = NULL;
  84. char errors[1024] = { 0 };
  85. char device_name[256] = {0};
  86. char file_name[256] = "collection.pcm";
  87. char name[128] = { 0 };
  88. char name_utf8[128] = { 0 };
  89. int dst_rate = 8000, src_rate = 44100;
  90. uint8_t** src_data = NULL, ** dst_data = NULL;
  91. int64_t src_ch_layout = AV_CH_LAYOUT_STEREO, dst_ch_layout = AV_CH_LAYOUT_SURROUND;
  92. enum AVSampleFormat src_sample_fmt = AV_SAMPLE_FMT_S16, dst_sample_fmt = AV_SAMPLE_FMT_S16;
  93. int src_nb_channels = 0, dst_nb_channels = 0;
  94. int src_linesize, dst_linesize;
  95. int src_nb_samples = 44100/2, dst_nb_samples, max_dst_nb_samples;
  96. int dst_bufsize;
  97. av_register_all();
  98. avdevice_register_all();
  99. AVInputFormat* in_format = av_find_input_format("dshow");
  100. if (in_format == NULL)
  101. {
  102. printf("av_find_input_format error\n");
  103. }
  104. //设置输入的字体,没有这部分会报错 //Immediate exit requested
  105. Get_Capture_Audio_Devices_Info(name);
  106. Convert(name, name_utf8, CP_ACP, CP_UTF8);
  107. sprintf(device_name, "audio=%s", name_utf8);
  108. printf("device_name:%s\n", device_name);
  109. if ((ret = avformat_open_input(&fmt_ctx, device_name, in_format, NULL)) != 0)
  110. {
  111. av_strerror(ret, errors, 1024);
  112. printf("Failed to open video device, [%s][%d]\n", errors, ret);
  113. return -1;
  114. }
  115. AVPacket* pkt = av_packet_alloc();
  116. av_init_packet(pkt);
  117. FILE* out_file = fopen(file_name, "wb+");
  118. /*添加音频重采样----start----*/
  119. SwrContext* swr_ctx = NULL;
  120. //第一个参数,如果之前有设置好的重采样上下文可以传入,如果没有就传入NULL;
  121. //2.输出的channel, nunber/layout(一种布局参数,就是把扬声器放置在哪个位置)
  122. //3.输出的采样位数
  123. //4.输出的采样频率
  124. //5.输入的采样通道
  125. //6.输入的采样位数
  126. //7.输入的采样频率
  127. //最后两个是个日志相关的,设成0和NULL就可以了
  128. swr_ctx = swr_alloc_set_opts(swr_ctx,
  129. dst_ch_layout, dst_sample_fmt, dst_rate,
  130. src_ch_layout, src_sample_fmt, src_rate,
  131. 0, NULL);
  132. if (!swr_ctx)
  133. {
  134. return -1;
  135. }
  136. /* initialize the resampling context */
  137. if ((ret = swr_init(swr_ctx)) < 0)
  138. {
  139. fprintf(stderr, "Failed to initialize the resampling context\n");
  140. return -1;
  141. }
  142. //创建输入缓冲区
  143. src_nb_channels = av_get_channel_layout_nb_channels(src_ch_layout);
  144. printf("src_nb_channels:%d\n", src_nb_channels);
  145. ret = av_samples_alloc_array_and_samples(
  146. &src_data, //输入缓冲区地址
  147. &src_linesize, //缓冲区大小
  148. src_nb_channels, //通道个数
  149. src_nb_samples, //单通道采样个数 number of samples per channel
  150. src_sample_fmt, //采样格式
  151. 0);
  152. if (ret < 0) {
  153. fprintf(stderr, "Could not allocate source samples\n");
  154. return -1;
  155. }
  156. printf("src_linesize:%d\n", src_linesize);
  157. //创建输出缓冲区
  158. max_dst_nb_samples = dst_nb_samples = av_rescale_rnd(src_nb_samples, dst_rate, src_rate, AV_ROUND_UP);
  159. dst_nb_channels = av_get_channel_layout_nb_channels(dst_ch_layout);
  160. av_samples_alloc_array_and_samples(
  161. &dst_data, //输出缓冲区地址
  162. &dst_linesize, //缓冲区大小
  163. dst_nb_channels, //通道个数
  164. dst_nb_samples, //单通道采样个数 number of samples per channel
  165. dst_sample_fmt, //采样格式
  166. 0);
  167. if (ret < 0) {
  168. fprintf(stderr, "Could not allocate destination samples\n");
  169. return -1;
  170. }
  171. /*添加音频重采样----end----*/
  172. av_dump_format(fmt_ctx, 0, device_name, 0);
  173. while (!av_read_frame(fmt_ctx, pkt))
  174. {
  175. /* compute destination number of samples */
  176. dst_nb_samples = av_rescale_rnd(swr_get_delay(swr_ctx, src_rate) +
  177. src_nb_samples, dst_rate, src_rate, AV_ROUND_UP);
  178. if (dst_nb_samples > max_dst_nb_samples)
  179. {
  180. av_freep(&dst_data[0]);
  181. ret = av_samples_alloc(dst_data, &dst_linesize, dst_nb_channels,
  182. dst_nb_samples, dst_sample_fmt, 1);
  183. if (ret < 0)
  184. break;
  185. max_dst_nb_samples = dst_nb_samples;
  186. }
  187. printf("Size of collected data %d\n", pkt->size);
  188. /*音频重采样*/ //src_data是一个缓冲区数组,只用到第一个缓冲区,所以用0
  189. memset(src_data[0], 0, pkt->size);
  190. memcpy(src_data[0], pkt->data, pkt->size);
  191. printf("dst_nb_samples:%d src_nb_samples:%d\n", dst_nb_samples, src_nb_samples);
  192. ret = swr_convert(swr_ctx, dst_data, dst_nb_samples, (const uint8_t**)src_data, src_nb_samples);
  193. if (ret < 0)
  194. {
  195. fprintf(stderr, "Error while converting\n");
  196. return -1;
  197. }
  198. dst_bufsize = av_samples_get_buffer_size(&dst_linesize, dst_nb_channels,
  199. ret, dst_sample_fmt, 1);
  200. if (dst_bufsize < 0)
  201. {
  202. fprintf(stderr, "Could not get sample buffer size\n");
  203. return -1;
  204. }
  205. printf("dst_bufsize:%d\n", dst_bufsize);
  206. fwrite(dst_data[0], 1, dst_bufsize, out_file);
  207. fflush(out_file);
  208. av_packet_unref(pkt);
  209. }
  210. swr_free(&swr_ctx);
  211. av_packet_free(&pkt);
  212. avformat_close_input(&fmt_ctx);
  213. fclose(out_file);
  214. // ffplay -video_size 640*480 -pixel_format yuyv422 -framerate 30 collection.yuv
  215. return 0;
  216. }

问题总结

  • int av_samples_alloc_array_and_samples(uint8_t ***audio_data, int *linesize, int nb_channels,

    1. int nb_samples, enum AVSampleFormat sample_fmt, int align);<br />函数功能:<br />为重采样前后的pcm数据申请缓冲区<br />参数理解:<br />audio_data ----- 输出参数,返回缓冲区指针数组的地址。音频的存放格式如果是packed,则audio_data[0]存放所有通道的音频数据;如果是planar,则audio_data[i]存放第i个通道的音频数据<br />linesize --------- 输出参数,返回申请到的缓冲区大小(字节)<br />nb_channels ---- 通道个数<br />nb_samples ----- 单个通道的采样个数。本例中采样频率为44100、双通道、16位采样,每次采集到的pcm数据大小为44100*2=88200字节,故nb_samples = 88200/2(每个采样2个字节)/2(通道个数) = 22050<br />sample_fmt ----- 采样格式<br />align ------------ 缓冲区大小的对齐方式,直接赋值0(使用默认对齐)