什么是音频重采样

将音频三元组（采样频率、采样位数和通道数）的值转换成另外一组值
例如：将44100/16/2 转成 48000/16/2

为什么要重采样

从设备采集的音频数据和编码器要求的不一致

扬声器要求的音频数据与要播放的音频数据不一致

          |<---要求的不一致--->|                                |<---要求的不一致-->|<br />采集的数据（PCM）--------- 编码数据 ------ 解码数据（PCM）--------- 扬声器播放

如何知道是否需要进行重采样

要了解音频设备的参数
- 不同的平台都有设备管理，通过设备管理可以知道
查看ffmpeg源码（最方便的），基本包含了所有的编解码器的实现

重采样的步骤

创建重采样的上下文
设置参数
初始化重采样
进行重采样

API

swr_alloc_set_opts 设置参数，返回上下文
swr_init
swr_convert
swr_free


#include <iostream>
#define __STDC_CONSTANT_MACROS
extern "C"
{
#include "libavutil/avutil.h"
#include "libavdevice/avdevice.h"
#include "libswscale/swscale.h"
#include "libswresample/swresample.h"
#include "libavutil\samplefmt.h"
#include <stdio.h>
#include <strsafe.h>
#include <string.h>
}
#ifdef _MSC_VER
#include <tchar.h>
#include <dshow.h>
#include <atlcomcli.h>
#pragma comment(lib, "Strmiids.lib")
#endif
using namespace std;
/**
 * Convert a NUL-terminated string from one Windows code page to another.
 *
 * The text is first widened to UTF-16 with MultiByteToWideChar and then
 * narrowed to the target code page with WideCharToMultiByte.
 *
 * @param strIn           NUL-terminated input encoded in sourceCodepage.
 * @param strOut          Caller-owned output buffer. No size is passed in, so
 *                        the caller must make it large enough for the
 *                        converted text plus its terminator (ANSI -> UTF-8
 *                        can need up to 3 bytes per input byte).
 * @param sourceCodepage  Code page identifier of strIn (e.g. CP_ACP).
 * @param targetCodepage  Code page identifier wanted in strOut (e.g. CP_UTF8).
 *
 * If either conversion step fails, strOut is set to the empty string.
 */
void Convert(const char* strIn, char* strOut, int sourceCodepage, int targetCodepage)
{
    if (strOut == NULL)
    {
        return;
    }
    strOut[0] = '\0';
    if (strIn == NULL)
    {
        return;
    }

    // Size query: with cbMultiByte == -1 the returned count includes the terminator.
    int unicodeLen = MultiByteToWideChar(sourceCodepage, 0, strIn, -1, NULL, 0);
    if (unicodeLen <= 0)
    {
        return;
    }

    wchar_t* pUnicode = new wchar_t[unicodeLen + 1];
    memset(pUnicode, 0, (unicodeLen + 1) * sizeof(wchar_t));

    if (MultiByteToWideChar(sourceCodepage, 0, strIn, -1, pUnicode, unicodeLen) > 0)
    {
        // Size query for the narrow result; again includes the terminator.
        int targetLen = WideCharToMultiByte(targetCodepage, 0, pUnicode, -1, NULL, 0, NULL, NULL);
        if (targetLen > 0)
        {
            // Convert into a scratch buffer so strOut is only touched on success.
            char* pTargetData = new char[targetLen + 1];
            memset(pTargetData, 0, targetLen + 1);
            if (WideCharToMultiByte(targetCodepage, 0, pUnicode, -1, pTargetData, targetLen, NULL, NULL) > 0)
            {
                memcpy(strOut, pTargetData, targetLen);
            }
            delete[] pTargetData;
        }
    }

    // Arrays allocated with new[] must be released with delete[].
    delete[] pUnicode;
}
/**
 * Look up the friendly name of a DirectShow audio capture device.
 *
 * On MSVC builds every device in CLSID_AudioInputDeviceCategory is visited and
 * each friendly name that converts successfully to the ANSI code page is
 * copied over `name`, so the buffer ends up holding the name of the last
 * device enumerated. If COM object creation or enumeration fails, or the
 * category is empty, `name` is left untouched. On other toolchains the literal
 * "default" is copied instead.
 *
 * @param name  Caller-owned output buffer; the MSVC path writes at most
 *              128 bytes into it (including the terminator).
 */
void Get_Capture_Audio_Devices_Info(char* name)
{
    if (name == NULL)
    {
        return;
    }
#ifdef _MSC_VER
    // Remember whether this call owns a COM reference so it can be balanced below.
    HRESULT hrInit = CoInitialize(NULL);
    {
        // Scoped so both smart pointers release before CoUninitialize runs.
        CComPtr<ICreateDevEnum> pCreateDevEnum;
        CComPtr<IEnumMoniker> pEm;
        HRESULT hr = CoCreateInstance(CLSID_SystemDeviceEnum, NULL, CLSCTX_INPROC_SERVER, IID_ICreateDevEnum, (void**)&pCreateDevEnum);
        if (SUCCEEDED(hr))
        {
            hr = pCreateDevEnum->CreateClassEnumerator(CLSID_AudioInputDeviceCategory, &pEm, 0);
        }
        // Only S_OK yields a usable enumerator (S_FALSE means the category is empty).
        if (hr == S_OK && pEm != NULL)
        {
            pEm->Reset();
            ULONG cFetched = 0;
            IMoniker* pM = NULL;
            while (pEm->Next(1, &pM, &cFetched) == S_OK)
            {
                IPropertyBag* pBag = NULL;
                hr = pM->BindToStorage(0, 0, IID_IPropertyBag, (void**)&pBag);
                if (SUCCEEDED(hr))
                {
                    VARIANT var;
                    VariantInit(&var);
                    var.vt = VT_BSTR;
                    // Other properties (description, ...) can be read the same way.
                    hr = pBag->Read(L"FriendlyName", &var, NULL);
                    if (hr == NOERROR)
                    {
                        if (var.vt == VT_BSTR && var.bstrVal != NULL)
                        {
                            // Convert into a scratch buffer so a failed conversion
                            // cannot leave `name` half-written or unterminated.
                            char friendly[128] = { 0 };
                            if (WideCharToMultiByte(CP_ACP, 0, var.bstrVal, -1, friendly, (int)sizeof(friendly), "", NULL) > 0)
                            {
                                memcpy(name, friendly, strlen(friendly) + 1);
                            }
                        }
                        VariantClear(&var); // frees the BSTR returned by Read
                    }
                    pBag->Release();
                }
                pM->Release();
                pM = NULL;
            }
        }
    }
    if (SUCCEEDED(hrInit))
    {
        CoUninitialize();
    }
#else
    memcpy(name, "default", strlen("default") + 1);
#endif
}
int main()
{
    int ret = 0;
    AVFormatContext* fmt_ctx = NULL;
    AVDictionary* options = NULL;
    char errors[1024] = { 0 };
    char device_name[256] = {0};
    char file_name[256] = "collection.pcm";
    char name[128] = { 0 };
    char name_utf8[128] = { 0 };
    int dst_rate = 8000, src_rate = 44100;
    uint8_t** src_data = NULL, ** dst_data = NULL;
    int64_t src_ch_layout = AV_CH_LAYOUT_STEREO, dst_ch_layout = AV_CH_LAYOUT_SURROUND;
    enum AVSampleFormat src_sample_fmt = AV_SAMPLE_FMT_S16, dst_sample_fmt = AV_SAMPLE_FMT_S16;
    int src_nb_channels = 0, dst_nb_channels = 0;
    int src_linesize, dst_linesize;
    int src_nb_samples = 44100/2, dst_nb_samples, max_dst_nb_samples;
    int dst_bufsize;
    av_register_all();
    avdevice_register_all();
    AVInputFormat* in_format = av_find_input_format("dshow");
    if (in_format == NULL)
    {
        printf("av_find_input_format error\n");
    }
    //设置输入的字体，没有这部分会报错 //Immediate exit requested
    Get_Capture_Audio_Devices_Info(name);
    Convert(name, name_utf8, CP_ACP, CP_UTF8);
    sprintf(device_name, "audio=%s", name_utf8);
    printf("device_name:%s\n", device_name);
    if ((ret = avformat_open_input(&fmt_ctx, device_name, in_format, NULL)) != 0)
    {
        av_strerror(ret, errors, 1024);
        printf("Failed to open video device, [%s][%d]\n", errors, ret);
        return -1;
    }
    AVPacket* pkt = av_packet_alloc();
    av_init_packet(pkt);
    FILE* out_file = fopen(file_name, "wb+");
    /*添加音频重采样----start----*/
    SwrContext* swr_ctx = NULL;
    //第一个参数，如果之前有设置好的重采样上下文可以传入，如果没有就传入NULL；
    //2.输出的channel, nunber/layout(一种布局参数,就是把扬声器放置在哪个位置)
    //3.输出的采样位数
    //4.输出的采样频率
    //5.输入的采样通道
    //6.输入的采样位数
    //7.输入的采样频率
    //最后两个是个日志相关的，设成0和NULL就可以了
    swr_ctx = swr_alloc_set_opts(swr_ctx,
        dst_ch_layout, dst_sample_fmt, dst_rate,
        src_ch_layout, src_sample_fmt, src_rate,
        0, NULL);
    if (!swr_ctx)
    {
        return -1;
    }
    /* initialize the resampling context */
    if ((ret = swr_init(swr_ctx)) < 0) 
    {
        fprintf(stderr, "Failed to initialize the resampling context\n");
        return -1;
    }
    //创建输入缓冲区
    src_nb_channels = av_get_channel_layout_nb_channels(src_ch_layout);
    printf("src_nb_channels:%d\n", src_nb_channels);
    ret = av_samples_alloc_array_and_samples(
        &src_data, //输入缓冲区地址
        &src_linesize, //缓冲区大小
        src_nb_channels, //通道个数
        src_nb_samples, //单通道采样个数 number of samples per channel
        src_sample_fmt, //采样格式
        0);
    if (ret < 0) {
        fprintf(stderr, "Could not allocate source samples\n");
        return -1;
    }
    printf("src_linesize:%d\n", src_linesize);
    //创建输出缓冲区
    max_dst_nb_samples = dst_nb_samples = av_rescale_rnd(src_nb_samples, dst_rate, src_rate, AV_ROUND_UP);
    dst_nb_channels = av_get_channel_layout_nb_channels(dst_ch_layout);
    av_samples_alloc_array_and_samples(
        &dst_data, //输出缓冲区地址
        &dst_linesize, //缓冲区大小
        dst_nb_channels, //通道个数
        dst_nb_samples, //单通道采样个数 number of samples per channel
        dst_sample_fmt, //采样格式
        0);
    if (ret < 0) {
        fprintf(stderr, "Could not allocate destination samples\n");
        return -1;
    }
    /*添加音频重采样----end----*/
    av_dump_format(fmt_ctx, 0, device_name, 0);
    while (!av_read_frame(fmt_ctx, pkt))
    {
        /* compute destination number of samples */
        dst_nb_samples = av_rescale_rnd(swr_get_delay(swr_ctx, src_rate) +
            src_nb_samples, dst_rate, src_rate, AV_ROUND_UP);
        if (dst_nb_samples > max_dst_nb_samples) 
        {
            av_freep(&dst_data[0]);
            ret = av_samples_alloc(dst_data, &dst_linesize, dst_nb_channels,
                dst_nb_samples, dst_sample_fmt, 1);
            if (ret < 0)
                break;
            max_dst_nb_samples = dst_nb_samples;
        }
       printf("Size of collected data %d\n", pkt->size);
        /*音频重采样*/ //src_data是一个缓冲区数组，只用到第一个缓冲区，所以用0
        memset(src_data[0], 0, pkt->size);
        memcpy(src_data[0], pkt->data, pkt->size);
        printf("dst_nb_samples:%d src_nb_samples:%d\n", dst_nb_samples, src_nb_samples);
        ret = swr_convert(swr_ctx, dst_data, dst_nb_samples, (const uint8_t**)src_data, src_nb_samples);
        if (ret < 0) 
        {
            fprintf(stderr, "Error while converting\n");
            return -1;
        }
        dst_bufsize = av_samples_get_buffer_size(&dst_linesize, dst_nb_channels,
            ret, dst_sample_fmt, 1);
        if (dst_bufsize < 0) 
        {
            fprintf(stderr, "Could not get sample buffer size\n");
            return -1;
        }
        printf("dst_bufsize:%d\n", dst_bufsize);
        fwrite(dst_data[0], 1, dst_bufsize, out_file);
        fflush(out_file);
        av_packet_unref(pkt);
    }
    swr_free(&swr_ctx);
    av_packet_free(&pkt);
    avformat_close_input(&fmt_ctx);
    fclose(out_file);
    // ffplay -video_size 640*480 -pixel_format yuyv422 -framerate 30 collection.yuv
    return 0;
}

问题总结

int av_samples_alloc_array_and_samples(uint8_t ***audio_data, int *linesize, int nb_channels,

                                 int nb_samples, enum AVSampleFormat sample_fmt, int align);<br />函数功能：<br />为重采样前后的pcm数据申请缓冲区<br />参数理解：<br />audio_data ----- 缓冲区地址<br />linesize --------- 申请的缓冲区大小。音频的存放格式，如果是packed，则audio_data[0]存放所有的音频数据，如果是planar，则audio_data[i]存放i通道的音频数据<br />nb_channels ---- 通道个数<br />nb_samples ----- 单个通道的采样个数，因为在采样频率为44100的情况下，双通道采样pcm的输出数据大小为44100*2，故nb_samples  = 44100*2/位深(2个字节)/2(通道个数)<br />sample_fmt ----- 采样格式<br />align ------------ 缓冲区大小的对齐方式，直接赋值0（使用默认对齐）