前言

源码分析

WebRtcVoiceMediaChannel::SetSend

h:\webrtc-20210315\webrtc-20210315\webrtc\webrtc-checkout\src\media\engine\webrtc_voice_engine.cc

void WebRtcVoiceMediaChannel::SetSend(bool send) {
***
  // Apply channel specific options, and initialize the ADM for recording (this
  // may take time on some platforms, e.g. Android).
  if (send) {
    engine()->ApplyOptions(options_);
    // InitRecording() may return an error if the ADM is already recording.
    if (!engine()->adm()->RecordingIsInitialized() &&
        !engine()->adm()->Recording()) {
      if (engine()->adm()->InitRecording() != 0) {
        RTC_LOG(LS_WARNING) << "Failed to initialize recording";
      }
    }
  }
  // Change the settings on each send channel.
  for (auto& kv : send_streams_) {
    kv.second->SetSend(send);
  }
  send_ = send;
}

其中调用AudioDeviceModuleImpl::InitRecording

int32_t AudioDeviceModuleImpl::InitRecording() {
  RTC_LOG(INFO) << __FUNCTION__;
  CHECKinitialized_();
  if (RecordingIsInitialized()) {
    return 0;
  }
  int32_t result = audio_device_->InitRecording();
  RTC_LOG(INFO) << "output: " << result;
  RTC_HISTOGRAM_BOOLEAN("WebRTC.Audio.InitRecordingSuccess",
                        static_cast<int>(result == 0));
  return result;
}

AudioDeviceWindowsCore::InitRecording

int32_t AudioDeviceWindowsCore::InitRecording() {
  MutexLock lock(&mutex_);
  if (_recording) {
    return -1;
  }
  if (_recIsInitialized) {
    return 0;
  }
    // 获取时钟频率，1 000 000 0
  if (QueryPerformanceFrequency(&_perfCounterFreq) == 0) {
    return -1;
  }
    // 本机是1.00000
  _perfCounterFactor = 10000000.0 / (double)_perfCounterFreq.QuadPart;
  if (_ptrDeviceIn == NULL) {
    return -1;
  }
  // Initialize the microphone (devices might have been added or removed)
  if (InitMicrophoneLocked() == -1) {
    RTC_LOG(LS_WARNING) << "InitMicrophone() failed";
  }
  // Ensure that the updated capturing endpoint device is valid
  if (_ptrDeviceIn == NULL) {
    return -1;
  }
  if (_builtInAecEnabled) {
    // The DMO will configure the capture device.
    return InitRecordingDMO();
  }
  HRESULT hr = S_OK;
  WAVEFORMATEX* pWfxIn = NULL;
  WAVEFORMATEXTENSIBLE Wfx = WAVEFORMATEXTENSIBLE();
  WAVEFORMATEX* pWfxClosestMatch = NULL;
  // Create COM object with IAudioClient interface.
  SAFE_RELEASE(_ptrClientIn);
  hr = _ptrDeviceIn->Activate(__uuidof(IAudioClient), CLSCTX_ALL, NULL,
                              (void**)&_ptrClientIn);
  EXIT_ON_ERROR(hr);
  // Retrieve the stream format that the audio engine uses for its internal
  // processing (mixing) of shared-mode streams.
  hr = _ptrClientIn->GetMixFormat(&pWfxIn);
  if (SUCCEEDED(hr)) {
    RTC_LOG(LS_VERBOSE) << "Audio Engine's current capturing mix format:";
    // format type
    RTC_LOG(LS_VERBOSE) << "wFormatTag     : 0x"
                        << rtc::ToHex(pWfxIn->wFormatTag) << " ("
                        << pWfxIn->wFormatTag << ")";
    // number of channels (i.e. mono, stereo...)
    RTC_LOG(LS_VERBOSE) << "nChannels      : " << pWfxIn->nChannels;
    // sample rate
    RTC_LOG(LS_VERBOSE) << "nSamplesPerSec : " << pWfxIn->nSamplesPerSec;
    // for buffer estimation
    RTC_LOG(LS_VERBOSE) << "nAvgBytesPerSec: " << pWfxIn->nAvgBytesPerSec;
    // block size of data
    RTC_LOG(LS_VERBOSE) << "nBlockAlign    : " << pWfxIn->nBlockAlign;
    // number of bits per sample of mono data
    RTC_LOG(LS_VERBOSE) << "wBitsPerSample : " << pWfxIn->wBitsPerSample;
    RTC_LOG(LS_VERBOSE) << "cbSize         : " << pWfxIn->cbSize;
  }
  // Set wave format
  Wfx.Format.wFormatTag = WAVE_FORMAT_EXTENSIBLE;
  Wfx.Format.wBitsPerSample = 16;
  Wfx.Format.cbSize = 22;
  Wfx.dwChannelMask = 0;
  Wfx.Samples.wValidBitsPerSample = Wfx.Format.wBitsPerSample;
  Wfx.SubFormat = KSDATAFORMAT_SUBTYPE_PCM;
  const int freqs[6] = {48000, 44100, 16000, 96000, 32000, 8000};
  hr = S_FALSE;
  // Iterate over frequencies and channels, in order of priority
  for (unsigned int freq = 0; freq < sizeof(freqs) / sizeof(freqs[0]); freq++) {
    for (unsigned int chan = 0;
         chan < sizeof(_recChannelsPrioList) / sizeof(_recChannelsPrioList[0]);
         chan++) {
      Wfx.Format.nChannels = _recChannelsPrioList[chan];
      Wfx.Format.nSamplesPerSec = freqs[freq];
      Wfx.Format.nBlockAlign =
          Wfx.Format.nChannels * Wfx.Format.wBitsPerSample / 8;
      Wfx.Format.nAvgBytesPerSec =
          Wfx.Format.nSamplesPerSec * Wfx.Format.nBlockAlign;
      // If the method succeeds and the audio endpoint device supports the
      // specified stream format, it returns S_OK. If the method succeeds and
      // provides a closest match to the specified format, it returns S_FALSE.
      hr = _ptrClientIn->IsFormatSupported(
          AUDCLNT_SHAREMODE_SHARED, (WAVEFORMATEX*)&Wfx, &pWfxClosestMatch);
      if (hr == S_OK) {
        break;
      } else {
        if (pWfxClosestMatch) {
          RTC_LOG(INFO) << "nChannels=" << Wfx.Format.nChannels
                        << ", nSamplesPerSec=" << Wfx.Format.nSamplesPerSec
                        << " is not supported. Closest match: "
                           "nChannels="
                        << pWfxClosestMatch->nChannels << ", nSamplesPerSec="
                        << pWfxClosestMatch->nSamplesPerSec;
          CoTaskMemFree(pWfxClosestMatch);
          pWfxClosestMatch = NULL;
        } else {
          RTC_LOG(INFO) << "nChannels=" << Wfx.Format.nChannels
                        << ", nSamplesPerSec=" << Wfx.Format.nSamplesPerSec
                        << " is not supported. No closest match.";
        }
      }
    }
    if (hr == S_OK)
      break;
  }
  if (hr == S_OK) {
    _recAudioFrameSize = Wfx.Format.nBlockAlign;
    _recSampleRate = Wfx.Format.nSamplesPerSec;
    _recBlockSize = Wfx.Format.nSamplesPerSec / 100;
    _recChannels = Wfx.Format.nChannels;
    RTC_LOG(LS_VERBOSE) << "VoE selected this capturing format:";
    RTC_LOG(LS_VERBOSE) << "wFormatTag        : 0x"
                        << rtc::ToHex(Wfx.Format.wFormatTag) << " ("
                        << Wfx.Format.wFormatTag << ")";
    RTC_LOG(LS_VERBOSE) << "nChannels         : " << Wfx.Format.nChannels;
    RTC_LOG(LS_VERBOSE) << "nSamplesPerSec    : " << Wfx.Format.nSamplesPerSec;
    RTC_LOG(LS_VERBOSE) << "nAvgBytesPerSec   : " << Wfx.Format.nAvgBytesPerSec;
    RTC_LOG(LS_VERBOSE) << "nBlockAlign       : " << Wfx.Format.nBlockAlign;
    RTC_LOG(LS_VERBOSE) << "wBitsPerSample    : " << Wfx.Format.wBitsPerSample;
    RTC_LOG(LS_VERBOSE) << "cbSize            : " << Wfx.Format.cbSize;
    RTC_LOG(LS_VERBOSE) << "Additional settings:";
    RTC_LOG(LS_VERBOSE) << "_recAudioFrameSize: " << _recAudioFrameSize;
    RTC_LOG(LS_VERBOSE) << "_recBlockSize     : " << _recBlockSize;
    RTC_LOG(LS_VERBOSE) << "_recChannels      : " << _recChannels;
  }
  // Create a capturing stream.
  hr = _ptrClientIn->Initialize(
      AUDCLNT_SHAREMODE_SHARED,  // share Audio Engine with other applications
      AUDCLNT_STREAMFLAGS_EVENTCALLBACK |  // processing of the audio buffer by
                                           // the client will be event driven
          AUDCLNT_STREAMFLAGS_NOPERSIST,   // volume and mute settings for an
                                           // audio session will not persist
                                           // across system restarts
      0,                    // required for event-driven shared mode
      0,                    // periodicity
      (WAVEFORMATEX*)&Wfx,  // selected wave format
      NULL);                // session GUID
  if (hr != S_OK) {
    RTC_LOG(LS_ERROR) << "IAudioClient::Initialize() failed:";
  }
  EXIT_ON_ERROR(hr);
  if (_ptrAudioBuffer) {
    // Update the audio buffer with the selected parameters
    _ptrAudioBuffer->SetRecordingSampleRate(_recSampleRate);
    _ptrAudioBuffer->SetRecordingChannels((uint8_t)_recChannels);
  } else {
    // We can enter this state during CoreAudioIsSupported() when no
    // AudioDeviceImplementation has been created, hence the AudioDeviceBuffer
    // does not exist. It is OK to end up here since we don't initiate any media
    // in CoreAudioIsSupported().
    RTC_LOG(LS_VERBOSE)
        << "AudioDeviceBuffer must be attached before streaming can start";
  }
  // Get the actual size of the shared (endpoint buffer).
  // Typical value is 960 audio frames <=> 20ms @ 48kHz sample rate.
  UINT bufferFrameCount(0);
  hr = _ptrClientIn->GetBufferSize(&bufferFrameCount);
  if (SUCCEEDED(hr)) {
    RTC_LOG(LS_VERBOSE) << "IAudioClient::GetBufferSize() => "
                        << bufferFrameCount << " (<=> "
                        << bufferFrameCount * _recAudioFrameSize << " bytes)";
  }
  // Set the event handle that the system signals when an audio buffer is ready
  // to be processed by the client.
  hr = _ptrClientIn->SetEventHandle(_hCaptureSamplesReadyEvent);
  EXIT_ON_ERROR(hr);
  // Get an IAudioCaptureClient interface.
  SAFE_RELEASE(_ptrCaptureClient);
  hr = _ptrClientIn->GetService(__uuidof(IAudioCaptureClient),
                                (void**)&_ptrCaptureClient);
  EXIT_ON_ERROR(hr);
  // Mark capture side as initialized
  _recIsInitialized = true;
  CoTaskMemFree(pWfxIn);
  CoTaskMemFree(pWfxClosestMatch);
  RTC_LOG(LS_VERBOSE) << "capture side is now initialized";
  return 0;
Exit:
  _TraceCOMError(hr);
  CoTaskMemFree(pWfxIn);
  CoTaskMemFree(pWfxClosestMatch);
  SAFE_RELEASE(_ptrClientIn);
  SAFE_RELEASE(_ptrCaptureClient);
  return -1;
}

InitRecordingDMO();

AudioDeviceWindowsCore::InitRecordingDMO

// Capture initialization when the built-in AEC DirectX Media Object (DMO) is
// used. Called from InitRecording(), most of which is skipped over. The DMO
// handles device initialization itself.
// Reference: http://msdn.microsoft.com/en-us/library/ff819492(v=vs.85).aspx
int32_t AudioDeviceWindowsCore::InitRecordingDMO() {
  assert(_builtInAecEnabled);
  assert(_dmo != NULL);
    // 主要是
  if (SetDMOProperties() == -1) {
    return -1;
  }
  DMO_MEDIA_TYPE mt = {};
  HRESULT hr = MoInitMediaType(&mt, sizeof(WAVEFORMATEX));
  if (FAILED(hr)) {
    MoFreeMediaType(&mt);
    _TraceCOMError(hr);
    return -1;
  }
  mt.majortype = MEDIATYPE_Audio;
  mt.subtype = MEDIASUBTYPE_PCM;
  mt.formattype = FORMAT_WaveFormatEx; // 数据格式
  // Supported formats
  // nChannels: 1 (in AEC-only mode)
  // nSamplesPerSec: 8000, 11025, 16000, 22050
  // wBitsPerSample: 16
  WAVEFORMATEX* ptrWav = reinterpret_cast<WAVEFORMATEX*>(mt.pbFormat);
  ptrWav->wFormatTag = WAVE_FORMAT_PCM;
  ptrWav->nChannels = 1; // 设置DMO的属性为AEC，只能是单通道
  // 16000 is the highest we can support with our resampler.
  ptrWav->nSamplesPerSec = 16000;
  ptrWav->nAvgBytesPerSec = 32000;
  ptrWav->nBlockAlign = 2;
  ptrWav->wBitsPerSample = 16;
  ptrWav->cbSize = 0;
  // Set the VoE format equal to the AEC output format.
  _recAudioFrameSize = ptrWav->nBlockAlign;
  _recSampleRate = ptrWav->nSamplesPerSec;
  _recBlockSize = ptrWav->nSamplesPerSec / 100;//10毫秒的采样个数
  _recChannels = ptrWav->nChannels;
  // Set the DMO output format parameters.
  hr = _dmo->SetOutputType(kAecCaptureStreamIndex, &mt, 0);
  MoFreeMediaType(&mt);
  if (FAILED(hr)) {
    _TraceCOMError(hr);
    return -1;
  }
    // 设置输出的Audio Buffer
  if (_ptrAudioBuffer) {
    _ptrAudioBuffer->SetRecordingSampleRate(_recSampleRate);
    _ptrAudioBuffer->SetRecordingChannels(_recChannels);
  } else {
    // Refer to InitRecording() for comments.
    RTC_LOG(LS_VERBOSE)
        << "AudioDeviceBuffer must be attached before streaming can start";
  }
  _mediaBuffer = new MediaBufferImpl(_recBlockSize * _recAudioFrameSize);
  // Optional, but if called, must be after media types are set.
    // 微软建议使用前调用。其实这时候没有调用的话，内部也会调用。。
  hr = _dmo->AllocateStreamingResources();
  if (FAILED(hr)) {
    _TraceCOMError(hr);
    return -1;
  }
  _recIsInitialized = true;
  RTC_LOG(LS_VERBOSE) << "Capture side is now initialized";
  return 0;
}

AudioDeviceWindowsCore::SetDMOProperties

int AudioDeviceWindowsCore::SetDMOProperties() {
  HRESULT hr = S_OK;
  assert(_dmo != NULL);
  rtc::scoped_refptr<IPropertyStore> ps;
  {
    IPropertyStore* ptrPS = NULL;
    hr = _dmo->QueryInterface(IID_IPropertyStore,
                              reinterpret_cast<void**>(&ptrPS));
    if (FAILED(hr) || ptrPS == NULL) {
      _TraceCOMError(hr);
      return -1;
    }
    ps = ptrPS;
    SAFE_RELEASE(ptrPS);
  }
  // Set the AEC system mode.
  // SINGLE_CHANNEL_AEC - AEC processing only.
  if (SetVtI4Property(ps, MFPKEY_WMAAECMA_SYSTEM_MODE, SINGLE_CHANNEL_AEC)) {
    return -1;
  }
  // Set the AEC source mode.
  // VARIANT_TRUE - Source mode (we poll the AEC for captured data).
  if (SetBoolProperty(ps, MFPKEY_WMAAECMA_DMO_SOURCE_MODE, VARIANT_TRUE) ==
      -1) {
    return -1;
  }
  // Enable the feature mode.
  // This lets us override all the default processing settings below.
  if (SetBoolProperty(ps, MFPKEY_WMAAECMA_FEATURE_MODE, VARIANT_TRUE) == -1) {
    return -1;
  }
  // Disable analog AGC (default enabled).
  if (SetBoolProperty(ps, MFPKEY_WMAAECMA_MIC_GAIN_BOUNDER, VARIANT_FALSE) ==
      -1) {
    return -1;
  }
  // Disable noise suppression (default enabled).
  // 0 - Disabled, 1 - Enabled
  if (SetVtI4Property(ps, MFPKEY_WMAAECMA_FEATR_NS, 0) == -1) {
    return -1;
  }
  // Relevant parameters to leave at default settings:
  // MFPKEY_WMAAECMA_FEATR_AGC - Digital AGC (disabled).
  // MFPKEY_WMAAECMA_FEATR_CENTER_CLIP - AEC center clipping (enabled).
  // MFPKEY_WMAAECMA_FEATR_ECHO_LENGTH - Filter length (256 ms).
  // TODO(andrew): investigate decresing the length to 128 ms.
  // MFPKEY_WMAAECMA_FEATR_FRAME_SIZE - Frame size (0).
  //   0 is automatic; defaults to 160 samples (or 10 ms frames at the
  //   selected 16 kHz) as long as mic array processing is disabled.
  // MFPKEY_WMAAECMA_FEATR_NOISE_FILL - Comfort noise (enabled).
  // MFPKEY_WMAAECMA_FEATR_VAD - VAD (disabled).
  // Set the devices selected by VoE. If using a default device, we need to
  // search for the device index.
  int inDevIndex = _inputDeviceIndex;
  int outDevIndex = _outputDeviceIndex;
  if (!_usingInputDeviceIndex) {
    ERole role = eCommunications;
    if (_inputDevice == AudioDeviceModule::kDefaultDevice) {
      role = eConsole;
    }
    if (_GetDefaultDeviceIndex(eCapture, role, &inDevIndex) == -1) {
      return -1;
    }
  }
  if (!_usingOutputDeviceIndex) {
    ERole role = eCommunications;
    if (_outputDevice == AudioDeviceModule::kDefaultDevice) {
      role = eConsole;
    }
    if (_GetDefaultDeviceIndex(eRender, role, &outDevIndex) == -1) {
      return -1;
    }
  }
  DWORD devIndex = static_cast<uint32_t>(outDevIndex << 16) +
                   static_cast<uint32_t>(0x0000ffff & inDevIndex);
  RTC_LOG(LS_VERBOSE) << "Capture device index: " << inDevIndex
                      << ", render device index: " << outDevIndex;
  if (SetVtI4Property(ps, MFPKEY_WMAAECMA_DEVICE_INDEXES, devIndex) == -1) {
    return -1;
  }
  return 0;
}

AudioDeviceWindowsCore::_GetDefaultDeviceIndex
-》
AudioDeviceWindowsCore::_GetDefaultDeviceID

李超WebRTC音视频-听课笔记

7-20 源码分析-InitRecording

前言

源码分析

WebRtcVoiceMediaChannel::SetSend

AudioDeviceWindowsCore::InitRecording

AudioDeviceWindowsCore::InitRecordingDMO

AudioDeviceWindowsCore::SetDMOProperties