Preface


Source Code Analysis

WebRtcVoiceMediaChannel::SetSend

h:\webrtc-20210315\webrtc-20210315\webrtc\webrtc-checkout\src\media\engine\webrtc_voice_engine.cc

    void WebRtcVoiceMediaChannel::SetSend(bool send) {
      // ...
      // Apply channel specific options, and initialize the ADM for recording (this
      // may take time on some platforms, e.g. Android).
      if (send) {
        engine()->ApplyOptions(options_);
        // InitRecording() may return an error if the ADM is already recording.
        if (!engine()->adm()->RecordingIsInitialized() &&
            !engine()->adm()->Recording()) {
          if (engine()->adm()->InitRecording() != 0) {
            RTC_LOG(LS_WARNING) << "Failed to initialize recording";
          }
        }
      }
      // Change the settings on each send channel.
      for (auto& kv : send_streams_) {
        kv.second->SetSend(send);
      }
      send_ = send;
    }

Here, engine()->adm()->InitRecording() resolves to AudioDeviceModuleImpl::InitRecording:

    int32_t AudioDeviceModuleImpl::InitRecording() {
      RTC_LOG(INFO) << __FUNCTION__;
      CHECKinitialized_();
      if (RecordingIsInitialized()) {
        return 0;
      }
      int32_t result = audio_device_->InitRecording();
      RTC_LOG(INFO) << "output: " << result;
      RTC_HISTOGRAM_BOOLEAN("WebRTC.Audio.InitRecordingSuccess",
                            static_cast<int>(result == 0));
      return result;
    }
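For orientation, here is a minimal, hedged sketch of how this recording path is normally driven through the public AudioDeviceModule interface. The methods shown (Init, SetRecordingDevice, InitRecording, StartRecording, RecordingIsInitialized, Recording) are the real interface; the standalone setup around them (default task-queue factory, device index 0) is illustrative only, since in WebRTC the voice engine performs these steps internally when a send stream starts.

    // Sketch only: a standalone AudioDeviceModule driven the same way
    // WebRtcVoiceMediaChannel::SetSend(true) drives engine()->adm().
    #include "api/task_queue/default_task_queue_factory.h"
    #include "modules/audio_device/include/audio_device.h"

    void StartCaptureSketch() {
      auto task_queue_factory = webrtc::CreateDefaultTaskQueueFactory();
      auto adm = webrtc::AudioDeviceModule::Create(
          webrtc::AudioDeviceModule::kPlatformDefaultAudio,
          task_queue_factory.get());
      adm->Init();                 // creates the platform AudioDevice (Core Audio on Windows)
      adm->SetRecordingDevice(0);  // pick a capture device; index 0 is illustrative
      if (!adm->RecordingIsInitialized() && !adm->Recording()) {
        adm->InitRecording();      // ends up in AudioDeviceWindowsCore::InitRecording()
      }
      adm->StartRecording();       // starts the capture thread
    }

On Windows the platform implementation behind audio_device_ is AudioDeviceWindowsCore, whose InitRecording is analyzed next.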

AudioDeviceWindowsCore::InitRecording

    int32_t AudioDeviceWindowsCore::InitRecording() {
      MutexLock lock(&mutex_);

      if (_recording) {
        return -1;
      }
      if (_recIsInitialized) {
        return 0;
      }

      // Query the performance-counter frequency (10,000,000 on this machine).
      if (QueryPerformanceFrequency(&_perfCounterFreq) == 0) {
        return -1;
      }
      // 1.00000 on this machine.
      _perfCounterFactor = 10000000.0 / (double)_perfCounterFreq.QuadPart;

      if (_ptrDeviceIn == NULL) {
        return -1;
      }

      // Initialize the microphone (devices might have been added or removed)
      if (InitMicrophoneLocked() == -1) {
        RTC_LOG(LS_WARNING) << "InitMicrophone() failed";
      }

      // Ensure that the updated capturing endpoint device is valid
      if (_ptrDeviceIn == NULL) {
        return -1;
      }

      if (_builtInAecEnabled) {
        // The DMO will configure the capture device.
        return InitRecordingDMO();
      }

      HRESULT hr = S_OK;
      WAVEFORMATEX* pWfxIn = NULL;
      WAVEFORMATEXTENSIBLE Wfx = WAVEFORMATEXTENSIBLE();
      WAVEFORMATEX* pWfxClosestMatch = NULL;

      // Create COM object with IAudioClient interface.
      SAFE_RELEASE(_ptrClientIn);
      hr = _ptrDeviceIn->Activate(__uuidof(IAudioClient), CLSCTX_ALL, NULL,
                                  (void**)&_ptrClientIn);
      EXIT_ON_ERROR(hr);

      // Retrieve the stream format that the audio engine uses for its internal
      // processing (mixing) of shared-mode streams.
      hr = _ptrClientIn->GetMixFormat(&pWfxIn);
      if (SUCCEEDED(hr)) {
        RTC_LOG(LS_VERBOSE) << "Audio Engine's current capturing mix format:";
        // format type
        RTC_LOG(LS_VERBOSE) << "wFormatTag : 0x"
                            << rtc::ToHex(pWfxIn->wFormatTag) << " ("
                            << pWfxIn->wFormatTag << ")";
        // number of channels (i.e. mono, stereo...)
        RTC_LOG(LS_VERBOSE) << "nChannels : " << pWfxIn->nChannels;
        // sample rate
        RTC_LOG(LS_VERBOSE) << "nSamplesPerSec : " << pWfxIn->nSamplesPerSec;
        // for buffer estimation
        RTC_LOG(LS_VERBOSE) << "nAvgBytesPerSec: " << pWfxIn->nAvgBytesPerSec;
        // block size of data
        RTC_LOG(LS_VERBOSE) << "nBlockAlign : " << pWfxIn->nBlockAlign;
        // number of bits per sample of mono data
        RTC_LOG(LS_VERBOSE) << "wBitsPerSample : " << pWfxIn->wBitsPerSample;
        RTC_LOG(LS_VERBOSE) << "cbSize : " << pWfxIn->cbSize;
      }

      // Set wave format
      Wfx.Format.wFormatTag = WAVE_FORMAT_EXTENSIBLE;
      Wfx.Format.wBitsPerSample = 16;
      Wfx.Format.cbSize = 22;
      Wfx.dwChannelMask = 0;
      Wfx.Samples.wValidBitsPerSample = Wfx.Format.wBitsPerSample;
      Wfx.SubFormat = KSDATAFORMAT_SUBTYPE_PCM;

      const int freqs[6] = {48000, 44100, 16000, 96000, 32000, 8000};
      hr = S_FALSE;

      // Iterate over frequencies and channels, in order of priority
      for (unsigned int freq = 0; freq < sizeof(freqs) / sizeof(freqs[0]); freq++) {
        for (unsigned int chan = 0;
             chan < sizeof(_recChannelsPrioList) / sizeof(_recChannelsPrioList[0]);
             chan++) {
          Wfx.Format.nChannels = _recChannelsPrioList[chan];
          Wfx.Format.nSamplesPerSec = freqs[freq];
          Wfx.Format.nBlockAlign =
              Wfx.Format.nChannels * Wfx.Format.wBitsPerSample / 8;
          Wfx.Format.nAvgBytesPerSec =
              Wfx.Format.nSamplesPerSec * Wfx.Format.nBlockAlign;
          // If the method succeeds and the audio endpoint device supports the
          // specified stream format, it returns S_OK. If the method succeeds and
          // provides a closest match to the specified format, it returns S_FALSE.
          hr = _ptrClientIn->IsFormatSupported(
              AUDCLNT_SHAREMODE_SHARED, (WAVEFORMATEX*)&Wfx, &pWfxClosestMatch);
          if (hr == S_OK) {
            break;
          } else {
            if (pWfxClosestMatch) {
              RTC_LOG(INFO) << "nChannels=" << Wfx.Format.nChannels
                            << ", nSamplesPerSec=" << Wfx.Format.nSamplesPerSec
                            << " is not supported. Closest match: "
                               "nChannels="
                            << pWfxClosestMatch->nChannels << ", nSamplesPerSec="
                            << pWfxClosestMatch->nSamplesPerSec;
              CoTaskMemFree(pWfxClosestMatch);
              pWfxClosestMatch = NULL;
            } else {
              RTC_LOG(INFO) << "nChannels=" << Wfx.Format.nChannels
                            << ", nSamplesPerSec=" << Wfx.Format.nSamplesPerSec
                            << " is not supported. No closest match.";
            }
          }
        }
        if (hr == S_OK)
          break;
      }

      if (hr == S_OK) {
        _recAudioFrameSize = Wfx.Format.nBlockAlign;
        _recSampleRate = Wfx.Format.nSamplesPerSec;
        _recBlockSize = Wfx.Format.nSamplesPerSec / 100;
        _recChannels = Wfx.Format.nChannels;

        RTC_LOG(LS_VERBOSE) << "VoE selected this capturing format:";
        RTC_LOG(LS_VERBOSE) << "wFormatTag : 0x"
                            << rtc::ToHex(Wfx.Format.wFormatTag) << " ("
                            << Wfx.Format.wFormatTag << ")";
        RTC_LOG(LS_VERBOSE) << "nChannels : " << Wfx.Format.nChannels;
        RTC_LOG(LS_VERBOSE) << "nSamplesPerSec : " << Wfx.Format.nSamplesPerSec;
        RTC_LOG(LS_VERBOSE) << "nAvgBytesPerSec : " << Wfx.Format.nAvgBytesPerSec;
        RTC_LOG(LS_VERBOSE) << "nBlockAlign : " << Wfx.Format.nBlockAlign;
        RTC_LOG(LS_VERBOSE) << "wBitsPerSample : " << Wfx.Format.wBitsPerSample;
        RTC_LOG(LS_VERBOSE) << "cbSize : " << Wfx.Format.cbSize;
        RTC_LOG(LS_VERBOSE) << "Additional settings:";
        RTC_LOG(LS_VERBOSE) << "_recAudioFrameSize: " << _recAudioFrameSize;
        RTC_LOG(LS_VERBOSE) << "_recBlockSize : " << _recBlockSize;
        RTC_LOG(LS_VERBOSE) << "_recChannels : " << _recChannels;
      }

      // Create a capturing stream.
      hr = _ptrClientIn->Initialize(
          AUDCLNT_SHAREMODE_SHARED,  // share Audio Engine with other applications
          AUDCLNT_STREAMFLAGS_EVENTCALLBACK |  // processing of the audio buffer by
                                               // the client will be event driven
              AUDCLNT_STREAMFLAGS_NOPERSIST,   // volume and mute settings for an
                                               // audio session will not persist
                                               // across system restarts
          0,                    // required for event-driven shared mode
          0,                    // periodicity
          (WAVEFORMATEX*)&Wfx,  // selected wave format
          NULL);                // session GUID

      if (hr != S_OK) {
        RTC_LOG(LS_ERROR) << "IAudioClient::Initialize() failed:";
      }
      EXIT_ON_ERROR(hr);

      if (_ptrAudioBuffer) {
        // Update the audio buffer with the selected parameters
        _ptrAudioBuffer->SetRecordingSampleRate(_recSampleRate);
        _ptrAudioBuffer->SetRecordingChannels((uint8_t)_recChannels);
      } else {
        // We can enter this state during CoreAudioIsSupported() when no
        // AudioDeviceImplementation has been created, hence the AudioDeviceBuffer
        // does not exist. It is OK to end up here since we don't initiate any media
        // in CoreAudioIsSupported().
        RTC_LOG(LS_VERBOSE)
            << "AudioDeviceBuffer must be attached before streaming can start";
      }

      // Get the actual size of the shared (endpoint buffer).
      // Typical value is 960 audio frames <=> 20ms @ 48kHz sample rate.
      UINT bufferFrameCount(0);
      hr = _ptrClientIn->GetBufferSize(&bufferFrameCount);
      if (SUCCEEDED(hr)) {
        RTC_LOG(LS_VERBOSE) << "IAudioClient::GetBufferSize() => "
                            << bufferFrameCount << " (<=> "
                            << bufferFrameCount * _recAudioFrameSize << " bytes)";
      }

      // Set the event handle that the system signals when an audio buffer is ready
      // to be processed by the client.
      hr = _ptrClientIn->SetEventHandle(_hCaptureSamplesReadyEvent);
      EXIT_ON_ERROR(hr);

      // Get an IAudioCaptureClient interface.
      SAFE_RELEASE(_ptrCaptureClient);
      hr = _ptrClientIn->GetService(__uuidof(IAudioCaptureClient),
                                    (void**)&_ptrCaptureClient);
      EXIT_ON_ERROR(hr);

      // Mark capture side as initialized
      _recIsInitialized = true;

      CoTaskMemFree(pWfxIn);
      CoTaskMemFree(pWfxClosestMatch);

      RTC_LOG(LS_VERBOSE) << "capture side is now initialized";
      return 0;

    Exit:
      _TraceCOMError(hr);
      CoTaskMemFree(pWfxIn);
      CoTaskMemFree(pWfxClosestMatch);
      SAFE_RELEASE(_ptrClientIn);
      SAFE_RELEASE(_ptrCaptureClient);
      return -1;
    }
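To make the derived values concrete, here is a small standalone sketch (not WebRTC code) that reproduces the arithmetic above, assuming the format-negotiation loop settled on 48 kHz stereo, 16-bit:

    // Standalone sketch: recomputes _recAudioFrameSize / _recBlockSize the same
    // way InitRecording() does, for an assumed 48 kHz, stereo, 16-bit format.
    #include <cstdio>

    int main() {
      const int nChannels = 2;           // stereo
      const int wBitsPerSample = 16;
      const int nSamplesPerSec = 48000;  // selected by the priority loop

      const int nBlockAlign = nChannels * wBitsPerSample / 8;    // 4 bytes per frame
      const int nAvgBytesPerSec = nSamplesPerSec * nBlockAlign;  // 192000 bytes/s
      const int recAudioFrameSize = nBlockAlign;                 // 4
      const int recBlockSize = nSamplesPerSec / 100;             // 480 frames = 10 ms

      std::printf("frame size=%d bytes, 10 ms block=%d frames (%d bytes), %d B/s\n",
                  recAudioFrameSize, recBlockSize,
                  recBlockSize * recAudioFrameSize, nAvgBytesPerSec);
      // Consistent with the "960 audio frames <=> 20 ms @ 48 kHz" comment above:
      // the shared endpoint buffer holds two of these 10 ms blocks.
      return 0;
    }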

When the built-in AEC is enabled, InitRecording() hands the capture setup over to InitRecordingDMO():

AudioDeviceWindowsCore::InitRecordingDMO

    // Capture initialization when the built-in AEC DirectX Media Object (DMO) is
    // used. Called from InitRecording(), most of which is skipped over. The DMO
    // handles device initialization itself.
    // Reference: http://msdn.microsoft.com/en-us/library/ff819492(v=vs.85).aspx
    int32_t AudioDeviceWindowsCore::InitRecordingDMO() {
      assert(_builtInAecEnabled);
      assert(_dmo != NULL);

      // The key step: configure the AEC DMO's properties.
      if (SetDMOProperties() == -1) {
        return -1;
      }

      DMO_MEDIA_TYPE mt = {};
      HRESULT hr = MoInitMediaType(&mt, sizeof(WAVEFORMATEX));
      if (FAILED(hr)) {
        MoFreeMediaType(&mt);
        _TraceCOMError(hr);
        return -1;
      }
      mt.majortype = MEDIATYPE_Audio;
      mt.subtype = MEDIASUBTYPE_PCM;
      mt.formattype = FORMAT_WaveFormatEx;  // data format

      // Supported formats
      // nChannels: 1 (in AEC-only mode)
      // nSamplesPerSec: 8000, 11025, 16000, 22050
      // wBitsPerSample: 16
      WAVEFORMATEX* ptrWav = reinterpret_cast<WAVEFORMATEX*>(mt.pbFormat);
      ptrWav->wFormatTag = WAVE_FORMAT_PCM;
      ptrWav->nChannels = 1;  // with the DMO in AEC mode, only mono is supported
      // 16000 is the highest we can support with our resampler.
      ptrWav->nSamplesPerSec = 16000;
      ptrWav->nAvgBytesPerSec = 32000;
      ptrWav->nBlockAlign = 2;
      ptrWav->wBitsPerSample = 16;
      ptrWav->cbSize = 0;

      // Set the VoE format equal to the AEC output format.
      _recAudioFrameSize = ptrWav->nBlockAlign;
      _recSampleRate = ptrWav->nSamplesPerSec;
      _recBlockSize = ptrWav->nSamplesPerSec / 100;  // number of samples per 10 ms
      _recChannels = ptrWav->nChannels;

      // Set the DMO output format parameters.
      hr = _dmo->SetOutputType(kAecCaptureStreamIndex, &mt, 0);
      MoFreeMediaType(&mt);
      if (FAILED(hr)) {
        _TraceCOMError(hr);
        return -1;
      }

      // Propagate the format to the output audio buffer.
      if (_ptrAudioBuffer) {
        _ptrAudioBuffer->SetRecordingSampleRate(_recSampleRate);
        _ptrAudioBuffer->SetRecordingChannels(_recChannels);
      } else {
        // Refer to InitRecording() for comments.
        RTC_LOG(LS_VERBOSE)
            << "AudioDeviceBuffer must be attached before streaming can start";
      }

      _mediaBuffer = new MediaBufferImpl(_recBlockSize * _recAudioFrameSize);

      // Optional, but if called, must be after media types are set.
      // Microsoft recommends calling it before use; if it is not called
      // explicitly, the DMO invokes it internally anyway.
      hr = _dmo->AllocateStreamingResources();
      if (FAILED(hr)) {
        _TraceCOMError(hr);
        return -1;
      }

      _recIsInitialized = true;
      RTC_LOG(LS_VERBOSE) << "Capture side is now initialized";

      return 0;
    }
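Once InitRecordingDMO() has succeeded, the capture thread later pulls 10 ms blocks of AEC-processed audio from the DMO (it was put into source mode in SetDMOProperties below). The following is a simplified, hedged sketch of one such pull; dmo and mediaBuffer stand in for _dmo and the MediaBufferImpl allocated above, and this is not the actual WebRTC capture-thread code:

    // Sketch only: one ProcessOutput() call drains up to one 10 ms block of
    // 16 kHz / mono / 16-bit PCM with echo cancellation applied.
    #include <windows.h>
    #include <dmo.h>

    HRESULT PullOneBlock(IMediaObject* dmo, IMediaBuffer* mediaBuffer) {
      DMO_OUTPUT_DATA_BUFFER dmoBuffer = {};
      dmoBuffer.pBuffer = mediaBuffer;  // sized _recBlockSize * _recAudioFrameSize
      dmoBuffer.pBuffer->AddRef();

      DWORD dwStatus = 0;
      HRESULT hr = dmo->ProcessOutput(0, 1, &dmoBuffer, &dwStatus);
      if (SUCCEEDED(hr)) {
        BYTE* data = NULL;
        DWORD length = 0;
        dmoBuffer.pBuffer->GetBufferAndLength(&data, &length);
        // 'length' bytes of echo-cancelled capture data; in WebRTC these samples
        // are handed to the AudioDeviceBuffer set up above.
        dmoBuffer.pBuffer->SetLength(0);  // reset the buffer for the next pull
      }
      dmoBuffer.pBuffer->Release();
      return hr;
    }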

AudioDeviceWindowsCore::SetDMOProperties

    int AudioDeviceWindowsCore::SetDMOProperties() {
      HRESULT hr = S_OK;
      assert(_dmo != NULL);

      rtc::scoped_refptr<IPropertyStore> ps;
      {
        IPropertyStore* ptrPS = NULL;
        hr = _dmo->QueryInterface(IID_IPropertyStore,
                                  reinterpret_cast<void**>(&ptrPS));
        if (FAILED(hr) || ptrPS == NULL) {
          _TraceCOMError(hr);
          return -1;
        }
        ps = ptrPS;
        SAFE_RELEASE(ptrPS);
      }

      // Set the AEC system mode.
      // SINGLE_CHANNEL_AEC - AEC processing only.
      if (SetVtI4Property(ps, MFPKEY_WMAAECMA_SYSTEM_MODE, SINGLE_CHANNEL_AEC)) {
        return -1;
      }

      // Set the AEC source mode.
      // VARIANT_TRUE - Source mode (we poll the AEC for captured data).
      if (SetBoolProperty(ps, MFPKEY_WMAAECMA_DMO_SOURCE_MODE, VARIANT_TRUE) ==
          -1) {
        return -1;
      }

      // Enable the feature mode.
      // This lets us override all the default processing settings below.
      if (SetBoolProperty(ps, MFPKEY_WMAAECMA_FEATURE_MODE, VARIANT_TRUE) == -1) {
        return -1;
      }

      // Disable analog AGC (default enabled).
      if (SetBoolProperty(ps, MFPKEY_WMAAECMA_MIC_GAIN_BOUNDER, VARIANT_FALSE) ==
          -1) {
        return -1;
      }

      // Disable noise suppression (default enabled).
      // 0 - Disabled, 1 - Enabled
      if (SetVtI4Property(ps, MFPKEY_WMAAECMA_FEATR_NS, 0) == -1) {
        return -1;
      }

      // Relevant parameters to leave at default settings:
      // MFPKEY_WMAAECMA_FEATR_AGC - Digital AGC (disabled).
      // MFPKEY_WMAAECMA_FEATR_CENTER_CLIP - AEC center clipping (enabled).
      // MFPKEY_WMAAECMA_FEATR_ECHO_LENGTH - Filter length (256 ms).
      //   TODO(andrew): investigate decreasing the length to 128 ms.
      // MFPKEY_WMAAECMA_FEATR_FRAME_SIZE - Frame size (0).
      //   0 is automatic; defaults to 160 samples (or 10 ms frames at the
      //   selected 16 kHz) as long as mic array processing is disabled.
      // MFPKEY_WMAAECMA_FEATR_NOISE_FILL - Comfort noise (enabled).
      // MFPKEY_WMAAECMA_FEATR_VAD - VAD (disabled).

      // Set the devices selected by VoE. If using a default device, we need to
      // search for the device index.
      int inDevIndex = _inputDeviceIndex;
      int outDevIndex = _outputDeviceIndex;
      if (!_usingInputDeviceIndex) {
        ERole role = eCommunications;
        if (_inputDevice == AudioDeviceModule::kDefaultDevice) {
          role = eConsole;
        }
        if (_GetDefaultDeviceIndex(eCapture, role, &inDevIndex) == -1) {
          return -1;
        }
      }

      if (!_usingOutputDeviceIndex) {
        ERole role = eCommunications;
        if (_outputDevice == AudioDeviceModule::kDefaultDevice) {
          role = eConsole;
        }
        if (_GetDefaultDeviceIndex(eRender, role, &outDevIndex) == -1) {
          return -1;
        }
      }

      DWORD devIndex = static_cast<uint32_t>(outDevIndex << 16) +
                       static_cast<uint32_t>(0x0000ffff & inDevIndex);
      RTC_LOG(LS_VERBOSE) << "Capture device index: " << inDevIndex
                          << ", render device index: " << outDevIndex;
      if (SetVtI4Property(ps, MFPKEY_WMAAECMA_DEVICE_INDEXES, devIndex) == -1) {
        return -1;
      }

      return 0;
    }
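SetBoolProperty() and SetVtI4Property() are small helpers not shown in this article; conceptually they wrap IPropertyStore::SetValue() with a PROPVARIANT of the matching type. Here is a hedged sketch of what such a helper looks like (the exact WebRTC implementation may differ in details):

    // Sketch of a VT_BOOL property setter for the AEC DMO's property store.
    // The MFPKEY_WMAAECMA_* keys used above come from <wmcodecdsp.h>.
    #include <windows.h>
    #include <propidl.h>
    #include <propsys.h>

    int SetBoolPropertySketch(IPropertyStore* ps,
                              REFPROPERTYKEY key,
                              VARIANT_BOOL value) {
      PROPVARIANT pv;
      PropVariantInit(&pv);
      pv.vt = VT_BOOL;
      pv.boolVal = value;                  // VARIANT_TRUE / VARIANT_FALSE
      HRESULT hr = ps->SetValue(key, pv);  // e.g. MFPKEY_WMAAECMA_DMO_SOURCE_MODE
      PropVariantClear(&pv);
      return SUCCEEDED(hr) ? 0 : -1;
    }

Also note how the device-index property is encoded: the DWORD written to MFPKEY_WMAAECMA_DEVICE_INDEXES packs the render device index into the high 16 bits and the capture device index into the low 16 bits (devIndex = (outDevIndex << 16) | inDevIndex), which is why both indices must be resolved before the property is set.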

AudioDeviceWindowsCore::_GetDefaultDeviceIndex -> AudioDeviceWindowsCore::_GetDefaultDeviceID

_GetDefaultDeviceIndex resolves the index of the default capture (or render) endpoint for the given role; it calls _GetDefaultDeviceID to obtain the default endpoint's ID string and then matches that ID against the enumerated device collection to find the corresponding index.
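The ID lookup itself goes through the public Core Audio device APIs. A minimal, hedged sketch of that lookup (not the exact WebRTC helper; COM is assumed to be initialized on the calling thread):

    // Sketch: fetch the ID string of the default capture endpoint for the
    // communications role, i.e. the information _GetDefaultDeviceID returns.
    #include <windows.h>
    #include <mmdeviceapi.h>

    HRESULT GetDefaultCaptureDeviceId(LPWSTR* deviceId) {
      IMMDeviceEnumerator* enumerator = NULL;
      IMMDevice* device = NULL;

      HRESULT hr = CoCreateInstance(__uuidof(MMDeviceEnumerator), NULL, CLSCTX_ALL,
                                    __uuidof(IMMDeviceEnumerator),
                                    (void**)&enumerator);
      if (SUCCEEDED(hr)) {
        hr = enumerator->GetDefaultAudioEndpoint(eCapture, eCommunications, &device);
      }
      if (SUCCEEDED(hr)) {
        hr = device->GetId(deviceId);  // caller frees the string with CoTaskMemFree()
      }
      if (device) device->Release();
      if (enumerator) enumerator->Release();
      return hr;
    }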