几种缓冲区

image.png
packet_buffer 中的数据包经解码后保存到 decoded_buffer；decoded_buffer 经过 DSP 处理后的数据保存到 algorithm_buffer；最后将 algorithm_buffer 中的数据拷贝到 sync_buffer。

PacketBuffer入队

image.png

PacketBuffer出队

image.png

NetEqImpl::GetAudioInternal

image.png
h:\webrtc-20210315\webrtc-20210315\webrtc\webrtc-checkout\src\modules\audio_coding\neteq\neteq_impl.cc

  1. int NetEqImpl::GetAudioInternal(AudioFrame* audio_frame,
  2. bool* muted,
  3. absl::optional<Operation> action_override) {
  4. PacketList packet_list;
  5. DtmfEvent dtmf_event;
  6. Operation operation;
  7. bool play_dtmf;
  8. *muted = false;
  9. last_decoded_timestamps_.clear();
  10. last_decoded_packet_infos_.clear();
  11. tick_timer_->Increment();
  12. stats_->IncreaseCounter(output_size_samples_, fs_hz_);
  13. const auto lifetime_stats = stats_->GetLifetimeStatistics();
  14. expand_uma_logger_.UpdateSampleCounter(lifetime_stats.concealed_samples,
  15. fs_hz_);
  16. speech_expand_uma_logger_.UpdateSampleCounter(
  17. lifetime_stats.concealed_samples -
  18. lifetime_stats.silent_concealed_samples,
  19. fs_hz_);
  20. // Check for muted state.
  21. if (enable_muted_state_ && expand_->Muted() && packet_buffer_->Empty()) {
  22. RTC_DCHECK_EQ(last_mode_, Mode::kExpand);
  23. audio_frame->Reset();
  24. RTC_DCHECK(audio_frame->muted()); // Reset() should mute the frame.
  25. playout_timestamp_ += static_cast<uint32_t>(output_size_samples_);
  26. audio_frame->sample_rate_hz_ = fs_hz_;
  27. audio_frame->samples_per_channel_ = output_size_samples_;
  28. audio_frame->timestamp_ =
  29. first_packet_
  30. ? 0
  31. : timestamp_scaler_->ToExternal(playout_timestamp_) -
  32. static_cast<uint32_t>(audio_frame->samples_per_channel_);
  33. audio_frame->num_channels_ = sync_buffer_->Channels();
  34. stats_->ExpandedNoiseSamples(output_size_samples_, false);
  35. controller_->NotifyMutedState();
  36. *muted = true;
  37. return 0;
  38. }
  39. int return_value = GetDecision(&operation, &packet_list, &dtmf_event,
  40. &play_dtmf, action_override);
  41. if (return_value != 0) {
  42. last_mode_ = Mode::kError;
  43. return return_value;
  44. }
  45. AudioDecoder::SpeechType speech_type;
  46. int length = 0;
  47. const size_t start_num_packets = packet_list.size();
  48. int decode_return_value =
  49. Decode(&packet_list, &operation, &length, &speech_type);
  50. assert(vad_.get());
  51. bool sid_frame_available =
  52. (operation == Operation::kRfc3389Cng && !packet_list.empty());
  53. vad_->Update(decoded_buffer_.get(), static_cast<size_t>(length), speech_type,
  54. sid_frame_available, fs_hz_);
  55. // This is the criterion that we did decode some data through the speech
  56. // decoder, and the operation resulted in comfort noise.
  57. const bool codec_internal_sid_frame =
  58. (speech_type == AudioDecoder::kComfortNoise &&
  59. start_num_packets > packet_list.size());
  60. if (sid_frame_available || codec_internal_sid_frame) {
  61. // Start a new stopwatch since we are decoding a new CNG packet.
  62. generated_noise_stopwatch_ = tick_timer_->GetNewStopwatch();
  63. }
  64. algorithm_buffer_->Clear();
  65. switch (operation) {
  66. case Operation::kNormal: {
  67. DoNormal(decoded_buffer_.get(), length, speech_type, play_dtmf);
  68. if (length > 0) {
  69. stats_->DecodedOutputPlayed();
  70. }
  71. break;
  72. }
  73. case Operation::kMerge: {
  74. DoMerge(decoded_buffer_.get(), length, speech_type, play_dtmf);
  75. break;
  76. }
  77. case Operation::kExpand: {
  78. RTC_DCHECK_EQ(return_value, 0);
  79. if (!current_rtp_payload_type_ || !DoCodecPlc()) {
  80. return_value = DoExpand(play_dtmf);
  81. }
  82. RTC_DCHECK_GE(sync_buffer_->FutureLength() - expand_->overlap_length(),
  83. output_size_samples_);
  84. break;
  85. }
  86. case Operation::kAccelerate:
  87. case Operation::kFastAccelerate: {
  88. const bool fast_accelerate =
  89. enable_fast_accelerate_ && (operation == Operation::kFastAccelerate);
  90. return_value = DoAccelerate(decoded_buffer_.get(), length, speech_type,
  91. play_dtmf, fast_accelerate);
  92. break;
  93. }
  94. case Operation::kPreemptiveExpand: {
  95. return_value = DoPreemptiveExpand(decoded_buffer_.get(), length,
  96. speech_type, play_dtmf);
  97. break;
  98. }
  99. case Operation::kRfc3389Cng:
  100. case Operation::kRfc3389CngNoPacket: {
  101. return_value = DoRfc3389Cng(&packet_list, play_dtmf);
  102. break;
  103. }
  104. case Operation::kCodecInternalCng: {
  105. // This handles the case when there is no transmission and the decoder
  106. // should produce internal comfort noise.
  107. // TODO(hlundin): Write test for codec-internal CNG.
  108. DoCodecInternalCng(decoded_buffer_.get(), length);
  109. break;
  110. }
  111. case Operation::kDtmf: {
  112. // TODO(hlundin): Write test for this.
  113. return_value = DoDtmf(dtmf_event, &play_dtmf);
  114. break;
  115. }
  116. case Operation::kUndefined: {
  117. RTC_LOG(LS_ERROR) << "Invalid operation kUndefined.";
  118. assert(false); // This should not happen.
  119. last_mode_ = Mode::kError;
  120. return kInvalidOperation;
  121. }
  122. } // End of switch.
  123. last_operation_ = operation;
  124. if (return_value < 0) {
  125. return return_value;
  126. }
  127. if (last_mode_ != Mode::kRfc3389Cng) {
  128. comfort_noise_->Reset();
  129. }
  130. // We treat it as if all packets referenced to by |last_decoded_packet_infos_|
  131. // were mashed together when creating the samples in |algorithm_buffer_|.
  132. RtpPacketInfos packet_infos(last_decoded_packet_infos_);
  133. // Copy samples from |algorithm_buffer_| to |sync_buffer_|.
  134. //
  135. // TODO(bugs.webrtc.org/10757):
  136. // We would in the future also like to pass |packet_infos| so that we can do
  137. // sample-perfect tracking of that information across |sync_buffer_|.
  138. sync_buffer_->PushBack(*algorithm_buffer_);
  139. // Extract data from |sync_buffer_| to |output|.
  140. size_t num_output_samples_per_channel = output_size_samples_;
  141. size_t num_output_samples = output_size_samples_ * sync_buffer_->Channels();
  142. if (num_output_samples > AudioFrame::kMaxDataSizeSamples) {
  143. RTC_LOG(LS_WARNING) << "Output array is too short. "
  144. << AudioFrame::kMaxDataSizeSamples << " < "
  145. << output_size_samples_ << " * "
  146. << sync_buffer_->Channels();
  147. num_output_samples = AudioFrame::kMaxDataSizeSamples;
  148. num_output_samples_per_channel =
  149. AudioFrame::kMaxDataSizeSamples / sync_buffer_->Channels();
  150. }
  151. sync_buffer_->GetNextAudioInterleaved(num_output_samples_per_channel,
  152. audio_frame);
  153. audio_frame->sample_rate_hz_ = fs_hz_;
  154. // TODO(bugs.webrtc.org/10757):
  155. // We don't have the ability to properly track individual packets once their
  156. // audio samples have entered |sync_buffer_|. So for now, treat it as if
  157. // |packet_infos| from packets decoded by the current |GetAudioInternal()|
  158. // call were all consumed assembling the current audio frame and the current
  159. // audio frame only.
  160. audio_frame->packet_infos_ = std::move(packet_infos);
  161. if (sync_buffer_->FutureLength() < expand_->overlap_length()) {
  162. // The sync buffer should always contain |overlap_length| samples, but now
  163. // too many samples have been extracted. Reinstall the |overlap_length|
  164. // lookahead by moving the index.
  165. const size_t missing_lookahead_samples =
  166. expand_->overlap_length() - sync_buffer_->FutureLength();
  167. RTC_DCHECK_GE(sync_buffer_->next_index(), missing_lookahead_samples);
  168. sync_buffer_->set_next_index(sync_buffer_->next_index() -
  169. missing_lookahead_samples);
  170. }
  171. if (audio_frame->samples_per_channel_ != output_size_samples_) {
  172. RTC_LOG(LS_ERROR) << "audio_frame->samples_per_channel_ ("
  173. << audio_frame->samples_per_channel_
  174. << ") != output_size_samples_ (" << output_size_samples_
  175. << ")";
  176. // TODO(minyue): treatment of under-run, filling zeros
  177. audio_frame->Mute();
  178. return kSampleUnderrun;
  179. }
  180. // Should always have overlap samples left in the |sync_buffer_|.
  181. RTC_DCHECK_GE(sync_buffer_->FutureLength(), expand_->overlap_length());
  182. // TODO(yujo): For muted frames, this can be a copy rather than an addition.
  183. if (play_dtmf) {
  184. return_value = DtmfOverdub(dtmf_event, sync_buffer_->Channels(),
  185. audio_frame->mutable_data());
  186. }
  187. // Update the background noise parameters if last operation wrote data
  188. // straight from the decoder to the |sync_buffer_|. That is, none of the
  189. // operations that modify the signal can be followed by a parameter update.
  190. if ((last_mode_ == Mode::kNormal) || (last_mode_ == Mode::kAccelerateFail) ||
  191. (last_mode_ == Mode::kPreemptiveExpandFail) ||
  192. (last_mode_ == Mode::kRfc3389Cng) ||
  193. (last_mode_ == Mode::kCodecInternalCng)) {
  194. background_noise_->Update(*sync_buffer_, *vad_.get());
  195. }
  196. if (operation == Operation::kDtmf) {
  197. // DTMF data was written the end of |sync_buffer_|.
  198. // Update index to end of DTMF data in |sync_buffer_|.
  199. sync_buffer_->set_dtmf_index(sync_buffer_->Size());
  200. }
  201. if (last_mode_ != Mode::kExpand && last_mode_ != Mode::kCodecPlc) {
  202. // If last operation was not expand, calculate the |playout_timestamp_| from
  203. // the |sync_buffer_|. However, do not update the |playout_timestamp_| if it
  204. // would be moved "backwards".
  205. uint32_t temp_timestamp =
  206. sync_buffer_->end_timestamp() -
  207. static_cast<uint32_t>(sync_buffer_->FutureLength());
  208. if (static_cast<int32_t>(temp_timestamp - playout_timestamp_) > 0) {
  209. playout_timestamp_ = temp_timestamp;
  210. }
  211. } else {
  212. // Use dead reckoning to estimate the |playout_timestamp_|.
  213. playout_timestamp_ += static_cast<uint32_t>(output_size_samples_);
  214. }
  215. // Set the timestamp in the audio frame to zero before the first packet has
  216. // been inserted. Otherwise, subtract the frame size in samples to get the
  217. // timestamp of the first sample in the frame (playout_timestamp_ is the
  218. // last + 1).
  219. audio_frame->timestamp_ =
  220. first_packet_
  221. ? 0
  222. : timestamp_scaler_->ToExternal(playout_timestamp_) -
  223. static_cast<uint32_t>(audio_frame->samples_per_channel_);
  224. if (!(last_mode_ == Mode::kRfc3389Cng ||
  225. last_mode_ == Mode::kCodecInternalCng || last_mode_ == Mode::kExpand ||
  226. last_mode_ == Mode::kCodecPlc)) {
  227. generated_noise_stopwatch_.reset();
  228. }
  229. if (decode_return_value)
  230. return decode_return_value;
  231. return return_value;
  232. }