diff options
author | ouwou <26526779+ouwou@users.noreply.github.com> | 2023-08-19 21:21:39 -0400 |
---|---|---|
committer | ouwou <26526779+ouwou@users.noreply.github.com> | 2023-08-19 21:21:39 -0400 |
commit | 3dd192eed2f706df7581b6655dc4b1843d4df4d8 (patch) | |
tree | 97f7350ef07782a00f6878b9a52a1aba1b918697 /src/audio | |
parent | d208c64be1e19d85882ccafc74a8cd4a8e648e84 (diff) | |
download | abaddon-portaudio-3dd192eed2f706df7581b6655dc4b1843d4df4d8.tar.gz abaddon-portaudio-3dd192eed2f706df7581b6655dc4b1843d4df4d8.zip |
add noise suppression
Diffstat (limited to 'src/audio')
-rw-r--r-- | src/audio/manager.cpp | 60 | ||||
-rw-r--r-- | src/audio/manager.hpp | 9 |
2 files changed, 56 insertions, 13 deletions
diff --git a/src/audio/manager.cpp b/src/audio/manager.cpp index e47b220..dc3e7e9 100644 --- a/src/audio/manager.cpp +++ b/src/audio/manager.cpp @@ -423,19 +423,36 @@ void AudioManager::OnCapturedPCM(const int16_t *pcm, ma_uint32 frames) { UpdateCaptureVolume(new_pcm.data(), frames); + static std::array<float, 480> denoised_L; + static std::array<float, 480> denoised_R; + switch (m_vad_method) { case VADMethod::Gate: if (!CheckVADVoiceGate()) return; break; #ifdef WITH_RNNOISE case VADMethod::RNNoise: - if (!CheckVADRNNoise(pcm)) return; + if (!CheckVADRNNoise(new_pcm.data(), denoised_L.data(), denoised_R.data())) return; break; #endif } m_enc_mutex.lock(); - int payload_len = opus_encode(m_encoder, new_pcm.data(), 480, static_cast<unsigned char *>(m_opus_buffer), 1275); + int payload_len = -1; + + if (m_enable_noise_suppression) { + static std::array<int16_t, 960> denoised_interleaved; + for (size_t i = 0; i < 480; i++) { + denoised_interleaved[i * 2] = static_cast<int16_t>(denoised_L[i]); + } + for (size_t i = 0; i < 480; i++) { + denoised_interleaved[i * 2 + 1] = static_cast<int16_t>(denoised_R[i]); + } + payload_len = opus_encode(m_encoder, denoised_interleaved.data(), 480, static_cast<unsigned char *>(m_opus_buffer), 1275); + } else { + payload_len = opus_encode(m_encoder, new_pcm.data(), 480, static_cast<unsigned char *>(m_opus_buffer), 1275); + } + m_enc_mutex.unlock(); if (payload_len < 0) { spdlog::get("audio")->error("encoding error: {}", payload_len); @@ -480,15 +497,23 @@ bool AudioManager::CheckVADVoiceGate() { } #ifdef WITH_RNNOISE -bool AudioManager::CheckVADRNNoise(const int16_t *pcm) { - static float denoised[480]; +bool AudioManager::CheckVADRNNoise(const int16_t *pcm, float *denoised_left, float *denoised_right) { + // use left channel for vad, only denoise right if noise suppression enabled + std::unique_lock<std::mutex> _(m_rnn_mutex); + static float rnnoise_input[480]; - // take left channel for (size_t i = 0; i < 480; i++) { rnnoise_input[i] = static_cast<float>(pcm[i * 2]); } - std::unique_lock<std::mutex> _(m_rnn_mutex); - m_vad_prob = rnnoise_process_frame(m_rnnoise, denoised, rnnoise_input); + m_vad_prob = rnnoise_process_frame(m_rnnoise[0], denoised_left, rnnoise_input); + + if (m_enable_noise_suppression) { + for (size_t i = 0; i < 480; i++) { + rnnoise_input[i] = static_cast<float>(pcm[i * 2 + 1]); + } + m_vad_prob = rnnoise_process_frame(m_rnnoise[1], denoised_right, rnnoise_input); + } + return m_vad_prob > m_prob_threshold; } @@ -496,7 +521,8 @@ void AudioManager::RNNoiseInitialize() { spdlog::get("audio")->debug("Initializing RNNoise"); RNNoiseUninitialize(); std::unique_lock<std::mutex> _(m_rnn_mutex); - m_rnnoise = rnnoise_create(nullptr); + m_rnnoise[0] = rnnoise_create(nullptr); + m_rnnoise[1] = rnnoise_create(nullptr); const auto expected = rnnoise_get_frame_size(); if (expected != 480) { spdlog::get("audio")->warn("RNNoise expects a frame count other than 480"); @@ -504,11 +530,13 @@ void AudioManager::RNNoiseInitialize() { } void AudioManager::RNNoiseUninitialize() { - if (m_rnnoise != nullptr) { + if (m_rnnoise[0] != nullptr) { spdlog::get("audio")->debug("Uninitializing RNNoise"); std::unique_lock<std::mutex> _(m_rnn_mutex); - rnnoise_destroy(m_rnnoise); - m_rnnoise = nullptr; + rnnoise_destroy(m_rnnoise[0]); + rnnoise_destroy(m_rnnoise[1]); + m_rnnoise[0] = nullptr; + m_rnnoise[1] = nullptr; } } #endif @@ -571,6 +599,7 @@ AudioManager::VADMethod AudioManager::GetVADMethod() const { return m_vad_method; } +#ifdef WITH_RNNOISE float AudioManager::GetCurrentVADProbability() const { return m_vad_prob; } @@ -583,6 +612,15 @@ void AudioManager::SetRNNProbThreshold(double value) { m_prob_threshold = value; } +void AudioManager::SetSuppressNoise(bool value) { + m_enable_noise_suppression = value; +} + +bool AudioManager::GetSuppressNoise() const { + return m_enable_noise_suppression; +} +#endif + AudioManager::type_signal_opus_packet AudioManager::signal_opus_packet() { return m_signal_opus_packet; } diff --git a/src/audio/manager.hpp b/src/audio/manager.hpp index d37a281..80a2542 100644 --- a/src/audio/manager.hpp +++ b/src/audio/manager.hpp @@ -79,9 +79,13 @@ public: void SetVADMethod(VADMethod method); VADMethod GetVADMethod() const; +#ifdef WITH_RNNOISE float GetCurrentVADProbability() const; double GetRNNProbThreshold() const; void SetRNNProbThreshold(double value); + void SetSuppressNoise(bool value); + bool GetSuppressNoise() const; +#endif private: void OnCapturedPCM(const int16_t *pcm, ma_uint32 frames); @@ -95,7 +99,7 @@ private: bool CheckVADVoiceGate(); #ifdef WITH_RNNOISE - bool CheckVADRNNoise(const int16_t *pcm); + bool CheckVADRNNoise(const int16_t *pcm, float *denoised_left, float *denoised_right); void RNNoiseInitialize(); void RNNoiseUninitialize(); @@ -139,6 +143,7 @@ private: std::atomic<double> m_capture_gain = 1.0; std::atomic<double> m_prob_threshold = 0.5; std::atomic<float> m_vad_prob = 0.0; + std::atomic<bool> m_enable_noise_suppression = false; std::unordered_set<uint32_t> m_muted_ssrcs; std::unordered_map<uint32_t, double> m_volume_ssrc; @@ -150,7 +155,7 @@ private: VADMethod m_vad_method; #ifdef WITH_RNNOISE - DenoiseState *m_rnnoise; + DenoiseState *m_rnnoise[2]; #endif std::atomic<uint32_t> m_rtp_timestamp = 0; |