diff options
Diffstat (limited to 'src/audio')
-rw-r--r-- | src/audio/manager.cpp | 145 | ||||
-rw-r--r-- | src/audio/manager.hpp | 64 |
2 files changed, 196 insertions, 13 deletions
diff --git a/src/audio/manager.cpp b/src/audio/manager.cpp index a2fd4df..20089d2 100644 --- a/src/audio/manager.cpp +++ b/src/audio/manager.cpp @@ -64,6 +64,10 @@ void capture_data_callback(ma_device *pDevice, void *pOutput, const void *pInput AudioManager::AudioManager() { m_ok = true; +#ifdef WITH_RNNOISE + RNNoiseInitialize(); +#endif + int err; m_encoder = opus_encoder_create(48000, 2, OPUS_APPLICATION_VOIP, &err); if (err != OPUS_OK) { @@ -144,6 +148,10 @@ AudioManager::~AudioManager() { ma_device_uninit(&m_capture_device); ma_context_uninit(&m_context); RemoveAllSSRCs(); + +#ifdef WITH_RNNOISE + RNNoiseUninitialize(); +#endif } void AudioManager::AddSSRC(uint32_t ssrc) { @@ -420,10 +428,43 @@ void AudioManager::OnCapturedPCM(const int16_t *pcm, ma_uint32 frames) { UpdateCaptureVolume(new_pcm.data(), frames); - if (m_capture_peak_meter / 32768.0 < m_capture_gate) return; + static std::array<float, 480> denoised_L; + static std::array<float, 480> denoised_R; + + bool m_rnnoise_passed = false; +#ifdef WITH_RNNOISE + if (m_vad_method == VADMethod::RNNoise || m_enable_noise_suppression) { + m_rnnoise_passed = CheckVADRNNoise(new_pcm.data(), denoised_L.data(), denoised_R.data()); + } +#endif + + switch (m_vad_method) { + case VADMethod::Gate: + if (!CheckVADVoiceGate()) return; + break; +#ifdef WITH_RNNOISE + case VADMethod::RNNoise: + if (!m_rnnoise_passed) return; + break; +#endif + } m_enc_mutex.lock(); - int payload_len = opus_encode(m_encoder, new_pcm.data(), 480, static_cast<unsigned char *>(m_opus_buffer), 1275); + int payload_len = -1; + + if (m_enable_noise_suppression) { + static std::array<int16_t, 960> denoised_interleaved; + for (size_t i = 0; i < 480; i++) { + denoised_interleaved[i * 2] = static_cast<int16_t>(denoised_L[i]); + } + for (size_t i = 0; i < 480; i++) { + denoised_interleaved[i * 2 + 1] = static_cast<int16_t>(denoised_R[i]); + } + payload_len = opus_encode(m_encoder, denoised_interleaved.data(), 480, static_cast<unsigned char *>(m_opus_buffer), 1275); + } else { + payload_len = opus_encode(m_encoder, new_pcm.data(), 480, static_cast<unsigned char *>(m_opus_buffer), 1275); + } + m_enc_mutex.unlock(); if (payload_len < 0) { spdlog::get("audio")->error("encoding error: {}", payload_len); @@ -453,6 +494,9 @@ bool AudioManager::DecayVolumeMeters() { m_capture_peak_meter -= 600; if (m_capture_peak_meter < 0) m_capture_peak_meter = 0; + const auto x = m_vad_prob.load() - 0.05f; + m_vad_prob.store(x < 0.0f ? 0.0f : x); + std::lock_guard<std::mutex> _(m_vol_mtx); for (auto &[ssrc, meter] : m_volumes) { @@ -463,6 +507,55 @@ bool AudioManager::DecayVolumeMeters() { return true; } +bool AudioManager::CheckVADVoiceGate() { + return m_capture_peak_meter / 32768.0 > m_capture_gate; +} + +#ifdef WITH_RNNOISE +bool AudioManager::CheckVADRNNoise(const int16_t *pcm, float *denoised_left, float *denoised_right) { + // use left channel for vad, only denoise right if noise suppression enabled + std::unique_lock<std::mutex> _(m_rnn_mutex); + + static float rnnoise_input[480]; + for (size_t i = 0; i < 480; i++) { + rnnoise_input[i] = static_cast<float>(pcm[i * 2]); + } + m_vad_prob = std::max(m_vad_prob.load(), rnnoise_process_frame(m_rnnoise[0], denoised_left, rnnoise_input)); + + if (m_enable_noise_suppression) { + for (size_t i = 0; i < 480; i++) { + rnnoise_input[i] = static_cast<float>(pcm[i * 2 + 1]); + } + rnnoise_process_frame(m_rnnoise[1], denoised_right, rnnoise_input); + } + + return m_vad_prob > m_prob_threshold; +} + +void AudioManager::RNNoiseInitialize() { + spdlog::get("audio")->debug("Initializing RNNoise"); + RNNoiseUninitialize(); + std::unique_lock<std::mutex> _(m_rnn_mutex); + m_rnnoise[0] = rnnoise_create(nullptr); + m_rnnoise[1] = rnnoise_create(nullptr); + const auto expected = rnnoise_get_frame_size(); + if (expected != 480) { + spdlog::get("audio")->warn("RNNoise expects a frame count other than 480"); + } +} + +void AudioManager::RNNoiseUninitialize() { + if (m_rnnoise[0] != nullptr) { + spdlog::get("audio")->debug("Uninitializing RNNoise"); + std::unique_lock<std::mutex> _(m_rnn_mutex); + rnnoise_destroy(m_rnnoise[0]); + rnnoise_destroy(m_rnnoise[1]); + m_rnnoise[0] = nullptr; + m_rnnoise[1] = nullptr; + } +} +#endif + bool AudioManager::OK() const { return m_ok; } @@ -487,6 +580,54 @@ uint32_t AudioManager::GetRTPTimestamp() const noexcept { return m_rtp_timestamp; } +void AudioManager::SetVADMethod(const std::string &method) { + spdlog::get("audio")->debug("Setting VAD method to {}", method); + if (method == "gate") { + SetVADMethod(VADMethod::Gate); + } else if (method == "rnnoise") { +#ifdef WITH_RNNOISE + SetVADMethod(VADMethod::RNNoise); +#else + SetVADMethod(VADMethod::Gate); + spdlog::get("audio")->error("Tried to set RNNoise VAD method with support disabled"); +#endif + } else { + SetVADMethod(VADMethod::Gate); + spdlog::get("audio")->error("Tried to set unknown VAD method {}", method); + } +} + +void AudioManager::SetVADMethod(VADMethod method) { + spdlog::get("audio")->debug("Setting VAD method to enum {}", static_cast<int>(method)); + m_vad_method = method; +} + +AudioManager::VADMethod AudioManager::GetVADMethod() const { + return m_vad_method; +} + +#ifdef WITH_RNNOISE +float AudioManager::GetCurrentVADProbability() const { + return m_vad_prob; +} + +double AudioManager::GetRNNProbThreshold() const { + return m_prob_threshold; +} + +void AudioManager::SetRNNProbThreshold(double value) { + m_prob_threshold = value; +} + +void AudioManager::SetSuppressNoise(bool value) { + m_enable_noise_suppression = value; +} + +bool AudioManager::GetSuppressNoise() const { + return m_enable_noise_suppression; +} +#endif + AudioManager::type_signal_opus_packet AudioManager::signal_opus_packet() { return m_signal_opus_packet; } diff --git a/src/audio/manager.hpp b/src/audio/manager.hpp index ed40f35..80a2542 100644 --- a/src/audio/manager.hpp +++ b/src/audio/manager.hpp @@ -14,6 +14,11 @@ #include <miniaudio.h> #include <opus.h> #include <sigc++/sigc++.h> + +#ifdef WITH_RNNOISE +#include <rnnoise.h> +#endif + #include "devices.hpp" // clang-format on @@ -40,30 +45,47 @@ public: void SetCaptureGate(double gate); void SetCaptureGain(double gain); - [[nodiscard]] double GetCaptureGate() const noexcept; - [[nodiscard]] double GetCaptureGain() const noexcept; + double GetCaptureGate() const noexcept; + double GetCaptureGain() const noexcept; void SetMuteSSRC(uint32_t ssrc, bool mute); void SetVolumeSSRC(uint32_t ssrc, double volume); - [[nodiscard]] double GetVolumeSSRC(uint32_t ssrc) const; + double GetVolumeSSRC(uint32_t ssrc) const; void SetEncodingApplication(int application); - [[nodiscard]] int GetEncodingApplication(); + int GetEncodingApplication(); void SetSignalHint(int signal); - [[nodiscard]] int GetSignalHint(); + int GetSignalHint(); void SetBitrate(int bitrate); - [[nodiscard]] int GetBitrate(); + int GetBitrate(); void Enumerate(); - [[nodiscard]] bool OK() const; + bool OK() const; + + double GetCaptureVolumeLevel() const noexcept; + double GetSSRCVolumeLevel(uint32_t ssrc) const noexcept; - [[nodiscard]] double GetCaptureVolumeLevel() const noexcept; - [[nodiscard]] double GetSSRCVolumeLevel(uint32_t ssrc) const noexcept; + AudioDevices &GetDevices(); - [[nodiscard]] AudioDevices &GetDevices(); + uint32_t GetRTPTimestamp() const noexcept; - [[nodiscard]] uint32_t GetRTPTimestamp() const noexcept; + enum class VADMethod { + Gate, + RNNoise, + }; + + void SetVADMethod(const std::string &method); + void SetVADMethod(VADMethod method); + VADMethod GetVADMethod() const; + +#ifdef WITH_RNNOISE + float GetCurrentVADProbability() const; + double GetRNNProbThreshold() const; + void SetRNNProbThreshold(double value); + void SetSuppressNoise(bool value); + bool GetSuppressNoise() const; +#endif private: void OnCapturedPCM(const int16_t *pcm, ma_uint32 frames); @@ -74,6 +96,15 @@ private: bool DecayVolumeMeters(); + bool CheckVADVoiceGate(); + +#ifdef WITH_RNNOISE + bool CheckVADRNNoise(const int16_t *pcm, float *denoised_left, float *denoised_right); + + void RNNoiseInitialize(); + void RNNoiseUninitialize(); +#endif + friend void data_callback(ma_device *, void *, const void *, ma_uint32); friend void capture_data_callback(ma_device *, void *, const void *, ma_uint32); @@ -95,6 +126,10 @@ private: mutable std::mutex m_mutex; mutable std::mutex m_enc_mutex; +#ifdef WITH_RNNOISE + mutable std::mutex m_rnn_mutex; +#endif + std::unordered_map<uint32_t, std::pair<std::deque<int16_t>, OpusDecoder *>> m_sources; OpusEncoder *m_encoder; @@ -106,6 +141,9 @@ private: std::atomic<double> m_capture_gate = 0.0; std::atomic<double> m_capture_gain = 1.0; + std::atomic<double> m_prob_threshold = 0.5; + std::atomic<float> m_vad_prob = 0.0; + std::atomic<bool> m_enable_noise_suppression = false; std::unordered_set<uint32_t> m_muted_ssrcs; std::unordered_map<uint32_t, double> m_volume_ssrc; @@ -115,6 +153,10 @@ private: AudioDevices m_devices; + VADMethod m_vad_method; +#ifdef WITH_RNNOISE + DenoiseState *m_rnnoise[2]; +#endif std::atomic<uint32_t> m_rtp_timestamp = 0; public: |