author     ouwou <26526779+ouwou@users.noreply.github.com>  2024-03-25 00:38:31 -0400
committer  ouwou <26526779+ouwou@users.noreply.github.com>  2024-03-25 00:38:31 -0400
commit     3306edc514a996a7c61986d4851c9e6cfa323fca
tree       44f02f26afd3798815e2c9079217fbc5aa68eb85 /src/audio
parent     aeebed7cd1476b8ea7c709e6252ff441e9dd992a
add jitter buffer for voice
Diffstat (limited to 'src/audio')
-rw-r--r--  src/audio/jitterbuffer.hpp  82
-rw-r--r--  src/audio/manager.cpp       22
-rw-r--r--  src/audio/manager.hpp        3
3 files changed, 100 insertions, 7 deletions
diff --git a/src/audio/jitterbuffer.hpp b/src/audio/jitterbuffer.hpp
new file mode 100644
index 0000000..3da3594
--- /dev/null
+++ b/src/audio/jitterbuffer.hpp
@@ -0,0 +1,82 @@
+#pragma once
+#include <chrono>
+#include <cstdint>
+#include <deque>
+
+// very simple non-RTP-based jitter buffer. does not handle out-of-order packets
+template<typename SampleFormat>
+class JitterBuffer {
+public:
+    /*
+     * desired_latency: how many milliseconds must be buffered before audio can be drawn from the buffer
+     * maximum_latency: how many milliseconds can be buffered before the oldest audio starts to be discarded
+     */
+    JitterBuffer(int desired_latency, int maximum_latency, int channels, int sample_rate)
+        : m_desired_latency(desired_latency)
+        , m_maximum_latency(maximum_latency)
+        , m_channels(channels)
+        , m_sample_rate(sample_rate)
+        , m_last_push(std::chrono::steady_clock::now()) {
+    }
+
+    [[nodiscard]] size_t Available() const noexcept {
+        return m_samples.size();
+    }
+
+    bool PopSamples(SampleFormat *ptr, size_t amount) {
+        CheckBuffering();
+        if (m_buffering || Available() < amount) return false;
+        std::copy(m_samples.begin(), m_samples.begin() + amount, ptr);
+        m_samples.erase(m_samples.begin(), m_samples.begin() + amount);
+        return true;
+    }
+
+    void PushSamples(const SampleFormat *ptr, size_t amount) {
+        m_samples.insert(m_samples.end(), ptr, ptr + amount);
+        m_last_push = std::chrono::steady_clock::now();
+        const auto buffered = MillisBuffered();
+        if (buffered > m_maximum_latency) {
+            const auto overflow_ms = buffered - m_maximum_latency;
+            const auto overflow_samples = overflow_ms * m_channels * m_sample_rate / 1000;
+            m_samples.erase(m_samples.begin(), m_samples.begin() + overflow_samples);
+        }
+    }
+
+private:
+    [[nodiscard]] size_t MillisBuffered() const {
+        return m_samples.size() * 1000 / m_channels / m_sample_rate;
+    }
+
+    void CheckBuffering() {
+        // if the buffer ran empty we should be buffering again
+        if (m_samples.empty()) {
+            if (!m_buffering) {
+                m_buffering = true;
+            }
+            return;
+        }
+
+        if (!m_buffering) return;
+
+        // if we reached desired latency, we are sufficiently buffered
+        const auto millis_buffered = MillisBuffered();
+        if (millis_buffered >= m_desired_latency) {
+            m_buffering = false;
+        }
+        // if we haven't buffered to desired latency but maximum latency has elapsed since the last push, exit buffering so it doesn't get stuck
+        const auto now = std::chrono::steady_clock::now();
+        const auto millis = std::chrono::duration_cast<std::chrono::milliseconds>(now - m_last_push).count();
+        if (millis >= m_maximum_latency) {
+            m_buffering = false;
+        }
+    }
+
+    int m_desired_latency;
+    int m_maximum_latency;
+    int m_channels;
+    int m_sample_rate;
+    bool m_buffering = true;
+    std::chrono::time_point<std::chrono::steady_clock> m_last_push;
+
+    std::deque<SampleFormat> m_samples;
+};
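
The class above gates PopSamples on an internal buffering state: pops fail until desired_latency worth of audio has accumulated, and fail again after an underrun until that threshold is reached once more. A minimal standalone sketch of that behaviour (the 20 ms / 80 ms latencies and the 10 ms frame size are illustrative values, not ones taken from this commit):

    #include "jitterbuffer.hpp"
    #include <cstdio>
    #include <vector>

    int main() {
        // 2 channels at 48 kHz; start draining after 20 ms, trim anything past 80 ms
        JitterBuffer<int16_t> jb(20, 80, 2, 48000);
        std::vector<int16_t> frame(480 * 2, 0); // one 10 ms stereo frame of silence
        int16_t out[480 * 2];

        jb.PushSamples(frame.data(), frame.size());
        std::printf("%d\n", jb.PopSamples(out, 480 * 2)); // 0: only 10 ms buffered, still buffering

        jb.PushSamples(frame.data(), frame.size());
        std::printf("%d\n", jb.PopSamples(out, 480 * 2)); // 1: 20 ms reached, samples are returned
    }
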
diff --git a/src/audio/manager.cpp b/src/audio/manager.cpp
index eaac3bf..bb12f23 100644
--- a/src/audio/manager.cpp
+++ b/src/audio/manager.cpp
@@ -25,6 +25,7 @@ const uint8_t *StripRTPExtensionHeader(const uint8_t *buf, int num_bytes, size_t
     return buf;
 }
 
+// frameCount is configured to be 480 samples per channel
 void data_callback(ma_device *pDevice, void *pOutput, const void *pInput, ma_uint32 frameCount) {
     AudioManager *mgr = reinterpret_cast<AudioManager *>(pDevice->pUserData);
     if (mgr == nullptr) return;
@@ -36,12 +37,14 @@ void data_callback(ma_device *pDevice, void *pOutput, const void *pInput, ma_uin
         if (const auto vol_it = mgr->m_volume_ssrc.find(ssrc); vol_it != mgr->m_volume_ssrc.end()) {
             volume = vol_it->second;
         }
-        auto &buf = pair.first;
-        const size_t n = std::min(static_cast<size_t>(buf.size()), static_cast<size_t>(frameCount * 2ULL));
-        for (size_t i = 0; i < n; i++) {
+
+        static std::array<int16_t, 480 * 2> buf;
+
+        if (!pair.first.PopSamples(buf.data(), 480 * 2)) continue;
+
+        for (size_t i = 0; i < 480 * 2; i++) {
             pOutputF32[i] += volume * buf[i] / 32768.F;
         }
-        buf.erase(buf.begin(), buf.begin() + n);
     }
 }
@@ -201,7 +204,14 @@ void AudioManager::AddSSRC(uint32_t ssrc) {
     int error;
     if (m_sources.find(ssrc) == m_sources.end()) {
         auto *decoder = opus_decoder_create(48000, 2, &error);
-        m_sources.insert(std::make_pair(ssrc, std::make_pair(std::deque<int16_t> {}, decoder)));
+        auto &s = Abaddon::Get().GetSettings();
+        m_sources.insert(std::make_pair(ssrc, std::make_pair(
+            JitterBuffer<int16_t>(
+                s.JitterDesiredLatency,
+                s.JitterMaximumLatency,
+                2,
+                48000),
+            decoder)));
     }
 }
@@ -241,7 +251,7 @@ void AudioManager::FeedMeOpus(uint32_t ssrc, const std::vector<uint8_t> &data) {
         } else {
             UpdateReceiveVolume(ssrc, pcm.data(), decoded);
             auto &buf = it->second.first;
-            buf.insert(buf.end(), pcm.begin(), pcm.begin() + decoded * 2);
+            buf.PushSamples(pcm.data(), decoded * 2);
         }
     }
 }
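
In data_callback above, each source now contributes a fixed 480-frame chunk per callback: 480 frames per channel at 48 kHz is 10 ms of audio, i.e. 960 interleaved int16 samples for stereo, each scaled by the per-SSRC volume and by 1/32768 when mixed into the float output. If PopSamples fails (still buffering or underrun), the source simply contributes silence for that callback. A small sketch of just that mixing step (the helper name and parameters are illustrative, not part of this commit):

    #include <cstddef>
    #include <cstdint>

    // mix one 10 ms stereo frame (480 frames * 2 channels) of int16 PCM into a float mix buffer
    void MixFrame(float *out, const int16_t *in, double volume, size_t samples = 480 * 2) {
        for (size_t i = 0; i < samples; i++) {
            out[i] += static_cast<float>(volume * in[i] / 32768.0);
        }
    }
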
diff --git a/src/audio/manager.hpp b/src/audio/manager.hpp
index 5716fc5..56882fd 100644
--- a/src/audio/manager.hpp
+++ b/src/audio/manager.hpp
@@ -21,6 +21,7 @@
 #endif
 
 #include "devices.hpp"
+#include "jitterbuffer.hpp"
 // clang-format on
 
 class AudioManager {
@@ -136,7 +137,7 @@ private:
     mutable std::mutex m_rnn_mutex;
 #endif
 
-    std::unordered_map<uint32_t, std::pair<std::deque<int16_t>, OpusDecoder *>> m_sources;
+    std::unordered_map<uint32_t, std::pair<JitterBuffer<int16_t>, OpusDecoder *>> m_sources;
 
     OpusEncoder *m_encoder;
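
AddSSRC reads the two latencies from Abaddon::Get().GetSettings(), but the settings change itself is outside src/audio and therefore not part of this diff. As a rough idea only, the referenced fields presumably look something like this (struct name, types, and defaults are assumptions, not taken from the commit):

    struct Settings {
        // ...existing fields...
        int JitterDesiredLatency = 20; // ms that must be buffered before playback resumes (assumed default)
        int JitterMaximumLatency = 80; // ms buffered before the oldest audio is dropped (assumed default)
    };
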