19 files changed, 1304 insertions, 34 deletions
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 693b9b9..df4dab3 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -49,6 +49,8 @@ jobs:
             mingw-w64-x86_64-zlib
             mingw-w64-x86_64-gtkmm3
             mingw-w64-x86_64-libhandy
+            mingw-w64-x86_64-opus
+            mingw-w64-x86_64-libsodium
 
       - name: Setup MSYS2 (2)
         uses: msys2/setup-msys2@v2
@@ -57,10 +59,20 @@ jobs:
           update: true
           install: ${{ steps.setupmsys.outputs.value }}
 
-      - name: Build
-        run: |
-          cmake -GNinja -Bbuild -DCMAKE_BUILD_TYPE=${{ matrix.buildtype }}
-          cmake --build build
+      - name: Build (1)
+        uses: haya14busa/action-cond@v1
+        id: buildcmd
+        with:
+          cond: ${{ matrix.mindeps == true }}
+          if_true: |
+            cmake -GNinja -Bbuild -DUSE_LIBHANDY=OFF -DENABLE_VOICE=OFF -DCMAKE_BUILD_TYPE=${{ matrix.buildtype }}
+            cmake --build build
+          if_false: |
+            cmake -GNinja -Bbuild -DCMAKE_BUILD_TYPE=${{ matrix.buildtype }}
+            cmake --build build
+
+      - name: Build (2)
+        run: ${{ steps.buildcmd.outputs.value }}
 
       - name: Setup Artifact
         run: |
@@ -119,6 +131,8 @@ jobs:
           brew install gtkmm3
           brew install nlohmann-json
           brew install jpeg
+          brew install opus
+          brew install libsodium
 
       - name: Build
         uses: lukka/run-cmake@v3
@@ -168,6 +182,8 @@ jobs:
           sudo make install
           sudo apt-get install libgtkmm-3.0-dev
           sudo apt-get install libcurl4-gnutls-dev
+          sudo apt-get install libopus-dev
+          sudo apt-get install libsodium-dev
 
       - name: Build
         uses: lukka/run-cmake@v3
diff --git a/.gitmodules b/.gitmodules
index 17c4c23..01db078 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -4,3 +4,6 @@
 [submodule "subprojects/ixwebsocket"]
 	path = subprojects/ixwebsocket
 	url = https://github.com/machinezone/ixwebsocket
+[submodule "subprojects/miniaudio"]
+	path = subprojects/miniaudio
+	url = https://github.com/mackron/miniaudio
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1e28791..3ff2a2b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -8,6 +8,7 @@ set(CMAKE_CXX_STANDARD 17)
 set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake/")
 
 option(USE_LIBHANDY "Enable features that require libhandy (default)" ON)
+option(ENABLE_VOICE "Enable voice support" ON)
 
 find_package(nlohmann_json REQUIRED)
 find_package(CURL)
@@ -106,3 +107,18 @@ if (USE_LIBHANDY)
         target_compile_definitions(abaddon PRIVATE WITH_LIBHANDY)
     endif ()
 endif ()
+
+if (ENABLE_VOICE)
+    target_compile_definitions(abaddon PRIVATE WITH_VOICE)
+    # WITH_VOICE is the macro the C++ sources test to compile the voice code in.
+    find_package(PkgConfig)
+    # miniaudio comes from the subprojects/miniaudio submodule and is only added as an include path.
+    target_include_directories(abaddon PUBLIC subprojects/miniaudio)
+    pkg_check_modules(Opus REQUIRED IMPORTED_TARGET opus)
+    target_link_libraries(abaddon PkgConfig::Opus)
+
+    pkg_check_modules(libsodium REQUIRED IMPORTED_TARGET libsodium)
+    target_link_libraries(abaddon PkgConfig::libsodium)
+    # NOTE(review): presumably needed because miniaudio loads its backends with dlopen - confirm.
+    target_link_libraries(abaddon ${CMAKE_DL_LIBS})
+endif ()
diff --git a/src/abaddon.cpp b/src/abaddon.cpp
index 02dcd08..bdedc7c 100644
--- a/src/abaddon.cpp
+++ b/src/abaddon.cpp
@@ -3,6 +3,7 @@
 #include <string>
 #include <algorithm>
 #include "platform.hpp"
+#include "audio/manager.hpp"
 #include "discord/discord.hpp"
 #include "dialogs/token.hpp"
 #include "dialogs/editmessage.hpp"
@@ -219,6 +220,16 @@ int Abaddon::StartGTK() {
         return 1;
     }
 
+#ifdef WITH_VOICE
+    m_audio = std::make_unique<AudioManager>();
+    if (!m_audio->OK()) {
+        Gtk::MessageDialog dlg(*m_main_window, "The audio engine could not be initialized!", false, Gtk::MESSAGE_ERROR, Gtk::BUTTONS_OK, true);
+        dlg.set_position(Gtk::WIN_POS_CENTER);
+        dlg.run();
+        return 1;
+    }
+#endif
+
     // store must be checked before this can be called
     m_main_window->UpdateComponents();
 
@@ -239,6 +250,11 @@ int Abaddon::StartGTK() {
     m_main_window->GetChannelList()->signal_action_guild_leave().connect(sigc::mem_fun(*this, &Abaddon::ActionLeaveGuild));
     m_main_window->GetChannelList()->signal_action_guild_settings().connect(sigc::mem_fun(*this, &Abaddon::ActionGuildSettings));
 
+#ifdef WITH_VOICE
+    m_main_window->GetChannelList()->signal_action_join_voice_channel().connect(sigc::mem_fun(*this, &Abaddon::ActionJoinVoiceChannel));
+    m_main_window->GetChannelList()->signal_action_disconnect_voice().connect(sigc::mem_fun(*this, &Abaddon::ActionDisconnectVoice));
+#endif
+
     m_main_window->GetChatWindow()->signal_action_message_edit().connect(sigc::mem_fun(*this, &Abaddon::ActionChatEditMessage));
     m_main_window->GetChatWindow()->signal_action_chat_submit().connect(sigc::mem_fun(*this, &Abaddon::ActionChatInputSubmit));
     m_main_window->GetChatWindow()->signal_action_chat_load_history().connect(sigc::mem_fun(*this, &Abaddon::ActionChatLoadHistory));
@@ -912,6 +928,16 @@ void Abaddon::ActionViewThreads(Snowflake channel_id) {
     window->show();
 }
 
+#ifdef WITH_VOICE
+void Abaddon::ActionJoinVoiceChannel(Snowflake channel_id) { // slot connected to ChannelList::signal_action_join_voice_channel
+    m_discord.ConnectToVoice(channel_id); // the Discord client issues the actual voice state request
+}
+
+void Abaddon::ActionDisconnectVoice() { // slot connected to ChannelList::signal_action_disconnect_voice
+    m_discord.DisconnectFromVoice(); // stops the voice client and notifies the gateway
+}
+#endif
+
 std::optional<Glib::ustring> Abaddon::ShowTextPrompt(const Glib::ustring &prompt, const Glib::ustring &title, const Glib::ustring &placeholder, Gtk::Window *window) {
     TextInputDialog dlg(prompt, title, placeholder, window != nullptr ? *window : *m_main_window);
     const auto code = dlg.run();
@@ -951,15 +977,24 @@ EmojiResource &Abaddon::GetEmojis() {
     return m_emojis;
 }
 
+#ifdef WITH_VOICE
+AudioManager &Abaddon::GetAudio() { // accessor for the audio engine held in m_audio
+    return *m_audio; // m_audio is only assigned in StartGTK(); calling this earlier dereferences an empty unique_ptr
+}
+#endif
+
 void Abaddon::on_tray_click() {
     m_main_window->set_visible(!m_main_window->is_visible());
 }
+
 void Abaddon::on_tray_menu_click() {
     m_gtk_app->quit();
 }
+
 void Abaddon::on_tray_popup_menu(int button, int activate_time) {
     m_tray->popup_menu_at_position(*m_tray_menu, button, activate_time);
 }
+
 void Abaddon::on_window_hide() {
     if (!m_settings.GetSettings().HideToTray) {
         m_gtk_app->quit();
diff --git a/src/abaddon.hpp b/src/abaddon.hpp
index b067324..ca92370 100644
--- a/src/abaddon.hpp
+++ b/src/abaddon.hpp
@@ -12,6 +12,8 @@
 
 #define APP_TITLE "Abaddon"
 
+class AudioManager;
+
 class Abaddon {
 private:
     Abaddon();
@@ -52,6 +54,11 @@ public:
     void ActionViewPins(Snowflake channel_id);
     void ActionViewThreads(Snowflake channel_id);
 
+#ifdef WITH_VOICE
+    void ActionJoinVoiceChannel(Snowflake channel_id);
+    void ActionDisconnectVoice();
+#endif
+
     std::optional<Glib::ustring> ShowTextPrompt(const Glib::ustring &prompt, const Glib::ustring &title, const Glib::ustring &placeholder = "", Gtk::Window *window = nullptr);
     bool ShowConfirm(const Glib::ustring &prompt, Gtk::Window *window = nullptr);
 
@@ -60,6 +67,10 @@ public:
     ImageManager &GetImageManager();
     EmojiResource &GetEmojis();
 
+#ifdef WITH_VOICE
+    AudioManager &GetAudio();
+#endif
+
     std::string GetDiscordToken() const;
     bool IsDiscordActive() const;
 
@@ -144,6 +155,10 @@ private:
     ImageManager m_img_mgr;
     EmojiResource m_emojis;
 
+#ifdef WITH_VOICE
+    std::unique_ptr<AudioManager> m_audio;
+#endif
+
     mutable std::mutex m_mutex;
     Glib::RefPtr<Gtk::Application> m_gtk_app;
     Glib::RefPtr<Gtk::CssProvider> m_css_provider;
diff --git a/src/audio/manager.cpp b/src/audio/manager.cpp
new file mode 100644
index 0000000..b665c81
--- /dev/null
+++ b/src/audio/manager.cpp
@@ -0,0 +1,156 @@
+#ifdef WITH_VOICE
+    // clang-format off
+#ifdef _WIN32
+    #include <winsock2.h>
+#endif
+
+#include "manager.hpp"
+#include <array>
+#define MINIAUDIO_IMPLEMENTATION
+#include <miniaudio.h>
+#include <opus.h>
+#include <cstring>
+// clang-format on
+
+const uint8_t *StripRTPExtensionHeader(const uint8_t *buf, int num_bytes, size_t &outlen) {
+    // Skips a 0xBEDE header extension if present. buf is only read once the length allows it, and the skip is clamped to the packet.
+    const size_t total = num_bytes > 0 ? static_cast<size_t>(num_bytes) : 0;
+    size_t offset = 0;
+    if (total > 4 && buf[0] == 0xbe && buf[1] == 0xde) { // 4-byte extension header, then a big-endian count of 32-bit words
+        offset = std::min(total, 4 + 4 * static_cast<size_t>((buf[2] << 8) | buf[3]));
+    }
+    outlen = total - offset; // 0 when the packet is empty or the declared extension does not fit
+    return buf + offset;
+}
+
+void data_callback(ma_device *pDevice, void *pOutput, const void *pInput, ma_uint32 frameCount) {
+    auto *manager = reinterpret_cast<AudioManager *>(pDevice->pUserData); // set to the AudioManager in its constructor
+    if (manager == nullptr) return;
+    std::lock_guard<std::mutex> guard(manager->m_mutex); // m_sources is also written by FeedMeOpus
+    float *const mix = static_cast<float *>(pOutput);
+    const size_t wanted = static_cast<size_t>(frameCount * 2ULL); // two interleaved samples per frame
+    for (auto &entry : manager->m_sources) {
+        std::deque<int16_t> &queued = entry.second.first;
+        const size_t count = std::min(static_cast<size_t>(queued.size()), wanted);
+        for (size_t idx = 0; idx != count; ++idx) {
+            mix[idx] += queued[idx] / 32768.F; // s16 -> float, added on top of what is already in pOutput
+        }
+        queued.erase(queued.begin(), queued.begin() + count); // consumed samples leave the queue
+    }
+}
+
+void capture_data_callback(ma_device *pDevice, void *pOutput, const void *pInput, ma_uint32 frameCount) {
+    if (auto *owner = reinterpret_cast<AudioManager *>(pDevice->pUserData); owner != nullptr) { // pUserData is set in m_capture_config
+        const auto *samples = static_cast<const int16_t *>(pInput); // the capture device is configured as ma_format_s16
+        owner->OnCapturedPCM(samples, frameCount);
+    }
+}
+
+AudioManager::AudioManager() {
+    // Zero both devices up front: every early return below still ends in ~AudioManager() calling ma_device_uninit() on them.
+    std::memset(&m_device, 0, sizeof(m_device));
+    std::memset(&m_capture_device, 0, sizeof(m_capture_device));
+    m_ok = true;
+    int err;
+    m_encoder = opus_encoder_create(48000, 2, OPUS_APPLICATION_VOIP, &err);
+    if (err != OPUS_OK) {
+        printf("failed to initialize opus encoder: %d\n", err);
+        m_ok = false;
+        return;
+    }
+    opus_encoder_ctl(m_encoder, OPUS_SET_BITRATE(64000));
+
+    m_device_config = ma_device_config_init(ma_device_type_playback);
+    m_device_config.playback.format = ma_format_f32; // data_callback mixes into a float buffer
+    m_device_config.playback.channels = 2;
+    m_device_config.sampleRate = 48000;
+    m_device_config.dataCallback = data_callback;
+    m_device_config.pUserData = this;
+
+    if (ma_device_init(nullptr, &m_device_config, &m_device) != MA_SUCCESS) {
+        puts("open playback fail");
+        m_ok = false;
+        return;
+    }
+    if (ma_device_start(&m_device) != MA_SUCCESS) {
+        puts("failed to start playback");
+        ma_device_uninit(&m_device);
+        m_ok = false;
+        return;
+    }
+
+    m_capture_config = ma_device_config_init(ma_device_type_capture);
+    m_capture_config.capture.format = ma_format_s16; // matches the int16_t input of OnCapturedPCM
+    m_capture_config.capture.channels = 2;
+    m_capture_config.sampleRate = 48000;
+    m_capture_config.periodSizeInFrames = 480; // 10 ms at 48 kHz, the frame size OnCapturedPCM encodes
+    m_capture_config.dataCallback = capture_data_callback;
+    m_capture_config.pUserData = this;
+
+    if (ma_device_init(nullptr, &m_capture_config, &m_capture_device) != MA_SUCCESS) {
+        puts("open capture fail");
+        m_ok = false;
+        return;
+    }
+    if (ma_device_start(&m_capture_device) != MA_SUCCESS) {
+        puts("failed to start capture");
+        ma_device_uninit(&m_capture_device);
+        m_ok = false;
+        return;
+    }
+
+    char device_name[MA_MAX_DEVICE_NAME_LENGTH + 1];
+    ma_device_get_name(&m_capture_device, ma_device_type_capture, device_name, sizeof(device_name), nullptr);
+    printf("using %s for capture\n", device_name);
+}
+
+AudioManager::~AudioManager() {
+    ma_device_uninit(&m_device);
+    ma_device_uninit(&m_capture_device);
+    for (auto &[ssrc, pair] : m_sources) opus_decoder_destroy(pair.second); // one decoder per SSRC, created in FeedMeOpus
+    // The encoder from opus_encoder_create() was previously leaked; it is null if creation failed, which destroy accepts.
+    opus_encoder_destroy(m_encoder);
+}
+
+void AudioManager::SetOpusBuffer(uint8_t *ptr) { // ptr receives each encoded capture packet; OnCapturedPCM allows up to 1275 bytes
+    m_opus_buffer = ptr; // while this is nullptr, OnCapturedPCM returns without encoding
+}
+
+void AudioManager::FeedMeOpus(uint32_t ssrc, const std::vector<uint8_t> &data) {
+    size_t payload_size = 0;
+    const auto *opus_encoded = StripRTPExtensionHeader(data.data(), static_cast<int>(data.size()), payload_size);
+    static std::array<opus_int16, 120 * 48 * 2> pcm; // room for the 120 * 48 stereo frames requested from opus_decode
+    std::unique_lock<std::mutex> lock(m_mutex); // data_callback walks m_sources under this mutex, so lookups/inserts need it too
+    auto it = m_sources.find(ssrc);
+    if (it == m_sources.end()) {
+        auto *created = opus_decoder_create(48000, 2, nullptr);
+        if (created == nullptr) return; // never register (and later decode with) a decoder that failed to build
+        it = m_sources.emplace(ssrc, std::make_pair(std::deque<int16_t> {}, created)).first;
+    }
+    auto &source = it->second; // references to map elements stay valid while the lock is released
+    lock.unlock(); // decode outside the lock, as before, so playback is not held up
+    const int decoded = opus_decode(source.second, opus_encoded, static_cast<opus_int32>(payload_size), pcm.data(), 120 * 48, 0);
+    if (decoded <= 0) return; // decode error or nothing produced: nothing to queue
+    lock.lock();
+    source.first.insert(source.first.end(), pcm.begin(), pcm.begin() + decoded * 2); // decoded frames * 2 channels
+}
+
+void AudioManager::OnCapturedPCM(const int16_t *pcm, ma_uint32 frames) {
+    // opus_encode() below always consumes 480 frames, so a missing or shorter capture buffer is skipped rather than over-read.
+    if (m_opus_buffer == nullptr || pcm == nullptr || frames < 480) return;
+    int payload_len = opus_encode(m_encoder, pcm, 480, static_cast<unsigned char *>(m_opus_buffer), 1275);
+    if (payload_len < 0) {
+        printf("encoding error: %d\n", payload_len);
+    } else {
+        m_signal_opus_packet.emit(payload_len); // payload_len is the encoded size written to m_opus_buffer
+    }
+}
+
+bool AudioManager::OK() const { // reports whether the constructor finished every setup step
+    return m_ok; // set to false by the constructor on the first failed step
+}
+
+AudioManager::type_signal_opus_packet AudioManager::signal_opus_packet() { // emitted by OnCapturedPCM with the encoded payload size
+    return m_signal_opus_packet; // returned by value, matching the other signal accessors in this patch
+}
+#endif
diff --git a/src/audio/manager.hpp b/src/audio/manager.hpp
new file mode 100644
index 0000000..700fcc0
--- /dev/null
+++ b/src/audio/manager.hpp
@@ -0,0 +1,57 @@
+#pragma once
+#ifdef WITH_VOICE
+// clang-format off
+#include <array>
+#include <atomic>
+#include <deque>
+#include <mutex>
+#include <thread>
+#include <unordered_map>
+#include <vector>
+#include <miniaudio.h>
+#include <opus.h>
+#include <sigc++/sigc++.h>
+// clang-format on
+
+class AudioManager { // owns the miniaudio playback/capture devices and the Opus encoder/decoders
+public:
+    AudioManager(); // sets everything up; check OK() afterwards
+    ~AudioManager();
+    // SetOpusBuffer: where encoded capture packets are written. FeedMeOpus: decode one received packet for `ssrc`.
+    void SetOpusBuffer(uint8_t *ptr);
+    void FeedMeOpus(uint32_t ssrc, const std::vector<uint8_t> &data);
+
+    [[nodiscard]] bool OK() const;
+
+private:
+    void OnCapturedPCM(const int16_t *pcm, ma_uint32 frames); // called from capture_data_callback
+    // the miniaudio callbacks are free functions and need access to the private state below
+    friend void data_callback(ma_device *, void *, const void *, ma_uint32);
+    friend void capture_data_callback(ma_device *, void *, const void *, ma_uint32);
+
+    std::thread m_thread; // NOTE(review): not referenced in manager.cpp - confirm whether it is still needed
+
+    bool m_ok; // result of the constructor, exposed through OK()
+
+    // playback device and the configuration it was opened with
+    ma_device m_device;
+    ma_device_config m_device_config;
+    // capture device and the configuration it was opened with
+    ma_device m_capture_device;
+    ma_device_config m_capture_config;
+    // m_mutex guards m_sources: per-SSRC queue of decoded PCM samples plus that source's decoder
+    std::mutex m_mutex;
+    std::unordered_map<uint32_t, std::pair<std::deque<int16_t>, OpusDecoder *>> m_sources;
+
+    OpusEncoder *m_encoder; // created in the constructor, used by OnCapturedPCM
+
+    uint8_t *m_opus_buffer = nullptr; // not owned; supplied through SetOpusBuffer
+
+public:
+    using type_signal_opus_packet = sigc::signal<void(int payload_size)>;
+    type_signal_opus_packet signal_opus_packet();
+
+private:
+    type_signal_opus_packet m_signal_opus_packet;
+};
+#endif
diff --git a/src/components/channels.cpp b/src/components/channels.cpp
index 497c021..eb9d688 100644
--- a/src/components/channels.cpp
+++ b/src/components/channels.cpp
@@ -21,6 +21,10 @@ ChannelList::ChannelList()
     , m_menu_channel_open_tab("Open in New _Tab", true)
     , m_menu_dm_open_tab("Open in New _Tab", true)
 #endif
+#ifdef WITH_VOICE
+    , m_menu_voice_channel_join("_Join", true)
+    , m_menu_voice_channel_disconnect("_Disconnect", true)
+#endif
     , m_menu_dm_copy_id("_Copy ID", true)
     , m_menu_dm_close("") // changes depending on if group or not
     , m_menu_thread_copy_id("_Copy ID", true)
@@ -36,7 +40,11 @@ ChannelList::ChannelList()
         const auto type = row[m_columns.m_type];
         // text channels should not be allowed to be collapsed
         // maybe they should be but it seems a little difficult to handle expansion to permit this
+#ifdef WITH_VOICE
+        if (type != RenderType::TextChannel && type != RenderType::VoiceChannel) {
+#else
         if (type != RenderType::TextChannel) {
+#endif
             if (row[m_columns.m_expanded]) {
                 m_view.collapse_row(path);
                 row[m_columns.m_expanded] = false;
@@ -161,6 +169,21 @@ ChannelList::ChannelList()
     m_menu_channel.append(m_menu_channel_copy_id);
     m_menu_channel.show_all();
 
+#ifdef WITH_VOICE
+    m_menu_voice_channel_join.signal_activate().connect([this]() {
+        const auto id = static_cast<Snowflake>((*m_model->get_iter(m_path_for_menu))[m_columns.m_id]);
+        m_signal_action_join_voice_channel.emit(id);
+    });
+
+    m_menu_voice_channel_disconnect.signal_activate().connect([this]() {
+        m_signal_action_disconnect_voice.emit();
+    });
+
+    m_menu_voice_channel.append(m_menu_voice_channel_join);
+    m_menu_voice_channel.append(m_menu_voice_channel_disconnect);
+    m_menu_voice_channel.show_all();
+#endif
+
     m_menu_dm_copy_id.signal_activate().connect([this] {
         Gtk::Clipboard::get()->set_text(std::to_string((*m_model->get_iter(m_path_for_menu))[m_columns.m_id]));
     });
@@ -579,7 +602,11 @@ Gtk::TreeModel::iterator ChannelList::AddGuild(const GuildData &guild) {
     for (const auto &channel_ : *guild.Channels) {
         const auto channel = discord.GetChannel(channel_.ID);
         if (!channel.has_value()) continue;
+#ifdef WITH_VOICE
+        if (channel->Type == ChannelType::GUILD_TEXT || channel->Type == ChannelType::GUILD_NEWS || channel->Type == ChannelType::GUILD_VOICE) {
+#else
         if (channel->Type == ChannelType::GUILD_TEXT || channel->Type == ChannelType::GUILD_NEWS) {
+#endif
             if (channel->ParentID.has_value())
                 categories[*channel->ParentID].push_back(*channel);
             else
@@ -607,7 +634,12 @@ Gtk::TreeModel::iterator ChannelList::AddGuild(const GuildData &guild) {
 
     for (const auto &channel : orphan_channels) {
         auto channel_row = *m_model->append(guild_row.children());
-        channel_row[m_columns.m_type] = RenderType::TextChannel;
+        if (IsTextChannel(channel.Type))
+            channel_row[m_columns.m_type] = RenderType::TextChannel;
+#ifdef WITH_VOICE
+        else
+            channel_row[m_columns.m_type] = RenderType::VoiceChannel;
+#endif
         channel_row[m_columns.m_id] = channel.ID;
         channel_row[m_columns.m_name] = "#" + Glib::Markup::escape_text(*channel.Name);
         channel_row[m_columns.m_sort] = *channel.Position + OrphanChannelSortOffset;
@@ -630,7 +662,12 @@ Gtk::TreeModel::iterator ChannelList::AddGuild(const GuildData &guild) {
 
         for (const auto &channel : channels) {
             auto channel_row = *m_model->append(cat_row.children());
-            channel_row[m_columns.m_type] = RenderType::TextChannel;
+            if (IsTextChannel(channel.Type))
+                channel_row[m_columns.m_type] = RenderType::TextChannel;
+#ifdef WITH_VOICE
+            else
+                channel_row[m_columns.m_type] = RenderType::VoiceChannel;
+#endif
             channel_row[m_columns.m_id] = channel.ID;
             channel_row[m_columns.m_name] = "#" + Glib::Markup::escape_text(*channel.Name);
             channel_row[m_columns.m_sort] = *channel.Position;
@@ -856,6 +893,12 @@ bool ChannelList::OnButtonPressEvent(GdkEventButton *ev) {
                     OnChannelSubmenuPopup();
                     m_menu_channel.popup_at_pointer(reinterpret_cast<GdkEvent *>(ev));
                     break;
+#ifdef WITH_VOICE
+                case RenderType::VoiceChannel:
+                    OnVoiceChannelSubmenuPopup();
+                    m_menu_voice_channel.popup_at_pointer(reinterpret_cast<GdkEvent *>(ev));
+                    break;
+#endif
                 case RenderType::DM: {
                     OnDMSubmenuPopup();
                     const auto channel = Abaddon::Get().GetDiscordClient().GetChannel(static_cast<Snowflake>(row[m_columns.m_id]));
@@ -947,6 +990,22 @@ void ChannelList::OnChannelSubmenuPopup() {
         m_menu_channel_toggle_mute.set_label("Mute");
 }
 
+#ifdef WITH_VOICE
+void ChannelList::OnVoiceChannelSubmenuPopup() {
+    // Join is offered only while not in voice; Disconnect only on the channel currently joined.
+    const auto row_iter = m_model->get_iter(m_path_for_menu);
+    if (!row_iter) return;
+    const auto channel_id = static_cast<Snowflake>((*row_iter)[m_columns.m_id]);
+    auto &client = Abaddon::Get().GetDiscordClient();
+
+    const bool in_voice = client.IsConnectedToVoice();
+    // the short-circuit keeps GetVoiceChannelID() unqueried when not connected
+    const bool on_this_channel = in_voice && client.GetVoiceChannelID() == channel_id;
+    m_menu_voice_channel_join.set_sensitive(!in_voice);
+    m_menu_voice_channel_disconnect.set_sensitive(on_this_channel);
+}
+#endif
+
 void ChannelList::OnDMSubmenuPopup() {
     auto iter = m_model->get_iter(m_path_for_menu);
     if (!iter) return;
@@ -997,6 +1056,16 @@ ChannelList::type_signal_action_open_new_tab ChannelList::signal_action_open_new
 }
 #endif
 
+#ifdef WITH_VOICE
+ChannelList::type_signal_action_join_voice_channel ChannelList::signal_action_join_voice_channel() { // emitted with the row's channel id by the join menu item
+    return m_signal_action_join_voice_channel;
+}
+
+ChannelList::type_signal_action_disconnect_voice ChannelList::signal_action_disconnect_voice() { // emitted without arguments by the disconnect menu item
+    return m_signal_action_disconnect_voice;
+}
+#endif
+
 ChannelList::ModelColumns::ModelColumns() {
     add(m_type);
     add(m_id);
diff --git a/src/components/channels.hpp b/src/components/channels.hpp
index 53a68c9..2d2b257 100644
--- a/src/components/channels.hpp
+++ b/src/components/channels.hpp
@@ -125,6 +125,12 @@ protected:
     Gtk::MenuItem m_menu_channel_open_tab;
 #endif
 
+#ifdef WITH_VOICE
+    Gtk::Menu m_menu_voice_channel;
+    Gtk::MenuItem m_menu_voice_channel_join;
+    Gtk::MenuItem m_menu_voice_channel_disconnect;
+#endif
+
     Gtk::Menu m_menu_dm;
     Gtk::MenuItem m_menu_dm_copy_id;
     Gtk::MenuItem m_menu_dm_close;
@@ -148,6 +154,10 @@ protected:
     void OnDMSubmenuPopup();
     void OnThreadSubmenuPopup();
 
+#ifdef WITH_VOICE
+    void OnVoiceChannelSubmenuPopup();
+#endif
+
     bool m_updating_listing = false;
 
     Snowflake m_active_channel;
@@ -166,6 +176,14 @@ public:
     type_signal_action_open_new_tab signal_action_open_new_tab();
 #endif
 
+#ifdef WITH_VOICE
+    using type_signal_action_join_voice_channel = sigc::signal<void, Snowflake>;
+    using type_signal_action_disconnect_voice = sigc::signal<void>;
+
+    type_signal_action_join_voice_channel signal_action_join_voice_channel();
+    type_signal_action_disconnect_voice signal_action_disconnect_voice();
+#endif
+
     type_signal_action_channel_item_select signal_action_channel_item_select();
     type_signal_action_guild_leave signal_action_guild_leave();
     type_signal_action_guild_settings signal_action_guild_settings();
@@ -178,4 +196,9 @@ private:
 #ifdef WITH_LIBHANDY
     type_signal_action_open_new_tab m_signal_action_open_new_tab;
 #endif
+
+#ifdef WITH_VOICE
+    type_signal_action_join_voice_channel m_signal_action_join_voice_channel;
+    type_signal_action_disconnect_voice m_signal_action_disconnect_voice;
+#endif
 };
diff --git a/src/components/channelscellrenderer.cpp b/src/components/channelscellrenderer.cpp
index 9afce8a..23ee3f0 100644
--- a/src/components/channelscellrenderer.cpp
+++ b/src/components/channelscellrenderer.cpp
@@ -65,6 +65,10 @@ void CellRendererChannels::get_preferred_width_vfunc(Gtk::Widget &widget, int &m
             return get_preferred_width_vfunc_channel(widget, minimum_width, natural_width);
         case RenderType::Thread:
             return get_preferred_width_vfunc_thread(widget, minimum_width, natural_width);
+#ifdef WITH_VOICE
+        case RenderType::VoiceChannel:
+            return get_preferred_width_vfunc_voice_channel(widget, minimum_width, natural_width);
+#endif
         case RenderType::DMHeader:
             return get_preferred_width_vfunc_dmheader(widget, minimum_width, natural_width);
         case RenderType::DM:
@@ -82,6 +86,10 @@ void CellRendererChannels::get_preferred_width_for_height_vfunc(Gtk::Widget &wid
             return get_preferred_width_for_height_vfunc_channel(widget, height, minimum_width, natural_width);
         case RenderType::Thread:
             return get_preferred_width_for_height_vfunc_thread(widget, height, minimum_width, natural_width);
+#ifdef WITH_VOICE
+        case RenderType::VoiceChannel:
+            return get_preferred_width_for_height_vfunc_voice_channel(widget, height, minimum_width, natural_width);
+#endif
         case RenderType::DMHeader:
             return get_preferred_width_for_height_vfunc_dmheader(widget, height, minimum_width, natural_width);
         case RenderType::DM:
@@ -99,6 +107,10 @@ void CellRendererChannels::get_preferred_height_vfunc(Gtk::Widget &widget, int &
             return get_preferred_height_vfunc_channel(widget, minimum_height, natural_height);
         case RenderType::Thread:
             return get_preferred_height_vfunc_thread(widget, minimum_height, natural_height);
+#ifdef WITH_VOICE
+        case RenderType::VoiceChannel:
+            return get_preferred_height_vfunc_voice_channel(widget, minimum_height, natural_height);
+#endif
         case RenderType::DMHeader:
             return get_preferred_height_vfunc_dmheader(widget, minimum_height, natural_height);
         case RenderType::DM:
@@ -116,6 +128,10 @@ void CellRendererChannels::get_preferred_height_for_width_vfunc(Gtk::Widget &wid
             return get_preferred_height_for_width_vfunc_channel(widget, width, minimum_height, natural_height);
         case RenderType::Thread:
             return get_preferred_height_for_width_vfunc_thread(widget, width, minimum_height, natural_height);
+#ifdef WITH_VOICE
+        case RenderType::VoiceChannel:
+            return get_preferred_height_for_width_vfunc_voice_channel(widget, width, minimum_height, natural_height);
+#endif
         case RenderType::DMHeader:
             return get_preferred_height_for_width_vfunc_dmheader(widget, width, minimum_height, natural_height);
         case RenderType::DM:
@@ -133,6 +149,10 @@ void CellRendererChannels::render_vfunc(const Cairo::RefPtr<Cairo::Context> &cr,
             return render_vfunc_channel(cr, widget, background_area, cell_area, flags);
         case RenderType::Thread:
             return render_vfunc_thread(cr, widget, background_area, cell_area, flags);
+#ifdef WITH_VOICE
+        case RenderType::VoiceChannel:
+            return render_vfunc_voice_channel(cr, widget, background_area, cell_area, flags);
+#endif
         case RenderType::DMHeader:
             return render_vfunc_dmheader(cr, widget, background_area, cell_area, flags);
         case RenderType::DM:
@@ -499,6 +519,41 @@ void CellRendererChannels::render_vfunc_thread(const Cairo::RefPtr<Cairo::Contex
     }
 }
 
+// voice channel
+
+#ifdef WITH_VOICE
+void CellRendererChannels::get_preferred_width_vfunc_voice_channel(Gtk::Widget &widget, int &minimum_width, int &natural_width) const {
+    m_renderer_text.get_preferred_width(widget, minimum_width, natural_width); // a voice row is sized by its text renderer alone
+}
+
+void CellRendererChannels::get_preferred_width_for_height_vfunc_voice_channel(Gtk::Widget &widget, int height, int &minimum_width, int &natural_width) const {
+    m_renderer_text.get_preferred_width_for_height(widget, height, minimum_width, natural_width); // forwarded unchanged to the text renderer
+}
+
+void CellRendererChannels::get_preferred_height_vfunc_voice_channel(Gtk::Widget &widget, int &minimum_height, int &natural_height) const {
+    m_renderer_text.get_preferred_height(widget, minimum_height, natural_height); // a voice row is as tall as its text
+}
+
+void CellRendererChannels::get_preferred_height_for_width_vfunc_voice_channel(Gtk::Widget &widget, int width, int &minimum_height, int &natural_height) const {
+    m_renderer_text.get_preferred_height_for_width(widget, width, minimum_height, natural_height); // forwarded unchanged to the text renderer
+}
+
+void CellRendererChannels::render_vfunc_voice_channel(const Cairo::RefPtr<Cairo::Context> &cr, Gtk::Widget &widget, const Gdk::Rectangle &background_area, const Gdk::Rectangle &cell_area, Gtk::CellRendererState flags) {
+    // Draws the row text in green, inset 21px from the left and vertically centred in the background area.
+    Gtk::Requisition min_req, nat_req;
+    m_renderer_text.get_preferred_size(widget, min_req, nat_req);
+
+    const int left = background_area.get_x() + 21;
+    const int top = background_area.get_y() + background_area.get_height() / 2 - nat_req.height / 2;
+    Gdk::Rectangle text_rect(left, top, nat_req.width, nat_req.height);
+
+    m_renderer_text.property_foreground_rgba() = Gdk::RGBA("#0f0");
+    m_renderer_text.render(cr, widget, background_area, text_rect, flags);
+    // clear the colour override again once this row has been drawn
+    m_renderer_text.property_foreground_set() = false;
+}
+#endif
+
 // dm header
 
 void CellRendererChannels::get_preferred_width_vfunc_dmheader(Gtk::Widget &widget, int &minimum_width, int &natural_width) const {
diff --git a/src/components/channelscellrenderer.hpp b/src/components/channelscellrenderer.hpp
index e2be9b2..8e4025a 100644
--- a/src/components/channelscellrenderer.hpp
+++ b/src/components/channelscellrenderer.hpp
@@ -11,6 +11,10 @@ enum class RenderType : uint8_t {
     TextChannel,
     Thread,
 
+#ifdef WITH_VOICE
+    VoiceChannel,
+#endif
+
     DMHeader,
     DM,
 };
@@ -83,6 +87,19 @@ protected:
                              const Gdk::Rectangle &cell_area,
                              Gtk::CellRendererState flags);
 
+#ifdef WITH_VOICE
+    // voice channel
+    void get_preferred_width_vfunc_voice_channel(Gtk::Widget &widget, int &minimum_width, int &natural_width) const;
+    void get_preferred_width_for_height_vfunc_voice_channel(Gtk::Widget &widget, int height, int &minimum_width, int &natural_width) const;
+    void get_preferred_height_vfunc_voice_channel(Gtk::Widget &widget, int &minimum_height, int &natural_height) const;
+    void get_preferred_height_for_width_vfunc_voice_channel(Gtk::Widget &widget, int width, int &minimum_height, int &natural_height) const;
+    void render_vfunc_voice_channel(const Cairo::RefPtr<Cairo::Context> &cr,
+                                    Gtk::Widget &widget,
+                                    const Gdk::Rectangle &background_area,
+                                    const Gdk::Rectangle &cell_area,
+                                    Gtk::CellRendererState flags);
+#endif
+
     // dm header
     void get_preferred_width_vfunc_dmheader(Gtk::Widget &widget, int &minimum_width, int &natural_width) const;
     void get_preferred_width_for_height_vfunc_dmheader(Gtk::Widget &widget, int height, int &minimum_width, int &natural_width) const;
diff --git a/src/discord/discord.cpp b/src/discord/discord.cpp
index e1b7a48..2fff2a1 100644
--- a/src/discord/discord.cpp
+++ b/src/discord/discord.cpp
@@ -1169,6 +1169,33 @@ void DiscordClient::AcceptVerificationGate(Snowflake guild_id, VerificationGateI
     });
 }
 
+#ifdef WITH_VOICE
+void DiscordClient::ConnectToVoice(Snowflake channel_id) {
+    auto target = GetChannel(channel_id);
+    if (!(target.has_value() && target->GuildID.has_value())) return; // unknown channel, or one without a guild: nothing is sent
+    m_voice_channel_id = channel_id; // remembered for GetVoiceChannelID()
+    VoiceStateUpdateMessage request;
+    request.GuildID = *target->GuildID;
+    request.ChannelID = channel_id;
+    request.PreferredRegion = "newark";
+    m_websocket.Send(request);
+}
+
+void DiscordClient::DisconnectFromVoice() { // NOTE(review): m_voice_channel_id is not cleared here - confirm that is intended
+    m_voice.Stop(); // the voice client is stopped before the gateway is told
+    VoiceStateUpdateMessage m; // sent default-constructed: no guild or channel is filled in
+    m_websocket.Send(m);
+}
+
+bool DiscordClient::IsConnectedToVoice() const noexcept { // used by the channel list to enable/disable the voice menu items
+    return m_voice.IsConnected(); // the answer comes from the voice client, not from m_voice_channel_id
+}
+
+Snowflake DiscordClient::GetVoiceChannelID() const noexcept { // channel id recorded by the most recent ConnectToVoice() that sent a request
+    return m_voice_channel_id; // not reset by DisconnectFromVoice(); pair with IsConnectedToVoice()
+}
+#endif
+
 void DiscordClient::SetReferringChannel(Snowflake id) {
     if (!id.IsValid()) {
         m_http.SetPersistentHeader("Referer", "https://discord.com/channels/@me");
@@ -1488,6 +1515,14 @@ void DiscordClient::HandleGatewayMessage(std::string str) {
                     case GatewayEvent::GUILD_MEMBERS_CHUNK: {
                         HandleGatewayGuildMembersChunk(m);
                     } break;
+#ifdef WITH_VOICE
+                    case GatewayEvent::VOICE_STATE_UPDATE: {
+                        HandleGatewayVoiceStateUpdate(m);
+                    } break;
+                    case GatewayEvent::VOICE_SERVER_UPDATE: {
+                        HandleGatewayVoiceServerUpdate(m);
+                    } break;
+#endif
                 }
             } break;
             default:
@@ -2098,6 +2133,27 @@ void DiscordClient::HandleGatewayGuildMembersChunk(const GatewayMessage &msg) {
     m_store.EndTransaction();
 }
 
+#ifdef WITH_VOICE
+void DiscordClient::HandleGatewayVoiceStateUpdate(const GatewayMessage &msg) {
+    // Only the update describing our own user is acted on; it supplies the voice session id.
+    const VoiceStateUpdateData state = msg.Data;
+    if (!(state.UserID == m_user_data.ID)) return;
+    printf("voice session id: %s\n", state.SessionID.c_str());
+    m_voice.SetSessionID(state.SessionID);
+}
+
+void DiscordClient::HandleGatewayVoiceServerUpdate(const GatewayMessage &msg) { // Feeds endpoint/token/ids into m_voice and starts it.
+    VoiceServerUpdateData data = msg.Data;
+    printf("endpoint: %s\n", data.Endpoint.c_str());
+    puts("token: <redacted>"); // the token is a credential for the voice session; never echo it to stdout
+    m_voice.SetEndpoint(data.Endpoint);
+    m_voice.SetToken(data.Token);
+    m_voice.SetServerID(data.GuildID);
+    m_voice.SetUserID(m_user_data.ID);
+    m_voice.Start(); // NOTE(review): the session id comes from a separate VOICE_STATE_UPDATE; this assumes it already arrived
+}
+#endif
+
 void DiscordClient::HandleGatewayReadySupplemental(const GatewayMessage &msg) {
     ReadySupplementalData data = msg.Data;
     for (const auto &p : data.MergedPresences.Friends) {
@@ -2591,6 +2647,8 @@ void DiscordClient::LoadEventMap() {
     m_event_map["MESSAGE_ACK"] = GatewayEvent::MESSAGE_ACK;
     m_event_map["USER_GUILD_SETTINGS_UPDATE"] = GatewayEvent::USER_GUILD_SETTINGS_UPDATE;
     m_event_map["GUILD_MEMBERS_CHUNK"] = GatewayEvent::GUILD_MEMBERS_CHUNK;
+    m_event_map["VOICE_STATE_UPDATE"] = GatewayEvent::VOICE_STATE_UPDATE;
+    m_event_map["VOICE_SERVER_UPDATE"] = GatewayEvent::VOICE_SERVER_UPDATE;
 }
 
 DiscordClient::type_signal_gateway_ready DiscordClient::signal_gateway_ready() {
diff --git a/src/discord/discord.hpp b/src/discord/discord.hpp
index 70c2d82..0b88519 100644
--- a/src/discord/discord.hpp
+++ b/src/discord/discord.hpp
@@ -1,9 +1,11 @@
 #pragma once
-#include "websocket.hpp"
+#include "chatsubmitparams.hpp"
+#include "waiter.hpp"
 #include "httpclient.hpp"
 #include "objects.hpp"
 #include "store.hpp"
-#include "chatsubmitparams.hpp"
+#include "voiceclient.hpp"
+#include "websocket.hpp"
 #include <sigc++/sigc++.h>
 #include <nlohmann/json.hpp>
 #include <thread>
@@ -18,31 +20,6 @@
     #undef GetMessage
 #endif
 
-class HeartbeatWaiter {
-public:
-    template<class R, class P>
-    bool wait_for(std::chrono::duration<R, P> const &time) const {
-        std::unique_lock<std::mutex> lock(m);
-        return !cv.wait_for(lock, time, [&] { return terminate; });
-    }
-
-    void kill() {
-        std::unique_lock<std::mutex> lock(m);
-        terminate = true;
-        cv.notify_all();
-    }
-
-    void revive() {
-        std::unique_lock<std::mutex> lock(m);
-        terminate = false;
-    }
-
-private:
-    mutable std::condition_variable cv;
-    mutable std::mutex m;
-    bool terminate = false;
-};
-
 class Abaddon;
 class DiscordClient {
     friend class Abaddon;
@@ -204,6 +181,13 @@ public:
     void GetVerificationGateInfo(Snowflake guild_id, const sigc::slot<void(std::optional<VerificationGateInfoObject>)> &callback);
     void AcceptVerificationGate(Snowflake guild_id, VerificationGateInfoObject info, const sigc::slot<void(DiscordError code)> &callback);
 
+#ifdef WITH_VOICE
+    void ConnectToVoice(Snowflake channel_id); // sends a voice state update for `channel_id` (guild channels only)
+    void DisconnectFromVoice(); // stops m_voice and sends a voice state update with null ids
+    [[nodiscard]] bool IsConnectedToVoice() const noexcept; // forwards to m_voice.IsConnected()
+    [[nodiscard]] Snowflake GetVoiceChannelID() const noexcept; // last channel id accepted by ConnectToVoice()
+#endif
+
     void SetReferringChannel(Snowflake id);
 
     void SetBuildNumber(uint32_t build_number);
@@ -286,6 +270,12 @@ private:
     void HandleGatewayReadySupplemental(const GatewayMessage &msg);
     void HandleGatewayReconnect(const GatewayMessage &msg);
     void HandleGatewayInvalidSession(const GatewayMessage &msg);
+
+#ifdef WITH_VOICE
+    void HandleGatewayVoiceStateUpdate(const GatewayMessage &msg); // VOICE_STATE_UPDATE: stores our own voice session id
+    void HandleGatewayVoiceServerUpdate(const GatewayMessage &msg); // VOICE_SERVER_UPDATE: configures m_voice and starts it
+#endif
+
     void HeartbeatThread();
     void SendIdentify();
     void SendResume();
@@ -338,13 +328,19 @@ private:
     std::thread m_heartbeat_thread;
     std::atomic<int> m_last_sequence = -1;
     std::atomic<int> m_heartbeat_msec = 0;
-    HeartbeatWaiter m_heartbeat_waiter;
+    Waiter m_heartbeat_waiter;
     std::atomic<bool> m_heartbeat_acked = true;
 
     bool m_reconnecting = false; // reconnecting either to resume or reidentify
     bool m_wants_resume = false; // reconnecting specifically to resume
     std::string m_session_id;
 
+#ifdef WITH_VOICE
+    DiscordVoiceClient m_voice; // voice gateway websocket + UDP transport
+
+    Snowflake m_voice_channel_id; // written by ConnectToVoice(); DisconnectFromVoice() does not reset it
+#endif
+
     mutable std::mutex m_msg_mutex;
     Glib::Dispatcher m_msg_dispatch;
     std::queue<std::string> m_msg_queue;
diff --git a/src/discord/objects.cpp b/src/discord/objects.cpp
index e43e05a..3cdc6b5 100644
--- a/src/discord/objects.cpp
+++ b/src/discord/objects.cpp
@@ -640,3 +640,32 @@ void from_json(const nlohmann::json &j, GuildMembersChunkData &m) {
     JS_D("members", m.Members);
     JS_D("guild_id", m.GuildID);
 }
+
+#ifdef WITH_VOICE
+void to_json(nlohmann::json &j, const VoiceStateUpdateMessage &m) {
+    // Builds { "op": VoiceStateUpdate, "d": { ... } }. An unset optional id is written as
+    // JSON null rather than being left out of the payload.
+    j["op"] = GatewayOp::VoiceStateUpdate;
+    auto &payload = j["d"];
+    payload["guild_id"] = nullptr;
+    if (m.GuildID.has_value()) payload["guild_id"] = *m.GuildID;
+    payload["channel_id"] = nullptr;
+    if (m.ChannelID.has_value()) payload["channel_id"] = *m.ChannelID;
+    payload["self_mute"] = m.SelfMute;
+    payload["self_deaf"] = m.SelfDeaf;
+    payload["self_video"] = m.SelfVideo;
+    // m.PreferredRegion is not serialized; the assignment stays commented out:
+    // payload["preferred_region"] = m.PreferredRegion;
+}
+
+void from_json(const nlohmann::json &j, VoiceStateUpdateData &m) { // Reads "user_id" and "session_id".
+    JS_ON("user_id", m.UserID); // JS_ON is a project macro defined outside this view; presumably tolerates missing/null — confirm
+    JS_ON("session_id", m.SessionID);
+}
+
+void from_json(const nlohmann::json &j, VoiceServerUpdateData &m) { // Reads "token", "guild_id" and "endpoint".
+    JS_D("token", m.Token); // JS_D is a project macro defined outside this view; presumably a required field — confirm
+    JS_D("guild_id", m.GuildID);
+    JS_D("endpoint", m.Endpoint); // NOTE(review): verify how JS_D behaves if the gateway sends a null endpoint
+}
+#endif
diff --git a/src/discord/objects.hpp b/src/discord/objects.hpp
index 9db9369..0a947d4 100644
--- a/src/discord/objects.hpp
+++ b/src/discord/objects.hpp
@@ -100,6 +100,8 @@ enum class GatewayEvent : int {
     MESSAGE_ACK,
     USER_GUILD_SETTINGS_UPDATE,
     GUILD_MEMBERS_CHUNK,
+    VOICE_STATE_UPDATE,
+    VOICE_SERVER_UPDATE,
 };
 
 enum class GatewayCloseCode : uint16_t {
@@ -864,3 +866,31 @@ struct GuildMembersChunkData {
 
     friend void from_json(const nlohmann::json &j, GuildMembersChunkData &m);
 };
+
+#ifdef WITH_VOICE
+struct VoiceStateUpdateMessage { // Outgoing gateway message with op GatewayOp::VoiceStateUpdate.
+    std::optional<Snowflake> GuildID; // serialized as null when unset
+    std::optional<Snowflake> ChannelID; // serialized as null when unset
+    bool SelfMute = false;
+    bool SelfDeaf = false;
+    bool SelfVideo = false;
+    std::string PreferredRegion; // currently not written by to_json (that line is commented out)
+
+    friend void to_json(nlohmann::json &j, const VoiceStateUpdateMessage &m);
+};
+
+struct VoiceStateUpdateData { // Fields read from an incoming VOICE_STATE_UPDATE event.
+    Snowflake UserID; // user the state belongs to
+    std::string SessionID; // voice session id, handed to the voice client for our own user
+
+    friend void from_json(const nlohmann::json &j, VoiceStateUpdateData &m);
+};
+
+struct VoiceServerUpdateData { // Fields read from an incoming VOICE_SERVER_UPDATE event.
+    std::string Token; // credential passed on to the voice client
+    Snowflake GuildID; // used as the voice client's server id
+    std::string Endpoint; // host that DiscordVoiceClient::Start() prefixes with "wss://"
+
+    friend void from_json(const nlohmann::json &j, VoiceServerUpdateData &m);
+};
+#endif
diff --git a/src/discord/voiceclient.cpp b/src/discord/voiceclient.cpp
new file mode 100644
index 0000000..6d45241
--- /dev/null
+++ b/src/discord/voiceclient.cpp
@@ -0,0 +1,434 @@
+#ifdef WITH_VOICE
+    // clang-format off
+#include "voiceclient.hpp"
+#include "json.hpp"
+#include <sodium.h>
+#include "abaddon.hpp"
+#include "audio/manager.hpp"
+
+#ifdef _WIN32
+    #define S_ADDR(var) (var).sin_addr.S_un.S_addr
+    #define socklen_t int
+#else
+    #define S_ADDR(var) (var).sin_addr.s_addr
+#endif
+// clang-format on
+
+UDPSocket::UDPSocket() { // Creates the IPv4 datagram socket used for the whole lifetime of the object.
+    m_socket = socket(AF_INET, SOCK_DGRAM, 0); // NOTE(review): the result is not checked for failure
+}
+
+UDPSocket::~UDPSocket() { // Stops and joins the read thread.
+    Stop(); // NOTE(review): m_socket is never closed anywhere in this file — confirm whether the descriptor leak is acceptable
+}
+
+void UDPSocket::Connect(std::string_view ip, uint16_t port) { // Records ip:port as the peer used by Send(), Receive() and ReadThread().
+    std::memset(&m_server, 0, sizeof(m_server));
+    m_server.sin_family = AF_INET;
+    S_ADDR(m_server) = inet_addr(std::string(ip).c_str()); // string_view::data() need not be NUL-terminated, so copy first
+    m_server.sin_port = htons(port);
+    bind(m_socket, reinterpret_cast<sockaddr *>(&m_server), sizeof(m_server)); // NOTE(review): binds to the *remote* address and ignores the result — confirm intent
+}
+
+void UDPSocket::Run() { // Starts the background reader.
+    m_running = true; // set before the thread starts so ReadThread()'s loop condition holds on entry
+    m_thread = std::thread(&UDPSocket::ReadThread, this); // NOTE(review): assigning over a still-joinable thread calls std::terminate
+}
+
+void UDPSocket::SetSecretKey(std::array<uint8_t, 32> key) { // Key used by SendEncrypted().
+    m_secret_key = key;
+}
+
+void UDPSocket::SetSSRC(uint32_t ssrc) { // SSRC written into bytes 8..11 of every outgoing RTP header.
+    m_ssrc = ssrc;
+}
+
+void UDPSocket::SendEncrypted(const uint8_t *data, size_t len) { // Prepends a 12-byte RTP header, secretbox-encrypts `data`, sends it.
+    m_sequence++;
+    m_timestamp += 480; // advanced once per packet; presumably 480 samples per Opus frame — TODO confirm against the encoder
+
+    std::vector<uint8_t> rtp(12 + len + crypto_secretbox_MACBYTES, 0); // header + ciphertext + MAC
+    rtp[0] = 0x80; // ver 2
+    rtp[1] = 0x78; // payload type 0x78
+    rtp[2] = (m_sequence >> 8) & 0xFF; // sequence, timestamp and ssrc are written big-endian
+    rtp[3] = (m_sequence >> 0) & 0xFF;
+    rtp[4] = (m_timestamp >> 24) & 0xFF;
+    rtp[5] = (m_timestamp >> 16) & 0xFF;
+    rtp[6] = (m_timestamp >> 8) & 0xFF;
+    rtp[7] = (m_timestamp >> 0) & 0xFF;
+    rtp[8] = (m_ssrc >> 24) & 0xFF;
+    rtp[9] = (m_ssrc >> 16) & 0xFF;
+    rtp[10] = (m_ssrc >> 8) & 0xFF;
+    rtp[11] = (m_ssrc >> 0) & 0xFF;
+    // Nonce = the 12 header bytes followed by 12 zero bytes. A plain local instead of a function-local static: same bytes, no shared state.
+    std::array<uint8_t, 24> nonce = {};
+    std::memcpy(nonce.data(), rtp.data(), 12);
+    crypto_secretbox_easy(rtp.data() + 12, data, len, nonce.data(), m_secret_key.data());
+
+    Send(rtp.data(), rtp.size());
+}
+
+void UDPSocket::SendEncrypted(const std::vector<uint8_t> &data) { // Convenience overload forwarding to the pointer/length version.
+    SendEncrypted(data.data(), data.size());
+}
+
+void UDPSocket::Send(const uint8_t *data, size_t len) { // Sends one raw datagram to m_server; the sendto() result is ignored.
+    sendto(m_socket, reinterpret_cast<const char *>(data), static_cast<int>(len), 0, reinterpret_cast<sockaddr *>(&m_server), sizeof(m_server));
+}
+
+std::vector<uint8_t> UDPSocket::Receive() {
+    // Blocks until a datagram from m_server arrives; returns an empty vector if recvfrom reports an error.
+    static std::array<uint8_t, 4096> buf;
+    for (;;) {
+        sockaddr_in sender;
+        socklen_t sender_len = sizeof(sender);
+        const int received = recvfrom(m_socket, reinterpret_cast<char *>(buf.data()), sizeof(buf), 0, reinterpret_cast<sockaddr *>(&sender), &sender_len);
+        if (received < 0) return {};
+        const bool from_server = S_ADDR(sender) == S_ADDR(m_server) && sender.sin_port == m_server.sin_port;
+        if (from_server) return { buf.begin(), buf.begin() + received };
+        // datagrams from any other peer are dropped and we keep waiting
+    }
+}
+
+void UDPSocket::Stop() { // Asks ReadThread() to exit and waits for it.
+    m_running = false; // ReadThread() re-checks this after each select() timeout
+    if (m_thread.joinable()) m_thread.join();
+}
+
+void UDPSocket::ReadThread() { // Background loop: polls the socket and emits signal_data for each datagram from m_server.
+    timeval tv;
+    while (m_running) {
+        static std::array<uint8_t, 4096> buf;
+        sockaddr_in from;
+        socklen_t addrlen = sizeof(from);
+
+        tv.tv_sec = 1; // one-second timeout, re-armed every pass because select() may modify it
+        tv.tv_usec = 0; // tv_usec must stay below 1000000; the former {0, 1000000} can make select() fail with EINVAL and spin
+
+        fd_set read_fds;
+        FD_ZERO(&read_fds);
+        FD_SET(m_socket, &read_fds);
+
+        if (select(m_socket + 1, &read_fds, nullptr, nullptr, &tv) > 0) {
+            int n = recvfrom(m_socket, reinterpret_cast<char *>(buf.data()), sizeof(buf), 0, reinterpret_cast<sockaddr *>(&from), &addrlen);
+            if (n > 0 && S_ADDR(from) == S_ADDR(m_server) && from.sin_port == m_server.sin_port) { // ignore other peers
+                m_signal_data.emit({ buf.begin(), buf.begin() + n }); // emitted on this thread, not the main loop
+            }
+        }
+    }
+}
+
+UDPSocket::type_signal_data UDPSocket::signal_data() { // Accessor for the signal emitted by ReadThread(); returns it by value.
+    return m_signal_data;
+}
+
+DiscordVoiceClient::DiscordVoiceClient() { // Wires websocket/UDP callbacks to the two dispatchers and hooks the audio manager.
+    sodium_init();
+
+    m_ws.signal_open().connect([this]() {
+        puts("vws open");
+    });
+
+    m_ws.signal_close().connect([this](uint16_t code) {
+        printf("vws close %u\n", code);
+    });
+
+    m_ws.signal_message().connect([this](const std::string &str) { // queue the text frame, then wake m_dispatcher
+        std::lock_guard<std::mutex> _(m_dispatch_mutex);
+        m_message_queue.push(str);
+        m_dispatcher.emit();
+    });
+
+    m_udp.signal_data().connect([this](const std::vector<uint8_t> &data) { // same pattern for UDP datagrams
+        std::lock_guard<std::mutex> _(m_udp_dispatch_mutex);
+        m_udp_message_queue.push(data);
+        m_udp_dispatcher.emit();
+    });
+
+    m_dispatcher.connect([this]() { // pops exactly one queued message per emit and handles it outside the lock
+        m_dispatch_mutex.lock();
+        if (m_message_queue.empty()) {
+            m_dispatch_mutex.unlock();
+            return;
+        }
+        auto msg = std::move(m_message_queue.front());
+        m_message_queue.pop();
+        m_dispatch_mutex.unlock();
+        OnGatewayMessage(msg);
+    });
+
+    m_udp_dispatcher.connect([this]() { // pops exactly one queued datagram per emit and handles it outside the lock
+        m_udp_dispatch_mutex.lock();
+        if (m_udp_message_queue.empty()) {
+            m_udp_dispatch_mutex.unlock();
+            return;
+        }
+        auto data = std::move(m_udp_message_queue.front());
+        m_udp_message_queue.pop();
+        m_udp_dispatch_mutex.unlock();
+        OnUDPData(data);
+    });
+
+    Glib::signal_idle().connect_once([this]() { // NOTE(review): captures `this`; assumes the object outlives the idle callback
+        // cant put in ctor or deadlock in singleton initialization
+        auto &aud = Abaddon::Get().GetAudio();
+        aud.SetOpusBuffer(m_opus_buffer.data());
+        aud.signal_opus_packet().connect([this](int payload_size) { // sends payload_size bytes of m_opus_buffer while connected
+            if (m_connected)
+                m_udp.SendEncrypted(m_opus_buffer.data(), payload_size);
+        });
+    });
+}
+
+DiscordVoiceClient::~DiscordVoiceClient() { // Shuts the session down via Stop().
+    Stop(); // NOTE(review): Stop() only acts when m_connected is true; a heartbeat thread started before that stays joinable
+}
+
+void DiscordVoiceClient::Start() { // Opens the voice websocket; the rest of the handshake is driven by incoming messages.
+    m_ws.StartConnection("wss://" + m_endpoint + "/?v=7"); // m_endpoint must have been set via SetEndpoint() first
+}
+
+void DiscordVoiceClient::Stop() { // Tears down the websocket, the UDP reader and the heartbeat thread of an established session.
+    if (!m_connected) return; // NOTE(review): a session that got Hello but never reached SessionDescription is not cleaned up here
+    m_ws.Stop();
+    m_udp.Stop();
+    m_heartbeat_waiter.kill(); // wakes HeartbeatThread() so the join below cannot block for a full interval
+    if (m_heartbeat_thread.joinable()) m_heartbeat_thread.join();
+    m_heartbeat_waiter.revive(); // kill() latches; without this every later HeartbeatThread() would exit on its first wait_for()
+    m_connected = false;
+}
+
+void DiscordVoiceClient::SetSessionID(std::string_view session_id) { // Session id sent in Identify().
+    m_session_id = session_id;
+}
+
+void DiscordVoiceClient::SetEndpoint(std::string_view endpoint) { // Host used by Start() to build the wss:// URL.
+    m_endpoint = endpoint;
+}
+
+void DiscordVoiceClient::SetToken(std::string_view token) { // Token sent in Identify().
+    m_token = token;
+}
+
+void DiscordVoiceClient::SetServerID(Snowflake id) { // Server id sent in Identify().
+    m_server_id = id;
+}
+
+void DiscordVoiceClient::SetUserID(Snowflake id) { // User id sent in Identify().
+    m_user_id = id;
+}
+
+bool DiscordVoiceClient::IsConnected() const noexcept { // True from the end of HandleGatewaySessionDescription() until Stop().
+    return m_connected;
+}
+
+void DiscordVoiceClient::OnGatewayMessage(const std::string &str) { // Parses one voice-gateway frame and dispatches on its opcode.
+    VoiceGatewayMessage msg = nlohmann::json::parse(str); // NOTE(review): parse() throws on malformed input; nothing catches it here
+    if (msg.Opcode != VoiceGatewayOp::SessionDescription) puts(msg.Data.dump(4).c_str()); // that payload holds the secret key; keep it out of stdout
+    switch (msg.Opcode) {
+        case VoiceGatewayOp::Hello: {
+            HandleGatewayHello(msg);
+        } break;
+        case VoiceGatewayOp::Ready: {
+            HandleGatewayReady(msg);
+        } break;
+        case VoiceGatewayOp::SessionDescription: {
+            HandleGatewaySessionDescription(msg);
+        } break;
+        default: break; // every other opcode (HeartbeatAck, Speaking, ...) is ignored for now
+    }
+}
+
+void DiscordVoiceClient::HandleGatewayHello(const VoiceGatewayMessage &m) { // Hello: start heartbeating, then identify.
+    VoiceHelloData d = m.Data;
+    m_heartbeat_msec = d.HeartbeatInterval; // interval read by HeartbeatThread()
+    m_heartbeat_thread = std::thread(&DiscordVoiceClient::HeartbeatThread, this); // NOTE(review): std::terminate if a previous thread is still joinable
+
+    Identify();
+}
+
+void DiscordVoiceClient::HandleGatewayReady(const VoiceGatewayMessage &m) { // Ready: remember the UDP endpoint/SSRC and run IP discovery.
+    VoiceReadyData d = m.Data;
+    m_ip = d.IP;
+    m_port = d.Port;
+    m_ssrc = d.SSRC;
+    if (std::find(d.Modes.begin(), d.Modes.end(), "xsalsa20_poly1305") == d.Modes.end()) {
+        puts("xsalsa20_poly1305 not in encryption modes"); // only logged; the handshake continues with that mode regardless
+    }
+    printf("connect to %s:%u ssrc %u\n", m_ip.c_str(), m_port, m_ssrc);
+
+    m_udp.Connect(m_ip, m_port);
+
+    Discovery(); // blocking: waits on UDPSocket::Receive() in the thread that handles this message
+}
+
+void DiscordVoiceClient::HandleGatewaySessionDescription(const VoiceGatewayMessage &m) {
+    // Final handshake step: announce that we are speaking, install the session key on the
+    // UDP socket, send a few priming packets, start the reader and mark the client connected.
+    VoiceSessionDescriptionData d = m.Data;
+    // The secret key is deliberately not printed: it decrypts the whole voice session and
+    // stdout routinely ends up in pasted logs.
+    printf("receiving with %s\n", d.Mode.c_str());
+
+    VoiceSpeakingMessage msg;
+    msg.Delay = 0;
+    msg.SSRC = m_ssrc;
+    msg.Speaking = VoiceSpeakingMessage::Microphone;
+    m_ws.Send(msg);
+
+    m_secret_key = d.SecretKey;
+    m_udp.SetSSRC(m_ssrc);
+    m_udp.SetSecretKey(m_secret_key);
+    // Five identical 3-byte packets are sent before the reader starts.
+    // NOTE(review): 0xF8 0xFF 0xFE is presumably an Opus silence frame — confirm.
+    for (int i = 0; i < 5; i++)
+        m_udp.SendEncrypted({ 0xF8, 0xFF, 0xFE });
+
+    m_udp.Run();
+    m_connected = true;
+}
+
+void DiscordVoiceClient::Identify() { // Sends the Identify message built from the values supplied through the setters.
+    VoiceIdentifyMessage msg;
+    msg.ServerID = m_server_id;
+    msg.UserID = m_user_id;
+    msg.SessionID = m_session_id;
+    msg.Token = m_token;
+    msg.Video = true;
+    m_ws.Send(msg);
+}
+
+void DiscordVoiceClient::Discovery() { // Sends the 74-byte IP discovery request and forwards the reported address to SelectProtocol().
+    std::vector<uint8_t> payload;
+    // 2 bytes = 1, request
+    payload.push_back(0x00);
+    payload.push_back(0x01);
+    // 2 bytes = 70, pl length
+    payload.push_back(0x00);
+    payload.push_back(70);
+    // 4 bytes = ssrc
+    payload.push_back((m_ssrc >> 24) & 0xFF);
+    payload.push_back((m_ssrc >> 16) & 0xFF);
+    payload.push_back((m_ssrc >> 8) & 0xFF);
+    payload.push_back((m_ssrc >> 0) & 0xFF);
+    // address and port
+    for (int i = 0; i < 66; i++)
+        payload.push_back(0);
+    m_udp.Send(payload.data(), payload.size());
+    auto response = m_udp.Receive(); // blocks until the server answers or recvfrom fails
+    if (response.size() >= 74 && response[0] == 0x00 && response[1] == 0x02) { // 0x0002 = discovery response
+        const char *our_ip = reinterpret_cast<const char *>(&response[8]); // bytes 8..71; NOTE(review): relies on the server NUL-terminating it
+        uint16_t our_port = (response[72] << 8) | response[73]; // port is the last two bytes (big-endian); [74] was out of bounds
+        printf("we are %s:%u\n", our_ip, our_port);
+        SelectProtocol(our_ip, our_port);
+    } else {
+        puts("received non-discovery packet after discovery");
+    }
+}
+
+void DiscordVoiceClient::SelectProtocol(std::string_view ip, uint16_t port) { // Sends Select Protocol with our discovered address.
+    VoiceSelectProtocolMessage msg;
+    msg.Mode = "xsalsa20_poly1305"; // fixed; matches the secretbox calls used for sending and receiving
+    msg.Address = ip;
+    msg.Port = port;
+    msg.Protocol = "udp";
+    m_ws.Send(msg);
+}
+
+void DiscordVoiceClient::OnUDPData(std::vector<uint8_t> data) { // Decrypts one incoming RTP packet and hands the Opus payload to the audio manager.
+    // Runt packets cannot hold the 12-byte header plus MAC; without this check size() - 12 underflows and data[8..11] is out of bounds.
+    if (data.size() < 12 + crypto_secretbox_MACBYTES) return;
+    uint8_t *payload = data.data() + 12;
+    const size_t boxed_len = data.size() - 12; // ciphertext + MAC
+    const uint32_t ssrc = (uint32_t(data[8]) << 24) | (uint32_t(data[9]) << 16) | (uint32_t(data[10]) << 8) | uint32_t(data[11]); // big-endian
+    std::array<uint8_t, 24> nonce = {}; // 12 header bytes followed by 12 zero bytes; a local, so no state is shared between calls
+    std::memcpy(nonce.data(), data.data(), 12);
+    if (crypto_secretbox_open_easy(payload, payload, boxed_len, nonce.data(), m_secret_key.data())) { // non-zero = failure; decrypts in place
+        puts("decrypt fail");
+    } else {
+        Abaddon::Get().GetAudio().FeedMeOpus(ssrc, { payload, payload + boxed_len - crypto_secretbox_MACBYTES }); // MAC constant now matches the API used
+    }
+}
+
+void DiscordVoiceClient::HeartbeatThread() {
+    // Runs on m_heartbeat_thread. Each pass sleeps for one heartbeat interval and then sends
+    // the current wall-clock time (milliseconds since the epoch) as the heartbeat nonce.
+    // The loop ends as soon as the waiter reports it was killed (wait_for() returns false).
+    using namespace std::chrono;
+    while (m_heartbeat_waiter.wait_for(milliseconds(m_heartbeat_msec))) {
+        const auto since_epoch = system_clock::now().time_since_epoch();
+        const auto now_ms = duration_cast<milliseconds>(since_epoch).count();
+
+        VoiceHeartbeatMessage beat;
+        beat.Nonce = static_cast<uint64_t>(now_ms);
+        m_ws.Send(beat);
+    }
+}
+
+void from_json(const nlohmann::json &j, VoiceGatewayMessage &m) { // Splits a frame into opcode and raw "d" payload.
+    JS_D("op", m.Opcode); // JS_D is a project macro defined outside this view
+    m.Data = j.at("d"); // at() throws if "d" is missing
+}
+
+void from_json(const nlohmann::json &j, VoiceHelloData &m) { // Reads "heartbeat_interval" (used as milliseconds by HeartbeatThread()).
+    JS_D("heartbeat_interval", m.HeartbeatInterval);
+}
+
+void to_json(nlohmann::json &j, const VoiceHeartbeatMessage &m) { // Heartbeat: "d" is the bare nonce, not an object.
+    j["op"] = VoiceGatewayOp::Heartbeat;
+    j["d"] = m.Nonce;
+}
+
+void to_json(nlohmann::json &j, const VoiceIdentifyMessage &m) {
+    // Identify payload; the single "streams" entry is a fixed video descriptor.
+    j["op"] = VoiceGatewayOp::Identify;
+    auto &body = j["d"];
+    body["server_id"] = m.ServerID;
+    body["user_id"] = m.UserID;
+    body["session_id"] = m.SessionID;
+    body["token"] = m.Token;
+    body["video"] = m.Video;
+    body["streams"][0] = { { "type", "video" }, { "rid", "100" }, { "quality", 100 } };
+}
+
+void from_json(const nlohmann::json &j, VoiceReadyData::VoiceStream &m) { // Reads one entry of the Ready "streams" array.
+    JS_D("active", m.IsActive); // JS_D is a project macro defined outside this view
+    JS_D("quality", m.Quality);
+    JS_D("rid", m.RID);
+    JS_D("rtx_ssrc", m.RTXSSRC);
+    JS_D("ssrc", m.SSRC);
+    JS_D("type", m.Type);
+}
+
+void from_json(const nlohmann::json &j, VoiceReadyData &m) { // Reads the Ready payload (UDP endpoint, SSRC, supported modes).
+    JS_ON("experiments", m.Experiments); // JS_ON vs JS_D: project macros defined outside this view; presumably optional vs required — confirm
+    JS_D("ip", m.IP);
+    JS_D("modes", m.Modes);
+    JS_D("port", m.Port);
+    JS_D("ssrc", m.SSRC);
+    JS_ON("streams", m.Streams);
+}
+
+void to_json(nlohmann::json &j, const VoiceSelectProtocolMessage &m) {
+    // address/port/mode are written twice: directly under "d" and again under "d"."data".
+    j["op"] = VoiceGatewayOp::SelectProtocol;
+    auto &body = j["d"];
+    body["address"] = m.Address;
+    body["port"] = m.Port;
+    body["protocol"] = m.Protocol;
+    body["mode"] = m.Mode;
+    body["data"] = { { "address", m.Address }, { "port", m.Port }, { "mode", m.Mode } };
+}
+
+void from_json(const nlohmann::json &j, VoiceSessionDescriptionData &m) { // Reads the negotiated mode and the 32-byte secret key.
+    JS_D("mode", m.Mode);
+    JS_D("secret_key", m.SecretKey);
+}
+
+void to_json(nlohmann::json &j, const VoiceSpeakingMessage &m) {
+    // Speaking payload: the speaking flag bits, the delay and our SSRC,
+    // all placed under "d".
+    j["op"] = VoiceGatewayOp::Speaking;
+    j["d"] = { { "speaking", m.Speaking }, { "delay", m.Delay }, { "ssrc", m.SSRC } };
+}
+#endif
diff --git a/src/discord/voiceclient.hpp b/src/discord/voiceclient.hpp
new file mode 100644
index 0000000..f81763b
--- /dev/null
+++ b/src/discord/voiceclient.hpp
@@ -0,0 +1,232 @@
+#pragma once
+#ifdef WITH_VOICE
+// clang-format off
+#include "snowflake.hpp"
+#include "waiter.hpp"
+#include "websocket.hpp"
+#include <mutex>
+#include <queue>
+#include <string>
+#include <glibmm/dispatcher.h>
+// clang-format on
+
+enum class VoiceGatewayCloseCode : uint16_t { // Close codes of the voice websocket; not referenced by voiceclient.cpp in this change.
+    UnknownOpcode = 4001,
+    InvalidPayload = 4002,
+    NotAuthenticated = 4003,
+    AuthenticationFailed = 4004,
+    AlreadyAuthenticated = 4005,
+    SessionInvalid = 4006,
+    SessionTimedOut = 4009,
+    ServerNotFound = 4011,
+    UnknownProtocol = 4012,
+    Disconnected = 4014,
+    ServerCrashed = 4015,
+    UnknownEncryption = 4016,
+};
+
+enum class VoiceGatewayOp : int { // Value of the "op" field in voice-gateway frames.
+    Identify = 0, // sent by Identify()
+    SelectProtocol = 1, // sent by SelectProtocol()
+    Ready = 2, // handled by HandleGatewayReady()
+    Heartbeat = 3, // sent by HeartbeatThread()
+    SessionDescription = 4, // handled by HandleGatewaySessionDescription()
+    Speaking = 5, // sent once the session description arrives
+    HeartbeatAck = 6,
+    Resume = 7,
+    Hello = 8, // handled by HandleGatewayHello()
+    Resumed = 9,
+    ClientDisconnect = 13,
+};
+
+struct VoiceGatewayMessage { // One parsed incoming frame: opcode plus the undecoded "d" payload.
+    VoiceGatewayOp Opcode;
+    nlohmann::json Data; // converted to the concrete *Data struct by each handler
+
+    friend void from_json(const nlohmann::json &j, VoiceGatewayMessage &m);
+};
+
+struct VoiceHelloData { // Payload of Hello.
+    int HeartbeatInterval; // passed to std::chrono::milliseconds by HeartbeatThread()
+
+    friend void from_json(const nlohmann::json &j, VoiceHelloData &m);
+};
+
+struct VoiceHeartbeatMessage { // Outgoing Heartbeat.
+    uint64_t Nonce; // HeartbeatThread() uses the current time in ms since the epoch
+
+    friend void to_json(nlohmann::json &j, const VoiceHeartbeatMessage &m);
+};
+
+struct VoiceIdentifyMessage { // Outgoing Identify.
+    Snowflake ServerID;
+    Snowflake UserID;
+    std::string SessionID;
+    std::string Token;
+    bool Video; // no default; Identify() always sets it
+    // todo streams i guess?
+
+    friend void to_json(nlohmann::json &j, const VoiceIdentifyMessage &m);
+};
+
+struct VoiceReadyData { // Payload of Ready.
+    struct VoiceStream { // one element of "streams"
+        bool IsActive;
+        int Quality;
+        std::string RID;
+        int RTXSSRC;
+        int SSRC;
+        std::string Type;
+
+        friend void from_json(const nlohmann::json &j, VoiceStream &m);
+    };
+
+    std::vector<std::string> Experiments;
+    std::string IP; // UDP server address
+    std::vector<std::string> Modes; // encryption modes offered by the server
+    uint16_t Port; // UDP server port
+    uint32_t SSRC; // our SSRC for outgoing RTP
+    std::vector<VoiceStream> Streams;
+
+    friend void from_json(const nlohmann::json &j, VoiceReadyData &m);
+};
+
+struct VoiceSelectProtocolMessage { // Outgoing Select Protocol.
+    std::string Address; // our address as reported by IP discovery
+    uint16_t Port; // our port as reported by IP discovery
+    std::string Mode; // encryption mode
+    std::string Protocol;
+
+    friend void to_json(nlohmann::json &j, const VoiceSelectProtocolMessage &m);
+};
+
+struct VoiceSessionDescriptionData { // Payload of Session Description.
+    // std::string AudioCodec;
+    // std::string VideoCodec;
+    // std::string MediaSessionID;
+    std::string Mode;
+    std::array<uint8_t, 32> SecretKey; // key for the secretbox encrypt/decrypt calls
+
+    friend void from_json(const nlohmann::json &j, VoiceSessionDescriptionData &m);
+};
+
+struct VoiceSpeakingMessage { // Outgoing Speaking.
+    enum { // bit flags for Speaking
+        Microphone = 1 << 0,
+        Soundshare = 1 << 1,
+        Priority = 1 << 2,
+    };
+
+    int Speaking; // combination of the flags above
+    int Delay;
+    uint32_t SSRC;
+
+    friend void to_json(nlohmann::json &j, const VoiceSpeakingMessage &m);
+};
+
+class UDPSocket { // IPv4 UDP transport for voice: raw and RTP/secretbox-encrypted sends plus a background reader.
+public:
+    UDPSocket();
+    ~UDPSocket();
+
+    void Connect(std::string_view ip, uint16_t port); // sets the peer address
+    void Run(); // starts ReadThread()
+    void SetSecretKey(std::array<uint8_t, 32> key);
+    void SetSSRC(uint32_t ssrc);
+    void SendEncrypted(const uint8_t *data, size_t len); // RTP header + encrypted payload
+    void SendEncrypted(const std::vector<uint8_t> &data);
+    void Send(const uint8_t *data, size_t len); // raw datagram
+    std::vector<uint8_t> Receive(); // blocking read of one datagram from the peer; empty on error
+    void Stop(); // stops and joins ReadThread()
+
+private:
+    void ReadThread();
+
+    #ifdef _WIN32
+    SOCKET m_socket;
+    #else
+    int m_socket;
+    #endif
+    sockaddr_in m_server; // peer set by Connect()
+
+    std::atomic<bool> m_running = false; // loop flag for ReadThread()
+
+    std::thread m_thread;
+
+    std::array<uint8_t, 32> m_secret_key; // uninitialized until SetSecretKey()
+    uint32_t m_ssrc; // uninitialized until SetSSRC()
+
+    uint16_t m_sequence = 0; // RTP sequence number, incremented per SendEncrypted()
+    uint32_t m_timestamp = 0; // RTP timestamp, advanced per SendEncrypted()
+
+public:
+    using type_signal_data = sigc::signal<void, std::vector<uint8_t>>;
+    type_signal_data signal_data(); // emitted from ReadThread() for every datagram received from the peer
+
+private:
+    type_signal_data m_signal_data;
+};
+
+class DiscordVoiceClient { // Drives one voice session: gateway websocket handshake, heartbeats and the UDP audio transport.
+public:
+    DiscordVoiceClient();
+    ~DiscordVoiceClient();
+
+    void Start(); // opens the websocket to m_endpoint
+    void Stop(); // tears the session down if connected
+
+    void SetSessionID(std::string_view session_id);
+    void SetEndpoint(std::string_view endpoint);
+    void SetToken(std::string_view token);
+    void SetServerID(Snowflake id);
+    void SetUserID(Snowflake id);
+
+    [[nodiscard]] bool IsConnected() const noexcept;
+
+private:
+    void OnGatewayMessage(const std::string &str);
+    void HandleGatewayHello(const VoiceGatewayMessage &m);
+    void HandleGatewayReady(const VoiceGatewayMessage &m);
+    void HandleGatewaySessionDescription(const VoiceGatewayMessage &m);
+
+    void Identify();
+    void Discovery();
+    void SelectProtocol(std::string_view ip, uint16_t port);
+
+    void OnUDPData(std::vector<uint8_t> data);
+
+    void HeartbeatThread();
+
+    std::string m_session_id;
+    std::string m_endpoint;
+    std::string m_token;
+    Snowflake m_server_id;
+    Snowflake m_channel_id; // not read or written by voiceclient.cpp in this change
+    Snowflake m_user_id;
+
+    std::string m_ip; // UDP server address from Ready
+    uint16_t m_port; // UDP server port from Ready
+    uint32_t m_ssrc; // our SSRC from Ready
+
+    std::array<uint8_t, 32> m_secret_key; // from Session Description; used to decrypt incoming packets
+
+    Websocket m_ws;
+    UDPSocket m_udp;
+
+    Glib::Dispatcher m_dispatcher; // delivers queued websocket messages to OnGatewayMessage()
+    std::queue<std::string> m_message_queue;
+    std::mutex m_dispatch_mutex; // guards m_message_queue
+
+    Glib::Dispatcher m_udp_dispatcher; // delivers queued datagrams to OnUDPData()
+    std::queue<std::vector<uint8_t>> m_udp_message_queue;
+    std::mutex m_udp_dispatch_mutex; // guards m_udp_message_queue
+
+    int m_heartbeat_msec; // set from Hello before the heartbeat thread starts
+    Waiter m_heartbeat_waiter;
+    std::thread m_heartbeat_thread;
+
+    std::array<uint8_t, 1275> m_opus_buffer; // registered with the audio manager via SetOpusBuffer()
+
+    std::atomic<bool> m_connected = false;
+};
+#endif
diff --git a/src/discord/waiter.hpp b/src/discord/waiter.hpp
new file mode 100644
index 0000000..0d5ae92
--- /dev/null
+++ b/src/discord/waiter.hpp
@@ -0,0 +1,29 @@
+#pragma once
+#include <chrono>
+#include <condition_variable>
+#include <mutex>
+
+class Waiter {
+public:
+    template<class R, class P>
+    bool wait_for(std::chrono::duration<R, P> const &time) const { // true = timed out normally, false = kill() was called
+        std::unique_lock<std::mutex> guard(m);
+        const bool killed = cv.wait_for(guard, time, [this] { return terminate; });
+        return !killed;
+    }
+    // Latches the terminate flag and wakes every thread blocked in wait_for().
+    void kill() {
+        std::lock_guard<std::mutex> guard(m);
+        terminate = true;
+        cv.notify_all();
+    }
+    // Clears the terminate flag so that wait_for() sleeps again.
+    void revive() {
+        std::lock_guard<std::mutex> guard(m);
+        terminate = false;
+    }
+private:
+    mutable std::condition_variable cv;
+    mutable std::mutex m;
+    bool terminate = false;
+};
diff --git a/subprojects/miniaudio b/subprojects/miniaudio
new file mode 160000
+Subproject 4dfe7c4c31df46e78d9a1cc0d2d6f1aef5a5d58