From e93b8715f9e42d25c0930becc22561f23a25a709 Mon Sep 17 00:00:00 2001 From: ouwou <26526779+ouwou@users.noreply.github.com> Date: Mon, 5 Sep 2022 02:21:37 -0400 Subject: basic voice capture + transmission --- src/audio/manager.cpp | 63 ++++++++++++++++++++++++++++++++++++++++++++- src/audio/manager.hpp | 20 ++++++++++++++ src/discord/voiceclient.cpp | 38 ++++++++++++++++++++++----- src/discord/voiceclient.hpp | 19 ++++++++++++++ 4 files changed, 132 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/audio/manager.cpp b/src/audio/manager.cpp index 9d380f9..b665c81 100644 --- a/src/audio/manager.cpp +++ b/src/audio/manager.cpp @@ -39,9 +39,25 @@ void data_callback(ma_device *pDevice, void *pOutput, const void *pInput, ma_uin } } +void capture_data_callback(ma_device *pDevice, void *pOutput, const void *pInput, ma_uint32 frameCount) { + auto *mgr = reinterpret_cast(pDevice->pUserData); + if (mgr == nullptr) return; + + mgr->OnCapturedPCM(static_cast(pInput), frameCount); +} + AudioManager::AudioManager() { m_ok = true; + int err; + m_encoder = opus_encoder_create(48000, 2, OPUS_APPLICATION_VOIP, &err); + if (err != OPUS_OK) { + printf("failed to initialize opus encoder: %d\n", err); + m_ok = false; + return; + } + opus_encoder_ctl(m_encoder, OPUS_SET_BITRATE(64000)); + m_device_config = ma_device_config_init(ma_device_type_playback); m_device_config.playback.format = ma_format_f32; m_device_config.playback.channels = 2; @@ -50,7 +66,7 @@ AudioManager::AudioManager() { m_device_config.pUserData = this; if (ma_device_init(nullptr, &m_device_config, &m_device) != MA_SUCCESS) { - puts("open playabck fail"); + puts("open playback fail"); m_ok = false; return; } @@ -61,15 +77,45 @@ AudioManager::AudioManager() { m_ok = false; return; } + + m_capture_config = ma_device_config_init(ma_device_type_capture); + m_capture_config.capture.format = ma_format_s16; + m_capture_config.capture.channels = 2; + m_capture_config.sampleRate = 48000; + m_capture_config.periodSizeInFrames = 480; + m_capture_config.dataCallback = capture_data_callback; + m_capture_config.pUserData = this; + + if (ma_device_init(nullptr, &m_capture_config, &m_capture_device) != MA_SUCCESS) { + puts("open capture fail"); + m_ok = false; + return; + } + + if (ma_device_start(&m_capture_device) != MA_SUCCESS) { + puts("failed to start capture"); + ma_device_uninit(&m_capture_device); + m_ok = false; + return; + } + + char device_name[MA_MAX_DEVICE_NAME_LENGTH + 1]; + ma_device_get_name(&m_capture_device, ma_device_type_capture, device_name, sizeof(device_name), nullptr); + printf("using %s for capture\n", device_name); } AudioManager::~AudioManager() { ma_device_uninit(&m_device); + ma_device_uninit(&m_capture_device); for (auto &[ssrc, pair] : m_sources) { opus_decoder_destroy(pair.second); } } +void AudioManager::SetOpusBuffer(uint8_t *ptr) { + m_opus_buffer = ptr; +} + void AudioManager::FeedMeOpus(uint32_t ssrc, const std::vector &data) { size_t payload_size = 0; const auto *opus_encoded = StripRTPExtensionHeader(data.data(), static_cast(data.size()), payload_size); @@ -89,7 +135,22 @@ void AudioManager::FeedMeOpus(uint32_t ssrc, const std::vector &data) { } } +void AudioManager::OnCapturedPCM(const int16_t *pcm, ma_uint32 frames) { + if (m_opus_buffer == nullptr) return; + + int payload_len = opus_encode(m_encoder, pcm, 480, static_cast(m_opus_buffer), 1275); + if (payload_len < 0) { + printf("encoding error: %d\n", payload_len); + } else { + m_signal_opus_packet.emit(payload_len); + } +} + bool AudioManager::OK() const { return m_ok; } + +AudioManager::type_signal_opus_packet AudioManager::signal_opus_packet() { + return m_signal_opus_packet; +} #endif diff --git a/src/audio/manager.hpp b/src/audio/manager.hpp index d0f3a21..700fcc0 100644 --- a/src/audio/manager.hpp +++ b/src/audio/manager.hpp @@ -10,6 +10,7 @@ #include #include #include +#include // clang-format on class AudioManager { @@ -17,21 +18,40 @@ public: AudioManager(); ~AudioManager(); + void SetOpusBuffer(uint8_t *ptr); void FeedMeOpus(uint32_t ssrc, const std::vector &data); [[nodiscard]] bool OK() const; private: + void OnCapturedPCM(const int16_t *pcm, ma_uint32 frames); + friend void data_callback(ma_device *, void *, const void *, ma_uint32); + friend void capture_data_callback(ma_device *, void *, const void *, ma_uint32); std::thread m_thread; bool m_ok; + // playback ma_device m_device; ma_device_config m_device_config; + // capture + ma_device m_capture_device; + ma_device_config m_capture_config; std::mutex m_mutex; std::unordered_map, OpusDecoder *>> m_sources; + + OpusEncoder *m_encoder; + + uint8_t *m_opus_buffer = nullptr; + +public: + using type_signal_opus_packet = sigc::signal; + type_signal_opus_packet signal_opus_packet(); + +private: + type_signal_opus_packet m_signal_opus_packet; }; #endif diff --git a/src/discord/voiceclient.cpp b/src/discord/voiceclient.cpp index d8855fd..3f38eea 100644 --- a/src/discord/voiceclient.cpp +++ b/src/discord/voiceclient.cpp @@ -43,11 +43,11 @@ void UDPSocket::SetSSRC(uint32_t ssrc) { m_ssrc = ssrc; } -void UDPSocket::SendEncrypted(const std::vector &data) { +void UDPSocket::SendEncrypted(const uint8_t *data, size_t len) { m_sequence++; - m_timestamp += (48000 / 100) * 2; + m_timestamp += 480; // this is important - std::vector rtp(12, 0); + std::vector rtp(12 + len + crypto_secretbox_MACBYTES, 0); rtp[0] = 0x80; // ver 2 rtp[1] = 0x78; // payload type 0x78 rtp[2] = (m_sequence >> 8) & 0xFF; @@ -63,14 +63,15 @@ void UDPSocket::SendEncrypted(const std::vector &data) { static std::array nonce = {}; std::memcpy(nonce.data(), rtp.data(), 12); - - std::vector ciphertext(crypto_secretbox_MACBYTES + rtp.size(), 0); - crypto_secretbox_easy(ciphertext.data(), rtp.data(), rtp.size(), nonce.data(), m_secret_key.data()); - rtp.insert(rtp.end(), ciphertext.begin(), ciphertext.end()); + crypto_secretbox_easy(rtp.data() + 12, data, len, nonce.data(), m_secret_key.data()); Send(rtp.data(), rtp.size()); } +void UDPSocket::SendEncrypted(const std::vector &data) { + SendEncrypted(data.data(), data.size()); +} + void UDPSocket::Send(const uint8_t *data, size_t len) { sendto(m_socket, reinterpret_cast(data), static_cast(len), 0, reinterpret_cast(&m_server), sizeof(m_server)); } @@ -172,6 +173,14 @@ DiscordVoiceClient::~DiscordVoiceClient() { void DiscordVoiceClient::Start() { m_ws.StartConnection("wss://" + m_endpoint + "/?v=7"); + + // cant put in ctor or deadlock in singleton initialization + auto &aud = Abaddon::Get().GetAudio(); + aud.SetOpusBuffer(m_opus_buffer.data()); + aud.signal_opus_packet().connect([this](int payload_size) { + if (m_connected) + m_udp.SendEncrypted(m_opus_buffer.data(), payload_size); + }); } void DiscordVoiceClient::SetSessionID(std::string_view session_id) { @@ -241,6 +250,13 @@ void DiscordVoiceClient::HandleGatewaySessionDescription(const VoiceGatewayMessa printf("%02X", b); } printf("\n"); + + VoiceSpeakingMessage msg; + msg.Delay = 0; + msg.SSRC = m_ssrc; + msg.Speaking = VoiceSpeakingMessage::Microphone; + m_ws.Send(msg); + m_secret_key = d.SecretKey; m_udp.SetSSRC(m_ssrc); m_udp.SetSecretKey(m_secret_key); @@ -250,6 +266,7 @@ void DiscordVoiceClient::HandleGatewaySessionDescription(const VoiceGatewayMessa m_udp.SendEncrypted({ 0xF8, 0xFF, 0xFE }); m_udp.SendEncrypted({ 0xF8, 0xFF, 0xFE }); m_udp.Run(); + m_connected = true; } void DiscordVoiceClient::Identify() { @@ -388,4 +405,11 @@ void from_json(const nlohmann::json &j, VoiceSessionDescriptionData &m) { JS_D("mode", m.Mode); JS_D("secret_key", m.SecretKey); } + +void to_json(nlohmann::json &j, const VoiceSpeakingMessage &m) { + j["op"] = VoiceGatewayOp::Speaking; + j["d"]["speaking"] = m.Speaking; + j["d"]["delay"] = m.Delay; + j["d"]["ssrc"] = m.SSRC; +} #endif diff --git a/src/discord/voiceclient.hpp b/src/discord/voiceclient.hpp index 4b988d5..67919e5 100644 --- a/src/discord/voiceclient.hpp +++ b/src/discord/voiceclient.hpp @@ -110,6 +110,20 @@ struct VoiceSessionDescriptionData { friend void from_json(const nlohmann::json &j, VoiceSessionDescriptionData &m); }; +struct VoiceSpeakingMessage { + enum { + Microphone = 1 << 0, + Soundshare = 1 << 1, + Priority = 1 << 2, + }; + + int Speaking; + int Delay; + uint32_t SSRC; + + friend void to_json(nlohmann::json &j, const VoiceSpeakingMessage &m); +}; + class UDPSocket { public: UDPSocket(); @@ -119,6 +133,7 @@ public: void Run(); void SetSecretKey(std::array key); void SetSSRC(uint32_t ssrc); + void SendEncrypted(const uint8_t *data, size_t len); void SendEncrypted(const std::vector &data); void Send(const uint8_t *data, size_t len); std::vector Receive(); @@ -205,5 +220,9 @@ private: int m_heartbeat_msec; Waiter m_heartbeat_waiter; std::thread m_heartbeat_thread; + + std::array m_opus_buffer; + + std::atomic m_connected = false; }; #endif -- cgit v1.2.3