Receive: Config of decode sample rate/channels (#265)

* Receive: Config of decode sample rate/channels

  This PR allows for dynamic configuration of the output sample rate and channel count of received Opus audio. Users who rely on supported formats should no longer need to manually resample and downmix audio decoded from SSRCs in a call. Opus can decode to any combination of (Mono, Stereo) x (8, 12, 16, 24, 48) kHz.

  Changing this at runtime (mid-call) may cause some audio glitches, as decoder state must be reconstructed from scratch for all affected SSRCs.

* Fix doc typo, consistent naming with MixMode.
serenity-rs · Nov 16, 2024 · 91bf153 · 91bf153
1 parent 312799d
commit 91bf153
Show file tree

Hide file tree

Showing 6 changed files with 134 additions and 11 deletions.
diff --git a/src/config.rs b/src/config.rs
@@ -1,5 +1,5 @@
-#[cfg(feature = "receive")]
-use crate::driver::DecodeMode;
+#[cfg(all(feature = "driver", feature = "receive"))]
+use crate::driver::{Channels, DecodeMode, SampleRate};
 #[cfg(feature = "driver")]
 use crate::{
     driver::{
@@ -61,6 +61,18 @@ pub struct Config {
     /// [User speaking state]: crate::events::CoreEvent::VoiceTick
     pub decode_mode: DecodeMode,
 
+    #[cfg(all(feature = "driver", feature = "receive"))]
+    /// Configures the channel layout for output audio when using [`DecodeMode::Decode`].
+    ///
+    /// Defaults to [`Channels::Stereo`].
+    pub decode_channels: Channels,
+
+    #[cfg(all(feature = "driver", feature = "receive"))]
+    /// Configures the sample rate for output audio when using [`DecodeMode::Decode`].
+    ///
+    /// Defaults to [`SampleRate::Hz48000`].
+    pub decode_sample_rate: SampleRate,
+
     #[cfg(all(feature = "driver", feature = "receive"))]
     /// Configures the amount of time after a user/SSRC is inactive before their decoder state
     /// should be removed.
@@ -215,6 +227,10 @@ impl Default for Config {
             #[cfg(all(feature = "driver", feature = "receive"))]
             decode_mode: DecodeMode::Decrypt,
             #[cfg(all(feature = "driver", feature = "receive"))]
+            decode_channels: Channels::Stereo,
+            #[cfg(all(feature = "driver", feature = "receive"))]
+            decode_sample_rate: SampleRate::Hz48000,
+            #[cfg(all(feature = "driver", feature = "receive"))]
             decode_state_timeout: Duration::from_secs(60),
             #[cfg(all(feature = "driver", feature = "receive"))]
             playout_buffer_length: NonZeroUsize::new(5).unwrap(),
@@ -267,6 +283,22 @@ impl Config {
         self
     }
 
+    #[cfg(feature = "receive")]
+    /// Sets this `Config`'s channel layout for output audio when using [`DecodeMode::Decode`].
+    ///
+    /// Takes the `Config` by value and returns it, so setters can be chained.
+    /// The default layout is [`Channels::Stereo`].
+    ///
+    /// If a changed value reaches a running receive task, the Opus decoder of every
+    /// tracked source is rebuilt, which may cause brief audio glitches.
+    #[must_use]
+    pub fn decode_channels(mut self, decode_channels: Channels) -> Self {
+        self.decode_channels = decode_channels;
+        self
+    }
+
+    #[cfg(feature = "receive")]
+    /// Sets this `Config`'s sample rate for output audio when using [`DecodeMode::Decode`].
+    ///
+    /// Takes the `Config` by value and returns it, so setters can be chained.
+    /// The default rate is [`SampleRate::Hz48000`].
+    ///
+    /// If a changed value reaches a running receive task, the Opus decoder of every
+    /// tracked source is rebuilt, which may cause brief audio glitches.
+    #[must_use]
+    pub fn decode_sample_rate(mut self, decode_sample_rate: SampleRate) -> Self {
+        self.decode_sample_rate = decode_sample_rate;
+        self
+    }
+
     #[cfg(feature = "receive")]
     /// Sets this `Config`'s received packet decoder cleanup timer.
     #[must_use]

diff --git a/src/driver/decode_mode.rs b/src/driver/decode_mode.rs
@@ -1,5 +1,7 @@
+use audiopus::{Channels as OpusChannels, SampleRate as OpusRate};
+
 /// Decode behaviour for received RTP packets within the driver.
-#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)]
 #[non_exhaustive]
 pub enum DecodeMode {
     /// Packets received from Discord are handed over to events without any
@@ -24,3 +26,68 @@ impl DecodeMode {
         self != DecodeMode::Pass
     }
 }
+
+/// The channel layout of output audio when using [`DecodeMode::Decode`].
+///
+/// Stored in the driver configuration as `decode_channels` and converted into the
+/// `audiopus` channel type whenever a per-source Opus decoder is created.
+#[derive(Clone, Copy, Debug, Default, Eq, PartialEq, Hash)]
+#[non_exhaustive]
+pub enum Channels {
+    /// Decode received audio packets into a single channel.
+    Mono,
+    /// Decode received audio packets into two interleaved channels.
+    ///
+    /// Received mono packets' samples will automatically be duplicated across
+    /// both channels.
+    ///
+    /// The default choice.
+    #[default]
+    Stereo,
+}
+
+impl Channels {
+    /// Returns the number of audio channels in this layout: 1 for [`Channels::Mono`],
+    /// 2 for [`Channels::Stereo`].
+    ///
+    /// The receive path multiplies the decoder's per-channel sample count by this
+    /// value to get the length of the interleaved output buffer.
+    pub(crate) fn channels(self) -> usize {
+        match self {
+            Channels::Mono => 1,
+            Channels::Stereo => 2,
+        }
+    }
+}
+
+/// Maps each layout onto the same-named `audiopus` variant, so the configured value
+/// can be passed to the Opus decoder constructor with `.into()`.
+impl From<Channels> for OpusChannels {
+    fn from(value: Channels) -> Self {
+        match value {
+            Channels::Mono => OpusChannels::Mono,
+            Channels::Stereo => OpusChannels::Stereo,
+        }
+    }
+}
+
+/// The sample rate of output audio when using [`DecodeMode::Decode`].
+///
+/// Stored in the driver configuration as `decode_sample_rate` and converted into the
+/// `audiopus` sample-rate type whenever a per-source Opus decoder is created.
+#[derive(Clone, Copy, Debug, Default, Eq, PartialEq, Hash)]
+#[non_exhaustive]
+pub enum SampleRate {
+    /// Decode to a sample rate of 8kHz.
+    Hz8000,
+    /// Decode to a sample rate of 12kHz.
+    Hz12000,
+    /// Decode to a sample rate of 16kHz.
+    Hz16000,
+    /// Decode to a sample rate of 24kHz.
+    Hz24000,
+    /// Decode to a sample rate of 48kHz.
+    ///
+    /// The preferred option for encoding/decoding at or above CD quality.
+    ///
+    /// The default choice.
+    #[default]
+    Hz48000,
+}
+
+/// Maps each rate onto the same-named `audiopus` variant, so the configured value
+/// can be passed to the Opus decoder constructor with `.into()`.
+impl From<SampleRate> for OpusRate {
+    fn from(value: SampleRate) -> Self {
+        match value {
+            SampleRate::Hz8000 => OpusRate::Hz8000,
+            SampleRate::Hz12000 => OpusRate::Hz12000,
+            SampleRate::Hz16000 => OpusRate::Hz16000,
+            SampleRate::Hz24000 => OpusRate::Hz24000,
+            SampleRate::Hz48000 => OpusRate::Hz48000,
+        }
+    }
+}
diff --git a/src/driver/mod.rs b/src/driver/mod.rs
@@ -28,7 +28,7 @@ use connection::error::{Error, Result};
 pub use crypto::CryptoMode;
 pub(crate) use crypto::CryptoState;
 #[cfg(feature = "receive")]
-pub use decode_mode::DecodeMode;
+pub use decode_mode::*;
 pub use mix_mode::MixMode;
 pub use scheduler::{
     Config as SchedulerConfig,

diff --git a/src/driver/tasks/udp_rx/mod.rs b/src/driver/tasks/udp_rx/mod.rs
@@ -66,7 +66,13 @@ impl UdpRx {
                             *interconnect = i;
                         },
                         Ok(UdpRxMessage::SetConfig(c)) => {
+                            let old_coder = (self.config.decode_channels, self.config.decode_sample_rate);
+                            let new_coder = (c.decode_channels, c.decode_sample_rate);
                             self.config = c;
+
+                            if old_coder != new_coder {
+                                self.decoder_map.values_mut().for_each(|v| v.reconfigure_decoder(&self.config));
+                            }
                         },
                         Err(flume::RecvError::Disconnected) => break,
                     }

diff --git a/src/driver/tasks/udp_rx/ssrc_state.rs b/src/driver/tasks/udp_rx/ssrc_state.rs
@@ -1,8 +1,8 @@
 use super::*;
 use crate::{
-    constants::*,
     driver::{
         tasks::error::{Error, Result},
+        Channels,
         DecodeMode,
     },
     events::context_data::{RtpData, VoiceData},
@@ -11,7 +11,6 @@ use audiopus::{
     coder::Decoder as OpusDecoder,
     error::{Error as OpusError, ErrorCode},
     packet::Packet as OpusPacket,
-    Channels,
 };
 use discortp::{rtp::RtpExtensionPacket, Packet, PacketSize};
 use tracing::{error, warn};
@@ -24,6 +23,7 @@ pub struct SsrcState {
     decode_size: PacketDecodeSize,
     pub(crate) prune_time: Instant,
     pub(crate) disconnected: bool,
+    channels: Channels,
 }
 
 impl SsrcState {
@@ -33,14 +33,27 @@ impl SsrcState {
         Self {
             playout_buffer: PlayoutBuffer::new(playout_capacity, pkt.get_sequence().0),
             crypto_mode,
-            decoder: OpusDecoder::new(SAMPLE_RATE, Channels::Stereo)
-                .expect("Failed to create new Opus decoder for source."),
+            decoder: OpusDecoder::new(
+                config.decode_sample_rate.into(),
+                config.decode_channels.into(),
+            )
+            .expect("Failed to create new Opus decoder for source."),
             decode_size: PacketDecodeSize::TwentyMillis,
             prune_time: Instant::now() + config.decode_state_timeout,
             disconnected: false,
+            channels: config.decode_channels,
         }
     }
 
+    /// Replaces this source's Opus decoder with a fresh one built from `config`'s
+    /// `decode_sample_rate` and `decode_channels`, and records the new channel layout
+    /// so later output-length calculations use it.
+    ///
+    /// The previous decoder is dropped, so none of its state carries over to the new one.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the Opus decoder cannot be created.
+    pub fn reconfigure_decoder(&mut self, config: &Config) {
+        self.decoder = OpusDecoder::new(
+            config.decode_sample_rate.into(),
+            config.decode_channels.into(),
+        )
+        .expect("Failed to create new Opus decoder for source.");
+        self.channels = config.decode_channels;
+    }
+
     pub fn store_packet(&mut self, packet: StoredPacket, config: &Config) {
         self.playout_buffer.store_packet(packet, config);
     }
@@ -160,7 +173,7 @@ impl SsrcState {
                     Ok(audio_len) => {
                         // audio_len refers to sample count irrespective of channel count.
                         // => multiply by the configured number of output channels.
-                        out.truncate(2 * audio_len);
+                        out.truncate(self.channels.channels() * audio_len);
 
                         break;
                     },

diff --git a/src/events/context/data/voice.rs b/src/events/context/data/voice.rs
@@ -30,9 +30,14 @@ pub struct VoiceData {
     pub packet: Option<RtpData>,
     /// PCM audio obtained from a user.
     ///
-    /// Valid audio data (`Some(audio)` where `audio.len >= 0`) typically contains 20ms of 16-bit stereo PCM audio
-    /// at 48kHz, using native endianness. Channels are interleaved (i.e., `L, R, L, R, ...`).
+    /// Valid audio data (`Some(audio)` where `audio.len >= 0`) typically contains 20ms of 16-bit PCM audio
+    /// using native endianness. This defaults to stereo audio at 48kHz, and can be configured via
+    /// [`Config::decode_channels`] and [`Config::decode_sample_rate`] -- channels are interleaved
+    /// (i.e., `L, R, L, R, ...`) if stereo.
     ///
     /// This value will be `None` if Songbird is not configured to decode audio.
+    ///
+    /// [`Config::decode_channels`]: crate::Config::decode_channels
+    /// [`Config::decode_sample_rate`]: crate::Config::decode_sample_rate
     pub decoded_voice: Option<Vec<i16>>,
 }