tts: add an audio output port, and feed TTS utterances into it

2026-06-08 15:59:25 +02:00
parent 34d58e5d66
commit 16c6cc7001
3 changed files with 62 additions and 10 deletions
@@ -1,4 +1,6 @@
-use std::process::Command;
+use std::process::{Command, Stdio};
+
+use crate::audio::TtsOutStream;

 #[derive(Debug)]
 pub struct TtsControl {
@@ -11,15 +13,34 @@ impl TtsControl {
    }
 }

-pub async fn start_tts() -> TtsControl {
+pub async fn start_tts(audio_sink: TtsOutStream) -> TtsControl {

    let (tts_request_sender, mut tts_request_receiver) = tokio::sync::mpsc::channel(3);

    // Set up the TTS task
    tokio::spawn(async move {
        while let Some(text) = tts_request_receiver.recv().await {
-            // TODO: We should also have espeak pipe out to stdout, then we can apply some audio effects and write to our own jack port.
-            Command::new("espeak-ng").arg("-v").arg("en-us+f3").arg(text).spawn().unwrap().wait().unwrap();
+            let tts_output = Command::new("espeak-ng").args(["-v", "en-us+f3", "--stdout"]).arg(text).stdout(Stdio::piped()).spawn().unwrap().wait_with_output().unwrap().stdout;
+            let tts_fd = std::io::Cursor::new(tts_output);
+            let mut wav_reader = hound::WavReader::new(tts_fd).unwrap();
+
+            let mut bitrate_resample = resampler::ResamplerFir::new_from_hz(1, wav_reader.spec().sample_rate, audio_sink.sample_rate, Default::default(), Default::default());
+
+            let mut audio_out_buf = vec![];
+
+            for sample in wav_reader.samples() {
+                if let Ok(raw_sample) = sample {
+                    let sample16: i16 = raw_sample;
+                    let sample32: f32 = (sample16 as f32) / (i16::MAX as f32);
+                    let mut audio_slice = [0.; 32];
+                    let (_, write_count) = bitrate_resample.resample(&[sample32], &mut audio_slice).unwrap();
+                    audio_out_buf.extend_from_slice(&audio_slice[0..write_count]);
+                } else {
+                    break;
+                }
+            }
+
+            audio_sink.sink.send(audio_out_buf).await.unwrap();
        }
    });