50 lines
1.8 KiB
Rust
50 lines
1.8 KiB
Rust
use std::process::{Command, Stdio};
|
|
|
|
use crate::audio::TtsOutStream;
|
|
|
|
#[derive(Debug)]
|
|
pub struct TtsControl {
|
|
request_sink: tokio::sync::mpsc::Sender<String>
|
|
}
|
|
|
|
impl TtsControl {
|
|
pub async fn speak(&self, text: String) {
|
|
self.request_sink.send(text).await.unwrap();
|
|
}
|
|
}
|
|
|
|
pub async fn start_tts(audio_sink: TtsOutStream) -> TtsControl {
|
|
|
|
let (tts_request_sender, mut tts_request_receiver) = tokio::sync::mpsc::channel(3);
|
|
|
|
// Set up the TTS task
|
|
tokio::spawn(async move {
|
|
while let Some(text) = tts_request_receiver.recv().await {
|
|
let tts_output = Command::new("espeak-ng").args(["-v", "en-us+f3", "--stdout"]).arg(text).stdout(Stdio::piped()).spawn().unwrap().wait_with_output().unwrap().stdout;
|
|
let tts_fd = std::io::Cursor::new(tts_output);
|
|
let mut wav_reader = hound::WavReader::new(tts_fd).unwrap();
|
|
|
|
let mut bitrate_resample = resampler::ResamplerFir::new_from_hz(1, wav_reader.spec().sample_rate, audio_sink.sample_rate, Default::default(), Default::default());
|
|
|
|
let mut audio_out_buf = vec![];
|
|
|
|
for sample in wav_reader.samples() {
|
|
if let Ok(raw_sample) = sample {
|
|
let sample16: i16 = raw_sample;
|
|
let sample32: f32 = (sample16 as f32) / (i16::MAX as f32);
|
|
let mut audio_slice = [0.; 32];
|
|
let (_, write_count) = bitrate_resample.resample(&[sample32], &mut audio_slice).unwrap();
|
|
audio_out_buf.extend_from_slice(&audio_slice[0..write_count]);
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
|
|
audio_sink.sink.send(audio_out_buf).await.unwrap();
|
|
}
|
|
});
|
|
|
|
TtsControl {
|
|
request_sink: tts_request_sender
|
|
}
|
|
} |