use async_openai::types::chat::ChatCompletionRequestMessage; use chrono::{DateTime, Duration, Utc}; use futures_timer::Delay; use serde::{Deserialize, Serialize}; use ratatui::{Frame, layout::{Constraint, Direction, Layout}, widgets::{Block, BorderType, Clear, Gauge, List, ListDirection, ListItem, ListState, Paragraph, Wrap}}; use throbber_widgets_tui::{Throbber, ThrobberState}; use crossterm::{event::{self, EventStream, KeyCode, KeyModifiers}}; use tokio::{sync::{mpsc, watch}, time::Instant}; use tui_input::{Input, backend::crossterm::EventHandler}; use futures::{StreamExt, future::FutureExt}; use ratatui::prelude::*; use tui_skeleton::{AnimationMode, SkeletonText}; use crate::{prediction::{BandcampResult, PossibleResponse}, scene::{ConversationEntry, Scene, StageActions, StageDirection}, transcription::{AudioInputControl, TranscriptionControl, start_audio_input}, tts::{TtsControl, start_tts}}; mod scene; mod events; mod transcription; mod tts; mod prediction; // TODO: We should have a separate 'state.json' file, which remembers jack connections, and the world time for the show to end. Then we only update the 'time remaining' field in the scene and only deal with relative durations inside the scene data // TODO: We should be able to delete entries from the conversation, or at least go back and edit something I said. // TODO: I want a "mark" command or keyboard shortcut, that inserts a marker into the log, so I know where to come back for the next speaking segment. // TODO: If we insert text without speaking, this should be indicated visually somehow // FIXME: The playlist ordering is always reversed when Eva talks about it? We also need some way to estimate where in the playlist we are currently, based on timekeeping and 'last played' state in mixxx // TODO: We should be able to 'close' an episode, by having openai summarize the script into some description, which can be used automatically in the next episode. // FIXME: It is unclear what would happen if we are live editing the save.json, have a typo, then reload. The file might get wiped without recovery. // TODO: Would be nice to have some SFX integrated, with bleeps and calculation nosies or something periodically /* Usage loop: - Prompt user to select one of: - Select response 1 (1) - Select response 2 (2) - Select response 3 (3) - Trigger scenario (a, b, c) - Add additional user input (t) - Regenerate responses (r) - Speak selected response - Regenerate next responses while speaking UI layout: - Top panel: Conversation history - Bottom panel: User input / response options (depending on mode) - Status bar: Shows current show time, episode, and any other relevant information. Has a throbber for network activity. - Right panel: Shortcuts for triggering scenarios, soundboard events, etc */ impl<'a> Into> for PossibleResponse { fn into(self) -> ListItem<'a> { if let Some(direction) = self.stage_direction { Line::from_iter([ //Span::from(format!("({})", direction)).style(ratatui::style::Color::Yellow), //Span::from(" "), Span::from(self.text) ]).into() } else { Line::from(self.text).into() } } } #[derive(Debug)] struct App { scene: Scene, direction: StageDirection, next_actions: Vec, end_time: DateTime, reply_state: ListState, conversation_state: ListState, user_input: Input, throbber_state: ThrobberState, is_requesting: bool, audio_level: f64, recording_audio: bool, focus_state: FocusState, transcription: TranscriptionControl, prediction_request_sink: watch::Sender, audio: AudioInputControl, tts: TtsControl, sys_message_sink: mpsc::Sender } #[derive(Debug)] enum FocusState { Conversation, UserInput } #[derive(Debug)] enum BandcampError { Json(serde_json::Error), Api(bandcamp::Error) } impl From for BandcampError { fn from(value: serde_json::Error) -> Self { BandcampError::Json(value) } } impl From for BandcampError { fn from(value: bandcamp::Error) -> Self { BandcampError::Api(value) } } impl App { fn new(prediction_request_sink: watch::Sender, audio: AudioInputControl, transcription: TranscriptionControl, tts: TtsControl, sys_message_sink: mpsc::Sender, initial_direction: StageDirection) -> Self { Self { scene: Default::default(), direction: initial_direction, next_actions: Default::default(), reply_state: Default::default(), conversation_state: Default::default(), user_input: Default::default(), end_time: Utc::now() + Duration::hours(2), throbber_state: Default::default(), prediction_request_sink, is_requesting: false, audio_level: -60., audio, recording_audio: false, transcription, focus_state: FocusState::UserInput, tts, sys_message_sink } } fn format_line<'a>(entry: &ConversationEntry, max_width: usize) -> Text<'a> { let prefix = match entry { ConversationEntry::Eva(_) => "Eva: ", ConversationEntry::User(_) => "Argee: ", ConversationEntry::ShipComputer(_) => "Ship Computer: ", _ => "", }; let style = match entry { ConversationEntry::Eva(_) => Style::new().fg(style::Color::Cyan), ConversationEntry::User(_) => Style::new().fg(style::Color::Magenta), ConversationEntry::ShipComputer(_) => Style::new().fg(style::Color::Green), ConversationEntry::StageDirection(_) => Style::new().fg(style::Color::Yellow), ConversationEntry::SystemMessage(_) => Style::new().fg(style::Color::DarkGray), }; let text = match entry { ConversationEntry::Eva(text) => text, ConversationEntry::ShipComputer(text) => text, ConversationEntry::StageDirection(text) => text, ConversationEntry::SystemMessage(text) => text, ConversationEntry::User(text) => text }; let avail_width = max_width - prefix.len(); let indent = " ".repeat(prefix.len()); let wrap_options = textwrap::Options::new(avail_width).initial_indent(prefix).subsequent_indent(&indent); let wrapped: Vec = textwrap::wrap(text, wrap_options) .iter() .enumerate() .map(|(idx, s)| { if idx == 0 { Line::from_iter([Span::from(prefix).style(style), Span::from(s[prefix.len()..].to_string())]) } else { Line::from(s.to_string()) } }).collect(); Text::from_iter(wrapped) } fn draw_conversation(&mut self, frame: &mut Frame, area: Rect) { let width = area.width.into(); let items: Vec = self.scene.conversation().iter().rev().map(|entry| { Self::format_line(entry, width) }).collect(); // TODO: Would be nice to be able to scroll a longer conversation with the scroll wheel, or with page up/down frame.render_stateful_widget( List::new(items) .block(Block::bordered().border_style(style::Color::LightYellow).title("Conversation (Press Tab to select and read Eva's lines aloud)")) .direction(ListDirection::BottomToTop) .highlight_symbol("> ") .highlight_style(style::Style::new().bold().fg(style::Color::Cyan)), area, &mut self.conversation_state ); } fn draw_options(&mut self, frame: &mut Frame, area: Rect) { let borders = Block::bordered().border_style(style::Color::LightGreen).title("Reply Options (Press 'Ctrl+R' to regenerate, Ctrl+Enter to use)"); if self.scene.reply_options().len() == 0 && self.is_requesting { let list = SkeletonText::new(std::time::SystemTime::now().duration_since(std::time::SystemTime::UNIX_EPOCH).unwrap().as_millis() as u64) .braille(true) .line_widths(&[0.25, 0.5, 0.4, 0.6]) .mode(AnimationMode::Noise) .block(borders); frame.render_widget(list, area); } else { let wrap_options = textwrap::Options::new(area.width as usize).subsequent_indent("..."); let options: Vec = self.scene.reply_options().iter().map(|option| { let mut contents: Vec = vec![]; if let Some(direction) = &option.stage_direction { let padded = format!("({})", direction); let mut wrapped_direction: Vec = textwrap::wrap(&padded, wrap_options.clone()) .iter() .map(|x| { Line::from(x.to_string()).fg(style::Color::Yellow)}).collect(); contents.append(&mut wrapped_direction); } let mut text: Vec = textwrap::wrap(&option.text, wrap_options.clone()) .iter() .map(|x| { Line::from(x.to_string())}).collect(); contents.append(&mut text); Text::from_iter(contents) }).collect(); frame.render_stateful_widget( List::new(options) .block(borders) .style(ratatui::style::Color::White) .highlight_symbol("> ".fg(Color::Cyan)) .highlight_style(style::Style::new().bold().bg(style::Color::DarkGray)) .repeat_highlight_symbol(true), area, &mut self.reply_state ); } } fn draw_user_input(&mut self, frame: &mut Frame, area: Rect) { let width = area.width.max(3) - 3; let scroll = self.user_input.visual_scroll(width as usize); let input = Paragraph::new(self.user_input.value()).block(Block::bordered().border_type(BorderType::LightDoubleDashed).title("User Input (Press 'Ctrl-X' to start/stop audio transcription)")).scroll((0, scroll as u16)); frame.render_widget(input, area); let x = self.user_input.visual_cursor().max(scroll); frame.set_cursor_position((area.x + x as u16 + 1, area.y + 1)); } fn draw_io_throbber(&mut self, frame: &mut Frame, area: Rect) { let throb_area = area.centered(Constraint::Max(1), Constraint::Max(1)); if self.is_requesting { let throbber = Throbber::default(); frame.render_stateful_widget(throbber, throb_area, &mut self.throbber_state); } else { frame.render_widget(Clear::default(), throb_area); } } fn draw_status(&self, frame: &mut Frame, area: Rect) { let minutes_remaining = self.direction.time_remaining.num_seconds() / 60; let negative = self.direction.time_remaining.abs() != self.direction.time_remaining; let time_style = if minutes_remaining <= 0 || negative { Style::new().fg(ratatui::style::Color::LightRed).bold() } else if minutes_remaining < 5 { Style::new().fg(ratatui::style::Color::LightRed).bold() } else if minutes_remaining < 10 { ratatui::style::Color::Red.into() } else if minutes_remaining < 25 { ratatui::style::Color::Yellow.into() } else if minutes_remaining < 60 { ratatui::style::Color::Green.into() } else { ratatui::style::Color::Blue.into() }; let formatted_time = if negative { format!("-{:0>2}:{:0>2}:{:0>2}", self.direction.time_remaining.num_hours().abs(), self.direction.time_remaining.num_minutes().abs()% 60, self.direction.time_remaining.num_seconds().abs() % 60) } else { format!("{:0>2}:{:0>2}:{:0>2}", self.direction.time_remaining.num_hours(), self.direction.time_remaining.num_minutes() % 60, self.direction.time_remaining.num_seconds() % 60) }; let status_line = Line::from_iter([ Span::from(format!("Episode {}", self.direction.episode_number)).style(ratatui::style::Color::LightBlue), Span::from(" | ").style(ratatui::style::Color::DarkGray), Span::from(format!("{} tracks", self.direction.current_playlist.len())).style(ratatui::style::Color::LightBlue), Span::from(" | ").style(ratatui::style::Color::DarkGray), Span::from(format!("Time Remaining: {}", formatted_time)).style(time_style), Span::from(" | ").style(ratatui::style::Color::DarkGray), ]); frame.render_widget(status_line, area); } fn draw_narration(&self, frame: &mut Frame, area: Rect) { let narrative_desc = if self.direction.narrative.is_empty() { Span::from("No narrative available.").style(ratatui::style::Color::DarkGray) } else { Span::from(self.direction.narrative.clone()) }; let setting = Paragraph::new(narrative_desc).block(Block::bordered().border_style(style::Color::LightMagenta).title("Stage Direction")).wrap(ratatui::widgets::Wrap { trim: false }); frame.render_widget(setting, area); } fn draw_event_log(&self, frame: &mut Frame, area: Rect) { let items: Vec = self.scene.conversation().iter().filter(|entry| { if let ConversationEntry::StageDirection(_) = entry { true } else { false }}).rev().map(|entry| { match entry { ConversationEntry::StageDirection(text) => Line::from_iter([text]).style(ratatui::style::Color::Yellow), _ => unreachable!() } }).collect(); frame.render_widget(Paragraph::new(items).block(Block::bordered().border_style(style::Color::Red).title("Event Log")).wrap(Wrap { trim: false }), area); } fn draw(&mut self, frame: &mut Frame) { let layout = Layout::default() .direction(Direction::Vertical) .constraints([ Constraint::Fill(3), Constraint::Min(9), Constraint::Max(3), Constraint::Max(1) ]) .split(frame.area()); let scene_layout = Layout::default() .direction(Direction::Horizontal) .constraints([Constraint::Fill(4), Constraint::Fill(1)]) .split(layout[0]); let context_layout = Layout::default() .direction(Direction::Vertical) .constraints([Constraint::Fill(1), Constraint::Fill(1)]) .split(scene_layout[1]); self.draw_conversation(frame, scene_layout[0]); self.draw_narration(frame, context_layout[0]); self.draw_event_log(frame, context_layout[1]); self.draw_options(frame, layout[1]); let status_layout = Layout::default() .direction(Direction::Horizontal) .constraints([Constraint::Max(3), Constraint::Fill(2), Constraint::Max(13), Constraint::Min(50)]) .split(layout[3]); self.draw_user_input(frame, layout[2]); self.draw_io_throbber(frame, status_layout[0]); self.draw_status(frame, status_layout[1]); self.draw_recording_status(frame, status_layout[2]); self.draw_volume(frame, status_layout[3]); } fn draw_recording_status(&self, frame: &mut Frame, area: Rect) { frame.render_widget(Line::from_iter( if self.recording_audio { [ Span::from(" "), Span::from(" Recording ").bg(style::Color::LightRed).fg(style::Color::White), Span::from(" ") ] } else { [ Span::from(" "), Span::from(" Stopped ").bg(style::Color::DarkGray).fg(style::Color::White), Span::from(" ") ] } ), area); } fn draw_volume(&self, frame: &mut Frame, area: Rect) { const NOISE_FLOOR: f64 = 50.; let vu_pct = 1.0 - (self.audio_level.abs().min(NOISE_FLOOR) / NOISE_FLOOR); let volume_color = if self.recording_audio { if vu_pct >= 0.85 { style::Color::Red } else if vu_pct >= 0.60 { style::Color::Yellow } else { style::Color::LightGreen } } else { style::Color::Gray }; let gauge = Gauge::default() .ratio(vu_pct) .use_unicode(true) .gauge_style(volume_color) .label(format!("{:.01}dB", self.audio_level)); frame.render_widget(gauge, area); } async fn insert_selected_prompt(&mut self) { let selected = self.scene.reply_options()[self.reply_state.selected().unwrap()].clone(); if let Some(direction) = &selected.stage_direction { self.next_actions.push(ConversationEntry::StageDirection(direction.clone())); } self.next_actions.push(ConversationEntry::Eva(selected.text.clone())); self.tts.speak(selected.text.clone()).await; self.regenerate_responses(); } async fn on_command(&mut self, command: &str) { let mut parts = command.splitn(2, " "); let command = parts.next().unwrap(); let arg = parts.next().unwrap_or(""); match command { "/bandcamp" => { if let Err(err) = self.add_bandcamp_artifact(arg).await { self.sys_message_sink.send(format!("Failed to fetch artifact: {:?}", err)).await.unwrap(); } else { self.sys_message_sink.send(format!("Added Bandcamp artifact from {}", arg)).await.unwrap(); self.next_actions.push(ConversationEntry::ShipComputer(format!("Incoming transmission from {}", arg))); self.regenerate_responses(); } }, "/episode" => { if let Ok(episode_number) = arg.trim().parse::() { self.direction.episode_number = episode_number; self.sys_message_sink.send(format!("Updated episode number: {}", self.direction.episode_number)).await.unwrap(); self.reload_mixxx_playlist(); } else { self.sys_message_sink.send("Invalid episode number format. Use /episode [number]".into()).await.unwrap(); return; } }, "/timer" => { if let Ok(minutes) = arg.trim().parse::() { self.end_time = Utc::now() + Duration::minutes(minutes); self.sys_message_sink.send(format!("Set timer for {} minutes.", minutes)).await.unwrap(); } else { self.sys_message_sink.send("Invalid timer format. Use /timer [minutes]".into()).await.unwrap(); } }, "/clear" => { match arg.trim() { "artifacts" => { self.direction.artifacts.clear(); self.sys_message_sink.send("Cleared artifacts.".into()).await.unwrap(); }, _ => { self.sys_message_sink.send("Unknown clear command. Use /clear [artifacts]".into()).await.unwrap(); } } return; }, "/narrative" => { self.direction.narrative = arg.to_string(); self.sys_message_sink.send(format!("Updated stage direction: {}", self.direction.narrative)).await.unwrap(); }, "/event" => { self.next_actions.push(ConversationEntry::StageDirection(arg.to_string())); self.regenerate_responses(); }, "/computer" => { self.next_actions.push(ConversationEntry::ShipComputer(arg.to_string())); self.regenerate_responses(); }, _ => { self.sys_message_sink.send("Unknown command. Available commands: /bandcamp [url], /episode [number], /narrative [text], /reset".into()).await.unwrap(); } } } async fn on_event(&mut self, evt: event::Event) { if let Some(key) = evt.as_key_press_event() { match self.focus_state { FocusState::Conversation => { match key.code { KeyCode::Tab => { self.focus_state = FocusState::UserInput; self.conversation_state.select(None); }, KeyCode::PageUp => self.conversation_state.scroll_down_by(5), KeyCode::PageDown => self.conversation_state.scroll_up_by(5), KeyCode::Home => self.conversation_state.select_last(), KeyCode::End => self.conversation_state.select_first(), KeyCode::Down => self.conversation_state.select_previous(), KeyCode::Up => self.conversation_state.select_next(), KeyCode::Enter => { let row_num = self.conversation_state.selected().unwrap(); if let ConversationEntry::Eva(text) = &self.scene.conversation()[self.scene.conversation().len() - 1 - row_num] { self.tts.speak(text.clone()).await; self.focus_state = FocusState::UserInput; self.conversation_state.select(None); } }, _ => () } }, FocusState::UserInput => { match key.code { KeyCode::Tab => { self.focus_state = FocusState::Conversation; self.conversation_state.select_first(); }, KeyCode::Char('r') if key.modifiers.contains(KeyModifiers::CONTROL) => self.regenerate_responses(), KeyCode::Char('x') if key.modifiers.contains(KeyModifiers::CONTROL) => { if self.recording_audio { self.recording_audio = false; self.transcription.stop(); self.is_requesting = true; } else { self.recording_audio = true; self.transcription.start(); } }, KeyCode::Down => self.reply_state.select_next(), KeyCode::Up => self.reply_state.select_previous(), KeyCode::Enter if key.modifiers.contains(KeyModifiers::CONTROL) => { self.insert_selected_prompt().await; }, KeyCode::Enter => { let next_msg = self.user_input.value_and_reset(); if next_msg.trim().is_empty() { self.insert_selected_prompt().await; } else { if next_msg.starts_with("/") { self.on_command(&next_msg).await; } else { self.next_actions.push(ConversationEntry::User(next_msg)); self.regenerate_responses(); } } }, _ => {self.user_input.handle_event(&evt);}, } } } } } async fn add_bandcamp_artifact(&mut self, url: &str) -> Result<(), BandcampError> { let album = bandcamp::album_from_url(url).await?; let result: BandcampResult = album.into(); let json = serde_json::to_string(&result)?; self.direction.artifacts.push(json); self.sys_message_sink.send("Added bandcamp album".into()).await.unwrap(); Ok(()) } fn regenerate_responses(&mut self) { let actions = StageActions { direction: self.direction.clone(), additions: std::mem::take(&mut self.next_actions) }; self.scene.reply_options_mut().clear(); self.scene.conversation_mut().append(&mut actions.additions.clone()); self.prediction_request_sink.send(actions).unwrap(); self.is_requesting = true; } fn reload_mixxx_playlist(&mut self) { // TODO: Should have some status message which states how many tracks are in the playlist if let Err(err) = self.direction.reload_mixxx_playlist() { self.next_actions.push(ConversationEntry::SystemMessage(format!("Error while loading mixxx playlist: {:?}", err))); } else { self.next_actions.push(ConversationEntry::SystemMessage(format!("Mixxx playlist reloaded. {} tracks found.", self.direction.current_playlist.len()).into())); } } } #[derive(Serialize, Deserialize, Debug, Default)] pub struct SaveData { pub direction: StageDirection, pub messages: Vec } impl SaveData { fn save(&self) { let save_data = serde_json::to_string_pretty(self).unwrap(); std::fs::write("save.json", save_data).unwrap(); } } #[tokio::main] async fn main() { let (panic_hook, eyre_hook) = color_eyre::config::HookBuilder::default() .display_env_section(true) .display_location_section(true) .into_hooks(); eyre_hook.install().unwrap(); std::panic::set_hook(Box::new(move |panic_info| { let msg = format!("{}", panic_hook.panic_report(panic_info)); println!("Panic: {}", msg); })); if option_env!("OPENAI_API_KEY").is_none() { eprintln!("Error: OPENAI_API_KEY environment variable not set. The application will not function without it."); return; } let mut terminal: Terminal> = ratatui::init(); let (sys_message_sink, sys_message_src) = tokio::sync::mpsc::channel(32); let saved_session = if let Ok(save_data) = std::fs::read_to_string("save.json") { if let Ok(ret) = serde_json::from_str(&save_data) { sys_message_sink.send("Loaded session from save.json".into()).await.unwrap(); ret } else { sys_message_sink.send("Could not load saved session!".into()).await.unwrap(); SaveData::default() } } else { sys_message_sink.send("Creating new session in save.json".into()).await.unwrap(); SaveData::default() }; let (audio_ctrl, mic_stream) = start_audio_input(&sys_message_sink).await; let tts_ctrl = start_tts().await; let (prediction_request_in, mut prediction_out) = prediction::start_prediction(sys_message_src, saved_session.messages).await; let transcription_ctrl = transcription::start_transcription(mic_stream).await; let mut app = App::new(prediction_request_in, audio_ctrl, transcription_ctrl, tts_ctrl, sys_message_sink, saved_session.direction); let mut events = EventStream::new(); let mut last_tick = Instant::now(); loop { if last_tick.elapsed() >= std::time::Duration::from_millis(100) { last_tick = Instant::now(); app.throbber_state.calc_next(); } app.direction.time_remaining = app.end_time.signed_duration_since(Utc::now()); terminal.draw(|frame| { app.draw(frame)}).unwrap(); let delay = Delay::new(std::time::Duration::from_millis(60)).fuse(); let event = events.next().fuse(); tokio::select! { _ = delay => (), _ = prediction_out.changed() => { app.scene = prediction_out.borrow().clone(); app.reply_state.select_first(); app.is_requesting = false; }, next_volume = app.audio.next() => { app.audio_level = next_volume }, transcription_result = app.transcription.next() => { app.next_actions.push(ConversationEntry::User(transcription_result)); app.regenerate_responses(); } maybe_event = event => { match maybe_event { Some(Ok(event)) => { if let event::Event::Key(key) = event { if key.modifiers.contains(KeyModifiers::CONTROL) && key.code == KeyCode::Char('c') { break; } } app.on_event(event).await; }, _ => () } } }; } ratatui::restore(); }