artifacts: rewrite the entire artifact querying layer to create modular 'tools' and 'datasource's

This commit is contained in:
2026-06-17 11:09:50 +02:00
parent 33e0b1768f
commit 3a8130d785
11 changed files with 672 additions and 257 deletions
+31 -108
View File
@@ -1,14 +1,13 @@
use std::{collections::HashSet, sync::Arc};
use std::{fmt::Debug, sync::Arc};
use async_openai::{Client, config::OpenAIConfig, types::chat::{ChatCompletionMessageToolCalls, ChatCompletionRequestAssistantMessageArgs, ChatCompletionRequestMessage, ChatCompletionRequestSystemMessageArgs, ChatCompletionRequestToolMessageArgs, ChatCompletionTool, ChatCompletionTools, CreateChatCompletionRequestArgs, FinishReason, FunctionObjectArgs, ResponseFormat, ResponseFormatJsonSchema}};
use bandcamp::SearchResultItem;
use async_openai::{Client, config::OpenAIConfig, types::chat::{ChatCompletionMessageToolCalls, ChatCompletionRequestAssistantMessageArgs, ChatCompletionRequestMessage, ChatCompletionRequestSystemMessageArgs, ChatCompletionRequestToolMessageArgs, CreateChatCompletionRequestArgs, FinishReason, ResponseFormat, ResponseFormatJsonSchema}};
use chrono::{DateTime, Utc};
use schemars::{JsonSchema, schema_for};
use serde::{Deserialize, Serialize};
use serde_json::{Serializer, ser::CompactFormatter};
use tokio::sync::{RwLock, mpsc, watch};
use crate::{SaveData, artifacts::{self, Album, Artifact, Artist, Merge, SourceID, Track, bandcamp::BandcampQueryArgs, beets::BeatsQueryArgs, mixxx::MixxxDB, musicbrainz::{MusicbrainzQueryArgs, search_artifacts}}, scene::{Scene, Scenery, StageDirection, conversation::ConversationEntry}};
use crate::{SaveData, artifacts::{Contents, bandcamp::BandcampSource, beets::BeetsDB, mixxx::{MixxxDB, MixxxQuery}, musicbrainz::MBQuery, tools::{DataSource, Tool}}, scene::{Scene, Scenery, StageDirection, conversation::ConversationEntry}};
const SYSTEM_PROMPT: &str = include_str!("system-prompt.txt");
@@ -99,63 +98,16 @@ impl Session {
}
}
async fn tool_bandcamp_scan(&mut self, args: BandcampQueryArgs) -> ToolResults {
async fn tool_artifact_query<Src: DataSource>(&mut self, src: &mut Src, json_args: &str) -> ToolResults where Src::Args: Debug {
let args: Src::Args = serde_json::from_str(json_args).unwrap();
let mut messages = vec![];
log::debug!("Fetching artifacts from Bandcamp with {:?}", args);
let mut json_results = vec![];
if let Ok(results) = bandcamp::search(args.query.as_str()).await {
for result in results {
log::debug!("Result: {:?}", result);
match result {
SearchResultItem::Artist(data) => {
/*let result = Artifact::Artist(Artist {
name: data.name,
location: data.location,
..Default::default()
});*/
let result = bandcamp::fetch_artist(data.artist_id).await.unwrap().into();
json_results.push(result);
},
SearchResultItem::Album(data) => {
let result = bandcamp::fetch_album(data.band_id, data.album_id).await.unwrap().into();
/*let result = Artifact::Album(Album {
title: data.name,
artist: data.band_name,
..Default::default()
});*/
json_results.push(result);
},
SearchResultItem::Track(data) => {
let result = Artifact::Track(Track {
title: data.name,
artist: Some(data.band_name),
album: data.album_name,
sources: HashSet::from([SourceID::Bandcamp(data.track_id)]),
..Default::default()
});
json_results.push(result);
}
_ => ()
}
}
}
let artifact_count = json_results.len();
messages.push(ConversationEntry::ShipComputer(format!("Bandcamp relay scan for '{}' complete. {} artifacts added to the archive.", args.query, artifact_count).into()));
self.scenery.artifacts.merge(json_results);
ToolResults {
result: Some(format!("{} artifacts were added to the archive.", artifact_count)),
messages
}
}
async fn tool_artifact_query(&mut self, args: BeatsQueryArgs) -> ToolResults {
let mut messages = vec![];
log::debug!("Executing beets query {:?}", args);
if let Ok(output) = args.clone().execute() {
log::debug!("Executing query {:?}", args);
if let Ok(output) = src.query(&args).await {
messages.push(ConversationEntry::ShipComputer(format!("Found {} artifacts with archive query {:?}", output.len(), args)));
self.scenery.artifacts.merge(output);
for result in output {
self.scenery.artifacts.insert(result);
}
self.scenery.artifacts.synchronize().await;
} else {
messages.push(ConversationEntry::ShipComputer("Unable to execute query!".into()));
};
@@ -166,20 +118,6 @@ impl Session {
}
}
/// Runs a Musicbrainz relay search and merges any hits into the scene's artifact archive.
///
/// `args` is forwarded unchanged to `search_artifacts`. The returned `ToolResults` carries
/// one summary string twice: as the tool `result` and as a `ShipComputer` conversation entry.
///
/// A failed search is logged and reported through that same summary string instead of
/// panicking, so one bad relay lookup does not take down the whole session.
async fn tool_musicbrainz_fetch_tracks(&mut self, args: MusicbrainzQueryArgs) -> ToolResults {
    log::debug!("Executing musicbrainz fetch for {:?}", args);
    let msg = match search_artifacts(args).await {
        Ok(results) => {
            // Build the summary before `merge` consumes the result set.
            let summary = format!("Found {} results via Musicbrainz relay search.", results.len());
            self.scenery.artifacts.merge(results);
            summary
        }
        Err(err) => {
            log::warn!("Musicbrainz relay search failed: {:?}", err);
            format!("Musicbrainz relay search failed: {:?}", err)
        }
    };
    ToolResults {
        result: Some(msg.clone()),
        messages: vec![ConversationEntry::ShipComputer(msg)]
    }
}
fn generate_conversation(&self, direction: &StageDirection) -> Vec<ChatCompletionRequestMessage> {
let mut json_buf = vec![];
let mut ser = Serializer::with_formatter(&mut json_buf, CompactFormatter);
@@ -208,38 +146,15 @@ impl Session {
let full_conversation = self.generate_conversation(&self.direction);
let tools = vec![
ChatCompletionTools::Function(ChatCompletionTool {
function: FunctionObjectArgs::default()
.name("log_stage_event")
.description("Inserts an event into the current scene script")
.parameters(schema_for!(StageEventArgs))
.build().unwrap()
}),
Tool { name: "log_stage_event".into(), description: "Inserts an event into the current scene script".into(), schema: schema_for!(StageEventArgs)}.into(),
// TODO: There should only be two queries: one against the ship's onboard archive, and another against the relay network (or whatever we end up calling it). Both should be structured with the same argument schema.
// TODO: A relay search should try to grab results first from beets, then from musicbrainz, then from bandcamp.
// TODO: A query should specify which parts of the metadata are sufficient for the result, so we don't always have to hit every layer of data. beets can, of course, ignore this.
// TODO: A query should be hierarchical somehow, e.g. "I already know about artist X, but I want to know everything about track Y from album Z" or "I don't know anything about artist X/album Y, please give me an overview".
ChatCompletionTools::Function(ChatCompletionTool {
function: FunctionObjectArgs::default()
.name("archive_query")
.description("Queries the ship's musical artifact archives for tracks matching the given search parameters")
.parameters(schema_for!(BeatsQueryArgs))
.build().unwrap()
}),
ChatCompletionTools::Function(ChatCompletionTool {
function: FunctionObjectArgs::default()
.name("bandcamp_artifact_scan")
.description("Scans Bandcamp to find artifacts to use in the scene that match the given search parameters. To find an artist, provide only the artist name. To find an album, provide the artist and the album.")
.parameters(schema_for!(BandcampQueryArgs))
.build().unwrap()
}),
ChatCompletionTools::Function(ChatCompletionTool {
function: FunctionObjectArgs::default()
.name("musicbrainz_track_search")
.description("Fetches metadata from bandcamp for the given musicbrainz recording IDs (mbid)")
.parameters(schema_for!(MusicbrainzQueryArgs))
.build().unwrap()
})
Tool::from_datasource(&MBQuery).into(),
Tool::from_datasource(&BandcampSource).into(),
Tool::from_datasource(&BeetsDB).into(),
Tool::from_datasource(&MixxxDB).into(),
// TODO: We should be able to have eva update lore memories with a function call, with that lore somehow fed back into the show. Perhaps only the relevant bits get fed in, or maybe eva even queries it directly.
// TODO: The memory should also be able to remember facts about artists, albums, and tracks we've had in the past, so those can be pulled up when there are hits in the playlist.
];
@@ -295,9 +210,10 @@ impl Session {
let args = call.function.arguments.as_str();
let tool_result = match func_name {
"log_stage_event" => self.tool_stage_event(serde_json::from_str(args).unwrap()).await,
"bandcamp_artifact_scan" => self.tool_bandcamp_scan(serde_json::from_str(args).unwrap()).await,
"archive_query" => self.tool_artifact_query(serde_json::from_str(args).unwrap()).await,
"musicbrainz_track_search" => self.tool_musicbrainz_fetch_tracks(serde_json::from_str(args).unwrap()).await,
"query_bandcamp" => self.tool_artifact_query(&mut BandcampSource, args).await,
"query_beets" => self.tool_artifact_query(&mut BeetsDB, args).await,
"query_musicbrainz" => self.tool_artifact_query(&mut MBQuery, args).await,
"query_mixxx" => self.tool_artifact_query(&mut MixxxDB, args).await,
_ => unreachable!()
};
results.push((&call.id, tool_result));
@@ -429,12 +345,19 @@ pub async fn start_prediction(saved_session: SaveData, mut messages: tokio::sync
do_regen
},
PredictionAction::SetPlaylist(playlist_name) => {
match MixxxDB::load(&playlist_name) {
let args = MixxxQuery { playlist_name };
match MixxxDB.query(&args).await {
Err(err) => log::info!("Failed to load mixxx playlist: {:?}.", err),
Ok(playlist) => {
session.scenery.artifacts.merge(playlist.clone());
session.scenery.current_playlist = playlist;
session.direction.playlist = playlist_name;
session.scenery.current_playlist = vec![];
for item in playlist.clone() {
if let Contents::Track(as_track) = item.contents() {
session.scenery.current_playlist.push(as_track.clone());
}
session.scenery.artifacts.insert(item);
}
session.scenery.artifacts.synchronize().await;
session.direction.playlist = args.playlist_name;
log::info!("Mixxx playlist reloaded.");
}
}