use actix_web::{web, HttpResponse, Result};
use dotenvy::dotenv;
use log::info;
use serde::{Deserialize, Serialize};

#[derive(Debug, Deserialize)]
pub struct LocalChatRequest {
    pub model: String,
    pub messages: Vec<ChatMessage>,
    pub temperature: Option<f32>,
    pub max_tokens: Option<u32>,
}

#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct ChatMessage {
    pub role: String,
    pub content: String,
}

#[derive(Debug, Deserialize)]
pub struct EmbeddingRequest {
    pub model: String,
    pub input: String,
}

#[derive(Debug, Serialize)]
pub struct LocalChatResponse {
    pub id: String,
    pub object: String,
    pub created: u64,
    pub model: String,
    pub choices: Vec<ChatChoice>,
    pub usage: Usage,
}

#[derive(Debug, Serialize)]
pub struct ChatChoice {
    pub index: u32,
    pub message: ChatMessage,
    pub finish_reason: Option<String>,
}

#[derive(Debug, Serialize)]
pub struct Usage {
    pub prompt_tokens: u32,
    pub completion_tokens: u32,
    pub total_tokens: u32,
}

#[derive(Debug, Serialize)]
pub struct EmbeddingResponse {
    pub object: String,
    pub data: Vec<EmbeddingData>,
    pub model: String,
    pub usage: Usage,
}

#[derive(Debug, Serialize)]
pub struct EmbeddingData {
    pub object: String,
    pub embedding: Vec<f32>,
    pub index: u32,
}

pub async fn ensure_llama_servers_running() -> Result<(), Box<dyn std::error::Error>> {
    info!("Checking if local LLM servers are running...");
    // For now, just log that we would start servers.
    info!("Local LLM servers would be started here");
    Ok(())
}

pub async fn chat_completions_local(
    payload: web::Json<LocalChatRequest>,
) -> Result<HttpResponse> {
    dotenv().ok();
    info!("Received local chat request for model: {}", payload.model);

    // Mock response for the local LLM.
    let response = LocalChatResponse {
        id: "local-chat-123".to_string(),
        object: "chat.completion".to_string(),
        created: std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .unwrap()
            .as_secs(),
        model: payload.model.clone(),
        choices: vec![ChatChoice {
            index: 0,
            message: ChatMessage {
                role: "assistant".to_string(),
                content: "This is a mock response from the local LLM. In a real implementation, this would connect to a local model like Llama or Mistral.".to_string(),
            },
            finish_reason: Some("stop".to_string()),
        }],
        usage: Usage {
            prompt_tokens: 15,
            completion_tokens: 25,
            total_tokens: 40,
        },
    };

    Ok(HttpResponse::Ok().json(response))
}

pub async fn embeddings_local(
    payload: web::Json<EmbeddingRequest>,
) -> Result<HttpResponse> {
    dotenv().ok();
    info!("Received local embedding request for model: {}", payload.model);

    // Mock embedding response.
    let response = EmbeddingResponse {
        object: "list".to_string(),
        data: vec![EmbeddingData {
            object: "embedding".to_string(),
            embedding: vec![0.1; 768], // Mock embedding vector
            index: 0,
        }],
        model: payload.model.clone(),
        usage: Usage {
            prompt_tokens: 10,
            completion_tokens: 0,
            total_tokens: 10,
        },
    };

    Ok(HttpResponse::Ok().json(response))
}
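
// --- Example wiring (sketch) ---------------------------------------------
// A minimal sketch of how these handlers could be mounted on an actix-web
// App. The route paths and the name `configure_local_llm_routes` are
// assumptions chosen for illustration; they are not part of the original
// module.
pub fn configure_local_llm_routes(cfg: &mut web::ServiceConfig) {
    // Request bodies are deserialized into LocalChatRequest / EmbeddingRequest
    // by the web::Json extractors declared on the handlers above.
    cfg.route(
        "/v1/chat/completions",
        web::post().to(chat_completions_local),
    )
    .route("/v1/embeddings", web::post().to(embeddings_local));
}

// Usage (in main), assuming the routes above:
//   HttpServer::new(|| App::new().configure(configure_local_llm_routes))
//       .bind(("127.0.0.1", 8080))?
//       .run()
//       .await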