2025-10-11 12:29:03 -03:00
use dotenvy ::dotenv ;
use log ::{ error , info , warn } ;
use actix_web ::{ web , HttpResponse , Result } ;
2025-10-06 10:30:17 -03:00
use serde ::{ Deserialize , Serialize } ;
2025-10-11 12:29:03 -03:00
use std ::process ::{ Command , Stdio } ;
use std ::thread ;
use std ::time ::Duration ;
2025-10-06 10:30:17 -03:00
2025-10-11 12:29:03 -03:00
/// JSON body accepted by `chat_completions_local`.
///
/// Only `model` is read by the current mock handler; the remaining fields are
/// deserialized but not otherwise used in this file.
#[derive(Debug, Deserialize)]
pub struct LocalChatRequest {
    /// Name of the model the caller is asking for.
    pub model: String,
    /// Messages supplied by the caller, in the order they appear in the body.
    pub messages: Vec<ChatMessage>,
    /// Optional temperature value; `None` when the field is absent.
    /// NOTE(review): presumably the sampling temperature — confirm once a real backend is wired in.
    pub temperature: Option<f32>,
    /// Optional token limit; `None` when the field is absent.
    /// NOTE(review): presumably caps the completion length — confirm against the backend.
    pub max_tokens: Option<u32>,
}
2025-10-11 12:29:03 -03:00
/// A single chat message.
///
/// The same type is used for incoming messages (`LocalChatRequest::messages`)
/// and for the reply carried in `ChatChoice::message`, which is why it derives
/// both `Serialize` and `Deserialize`.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct ChatMessage {
    /// Who authored the message (free-form string; not validated here).
    pub role: String,
    /// Text of the message.
    pub content: String,
}
#[ derive(Debug, Deserialize) ]
pub struct EmbeddingRequest {
pub model : String ,
2025-10-11 12:29:03 -03:00
pub input : String ,
2025-10-06 10:30:17 -03:00
}
2025-10-11 12:29:03 -03:00
/// JSON body returned by `chat_completions_local`.
#[derive(Debug, Serialize)]
pub struct LocalChatResponse {
    /// Identifier of this response.
    pub id: String,
    /// Discriminator string describing what kind of object this is.
    pub object: String,
    /// Creation time; the handler fills it with seconds since the Unix epoch.
    pub created: u64,
    /// Model name, copied from the request.
    pub model: String,
    /// Generated replies.
    pub choices: Vec<ChatChoice>,
    /// Token accounting for the exchange.
    pub usage: Usage,
}
2025-10-06 10:30:17 -03:00
2025-10-11 12:29:03 -03:00
/// One entry of `LocalChatResponse::choices`.
#[derive(Debug, Serialize)]
pub struct ChatChoice {
    /// Position of this choice within the response.
    pub index: u32,
    /// The reply message itself.
    pub message: ChatMessage,
    /// Why generation ended, if known; `None` serializes as JSON `null`.
    pub finish_reason: Option<String>,
}
2025-10-06 10:30:17 -03:00
2025-10-11 12:29:03 -03:00
#[ derive(Debug, Serialize) ]
pub struct Usage {
pub prompt_tokens : u32 ,
pub completion_tokens : u32 ,
pub total_tokens : u32 ,
2025-10-06 10:30:17 -03:00
}
/// JSON body returned by `embeddings_local`.
#[derive(Debug, Serialize)]
pub struct EmbeddingResponse {
    /// Discriminator string describing what kind of object this is.
    pub object: String,
    /// Embedding entries produced for the request.
    pub data: Vec<EmbeddingData>,
    /// Model name, copied from the request.
    pub model: String,
    /// Token accounting for the request.
    pub usage: Usage,
}
#[ derive(Debug, Serialize) ]
pub struct EmbeddingData {
pub object : String ,
pub embedding : Vec < f32 > ,
2025-10-11 12:29:03 -03:00
pub index : u32 ,
2025-10-06 10:30:17 -03:00
}
2025-10-11 12:29:03 -03:00
/// Placeholder hook for bringing up the local LLM servers.
///
/// In its current form this only writes two info-level log lines; it neither
/// probes nor spawns any process.
///
/// # Errors
///
/// The signature allows an error, but this stub always returns `Ok(())`.
pub async fn ensure_llama_servers_running() -> Result<(), Box<dyn std::error::Error>> {
    info!(" Checking if local LLM servers are running... ");

    // Stub: real start-up logic is not implemented yet, so just announce it.
    info!(" Local LLM servers would be started here ");

    Ok(())
}
2025-10-11 12:29:03 -03:00
pub async fn chat_completions_local (
payload : web ::Json < LocalChatRequest > ,
) -> Result < HttpResponse > {
dotenv ( ) . ok ( ) ;
2025-10-06 10:30:17 -03:00
2025-10-11 12:29:03 -03:00
info! ( " Received local chat request for model: {} " , payload . model ) ;
// Mock response for local LLM
let response = LocalChatResponse {
id : " local-chat-123 " . to_string ( ) ,
object : " chat.completion " . to_string ( ) ,
created : std ::time ::SystemTime ::now ( )
. duration_since ( std ::time ::UNIX_EPOCH )
. unwrap ( )
. as_secs ( ) ,
model : payload . model . clone ( ) ,
choices : vec ! [ ChatChoice {
index : 0 ,
message : ChatMessage {
role : " assistant " . to_string ( ) ,
content : " This is a mock response from the local LLM. In a real implementation, this would connect to a local model like Llama or Mistral. " . to_string ( ) ,
} ,
finish_reason : Some ( " stop " . to_string ( ) ) ,
} ] ,
usage : Usage {
prompt_tokens : 15 ,
completion_tokens : 25 ,
total_tokens : 40 ,
} ,
} ;
Ok ( HttpResponse ::Ok ( ) . json ( response ) )
2025-10-06 10:30:17 -03:00
}
pub async fn embeddings_local (
2025-10-11 12:29:03 -03:00
payload : web ::Json < EmbeddingRequest > ,
2025-10-06 10:30:17 -03:00
) -> Result < HttpResponse > {
dotenv ( ) . ok ( ) ;
2025-10-11 12:29:03 -03:00
info! ( " Received local embedding request for model: {} " , payload . model ) ;
2025-10-06 10:30:17 -03:00
2025-10-11 12:29:03 -03:00
// Mock embedding response
let response = EmbeddingResponse {
2025-10-06 10:30:17 -03:00
object : " list " . to_string ( ) ,
2025-10-11 12:29:03 -03:00
data : vec ! [ EmbeddingData {
object : " embedding " . to_string ( ) ,
embedding : vec ! [ 0.1 ; 768 ] , // Mock embedding vector
index : 0 ,
} ] ,
model : payload . model . clone ( ) ,
2025-10-06 10:30:17 -03:00
usage : Usage {
2025-10-11 12:29:03 -03:00
prompt_tokens : 10 ,
completion_tokens : 0 ,
total_tokens : 10 ,
2025-10-06 10:30:17 -03:00
} ,
} ;
2025-10-11 12:29:03 -03:00
Ok ( HttpResponse ::Ok ( ) . json ( response ) )
2025-10-06 10:30:17 -03:00
}