diff --git a/src/scripts/utils/set-size-5GB.sh b/src/scripts/utils/set-size-5GB.sh
index e2d5de2..0d9a5d6 100644
--- a/src/scripts/utils/set-size-5GB.sh
+++ b/src/scripts/utils/set-size-5GB.sh
@@ -1,10 +1,6 @@
-export container="pragmatismo-alm-ci"
-lxc stop "$container"
-
-lxc config device override "$container" root size=15GB
-lxc config device set "$container" root size=15GB
-lxc start "$container"
-ROOT_DEV=$(lxc exec "$container" -- df / --output=source | tail -1)
-
-lxc exec "$container" -- growpart "$(dirname "$ROOT_DEV")" "$(basename "$ROOT_DEV")"
-lxc exec "$container" -- resize2fs "$ROOT_DEV"
+lxc stop pragmatismo-alm-ci
+lxc export pragmatismo-alm-ci backup.tar.gz
+lxc delete pragmatismo-alm-ci
+lxc import backup.tar.gz pragmatismo-alm-ci
+zfs set refquota=15G default/containers/pragmatismo-alm-ci
+lxc start pragmatismo-alm-ci
diff --git a/src/services/llm_generic.rs b/src/services/llm_generic.rs
index f20384d..b63df7c 100644
--- a/src/services/llm_generic.rs
+++ b/src/services/llm_generic.rs
@@ -1,4 +1,4 @@
-use log::info;
+use log::{error, info};
 
 use actix_web::{post, web, HttpRequest, HttpResponse, Result};
 use dotenv::dotenv;
@@ -108,9 +108,19 @@ pub async fn generic_chat_completions(body: web::Bytes, _req: HttpRequest) -> Re
     info!("Provider response status: {}", status);
     info!("Provider response body: {}", raw_response);
 
-    // Return the response with appropriate status code
+    // Convert Llama response to OpenAI format if successful
     if status.is_success() {
-        Ok(HttpResponse::Ok().body(raw_response))
+        match convert_llama_to_openai_format(&raw_response) {
+            Ok(openai_response) => Ok(HttpResponse::Ok()
+                .content_type("application/json")
+                .body(openai_response)),
+            Err(e) => {
+                error!("Failed to convert response format: {}", e);
+                Err(actix_web::error::ErrorInternalServerError(
+                    "Response format conversion failed",
+                ))
+            }
+        }
     } else {
         let actix_status = actix_web::http::StatusCode::from_u16(status.as_u16())
             .unwrap_or(actix_web::http::StatusCode::INTERNAL_SERVER_ERROR);
@@ -118,3 +128,80 @@ pub async fn generic_chat_completions(body: web::Bytes, _req: HttpRequest) -> Re
         Ok(HttpResponse::build(actix_status).body(raw_response))
     }
 }
+
+/// Converts Llama response format to OpenAI-compatible format
+fn convert_llama_to_openai_format(
+    llama_response: &str,
+) -> Result<String, Box<dyn std::error::Error>> {
+    #[derive(serde::Deserialize)]
+    struct LlamaResponse {
+        text: String,
+        sources: Option<Vec<serde_json::Value>>,
+    }
+
+    #[derive(serde::Serialize)]
+    struct OpenAIChoice {
+        index: u32,
+        message: OpenAIMessage,
+        finish_reason: String,
+    }
+
+    #[derive(serde::Serialize)]
+    struct OpenAIMessage {
+        role: String,
+        content: String,
+    }
+
+    #[derive(serde::Serialize)]
+    struct OpenAIUsage {
+        prompt_tokens: u32,
+        completion_tokens: u32,
+        total_tokens: u32,
+    }
+
+    #[derive(serde::Serialize)]
+    struct OpenAIResponse {
+        id: String,
+        object: String,
+        created: u64,
+        model: String,
+        choices: Vec<OpenAIChoice>,
+        usage: OpenAIUsage,
+    }
+
+    // Parse the Llama response
+    let llama: LlamaResponse = serde_json::from_str(llama_response)?;
+
+    // Prepare content and token counts without moving llama.text
+    let content = llama.text.clone();
+    let token_count = content.split_whitespace().count() as u32;
+
+    // Create OpenAI-compatible response
+    let openai_response = OpenAIResponse {
+        id: format!(
+            "chatcmpl-{}",
+            uuid::Uuid::new_v4().to_string().replace("-", "")
+        ),
+        object: "chat.completion".to_string(),
+        created: std::time::SystemTime::now()
+            .duration_since(std::time::UNIX_EPOCH)
+            .unwrap()
+            .as_secs(),
+        model: "llama".to_string(), // You might want to make this configurable
+        choices: vec![OpenAIChoice {
+            index: 0,
+            message: OpenAIMessage {
+                role: "assistant".to_string(),
+                content,
+            },
+            finish_reason: "stop".to_string(),
+        }],
+        usage: OpenAIUsage {
+            prompt_tokens: 0, // You might need to extract these from the Llama response
+            completion_tokens: token_count,
+            total_tokens: token_count,
+        },
+    };
+
+    serde_json::to_string(&openai_response).map_err(|e| e.into())
+}
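Below is a minimal test sketch for the new converter, assuming the provider payload matches the LlamaResponse struct in the diff ({"text": ..., "sources": [...]}); the module name, test name, and sample payload are illustrative and not part of the change:

#[cfg(test)]
mod convert_llama_tests {
    use super::convert_llama_to_openai_format;

    #[test]
    fn converts_llama_text_into_openai_chat_completion() {
        // Hypothetical provider payload shaped like the LlamaResponse struct above.
        let llama_payload = r#"{"text": "hello world", "sources": []}"#;

        let converted = convert_llama_to_openai_format(llama_payload)
            .expect("well-formed payload should convert");
        let value: serde_json::Value =
            serde_json::from_str(&converted).expect("converter should emit valid JSON");

        // Shape checks against the OpenAI chat-completion format built by the converter.
        assert_eq!(value["object"], "chat.completion");
        assert_eq!(value["choices"][0]["message"]["role"], "assistant");
        assert_eq!(value["choices"][0]["message"]["content"], "hello world");
        // "hello world" is two whitespace-separated words.
        assert_eq!(value["usage"]["completion_tokens"], 2);
    }
}

Since completion_tokens comes from a whitespace word count rather than a real tokenizer, downstream clients should treat the usage numbers as approximations.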