Review response from LLM generic.
Some checks failed: GBCI / build (push) failing after 1m40s

Rodrigo Rodriguez (Pragmatismo) 2025-09-24 08:27:09 -03:00
parent 4e762a1a5d
commit a7bf8353f0
2 changed files with 96 additions and 13 deletions

View file

@@ -1,10 +1,6 @@
-export container="pragmatismo-alm-ci"
-lxc stop "$container"
-lxc config device override "$container" root size=15GB
-lxc config device set "$container" root size=15GB
-lxc start "$container"
-ROOT_DEV=$(lxc exec "$container" -- df / --output=source | tail -1)
-lxc exec "$container" -- growpart "$(dirname "$ROOT_DEV")" "$(basename "$ROOT_DEV")"
-lxc exec "$container" -- resize2fs "$ROOT_DEV"
+lxc stop pragmatismo-alm-ci
+lxc export pragmatismo-alm-ci backup.tar.gz
+lxc delete pragmatismo-alm-ci
+lxc import backup.tar.gz pragmatismo-alm-ci
+zfs set refquota=15G default/containers/pragmatismo-alm-ci
+lxc start pragmatismo-alm-ci

View file

@@ -1,4 +1,4 @@
-use log::info;
+use log::{error, info};
 use actix_web::{post, web, HttpRequest, HttpResponse, Result};
 use dotenv::dotenv;
@@ -108,9 +108,19 @@ pub async fn generic_chat_completions(body: web::Bytes, _req: HttpRequest) -> Re
     info!("Provider response status: {}", status);
     info!("Provider response body: {}", raw_response);

-    // Return the response with appropriate status code
+    // Convert Llama response to OpenAI format if successful
     if status.is_success() {
-        Ok(HttpResponse::Ok().body(raw_response))
+        match convert_llama_to_openai_format(&raw_response) {
+            Ok(openai_response) => Ok(HttpResponse::Ok()
+                .content_type("application/json")
+                .body(openai_response)),
+            Err(e) => {
+                error!("Failed to convert response format: {}", e);
+                Err(actix_web::error::ErrorInternalServerError(
+                    "Response format conversion failed",
+                ))
+            }
+        }
     } else {
         let actix_status = actix_web::http::StatusCode::from_u16(status.as_u16())
             .unwrap_or(actix_web::http::StatusCode::INTERNAL_SERVER_ERROR);
@@ -118,3 +128,80 @@ pub async fn generic_chat_completions(body: web::Bytes, _req: HttpRequest) -> Re
         Ok(HttpResponse::build(actix_status).body(raw_response))
     }
 }
+
+/// Converts Llama response format to OpenAI-compatible format
+fn convert_llama_to_openai_format(
+    llama_response: &str,
+) -> Result<String, Box<dyn std::error::Error>> {
+    #[derive(serde::Deserialize)]
+    struct LlamaResponse {
+        text: String,
+        sources: Option<Vec<serde_json::Value>>,
+    }
+
+    #[derive(serde::Serialize)]
+    struct OpenAIChoice {
+        index: u32,
+        message: OpenAIMessage,
+        finish_reason: String,
+    }
+
+    #[derive(serde::Serialize)]
+    struct OpenAIMessage {
+        role: String,
+        content: String,
+    }
+
+    #[derive(serde::Serialize)]
+    struct OpenAIUsage {
+        prompt_tokens: u32,
+        completion_tokens: u32,
+        total_tokens: u32,
+    }
+
+    #[derive(serde::Serialize)]
+    struct OpenAIResponse {
+        id: String,
+        object: String,
+        created: u64,
+        model: String,
+        choices: Vec<OpenAIChoice>,
+        usage: OpenAIUsage,
+    }
+
+    // Parse the Llama response
+    let llama: LlamaResponse = serde_json::from_str(llama_response)?;
+
+    // Prepare content and token counts without moving llama.text
+    let content = llama.text.clone();
+    let token_count = content.split_whitespace().count() as u32;
+
+    // Create OpenAI-compatible response
+    let openai_response = OpenAIResponse {
+        id: format!(
+            "chatcmpl-{}",
+            uuid::Uuid::new_v4().to_string().replace("-", "")
+        ),
+        object: "chat.completion".to_string(),
+        created: std::time::SystemTime::now()
+            .duration_since(std::time::UNIX_EPOCH)
+            .unwrap()
+            .as_secs(),
+        model: "llama".to_string(), // You might want to make this configurable
+        choices: vec![OpenAIChoice {
+            index: 0,
+            message: OpenAIMessage {
+                role: "assistant".to_string(),
+                content,
+            },
+            finish_reason: "stop".to_string(),
+        }],
+        usage: OpenAIUsage {
+            prompt_tokens: 0, // You might need to extract these from the Llama response
+            completion_tokens: token_count,
+            total_tokens: token_count,
+        },
+    };
+
+    serde_json::to_string(&openai_response).map_err(|e| e.into())
+}
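
For reference, below is a minimal sketch of how the new conversion path could be exercised. The test module, its name, and the sample payload are illustrative assumptions rather than part of this commit; it presumes the test sits in the same file as convert_llama_to_openai_format (the function is private) and that serde_json is already a dependency, as the function itself requires.

#[cfg(test)]
mod llama_to_openai_tests {
    use super::convert_llama_to_openai_format;

    #[test]
    fn converts_minimal_llama_response() {
        // Shape expected by the converter: a "text" field plus an optional "sources" field.
        let llama_body = r#"{"text": "Hello from Llama", "sources": null}"#;

        let converted =
            convert_llama_to_openai_format(llama_body).expect("conversion should succeed");
        let value: serde_json::Value =
            serde_json::from_str(&converted).expect("output should be valid JSON");

        // Fields the converter sets unconditionally.
        assert_eq!(value["object"], "chat.completion");
        assert_eq!(value["choices"][0]["finish_reason"], "stop");
        assert_eq!(value["choices"][0]["message"]["role"], "assistant");

        // The Llama "text" becomes the assistant message content.
        assert_eq!(value["choices"][0]["message"]["content"], "Hello from Llama");

        // Token counts are approximated by whitespace-separated words (three here).
        assert_eq!(value["usage"]["completion_tokens"], 3);
        assert_eq!(value["usage"]["total_tokens"], 3);
    }
}

Asserting against the parsed JSON rather than the raw string keeps the check independent of field ordering in the serialized response.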