- Review OpenAI response.
All checks were successful
GBCI / build (push) Successful in 7m24s

This commit is contained in:
Rodrigo Rodriguez (Pragmatismo) 2025-09-24 17:07:21 -03:00
parent a7bf8353f0
commit 51a2141a00
3 changed files with 62 additions and 62 deletions

View file

@@ -1,6 +1,7 @@
# Recreate the container from a backup so the storage changes apply cleanly.
lxc stop pragmatismo-alm-ci
lxc export pragmatismo-alm-ci backup.tar.gz
lxc delete pragmatismo-alm-ci
lxc import backup.tar.gz pragmatismo-alm-ci
# Raise the quota on the container's ZFS dataset and bring it back up.
zfs set refquota=15G default/containers/pragmatismo-alm-ci
lxc start pragmatismo-alm-ci
# Override the container's root disk device and cap its size.
lxc config device override $CONTAINER_NAME root
lxc config device set $CONTAINER_NAME root size 6GB
# Let the pool expand into the resized backing image, then verify.
zpool set autoexpand=on default
zpool online -e default /var/snap/lxd/common/lxd/disks/default.img
zpool list
zfs list

View file

@@ -41,7 +41,6 @@ fn clean_request_body(body: &str) -> String {
let re = Regex::new(r#","?\s*"(max_completion_tokens|parallel_tool_calls|top_p|frequency_penalty|presence_penalty)"\s*:\s*[^,}]*"#).unwrap();
re.replace_all(body, "").to_string()
}
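For reference, a minimal sketch of what this cleaner does; the request body and the test below are illustrative additions, not part of this commit:
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn strips_unsupported_sampler_fields() {
        // Hypothetical body carrying fields the downstream provider rejects.
        let body = r#"{"messages":[{"role":"user","content":"hi"}],"top_p":0.9,"presence_penalty":0.5}"#;
        // The listed keys and their values are removed; supported fields stay untouched.
        assert_eq!(
            clean_request_body(body),
            r#"{"messages":[{"role":"user","content":"hi"}]}"#
        );
    }
}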
#[post("/v1/chat/completions")]
pub async fn generic_chat_completions(body: web::Bytes, _req: HttpRequest) -> Result<HttpResponse> {
// Log raw POST data
@@ -58,9 +57,19 @@ pub async fn generic_chat_completions(body: web::Bytes, _req: HttpRequest) -> Re
let endpoint = env::var("AI_ENDPOINT")
.map_err(|_| actix_web::error::ErrorInternalServerError("AI_ENDPOINT not set."))?;
// Clean the request body (remove unsupported parameters)
let cleaned_body_str = clean_request_body(body_str);
info!("Cleaned POST Data: {}", cleaned_body_str);
// Parse and modify the request body
let mut json_value: serde_json::Value = serde_json::from_str(body_str)
.map_err(|_| actix_web::error::ErrorInternalServerError("Failed to parse JSON"))?;
// Add model parameter
if let Some(obj) = json_value.as_object_mut() {
obj.insert("model".to_string(), serde_json::Value::String(model));
}
let modified_body_str = serde_json::to_string(&json_value)
.map_err(|_| actix_web::error::ErrorInternalServerError("Failed to serialize JSON"))?;
info!("Modified POST Data: {}", modified_body_str);
// Set up headers
let mut headers = reqwest::header::HeaderMap::new();
@@ -74,21 +83,7 @@ pub async fn generic_chat_completions(body: web::Bytes, _req: HttpRequest) -> Re
reqwest::header::HeaderValue::from_static("application/json"),
);
// After cleaning the request body, add the unused parameter
let mut json_value: serde_json::Value = serde_json::from_str(&cleaned_body_str)
.map_err(|_| actix_web::error::ErrorInternalServerError("Failed to parse JSON"))?;
// Add the unused parameter
json_value
.as_object_mut()
.unwrap()
.insert("model".to_string(), serde_json::Value::String(model));
// Serialize the modified JSON
let modified_body_str = serde_json::to_string(&json_value)
.map_err(|_| actix_web::error::ErrorInternalServerError("Failed to serialize JSON"))?;
// Send request to the OpenAI-compatible provider
// Send request to the AI provider
let client = Client::new();
let response = client
.post(&endpoint)
@@ -108,35 +103,50 @@ pub async fn generic_chat_completions(body: web::Bytes, _req: HttpRequest) -> Re
info!("Provider response status: {}", status);
info!("Provider response body: {}", raw_response);
// Convert Llama response to OpenAI format if successful
// Convert response to OpenAI format if successful
if status.is_success() {
match convert_llama_to_openai_format(&raw_response) {
match convert_to_openai_format(&raw_response) {
Ok(openai_response) => Ok(HttpResponse::Ok()
.content_type("application/json")
.body(openai_response)),
Err(e) => {
error!("Failed to convert response format: {}", e);
Err(actix_web::error::ErrorInternalServerError(
"Response format conversion failed",
))
// Return the original response if conversion fails
Ok(HttpResponse::Ok()
.content_type("application/json")
.body(raw_response))
}
}
} else {
// Return error as-is
let actix_status = actix_web::http::StatusCode::from_u16(status.as_u16())
.unwrap_or(actix_web::http::StatusCode::INTERNAL_SERVER_ERROR);
Ok(HttpResponse::build(actix_status).body(raw_response))
Ok(HttpResponse::build(actix_status)
.content_type("application/json")
.body(raw_response))
}
}
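For context, a sketch of how a client could exercise this route; the localhost address, port, and the blocking reqwest client are assumptions for illustration, not taken from the commit:
// Assumed bind address; the real host/port is configured elsewhere in the server.
let resp = reqwest::blocking::Client::new()
    .post("http://localhost:8080/v1/chat/completions")
    .header("Content-Type", "application/json")
    .body(r#"{"messages":[{"role":"user","content":"ping"}]}"#)
    .send()
    .expect("request to the proxy failed");
// The proxy injects the configured model, forwards to AI_ENDPOINT, and returns
// an OpenAI-style chat.completion JSON body.
println!("{}", resp.text().expect("response had no body"));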
/// Converts Llama response format to OpenAI-compatible format
fn convert_llama_to_openai_format(
llama_response: &str,
) -> Result<String, Box<dyn std::error::Error>> {
/// Converts provider response to OpenAI-compatible format
fn convert_to_openai_format(provider_response: &str) -> Result<String, Box<dyn std::error::Error>> {
#[derive(serde::Deserialize)]
struct LlamaResponse {
struct ProviderResponse {
text: String,
sources: Option<Vec<serde_json::Value>>,
#[serde(default)]
generated_tokens: Option<u32>,
#[serde(default)]
input_tokens: Option<u32>,
}
#[derive(serde::Serialize)]
struct OpenAIResponse {
id: String,
object: String,
created: u64,
model: String,
choices: Vec<OpenAIChoice>,
usage: OpenAIUsage,
}
#[derive(serde::Serialize)]
@@ -159,47 +169,36 @@ fn convert_llama_to_openai_format(
total_tokens: u32,
}
#[derive(serde::Serialize)]
struct OpenAIResponse {
id: String,
object: String,
created: u64,
model: String,
choices: Vec<OpenAIChoice>,
usage: OpenAIUsage,
}
// Parse the provider response
let provider: ProviderResponse = serde_json::from_str(provider_response)?;
// Parse the Llama response
let llama: LlamaResponse = serde_json::from_str(llama_response)?;
let completion_tokens = provider
.generated_tokens
.unwrap_or_else(|| provider.text.split_whitespace().count() as u32);
// Prepare content and token counts without moving llama.text
let content = llama.text.clone();
let token_count = content.split_whitespace().count() as u32;
let prompt_tokens = provider.input_tokens.unwrap_or(0);
let total_tokens = prompt_tokens + completion_tokens;
// Create OpenAI-compatible response
let openai_response = OpenAIResponse {
id: format!(
"chatcmpl-{}",
uuid::Uuid::new_v4().to_string().replace("-", "")
),
id: format!("chatcmpl-{}", uuid::Uuid::new_v4().simple()),
object: "chat.completion".to_string(),
created: std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap()
.as_secs(),
model: "llama".to_string(), // You might want to make this configurable
model: "llama".to_string(),
choices: vec![OpenAIChoice {
index: 0,
message: OpenAIMessage {
role: "assistant".to_string(),
content,
content: provider.text,
},
finish_reason: "stop".to_string(),
}],
usage: OpenAIUsage {
prompt_tokens: 0, // You might need to extract these from the Llama response
completion_tokens: token_count,
total_tokens: token_count,
prompt_tokens,
completion_tokens,
total_tokens,
},
};
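A hedged illustration of the mapping above; the provider payload is invented to show the shape and is not taken from the repository:
// Hypothetical provider payload with explicit token counts.
let provider_json = r#"{"text":"Hello!","sources":null,"generated_tokens":3,"input_tokens":5}"#;
let openai_json = convert_to_openai_format(provider_json).expect("conversion failed");
// Yields a "chat.completion" object whose single choice carries "Hello!" as the
// assistant message and whose usage reports prompt_tokens=5, completion_tokens=3,
// total_tokens=8.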

View file

@@ -432,7 +432,7 @@ struct LlamaCppEmbeddingRequest {
#[derive(Debug, Deserialize)]
struct LlamaCppEmbeddingResponseItem {
pub index: usize,
pub embedding: Vec<Vec<f32>>, // This is the fucked up part - embedding is an array of arrays
pub embedding: Vec<Vec<f32>>, // Note the odd shape: embedding is an array of arrays
}
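Callers typically want one flat vector per input; a minimal sketch of handling the nested shape, assuming the server returns a single inner row per input (the helper is illustrative, not part of this file):
// Hypothetical helper: pull a flat vector out of the nested llama.cpp payload.
fn flatten_embedding(item: &LlamaCppEmbeddingResponseItem) -> Vec<f32> {
    item.embedding.first().cloned().unwrap_or_default()
}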
// Proxy endpoint for embeddings