diff --git a/src/scripts/utils/set-size-5GB.sh b/src/scripts/utils/set-size-5GB.sh
index e2d5de2..0d9a5d6 100644
--- a/src/scripts/utils/set-size-5GB.sh
+++ b/src/scripts/utils/set-size-5GB.sh
@@ -1,10 +1,6 @@
-export container="pragmatismo-alm-ci"
-lxc stop "$container"
-
-lxc config device override "$container" root size=15GB
-lxc config device set "$container" root size=15GB
-lxc start "$container"
-ROOT_DEV=$(lxc exec "$container" -- df / --output=source | tail -1)
-
-lxc exec "$container" -- growpart "$(dirname "$ROOT_DEV")" "$(basename "$ROOT_DEV")"
-lxc exec "$container" -- resize2fs "$ROOT_DEV"
+lxc stop pragmatismo-alm-ci
+lxc export pragmatismo-alm-ci backup.tar.gz
+lxc delete pragmatismo-alm-ci
+lxc import backup.tar.gz pragmatismo-alm-ci
+zfs set refquota=15G default/containers/pragmatismo-alm-ci
+lxc start pragmatismo-alm-ci
diff --git a/src/services/llm_generic.rs b/src/services/llm_generic.rs
index f20384d..b63df7c 100644
--- a/src/services/llm_generic.rs
+++ b/src/services/llm_generic.rs
@@ -1,4 +1,4 @@
-use log::info;
+use log::{error, info};
 
 use actix_web::{post, web, HttpRequest, HttpResponse, Result};
 use dotenv::dotenv;
@@ -108,9 +108,19 @@ pub async fn generic_chat_completions(body: web::Bytes, _req: HttpRequest) -> Re
     info!("Provider response status: {}", status);
     info!("Provider response body: {}", raw_response);
 
-    // Return the response with appropriate status code
+    // Convert Llama response to OpenAI format if successful
     if status.is_success() {
-        Ok(HttpResponse::Ok().body(raw_response))
+        match convert_llama_to_openai_format(&raw_response) {
+            Ok(openai_response) => Ok(HttpResponse::Ok()
+                .content_type("application/json")
+                .body(openai_response)),
+            Err(e) => {
+                error!("Failed to convert response format: {}", e);
+                Err(actix_web::error::ErrorInternalServerError(
+                    "Response format conversion failed",
+                ))
+            }
+        }
     } else {
         let actix_status = actix_web::http::StatusCode::from_u16(status.as_u16())
             .unwrap_or(actix_web::http::StatusCode::INTERNAL_SERVER_ERROR);
@@ -118,3 +128,80 @@ pub async fn generic_chat_completions(body: web::Bytes, _req: HttpRequest) -> Re
         Ok(HttpResponse::build(actix_status).body(raw_response))
     }
 }
+
+/// Converts Llama response format to OpenAI-compatible format
+fn convert_llama_to_openai_format(
+    llama_response: &str,
+) -> Result<String, Box<dyn std::error::Error>> {
+    #[derive(serde::Deserialize)]
+    struct LlamaResponse {
+        text: String,
+        sources: Option<Vec<serde_json::Value>>,
+    }
+
+    #[derive(serde::Serialize)]
+    struct OpenAIChoice {
+        index: u32,
+        message: OpenAIMessage,
+        finish_reason: String,
+    }
+
+    #[derive(serde::Serialize)]
+    struct OpenAIMessage {
+        role: String,
+        content: String,
+    }
+
+    #[derive(serde::Serialize)]
+    struct OpenAIUsage {
+        prompt_tokens: u32,
+        completion_tokens: u32,
+        total_tokens: u32,
+    }
+
+    #[derive(serde::Serialize)]
+    struct OpenAIResponse {
+        id: String,
+        object: String,
+        created: u64,
+        model: String,
+        choices: Vec<OpenAIChoice>,
+        usage: OpenAIUsage,
+    }
+
+    // Parse the Llama response
+    let llama: LlamaResponse = serde_json::from_str(llama_response)?;
+
+    // Prepare content and token counts without moving llama.text
+    let content = llama.text.clone();
+    let token_count = content.split_whitespace().count() as u32;
+
+    // Create OpenAI-compatible response
+    let openai_response = OpenAIResponse {
+        id: format!(
+            "chatcmpl-{}",
+            uuid::Uuid::new_v4().to_string().replace("-", "")
+        ),
+        object: "chat.completion".to_string(),
+        created: std::time::SystemTime::now()
+            .duration_since(std::time::UNIX_EPOCH)
+            .unwrap()
+            .as_secs(),
+        model: "llama".to_string(), // You might want to make this configurable
+        choices: vec![OpenAIChoice {
+            index: 0,
+            message: OpenAIMessage {
+                role: "assistant".to_string(),
+                content,
+            },
+            finish_reason: "stop".to_string(),
+        }],
+        usage: OpenAIUsage {
+            prompt_tokens: 0, // You might need to extract these from the Llama response
+            completion_tokens: token_count,
+            total_tokens: token_count,
+        },
+    };
+
+    serde_json::to_string(&openai_response).map_err(|e| e.into())
+}
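Below is a minimal test sketch for the new converter, assuming the provider payload matches the LlamaResponse struct in the diff ({"text": ..., "sources": [...]}); the module name, test name, and sample payload are illustrative and not part of the change:

#[cfg(test)]
mod convert_llama_tests {
    use super::convert_llama_to_openai_format;

    #[test]
    fn converts_llama_text_into_openai_chat_completion() {
        // Hypothetical provider payload shaped like the LlamaResponse struct above.
        let llama_payload = r#"{"text": "hello world", "sources": []}"#;

        let converted = convert_llama_to_openai_format(llama_payload)
            .expect("well-formed payload should convert");
        let value: serde_json::Value =
            serde_json::from_str(&converted).expect("converter should emit valid JSON");

        // Shape checks against the OpenAI chat-completion format built by the converter.
        assert_eq!(value["object"], "chat.completion");
        assert_eq!(value["choices"][0]["message"]["role"], "assistant");
        assert_eq!(value["choices"][0]["message"]["content"], "hello world");
        // "hello world" is two whitespace-separated words.
        assert_eq!(value["usage"]["completion_tokens"], 2);
    }
}

Since completion_tokens comes from a whitespace word count rather than a real tokenizer, downstream clients should treat the usage numbers as approximations.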