diff --git a/src/scripts/utils/set-size-5GB.sh b/src/scripts/utils/set-size-5GB.sh
index 0d9a5d6..6de4216 100644
--- a/src/scripts/utils/set-size-5GB.sh
+++ b/src/scripts/utils/set-size-5GB.sh
@@ -1,6 +1,7 @@
-lxc stop pragmatismo-alm-ci
-lxc export pragmatismo-alm-ci backup.tar.gz
-lxc delete pragmatismo-alm-ci
-lxc import backup.tar.gz pragmatismo-alm-ci
-zfs set refquota=15G default/containers/pragmatismo-alm-ci
-lxc start pragmatismo-alm-ci
+lxc config device override $CONTAINER_NAME root
+lxc config device set $CONTAINER_NAME root size 6GB
+
+zpool set autoexpand=on default
+zpool online -e default /var/snap/lxd/common/lxd/disks/default.img
+zpool list
+zfs list
diff --git a/src/services/llm_generic.rs b/src/services/llm_generic.rs
index b63df7c..6f802bf 100644
--- a/src/services/llm_generic.rs
+++ b/src/services/llm_generic.rs
@@ -41,7 +41,6 @@ fn clean_request_body(body: &str) -> String {
     let re = Regex::new(r#","?\s*"(max_completion_tokens|parallel_tool_calls|top_p|frequency_penalty|presence_penalty)"\s*:\s*[^,}]*"#).unwrap();
     re.replace_all(body, "").to_string()
 }
-
 #[post("/v1/chat/completions")]
 pub async fn generic_chat_completions(body: web::Bytes, _req: HttpRequest) -> Result<HttpResponse, Error> {
     // Log raw POST data
@@ -58,9 +57,19 @@ pub async fn generic_chat_completions(body: web::Bytes, _req: HttpRequest) -> Re
     let endpoint = env::var("AI_ENDPOINT")
         .map_err(|_| actix_web::error::ErrorInternalServerError("AI_ENDPOINT not set."))?;
 
-    // Clean the request body (remove unsupported parameters)
-    let cleaned_body_str = clean_request_body(body_str);
-    info!("Cleaned POST Data: {}", cleaned_body_str);
+    // Parse and modify the request body
+    let mut json_value: serde_json::Value = serde_json::from_str(body_str)
+        .map_err(|_| actix_web::error::ErrorInternalServerError("Failed to parse JSON"))?;
+
+    // Add model parameter
+    if let Some(obj) = json_value.as_object_mut() {
+        obj.insert("model".to_string(), serde_json::Value::String(model));
+    }
+
+    let modified_body_str = serde_json::to_string(&json_value)
+        .map_err(|_| actix_web::error::ErrorInternalServerError("Failed to serialize JSON"))?;
+
+    info!("Modified POST Data: {}", modified_body_str);
 
     // Set up headers
     let mut headers = reqwest::header::HeaderMap::new();
@@ -74,21 +83,7 @@ pub async fn generic_chat_completions(body: web::Bytes, _req: HttpRequest) -> Re
         reqwest::header::HeaderValue::from_static("application/json"),
     );
 
-    // After cleaning the request body, add the unused parameter
-    let mut json_value: serde_json::Value = serde_json::from_str(&cleaned_body_str)
-        .map_err(|_| actix_web::error::ErrorInternalServerError("Failed to parse JSON"))?;
-
-    // Add the unused parameter
-    json_value
-        .as_object_mut()
-        .unwrap()
-        .insert("model".to_string(), serde_json::Value::String(model));
-
-    // Serialize the modified JSON
-    let modified_body_str = serde_json::to_string(&json_value)
-        .map_err(|_| actix_web::error::ErrorInternalServerError("Failed to serialize JSON"))?;
-
-    // Send request to the OpenAI-compatible provider
+    // Send request to the AI provider
     let client = Client::new();
     let response = client
         .post(&endpoint)
@@ -108,35 +103,50 @@ pub async fn generic_chat_completions(body: web::Bytes, _req: HttpRequest) -> Re
     info!("Provider response status: {}", status);
     info!("Provider response body: {}", raw_response);
 
-    // Convert Llama response to OpenAI format if successful
+    // Convert response to OpenAI format if successful
     if status.is_success() {
-        match convert_llama_to_openai_format(&raw_response) {
+        match convert_to_openai_format(&raw_response) {
            Ok(openai_response) => Ok(HttpResponse::Ok()
                .content_type("application/json")
                .body(openai_response)),
            Err(e) => {
                error!("Failed to convert response format: {}", e);
-                Err(actix_web::error::ErrorInternalServerError(
-                    "Response format conversion failed",
-                ))
+                // Return the original response if conversion fails
+                Ok(HttpResponse::Ok()
+                    .content_type("application/json")
+                    .body(raw_response))
            }
        }
    } else {
+        // Return error as-is
        let actix_status = actix_web::http::StatusCode::from_u16(status.as_u16())
            .unwrap_or(actix_web::http::StatusCode::INTERNAL_SERVER_ERROR);
-        Ok(HttpResponse::build(actix_status).body(raw_response))
+        Ok(HttpResponse::build(actix_status)
+            .content_type("application/json")
+            .body(raw_response))
    }
 }
 
-/// Converts Llama response format to OpenAI-compatible format
-fn convert_llama_to_openai_format(
-    llama_response: &str,
-) -> Result<String, Box<dyn std::error::Error>> {
+/// Converts provider response to OpenAI-compatible format
+fn convert_to_openai_format(provider_response: &str) -> Result<String, Box<dyn std::error::Error>> {
    #[derive(serde::Deserialize)]
-    struct LlamaResponse {
+    struct ProviderResponse {
        text: String,
-        sources: Option<Vec<String>>,
+        #[serde(default)]
+        generated_tokens: Option<u32>,
+        #[serde(default)]
+        input_tokens: Option<u32>,
+    }
+
+    #[derive(serde::Serialize)]
+    struct OpenAIResponse {
+        id: String,
+        object: String,
+        created: u64,
+        model: String,
+        choices: Vec<OpenAIChoice>,
+        usage: OpenAIUsage,
    }
 
    #[derive(serde::Serialize)]
@@ -159,47 +169,36 @@ fn convert_llama_to_openai_format(
        total_tokens: u32,
    }
 
-    #[derive(serde::Serialize)]
-    struct OpenAIResponse {
-        id: String,
-        object: String,
-        created: u64,
-        model: String,
-        choices: Vec<OpenAIChoice>,
-        usage: OpenAIUsage,
-    }
+    // Parse the provider response
+    let provider: ProviderResponse = serde_json::from_str(provider_response)?;
 
-    // Parse the Llama response
-    let llama: LlamaResponse = serde_json::from_str(llama_response)?;
+    let completion_tokens = provider
+        .generated_tokens
+        .unwrap_or_else(|| provider.text.split_whitespace().count() as u32);
 
-    // Prepare content and token counts without moving llama.text
-    let content = llama.text.clone();
-    let token_count = content.split_whitespace().count() as u32;
+    let prompt_tokens = provider.input_tokens.unwrap_or(0);
+    let total_tokens = prompt_tokens + completion_tokens;
 
-    // Create OpenAI-compatible response
    let openai_response = OpenAIResponse {
-        id: format!(
-            "chatcmpl-{}",
-            uuid::Uuid::new_v4().to_string().replace("-", "")
-        ),
+        id: format!("chatcmpl-{}", uuid::Uuid::new_v4().simple()),
        object: "chat.completion".to_string(),
        created: std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .unwrap()
            .as_secs(),
-        model: "llama".to_string(), // You might want to make this configurable
+        model: "llama".to_string(),
        choices: vec![OpenAIChoice {
            index: 0,
            message: OpenAIMessage {
                role: "assistant".to_string(),
-                content,
+                content: provider.text,
            },
            finish_reason: "stop".to_string(),
        }],
        usage: OpenAIUsage {
-            prompt_tokens: 0, // You might need to extract these from the Llama response
-            completion_tokens: token_count,
-            total_tokens: token_count,
+            prompt_tokens,
+            completion_tokens,
+            total_tokens,
        },
    };
 
diff --git a/src/services/llm_local.rs b/src/services/llm_local.rs
index 00ff628..c1e21cb 100644
--- a/src/services/llm_local.rs
+++ b/src/services/llm_local.rs
@@ -432,7 +432,7 @@ struct LlamaCppEmbeddingRequest {
 #[derive(Debug, Deserialize)]
 struct LlamaCppEmbeddingResponseItem {
    pub index: usize,
-    pub embedding: Vec<Vec<f32>>, // This is the fucked up part - embedding is an array of arrays
+    pub embedding: Vec<Vec<f32>>, // Note: the embedding field is an array of arrays
 }
 
 // Proxy endpoint for embeddings
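
A quick way to smoke-test the llm_generic.rs change is to send an OpenAI-style request straight at the proxy. The sketch below is illustrative only: the localhost:8080 address and the sample message body are assumptions not taken from this patch, and AI_ENDPOINT (plus the model value the handler injects) must already be configured.

# Hypothetical smoke test; adjust host/port to wherever the actix service actually listens.
curl -s http://localhost:8080/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{"messages":[{"role":"user","content":"Hello"}]}'
# The handler adds "model" to the body, forwards it to AI_ENDPOINT, and rewrites a
# {"text": ..., "generated_tokens": ..., "input_tokens": ...} provider reply into an
# OpenAI-style chat.completion response, returning the raw provider body if conversion fails.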