diff --git a/src/services/llm_local.rs b/src/services/llm_local.rs
index 83ab03d..7ba198b 100644
--- a/src/services/llm_local.rs
+++ b/src/services/llm_local.rs
@@ -178,6 +178,102 @@ pub async fn ensure_llama_servers_running() -> Result<(), Box<dyn std::error::Error>> {
+async fn start_llm_server(
+    llama_cpp_path: String,
+    model_path: String,
+    url: String,
+) -> Result<(), Box<dyn std::error::Error>> {
+    let port = url.split(':').last().unwrap_or("8081");
+
+    std::env::set_var("OMP_NUM_THREADS", "20");
+    std::env::set_var("OMP_PLACES", "cores");
+    std::env::set_var("OMP_PROC_BIND", "close");
+
+    // Verify paths exist
+    let llama_path = Path::new(&llama_cpp_path);
+    let model_path = Path::new(&model_path);
+
+    if !llama_path.exists() {
+        return Err(format!("Llama path does not exist: {}", llama_cpp_path).into());
+    }
+
+    if !model_path.exists() {
+        return Err(format!("Model path does not exist: {}", model_path.display()).into());
+    }
+
+    #[cfg(target_os = "linux")]
+    {
+        let executable_path = llama_path.join("llama-server");
+
+        if !executable_path.exists() {
+            return Err(format!("Executable not found: {}", executable_path.display()).into());
+        }
+
+        info!("Starting LLM server on port: {}", port);
+        info!("Llama path: {}", llama_cpp_path);
+        info!("Model path: {}", model_path.display());
+
+        // Use absolute paths and proper process management
+        let mut cmd = Command::new("numactl");
+        cmd.arg("--interleave=all")
+            .arg(executable_path)
+            .arg("-m")
+            .arg(model_path)
+            .arg("--host")
+            .arg("0.0.0.0")
+            .arg("--port")
+            .arg(port)
+            .arg("--n-gpu-layers")
+            .arg("99")
+            .arg("--threads")
+            .arg("20")
+            .arg("--threads-batch")
+            .arg("40")
+            .current_dir(llama_path) // Set working directory
+            .stdout(Stdio::piped())
+            .stderr(Stdio::piped());
+
+        // Spawn and don't wait for completion
+        let child = cmd.spawn()?;
+
+        // Store the child process if you need to manage it later
+        // You might want to add this to a process manager
+        info!("LLM server started with PID: {}", child.id());
+    }
+
+    #[cfg(target_os = "windows")]
+    {
+        let executable_path = llama_path.join("llama-server.exe");
+
+        if !executable_path.exists() {
+            return Err(format!("Executable not found: {}", executable_path.display()).into());
+        }
+
+        info!("Starting LLM server on port: {}", port);
+        info!("Llama path: {}", llama_cpp_path);
+        info!("Model path: {}", model_path.display());
+
+        let mut cmd = Command::new(executable_path);
+        cmd.arg("-m")
+            .arg(model_path)
+            .arg("--host")
+            .arg("0.0.0.0")
+            .arg("--port")
+            .arg(port)
+            .arg("--n-gpu-layers")
+            .arg("99")
+            .current_dir(llama_path)
+            .stdout(Stdio::piped())
+            .stderr(Stdio::piped());
+
+        let child = cmd.spawn()?;
+        info!("LLM server started with PID: {}", child.id());
+    }
+
+    Ok(())
+}
+
 async fn start_embedding_server(
     llama_cpp_path: String,
     model_path: String,
@@ -274,6 +370,7 @@ async fn start_embedding_server(
     Ok(())
 }
 
+
 async fn is_server_running(url: &str) -> bool {
     let client = reqwest::Client::new();
     match client.get(&format!("{}/health", url)).send().await {