Rename start_llm_server to start_embedding_server
Some checks failed
GBCI / build (push) Has been cancelled
Update server startup to use port 8082 and include the --embedding flag. Add info logging and remove the duplicate start_embedding_server.
This commit is contained in:
parent 1aa79a30c4
commit dc22618dd2
1 changed file with 14 additions and 34 deletions
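
As a quick orientation before the diff, here is a minimal, self-contained Rust sketch of the kind of llama-server invocation the renamed start_embedding_server builds after this change. It is not the file touched by the commit: the flags (--host 0.0.0.0, --port 8082, --embedding, --n-gpu-layers 99) are taken from the diff below, while the ./llama-server binary location and the model path are placeholder assumptions.

// Sketch only: assembles the same flags the diff below adds to the server command.
use std::process::{Command, Stdio};

fn main() -> std::io::Result<()> {
    let model_path = "/models/embedding-model.gguf"; // placeholder path, not from the commit
    let port = "8082";                               // new default port introduced by this commit

    let child = Command::new("./llama-server")       // assumed llama.cpp server binary location
        .arg("-m").arg(model_path)
        .arg("--host").arg("0.0.0.0")
        .arg("--port").arg(port)
        .arg("--embedding")                          // flag added by this commit
        .arg("--n-gpu-layers").arg("99")
        .stdout(Stdio::piped())
        .stderr(Stdio::piped())
        .spawn()?;

    println!("embedding server started with PID: {}", child.id());
    Ok(())
}
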
@@ -178,13 +178,12 @@ pub async fn ensure_llama_servers_running() -> Result<(), Box<dyn std::error::Er
         Err(error_msg.into())
     }
 }
 
-async fn start_llm_server(
+async fn start_embedding_server(
     llama_cpp_path: String,
     model_path: String,
     url: String,
 ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
-    let port = url.split(':').last().unwrap_or("8081");
+    let port = url.split(':').last().unwrap_or("8082");
 
     std::env::set_var("OMP_NUM_THREADS", "20");
     std::env::set_var("OMP_PLACES", "cores");
@@ -210,6 +209,10 @@ async fn start_llm_server(
             return Err(format!("Executable not found: {}", executable_path.display()).into());
         }
 
+        info!("Starting embedding server on port: {}", port);
+        info!("Llama path: {}", llama_cpp_path);
+        info!("Model path: {}", model_path.display());
+
         // Use absolute paths and proper process management
         let mut cmd = Command::new("numactl");
         cmd.arg("--interleave=all")
@@ -220,6 +223,7 @@ async fn start_llm_server(
             .arg("0.0.0.0")
             .arg("--port")
             .arg(port)
+            .arg("--embedding")
             .arg("--n-gpu-layers")
             .arg("99")
             .arg("--threads")
@@ -235,7 +239,7 @@ async fn start_llm_server(
 
         // Store the child process if you need to manage it later
         // You might want to add this to a process manager
-        println!("LLM server started with PID: {}", child.id());
+        info!("Embedding server started with PID: {}", child.id());
     }
 
     #[cfg(target_os = "windows")]
@@ -246,6 +250,10 @@ async fn start_llm_server(
             return Err(format!("Executable not found: {}", executable_path.display()).into());
         }
 
+        info!("Starting embedding server on port: {}", port);
+        info!("Llama path: {}", llama_cpp_path);
+        info!("Model path: {}", model_path.display());
+
         let mut cmd = Command::new(executable_path);
         cmd.arg("-m")
             .arg(model_path)
@@ -253,6 +261,7 @@ async fn start_llm_server(
             .arg("0.0.0.0")
            .arg("--port")
             .arg(port)
+            .arg("--embedding")
             .arg("--n-gpu-layers")
             .arg("99")
             .current_dir(llama_path)
@@ -260,40 +269,11 @@ async fn start_llm_server(
             .stderr(Stdio::piped());
 
         let child = cmd.spawn()?;
-        println!("LLM server started with PID: {}", child.id());
+        info!("Embedding server started with PID: {}", child.id());
     }
 
     Ok(())
 }
 
-async fn start_embedding_server(
-    llama_cpp_path: String,
-    model_path: String,
-    url: String,
-) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
-    let port = url.split(':').last().unwrap_or("8082");
-    let mut cmd = Command::new("cmd");
-
-    #[cfg(target_os = "windows")]
-    {
-        cmd.arg("/C").arg(format!(
-            "cd {} && llama-server.exe -m {} --host 0.0.0.0 --port {} --embedding --n-gpu-layers 99",
-            llama_cpp_path, model_path, port
-        ));
-    }
-
-    #[cfg(any(target_os = "linux", target_os = "macos"))]
-    {
-        cmd.arg("-c").arg(format!(
-            "cd {} && ./llama-server -m {} --host 0.0.0.0 --port {} --embedding --n-gpu-layers 99 &",
-            llama_cpp_path, model_path, port
-        ));
-    }
-
-    cmd.spawn()?;
-    Ok(())
-}
-
 async fn is_server_running(url: &str) -> bool {
     let client = reqwest::Client::new();
     match client.get(&format!("{}/health", url)).send().await {
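
For reference, below is a rough, self-contained sketch of the /health probe that the is_server_running function at the end of the diff performs, pointed at the embedding server's new port. It assumes reqwest and tokio are available as dependencies; the localhost URL and the 2-second timeout are illustrative assumptions, not part of the commit.

// Sketch of a /health probe against the embedding server on port 8082.
use std::time::Duration;

async fn is_server_running(url: &str) -> bool {
    let client = reqwest::Client::new();
    match client
        .get(format!("{}/health", url))
        .timeout(Duration::from_secs(2)) // illustrative timeout, not in the commit
        .send()
        .await
    {
        Ok(resp) => resp.status().is_success(),
        Err(_) => false,
    }
}

#[tokio::main]
async fn main() {
    // Placeholder URL; the real code derives the port from the configured URL.
    let up = is_server_running("http://127.0.0.1:8082").await;
    println!("embedding server healthy: {}", up);
}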