diff --git a/src/llm/mod.rs b/src/llm/mod.rs
index c7084baec..3d6491e3e 100644
--- a/src/llm/mod.rs
+++ b/src/llm/mod.rs
@@ -101,7 +101,7 @@ impl LLMProvider for OpenAIClient {
         while let Some(chunk) = stream.next().await {
             let chunk = chunk?;
             let chunk_str = String::from_utf8_lossy(&chunk);
-            
+
             for line in chunk_str.lines() {
                 if line.starts_with("data: ") && !line.contains("[DONE]") {
                     if let Ok(data) = serde_json::from_str::<serde_json::Value>(&line[6..]) {
@@ -209,7 +209,7 @@ impl LLMProvider for AnthropicClient {
         while let Some(chunk) = stream.next().await {
             let chunk = chunk?;
             let chunk_str = String::from_utf8_lossy(&chunk);
-            
+
             for line in chunk_str.lines() {
                 if line.starts_with("data: ") {
                     if let Ok(data) = serde_json::from_str::<serde_json::Value>(&line[6..]) {
diff --git a/src/llm_legacy/llm_generic.rs b/src/llm_legacy/llm_generic.rs
deleted file mode 100644
index 6a3cb9f24..000000000
--- a/src/llm_legacy/llm_generic.rs
+++ /dev/null
@@ -1,80 +0,0 @@
-use actix_web::{web, HttpResponse, Result};
-use dotenvy::dotenv;
-use log::info;
-use serde::{Deserialize, Serialize};
-
-#[derive(Debug, Deserialize)]
-pub struct GenericChatRequest {
-    pub model: String,
-    pub messages: Vec<ChatMessage>,
-    pub temperature: Option<f32>,
-    pub max_tokens: Option<u32>,
-}
-
-#[derive(Debug, Serialize, Deserialize, Clone)]
-pub struct ChatMessage {
-    pub role: String,
-    pub content: String,
-}
-
-#[derive(Debug, Serialize)]
-pub struct GenericChatResponse {
-    pub id: String,
-    pub object: String,
-    pub created: u64,
-    pub model: String,
-    pub choices: Vec<ChatChoice>,
-    pub usage: Usage,
-}
-
-#[derive(Debug, Serialize)]
-pub struct ChatChoice {
-    pub index: u32,
-    pub message: ChatMessage,
-    pub finish_reason: Option<String>,
-}
-
-#[derive(Debug, Serialize)]
-pub struct Usage {
-    pub prompt_tokens: u32,
-    pub completion_tokens: u32,
-    pub total_tokens: u32,
-}
-
-#[derive(Debug, Deserialize)]
-pub struct ProviderConfig {
-    pub endpoint: String,
-    pub api_key: String,
-    pub models: Vec<String>,
-}
-
-pub async fn generic_chat_completions(
-    payload: web::Json<GenericChatRequest>,
-) -> Result<HttpResponse> {
-    dotenv().ok();
-
-    info!("Received generic chat request for model: {}", payload.model);
-
-    // For now, return a mock response
-    let response = GenericChatResponse {
-        id: "chatcmpl-123".to_string(),
-        object: "chat.completion".to_string(),
-        created: 1677652288,
-        model: payload.model.clone(),
-        choices: vec![ChatChoice {
-            index: 0,
-            message: ChatMessage {
-                role: "assistant".to_string(),
-                content: "This is a mock response from the generic LLM endpoint.".to_string(),
-            },
-            finish_reason: Some("stop".to_string()),
-        }],
-        usage: Usage {
-            prompt_tokens: 10,
-            completion_tokens: 20,
-            total_tokens: 30,
-        },
-    };
-
-    Ok(HttpResponse::Ok().json(response))
-}
diff --git a/src/llm_legacy/mod.rs b/src/llm_legacy/mod.rs
index fbdcfd7f0..aeb806741 100644
--- a/src/llm_legacy/mod.rs
+++ b/src/llm_legacy/mod.rs
@@ -1,3 +1,2 @@
 pub mod llm_azure;
-pub mod llm_generic;
 pub mod llm_local;
diff --git a/src/main.rs b/src/main.rs
index 2d86b417a..b05ed79ab 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -106,9 +106,12 @@ async fn main() -> std::io::Result<()> {
     };
 
     let tool_manager = Arc::new(tools::ToolManager::new());
+    let llama_url =
+        std::env::var("LLM_URL").unwrap_or_else(|_| "http://localhost:8081".to_string());
+
     let llm_provider = Arc::new(crate::llm::OpenAIClient::new(
         "empty".to_string(),
-        Some("http://localhost:8081".to_string()),
+        Some(llama_url.clone()),
     ));
 
     let web_adapter = Arc::new(WebChannelAdapter::new());