diff --git a/src/bootstrap/mod.rs b/src/bootstrap/mod.rs
index da424a45..512ffe47 100644
--- a/src/bootstrap/mod.rs
+++ b/src/bootstrap/mod.rs
@@ -2,7 +2,7 @@ use crate::config::AppConfig;
 use crate::package_manager::{InstallMode, PackageManager};
 use crate::shared::utils::establish_pg_connection;
 use anyhow::Result;
-use diesel::{connection::SimpleConnection, RunQueryDsl, QueryableByName};
+use diesel::{connection::SimpleConnection, QueryableByName};
 use dotenvy::dotenv;
 use log::{debug, error, info, trace};
 use aws_sdk_s3::Client;
diff --git a/src/config/mod.rs b/src/config/mod.rs
index fcec5d8d..49aae185 100644
--- a/src/config/mod.rs
+++ b/src/config/mod.rs
@@ -419,27 +419,22 @@ impl ConfigManager {
     pub fn get_config(
         &self,
-        bot_id: &uuid::Uuid,
+        code_bot_id: &uuid::Uuid,
         key: &str,
         fallback: Option<&str>,
     ) -> Result<String> {
+        use crate::shared::models::schema::bot_configuration::dsl::*;
+
         let mut conn = self.conn.lock().unwrap();
         let fallback_str = fallback.unwrap_or("");
-        #[derive(Debug, QueryableByName)]
-        struct ConfigValue {
-            #[diesel(sql_type = Text)]
-            value: String,
-        }
-
-        let result = diesel::sql_query(
-            "SELECT get_bot_config($1, $2, $3) as value"
-        )
-        .bind::<diesel::sql_types::Uuid, _>(bot_id)
-        .bind::<diesel::sql_types::Text, _>(key)
-        .bind::<diesel::sql_types::Text, _>(fallback_str)
-        .get_result::<ConfigValue>(&mut *conn)
-        .map(|row| row.value)?;
+        let result = bot_configuration
+            .filter(bot_id.eq(code_bot_id))
+            .filter(config_key.eq(key))
+            .select(config_value)
+            .first::<String>(&mut *conn)
+            .unwrap_or(fallback_str.to_string());
+
         Ok(result)
     }
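For reviewers, the reworked `get_config` now reduces to a single Diesel query with a string fallback. A minimal standalone sketch of that lookup follows, assuming the `bot_configuration` schema added in `src/shared/models.rs` and a `PgConnection`; the helper and its parameter names are illustrative, not code from this diff:

```rust
use diesel::prelude::*;
use diesel::PgConnection;

use crate::shared::models::schema::bot_configuration::dsl::*;

// Sketch of the fallback-on-miss lookup that get_config now performs.
fn lookup_config(
    conn: &mut PgConnection,
    wanted_bot_id: &uuid::Uuid,
    wanted_key: &str,
    fallback: &str,
) -> String {
    bot_configuration
        .filter(bot_id.eq(wanted_bot_id))   // column from the new bot_configuration table
        .filter(config_key.eq(wanted_key))
        .select(config_value)
        .first::<String>(conn)
        .unwrap_or(fallback.to_string())    // any Diesel error, not just NotFound, yields the fallback
}
```

Two details worth noting: the parameter is renamed to `code_bot_id` so it no longer shadows the `bot_id` column pulled in by the `dsl::*` glob, and `unwrap_or` maps every query error (connection failures included) to the fallback string, whereas the old `get_bot_config($1, $2, $3)` path surfaced errors via `?`.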
env::var("EMBEDDING_URL").unwrap_or("http://localhost:8082".to_string()) - }; - let llama_cpp_path = match &config_manager { - Some(cm) => env::var("LLM_CPP_PATH").unwrap_or_else(|_| - cm.get_config(&default_bot_id, "LLM_CPP_PATH", None) - .unwrap_or("~/llama.cpp".to_string()) - ), - None => env::var("LLM_CPP_PATH").unwrap_or("~/llama.cpp".to_string()) - }; - let llm_model_path = match &config_manager { - Some(cm) => env::var("LLM_MODEL_PATH").unwrap_or_else(|_| - cm.get_config(&default_bot_id, "LLM_MODEL_PATH", None) - .unwrap_or("".to_string()) - ), - None => env::var("LLM_MODEL_PATH").unwrap_or("".to_string()) - }; - let embedding_model_path = match &config_manager { - Some(cm) => env::var("EMBEDDING_MODEL_PATH").unwrap_or_else(|_| - cm.get_config(&default_bot_id, "EMBEDDING_MODEL_PATH", None) - .unwrap_or("".to_string()) - ), - None => env::var("EMBEDDING_MODEL_PATH").unwrap_or("".to_string()) + // Get default bot ID from database + let default_bot_id = { + let mut conn = conn.lock().unwrap(); + bots.filter(name.eq("default")) + .select(id) + .first::(&mut *conn) + .unwrap_or_else(|_| uuid::Uuid::nil()) }; - info!("🚀 Starting local llama.cpp servers..."); - info!("📋 Configuration:"); + // Get configuration from config using default bot ID + let llm_url = config_manager.get_config(&default_bot_id, "llm-url", None)?; + let llm_model = config_manager.get_config(&default_bot_id, "llm-model", None)?; + + let embedding_url = config_manager.get_config(&default_bot_id, "embedding-url", None)?; + let embedding_model = config_manager.get_config(&default_bot_id, "embedding-model", None)?; + + let llm_server_path = config_manager.get_config(&default_bot_id, "llm-server-path", None)?; + + info!(" Starting LLM servers..."); + info!(" Configuration:"); info!(" LLM URL: {}", llm_url); info!(" Embedding URL: {}", embedding_url); - info!(" LLM Model: {}", llm_model_path); - info!(" Embedding Model: {}", embedding_model_path); + info!(" LLM Model: {}", llm_model); + info!(" Embedding Model: {}", embedding_model); + info!(" LLM Server Path: {}", llm_server_path); // Check if servers are already running let llm_running = is_server_running(&llm_url).await; @@ -123,26 +102,26 @@ pub async fn ensure_llama_servers_running() -> Result<(), Box Result<(), Box Result<(), Box Result<(), Box Result<(), Box bool { + + let client = reqwest::Client::new(); match client.get(&format!("{}/health", url)).send().await { Ok(response) => response.status().is_success(), @@ -364,7 +346,7 @@ pub async fn chat_completions_local( })?; let response = client - .post(&format!("{}/completion", llama_url)) + .post(&format!("{}/v1/completion", llama_url)) .header("Content-Type", "application/json") .json(&llama_request) .send() @@ -639,20 +621,3 @@ pub async fn embeddings_local( Ok(HttpResponse::Ok().json(openai_response)) } -// Health check endpoint -#[actix_web::get("/health")] -pub async fn health() -> Result { - let llama_url = env::var("LLM_URL").unwrap_or_else(|_| "http://localhost:8081".to_string()); - - if is_server_running(&llama_url).await { - Ok(HttpResponse::Ok().json(serde_json::json!({ - "status": "healthy", - "llama_server": "running" - }))) - } else { - Ok(HttpResponse::ServiceUnavailable().json(serde_json::json!({ - "status": "unhealthy", - "llama_server": "not running" - }))) - } -} diff --git a/src/main.rs b/src/main.rs index 90a6283a..7a9ebfc7 100644 --- a/src/main.rs +++ b/src/main.rs @@ -172,10 +172,6 @@ async fn main() -> std::io::Result<()> { }; let db_custom_pool = db_pool.clone(); - 
diff --git a/src/main.rs b/src/main.rs
index 90a6283a..7a9ebfc7 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -172,10 +172,6 @@ async fn main() -> std::io::Result<()> {
     };
     let db_custom_pool = db_pool.clone();
 
-    ensure_llama_servers_running()
-        .await
-        .expect("Failed to initialize LLM local server");
-
     let cache_url = std::env::var("CACHE_URL")
         .or_else(|_| std::env::var("REDIS_URL"))
         .unwrap_or_else(|_| "redis://localhost:6379".to_string());
@@ -272,8 +268,14 @@ async fn main() -> std::io::Result<()> {
         log::error!("Failed to mount bots: {}", e);
     }
-
+
+    ensure_llama_servers_running(&app_state)
+        .await
+        .expect("Failed to initialize LLM local server");
+
+    HttpServer::new(move || {
+        let cors = Cors::default()
             .allow_any_origin()
             .allow_any_method()
diff --git a/src/shared/models.rs b/src/shared/models.rs
index 375db639..7ef9d34e 100644
--- a/src/shared/models.rs
+++ b/src/shared/models.rs
@@ -401,6 +401,18 @@ pub mod schema {
             added_at -> Text,
         }
     }
+
+    diesel::table! {
+        bot_configuration (id) {
+            id -> Uuid,
+            bot_id -> Uuid,
+            config_key -> Text,
+            config_value -> Text,
+            config_type -> Text,
+            created_at -> Timestamptz,
+            updated_at -> Timestamptz,
+        }
+    }
 }
 
 pub use schema::*;
diff --git a/templates/default.gbai/default.gbot/config.csv b/templates/default.gbai/default.gbot/config.csv
index 340ca8f2..ece77065 100644
--- a/templates/default.gbai/default.gbot/config.csv
+++ b/templates/default.gbai/default.gbot/config.csv
@@ -5,21 +5,20 @@
 server_port,8080
 sites_root,/tmp
 llm-key,none
-llm-url,http://localhost:8080/v1
-llm-model,botserver-stack/data/llm/DeepSeek-R1-Distill-Qwen-1.5B-Q3_K_M.gguf
+llm-url,http://localhost:8081
+llm-model,../../../../data/llm/DeepSeek-R1-Distill-Qwen-1.5B-Q3_K_M.gguf
 embedding-url,http://localhost:8082
-embedding-model,botserver-stack/data/llm/bge-small-en-v1.5-f32.gguf
+embedding-model,../../../../data/llm/bge-small-en-v1.5-f32.gguf
 llm-server,false
-llm-server-path,botserver-stack/bin/llm/
-llm-server-model,botserver-stack/data/llm/DeepSeek-R1-Distill-Qwen-1.5B-Q3_K_M.gguf
+llm-server-path,botserver-stack/bin/llm/build/bin
 llm-server-host,0.0.0.0
-llm-server-port,8080
+llm-server-port,8081
 llm-server-gpu-layers,35
-llm-server-n-moe,4
-llm-server-ctx-size,2048
-llm-server-parallel,4
+llm-server-n-moe,16
+llm-server-ctx-size,16000
+llm-server-parallel,8
 llm-server-cont-batching,true
 llm-server-mlock,true
 llm-server-no-mmap,true
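For orientation on the retuned `llm-server-*` values: they line up with llama.cpp's `llama-server` command-line flags. A hedged sketch of the kind of process launch the bootstrap code might assemble from them is below; the binary name and the key-to-flag mapping are assumptions for illustration, not code from this diff, and how `llm-server-n-moe` maps to a flag is not shown here:

```rust
use std::process::{Child, Command};

// Illustrative only: launch llama-server with the new config.csv values.
// Paths come from llm-server-path / llm-model; flag names follow upstream llama.cpp.
fn spawn_llm_server() -> std::io::Result<Child> {
    Command::new("botserver-stack/bin/llm/build/bin/llama-server")
        .args([
            "--model", "../../../../data/llm/DeepSeek-R1-Distill-Qwen-1.5B-Q3_K_M.gguf",
            "--host", "0.0.0.0",
            "--port", "8081",        // llm-server-port
            "--n-gpu-layers", "35",  // llm-server-gpu-layers
            "--ctx-size", "16000",   // llm-server-ctx-size
            "--parallel", "8",       // llm-server-parallel
            "--cont-batching",       // llm-server-cont-batching
            "--mlock",               // llm-server-mlock
            "--no-mmap",             // llm-server-no-mmap
        ])
        .spawn()
}
```

Note that moving `llm-server-port` from 8080 to 8081 (and pointing `llm-url` at it) stops the local LLM server from sharing port 8080 with `server_port`.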