diff --git a/.vscode/launch.json b/.vscode/launch.json
index 27393a06..ef4168dd 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -17,7 +17,7 @@
             },
             "args": [],
             "env": {
-                "RUST_LOG": "debug"
+                "RUST_LOG": "info"
             },
             "cwd": "${workspaceFolder}"
         },
diff --git a/migrations/6.0.6.sql b/migrations/6.0.6.sql
deleted file mode 100644
index e69de29b..00000000
diff --git a/src/bootstrap/mod.rs b/src/bootstrap/mod.rs
index 942d5ee6..ae38989d 100644
--- a/src/bootstrap/mod.rs
+++ b/src/bootstrap/mod.rs
@@ -1,7 +1,8 @@
 use crate::config::AppConfig;
 use crate::package_manager::{InstallMode, PackageManager};
+use crate::shared::utils::establish_pg_connection;
 use anyhow::Result;
-use diesel::{connection::SimpleConnection, RunQueryDsl, Connection, QueryableByName};
+use diesel::{connection::SimpleConnection, RunQueryDsl, QueryableByName, Connection};
 use dotenvy::dotenv;
 use log::{debug, error, info, trace};
 use aws_sdk_s3::Client;
@@ -140,10 +141,7 @@ impl BootstrapManager {
         if pm.is_installed(component.name) {
             pm.start(component.name)?;
         } else {
-            let database_url = std::env::var("DATABASE_URL")
-                .unwrap_or_else(|_| "postgres://gbuser:@localhost:5432/botserver".to_string());
-            let mut conn = diesel::pg::PgConnection::establish(&database_url)
-                .map_err(|e| anyhow::anyhow!("Failed to connect to database: {}", e))?;
+            let mut conn = establish_pg_connection()?;
             let default_bot_id: uuid::Uuid = diesel::sql_query("SELECT id FROM bots LIMIT 1")
                 .load::(&mut conn)
                 .map(|rows| rows.first().map(|r| r.id).unwrap_or_else(|| uuid::Uuid::new_v4()))
@@ -189,7 +187,7 @@ impl BootstrapManager {
             return Ok(config);
         }
 
-        match diesel::PgConnection::establish(&database_url) {
+        match establish_pg_connection() {
            Ok(mut conn) => {
                if let Err(e) = self.apply_migrations(&mut conn) {
                    log::warn!("Failed to apply migrations: {}", e);
@@ -197,7 +195,7 @@ impl BootstrapManager {
                return Ok(AppConfig::from_database(&mut conn));
            }
            Err(e) => {
-               log::warn!("Failed to connect to legacy database: {}", e);
+               log::warn!("Failed to connect to database: {}", e);
                return Ok(AppConfig::from_env());
            }
        }
@@ -205,7 +203,7 @@ impl BootstrapManager {
        }
 
        let pm = PackageManager::new(self.install_mode.clone(), self.tenant.clone())?;
-       let required_components = vec!["tables", "drive", "cache"];
+       let required_components = vec!["tables", "drive", "cache", "llm"];
        let mut config = AppConfig::from_env();
 
        for component in required_components {
@@ -260,8 +258,7 @@ impl BootstrapManager {
            futures::executor::block_on(pm.install(component))?;
 
            if component == "tables" {
-               let database_url = std::env::var("DATABASE_URL").unwrap();
-               let mut conn = diesel::PgConnection::establish(&database_url)
+               let mut conn = establish_pg_connection()
                    .map_err(|e| anyhow::anyhow!("Failed to connect to database: {}", e))?;
 
                let migration_dir = include_dir::include_dir!("./migrations");
@@ -363,9 +360,7 @@ impl BootstrapManager {
    fn update_bot_config(&self, bot_id: &uuid::Uuid, component: &str) -> Result<()> {
        use diesel::sql_types::{Text, Uuid as SqlUuid};
 
-       let database_url = std::env::var("DATABASE_URL")
-           .unwrap_or_else(|_| "postgres://gbuser:@localhost:5432/botserver".to_string());
-       let mut conn = diesel::pg::PgConnection::establish(&database_url)?;
+       let mut conn = establish_pg_connection()?;
 
        // Ensure globally unique keys and update values atomically
        let config_key = format!("{}_{}", bot_id, component);
@@ -388,8 +383,7 @@ impl BootstrapManager {
    }
 
    pub async fn upload_templates_to_drive(&self, config: &AppConfig) -> Result<()> {
-       let database_url = std::env::var("DATABASE_URL").unwrap_or_else(|_| config.database_url());
std::env::var("DATABASE_URL").unwrap_or_else(|_| config.database_url()); - let mut conn = diesel::PgConnection::establish(&database_url)?; + let mut conn = establish_pg_connection()?; self.create_bots_from_templates(&mut conn)?; let templates_dir = Path::new("templates"); if !templates_dir.exists() { @@ -539,10 +533,8 @@ impl BootstrapManager { let bytes = response.body.collect().await?.into_bytes(); let csv_content = String::from_utf8(bytes.to_vec())?; - let database_url = std::env::var("DATABASE_URL") - .unwrap_or_else(|_| "postgres://gbuser:@localhost:5432/botserver".to_string()); // Create new connection for config loading - let config_conn = diesel::PgConnection::establish(&database_url)?; + let config_conn = establish_pg_connection()?; let config_manager = ConfigManager::new(Arc::new(Mutex::new(config_conn))); // Use default bot ID or create one if needed @@ -556,7 +548,7 @@ impl BootstrapManager { .map_err(|e| anyhow::anyhow!("Failed to sync gbot config: {}", e))?; // Load config from database which now has the CSV values - let mut config_conn = diesel::PgConnection::establish(&database_url)?; + let mut config_conn = establish_pg_connection()?; let config = AppConfig::from_database(&mut config_conn); info!("Successfully loaded config from CSV"); Ok(config) diff --git a/src/config/mod.rs b/src/config/mod.rs index 2724ddbc..06eae187 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -439,6 +439,32 @@ impl ConfigManager { Self { conn } } + pub fn get_config( + &self, + bot_id: &uuid::Uuid, + key: &str, + fallback: Option<&str>, + ) -> Result { + let mut conn = self.conn.lock().unwrap(); + let fallback_str = fallback.unwrap_or(""); + + #[derive(Debug, QueryableByName)] + struct ConfigValue { + #[diesel(sql_type = Text)] + value: String, + } + + let result = diesel::sql_query( + "SELECT get_bot_config($1, $2, $3) as value" + ) + .bind::(bot_id) + .bind::(key) + .bind::(fallback_str) + .get_result::(&mut *conn) + .map(|row| row.value)?; + Ok(result) + } + pub fn sync_gbot_config( &self, bot_id: &uuid::Uuid, diff --git a/src/llm_legacy/llm_local.rs b/src/llm_legacy/llm_local.rs index a0a7f46f..b6c9f3b8 100644 --- a/src/llm_legacy/llm_local.rs +++ b/src/llm_legacy/llm_local.rs @@ -1,10 +1,12 @@ use actix_web::{post, web, HttpRequest, HttpResponse, Result}; +use crate::config::{AppConfig, ConfigManager}; use dotenvy::dotenv; use log::{error, info}; use reqwest::Client; use serde::{Deserialize, Serialize}; use std::env; use tokio::time::{sleep, Duration}; +use uuid::Uuid; // OpenAI-compatible request/response structures #[derive(Debug, Serialize, Deserialize)] @@ -62,13 +64,45 @@ pub async fn ensure_llama_servers_running() -> Result<(), Box env::var("LLM_URL").unwrap_or_else(|_| + cm.get_config(&default_bot_id, "LLM_URL", None) + .unwrap_or("http://localhost:8081".to_string()) + ), + None => env::var("LLM_URL").unwrap_or("http://localhost:8081".to_string()) + }; + let embedding_url = match &config_manager { + Some(cm) => env::var("EMBEDDING_URL").unwrap_or_else(|_| + cm.get_config(&default_bot_id, "EMBEDDING_URL", None) + .unwrap_or("http://localhost:8082".to_string()) + ), + None => env::var("EMBEDDING_URL").unwrap_or("http://localhost:8082".to_string()) + }; + let llama_cpp_path = match &config_manager { + Some(cm) => env::var("LLM_CPP_PATH").unwrap_or_else(|_| + cm.get_config(&default_bot_id, "LLM_CPP_PATH", None) + .unwrap_or("~/llama.cpp".to_string()) + ), + None => env::var("LLM_CPP_PATH").unwrap_or("~/llama.cpp".to_string()) + }; + let llm_model_path = match &config_manager { + 
+        Some(cm) => env::var("LLM_MODEL_PATH").unwrap_or_else(|_|
+            cm.get_config(&default_bot_id, "LLM_MODEL_PATH", None)
+                .unwrap_or("".to_string())
+        ),
+        None => env::var("LLM_MODEL_PATH").unwrap_or("".to_string())
+    };
+    let embedding_model_path = match &config_manager {
+        Some(cm) => env::var("EMBEDDING_MODEL_PATH").unwrap_or_else(|_|
+            cm.get_config(&default_bot_id, "EMBEDDING_MODEL_PATH", None)
+                .unwrap_or("".to_string())
+        ),
+        None => env::var("EMBEDDING_MODEL_PATH").unwrap_or("".to_string())
+    };
 
     info!("🚀 Starting local llama.cpp servers...");
     info!("📋 Configuration:");
@@ -189,18 +223,49 @@ async fn start_llm_server(
     std::env::set_var("OMP_PROC_BIND", "close");
 
     // "cd {} && numactl --interleave=all ./llama-server -m {} --host 0.0.0.0 --port {} --threads 20 --threads-batch 40 --temp 0.7 --parallel 1 --repeat-penalty 1.1 --ctx-size 8192 --batch-size 8192 -n 4096 --mlock --no-mmap --flash-attn --no-kv-offload --no-mmap &",
+    // Read config values with defaults
+    let n_moe = env::var("LLM_SERVER_N_MOE").unwrap_or("4".to_string());
+    let ctx_size = env::var("LLM_SERVER_CTX_SIZE").unwrap_or("4096".to_string());
+    let parallel = env::var("LLM_SERVER_PARALLEL").unwrap_or("1".to_string());
+    let cont_batching = env::var("LLM_SERVER_CONT_BATCHING").unwrap_or("true".to_string());
+    let mlock = env::var("LLM_SERVER_MLOCK").unwrap_or("true".to_string());
+    let no_mmap = env::var("LLM_SERVER_NO_MMAP").unwrap_or("true".to_string());
+    let gpu_layers = env::var("LLM_SERVER_GPU_LAYERS").unwrap_or("20".to_string());
+
+    // Build command arguments dynamically
+    let mut args = format!(
+        "-m {} --host 0.0.0.0 --port {} --top_p 0.95 --temp 0.6 --ctx-size {} --repeat-penalty 1.2 -ngl {}",
+        model_path, port, ctx_size, gpu_layers
+    );
+
+    if n_moe != "0" {
+        args.push_str(&format!(" --n-moe {}", n_moe));
+    }
+    if parallel != "1" {
+        args.push_str(&format!(" --parallel {}", parallel));
+    }
+    if cont_batching == "true" {
+        args.push_str(" --cont-batching");
+    }
+    if mlock == "true" {
+        args.push_str(" --mlock");
+    }
+    if no_mmap == "true" {
+        args.push_str(" --no-mmap");
+    }
+
     if cfg!(windows) {
         let mut cmd = tokio::process::Command::new("cmd");
         cmd.arg("/C").arg(format!(
-            "cd {} && .\\llama-server.exe -m {} --host 0.0.0.0 --port {} --top_p 0.95 --temp 0.6 --flash-attn on --ctx-size 4096 --repeat-penalty 1.2 -ngl 20 ",
-            llama_cpp_path, model_path, port
+            "cd {} && .\\llama-server.exe {}",
+            llama_cpp_path, args
         ));
         cmd.spawn()?;
     } else {
         let mut cmd = tokio::process::Command::new("sh");
         cmd.arg("-c").arg(format!(
-            "cd {} && ./llama-server -m {} --host 0.0.0.0 --port {} --top_p 0.95 --temp 0.6 --flash-attn on --ctx-size 4096 --repeat-penalty 1.2 -ngl 20 &",
-            llama_cpp_path, model_path, port
+            "cd {} && ./llama-server {} &",
+            llama_cpp_path, args
         ));
         cmd.spawn()?;
     }
diff --git a/src/shared/utils.rs b/src/shared/utils.rs
index 64a3983e..31cc27f4 100644
--- a/src/shared/utils.rs
+++ b/src/shared/utils.rs
@@ -1,4 +1,6 @@
 use crate::config::AIConfig;
+use anyhow::{Context, Result};
+use diesel::{Connection, PgConnection};
 use futures_util::StreamExt;
 use indicatif::{ProgressBar, ProgressStyle};
 use log::trace;
@@ -177,3 +179,12 @@ pub async fn call_llm(
 ) -> Result<String, Box<dyn std::error::Error>> {
     Ok(format!("Generated response for: {}", prompt))
 }
+
+/// Establishes a PostgreSQL connection using DATABASE_URL environment variable
+pub fn establish_pg_connection() -> Result<PgConnection> {
+    let database_url = std::env::var("DATABASE_URL")
+        .unwrap_or_else(|_| "postgres://gbuser:@localhost:5432/botserver".to_string());
+
+    PgConnection::establish(&database_url)
+        .with_context(|| format!("Failed to connect to database at {}", database_url))
+}
diff --git a/templates/default.gbai/default.gbot/config.csv b/templates/default.gbai/default.gbot/config.csv
index 3d3fcf93..4ac4a5da 100644
--- a/templates/default.gbai/default.gbot/config.csv
+++ b/templates/default.gbai/default.gbot/config.csv
@@ -4,18 +4,25 @@
 server_host,0.0.0.0
 server_port,8080
 sites_root,/tmp
-llm-key,gsk_
-llm-model,openai/gpt-oss-20b
-llm-url,https://api.groq.com/openai/v1/chat/completions
-
+llm-key,none
 llm-url,http://localhost:8080/v1
-llm-model,./botserver-stack/llm/data/DeepSeek-R1-Distill-Qwen-1.5B-Q3_K_M.gguf
+llm-model,botserver-stack/data/llm/DeepSeek-R1-Distill-Qwen-1.5B-Q3_K_M.gguf
 embedding-url,http://localhost:8082
-embedding-model-path,./botserver-stack/llm/data/bge-small-en-v1.5-f32.gguf
+embedding-model-path,botserver-stack/data/llm/bge-small-en-v1.5-f32.gguf
 llm-server,false
-llm-server-path,~/llama.cpp
+llm-server-path,botserver-stack/bin/llm/
+llm-server-model,botserver-stack/data/llm/DeepSeek-R1-Distill-Qwen-1.5B-Q3_K_M.gguf
+llm-server-host,0.0.0.0
+llm-server-port,8080
+llm-server-gpu-layers,35
+llm-server-n-moe,4
+llm-server-ctx-size,2048
+llm-server-parallel,4
+llm-server-cont-batching,true
+llm-server-mlock,true
+llm-server-no-mmap,true
 email-from,from@domain.com
 email-server,mail.domain.com
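
Taken together, these changes route connection setup through `establish_pg_connection` and per-bot settings through the new `ConfigManager::get_config`. A minimal usage sketch of how a caller might combine the two, assuming `anyhow::Result` is available; the `read_ctx_size` helper, the bot id argument, and the `"4096"` fallback are illustrative and not part of this diff:

use std::sync::{Arc, Mutex};
use uuid::Uuid;

use crate::config::ConfigManager;
use crate::shared::utils::establish_pg_connection;

// Hypothetical caller: open one connection via the shared helper, wrap it in a
// ConfigManager, and read a per-bot value with an explicit fallback.
fn read_ctx_size(bot_id: &Uuid) -> anyhow::Result<String> {
    let conn = establish_pg_connection()?;
    let config_manager = ConfigManager::new(Arc::new(Mutex::new(conn)));
    config_manager.get_config(bot_id, "LLM_SERVER_CTX_SIZE", Some("4096"))
}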