This commit is contained in:
parent 10b4d85629
commit 57f20d903e
21 changed files with 699 additions and 118 deletions

44 .vscode/launch.json (vendored)
@@ -1,44 +0,0 @@
{
    "version": "0.2.0",
    "configurations": [
        {
            "type": "lldb",
            "request": "launch",
            "name": "Debug GB Server",
            "cargo": {
                "args": [
                    "build",
                    "--bin=gbserver"
                ],
                "filter": {
                    "name": "gbserver",
                    "kind": "bin"
                }
            },
            "args": [],
            "cwd": "${workspaceFolder}",
        },
        {
            "type": "lldb",
            "request": "launch",
            "name": "Debug Integration Tests",
            "cargo": {
                "args": [
                    "test",
                    "--no-run",
                    "--lib",
                    "--package=gbserver"
                ],
                "filter": {
                    "name": "integration",
                    "kind": "test"
                }
            },
            "args": [],
            "cwd": "${workspaceFolder}",
            "env": {
                "RUST_LOG": "info"
            }
        },
    ],
}

7 .vscode/settings.json (vendored)
@@ -1,7 +0,0 @@
{
    "lldb.executable": "/usr/bin/lldb",
    "lldb.showDisassembly": "never",
    "lldb.dereferencePointers": true,
    "lldb.consoleMode": "commands",
    "rust-test Explorer.cargoTestExtraArgs": ["--", "--nocapture"]
}

15 .vscode/tasks.json (vendored)
@@ -1,15 +0,0 @@
{
    "version": "2.0.0",
    "tasks": [
        {
            "label": "build",
            "type": "shell",
            "command": "cargo",
            "args": ["build"],
            "group": {
                "kind": "build",
                "isDefault": true
            }
        }
    ]
}

14 .zed/debug.json (Normal file)
@@ -0,0 +1,14 @@
[
    {
        "label": "Build & Debug native binary",
        "build": {
            "command": "cargo",
            "args": ["build"]
        },
        "program": "$ZED_WORKTREE_ROOT/target/debug/gbserver",

        "sourceLanguages": ["rust"],
        "request": "launch",
        "adapter": "CodeLLDB"
    }
]

12 Cargo.lock (generated)
@@ -1765,6 +1765,7 @@ dependencies = [
 "tracing",
 "tracing-subscriber",
 "urlencoding",
 "uuid",
 "zip",
]

@@ -5183,6 +5184,17 @@ version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"

[[package]]
name = "uuid"
version = "1.18.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f33196643e165781c20a5ead5582283a7dacbb87855d867fbc2df3f81eddc1be"
dependencies = [
 "getrandom 0.3.3",
 "js-sys",
 "wasm-bindgen",
]

[[package]]
name = "valuable"
version = "0.1.1"

Cargo.toml
@@ -24,7 +24,12 @@ futures = "0.3"
futures-util = "0.3"
imap = "2.0"
langchain-rust = "4.4.3"
lettre = { version = "0.10", features = ["smtp-transport", "builder", "tokio1", "tokio1-native-tls"] }
lettre = { version = "0.10", features = [
    "smtp-transport",
    "builder",
    "tokio1",
    "tokio1-native-tls",
] }
log = "0.4"
mailparse = "0.13"
minio = { git = "https://github.com/minio/minio-rs", branch = "master" }

@@ -43,4 +48,5 @@ tracing-subscriber = { version = "0.3", features = ["fmt"] }
scraper = "0.18"
urlencoding = "2.1"
regex = "1.10"
uuid = { version = "1.0", features = ["v4"] }
zip = "4.3.0"

14 src/main.rs
@@ -5,6 +5,7 @@ use actix_web::http::header;
use actix_web::{web, App, HttpServer};
use dotenv::dotenv;

use reqwest::Client;
use services::config::*;
use services::email::*;
use services::file::*;

@@ -13,12 +14,13 @@ use services::script::*;
use services::state::*;
use sqlx::PgPool;

use crate::services::llm_local::ensure_llama_server_running;
use crate::services::llm_provider::chat_completions;
use crate::services::web_automation::{initialize_browser_pool, BrowserPool};
//use services:: find::*;

mod services;

#[tokio::main(flavor = "multi_thread")]

async fn main() -> std::io::Result<()> {
    dotenv().ok();
    env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")).init();

@@ -38,6 +40,11 @@ async fn main() -> std::io::Result<()> {
        5,
        "/usr/bin/brave-browser-beta".to_string(),
    ));

    // ensure_llama_server_running()
    //     .await
    //     .expect("Failed to initialize LLM local server.");

    initialize_browser_pool()
        .await
        .expect("Failed to initialize browser pool");

@@ -70,9 +77,9 @@ async fn main() -> std::io::Result<()> {
            .allowed_headers(vec![header::AUTHORIZATION, header::ACCEPT])
            .allowed_header(header::CONTENT_TYPE)
            .max_age(3600);
        //.wrap(cors)

        App::new()
            .wrap(cors)
            .app_data(app_state.clone())
            .service(upload_file)
            .service(list_file)

@@ -81,6 +88,7 @@ async fn main() -> std::io::Result<()> {
            .service(list_emails)
            .service(send_email)
            .service(chat_stream)
            .service(chat_completions)
            .service(chat)
    })
    .bind((config.server.host.clone(), config.server.port))?

@@ -6,18 +6,15 @@ FOR EACH item IN items
    PRINT website

    let page = GET website

    let prompt = "Build the same simulator, but for " + item.company + " using just *content about the company* from its website, so it is possible to create a good and useful emulator in the same langue as the content: " + page

    let alias = LLM "Return a single word for " + item.company + " like a token, no spaces, no special characters, no numbers, no uppercase letters."

    CREATE_SITE alias, "blank", prompt

    let to = item.emailcto
    let subject = "Simulador " + alias
    let body = "Oi, " + FIRST(item.Contact) + "! Tudo bem? Estou empolgado, pois criamos o simulador " + alias + " especificamente para vocês!" + "\n\n Acesse o site: https://sites.pragmatismo.com.br/" + alias + "\n\n" + "Para acessar o simulador, clique no link acima ou copie e cole no seu navegador." + "\n\n" + "Para iniciar, clique no ícone de Play." + "\n\n" + "Atenciosamente,\nDário Vieira\n\n"

    let name = FIRST(item.Contact)
    let body = "Oi, " + name + "! Tudo bem? Estou empolgado, pois criamos o simulador " + alias + " especificamente para vocês!" + "\n\n Acesse o site: https://sites.pragmatismo.com.br/" + alias + "\n\n" + "Para acessar o simulador, clique no link acima ou copie e cole no seu navegador." + "\n\n" + "Para iniciar, clique no ícone de Play." + "\n\n" + "Atenciosamente,\nDário Vieira\n\n"
    CREATE_DRAFT to, subject, body

    SET "gb.rob", "robid="+ item.robid, "ACTION=CLOSE"

NEXT item

@@ -13,8 +13,6 @@ sleep 15

lxc exec "$PARAM_TENANT"-bot -- bash -c "

echo "nameserver $PARAM_DNS_INTERNAL_IP" > /etc/resolv.conf

apt-get update && apt-get install -y \
    build-essential cmake git pkg-config libjpeg-dev libtiff-dev \

@@ -1,11 +1,6 @@
#!/bin/bash
PUBLIC_INTERFACE="eth0" # Your host's public network interface

# Enable IP forwarding
echo "[HOST] Enabling IP forwarding..."
echo "net.ipv4.ip_forward=1" | sudo tee -a /etc/sysctl.conf
sudo sysctl -p

# Configure firewall
echo "[HOST] Configuring firewall..."
sudo iptables -A FORWARD -i $PUBLIC_INTERFACE -o lxcbr0 -p tcp -m multiport --dports 25,80,110,143,465,587,993,995,4190 -j ACCEPT

@@ -22,7 +17,7 @@ fi

# Create directory structure
echo "[CONTAINER] Creating directories..."
HOST_BASE="/opt/gbo/tenants/$PARAM_TENANT/email"
HOST_BASE="/opt/email"
HOST_DATA="$HOST_BASE/data"
HOST_CONF="$HOST_BASE/conf"
HOST_LOGS="$HOST_BASE/logs"

@@ -66,9 +61,9 @@ sudo chown -R "$HOST_EMAIL_UID:$HOST_EMAIL_GID" "$HOST_BASE"

# Mount directories
echo "[CONTAINER] Mounting directories..."
lxc config device add "$PARAM_TENANT"-email emaildata disk source="$HOST_DATA" path=/opt/gbo/data
lxc config device add "$PARAM_TENANT"-email emailconf disk source="$HOST_CONF" path=/opt/gbo/conf
lxc config device add "$PARAM_TENANT"-email emaillogs disk source="$HOST_LOGS" path=/opt/gbo/logs
lxc config device add emailprofile emaildata disk source="$HOST_DATA" path=/opt/gbo/data
lxc config device add emailprofile emailconf disk source="$HOST_CONF" path=/opt/gbo/conf
lxc config device add emailprofile emaillogs disk source="$HOST_LOGS" path=/opt/gbo/logs

# Create systemd service
echo "[CONTAINER] Creating email service..."

@@ -96,3 +91,8 @@ systemctl daemon-reload
systemctl enable email
systemctl start email
"

for port in 25 80 110 143 465 587 993 995 4190; do
    lxc config device remove email "port-$port" 2>/dev/null || true
    lxc config device add email "port-$port" proxy listen=tcp:0.0.0.0:$port connect=tcp:127.0.0.1:$port
done

30 src/scripts/containers/host.sh (Normal file)
@@ -0,0 +1,30 @@
sudo apt install sshfs -y
lxc init
lxc storage create default dir
lxc profile device add default root disk path=/ pool=default

sudo apt update && sudo apt install -y bridge-utils

# Enable IP forwarding
echo "[HOST] Enabling IP forwarding..."
echo "net.ipv4.ip_forward=1" | sudo tee -a /etc/sysctl.conf
sudo sysctl -p

wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/x86_64/cuda-keyring_1.1-1_all.deb
sudo dpkg -i cuda-keyring_1.1-1_all.deb
sudo apt-get update

sudo apt purge '^nvidia-*' # Clean existing drivers
sudo add-apt-repository ppa:graphics-drivers/ppa
sudo apt update
sudo apt install nvidia-driver-470-server # Most stable for Kepler GPUs

wget https://developer.download.nvidia.com/compute/cuda/11.0.3/local_installers/cuda_11.0.3_450.51.06_linux.run
sudo sh cuda_11.0.3_450.51.06_linux.run --override

61 src/scripts/containers/llm.sh (Normal file)
@@ -0,0 +1,61 @@
#DeepSeek-R1-Distill-Qwen-1.5B-Q3_K_M.gguf
#Phi-3.5-mini-instruct-IQ2_M.gguf
#tinyllama-1.1b-chat-v1.0.Q4_0.gguf

sudo apt update
sudo apt upgrade -y
sudo apt install -y build-essential cmake git curl wget libcurl4-openssl-dev pkg-config gcc-9 g++-9

sudo apt install software-properties-common
sudo add-apt-repository ppa:deadsnakes/ppa
sudo apt install python3.6 python3.6-venv python3.6-dev
wget https://download.pytorch.org/whl/cu110/torch-1.7.1%2Bcu110-cp36-cp36m-linux_x86_64.whl
wget https://download.pytorch.org/whl/cu110/torchvision-0.8.2%2Bcu110-cp36-cp36m-linux_x86_64.whl

sudo ubuntu-drivers autoinstall

sleep 10

CUDA_RUN_FILE="cuda_11.0.3_450.51.06_linux.run"
wget https://developer.download.nvidia.com/compute/cuda/11.0.3/local_installers/$CUDA_RUN_FILE
chmod +x $CUDA_RUN_FILE
sudo ./$CUDA_RUN_FILE --silent --toolkit

echo 'export PATH=/usr/local/cuda-11.0/bin:$PATH' >> ~/.bashrc
echo 'export LD_LIBRARY_PATH=/usr/local/cuda-11.0/lib64:$LD_LIBRARY_PATH' >> ~/.bashrc
source ~/.bashrc

nvidia-smi
nvcc --version

python3 -m venv llama_venv
source llama_venv/bin/activate
pip install --upgrade pip
pip install torch==1.12.1+cu110 torchvision==0.13.1+cu110 torchaudio==0.12.1 --extra-index-url https://download.pytorch.org/whl/cu110

cd ~
git clone https://github.com/ggerganov/llama.cpp.git
cd llama.cpp
rm -rf build
mkdir build
cd build

# EDIT FILE: ggml/src/ggml-cuda/fattn-common. (add the guard below before building)
#ifdef __CUDACC__
#ifndef __builtin_assume
#define __builtin_assume(x) // empty: ignore it for CUDA compiler
#endif
#endif
cmake -DGGML_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES=35 ..
make -j$(nproc)

# OR
wget https://github.com/ggml-org/llama.cpp/releases/download/b6148/llama-b6148-bin-ubuntu-x64.zip

wget https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/tinyllama-1.1b-chat-v1.0.Q4_0.gguf?download=true

@@ -56,6 +56,8 @@ if command -v lxc >/dev/null 2>&1; then

        echo 'Cleaning logs...'
        rm -rf /opt/gbo/logs/*

        echo 'Cleaning journal logs...'
        journalctl --vacuum-time=1d 2>/dev/null || true

        echo 'Cleaning thumbnail cache...'

6 src/scripts/utils/disk-size.md (Normal file)
@@ -0,0 +1,6 @@
lxc list --format json | jq -r '.[].name' | while read container; do
    echo -n "$container: "
    lxc exec $container -- df -h / --output=used < /dev/null | tail -n1
done

du -h --max-depth=1 "." 2>/dev/null | sort -rh | head -n 50 | awk '{printf "%-10s %s\n", $1, $2}'

8 src/scripts/utils/email-ips.sh (Normal file)
@@ -0,0 +1,8 @@
az network public-ip list --resource-group "$CLOUD_GROUP" \
    --query "[].{Name:name, IP:ipAddress, ReverseDNS:dnsSettings.reverseFqdn}" \
    -o table

az network public-ip update --resource-group "$CLOUD_GROUP" \
    --name "pip-network-adapter-name" \
    --reverse-fqdn "outbound14.domain.com.br"

@@ -3,18 +3,20 @@
# Define container limits in an associative array
declare -A container_limits=(
    # Pattern Memory CPU Allowance
    ["*tables*"]="2048MB:50ms/100ms"
    ["*tables*"]="4096MB:100ms/100ms"
    ["*dns*"]="2048MB:100ms/100ms"
    ["*doc-editor*"]="512MB:50ms/100ms"
    ["*proxy*"]="512MB:50ms/100ms"
    ["*directory*"]="512MB:50ms/100ms"
    ["*drive*"]="1024MB:50ms/100ms"
    ["*email*"]="2048MB:50ms/100ms"
    ["*webmail*"]="2048MB:50ms/100ms"
    ["*bot*"]="2048MB:50ms/100ms"
    ["*meeting*"]="1024MB:50ms/100ms"
    ["*doc-editor*"]="512MB:10ms/100ms"
    ["*proxy*"]="20248MB:50ms/100ms"
    ["*directory*"]="1024MB:50ms/100ms"
    ["*drive*"]="4096MB:25ms/100ms"
    ["*email*"]="4096MB:50ms/100ms"
    ["*webmail*"]="4096MB:50ms/100ms"
    ["*bot*"]="4096MB:50ms/100ms"
    ["*meeting*"]="4096MB:100ms/100ms"
    ["*alm*"]="512MB:50ms/100ms"
    ["*alm-ci*"]="4096MB:50ms/100ms"
    ["*alm-ci*"]="4096MB:25ms/100ms"
    ["*system*"]="4096MB:10ms/100ms"
    ["*mailer*"]="4096MB:25ms/100ms"
)

# Default values (for containers that don't match any pattern)
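
The values above follow a "MEMORY:CPU_ALLOWANCE" convention (for example "4096MB:50ms/100ms"). The scripts that consume this array are shell, but purely as an illustration of the value format (this snippet is not code from the repository), the split into the two fields can be sketched as:

fn split_limit(value: &str) -> Option<(&str, &str)> {
    // "4096MB:50ms/100ms" -> ("4096MB", "50ms/100ms")
    let mut parts = value.splitn(2, ':');
    Some((parts.next()?, parts.next()?))
}

fn main() {
    let (memory, cpu_allowance) = split_limit("4096MB:50ms/100ms").expect("bad limit format");
    assert_eq!(memory, "4096MB");
    assert_eq!(cpu_allowance, "50ms/100ms");
    println!("memory={memory} cpu.allowance={cpu_allowance}");
}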

@@ -3,6 +3,8 @@ pub mod email;
pub mod file;
pub mod keywords;
pub mod llm;
pub mod llm_local;
pub mod llm_provider;
pub mod script;
pub mod state;
pub mod utils;

392 src/services/llm_local.rs (Normal file)
@@ -0,0 +1,392 @@
use actix_web::{post, web, HttpRequest, HttpResponse, Result};
use dotenv::dotenv;
use reqwest::Client;
use serde::{Deserialize, Serialize};
use std::env;
use std::process::{Child, Command, Stdio};
use std::sync::{Arc, Mutex};
use tokio::io::{AsyncBufReadExt, BufReader};
use tokio::process::Command as TokioCommand;
use tokio::time::{sleep, Duration};

// Global process handle
static mut LLAMA_PROCESS: Option<Arc<Mutex<Option<tokio::process::Child>>>> = None;

// OpenAI-compatible request/response structures
#[derive(Debug, Serialize, Deserialize)]
struct ChatMessage {
    role: String,
    content: String,
}

#[derive(Debug, Serialize, Deserialize)]
struct ChatCompletionRequest {
    model: String,
    messages: Vec<ChatMessage>,
    stream: Option<bool>,
}

#[derive(Debug, Serialize, Deserialize)]
struct ChatCompletionResponse {
    id: String,
    object: String,
    created: u64,
    model: String,
    choices: Vec<Choice>,
}

#[derive(Debug, Serialize, Deserialize)]
struct Choice {
    message: ChatMessage,
    finish_reason: String,
}

// Llama.cpp server request/response structures
#[derive(Debug, Serialize, Deserialize)]
struct LlamaCppRequest {
    prompt: String,
    n_predict: Option<i32>,
    temperature: Option<f32>,
    top_k: Option<i32>,
    top_p: Option<f32>,
    stream: Option<bool>,
}

#[derive(Debug, Serialize, Deserialize)]
struct LlamaCppResponse {
    content: String,
    stop: bool,
    generation_settings: Option<serde_json::Value>,
}

// Function to check if server is running
async fn is_server_running(url: &str) -> bool {
    let client = Client::builder()
        .timeout(Duration::from_secs(3))
        .build()
        .unwrap();

    match client.get(&format!("{}/health", url)).send().await {
        Ok(response) => {
            let is_ok = response.status().is_success();
            if is_ok {
                println!("🟢 Server health check: OK");
            } else {
                println!(
                    "🔴 Server health check: Failed with status {}",
                    response.status()
                );
            }
            is_ok
        }
        Err(e) => {
            println!("🔴 Server health check: Connection failed - {}", e);
            false
        }
    }
}

// Function to start llama.cpp server
async fn start_llama_server() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
    println!("🚀 Starting llama.cpp server...");

    // Get environment variables for llama.cpp configuration
    let llama_path = env::var("LLM_CPP_PATH").unwrap_or_else(|_| "llama-server".to_string());
    let model_path = env::var("LLM_MODEL_PATH")
        .unwrap_or_else(|_| "./models/tinyllama-1.1b-q4_01.gguf".to_string());
    let cpu_limit = env::var("CPU_LIMIT").unwrap_or_else(|_| "50".to_string());
    let port = env::var("LLM_PORT").unwrap_or_else(|_| "8080".to_string());

    println!("🔧 Configuration:");
    println!("   - Llama path: {}", llama_path);
    println!("   - Model path: {}", model_path);
    println!("   - CPU limit: {}%", cpu_limit);
    println!("   - Port: {}", port);

    // Kill any existing llama processes
    println!("🧹 Cleaning up existing processes...");
    let _ = Command::new("pkill").arg("-f").arg("llama-server").output();

    // Wait a bit for cleanup
    sleep(Duration::from_secs(2)).await;

    // Build the command
    let full_command = format!(
        "cpulimit -l {} -- {} -m '{}' --n-gpu-layers 18 --temp 0.7 --ctx-size 1024 --batch-size 256 --no-mmap --mlock --port {} --host 127.0.0.1 --tensor-split 1.0 --main-gpu 0",
        cpu_limit, llama_path, model_path, port
    );

    println!("📝 Executing command: {}", full_command);

    // Start llama.cpp server with cpulimit using tokio
    let mut cmd = TokioCommand::new("sh");
    cmd.arg("-c");
    cmd.arg(&full_command);
    cmd.stdout(Stdio::piped());
    cmd.stderr(Stdio::piped());
    cmd.kill_on_drop(true);

    let mut child = cmd
        .spawn()
        .map_err(|e| format!("Failed to start llama.cpp server: {}", e))?;

    println!("🔄 Process spawned with PID: {:?}", child.id());

    // Capture stdout and stderr for real-time logging
    if let Some(stdout) = child.stdout.take() {
        let stdout_reader = BufReader::new(stdout);
        tokio::spawn(async move {
            let mut lines = stdout_reader.lines();
            while let Ok(Some(line)) = lines.next_line().await {
                println!("🦙📤 STDOUT: {}", line);
            }
            println!("🦙📤 STDOUT stream ended");
        });
    }

    if let Some(stderr) = child.stderr.take() {
        let stderr_reader = BufReader::new(stderr);
        tokio::spawn(async move {
            let mut lines = stderr_reader.lines();
            while let Ok(Some(line)) = lines.next_line().await {
                println!("🦙📥 STDERR: {}", line);
            }
            println!("🦙📥 STDERR stream ended");
        });
    }

    // Store the process handle
    unsafe {
        LLAMA_PROCESS = Some(Arc::new(Mutex::new(Some(child))));
    }

    println!("✅ Llama.cpp server process started!");
    Ok(())
}

// Function to ensure llama.cpp server is running
pub async fn ensure_llama_server_running() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
    let llama_url = env::var("LLM_URL").unwrap_or_else(|_| "http://localhost:8080".to_string());

    // Check if server is already running
    if is_server_running(&llama_url).await {
        println!("✅ Llama.cpp server is already running");
        return Ok(());
    }

    // Start the server
    start_llama_server().await?;

    // Wait for server to be ready with verbose logging
    println!("⏳ Waiting for llama.cpp server to become ready...");
    let mut attempts = 0;
    let max_attempts = 60; // 2 minutes total

    while attempts < max_attempts {
        sleep(Duration::from_secs(2)).await;

        print!(
            "🔍 Checking server health (attempt {}/{})... ",
            attempts + 1,
            max_attempts
        );

        if is_server_running(&llama_url).await {
            println!("✅ SUCCESS!");
            println!("🎉 Llama.cpp server is ready and responding!");
            return Ok(());
        } else {
            println!("❌ Not ready yet");
        }

        attempts += 1;
        if attempts % 10 == 0 {
            println!(
                "⏰ Still waiting for llama.cpp server... (attempt {}/{})",
                attempts, max_attempts
            );
            println!("💡 Check the logs above for any errors from the llama server");
        }
    }

    Err("❌ Llama.cpp server failed to start within timeout (2 minutes)".into())
}

// Convert OpenAI chat messages to a single prompt
fn messages_to_prompt(messages: &[ChatMessage]) -> String {
    let mut prompt = String::new();

    for message in messages {
        match message.role.as_str() {
            "system" => {
                prompt.push_str(&format!("System: {}\n\n", message.content));
            }
            "user" => {
                prompt.push_str(&format!("User: {}\n\n", message.content));
            }
            "assistant" => {
                prompt.push_str(&format!("Assistant: {}\n\n", message.content));
            }
            _ => {
                prompt.push_str(&format!("{}: {}\n\n", message.role, message.content));
            }
        }
    }

    prompt.push_str("Assistant: ");
    prompt
}

// Cleanup function
pub fn cleanup_processes() {
    println!("🧹 Cleaning up llama.cpp processes...");

    unsafe {
        if let Some(process_handle) = &LLAMA_PROCESS {
            if let Ok(mut process) = process_handle.lock() {
                if let Some(ref mut child) = *process {
                    println!("🔪 Killing llama server process...");
                    let _ = child.start_kill();
                }
            }
        }
    }

    // Kill any remaining llama processes
    println!("🔍 Killing any remaining llama-server processes...");
    let output = Command::new("pkill").arg("-f").arg("llama-server").output();

    match output {
        Ok(result) => {
            if result.status.success() {
                println!("✅ Successfully killed llama processes");
            } else {
                println!("ℹ️ No llama processes found to kill");
            }
        }
        Err(e) => println!("⚠️ Error trying to kill processes: {}", e),
    }
}

// Proxy endpoint
#[post("/v1/chat/completions")]
pub async fn chat_completions(
    req_body: web::Json<ChatCompletionRequest>,
    _req: HttpRequest,
) -> Result<HttpResponse> {
    dotenv().ok();

    // Ensure llama.cpp server is running
    if let Err(e) = ensure_llama_server_running().await {
        eprintln!("Failed to start llama.cpp server: {}", e);
        return Ok(HttpResponse::InternalServerError().json(serde_json::json!({
            "error": {
                "message": format!("Failed to start llama.cpp server: {}", e),
                "type": "server_error"
            }
        })));
    }

    // Get llama.cpp server URL
    let llama_url = env::var("LLM_URL").unwrap_or_else(|_| "http://localhost:8080".to_string());

    // Convert OpenAI format to llama.cpp format
    let prompt = messages_to_prompt(&req_body.messages);

    let llama_request = LlamaCppRequest {
        prompt,
        n_predict: Some(500), // Adjust as needed
        temperature: Some(0.7),
        top_k: Some(40),
        top_p: Some(0.9),
        stream: req_body.stream,
    };

    // Send request to llama.cpp server
    let client = Client::builder()
        .timeout(Duration::from_secs(120)) // 2 minute timeout
        .build()
        .map_err(|e| {
            eprintln!("Error creating HTTP client: {}", e);
            actix_web::error::ErrorInternalServerError("Failed to create HTTP client")
        })?;

    let response = client
        .post(&format!("{}/completion", llama_url))
        .header("Content-Type", "application/json")
        .json(&llama_request)
        .send()
        .await
        .map_err(|e| {
            eprintln!("Error calling llama.cpp server: {}", e);
            actix_web::error::ErrorInternalServerError("Failed to call llama.cpp server")
        })?;

    let status = response.status();

    if status.is_success() {
        let llama_response: LlamaCppResponse = response.json().await.map_err(|e| {
            eprintln!("Error parsing llama.cpp response: {}", e);
            actix_web::error::ErrorInternalServerError("Failed to parse llama.cpp response")
        })?;

        // Convert llama.cpp response to OpenAI format
        let openai_response = ChatCompletionResponse {
            id: format!("chatcmpl-{}", uuid::Uuid::new_v4()),
            object: "chat.completion".to_string(),
            created: std::time::SystemTime::now()
                .duration_since(std::time::UNIX_EPOCH)
                .unwrap()
                .as_secs(),
            model: req_body.model.clone(),
            choices: vec![Choice {
                message: ChatMessage {
                    role: "assistant".to_string(),
                    content: llama_response.content.trim().to_string(),
                },
                finish_reason: if llama_response.stop {
                    "stop".to_string()
                } else {
                    "length".to_string()
                },
            }],
        };

        Ok(HttpResponse::Ok().json(openai_response))
    } else {
        let error_text = response
            .text()
            .await
            .unwrap_or_else(|_| "Unknown error".to_string());

        eprintln!("Llama.cpp server error ({}): {}", status, error_text);

        let actix_status = actix_web::http::StatusCode::from_u16(status.as_u16())
            .unwrap_or(actix_web::http::StatusCode::INTERNAL_SERVER_ERROR);

        Ok(HttpResponse::build(actix_status).json(serde_json::json!({
            "error": {
                "message": error_text,
                "type": "server_error"
            }
        })))
    }
}

// Health check endpoint
#[actix_web::get("/health")]
pub async fn health() -> Result<HttpResponse> {
    let llama_url = env::var("LLM_URL").unwrap_or_else(|_| "http://localhost:8080".to_string());

    if is_server_running(&llama_url).await {
        Ok(HttpResponse::Ok().json(serde_json::json!({
            "status": "healthy",
            "llama_server": "running"
        })))
    } else {
        Ok(HttpResponse::ServiceUnavailable().json(serde_json::json!({
            "status": "unhealthy",
            "llama_server": "not running"
        })))
    }
}

103 src/services/llm_provider.rs (Normal file)
@@ -0,0 +1,103 @@
use actix_web::{post, web, HttpRequest, HttpResponse, Result};
use dotenv::dotenv;
use reqwest::Client;
use serde::{Deserialize, Serialize};
use std::env;

// OpenAI-compatible request/response structures
#[derive(Debug, Serialize, Deserialize)]
struct ChatMessage {
    role: String,
    content: String,
}

#[derive(Debug, Serialize, Deserialize)]
struct ChatCompletionRequest {
    model: String,
    messages: Vec<ChatMessage>,
    stream: Option<bool>,
}

#[derive(Debug, Serialize, Deserialize)]
struct ChatCompletionResponse {
    id: String,
    object: String,
    created: u64,
    model: String,
    choices: Vec<Choice>,
}

#[derive(Debug, Serialize, Deserialize)]
struct Choice {
    message: ChatMessage,
    finish_reason: String,
}

// Proxy endpoint
#[post("/v1/chat/completions")]
async fn chat_completions(
    req_body: web::Json<ChatCompletionRequest>,
    _req: HttpRequest,
) -> Result<HttpResponse> {
    dotenv().ok();

    // Environment variables
    let azure_endpoint = env::var("AI_ENDPOINT")
        .map_err(|_| actix_web::error::ErrorInternalServerError("AI_ENDPOINT not set."))?;
    let azure_key = env::var("AI_KEY")
        .map_err(|_| actix_web::error::ErrorInternalServerError("AI_KEY not set."))?;
    let deployment_name = env::var("AI_LLM_MODEL")
        .map_err(|_| actix_web::error::ErrorInternalServerError("AI_LLM_MODEL not set."))?;

    // Construct Azure OpenAI URL
    let url = format!(
        "{}/openai/deployments/{}/chat/completions?api-version=2025-01-01-preview",
        azure_endpoint, deployment_name
    );

    // Forward headers
    let mut headers = reqwest::header::HeaderMap::new();
    headers.insert(
        "api-key",
        reqwest::header::HeaderValue::from_str(&azure_key)
            .map_err(|_| actix_web::error::ErrorInternalServerError("Invalid Azure key"))?,
    );
    headers.insert(
        "Content-Type",
        reqwest::header::HeaderValue::from_static("application/json"),
    );

    // Send request to Azure
    let client = Client::new();
    let response = client
        .post(&url)
        .headers(headers)
        .json(&req_body.into_inner())
        .send()
        .await
        .map_err(actix_web::error::ErrorInternalServerError)?;

    // Handle response based on status
    let status = response.status();
    let raw_response = response
        .text()
        .await
        .map_err(actix_web::error::ErrorInternalServerError)?;

    // Log the raw response
    println!("Raw Azure response: {}", raw_response);

    if status.is_success() {
        // Parse the raw response as JSON
        let azure_response: serde_json::Value = serde_json::from_str(&raw_response)
            .map_err(actix_web::error::ErrorInternalServerError)?;

        Ok(HttpResponse::Ok().json(azure_response))
    } else {
        // Handle error responses properly
        let actix_status = actix_web::http::StatusCode::from_u16(status.as_u16())
            .unwrap_or(actix_web::http::StatusCode::INTERNAL_SERVER_ERROR);

        Ok(HttpResponse::build(actix_status).body(raw_response))
    }
}
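
For reference, a minimal client-side sketch of exercising the new /v1/chat/completions route once gbserver is running. This snippet is not part of the commit: the host and port (127.0.0.1:8080) and the model name are illustrative assumptions, and it only reuses crates the project already depends on (tokio, reqwest, serde_json).

// Hypothetical standalone example; adjust the URL to config.server.host/port.
use reqwest::Client;
use serde_json::json;

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let client = Client::new();

    // Body shape matches the ChatCompletionRequest struct above:
    // model, messages (role/content pairs) and an optional stream flag.
    let body = json!({
        "model": "gpt-4o", // illustrative; the Azure deployment actually used comes from AI_LLM_MODEL
        "messages": [
            { "role": "system", "content": "You are a helpful assistant." },
            { "role": "user", "content": "Say hello." }
        ],
        "stream": false
    });

    let response = client
        .post("http://127.0.0.1:8080/v1/chat/completions")
        .json(&body)
        .send()
        .await?;

    // On success the proxy returns the upstream JSON unchanged.
    println!("status: {}", response.status());
    println!("body: {}", response.text().await?);
    Ok(())
}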

@@ -13,17 +13,17 @@ use std::error::Error;
use std::fs::File;
use std::io::BufReader;
use std::path::Path;
use tokio::fs::File as TokioFile;
use tokio_stream::StreamExt;
use zip::ZipArchive;
use tokio::fs::File as TokioFile;

use reqwest::Client;
use tokio::io::AsyncWriteExt;

pub fn azure_from_config(config: &AIConfig) -> AzureConfig {
    AzureConfig::default()
        .with_api_key(&config.key)
    AzureConfig::new()
        .with_api_base(&config.endpoint)
        .with_api_key(&config.key)
        .with_api_version(&config.version)
        .with_deployment_id(&config.instance)
}

@@ -240,7 +240,10 @@ pub fn parse_filter(filter_str: &str) -> Result<(String, Vec<String>), Box<dyn E
}

// Parse filter without adding quotes
pub fn parse_filter_with_offset(filter_str: &str, offset: usize) -> Result<(String, Vec<String>), Box<dyn Error>> {
pub fn parse_filter_with_offset(
    filter_str: &str,
    offset: usize,
) -> Result<(String, Vec<String>), Box<dyn Error>> {
    let mut clauses = Vec::new();
    let mut params = Vec::new();

@@ -253,7 +256,10 @@ pub fn parse_filter_with_offset(filter_str: &str, offset: usize) -> Result<(Stri
        let column = parts[0].trim();
        let value = parts[1].trim();

        if !column.chars().all(|c| c.is_ascii_alphanumeric() || c == '_') {
        if !column
            .chars()
            .all(|c| c.is_ascii_alphanumeric() || c == '_')
        {
            return Err("Invalid column name".into());
        }
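
To make the reformatted helper easier to follow, here is a self-contained sketch of how a filter string could map to a WHERE clause plus positional parameters. It is only a reading of what the hunks show: the "col=value,col2=value2" syntax and the assumption that offset shifts the $N placeholder numbering are illustrative, not taken from the rest of the file.

use std::error::Error;

// Sketch of the assumed semantics of parse_filter_with_offset (hypothetical re-implementation).
fn parse_filter_sketch(filter_str: &str, offset: usize) -> Result<(String, Vec<String>), Box<dyn Error>> {
    let mut clauses = Vec::new();
    let mut params = Vec::new();

    for (i, pair) in filter_str.split(',').enumerate() {
        let parts: Vec<&str> = pair.splitn(2, '=').collect();
        if parts.len() != 2 {
            return Err("Invalid filter clause".into());
        }
        let column = parts[0].trim();
        let value = parts[1].trim();

        // Same guard as in the diff: only alphanumeric or underscore column names.
        if !column.chars().all(|c| c.is_ascii_alphanumeric() || c == '_') {
            return Err("Invalid column name".into());
        }

        clauses.push(format!("{} = ${}", column, offset + i + 1));
        params.push(value.to_string());
    }

    Ok((clauses.join(" AND "), params))
}

fn main() -> Result<(), Box<dyn Error>> {
    // With two parameters already bound by the caller, numbering starts at $3.
    let (where_clause, params) = parse_filter_sketch("name=acme, city=rio", 2)?;
    assert_eq!(where_clause, "name = $3 AND city = $4");
    assert_eq!(params, vec!["acme".to_string(), "rio".to_string()]);
    println!("{} <- {:?}", where_clause, params);
    Ok(())
}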