From 57f20d903eb9d812d5ab797c6e3dd9e477b44a6b Mon Sep 17 00:00:00 2001
From: "Rodrigo Rodriguez (Pragmatismo)"
Date: Thu, 14 Aug 2025 09:42:05 -0300
Subject: [PATCH] - Testing of the first BASIC script.

---
 .vscode/launch.json                      |  44 ---
 .vscode/settings.json                    |   7 -
 .vscode/tasks.json                       |  15 -
 .zed/debug.json                          |  14 +
 Cargo.lock                               |  12 +
 Cargo.toml                               |  14 +-
 src/main.rs                              |  16 +-
 src/prompts/business/data-enrichment.bas |  21 +-
 src/scripts/containers/bot.sh            |   2 -
 src/scripts/containers/email.sh          |  18 +-
 src/scripts/containers/host.sh           |  30 ++
 src/scripts/containers/llm.sh            |  61 ++++
 src/scripts/containers/proxy.sh          |  10 +-
 src/scripts/utils/cleaner.sh             |   2 +
 src/scripts/utils/disk-size.md           |   6 +
 src/scripts/utils/email-ips.sh           |   8 +
 src/scripts/utils/set-limits.sh          |  22 +-
 src/services.rs                          |   2 +
 src/services/llm_local.rs                | 392 +++++++++++++++++++++++
 src/services/llm_provider.rs             | 103 ++++++
 src/services/utils.rs                    |  18 +-
 21 files changed, 699 insertions(+), 118 deletions(-)
 delete mode 100644 .vscode/launch.json
 delete mode 100644 .vscode/settings.json
 delete mode 100644 .vscode/tasks.json
 create mode 100644 .zed/debug.json
 create mode 100644 src/scripts/containers/host.sh
 create mode 100644 src/scripts/containers/llm.sh
 create mode 100644 src/scripts/utils/disk-size.md
 create mode 100644 src/scripts/utils/email-ips.sh
 create mode 100644 src/services/llm_local.rs
 create mode 100644 src/services/llm_provider.rs

diff --git a/.vscode/launch.json b/.vscode/launch.json
deleted file mode 100644
index 79f4876..0000000
--- a/.vscode/launch.json
+++ /dev/null
@@ -1,44 +0,0 @@
-{
-    "version": "0.2.0",
-    "configurations": [
-        {
-            "type": "lldb",
-            "request": "launch",
-            "name": "Debug GB Server",
-            "cargo": {
-                "args": [
-                    "build",
-                    "--bin=gbserver"
-                ],
-                "filter": {
-                    "name": "gbserver",
-                    "kind": "bin"
-                }
-            },
-            "args": [],
-            "cwd": "${workspaceFolder}",
-        },
-        {
-            "type": "lldb",
-            "request": "launch",
-            "name": "Debug Integration Tests",
-            "cargo": {
-                "args": [
-                    "test",
-                    "--no-run",
-                    "--lib",
-                    "--package=gbserver"
-                ],
-                "filter": {
-                    "name": "integration",
-                    "kind": "test"
-                }
-            },
-            "args": [],
-            "cwd": "${workspaceFolder}",
-            "env": {
-                "RUST_LOG": "info"
-            }
-        },
-    ],
-}
\ No newline at end of file
diff --git a/.vscode/settings.json b/.vscode/settings.json
deleted file mode 100644
index 4ca308b..0000000
--- a/.vscode/settings.json
+++ /dev/null
@@ -1,7 +0,0 @@
-{
-    "lldb.executable": "/usr/bin/lldb",
-    "lldb.showDisassembly": "never",
-    "lldb.dereferencePointers": true,
-    "lldb.consoleMode": "commands",
-    "rust-test Explorer.cargoTestExtraArgs": ["--", "--nocapture"]
-}
\ No newline at end of file
diff --git a/.vscode/tasks.json b/.vscode/tasks.json
deleted file mode 100644
index 6f66893..0000000
--- a/.vscode/tasks.json
+++ /dev/null
@@ -1,15 +0,0 @@
-{
-    "version": "2.0.0",
-    "tasks": [
-        {
-            "label": "build",
-            "type": "shell",
-            "command": "cargo",
-            "args": ["build"],
-            "group": {
-                "kind": "build",
-                "isDefault": true
-            }
-        }
-    ]
-}
\ No newline at end of file
diff --git a/.zed/debug.json b/.zed/debug.json
new file mode 100644
index 0000000..fb5d477
--- /dev/null
+++ b/.zed/debug.json
@@ -0,0 +1,14 @@
+[
+  {
+    "label": "Build & Debug native binary",
+    "build": {
+      "command": "cargo",
+      "args": ["build"]
+    },
+    "program": "$ZED_WORKTREE_ROOT/target/debug/gbserver",
+
+    "sourceLanguages": ["rust"],
+    "request": "launch",
+    "adapter": "CodeLLDB"
+  }
+]
diff --git a/Cargo.lock b/Cargo.lock
index 3562662..3c27df6 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1765,6 +1765,7 @@ dependencies = [
  "tracing",
"tracing-subscriber", "urlencoding", + "uuid", "zip", ] @@ -5183,6 +5184,17 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" +[[package]] +name = "uuid" +version = "1.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f33196643e165781c20a5ead5582283a7dacbb87855d867fbc2df3f81eddc1be" +dependencies = [ + "getrandom 0.3.3", + "js-sys", + "wasm-bindgen", +] + [[package]] name = "valuable" version = "0.1.1" diff --git a/Cargo.toml b/Cargo.toml index b624999..0a0255b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,7 +11,7 @@ repository = "https://alm.pragmatismo.com.br/generalbots/gbserver" actix-cors = "0.6" actix-multipart = "0.6" actix-web = "4" -actix-ws="0.3.0" +actix-ws = "0.3.0" thirtyfour = { version = "0.30" } downloader = "0.2.8" anyhow = "1.0" @@ -24,7 +24,12 @@ futures = "0.3" futures-util = "0.3" imap = "2.0" langchain-rust = "4.4.3" -lettre = { version = "0.10", features = ["smtp-transport", "builder", "tokio1", "tokio1-native-tls"] } +lettre = { version = "0.10", features = [ + "smtp-transport", + "builder", + "tokio1", + "tokio1-native-tls", +] } log = "0.4" mailparse = "0.13" minio = { git = "https://github.com/minio/minio-rs", branch = "master" } @@ -33,7 +38,7 @@ reqwest = { version = "0.11", features = ["json", "stream"] } rhai = "1.22.2" serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" -smartstring = "1.0" # Use the latest version from crates.io +smartstring = "1.0" # Use the latest version from crates.io sqlx = { version = "0.7", features = ["runtime-tokio-rustls", "postgres"] } tempfile = "3" tokio = { version = "1", features = ["full"] } @@ -43,4 +48,5 @@ tracing-subscriber = { version = "0.3", features = ["fmt"] } scraper = "0.18" urlencoding = "2.1" regex = "1.10" -zip = "4.3.0" \ No newline at end of file +uuid = { version = "1.0", features = ["v4"] } +zip = "4.3.0" diff --git a/src/main.rs b/src/main.rs index 631b119..9ce5b03 100644 --- a/src/main.rs +++ b/src/main.rs @@ -5,6 +5,7 @@ use actix_web::http::header; use actix_web::{web, App, HttpServer}; use dotenv::dotenv; +use reqwest::Client; use services::config::*; use services::email::*; use services::file::*; @@ -13,12 +14,13 @@ use services::script::*; use services::state::*; use sqlx::PgPool; +use crate::services::llm_local::ensure_llama_server_running; +use crate::services::llm_provider::chat_completions; use crate::services::web_automation::{initialize_browser_pool, BrowserPool}; -//use services:: find::*; + mod services; #[tokio::main(flavor = "multi_thread")] - async fn main() -> std::io::Result<()> { dotenv().ok(); env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")).init(); @@ -38,10 +40,15 @@ async fn main() -> std::io::Result<()> { 5, "/usr/bin/brave-browser-beta".to_string(), )); + + // ensure_llama_server_running() + // .await + // .expect("Failed to initialize LLM local server."); + initialize_browser_pool() .await .expect("Failed to initialize browser pool"); - + let app_state = web::Data::new(AppState { db: db.into(), db_custom: db_custom.into(), @@ -70,9 +77,9 @@ async fn main() -> std::io::Result<()> { .allowed_headers(vec![header::AUTHORIZATION, header::ACCEPT]) .allowed_header(header::CONTENT_TYPE) .max_age(3600); + //.wrap(cors) App::new() - .wrap(cors) .app_data(app_state.clone()) .service(upload_file) .service(list_file) @@ -81,6 +88,7 @@ async fn main() -> std::io::Result<()> { 
            .service(list_emails)
            .service(send_email)
            .service(chat_stream)
+           .service(chat_completions)
            .service(chat)
     })
     .bind((config.server.host.clone(), config.server.port))?
diff --git a/src/prompts/business/data-enrichment.bas b/src/prompts/business/data-enrichment.bas
index c0760f7..8659500 100644
--- a/src/prompts/business/data-enrichment.bas
+++ b/src/prompts/business/data-enrichment.bas
@@ -1,23 +1,20 @@
 let items = FIND "gb.rob", "ACTION=EMUL1"

-FOR EACH item IN items
+FOR EACH item IN items
    PRINT item.company
    let website = WEBSITE OF item.company
    PRINT website
-
+
    let page = GET website
-
-    let prompt = "Build the same simulator , but for " + item.company + " using just *content about the company* from its website, so it is possible to create a good and useful emulator in the same language as the content: " + page
-
+    let prompt = "Build the same simulator, but for " + item.company + " using just *content about the company* from its website, so it is possible to create a good and useful emulator in the same language as the content: " + page
    let alias = LLM "Return a single word for " + item.company + " like a token, no spaces, no special characters, no numbers, no uppercase letters."
-
+
    CREATE_SITE alias, "blank", prompt

    let to = item.emailcto
-    let subject = "Simulador " + alias
-    let body = "Oi, " + FIRST(item.Contact) + "! Tudo bem? Estou empolgado, pois criamos o simulador " + alias + " especificamente para vocês!" + "\n\n Acesse o site: https://sites.pragmatismo.com.br/" + alias + "\n\n" + "Para acessar o simulador, clique no link acima ou copie e cole no seu navegador." + "\n\n" + "Para iniciar, clique no ícone de Play." + "\n\n" + "Atenciosamente,\nDário Vieira\n\n"
-
+    let subject = "Simulador " + alias
+    let name = FIRST(item.Contact)
+    let body = "Oi, " + name + "! Tudo bem? Estou empolgado, pois criamos o simulador " + alias + " especificamente para vocês!" + "\n\n Acesse o site: https://sites.pragmatismo.com.br/" + alias + "\n\n" + "Para acessar o simulador, clique no link acima ou copie e cole no seu navegador." + "\n\n" + "Para iniciar, clique no ícone de Play." + "\n\n" + "Atenciosamente,\nDário Vieira\n\n"
    CREATE_DRAFT to, subject, body
-    SET "gb.rob", "robid="+ item.robid, "ACTION=CLOSE"
-
-NEXT item
\ No newline at end of file
+
+NEXT item
diff --git a/src/scripts/containers/bot.sh b/src/scripts/containers/bot.sh
index ea455c8..f879522 100644
--- a/src/scripts/containers/bot.sh
+++ b/src/scripts/containers/bot.sh
@@ -13,8 +13,6 @@ sleep 15

 lxc exec "$PARAM_TENANT"-bot -- bash -c "

-echo "nameserver $PARAM_DNS_INTERNAL_IP" > /etc/resolv.conf
-
 apt-get update && apt-get install -y \

    build-essential cmake git pkg-config libjpeg-dev libtiff-dev \
diff --git a/src/scripts/containers/email.sh b/src/scripts/containers/email.sh
index fda2429..ed0503d 100644
--- a/src/scripts/containers/email.sh
+++ b/src/scripts/containers/email.sh
@@ -1,11 +1,6 @@
 #!/bin/bash
 PUBLIC_INTERFACE="eth0" # Your host's public network interface

-# Enable IP forwarding
-echo "[HOST] Enabling IP forwarding..."
-echo "net.ipv4.ip_forward=1" | sudo tee -a /etc/sysctl.conf
-sudo sysctl -p
-
 # Configure firewall
 echo "[HOST] Configuring firewall..."
 sudo iptables -A FORWARD -i $PUBLIC_INTERFACE -o lxcbr0 -p tcp -m multiport --dports 25,80,110,143,465,587,993,995,4190 -j ACCEPT
@@ -22,7 +17,7 @@ fi

 # Create directory structure
 echo "[CONTAINER] Creating directories..."
-HOST_BASE="/opt/gbo/tenants/$PARAM_TENANT/email"
+HOST_BASE="/opt/email"
 HOST_DATA="$HOST_BASE/data"
 HOST_CONF="$HOST_BASE/conf"
 HOST_LOGS="$HOST_BASE/logs"
@@ -66,9 +61,9 @@ sudo chown -R "$HOST_EMAIL_UID:$HOST_EMAIL_GID" "$HOST_BASE"

 # Mount directories
 echo "[CONTAINER] Mounting directories..."
-lxc config device add "$PARAM_TENANT"-email emaildata disk source="$HOST_DATA" path=/opt/gbo/data
-lxc config device add "$PARAM_TENANT"-email emailconf disk source="$HOST_CONF" path=/opt/gbo/conf
-lxc config device add "$PARAM_TENANT"-email emaillogs disk source="$HOST_LOGS" path=/opt/gbo/logs
+lxc config device add emailprofile emaildata disk source="$HOST_DATA" path=/opt/gbo/data
+lxc config device add emailprofile emailconf disk source="$HOST_CONF" path=/opt/gbo/conf
+lxc config device add emailprofile emaillogs disk source="$HOST_LOGS" path=/opt/gbo/logs

 # Create systemd service
 echo "[CONTAINER] Creating email service..."
@@ -96,3 +91,8 @@ systemctl daemon-reload
 systemctl enable email
 systemctl start email
 "
+
+for port in 25 80 110 143 465 587 993 995 4190; do
+    lxc config device remove email "port-$port" 2>/dev/null || true
+    lxc config device add email "port-$port" proxy listen=tcp:0.0.0.0:$port connect=tcp:127.0.0.1:$port
+done
diff --git a/src/scripts/containers/host.sh b/src/scripts/containers/host.sh
new file mode 100644
index 0000000..27ad2c9
--- /dev/null
+++ b/src/scripts/containers/host.sh
@@ -0,0 +1,30 @@
+sudo apt install sshfs -y
+lxc init
+lxc storage create default dir
+lxc profile device add default root disk path=/ pool=default
+
+sudo apt update && sudo apt install -y bridge-utils
+
+# Enable IP forwarding
+echo "[HOST] Enabling IP forwarding..."
+echo "net.ipv4.ip_forward=1" | sudo tee -a /etc/sysctl.conf
+sudo sysctl -p
+
+wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/x86_64/cuda-keyring_1.1-1_all.deb
+sudo dpkg -i cuda-keyring_1.1-1_all.deb
+sudo apt-get update
+
+
+
+
+
+
+
+
+sudo apt purge '^nvidia-*' # Clean existing drivers
+sudo add-apt-repository ppa:graphics-drivers/ppa
+sudo apt update
+sudo apt install nvidia-driver-470-server # Most stable for Kepler GPUs
+
+wget https://developer.download.nvidia.com/compute/cuda/11.0.3/local_installers/cuda_11.0.3_450.51.06_linux.run
+sudo sh cuda_11.0.3_450.51.06_linux.run --override
diff --git a/src/scripts/containers/llm.sh b/src/scripts/containers/llm.sh
new file mode 100644
index 0000000..e94476c
--- /dev/null
+++ b/src/scripts/containers/llm.sh
@@ -0,0 +1,61 @@
+#DeepSeek-R1-Distill-Qwen-1.5B-Q3_K_M.gguf
+#Phi-3.5-mini-instruct-IQ2_M.gguf
+#tinyllama-1.1b-chat-v1.0.Q4_0.gguf
+
+
+sudo apt update
+sudo apt upgrade -y
+sudo apt install -y build-essential cmake git curl wget libcurl4-openssl-dev pkg-config gcc-9 g++-9
+
+sudo apt install software-properties-common
+sudo add-apt-repository ppa:deadsnakes/ppa
+sudo apt install python3.6 python3.6-venv python3.6-dev
+wget https://download.pytorch.org/whl/cu110/torch-1.7.1%2Bcu110-cp36-cp36m-linux_x86_64.whl
+wget https://download.pytorch.org/whl/cu110/torchvision-0.8.2%2Bcu110-cp36-cp36m-linux_x86_64.whl
+
+
+sudo ubuntu-drivers autoinstall
+
+sleep 10
+
+CUDA_RUN_FILE="cuda_11.0.3_450.51.06_linux.run"
+wget https://developer.download.nvidia.com/compute/cuda/11.0.3/local_installers/$CUDA_RUN_FILE
+chmod +x $CUDA_RUN_FILE
+sudo ./$CUDA_RUN_FILE --silent --toolkit
+
+echo 'export PATH=/usr/local/cuda-11.0/bin:$PATH' >> ~/.bashrc
+echo 'export LD_LIBRARY_PATH=/usr/local/cuda-11.0/lib64:$LD_LIBRARY_PATH' >> ~/.bashrc
+source ~/.bashrc
+
+nvidia-smi
+nvcc --version
+
+python3 -m venv llama_venv
+source llama_venv/bin/activate
+pip install --upgrade pip
+pip install torch==1.12.1+cu110 torchvision==0.13.1+cu110 torchaudio==0.12.1 --extra-index-url https://download.pytorch.org/whl/cu110
+
+cd ~
+git clone https://github.com/ggerganov/llama.cpp.git
+cd llama.cpp
+rm -rf build
+mkdir build
+cd build
+
+
+# EDIT FILE:
+#ifdef __CUDACC__
+    #ifndef __builtin_assume
+        #define __builtin_assume(x) // empty: ignore it for CUDA compiler
+    #endif
+#endif
+# ggml/src/ggml-cuda/fattn-common.
+#
+cmake -DGGML_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES=35 ..
+make -j$(nproc)
+
+OR
+wget https://github.com/ggml-org/llama.cpp/releases/download/b6148/llama-b6148-bin-ubuntu-x64.zip
+
+
+wget https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/tinyllama-1.1b-chat-v1.0.Q4_0.gguf?download=true
diff --git a/src/scripts/containers/proxy.sh b/src/scripts/containers/proxy.sh
index 1a4c7fe..97bbee6 100644
--- a/src/scripts/containers/proxy.sh
+++ b/src/scripts/containers/proxy.sh
@@ -47,10 +47,10 @@ chown -R gbuser:gbuser /opt/gbo/{bin,data,conf,logs}
 systemctl enable proxy
 "

-for port in 80 443; do
-lxc config device remove "$PARAM_TENANT"-proxy "port-$port" 2>/dev/null || true
-lxc config device add "$PARAM_TENANT"-proxy "port-$port" proxy listen=tcp:0.0.0.0:$port connect=tcp:127.0.0.1:$port
-done
+    for port in 80 443; do
+        lxc config device remove "$PARAM_TENANT"-proxy "port-$port" 2>/dev/null || true
+        lxc config device add "$PARAM_TENANT"-proxy "port-$port" proxy listen=tcp:0.0.0.0:$port connect=tcp:127.0.0.1:$port
+    done

 lxc config set "$PARAM_TENANT"-proxy security.syscalls.intercept.mknod true
-lxc config set "$PARAM_TENANT"-proxy security.syscalls.intercept.setxattr true
\ No newline at end of file
+lxc config set "$PARAM_TENANT"-proxy security.syscalls.intercept.setxattr true
diff --git a/src/scripts/utils/cleaner.sh b/src/scripts/utils/cleaner.sh
index 2e71d91..7115508 100644
--- a/src/scripts/utils/cleaner.sh
+++ b/src/scripts/utils/cleaner.sh
@@ -56,6 +56,8 @@ if command -v lxc >/dev/null 2>&1; then

     echo 'Cleaning logs...'
     rm -rf /opt/gbo/logs/*
+
+    echo 'Cleaning journal logs...'
     journalctl --vacuum-time=1d 2>/dev/null || true

     echo 'Cleaning thumbnail cache...'
diff --git a/src/scripts/utils/disk-size.md b/src/scripts/utils/disk-size.md
new file mode 100644
index 0000000..b7d3a54
--- /dev/null
+++ b/src/scripts/utils/disk-size.md
@@ -0,0 +1,6 @@
+lxc list --format json | jq -r '.[].name' | while read container; do
+    echo -n "$container: "
+    lxc exec $container -- df -h / --output=used < /dev/null | tail -n1
+done
+
+du -h --max-depth=1 "." 2>/dev/null | sort -rh | head -n 50 | awk '{printf "%-10s %s\n", $1, $2}'
diff --git a/src/scripts/utils/email-ips.sh b/src/scripts/utils/email-ips.sh
new file mode 100644
index 0000000..442af3c
--- /dev/null
+++ b/src/scripts/utils/email-ips.sh
@@ -0,0 +1,8 @@
+az network public-ip list --resource-group "$CLOUD_GROUP" \
+    --query "[].{Name:name, IP:ipAddress, ReverseDNS:dnsSettings.reverseFqdn}" \
+    -o table
+
+az network public-ip update --resource-group "$CLOUD_GROUP" \
+    --name "pip-network-adapter-name" \
+    --reverse-fqdn "outbound14.domain.com.br"
+
diff --git a/src/scripts/utils/set-limits.sh b/src/scripts/utils/set-limits.sh
index 8ad715a..0c81ed0 100644
--- a/src/scripts/utils/set-limits.sh
+++ b/src/scripts/utils/set-limits.sh
@@ -3,18 +3,20 @@
 # Define container limits in an associative array
 declare -A container_limits=(
     # Pattern              Memory    CPU Allowance
-    ["*tables*"]="2048MB:50ms/100ms"
+    ["*tables*"]="4096MB:100ms/100ms"
     ["*dns*"]="2048MB:100ms/100ms"
-    ["*doc-editor*"]="512MB:50ms/100ms"
-    ["*proxy*"]="512MB:50ms/100ms"
-    ["*directory*"]="512MB:50ms/100ms"
-    ["*drive*"]="1024MB:50ms/100ms"
-    ["*email*"]="2048MB:50ms/100ms"
-    ["*webmail*"]="2048MB:50ms/100ms"
-    ["*bot*"]="2048MB:50ms/100ms"
-    ["*meeting*"]="1024MB:50ms/100ms"
+    ["*doc-editor*"]="512MB:10ms/100ms"
+    ["*proxy*"]="20248MB:50ms/100ms"
+    ["*directory*"]="1024MB:50ms/100ms"
+    ["*drive*"]="4096MB:25ms/100ms"
+    ["*email*"]="4096MB:50ms/100ms"
+    ["*webmail*"]="4096MB:50ms/100ms"
+    ["*bot*"]="4096MB:50ms/100ms"
+    ["*meeting*"]="4096MB:100ms/100ms"
     ["*alm*"]="512MB:50ms/100ms"
-    ["*alm-ci*"]="4096MB:50ms/100ms"
+    ["*alm-ci*"]="4096MB:25ms/100ms"
+    ["*system*"]="4096MB:10ms/100ms"
+    ["*mailer*"]="4096MB:25ms/100ms"
 )

 # Default values (for containers that don't match any pattern)
diff --git a/src/services.rs b/src/services.rs
index 5077993..57eaa80 100644
--- a/src/services.rs
+++ b/src/services.rs
@@ -3,6 +3,8 @@ pub mod email;
 pub mod file;
 pub mod keywords;
 pub mod llm;
+pub mod llm_local;
+pub mod llm_provider;
 pub mod script;
 pub mod state;
 pub mod utils;
diff --git a/src/services/llm_local.rs b/src/services/llm_local.rs
new file mode 100644
index 0000000..3ac0ba0
--- /dev/null
+++ b/src/services/llm_local.rs
@@ -0,0 +1,392 @@
+use actix_web::{post, web, HttpRequest, HttpResponse, Result};
+use dotenv::dotenv;
+use reqwest::Client;
+use serde::{Deserialize, Serialize};
+use std::env;
+use std::process::{Child, Command, Stdio};
+use std::sync::{Arc, Mutex};
+use tokio::io::{AsyncBufReadExt, BufReader};
+use tokio::process::Command as TokioCommand;
+use tokio::time::{sleep, Duration};
+
+// Global process handle
+static mut LLAMA_PROCESS: Option<Arc<Mutex<Option<tokio::process::Child>>>> = None;
+
+// OpenAI-compatible request/response structures
+#[derive(Debug, Serialize, Deserialize)]
+struct ChatMessage {
+    role: String,
+    content: String,
+}
+
+#[derive(Debug, Serialize, Deserialize)]
+struct ChatCompletionRequest {
+    model: String,
+    messages: Vec<ChatMessage>,
+    stream: Option<bool>,
+}
+
+#[derive(Debug, Serialize, Deserialize)]
+struct ChatCompletionResponse {
+    id: String,
+    object: String,
+    created: u64,
+    model: String,
+    choices: Vec<Choice>,
+}
+
+#[derive(Debug, Serialize, Deserialize)]
+struct Choice {
+    message: ChatMessage,
+    finish_reason: String,
+}
+
+// Llama.cpp server request/response structures
+#[derive(Debug, Serialize, Deserialize)]
+struct LlamaCppRequest {
+    prompt: String,
+    n_predict: Option<i32>,
+    temperature: Option<f32>,
+    top_k: Option<i32>,
+    top_p: Option<f32>,
+    stream: Option<bool>,
+}
+
+#[derive(Debug, Serialize, Deserialize)]
+struct LlamaCppResponse {
+    content: String,
+    stop: bool,
+    generation_settings: Option<serde_json::Value>,
+}
+
+// Function to check if server is running
+async fn is_server_running(url: &str) -> bool {
+    let client = Client::builder()
+        .timeout(Duration::from_secs(3))
+        .build()
+        .unwrap();
+
+    match client.get(&format!("{}/health", url)).send().await {
+        Ok(response) => {
+            let is_ok = response.status().is_success();
+            if is_ok {
+                println!("🟢 Server health check: OK");
+            } else {
+                println!(
+                    "🔴 Server health check: Failed with status {}",
+                    response.status()
+                );
+            }
+            is_ok
+        }
+        Err(e) => {
+            println!("🔴 Server health check: Connection failed - {}", e);
+            false
+        }
+    }
+}
+
+// Function to start llama.cpp server
+async fn start_llama_server() -> Result<(), Box<dyn std::error::Error>> {
+    println!("🚀 Starting llama.cpp server...");
+
+    // Get environment variables for llama.cpp configuration
+    let llama_path = env::var("LLM_CPP_PATH").unwrap_or_else(|_| "llama-server".to_string());
+    let model_path = env::var("LLM_MODEL_PATH")
+        .unwrap_or_else(|_| "./models/tinyllama-1.1b-q4_01.gguf".to_string());
+    let cpu_limit = env::var("CPU_LIMIT").unwrap_or_else(|_| "50".to_string());
+    let port = env::var("LLM_PORT").unwrap_or_else(|_| "8080".to_string());
+
+    println!("🔧 Configuration:");
+    println!("   - Llama path: {}", llama_path);
+    println!("   - Model path: {}", model_path);
+    println!("   - CPU limit: {}%", cpu_limit);
+    println!("   - Port: {}", port);
+
+    // Kill any existing llama processes
+    println!("🧹 Cleaning up existing processes...");
+    let _ = Command::new("pkill").arg("-f").arg("llama-server").output();
+
+    // Wait a bit for cleanup
+    sleep(Duration::from_secs(2)).await;
+
+    // Build the command
+    let full_command = format!(
+        "cpulimit -l {} -- {} -m '{}' --n-gpu-layers 18 --temp 0.7 --ctx-size 1024 --batch-size 256 --no-mmap --mlock --port {} --host 127.0.0.1 --tensor-split 1.0 --main-gpu 0",
+        cpu_limit, llama_path, model_path, port
+    );
+
+    println!("📝 Executing command: {}", full_command);
+
+    // Start llama.cpp server with cpulimit using tokio
+    let mut cmd = TokioCommand::new("sh");
+    cmd.arg("-c");
+    cmd.arg(&full_command);
+    cmd.stdout(Stdio::piped());
+    cmd.stderr(Stdio::piped());
+    cmd.kill_on_drop(true);
+
+    let mut child = cmd
+        .spawn()
+        .map_err(|e| format!("Failed to start llama.cpp server: {}", e))?;
+
+    println!("🔄 Process spawned with PID: {:?}", child.id());
+
+    // Capture stdout and stderr for real-time logging
+    if let Some(stdout) = child.stdout.take() {
+        let stdout_reader = BufReader::new(stdout);
+        tokio::spawn(async move {
+            let mut lines = stdout_reader.lines();
+            while let Ok(Some(line)) = lines.next_line().await {
+                println!("🦙📤 STDOUT: {}", line);
+            }
+            println!("🦙📤 STDOUT stream ended");
+        });
+    }
+
+    if let Some(stderr) = child.stderr.take() {
+        let stderr_reader = BufReader::new(stderr);
+        tokio::spawn(async move {
+            let mut lines = stderr_reader.lines();
+            while let Ok(Some(line)) = lines.next_line().await {
+                println!("🦙📥 STDERR: {}", line);
+            }
+            println!("🦙📥 STDERR stream ended");
+        });
+    }
+
+    // Store the process handle
+    unsafe {
+        LLAMA_PROCESS = Some(Arc::new(Mutex::new(Some(child))));
+    }
+
+    println!("✅ Llama.cpp server process started!");
+    Ok(())
+}
+
+// Function to ensure llama.cpp server is running
+pub async fn ensure_llama_server_running() -> Result<(), Box<dyn std::error::Error>> {
+    let llama_url = env::var("LLM_URL").unwrap_or_else(|_| "http://localhost:8080".to_string());
+
+    // Check if server is already running
+    if is_server_running(&llama_url).await {
+        println!("✅ Llama.cpp server is already running");
+        return Ok(());
+    }
+
+    // Start the server
+    start_llama_server().await?;
+
+    // Wait for server to be ready with verbose logging
+    println!("⏳ Waiting for llama.cpp server to become ready...");
+    let mut attempts = 0;
+    let max_attempts = 60; // 2 minutes total
+
+    while attempts < max_attempts {
+        sleep(Duration::from_secs(2)).await;
+
+        print!(
+            "🔍 Checking server health (attempt {}/{})... ",
+            attempts + 1,
+            max_attempts
+        );
+
+        if is_server_running(&llama_url).await {
+            println!("✅ SUCCESS!");
+            println!("🎉 Llama.cpp server is ready and responding!");
+            return Ok(());
+        } else {
+            println!("❌ Not ready yet");
+        }
+
+        attempts += 1;
+        if attempts % 10 == 0 {
+            println!(
+                "⏰ Still waiting for llama.cpp server... (attempt {}/{})",
+                attempts, max_attempts
+            );
+            println!("💡 Check the logs above for any errors from the llama server");
+        }
+    }
+
+    Err("❌ Llama.cpp server failed to start within timeout (2 minutes)".into())
+}
+
+// Convert OpenAI chat messages to a single prompt
+fn messages_to_prompt(messages: &[ChatMessage]) -> String {
+    let mut prompt = String::new();
+
+    for message in messages {
+        match message.role.as_str() {
+            "system" => {
+                prompt.push_str(&format!("System: {}\n\n", message.content));
+            }
+            "user" => {
+                prompt.push_str(&format!("User: {}\n\n", message.content));
+            }
+            "assistant" => {
+                prompt.push_str(&format!("Assistant: {}\n\n", message.content));
+            }
+            _ => {
+                prompt.push_str(&format!("{}: {}\n\n", message.role, message.content));
+            }
+        }
+    }
+
+    prompt.push_str("Assistant: ");
+    prompt
+}
+
+// Cleanup function
+pub fn cleanup_processes() {
+    println!("🧹 Cleaning up llama.cpp processes...");
+
+    unsafe {
+        if let Some(process_handle) = &LLAMA_PROCESS {
+            if let Ok(mut process) = process_handle.lock() {
+                if let Some(ref mut child) = *process {
+                    println!("🔪 Killing llama server process...");
+                    let _ = child.start_kill();
+                }
+            }
+        }
+    }
+
+    // Kill any remaining llama processes
+    println!("🔍 Killing any remaining llama-server processes...");
+    let output = Command::new("pkill").arg("-f").arg("llama-server").output();
+
+    match output {
+        Ok(result) => {
+            if result.status.success() {
+                println!("✅ Successfully killed llama processes");
+            } else {
+                println!("ℹ️ No llama processes found to kill");
+            }
+        }
+        Err(e) => println!("⚠️ Error trying to kill processes: {}", e),
+    }
+}
+
+// Proxy endpoint
+#[post("/v1/chat/completions")]
+pub async fn chat_completions(
+    req_body: web::Json<ChatCompletionRequest>,
+    _req: HttpRequest,
+) -> Result<HttpResponse> {
+    dotenv().ok();
+
+    // Ensure llama.cpp server is running
+    if let Err(e) = ensure_llama_server_running().await {
+        eprintln!("Failed to start llama.cpp server: {}", e);
+        return Ok(HttpResponse::InternalServerError().json(serde_json::json!({
+            "error": {
+                "message": format!("Failed to start llama.cpp server: {}", e),
+                "type": "server_error"
+            }
+        })));
+    }
+
+    // Get llama.cpp server URL
+    let llama_url = env::var("LLM_URL").unwrap_or_else(|_| "http://localhost:8080".to_string());
+
+    // Convert OpenAI format to llama.cpp format
+    let prompt = messages_to_prompt(&req_body.messages);
+
+    let llama_request = LlamaCppRequest {
+        prompt,
+        n_predict: Some(500), // Adjust as needed
+        temperature: Some(0.7),
+        top_k: Some(40),
+        top_p: Some(0.9),
+        stream: req_body.stream,
+    };
+
+    // Send request to llama.cpp server
+    let client = Client::builder()
+        .timeout(Duration::from_secs(120)) // 2 minute timeout
+        .build()
+        .map_err(|e| {
+            eprintln!("Error creating HTTP client: {}", e);
+            actix_web::error::ErrorInternalServerError("Failed to create HTTP client")
+        })?;
+
+    let response = client
+        .post(&format!("{}/completion", llama_url))
+        .header("Content-Type", "application/json")
+        .json(&llama_request)
+        .send()
+        .await
+        .map_err(|e| {
+            eprintln!("Error calling llama.cpp server: {}", e);
+            actix_web::error::ErrorInternalServerError("Failed to call llama.cpp server")
+        })?;
+
+    let status = response.status();
+
+    if status.is_success() {
+        let llama_response: LlamaCppResponse = response.json().await.map_err(|e| {
+            eprintln!("Error parsing llama.cpp response: {}", e);
+            actix_web::error::ErrorInternalServerError("Failed to parse llama.cpp response")
+        })?;
+
+        // Convert llama.cpp response to OpenAI format
+        let openai_response = ChatCompletionResponse {
+            id: format!("chatcmpl-{}", uuid::Uuid::new_v4()),
+            object: "chat.completion".to_string(),
+            created: std::time::SystemTime::now()
+                .duration_since(std::time::UNIX_EPOCH)
+                .unwrap()
+                .as_secs(),
+            model: req_body.model.clone(),
+            choices: vec![Choice {
+                message: ChatMessage {
+                    role: "assistant".to_string(),
+                    content: llama_response.content.trim().to_string(),
+                },
+                finish_reason: if llama_response.stop {
+                    "stop".to_string()
+                } else {
+                    "length".to_string()
+                },
+            }],
+        };
+
+        Ok(HttpResponse::Ok().json(openai_response))
+    } else {
+        let error_text = response
+            .text()
+            .await
+            .unwrap_or_else(|_| "Unknown error".to_string());
+
+        eprintln!("Llama.cpp server error ({}): {}", status, error_text);
+
+        let actix_status = actix_web::http::StatusCode::from_u16(status.as_u16())
+            .unwrap_or(actix_web::http::StatusCode::INTERNAL_SERVER_ERROR);
+
+        Ok(HttpResponse::build(actix_status).json(serde_json::json!({
+            "error": {
+                "message": error_text,
+                "type": "server_error"
+            }
+        })))
+    }
+}
+
+// Health check endpoint
+#[actix_web::get("/health")]
+pub async fn health() -> Result<HttpResponse> {
+    let llama_url = env::var("LLM_URL").unwrap_or_else(|_| "http://localhost:8080".to_string());
+
+    if is_server_running(&llama_url).await {
+        Ok(HttpResponse::Ok().json(serde_json::json!({
+            "status": "healthy",
+            "llama_server": "running"
+        })))
+    } else {
+        Ok(HttpResponse::ServiceUnavailable().json(serde_json::json!({
+            "status": "unhealthy",
+            "llama_server": "not running"
+        })))
+    }
+}
diff --git a/src/services/llm_provider.rs b/src/services/llm_provider.rs
new file mode 100644
index 0000000..9af6251
--- /dev/null
+++ b/src/services/llm_provider.rs
@@ -0,0 +1,103 @@
+use actix_web::{post, web, HttpRequest, HttpResponse, Result};
+use dotenv::dotenv;
+use reqwest::Client;
+use serde::{Deserialize, Serialize};
+use std::env;
+
+// OpenAI-compatible request/response structures
+#[derive(Debug, Serialize, Deserialize)]
+struct ChatMessage {
+    role: String,
+    content: String,
+}
+
+#[derive(Debug, Serialize, Deserialize)]
+struct ChatCompletionRequest {
+    model: String,
+    messages: Vec<ChatMessage>,
+    stream: Option<bool>,
+}
+
+#[derive(Debug, Serialize, Deserialize)]
+struct ChatCompletionResponse {
+    id: String,
+    object: String,
+    created: u64,
+    model: String,
+    choices: Vec<Choice>,
+}
+
+#[derive(Debug, Serialize, Deserialize)]
+struct Choice {
+    message: ChatMessage,
+    finish_reason: String,
+}
+
+// Proxy endpoint
+#[post("/v1/chat/completions")]
+async fn chat_completions(
+    req_body: web::Json<ChatCompletionRequest>,
+    _req: HttpRequest,
+) -> Result<HttpResponse> {
+    dotenv().ok();
+
+    // Environment variables
+    let azure_endpoint = env::var("AI_ENDPOINT")
+        .map_err(|_| actix_web::error::ErrorInternalServerError("AI_ENDPOINT not set."))?;
+    let azure_key = env::var("AI_KEY")
+        .map_err(|_| actix_web::error::ErrorInternalServerError("AI_KEY not set."))?;
+    let deployment_name = env::var("AI_LLM_MODEL")
+        .map_err(|_| actix_web::error::ErrorInternalServerError("AI_LLM_MODEL not set."))?;
+
+    // Construct Azure OpenAI URL
+    let url = format!(
+        "{}/openai/deployments/{}/chat/completions?api-version=2025-01-01-preview",
+        azure_endpoint, deployment_name
+    );
+
+    // Forward headers
+    let mut headers = reqwest::header::HeaderMap::new();
+    headers.insert(
+        "api-key",
+        reqwest::header::HeaderValue::from_str(&azure_key)
+            .map_err(|_| actix_web::error::ErrorInternalServerError("Invalid Azure key"))?,
+    );
+    headers.insert(
+        "Content-Type",
+        reqwest::header::HeaderValue::from_static("application/json"),
+    );
+
+    // Send request to Azure
+    let client = Client::new();
+    let response = client
+        .post(&url)
+        .headers(headers)
+        .json(&req_body.into_inner())
+        .send()
+        .await
+        .map_err(actix_web::error::ErrorInternalServerError)?;
+
+    // Handle response based on status
+    let status = response.status();
+    let raw_response = response
+        .text()
+        .await
+        .map_err(actix_web::error::ErrorInternalServerError)?;
+
+    // Log the raw response
+    println!("Raw Azure response: {}", raw_response);
+
+    if status.is_success() {
+        // Parse the raw response as JSON
+        let azure_response: serde_json::Value = serde_json::from_str(&raw_response)
+            .map_err(actix_web::error::ErrorInternalServerError)?;
+
+        Ok(HttpResponse::Ok().json(azure_response))
+    } else {
+        // Handle error responses properly
+        let actix_status = actix_web::http::StatusCode::from_u16(status.as_u16())
+            .unwrap_or(actix_web::http::StatusCode::INTERNAL_SERVER_ERROR);
+
+        Ok(HttpResponse::build(actix_status).body(raw_response))
+    }
+}
diff --git a/src/services/utils.rs b/src/services/utils.rs
index d57169f..a8e9f09 100644
--- a/src/services/utils.rs
+++ b/src/services/utils.rs
@@ -13,17 +13,17 @@ use std::error::Error;
 use std::fs::File;
 use std::io::BufReader;
 use std::path::Path;
+use tokio::fs::File as TokioFile;
 use tokio_stream::StreamExt;
 use zip::ZipArchive;

-use tokio::fs::File as TokioFile;
 use reqwest::Client;
 use tokio::io::AsyncWriteExt;

 pub fn azure_from_config(config: &AIConfig) -> AzureConfig {
-    AzureConfig::default()
+    AzureConfig::new()
+        .with_api_base(&config.endpoint)
         .with_api_key(&config.key)
-        .with_api_base(&config.endpoint)
         .with_api_version(&config.version)
         .with_deployment_id(&config.instance)
 }
@@ -240,10 +240,13 @@ pub fn parse_filter(filter_str: &str) -> Result<(String, Vec<String>), Box<dyn Error>> {
     parse_filter_with_offset(filter_str, 0)
 }

-pub fn parse_filter_with_offset(filter_str: &str, offset: usize) -> Result<(String, Vec<String>), Box<dyn Error>> {
+pub fn parse_filter_with_offset(
+    filter_str: &str,
+    offset: usize,
+) -> Result<(String, Vec<String>), Box<dyn Error>> {
     let mut clauses = Vec::new();
     let mut params = Vec::new();
-
+
     for (i, condition) in filter_str.split('&').enumerate() {
         let parts: Vec<&str> = condition.split('=').collect();
         if parts.len() != 2 {
@@ -253,7 +256,10 @@ pub fn parse_filter_with_offset(filter_str: &str, offset: usize) -> Result<(Stri
         let column = parts[0].trim();
         let value = parts[1].trim();

-        if !column.chars().all(|c| c.is_ascii_alphanumeric() || c == '_') {
+        if !column
+            .chars()
+            .all(|c| c.is_ascii_alphanumeric() || c == '_')
+        {
             return Err("Invalid column name".into());
         }
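
A quick smoke test for the new OpenAI-compatible endpoint once gbserver is up (a minimal sketch, not part of the patch: HOST:PORT stands for whatever config.server.host/config.server.port resolve to, the exported values are placeholders, and the model actually used comes from AI_LLM_MODEL on the Azure path in llm_provider.rs or LLM_MODEL_PATH on the local llama.cpp path, not from the "model" field):

# Azure path reads these at request time; values below are placeholders.
export AI_ENDPOINT="https://example-resource.openai.azure.com"
export AI_KEY="placeholder-key"
export AI_LLM_MODEL="example-deployment"

# Send an OpenAI-style chat completion request to the proxy endpoint.
curl -s http://HOST:PORT/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
        "model": "any-model-name",
        "messages": [
          {"role": "system", "content": "You are a helpful assistant."},
          {"role": "user", "content": "Hello"}
        ],
        "stream": false
      }'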