This commit is contained in:
parent 10b4d85629
commit 57f20d903e
21 changed files with 699 additions and 118 deletions

44 .vscode/launch.json (vendored)
@@ -1,44 +0,0 @@
{
    "version": "0.2.0",
    "configurations": [
        {
            "type": "lldb",
            "request": "launch",
            "name": "Debug GB Server",
            "cargo": {
                "args": [
                    "build",
                    "--bin=gbserver"
                ],
                "filter": {
                    "name": "gbserver",
                    "kind": "bin"
                }
            },
            "args": [],
            "cwd": "${workspaceFolder}",
        },
        {
            "type": "lldb",
            "request": "launch",
            "name": "Debug Integration Tests",
            "cargo": {
                "args": [
                    "test",
                    "--no-run",
                    "--lib",
                    "--package=gbserver"
                ],
                "filter": {
                    "name": "integration",
                    "kind": "test"
                }
            },
            "args": [],
            "cwd": "${workspaceFolder}",
            "env": {
                "RUST_LOG": "info"
            }
        },
    ],
}

7 .vscode/settings.json (vendored)
@@ -1,7 +0,0 @@
{
    "lldb.executable": "/usr/bin/lldb",
    "lldb.showDisassembly": "never",
    "lldb.dereferencePointers": true,
    "lldb.consoleMode": "commands",
    "rust-test Explorer.cargoTestExtraArgs": ["--", "--nocapture"]
}

15 .vscode/tasks.json (vendored)
@@ -1,15 +0,0 @@
{
    "version": "2.0.0",
    "tasks": [
        {
            "label": "build",
            "type": "shell",
            "command": "cargo",
            "args": ["build"],
            "group": {
                "kind": "build",
                "isDefault": true
            }
        }
    ]
}

14 .zed/debug.json (Normal file)
@@ -0,0 +1,14 @@
[
    {
        "label": "Build & Debug native binary",
        "build": {
            "command": "cargo",
            "args": ["build"]
        },
        "program": "$ZED_WORKTREE_ROOT/target/debug/gbserver",

        "sourceLanguages": ["rust"],
        "request": "launch",
        "adapter": "CodeLLDB"
    }
]

12 Cargo.lock (generated)
@@ -1765,6 +1765,7 @@ dependencies = [
 "tracing",
 "tracing-subscriber",
 "urlencoding",
 "uuid",
 "zip",
]

@@ -5183,6 +5184,17 @@ version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"

[[package]]
name = "uuid"
version = "1.18.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f33196643e165781c20a5ead5582283a7dacbb87855d867fbc2df3f81eddc1be"
dependencies = [
 "getrandom 0.3.3",
 "js-sys",
 "wasm-bindgen",
]

[[package]]
name = "valuable"
version = "0.1.1"

Cargo.toml
@@ -24,7 +24,12 @@ futures = "0.3"
futures-util = "0.3"
imap = "2.0"
langchain-rust = "4.4.3"
lettre = { version = "0.10", features = ["smtp-transport", "builder", "tokio1", "tokio1-native-tls"] }
lettre = { version = "0.10", features = [
    "smtp-transport",
    "builder",
    "tokio1",
    "tokio1-native-tls",
] }
log = "0.4"
mailparse = "0.13"
minio = { git = "https://github.com/minio/minio-rs", branch = "master" }

@@ -43,4 +48,5 @@ tracing-subscriber = { version = "0.3", features = ["fmt"] }
scraper = "0.18"
urlencoding = "2.1"
regex = "1.10"
uuid = { version = "1.0", features = ["v4"] }
zip = "4.3.0"

14 src/main.rs
@@ -5,6 +5,7 @@ use actix_web::http::header;
use actix_web::{web, App, HttpServer};
use dotenv::dotenv;

use reqwest::Client;
use services::config::*;
use services::email::*;
use services::file::*;

@@ -13,12 +14,13 @@ use services::script::*;
use services::state::*;
use sqlx::PgPool;

use crate::services::llm_local::ensure_llama_server_running;
use crate::services::llm_provider::chat_completions;
use crate::services::web_automation::{initialize_browser_pool, BrowserPool};
//use services:: find::*;

mod services;

#[tokio::main(flavor = "multi_thread")]

async fn main() -> std::io::Result<()> {
    dotenv().ok();
    env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")).init();

@@ -38,6 +40,11 @@ async fn main() -> std::io::Result<()> {
        5,
        "/usr/bin/brave-browser-beta".to_string(),
    ));

    // ensure_llama_server_running()
    //     .await
    //     .expect("Failed to initialize LLM local server.");

    initialize_browser_pool()
        .await
        .expect("Failed to initialize browser pool");

@@ -70,9 +77,9 @@ async fn main() -> std::io::Result<()> {
            .allowed_headers(vec![header::AUTHORIZATION, header::ACCEPT])
            .allowed_header(header::CONTENT_TYPE)
            .max_age(3600);
        //.wrap(cors)

        App::new()
            .wrap(cors)
            .app_data(app_state.clone())
            .service(upload_file)
            .service(list_file)

@@ -81,6 +88,7 @@ async fn main() -> std::io::Result<()> {
            .service(list_emails)
            .service(send_email)
            .service(chat_stream)
            .service(chat_completions)
            .service(chat)
    })
    .bind((config.server.host.clone(), config.server.port))?

@@ -6,18 +6,15 @@ FOR EACH item IN items
    PRINT website

    let page = GET website

    let prompt = "Build the same simulator, but for " + item.company + " using just *content about the company* from its website, so it is possible to create a good and useful emulator in the same langue as the content: " + page

    let alias = LLM "Return a single word for " + item.company + " like a token, no spaces, no special characters, no numbers, no uppercase letters."

    CREATE_SITE alias, "blank", prompt

    let to = item.emailcto
    let subject = "Simulador " + alias
    let body = "Oi, " + FIRST(item.Contact) + "! Tudo bem? Estou empolgado, pois criamos o simulador " + alias + " especificamente para vocês!" + "\n\n Acesse o site: https://sites.pragmatismo.com.br/" + alias + "\n\n" + "Para acessar o simulador, clique no link acima ou copie e cole no seu navegador." + "\n\n" + "Para iniciar, clique no ícone de Play." + "\n\n" + "Atenciosamente,\nDário Vieira\n\n"

    let name = FIRST(item.Contact)
    let body = "Oi, " + name + "! Tudo bem? Estou empolgado, pois criamos o simulador " + alias + " especificamente para vocês!" + "\n\n Acesse o site: https://sites.pragmatismo.com.br/" + alias + "\n\n" + "Para acessar o simulador, clique no link acima ou copie e cole no seu navegador." + "\n\n" + "Para iniciar, clique no ícone de Play." + "\n\n" + "Atenciosamente,\nDário Vieira\n\n"
    CREATE_DRAFT to, subject, body

    SET "gb.rob", "robid="+ item.robid, "ACTION=CLOSE"

NEXT item

@@ -13,8 +13,6 @@ sleep 15

lxc exec "$PARAM_TENANT"-bot -- bash -c "

echo "nameserver $PARAM_DNS_INTERNAL_IP" > /etc/resolv.conf

apt-get update && apt-get install -y \
    build-essential cmake git pkg-config libjpeg-dev libtiff-dev \

@@ -1,11 +1,6 @@
#!/bin/bash
PUBLIC_INTERFACE="eth0" # Your host's public network interface

# Enable IP forwarding
echo "[HOST] Enabling IP forwarding..."
echo "net.ipv4.ip_forward=1" | sudo tee -a /etc/sysctl.conf
sudo sysctl -p

# Configure firewall
echo "[HOST] Configuring firewall..."
sudo iptables -A FORWARD -i $PUBLIC_INTERFACE -o lxcbr0 -p tcp -m multiport --dports 25,80,110,143,465,587,993,995,4190 -j ACCEPT

@@ -22,7 +17,7 @@ fi

# Create directory structure
echo "[CONTAINER] Creating directories..."
HOST_BASE="/opt/gbo/tenants/$PARAM_TENANT/email"
HOST_BASE="/opt/email"
HOST_DATA="$HOST_BASE/data"
HOST_CONF="$HOST_BASE/conf"
HOST_LOGS="$HOST_BASE/logs"

@@ -66,9 +61,9 @@ sudo chown -R "$HOST_EMAIL_UID:$HOST_EMAIL_GID" "$HOST_BASE"

# Mount directories
echo "[CONTAINER] Mounting directories..."
lxc config device add "$PARAM_TENANT"-email emaildata disk source="$HOST_DATA" path=/opt/gbo/data
lxc config device add "$PARAM_TENANT"-email emailconf disk source="$HOST_CONF" path=/opt/gbo/conf
lxc config device add "$PARAM_TENANT"-email emaillogs disk source="$HOST_LOGS" path=/opt/gbo/logs
lxc config device add emailprofile emaildata disk source="$HOST_DATA" path=/opt/gbo/data
lxc config device add emailprofile emailconf disk source="$HOST_CONF" path=/opt/gbo/conf
lxc config device add emailprofile emaillogs disk source="$HOST_LOGS" path=/opt/gbo/logs

# Create systemd service
echo "[CONTAINER] Creating email service..."

@@ -96,3 +91,8 @@ systemctl daemon-reload
systemctl enable email
systemctl start email
"

for port in 25 80 110 143 465 587 993 995 4190; do
    lxc config device remove email "port-$port" 2>/dev/null || true
    lxc config device add email "port-$port" proxy listen=tcp:0.0.0.0:$port connect=tcp:127.0.0.1:$port
done

30 src/scripts/containers/host.sh (Normal file)
@@ -0,0 +1,30 @@
sudo apt install sshfs -y
lxc init
lxc storage create default dir
lxc profile device add default root disk path=/ pool=default

sudo apt update && sudo apt install -y bridge-utils

# Enable IP forwarding
echo "[HOST] Enabling IP forwarding..."
echo "net.ipv4.ip_forward=1" | sudo tee -a /etc/sysctl.conf
sudo sysctl -p

wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/x86_64/cuda-keyring_1.1-1_all.deb
sudo dpkg -i cuda-keyring_1.1-1_all.deb
sudo apt-get update

sudo apt purge '^nvidia-*' # Clean existing drivers
sudo add-apt-repository ppa:graphics-drivers/ppa
sudo apt update
sudo apt install nvidia-driver-470-server # Most stable for Kepler GPUs

wget https://developer.download.nvidia.com/compute/cuda/11.0.3/local_installers/cuda_11.0.3_450.51.06_linux.run
sudo sh cuda_11.0.3_450.51.06_linux.run --override

61 src/scripts/containers/llm.sh (Normal file)
@@ -0,0 +1,61 @@
#DeepSeek-R1-Distill-Qwen-1.5B-Q3_K_M.gguf
#Phi-3.5-mini-instruct-IQ2_M.gguf
#tinyllama-1.1b-chat-v1.0.Q4_0.gguf

sudo apt update
sudo apt upgrade -y
sudo apt install -y build-essential cmake git curl wget libcurl4-openssl-dev pkg-config gcc-9 g++-9

sudo apt install software-properties-common
sudo add-apt-repository ppa:deadsnakes/ppa
sudo apt install python3.6 python3.6-venv python3.6-dev
wget https://download.pytorch.org/whl/cu110/torch-1.7.1%2Bcu110-cp36-cp36m-linux_x86_64.whl
wget https://download.pytorch.org/whl/cu110/torchvision-0.8.2%2Bcu110-cp36-cp36m-linux_x86_64.whl

sudo ubuntu-drivers autoinstall

sleep 10

CUDA_RUN_FILE="cuda_11.0.3_450.51.06_linux.run"
wget https://developer.download.nvidia.com/compute/cuda/11.0.3/local_installers/$CUDA_RUN_FILE
chmod +x $CUDA_RUN_FILE
sudo ./$CUDA_RUN_FILE --silent --toolkit

echo 'export PATH=/usr/local/cuda-11.0/bin:$PATH' >> ~/.bashrc
echo 'export LD_LIBRARY_PATH=/usr/local/cuda-11.0/lib64:$LD_LIBRARY_PATH' >> ~/.bashrc
source ~/.bashrc

nvidia-smi
nvcc --version

python3 -m venv llama_venv
source llama_venv/bin/activate
pip install --upgrade pip
pip install torch==1.12.1+cu110 torchvision==0.13.1+cu110 torchaudio==0.12.1 --extra-index-url https://download.pytorch.org/whl/cu110

cd ~
git clone https://github.com/ggerganov/llama.cpp.git
cd llama.cpp
rm -rf build
mkdir build
cd build

# EDIT FILE: ggml/src/ggml-cuda/fattn-common. (add the guard below before building)
#ifdef __CUDACC__
#ifndef __builtin_assume
#define __builtin_assume(x) // empty: ignore it for CUDA compiler
#endif
#endif
cmake -DGGML_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES=35 ..
make -j$(nproc)

# OR
wget https://github.com/ggml-org/llama.cpp/releases/download/b6148/llama-b6148-bin-ubuntu-x64.zip

wget https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/tinyllama-1.1b-chat-v1.0.Q4_0.gguf?download=true

@@ -56,6 +56,8 @@ if command -v lxc >/dev/null 2>&1; then

        echo 'Cleaning logs...'
        rm -rf /opt/gbo/logs/*

        echo 'Cleaning journal logs...'
        journalctl --vacuum-time=1d 2>/dev/null || true

        echo 'Cleaning thumbnail cache...'

6 src/scripts/utils/disk-size.md (Normal file)
@@ -0,0 +1,6 @@
lxc list --format json | jq -r '.[].name' | while read container; do
    echo -n "$container: "
    lxc exec $container -- df -h / --output=used < /dev/null | tail -n1
done

du -h --max-depth=1 "." 2>/dev/null | sort -rh | head -n 50 | awk '{printf "%-10s %s\n", $1, $2}'

8 src/scripts/utils/email-ips.sh (Normal file)
@@ -0,0 +1,8 @@
az network public-ip list --resource-group "$CLOUD_GROUP" \
    --query "[].{Name:name, IP:ipAddress, ReverseDNS:dnsSettings.reverseFqdn}" \
    -o table

az network public-ip update --resource-group "$CLOUD_GROUP" \
    --name "pip-network-adapter-name" \
    --reverse-fqdn "outbound14.domain.com.br"

@@ -3,18 +3,20 @@
# Define container limits in an associative array
declare -A container_limits=(
    # Pattern Memory CPU Allowance
    ["*tables*"]="2048MB:50ms/100ms"
    ["*tables*"]="4096MB:100ms/100ms"
    ["*dns*"]="2048MB:100ms/100ms"
    ["*doc-editor*"]="512MB:50ms/100ms"
    ["*proxy*"]="512MB:50ms/100ms"
    ["*directory*"]="512MB:50ms/100ms"
    ["*drive*"]="1024MB:50ms/100ms"
    ["*email*"]="2048MB:50ms/100ms"
    ["*webmail*"]="2048MB:50ms/100ms"
    ["*bot*"]="2048MB:50ms/100ms"
    ["*meeting*"]="1024MB:50ms/100ms"
    ["*doc-editor*"]="512MB:10ms/100ms"
    ["*proxy*"]="20248MB:50ms/100ms"
    ["*directory*"]="1024MB:50ms/100ms"
    ["*drive*"]="4096MB:25ms/100ms"
    ["*email*"]="4096MB:50ms/100ms"
    ["*webmail*"]="4096MB:50ms/100ms"
    ["*bot*"]="4096MB:50ms/100ms"
    ["*meeting*"]="4096MB:100ms/100ms"
    ["*alm*"]="512MB:50ms/100ms"
    ["*alm-ci*"]="4096MB:50ms/100ms"
    ["*alm-ci*"]="4096MB:25ms/100ms"
    ["*system*"]="4096MB:10ms/100ms"
    ["*mailer*"]="4096MB:25ms/100ms"
)

# Default values (for containers that don't match any pattern)
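
The values above follow a "MEMORY:CPU_ALLOWANCE" convention (for example "4096MB:50ms/100ms"). The scripts that consume this array are shell, but purely as an illustration of the value format (this snippet is not code from the repository), the split into the two fields can be sketched as:

fn split_limit(value: &str) -> Option<(&str, &str)> {
    // "4096MB:50ms/100ms" -> ("4096MB", "50ms/100ms")
    let mut parts = value.splitn(2, ':');
    Some((parts.next()?, parts.next()?))
}

fn main() {
    let (memory, cpu_allowance) = split_limit("4096MB:50ms/100ms").expect("bad limit format");
    assert_eq!(memory, "4096MB");
    assert_eq!(cpu_allowance, "50ms/100ms");
    println!("memory={memory} cpu.allowance={cpu_allowance}");
}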

@@ -3,6 +3,8 @@ pub mod email;
pub mod file;
pub mod keywords;
pub mod llm;
pub mod llm_local;
pub mod llm_provider;
pub mod script;
pub mod state;
pub mod utils;

392 src/services/llm_local.rs (Normal file)
@@ -0,0 +1,392 @@
use actix_web::{post, web, HttpRequest, HttpResponse, Result};
use dotenv::dotenv;
use reqwest::Client;
use serde::{Deserialize, Serialize};
use std::env;
use std::process::{Child, Command, Stdio};
use std::sync::{Arc, Mutex};
use tokio::io::{AsyncBufReadExt, BufReader};
use tokio::process::Command as TokioCommand;
use tokio::time::{sleep, Duration};

// Global process handle
static mut LLAMA_PROCESS: Option<Arc<Mutex<Option<tokio::process::Child>>>> = None;

// OpenAI-compatible request/response structures
#[derive(Debug, Serialize, Deserialize)]
struct ChatMessage {
    role: String,
    content: String,
}

#[derive(Debug, Serialize, Deserialize)]
struct ChatCompletionRequest {
    model: String,
    messages: Vec<ChatMessage>,
    stream: Option<bool>,
}

#[derive(Debug, Serialize, Deserialize)]
struct ChatCompletionResponse {
    id: String,
    object: String,
    created: u64,
    model: String,
    choices: Vec<Choice>,
}

#[derive(Debug, Serialize, Deserialize)]
struct Choice {
    message: ChatMessage,
    finish_reason: String,
}

// Llama.cpp server request/response structures
#[derive(Debug, Serialize, Deserialize)]
struct LlamaCppRequest {
    prompt: String,
    n_predict: Option<i32>,
    temperature: Option<f32>,
    top_k: Option<i32>,
    top_p: Option<f32>,
    stream: Option<bool>,
}

#[derive(Debug, Serialize, Deserialize)]
struct LlamaCppResponse {
    content: String,
    stop: bool,
    generation_settings: Option<serde_json::Value>,
}

// Function to check if server is running
async fn is_server_running(url: &str) -> bool {
    let client = Client::builder()
        .timeout(Duration::from_secs(3))
        .build()
        .unwrap();

    match client.get(&format!("{}/health", url)).send().await {
        Ok(response) => {
            let is_ok = response.status().is_success();
            if is_ok {
                println!("🟢 Server health check: OK");
            } else {
                println!(
                    "🔴 Server health check: Failed with status {}",
                    response.status()
                );
            }
            is_ok
        }
        Err(e) => {
            println!("🔴 Server health check: Connection failed - {}", e);
            false
        }
    }
}

// Function to start llama.cpp server
async fn start_llama_server() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
    println!("🚀 Starting llama.cpp server...");

    // Get environment variables for llama.cpp configuration
    let llama_path = env::var("LLM_CPP_PATH").unwrap_or_else(|_| "llama-server".to_string());
    let model_path = env::var("LLM_MODEL_PATH")
        .unwrap_or_else(|_| "./models/tinyllama-1.1b-q4_01.gguf".to_string());
    let cpu_limit = env::var("CPU_LIMIT").unwrap_or_else(|_| "50".to_string());
    let port = env::var("LLM_PORT").unwrap_or_else(|_| "8080".to_string());

    println!("🔧 Configuration:");
    println!("   - Llama path: {}", llama_path);
    println!("   - Model path: {}", model_path);
    println!("   - CPU limit: {}%", cpu_limit);
    println!("   - Port: {}", port);

    // Kill any existing llama processes
    println!("🧹 Cleaning up existing processes...");
    let _ = Command::new("pkill").arg("-f").arg("llama-server").output();

    // Wait a bit for cleanup
    sleep(Duration::from_secs(2)).await;

    // Build the command
    let full_command = format!(
        "cpulimit -l {} -- {} -m '{}' --n-gpu-layers 18 --temp 0.7 --ctx-size 1024 --batch-size 256 --no-mmap --mlock --port {} --host 127.0.0.1 --tensor-split 1.0 --main-gpu 0",
        cpu_limit, llama_path, model_path, port
    );

    println!("📝 Executing command: {}", full_command);

    // Start llama.cpp server with cpulimit using tokio
    let mut cmd = TokioCommand::new("sh");
    cmd.arg("-c");
    cmd.arg(&full_command);
    cmd.stdout(Stdio::piped());
    cmd.stderr(Stdio::piped());
    cmd.kill_on_drop(true);

    let mut child = cmd
        .spawn()
        .map_err(|e| format!("Failed to start llama.cpp server: {}", e))?;

    println!("🔄 Process spawned with PID: {:?}", child.id());

    // Capture stdout and stderr for real-time logging
    if let Some(stdout) = child.stdout.take() {
        let stdout_reader = BufReader::new(stdout);
        tokio::spawn(async move {
            let mut lines = stdout_reader.lines();
            while let Ok(Some(line)) = lines.next_line().await {
                println!("🦙📤 STDOUT: {}", line);
            }
            println!("🦙📤 STDOUT stream ended");
        });
    }

    if let Some(stderr) = child.stderr.take() {
        let stderr_reader = BufReader::new(stderr);
        tokio::spawn(async move {
            let mut lines = stderr_reader.lines();
            while let Ok(Some(line)) = lines.next_line().await {
                println!("🦙📥 STDERR: {}", line);
            }
            println!("🦙📥 STDERR stream ended");
        });
    }

    // Store the process handle
    unsafe {
        LLAMA_PROCESS = Some(Arc::new(Mutex::new(Some(child))));
    }

    println!("✅ Llama.cpp server process started!");
    Ok(())
}

// Function to ensure llama.cpp server is running
pub async fn ensure_llama_server_running() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
    let llama_url = env::var("LLM_URL").unwrap_or_else(|_| "http://localhost:8080".to_string());

    // Check if server is already running
    if is_server_running(&llama_url).await {
        println!("✅ Llama.cpp server is already running");
        return Ok(());
    }

    // Start the server
    start_llama_server().await?;

    // Wait for server to be ready with verbose logging
    println!("⏳ Waiting for llama.cpp server to become ready...");
    let mut attempts = 0;
    let max_attempts = 60; // 2 minutes total

    while attempts < max_attempts {
        sleep(Duration::from_secs(2)).await;

        print!(
            "🔍 Checking server health (attempt {}/{})... ",
            attempts + 1,
            max_attempts
        );

        if is_server_running(&llama_url).await {
            println!("✅ SUCCESS!");
            println!("🎉 Llama.cpp server is ready and responding!");
            return Ok(());
        } else {
            println!("❌ Not ready yet");
        }

        attempts += 1;
        if attempts % 10 == 0 {
            println!(
                "⏰ Still waiting for llama.cpp server... (attempt {}/{})",
                attempts, max_attempts
            );
            println!("💡 Check the logs above for any errors from the llama server");
        }
    }

    Err("❌ Llama.cpp server failed to start within timeout (2 minutes)".into())
}

// Convert OpenAI chat messages to a single prompt
fn messages_to_prompt(messages: &[ChatMessage]) -> String {
    let mut prompt = String::new();

    for message in messages {
        match message.role.as_str() {
            "system" => {
                prompt.push_str(&format!("System: {}\n\n", message.content));
            }
            "user" => {
                prompt.push_str(&format!("User: {}\n\n", message.content));
            }
            "assistant" => {
                prompt.push_str(&format!("Assistant: {}\n\n", message.content));
            }
            _ => {
                prompt.push_str(&format!("{}: {}\n\n", message.role, message.content));
            }
        }
    }

    prompt.push_str("Assistant: ");
    prompt
}

// Cleanup function
pub fn cleanup_processes() {
    println!("🧹 Cleaning up llama.cpp processes...");

    unsafe {
        if let Some(process_handle) = &LLAMA_PROCESS {
            if let Ok(mut process) = process_handle.lock() {
                if let Some(ref mut child) = *process {
                    println!("🔪 Killing llama server process...");
                    let _ = child.start_kill();
                }
            }
        }
    }

    // Kill any remaining llama processes
    println!("🔍 Killing any remaining llama-server processes...");
    let output = Command::new("pkill").arg("-f").arg("llama-server").output();

    match output {
        Ok(result) => {
            if result.status.success() {
                println!("✅ Successfully killed llama processes");
            } else {
                println!("ℹ️ No llama processes found to kill");
            }
        }
        Err(e) => println!("⚠️ Error trying to kill processes: {}", e),
    }
}

// Proxy endpoint
#[post("/v1/chat/completions")]
pub async fn chat_completions(
    req_body: web::Json<ChatCompletionRequest>,
    _req: HttpRequest,
) -> Result<HttpResponse> {
    dotenv().ok();

    // Ensure llama.cpp server is running
    if let Err(e) = ensure_llama_server_running().await {
        eprintln!("Failed to start llama.cpp server: {}", e);
        return Ok(HttpResponse::InternalServerError().json(serde_json::json!({
            "error": {
                "message": format!("Failed to start llama.cpp server: {}", e),
                "type": "server_error"
            }
        })));
    }

    // Get llama.cpp server URL
    let llama_url = env::var("LLM_URL").unwrap_or_else(|_| "http://localhost:8080".to_string());

    // Convert OpenAI format to llama.cpp format
    let prompt = messages_to_prompt(&req_body.messages);

    let llama_request = LlamaCppRequest {
        prompt,
        n_predict: Some(500), // Adjust as needed
        temperature: Some(0.7),
        top_k: Some(40),
        top_p: Some(0.9),
        stream: req_body.stream,
    };

    // Send request to llama.cpp server
    let client = Client::builder()
        .timeout(Duration::from_secs(120)) // 2 minute timeout
        .build()
        .map_err(|e| {
            eprintln!("Error creating HTTP client: {}", e);
            actix_web::error::ErrorInternalServerError("Failed to create HTTP client")
        })?;

    let response = client
        .post(&format!("{}/completion", llama_url))
        .header("Content-Type", "application/json")
        .json(&llama_request)
        .send()
        .await
        .map_err(|e| {
            eprintln!("Error calling llama.cpp server: {}", e);
            actix_web::error::ErrorInternalServerError("Failed to call llama.cpp server")
        })?;

    let status = response.status();

    if status.is_success() {
        let llama_response: LlamaCppResponse = response.json().await.map_err(|e| {
            eprintln!("Error parsing llama.cpp response: {}", e);
            actix_web::error::ErrorInternalServerError("Failed to parse llama.cpp response")
        })?;

        // Convert llama.cpp response to OpenAI format
        let openai_response = ChatCompletionResponse {
            id: format!("chatcmpl-{}", uuid::Uuid::new_v4()),
            object: "chat.completion".to_string(),
            created: std::time::SystemTime::now()
                .duration_since(std::time::UNIX_EPOCH)
                .unwrap()
                .as_secs(),
            model: req_body.model.clone(),
            choices: vec![Choice {
                message: ChatMessage {
                    role: "assistant".to_string(),
                    content: llama_response.content.trim().to_string(),
                },
                finish_reason: if llama_response.stop {
                    "stop".to_string()
                } else {
                    "length".to_string()
                },
            }],
        };

        Ok(HttpResponse::Ok().json(openai_response))
    } else {
        let error_text = response
            .text()
            .await
            .unwrap_or_else(|_| "Unknown error".to_string());

        eprintln!("Llama.cpp server error ({}): {}", status, error_text);

        let actix_status = actix_web::http::StatusCode::from_u16(status.as_u16())
            .unwrap_or(actix_web::http::StatusCode::INTERNAL_SERVER_ERROR);

        Ok(HttpResponse::build(actix_status).json(serde_json::json!({
            "error": {
                "message": error_text,
                "type": "server_error"
            }
        })))
    }
}

// Health check endpoint
#[actix_web::get("/health")]
pub async fn health() -> Result<HttpResponse> {
    let llama_url = env::var("LLM_URL").unwrap_or_else(|_| "http://localhost:8080".to_string());

    if is_server_running(&llama_url).await {
        Ok(HttpResponse::Ok().json(serde_json::json!({
            "status": "healthy",
            "llama_server": "running"
        })))
    } else {
        Ok(HttpResponse::ServiceUnavailable().json(serde_json::json!({
            "status": "unhealthy",
            "llama_server": "not running"
        })))
    }
}

103 src/services/llm_provider.rs (Normal file)
@@ -0,0 +1,103 @@
use actix_web::{post, web, HttpRequest, HttpResponse, Result};
use dotenv::dotenv;
use reqwest::Client;
use serde::{Deserialize, Serialize};
use std::env;

// OpenAI-compatible request/response structures
#[derive(Debug, Serialize, Deserialize)]
struct ChatMessage {
    role: String,
    content: String,
}

#[derive(Debug, Serialize, Deserialize)]
struct ChatCompletionRequest {
    model: String,
    messages: Vec<ChatMessage>,
    stream: Option<bool>,
}

#[derive(Debug, Serialize, Deserialize)]
struct ChatCompletionResponse {
    id: String,
    object: String,
    created: u64,
    model: String,
    choices: Vec<Choice>,
}

#[derive(Debug, Serialize, Deserialize)]
struct Choice {
    message: ChatMessage,
    finish_reason: String,
}

// Proxy endpoint
#[post("/v1/chat/completions")]
async fn chat_completions(
    req_body: web::Json<ChatCompletionRequest>,
    _req: HttpRequest,
) -> Result<HttpResponse> {
    dotenv().ok();

    // Environment variables
    let azure_endpoint = env::var("AI_ENDPOINT")
        .map_err(|_| actix_web::error::ErrorInternalServerError("AI_ENDPOINT not set."))?;
    let azure_key = env::var("AI_KEY")
        .map_err(|_| actix_web::error::ErrorInternalServerError("AI_KEY not set."))?;
    let deployment_name = env::var("AI_LLM_MODEL")
        .map_err(|_| actix_web::error::ErrorInternalServerError("AI_LLM_MODEL not set."))?;

    // Construct Azure OpenAI URL
    let url = format!(
        "{}/openai/deployments/{}/chat/completions?api-version=2025-01-01-preview",
        azure_endpoint, deployment_name
    );

    // Forward headers
    let mut headers = reqwest::header::HeaderMap::new();
    headers.insert(
        "api-key",
        reqwest::header::HeaderValue::from_str(&azure_key)
            .map_err(|_| actix_web::error::ErrorInternalServerError("Invalid Azure key"))?,
    );
    headers.insert(
        "Content-Type",
        reqwest::header::HeaderValue::from_static("application/json"),
    );

    // Send request to Azure
    let client = Client::new();
    let response = client
        .post(&url)
        .headers(headers)
        .json(&req_body.into_inner())
        .send()
        .await
        .map_err(actix_web::error::ErrorInternalServerError)?;

    // Handle response based on status
    let status = response.status();
    let raw_response = response
        .text()
        .await
        .map_err(actix_web::error::ErrorInternalServerError)?;

    // Log the raw response
    println!("Raw Azure response: {}", raw_response);

    if status.is_success() {
        // Parse the raw response as JSON
        let azure_response: serde_json::Value = serde_json::from_str(&raw_response)
            .map_err(actix_web::error::ErrorInternalServerError)?;

        Ok(HttpResponse::Ok().json(azure_response))
    } else {
        // Handle error responses properly
        let actix_status = actix_web::http::StatusCode::from_u16(status.as_u16())
            .unwrap_or(actix_web::http::StatusCode::INTERNAL_SERVER_ERROR);

        Ok(HttpResponse::build(actix_status).body(raw_response))
    }
}
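
For reference, a minimal client-side sketch of exercising the new /v1/chat/completions route once gbserver is running. This snippet is not part of the commit: the host and port (127.0.0.1:8080) and the model name are illustrative assumptions, and it only reuses crates the project already depends on (tokio, reqwest, serde_json).

// Hypothetical standalone example; adjust the URL to config.server.host/port.
use reqwest::Client;
use serde_json::json;

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let client = Client::new();

    // Body shape matches the ChatCompletionRequest struct above:
    // model, messages (role/content pairs) and an optional stream flag.
    let body = json!({
        "model": "gpt-4o", // illustrative; the Azure deployment actually used comes from AI_LLM_MODEL
        "messages": [
            { "role": "system", "content": "You are a helpful assistant." },
            { "role": "user", "content": "Say hello." }
        ],
        "stream": false
    });

    let response = client
        .post("http://127.0.0.1:8080/v1/chat/completions")
        .json(&body)
        .send()
        .await?;

    // On success the proxy returns the upstream JSON unchanged.
    println!("status: {}", response.status());
    println!("body: {}", response.text().await?);
    Ok(())
}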

@@ -13,17 +13,17 @@ use std::error::Error;
use std::fs::File;
use std::io::BufReader;
use std::path::Path;
use tokio::fs::File as TokioFile;
use tokio_stream::StreamExt;
use zip::ZipArchive;
use tokio::fs::File as TokioFile;

use reqwest::Client;
use tokio::io::AsyncWriteExt;

pub fn azure_from_config(config: &AIConfig) -> AzureConfig {
    AzureConfig::default()
        .with_api_key(&config.key)
    AzureConfig::new()
        .with_api_base(&config.endpoint)
        .with_api_key(&config.key)
        .with_api_version(&config.version)
        .with_deployment_id(&config.instance)
}

@@ -240,7 +240,10 @@ pub fn parse_filter(filter_str: &str) -> Result<(String, Vec<String>), Box<dyn E
}

// Parse filter without adding quotes
pub fn parse_filter_with_offset(filter_str: &str, offset: usize) -> Result<(String, Vec<String>), Box<dyn Error>> {
pub fn parse_filter_with_offset(
    filter_str: &str,
    offset: usize,
) -> Result<(String, Vec<String>), Box<dyn Error>> {
    let mut clauses = Vec::new();
    let mut params = Vec::new();

@@ -253,7 +256,10 @@ pub fn parse_filter_with_offset(filter_str: &str, offset: usize) -> Result<(Stri
        let column = parts[0].trim();
        let value = parts[1].trim();

        if !column.chars().all(|c| c.is_ascii_alphanumeric() || c == '_') {
        if !column
            .chars()
            .all(|c| c.is_ascii_alphanumeric() || c == '_')
        {
            return Err("Invalid column name".into());
        }
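
To make the reformatted helper easier to follow, here is a self-contained sketch of how a filter string could map to a WHERE clause plus positional parameters. It is only a reading of what the hunks show: the "col=value,col2=value2" syntax and the assumption that offset shifts the $N placeholder numbering are illustrative, not taken from the rest of the file.

use std::error::Error;

// Sketch of the assumed semantics of parse_filter_with_offset (hypothetical re-implementation).
fn parse_filter_sketch(filter_str: &str, offset: usize) -> Result<(String, Vec<String>), Box<dyn Error>> {
    let mut clauses = Vec::new();
    let mut params = Vec::new();

    for (i, pair) in filter_str.split(',').enumerate() {
        let parts: Vec<&str> = pair.splitn(2, '=').collect();
        if parts.len() != 2 {
            return Err("Invalid filter clause".into());
        }
        let column = parts[0].trim();
        let value = parts[1].trim();

        // Same guard as in the diff: only alphanumeric or underscore column names.
        if !column.chars().all(|c| c.is_ascii_alphanumeric() || c == '_') {
            return Err("Invalid column name".into());
        }

        clauses.push(format!("{} = ${}", column, offset + i + 1));
        params.push(value.to_string());
    }

    Ok((clauses.join(" AND "), params))
}

fn main() -> Result<(), Box<dyn Error>> {
    // With two parameters already bound by the caller, numbering starts at $3.
    let (where_clause, params) = parse_filter_sketch("name=acme, city=rio", 2)?;
    assert_eq!(where_clause, "name = $3 AND city = $4");
    assert_eq!(params, vec!["acme".to_string(), "rio".to_string()]);
    println!("{} <- {:?}", where_clause, params);
    Ok(())
}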