This commit is contained in:
parent 10b4d85629
commit 57f20d903e
21 changed files with 699 additions and 118 deletions
44 .vscode/launch.json (vendored)
@@ -1,44 +0,0 @@
{
    "version": "0.2.0",
    "configurations": [
        {
            "type": "lldb",
            "request": "launch",
            "name": "Debug GB Server",
            "cargo": {
                "args": [
                    "build",
                    "--bin=gbserver"
                ],
                "filter": {
                    "name": "gbserver",
                    "kind": "bin"
                }
            },
            "args": [],
            "cwd": "${workspaceFolder}",
        },
        {
            "type": "lldb",
            "request": "launch",
            "name": "Debug Integration Tests",
            "cargo": {
                "args": [
                    "test",
                    "--no-run",
                    "--lib",
                    "--package=gbserver"
                ],
                "filter": {
                    "name": "integration",
                    "kind": "test"
                }
            },
            "args": [],
            "cwd": "${workspaceFolder}",
            "env": {
                "RUST_LOG": "info"
            }
        },
    ],
}
7 .vscode/settings.json (vendored)
@@ -1,7 +0,0 @@
{
    "lldb.executable": "/usr/bin/lldb",
    "lldb.showDisassembly": "never",
    "lldb.dereferencePointers": true,
    "lldb.consoleMode": "commands",
    "rust-test Explorer.cargoTestExtraArgs": ["--", "--nocapture"]
}
15 .vscode/tasks.json (vendored)
@@ -1,15 +0,0 @@
{
    "version": "2.0.0",
    "tasks": [
        {
            "label": "build",
            "type": "shell",
            "command": "cargo",
            "args": ["build"],
            "group": {
                "kind": "build",
                "isDefault": true
            }
        }
    ]
}
14 .zed/debug.json (new file)
@@ -0,0 +1,14 @@
[
    {
        "label": "Build & Debug native binary",
        "build": {
            "command": "cargo",
            "args": ["build"]
        },
        "program": "$ZED_WORKTREE_ROOT/target/debug/gbserver",

        "sourceLanguages": ["rust"],
        "request": "launch",
        "adapter": "CodeLLDB"
    }
]
12 Cargo.lock (generated)
@@ -1765,6 +1765,7 @@ dependencies = [
  "tracing",
  "tracing-subscriber",
  "urlencoding",
+ "uuid",
  "zip",
 ]
@@ -5183,6 +5184,17 @@ version = "0.2.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"

+[[package]]
+name = "uuid"
+version = "1.18.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f33196643e165781c20a5ead5582283a7dacbb87855d867fbc2df3f81eddc1be"
+dependencies = [
+ "getrandom 0.3.3",
+ "js-sys",
+ "wasm-bindgen",
+]
+
 [[package]]
 name = "valuable"
 version = "0.1.1"
14 Cargo.toml
@@ -11,7 +11,7 @@ repository = "https://alm.pragmatismo.com.br/generalbots/gbserver"
 actix-cors = "0.6"
 actix-multipart = "0.6"
 actix-web = "4"
-actix-ws="0.3.0"
+actix-ws = "0.3.0"
 thirtyfour = { version = "0.30" }
 downloader = "0.2.8"
 anyhow = "1.0"
@@ -24,7 +24,12 @@ futures = "0.3"
 futures-util = "0.3"
 imap = "2.0"
 langchain-rust = "4.4.3"
-lettre = { version = "0.10", features = ["smtp-transport", "builder", "tokio1", "tokio1-native-tls"] }
+lettre = { version = "0.10", features = [
+    "smtp-transport",
+    "builder",
+    "tokio1",
+    "tokio1-native-tls",
+] }
 log = "0.4"
 mailparse = "0.13"
 minio = { git = "https://github.com/minio/minio-rs", branch = "master" }
@@ -33,7 +38,7 @@ reqwest = { version = "0.11", features = ["json", "stream"] }
 rhai = "1.22.2"
 serde = { version = "1.0", features = ["derive"] }
 serde_json = "1.0"
 smartstring = "1.0" # Use the latest version from crates.io
 sqlx = { version = "0.7", features = ["runtime-tokio-rustls", "postgres"] }
 tempfile = "3"
 tokio = { version = "1", features = ["full"] }
@@ -43,4 +48,5 @@ tracing-subscriber = { version = "0.3", features = ["fmt"] }
 scraper = "0.18"
 urlencoding = "2.1"
 regex = "1.10"
+uuid = { version = "1.0", features = ["v4"] }
 zip = "4.3.0"
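Note on the new uuid dependency: the "v4" feature provides random UUIDs, which the new src/services/llm_local.rs (further down in this commit) uses to mint "chatcmpl-..." response IDs. A minimal illustration, not part of the commit:

use uuid::Uuid;

fn main() {
    // Version-4 (random) UUID, the same call used for completion IDs in llm_local.rs.
    let id = format!("chatcmpl-{}", Uuid::new_v4());
    println!("{}", id); // e.g. chatcmpl-67e55044-10b1-426f-9247-bb680e5fe0c8
}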
16 src/main.rs
@@ -5,6 +5,7 @@ use actix_web::http::header;
 use actix_web::{web, App, HttpServer};
 use dotenv::dotenv;
+use reqwest::Client;
 use services::config::*;
 use services::email::*;
 use services::file::*;
@@ -13,12 +14,13 @@ use services::script::*;
 use services::state::*;
 use sqlx::PgPool;
+use crate::services::llm_local::ensure_llama_server_running;
+use crate::services::llm_provider::chat_completions;
 use crate::services::web_automation::{initialize_browser_pool, BrowserPool};
-//use services:: find::*;

 mod services;

 #[tokio::main(flavor = "multi_thread")]
 async fn main() -> std::io::Result<()> {
     dotenv().ok();
     env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")).init();
@@ -38,10 +40,15 @@ async fn main() -> std::io::Result<()> {
         5,
         "/usr/bin/brave-browser-beta".to_string(),
     ));

+    // ensure_llama_server_running()
+    //     .await
+    //     .expect("Failed to initialize LLM local server.");
+
     initialize_browser_pool()
         .await
         .expect("Failed to initialize browser pool");

     let app_state = web::Data::new(AppState {
         db: db.into(),
         db_custom: db_custom.into(),
@@ -70,9 +77,9 @@ async fn main() -> std::io::Result<()> {
             .allowed_headers(vec![header::AUTHORIZATION, header::ACCEPT])
             .allowed_header(header::CONTENT_TYPE)
             .max_age(3600);
+            //.wrap(cors)

         App::new()
-            .wrap(cors)
             .app_data(app_state.clone())
             .service(upload_file)
             .service(list_file)
@@ -81,6 +88,7 @@ async fn main() -> std::io::Result<()> {
             .service(list_emails)
             .service(send_email)
             .service(chat_stream)
+            .service(chat_completions)
             .service(chat)
     })
     .bind((config.server.host.clone(), config.server.port))?
@@ -1,23 +1,20 @@
 let items = FIND "gb.rob", "ACTION=EMUL1"
 FOR EACH item IN items

     PRINT item.company
     let website = WEBSITE OF item.company
     PRINT website

     let page = GET website
-    let prompt = "Build the same simulator , but for " + item.company + " using just *content about the company* from its website, so it is possible to create a good and useful emulator in the same langue as the content: " + page
+    let prompt = "Build the same simulator, but for " + item.company + " using just *content about the company* from its website, so it is possible to create a good and useful emulator in the same langue as the content: " + page

     let alias = LLM "Return a single word for " + item.company + " like a token, no spaces, no special characters, no numbers, no uppercase letters."
+    CREATE_SITE alias, "blank", prompt

     let to = item.emailcto
     let subject = "Simulador " + alias
-    let body = "Oi, " + FIRST(item.Contact) + "! Tudo bem? Estou empolgado, pois criamos o simulador " + alias + " especificamente para vocês!" + "\n\n Acesse o site: https://sites.pragmatismo.com.br/" + alias + "\n\n" + "Para acessar o simulador, clique no link acima ou copie e cole no seu navegador." + "\n\n" + "Para iniciar, clique no ícone de Play." + "\n\n" + "Atenciosamente,\nDário Vieira\n\n"
+    let name = FIRST(item.Contact)
+    let body = "Oi, " + name + "! Tudo bem? Estou empolgado, pois criamos o simulador " + alias + " especificamente para vocês!" + "\n\n Acesse o site: https://sites.pragmatismo.com.br/" + alias + "\n\n" + "Para acessar o simulador, clique no link acima ou copie e cole no seu navegador." + "\n\n" + "Para iniciar, clique no ícone de Play." + "\n\n" + "Atenciosamente,\nDário Vieira\n\n"
     CREATE_DRAFT to, subject, body

-    SET "gb.rob", "robid="+ item.robid, "ACTION=CLOSE"
 NEXT item
@@ -13,8 +13,6 @@ sleep 15
 lxc exec "$PARAM_TENANT"-bot -- bash -c "

-echo "nameserver $PARAM_DNS_INTERNAL_IP" > /etc/resolv.conf
-
 apt-get update && apt-get install -y \
     build-essential cmake git pkg-config libjpeg-dev libtiff-dev \
@@ -1,11 +1,6 @@
 #!/bin/bash
 PUBLIC_INTERFACE="eth0" # Your host's public network interface

-# Enable IP forwarding
-echo "[HOST] Enabling IP forwarding..."
-echo "net.ipv4.ip_forward=1" | sudo tee -a /etc/sysctl.conf
-sudo sysctl -p
-
 # Configure firewall
 echo "[HOST] Configuring firewall..."
 sudo iptables -A FORWARD -i $PUBLIC_INTERFACE -o lxcbr0 -p tcp -m multiport --dports 25,80,110,143,465,587,993,995,4190 -j ACCEPT
@@ -22,7 +17,7 @@ fi

 # Create directory structure
 echo "[CONTAINER] Creating directories..."
-HOST_BASE="/opt/gbo/tenants/$PARAM_TENANT/email"
+HOST_BASE="/opt/email"
 HOST_DATA="$HOST_BASE/data"
 HOST_CONF="$HOST_BASE/conf"
 HOST_LOGS="$HOST_BASE/logs"
@@ -66,9 +61,9 @@ sudo chown -R "$HOST_EMAIL_UID:$HOST_EMAIL_GID" "$HOST_BASE"

 # Mount directories
 echo "[CONTAINER] Mounting directories..."
-lxc config device add "$PARAM_TENANT"-email emaildata disk source="$HOST_DATA" path=/opt/gbo/data
-lxc config device add "$PARAM_TENANT"-email emailconf disk source="$HOST_CONF" path=/opt/gbo/conf
-lxc config device add "$PARAM_TENANT"-email emaillogs disk source="$HOST_LOGS" path=/opt/gbo/logs
+lxc config device add emailprofile emaildata disk source="$HOST_DATA" path=/opt/gbo/data
+lxc config device add emailprofile emailconf disk source="$HOST_CONF" path=/opt/gbo/conf
+lxc config device add emailprofile emaillogs disk source="$HOST_LOGS" path=/opt/gbo/logs

 # Create systemd service
 echo "[CONTAINER] Creating email service..."
@@ -96,3 +91,8 @@ systemctl daemon-reload
 systemctl enable email
 systemctl start email
 "

+for port in 25 80 110 143 465 587 993 995 4190; do
+    lxc config device remove email "port-$port" 2>/dev/null || true
+    lxc config device add email "port-$port" proxy listen=tcp:0.0.0.0:$port connect=tcp:127.0.0.1:$port
+done
30 src/scripts/containers/host.sh (new file)
@@ -0,0 +1,30 @@
sudo apt install sshfs -y
lxc init
lxc storage create default dir
lxc profile device add default root disk path=/ pool=default

sudo apt update && sudo apt install -y bridge-utils

# Enable IP forwarding
echo "[HOST] Enabling IP forwarding..."
echo "net.ipv4.ip_forward=1" | sudo tee -a /etc/sysctl.conf
sudo sysctl -p

wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/x86_64/cuda-keyring_1.1-1_all.deb
sudo dpkg -i cuda-keyring_1.1-1_all.deb
sudo apt-get update

sudo apt purge '^nvidia-*' # Clean existing drivers
sudo add-apt-repository ppa:graphics-drivers/ppa
sudo apt update
sudo apt install nvidia-driver-470-server # Most stable for Kepler GPUs

wget https://developer.download.nvidia.com/compute/cuda/11.0.3/local_installers/cuda_11.0.3_450.51.06_linux.run
sudo sh cuda_11.0.3_450.51.06_linux.run --override
61 src/scripts/containers/llm.sh (new file)
@@ -0,0 +1,61 @@
#DeepSeek-R1-Distill-Qwen-1.5B-Q3_K_M.gguf
#Phi-3.5-mini-instruct-IQ2_M.gguf
#tinyllama-1.1b-chat-v1.0.Q4_0.gguf

sudo apt update
sudo apt upgrade -y
sudo apt install -y build-essential cmake git curl wget libcurl4-openssl-dev pkg-config gcc-9 g++-9

sudo apt install software-properties-common
sudo add-apt-repository ppa:deadsnakes/ppa
sudo apt install python3.6 python3.6-venv python3.6-dev
wget https://download.pytorch.org/whl/cu110/torch-1.7.1%2Bcu110-cp36-cp36m-linux_x86_64.whl
wget https://download.pytorch.org/whl/cu110/torchvision-0.8.2%2Bcu110-cp36-cp36m-linux_x86_64.whl

sudo ubuntu-drivers autoinstall

sleep 10

CUDA_RUN_FILE="cuda_11.0.3_450.51.06_linux.run"
wget https://developer.download.nvidia.com/compute/cuda/11.0.3/local_installers/$CUDA_RUN_FILE
chmod +x $CUDA_RUN_FILE
sudo ./$CUDA_RUN_FILE --silent --toolkit

echo 'export PATH=/usr/local/cuda-11.0/bin:$PATH' >> ~/.bashrc
echo 'export LD_LIBRARY_PATH=/usr/local/cuda-11.0/lib64:$LD_LIBRARY_PATH' >> ~/.bashrc
source ~/.bashrc

nvidia-smi
nvcc --version

python3 -m venv llama_venv
source llama_venv/bin/activate
pip install --upgrade pip
pip install torch==1.12.1+cu110 torchvision==0.13.1+cu110 torchaudio==0.12.1 --extra-index-url https://download.pytorch.org/whl/cu110

cd ~
git clone https://github.com/ggerganov/llama.cpp.git
cd llama.cpp
rm -rf build
mkdir build
cd build

# EDIT FILE: ggml/src/ggml-cuda/fattn-common. (add the block below)
#ifdef __CUDACC__
#ifndef __builtin_assume
#define __builtin_assume(x) // empty: ignore it for CUDA compiler
#endif
#endif
cmake -DGGML_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES=35 ..
make -j$(nproc)

# OR
wget https://github.com/ggml-org/llama.cpp/releases/download/b6148/llama-b6148-bin-ubuntu-x64.zip

wget https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/tinyllama-1.1b-chat-v1.0.Q4_0.gguf?download=true
@@ -47,10 +47,10 @@ chown -R gbuser:gbuser /opt/gbo/{bin,data,conf,logs}
 systemctl enable proxy
 "

 for port in 80 443; do
     lxc config device remove "$PARAM_TENANT"-proxy "port-$port" 2>/dev/null || true
     lxc config device add "$PARAM_TENANT"-proxy "port-$port" proxy listen=tcp:0.0.0.0:$port connect=tcp:127.0.0.1:$port
 done

 lxc config set "$PARAM_TENANT"-proxy security.syscalls.intercept.mknod true
 lxc config set "$PARAM_TENANT"-proxy security.syscalls.intercept.setxattr true
@@ -56,6 +56,8 @@ if command -v lxc >/dev/null 2>&1; then
 echo 'Cleaning logs...'
 rm -rf /opt/gbo/logs/*

+echo 'Cleaning journal logs...'
 journalctl --vacuum-time=1d 2>/dev/null || true

 echo 'Cleaning thumbnail cache...'
6 src/scripts/utils/disk-size.md (new file)
@@ -0,0 +1,6 @@
lxc list --format json | jq -r '.[].name' | while read container; do
    echo -n "$container: "
    lxc exec $container -- df -h / --output=used < /dev/null | tail -n1
done

du -h --max-depth=1 "." 2>/dev/null | sort -rh | head -n 50 | awk '{printf "%-10s %s\n", $1, $2}'
8 src/scripts/utils/email-ips.sh (new file)
@@ -0,0 +1,8 @@
az network public-ip list --resource-group "$CLOUD_GROUP" \
    --query "[].{Name:name, IP:ipAddress, ReverseDNS:dnsSettings.reverseFqdn}" \
    -o table

az network public-ip update --resource-group "$CLOUD_GROUP"
    --name "pip-network-adapter-name"
    --reverse-fqdn "outbound14.domain.com.br"
@@ -3,18 +3,20 @@
 # Define container limits in an associative array
 declare -A container_limits=(
     # Pattern        Memory    CPU Allowance
-    ["*tables*"]="2048MB:50ms/100ms"
+    ["*tables*"]="4096MB:100ms/100ms"
     ["*dns*"]="2048MB:100ms/100ms"
-    ["*doc-editor*"]="512MB:50ms/100ms"
-    ["*proxy*"]="512MB:50ms/100ms"
-    ["*directory*"]="512MB:50ms/100ms"
-    ["*drive*"]="1024MB:50ms/100ms"
-    ["*email*"]="2048MB:50ms/100ms"
-    ["*webmail*"]="2048MB:50ms/100ms"
-    ["*bot*"]="2048MB:50ms/100ms"
-    ["*meeting*"]="1024MB:50ms/100ms"
+    ["*doc-editor*"]="512MB:10ms/100ms"
+    ["*proxy*"]="20248MB:50ms/100ms"
+    ["*directory*"]="1024MB:50ms/100ms"
+    ["*drive*"]="4096MB:25ms/100ms"
+    ["*email*"]="4096MB:50ms/100ms"
+    ["*webmail*"]="4096MB:50ms/100ms"
+    ["*bot*"]="4096MB:50ms/100ms"
+    ["*meeting*"]="4096MB:100ms/100ms"
     ["*alm*"]="512MB:50ms/100ms"
-    ["*alm-ci*"]="4096MB:50ms/100ms"
+    ["*alm-ci*"]="4096MB:25ms/100ms"
+    ["*system*"]="4096MB:10ms/100ms"
+    ["*mailer*"]="4096MB:25ms/100ms"
 )

 # Default values (for containers that don't match any pattern)
@@ -3,6 +3,8 @@ pub mod email;
 pub mod file;
 pub mod keywords;
 pub mod llm;
+pub mod llm_local;
+pub mod llm_provider;
 pub mod script;
 pub mod state;
 pub mod utils;
392 src/services/llm_local.rs (new file)
@@ -0,0 +1,392 @@
use actix_web::{post, web, HttpRequest, HttpResponse, Result};
use dotenv::dotenv;
use reqwest::Client;
use serde::{Deserialize, Serialize};
use std::env;
use std::process::{Child, Command, Stdio};
use std::sync::{Arc, Mutex};
use tokio::io::{AsyncBufReadExt, BufReader};
use tokio::process::Command as TokioCommand;
use tokio::time::{sleep, Duration};

// Global process handle
static mut LLAMA_PROCESS: Option<Arc<Mutex<Option<tokio::process::Child>>>> = None;

// OpenAI-compatible request/response structures
#[derive(Debug, Serialize, Deserialize)]
struct ChatMessage {
    role: String,
    content: String,
}

#[derive(Debug, Serialize, Deserialize)]
struct ChatCompletionRequest {
    model: String,
    messages: Vec<ChatMessage>,
    stream: Option<bool>,
}

#[derive(Debug, Serialize, Deserialize)]
struct ChatCompletionResponse {
    id: String,
    object: String,
    created: u64,
    model: String,
    choices: Vec<Choice>,
}

#[derive(Debug, Serialize, Deserialize)]
struct Choice {
    message: ChatMessage,
    finish_reason: String,
}

// Llama.cpp server request/response structures
#[derive(Debug, Serialize, Deserialize)]
struct LlamaCppRequest {
    prompt: String,
    n_predict: Option<i32>,
    temperature: Option<f32>,
    top_k: Option<i32>,
    top_p: Option<f32>,
    stream: Option<bool>,
}

#[derive(Debug, Serialize, Deserialize)]
struct LlamaCppResponse {
    content: String,
    stop: bool,
    generation_settings: Option<serde_json::Value>,
}

// Function to check if server is running
async fn is_server_running(url: &str) -> bool {
    let client = Client::builder()
        .timeout(Duration::from_secs(3))
        .build()
        .unwrap();

    match client.get(&format!("{}/health", url)).send().await {
        Ok(response) => {
            let is_ok = response.status().is_success();
            if is_ok {
                println!("🟢 Server health check: OK");
            } else {
                println!(
                    "🔴 Server health check: Failed with status {}",
                    response.status()
                );
            }
            is_ok
        }
        Err(e) => {
            println!("🔴 Server health check: Connection failed - {}", e);
            false
        }
    }
}

// Function to start llama.cpp server
async fn start_llama_server() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
    println!("🚀 Starting llama.cpp server...");

    // Get environment variables for llama.cpp configuration
    let llama_path = env::var("LLM_CPP_PATH").unwrap_or_else(|_| "llama-server".to_string());
    let model_path = env::var("LLM_MODEL_PATH")
        .unwrap_or_else(|_| "./models/tinyllama-1.1b-q4_01.gguf".to_string());
    let cpu_limit = env::var("CPU_LIMIT").unwrap_or_else(|_| "50".to_string());
    let port = env::var("LLM_PORT").unwrap_or_else(|_| "8080".to_string());

    println!("🔧 Configuration:");
    println!("   - Llama path: {}", llama_path);
    println!("   - Model path: {}", model_path);
    println!("   - CPU limit: {}%", cpu_limit);
    println!("   - Port: {}", port);

    // Kill any existing llama processes
    println!("🧹 Cleaning up existing processes...");
    let _ = Command::new("pkill").arg("-f").arg("llama-server").output();

    // Wait a bit for cleanup
    sleep(Duration::from_secs(2)).await;

    // Build the command
    let full_command = format!(
        "cpulimit -l {} -- {} -m '{}' --n-gpu-layers 18 --temp 0.7 --ctx-size 1024 --batch-size 256 --no-mmap --mlock --port {} --host 127.0.0.1 --tensor-split 1.0 --main-gpu 0",
        cpu_limit, llama_path, model_path, port
    );

    println!("📝 Executing command: {}", full_command);

    // Start llama.cpp server with cpulimit using tokio
    let mut cmd = TokioCommand::new("sh");
    cmd.arg("-c");
    cmd.arg(&full_command);
    cmd.stdout(Stdio::piped());
    cmd.stderr(Stdio::piped());
    cmd.kill_on_drop(true);

    let mut child = cmd
        .spawn()
        .map_err(|e| format!("Failed to start llama.cpp server: {}", e))?;

    println!("🔄 Process spawned with PID: {:?}", child.id());

    // Capture stdout and stderr for real-time logging
    if let Some(stdout) = child.stdout.take() {
        let stdout_reader = BufReader::new(stdout);
        tokio::spawn(async move {
            let mut lines = stdout_reader.lines();
            while let Ok(Some(line)) = lines.next_line().await {
                println!("🦙📤 STDOUT: {}", line);
            }
            println!("🦙📤 STDOUT stream ended");
        });
    }

    if let Some(stderr) = child.stderr.take() {
        let stderr_reader = BufReader::new(stderr);
        tokio::spawn(async move {
            let mut lines = stderr_reader.lines();
            while let Ok(Some(line)) = lines.next_line().await {
                println!("🦙📥 STDERR: {}", line);
            }
            println!("🦙📥 STDERR stream ended");
        });
    }

    // Store the process handle
    unsafe {
        LLAMA_PROCESS = Some(Arc::new(Mutex::new(Some(child))));
    }

    println!("✅ Llama.cpp server process started!");
    Ok(())
}

// Function to ensure llama.cpp server is running
pub async fn ensure_llama_server_running() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
    let llama_url = env::var("LLM_URL").unwrap_or_else(|_| "http://localhost:8080".to_string());

    // Check if server is already running
    if is_server_running(&llama_url).await {
        println!("✅ Llama.cpp server is already running");
        return Ok(());
    }

    // Start the server
    start_llama_server().await?;

    // Wait for server to be ready with verbose logging
    println!("⏳ Waiting for llama.cpp server to become ready...");
    let mut attempts = 0;
    let max_attempts = 60; // 2 minutes total

    while attempts < max_attempts {
        sleep(Duration::from_secs(2)).await;

        print!(
            "🔍 Checking server health (attempt {}/{})... ",
            attempts + 1,
            max_attempts
        );

        if is_server_running(&llama_url).await {
            println!("✅ SUCCESS!");
            println!("🎉 Llama.cpp server is ready and responding!");
            return Ok(());
        } else {
            println!("❌ Not ready yet");
        }

        attempts += 1;
        if attempts % 10 == 0 {
            println!(
                "⏰ Still waiting for llama.cpp server... (attempt {}/{})",
                attempts, max_attempts
            );
            println!("💡 Check the logs above for any errors from the llama server");
        }
    }

    Err("❌ Llama.cpp server failed to start within timeout (2 minutes)".into())
}

// Convert OpenAI chat messages to a single prompt
fn messages_to_prompt(messages: &[ChatMessage]) -> String {
    let mut prompt = String::new();

    for message in messages {
        match message.role.as_str() {
            "system" => {
                prompt.push_str(&format!("System: {}\n\n", message.content));
            }
            "user" => {
                prompt.push_str(&format!("User: {}\n\n", message.content));
            }
            "assistant" => {
                prompt.push_str(&format!("Assistant: {}\n\n", message.content));
            }
            _ => {
                prompt.push_str(&format!("{}: {}\n\n", message.role, message.content));
            }
        }
    }

    prompt.push_str("Assistant: ");
    prompt
}

// Cleanup function
pub fn cleanup_processes() {
    println!("🧹 Cleaning up llama.cpp processes...");

    unsafe {
        if let Some(process_handle) = &LLAMA_PROCESS {
            if let Ok(mut process) = process_handle.lock() {
                if let Some(ref mut child) = *process {
                    println!("🔪 Killing llama server process...");
                    let _ = child.start_kill();
                }
            }
        }
    }

    // Kill any remaining llama processes
    println!("🔍 Killing any remaining llama-server processes...");
    let output = Command::new("pkill").arg("-f").arg("llama-server").output();

    match output {
        Ok(result) => {
            if result.status.success() {
                println!("✅ Successfully killed llama processes");
            } else {
                println!("ℹ️ No llama processes found to kill");
            }
        }
        Err(e) => println!("⚠️ Error trying to kill processes: {}", e),
    }
}

// Proxy endpoint
#[post("/v1/chat/completions")]
pub async fn chat_completions(
    req_body: web::Json<ChatCompletionRequest>,
    _req: HttpRequest,
) -> Result<HttpResponse> {
    dotenv().ok();

    // Ensure llama.cpp server is running
    if let Err(e) = ensure_llama_server_running().await {
        eprintln!("Failed to start llama.cpp server: {}", e);
        return Ok(HttpResponse::InternalServerError().json(serde_json::json!({
            "error": {
                "message": format!("Failed to start llama.cpp server: {}", e),
                "type": "server_error"
            }
        })));
    }

    // Get llama.cpp server URL
    let llama_url = env::var("LLM_URL").unwrap_or_else(|_| "http://localhost:8080".to_string());

    // Convert OpenAI format to llama.cpp format
    let prompt = messages_to_prompt(&req_body.messages);

    let llama_request = LlamaCppRequest {
        prompt,
        n_predict: Some(500), // Adjust as needed
        temperature: Some(0.7),
        top_k: Some(40),
        top_p: Some(0.9),
        stream: req_body.stream,
    };

    // Send request to llama.cpp server
    let client = Client::builder()
        .timeout(Duration::from_secs(120)) // 2 minute timeout
        .build()
        .map_err(|e| {
            eprintln!("Error creating HTTP client: {}", e);
            actix_web::error::ErrorInternalServerError("Failed to create HTTP client")
        })?;

    let response = client
        .post(&format!("{}/completion", llama_url))
        .header("Content-Type", "application/json")
        .json(&llama_request)
        .send()
        .await
        .map_err(|e| {
            eprintln!("Error calling llama.cpp server: {}", e);
            actix_web::error::ErrorInternalServerError("Failed to call llama.cpp server")
        })?;

    let status = response.status();

    if status.is_success() {
        let llama_response: LlamaCppResponse = response.json().await.map_err(|e| {
            eprintln!("Error parsing llama.cpp response: {}", e);
            actix_web::error::ErrorInternalServerError("Failed to parse llama.cpp response")
        })?;

        // Convert llama.cpp response to OpenAI format
        let openai_response = ChatCompletionResponse {
            id: format!("chatcmpl-{}", uuid::Uuid::new_v4()),
            object: "chat.completion".to_string(),
            created: std::time::SystemTime::now()
                .duration_since(std::time::UNIX_EPOCH)
                .unwrap()
                .as_secs(),
            model: req_body.model.clone(),
            choices: vec![Choice {
                message: ChatMessage {
                    role: "assistant".to_string(),
                    content: llama_response.content.trim().to_string(),
                },
                finish_reason: if llama_response.stop {
                    "stop".to_string()
                } else {
                    "length".to_string()
                },
            }],
        };

        Ok(HttpResponse::Ok().json(openai_response))
    } else {
        let error_text = response
            .text()
            .await
            .unwrap_or_else(|_| "Unknown error".to_string());

        eprintln!("Llama.cpp server error ({}): {}", status, error_text);

        let actix_status = actix_web::http::StatusCode::from_u16(status.as_u16())
            .unwrap_or(actix_web::http::StatusCode::INTERNAL_SERVER_ERROR);

        Ok(HttpResponse::build(actix_status).json(serde_json::json!({
            "error": {
                "message": error_text,
                "type": "server_error"
            }
        })))
    }
}

// Health check endpoint
#[actix_web::get("/health")]
pub async fn health() -> Result<HttpResponse> {
    let llama_url = env::var("LLM_URL").unwrap_or_else(|_| "http://localhost:8080".to_string());

    if is_server_running(&llama_url).await {
        Ok(HttpResponse::Ok().json(serde_json::json!({
            "status": "healthy",
            "llama_server": "running"
        })))
    } else {
        Ok(HttpResponse::ServiceUnavailable().json(serde_json::json!({
            "status": "unhealthy",
            "llama_server": "not running"
        })))
    }
}
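The messages_to_prompt helper above flattens an OpenAI-style message list into a single llama.cpp prompt, ending with "Assistant: " so the model continues in the assistant role. A standalone sketch of the same mapping (the flatten function is hypothetical, for illustration only; it is not part of this file):

fn flatten(messages: &[(&str, &str)]) -> String {
    let mut prompt = String::new();
    for (role, content) in messages {
        // Known roles get capitalized labels; unknown roles pass through as-is,
        // mirroring the match in messages_to_prompt.
        let label = match *role {
            "system" => "System",
            "user" => "User",
            "assistant" => "Assistant",
            other => other,
        };
        prompt.push_str(&format!("{}: {}\n\n", label, content));
    }
    prompt.push_str("Assistant: ");
    prompt
}

fn main() {
    let prompt = flatten(&[("system", "Be brief."), ("user", "Hi!")]);
    assert_eq!(prompt, "System: Be brief.\n\nUser: Hi!\n\nAssistant: ");
}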
103 src/services/llm_provider.rs (new file)
@@ -0,0 +1,103 @@
use actix_web::{post, web, HttpRequest, HttpResponse, Result};
use dotenv::dotenv;
use reqwest::Client;
use serde::{Deserialize, Serialize};
use std::env;

// OpenAI-compatible request/response structures
#[derive(Debug, Serialize, Deserialize)]
struct ChatMessage {
    role: String,
    content: String,
}

#[derive(Debug, Serialize, Deserialize)]
struct ChatCompletionRequest {
    model: String,
    messages: Vec<ChatMessage>,
    stream: Option<bool>,
}

#[derive(Debug, Serialize, Deserialize)]
struct ChatCompletionResponse {
    id: String,
    object: String,
    created: u64,
    model: String,
    choices: Vec<Choice>,
}

#[derive(Debug, Serialize, Deserialize)]
struct Choice {
    message: ChatMessage,
    finish_reason: String,
}

// Proxy endpoint
#[post("/v1/chat/completions")]
async fn chat_completions(
    req_body: web::Json<ChatCompletionRequest>,
    _req: HttpRequest,
) -> Result<HttpResponse> {
    dotenv().ok();

    // Environment variables
    let azure_endpoint = env::var("AI_ENDPOINT")
        .map_err(|_| actix_web::error::ErrorInternalServerError("AI_ENDPOINT not set."))?;
    let azure_key = env::var("AI_KEY")
        .map_err(|_| actix_web::error::ErrorInternalServerError("AI_KEY not set."))?;
    let deployment_name = env::var("AI_LLM_MODEL")
        .map_err(|_| actix_web::error::ErrorInternalServerError("AI_LLM_MODEL not set."))?;

    // Construct Azure OpenAI URL
    let url = format!(
        "{}/openai/deployments/{}/chat/completions?api-version=2025-01-01-preview",
        azure_endpoint, deployment_name
    );

    // Forward headers
    let mut headers = reqwest::header::HeaderMap::new();
    headers.insert(
        "api-key",
        reqwest::header::HeaderValue::from_str(&azure_key)
            .map_err(|_| actix_web::error::ErrorInternalServerError("Invalid Azure key"))?,
    );
    headers.insert(
        "Content-Type",
        reqwest::header::HeaderValue::from_static("application/json"),
    );

    // Send request to Azure
    let client = Client::new();
    let response = client
        .post(&url)
        .headers(headers)
        .json(&req_body.into_inner())
        .send()
        .await
        .map_err(actix_web::error::ErrorInternalServerError)?;

    // Handle response based on status
    let status = response.status();
    let raw_response = response
        .text()
        .await
        .map_err(actix_web::error::ErrorInternalServerError)?;

    // Log the raw response
    println!("Raw Azure response: {}", raw_response);

    if status.is_success() {
        // Parse the raw response as JSON
        let azure_response: serde_json::Value = serde_json::from_str(&raw_response)
            .map_err(actix_web::error::ErrorInternalServerError)?;

        Ok(HttpResponse::Ok().json(azure_response))
    } else {
        // Handle error responses properly
        let actix_status = actix_web::http::StatusCode::from_u16(status.as_u16())
            .unwrap_or(actix_web::http::StatusCode::INTERNAL_SERVER_ERROR);

        Ok(HttpResponse::build(actix_status).body(raw_response))
    }
}
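Since main.rs registers llm_provider::chat_completions, the proxy can be exercised with any OpenAI-style client. A minimal smoke-test sketch; the host, port, and model name below are placeholders, and it assumes a scratch crate with tokio, serde_json, and reqwest (with its "json" feature):

use serde_json::json;

#[tokio::main]
async fn main() -> Result<(), reqwest::Error> {
    // Body shape matches ChatCompletionRequest above: model, messages, optional stream.
    let body = json!({
        "model": "gpt-4o",
        "messages": [
            { "role": "system", "content": "You are a helpful assistant." },
            { "role": "user", "content": "Say hello." }
        ],
        "stream": false
    });

    let resp = reqwest::Client::new()
        .post("http://localhost:8080/v1/chat/completions") // placeholder host/port
        .json(&body)
        .send()
        .await?;

    println!("status: {}", resp.status());
    println!("{}", resp.text().await?);
    Ok(())
}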
@@ -13,17 +13,17 @@ use std::error::Error;
 use std::fs::File;
 use std::io::BufReader;
 use std::path::Path;
+use tokio::fs::File as TokioFile;
 use tokio_stream::StreamExt;
 use zip::ZipArchive;
-use tokio::fs::File as TokioFile;

 use reqwest::Client;
 use tokio::io::AsyncWriteExt;

 pub fn azure_from_config(config: &AIConfig) -> AzureConfig {
-    AzureConfig::default()
+    AzureConfig::new()
+        .with_api_base(&config.endpoint)
         .with_api_key(&config.key)
-        .with_api_base(&config.endpoint)
         .with_api_version(&config.version)
         .with_deployment_id(&config.instance)
 }
@@ -240,10 +240,13 @@ pub fn parse_filter(filter_str: &str) -> Result<(String, Vec<String>), Box<dyn Error>> {
 }

 // Parse filter without adding quotes
-pub fn parse_filter_with_offset(filter_str: &str, offset: usize) -> Result<(String, Vec<String>), Box<dyn Error>> {
+pub fn parse_filter_with_offset(
+    filter_str: &str,
+    offset: usize,
+) -> Result<(String, Vec<String>), Box<dyn Error>> {
     let mut clauses = Vec::new();
     let mut params = Vec::new();

     for (i, condition) in filter_str.split('&').enumerate() {
         let parts: Vec<&str> = condition.split('=').collect();
         if parts.len() != 2 {
@@ -253,7 +256,10 @@ pub fn parse_filter_with_offset(filter_str: &str, offset: usize) -> Result<(String, Vec<String>), Box<dyn Error>> {
         let column = parts[0].trim();
         let value = parts[1].trim();

-        if !column.chars().all(|c| c.is_ascii_alphanumeric() || c == '_') {
+        if !column
+            .chars()
+            .all(|c| c.is_ascii_alphanumeric() || c == '_')
+        {
             return Err("Invalid column name".into());
         }