- LLM local fixes.

This commit is contained in:
Rodrigo Rodriguez (Pragmatismo) 2025-09-09 15:09:28 -03:00
parent 909f2ae5f1
commit c54904b18b
4 changed files with 68 additions and 66 deletions

View file

@ -17,9 +17,6 @@ apt-get update && apt-get install -y wget
wget https://dl.min.io/server/minio/release/linux-amd64/minio -O /usr/local/bin/minio wget https://dl.min.io/server/minio/release/linux-amd64/minio -O /usr/local/bin/minio
chmod +x /usr/local/bin/minio chmod +x /usr/local/bin/minio
wget https://dl.min.io/client/mc/release/linux-amd64/mc
chmod +x /usr/local/bin/mc
useradd -r -s /bin/false minio-user || true useradd -r -s /bin/false minio-user || true
mkdir -p /var/log/minio /data mkdir -p /var/log/minio /data
chown -R minio-user:minio-user /var/log/minio /data chown -R minio-user:minio-user /var/log/minio /data
@ -35,7 +32,7 @@ User=minio-user
Group=minio-user Group=minio-user
Environment="MINIO_ROOT_USER='"${PARAM_DRIVE_USER}"'" Environment="MINIO_ROOT_USER='"${PARAM_DRIVE_USER}"'"
Environment="MINIO_ROOT_PASSWORD='"${PARAM_DRIVE_PASSWORD}"'" Environment="MINIO_ROOT_PASSWORD='"${PARAM_DRIVE_PASSWORD}"'"
ExecStart=/usr/local/bin/minio server --console-address ":'"${PARAM_DRIVE_PORT}"'" /data ExecStart=/usr/local/bin/minio server --address ":'"${PARAM_DRIVE_PORT}"'" --console-address ":'"${PARAM_PORT}"'" /data
StandardOutput=append:/var/log/minio/output.log StandardOutput=append:/var/log/minio/output.log
StandardError=append:/var/log/minio/error.log StandardError=append:/var/log/minio/error.log

View file

@ -17,7 +17,7 @@ sleep 15
lxc exec $CONTAINER_NAME -- bash -c ' lxc exec $CONTAINER_NAME -- bash -c '
apt-get update && apt-get install -y wget unzip apt-get update && apt-get install -y wget curl unzip git
useradd -r -s /bin/false gbuser || true useradd -r -s /bin/false gbuser || true
@ -36,6 +36,23 @@ rm llama-b6148-bin-ubuntu-x64.zip
wget https://huggingface.co/bartowski/DeepSeek-R1-Distill-Qwen-1.5B-GGUF/resolve/main/DeepSeek-R1-Distill-Qwen-1.5B-Q3_K_M.gguf wget https://huggingface.co/bartowski/DeepSeek-R1-Distill-Qwen-1.5B-GGUF/resolve/main/DeepSeek-R1-Distill-Qwen-1.5B-Q3_K_M.gguf
wget https://huggingface.co/CompendiumLabs/bge-small-en-v1.5-gguf/resolve/main/bge-small-en-v1.5-f32.gguf wget https://huggingface.co/CompendiumLabs/bge-small-en-v1.5-gguf/resolve/main/bge-small-en-v1.5-f32.gguf
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
source "$HOME/.cargo/env"
git clone https://alm.pragmatismo.com.br/generalbots/gbserver
apt install -y build-essential \
pkg-config \
libssl-dev \
gcc-multilib \
g++-multilib \
clang \
lld \
binutils-dev \
libudev-dev \
libdbus-1-dev
cat > /etc/systemd/system/system.service <<EOF cat > /etc/systemd/system/system.service <<EOF
[Unit] [Unit]
Description=General Bots System Service Description=General Bots System Service

41
src/scripts/containers/tables.sh Executable file → Normal file
View file

@ -1,3 +1,4 @@
HOST_BASE="/opt/gbo/tenants/$PARAM_TENANT/tables" HOST_BASE="/opt/gbo/tenants/$PARAM_TENANT/tables"
HOST_DATA="$HOST_BASE/data" HOST_DATA="$HOST_BASE/data"
HOST_CONF="$HOST_BASE/conf" HOST_CONF="$HOST_BASE/conf"
@ -23,29 +24,7 @@ sudo apt install -y postgresql-common
sudo /usr/share/postgresql-common/pgdg/apt.postgresql.org.sh sudo /usr/share/postgresql-common/pgdg/apt.postgresql.org.sh
apt install -y postgresql apt install -y postgresql
systemctl stop postgresql # TODO: Open listener on *.
mkdir -p /etc/systemd/system/postgresql.service.d/
cat > /etc/systemd/system/postgresql.service.d/override.conf <<EOF
[Service]
Environment=\"PGPORT=$PARAM_TABLES_PORT\"
EOF
CONF_FILE=\$(find /etc/postgresql -name postgresql.conf | head -1)
if [ -f \"\$CONF_FILE\" ]; then
cp \"\$CONF_FILE\" \"\${CONF_FILE}.bak\"
sed -i \"s/^#*port *=.*/port = $PARAM_TABLES_PORT/\" \"\$CONF_FILE\"
sed -i \"s/^#*listen_addresses *=.*/listen_addresses = '*'/\" \"\$CONF_FILE\"
HBA_FILE=\$(find /etc/postgresql -name pg_hba.conf | head -1)
if [ -f \"\$HBA_FILE\" ]; then
echo 'host all all 0.0.0.0/0 md5' >> \"\$HBA_FILE\"
fi
fi
systemctl daemon-reload
systemctl start postgresql
systemctl enable postgresql
until sudo -u postgres psql -p $PARAM_TABLES_PORT -c '\q' 2>/dev/null; do until sudo -u postgres psql -p $PARAM_TABLES_PORT -c '\q' 2>/dev/null; do
echo \"Waiting for PostgreSQL to start on port $PARAM_TABLES_PORT...\" echo \"Waiting for PostgreSQL to start on port $PARAM_TABLES_PORT...\"
@ -56,24 +35,8 @@ sudo -u postgres psql -p $PARAM_TABLES_PORT -c \"CREATE USER $PARAM_TENANT WITH
sudo -u postgres psql -p $PARAM_TABLES_PORT -c \"CREATE DATABASE ${PARAM_TENANT}_db OWNER $PARAM_TENANT;\" sudo -u postgres psql -p $PARAM_TABLES_PORT -c \"CREATE DATABASE ${PARAM_TENANT}_db OWNER $PARAM_TENANT;\"
sudo -u postgres psql -p $PARAM_TABLES_PORT -c \"GRANT ALL PRIVILEGES ON DATABASE ${PARAM_TENANT}_db TO $PARAM_TENANT;\" sudo -u postgres psql -p $PARAM_TABLES_PORT -c \"GRANT ALL PRIVILEGES ON DATABASE ${PARAM_TENANT}_db TO $PARAM_TENANT;\"
systemctl restart postgresql
" "
lxc exec "$PARAM_TENANT"-tables -- systemctl stop postgresql
PG_DATA_DIR=$(lxc exec "$PARAM_TENANT"-tables -- bash -c "find /var/lib/postgresql -name main -type d | head -1")
PG_CONF_DIR=$(lxc exec "$PARAM_TENANT"-tables -- bash -c "find /etc/postgresql -name main -type d | head -1")
PG_LOGS_DIR=$(lxc exec "$PARAM_TENANT"-tables -- bash -c "find /var/log/postgresql -name postgresql-*.log -o -name postgresql.log | head -1 | xargs dirname 2>/dev/null || echo /var/log/postgresql")
lxc config device add "$PARAM_TENANT"-tables pgdata disk source="$HOST_DATA" path="$PG_DATA_DIR"
lxc config device add "$PARAM_TENANT"-tables pgconf disk source="$HOST_CONF" path="$PG_CONF_DIR"
lxc config device add "$PARAM_TENANT"-tables pglogs disk source="$HOST_LOGS" path="$PG_LOGS_DIR"
lxc exec "$PARAM_TENANT"-tables -- chown -R postgres:postgres "$PG_DATA_DIR"
lxc exec "$PARAM_TENANT"-tables -- chown -R postgres:postgres "$PG_CONF_DIR"
lxc exec "$PARAM_TENANT"-tables -- chown -R postgres:postgres "$PG_LOGS_DIR"
lxc exec "$PARAM_TENANT"-tables -- systemctl start postgresql
lxc config device remove "$PARAM_TENANT"-tables postgres-proxy 2>/dev/null || true lxc config device remove "$PARAM_TENANT"-tables postgres-proxy 2>/dev/null || true
lxc config device add "$PARAM_TENANT"-tables postgres-proxy proxy \ lxc config device add "$PARAM_TENANT"-tables postgres-proxy proxy \

View file

@ -3,15 +3,10 @@ use dotenv::dotenv;
use reqwest::Client; use reqwest::Client;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use std::env; use std::env;
use std::process::{Command, Stdio};
use std::sync::{Arc, Mutex}; use std::sync::{Arc, Mutex};
use tokio::io::{AsyncBufReadExt, BufReader}; use tokio::io::{AsyncBufReadExt, BufReader};
use tokio::process::Command as TokioCommand;
use tokio::time::{sleep, Duration}; use tokio::time::{sleep, Duration};
// Global process handle
static mut LLAMA_PROCESS: Option<Arc<Mutex<Option<tokio::process::Child>>>> = None;
// OpenAI-compatible request/response structures // OpenAI-compatible request/response structures
#[derive(Debug, Serialize, Deserialize)] #[derive(Debug, Serialize, Deserialize)]
struct ChatMessage { struct ChatMessage {
@ -381,10 +376,11 @@ struct LlamaCppEmbeddingRequest {
pub content: String, pub content: String,
} }
// Llama.cpp Embedding Response // FIXED: Handle the nested array format (embedding is an array of arrays)
#[derive(Debug, Deserialize)] #[derive(Debug, Deserialize)]
struct LlamaCppEmbeddingResponse { struct LlamaCppEmbeddingResponseItem {
pub embedding: Vec<f32>, pub index: usize,
pub embedding: Vec<Vec<f32>>, // This is the fucked up part - embedding is an array of arrays
} }
// Proxy endpoint for embeddings // Proxy endpoint for embeddings
@ -396,7 +392,8 @@ pub async fn embeddings_local(
dotenv().ok(); dotenv().ok();
// Get llama.cpp server URL // Get llama.cpp server URL
let llama_url = env::var("LLM_URL").unwrap_or_else(|_| "http://localhost:8082".to_string()); let llama_url =
env::var("EMBEDDING_URL").unwrap_or_else(|_| "http://localhost:8082".to_string());
let client = Client::builder() let client = Client::builder()
.timeout(Duration::from_secs(120)) .timeout(Duration::from_secs(120))
@ -431,22 +428,50 @@ pub async fn embeddings_local(
let status = response.status(); let status = response.status();
if status.is_success() { if status.is_success() {
let llama_response: LlamaCppEmbeddingResponse = response.json().await.map_err(|e| { // First, get the raw response text for debugging
eprintln!("Error parsing llama.cpp embedding response: {}", e); let raw_response = response.text().await.map_err(|e| {
actix_web::error::ErrorInternalServerError( eprintln!("Error reading response text: {}", e);
"Failed to parse llama.cpp embedding response", actix_web::error::ErrorInternalServerError("Failed to read response")
)
})?; })?;
// Estimate token count (this is approximate since llama.cpp doesn't return token count for embeddings) // Parse the response as a vector of items with nested arrays
let estimated_tokens = (input_text.len() as f32 / 4.0).ceil() as u32; let llama_response: Vec<LlamaCppEmbeddingResponseItem> =
total_tokens += estimated_tokens; serde_json::from_str(&raw_response).map_err(|e| {
eprintln!("Error parsing llama.cpp embedding response: {}", e);
eprintln!("Raw response: {}", raw_response);
actix_web::error::ErrorInternalServerError(
"Failed to parse llama.cpp embedding response",
)
})?;
embeddings_data.push(EmbeddingData { // Extract the embedding from the nested array bullshit
object: "embedding".to_string(), if let Some(item) = llama_response.get(0) {
embedding: llama_response.embedding, // The embedding field contains Vec<Vec<f32>>, so we need to flatten it
index, // If it's [[0.1, 0.2, 0.3]], we want [0.1, 0.2, 0.3]
}); let flattened_embedding = if !item.embedding.is_empty() {
item.embedding[0].clone() // Take the first (and probably only) inner array
} else {
vec![] // Empty if no embedding data
};
// Estimate token count
let estimated_tokens = (input_text.len() as f32 / 4.0).ceil() as u32;
total_tokens += estimated_tokens;
embeddings_data.push(EmbeddingData {
object: "embedding".to_string(),
embedding: flattened_embedding,
index,
});
} else {
eprintln!("No embedding data returned for input: {}", input_text);
return Ok(HttpResponse::InternalServerError().json(serde_json::json!({
"error": {
"message": format!("No embedding data returned for input {}", index),
"type": "server_error"
}
})));
}
} else { } else {
let error_text = response let error_text = response
.text() .text()