- LLM local fixes.
This commit is contained in:
parent
909f2ae5f1
commit
c54904b18b
4 changed files with 68 additions and 66 deletions
|
@ -17,9 +17,6 @@ apt-get update && apt-get install -y wget
|
|||
wget https://dl.min.io/server/minio/release/linux-amd64/minio -O /usr/local/bin/minio
|
||||
chmod +x /usr/local/bin/minio
|
||||
|
||||
wget https://dl.min.io/client/mc/release/linux-amd64/mc
|
||||
chmod +x /usr/local/bin/mc
|
||||
|
||||
useradd -r -s /bin/false minio-user || true
|
||||
mkdir -p /var/log/minio /data
|
||||
chown -R minio-user:minio-user /var/log/minio /data
|
||||
|
@ -35,7 +32,7 @@ User=minio-user
|
|||
Group=minio-user
|
||||
Environment="MINIO_ROOT_USER='"${PARAM_DRIVE_USER}"'"
|
||||
Environment="MINIO_ROOT_PASSWORD='"${PARAM_DRIVE_PASSWORD}"'"
|
||||
ExecStart=/usr/local/bin/minio server --console-address ":'"${PARAM_DRIVE_PORT}"'" /data
|
||||
ExecStart=/usr/local/bin/minio server --address ":'"${PARAM_DRIVE_PORT}"'" --console-address ":'"${PARAM_PORT}"'" /data
|
||||
StandardOutput=append:/var/log/minio/output.log
|
||||
StandardError=append:/var/log/minio/error.log
|
||||
|
||||
|
|
|
@ -17,7 +17,7 @@ sleep 15
|
|||
|
||||
lxc exec $CONTAINER_NAME -- bash -c '
|
||||
|
||||
apt-get update && apt-get install -y wget unzip
|
||||
apt-get update && apt-get install -y wget curl unzip git
|
||||
|
||||
|
||||
useradd -r -s /bin/false gbuser || true
|
||||
|
@ -36,6 +36,23 @@ rm llama-b6148-bin-ubuntu-x64.zip
|
|||
wget https://huggingface.co/bartowski/DeepSeek-R1-Distill-Qwen-1.5B-GGUF/resolve/main/DeepSeek-R1-Distill-Qwen-1.5B-Q3_K_M.gguf
|
||||
wget https://huggingface.co/CompendiumLabs/bge-small-en-v1.5-gguf/resolve/main/bge-small-en-v1.5-f32.gguf
|
||||
|
||||
|
||||
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
|
||||
source "$HOME/.cargo/env"
|
||||
git clone https://alm.pragmatismo.com.br/generalbots/gbserver
|
||||
|
||||
apt install -y build-essential \
|
||||
pkg-config \
|
||||
libssl-dev \
|
||||
gcc-multilib \
|
||||
g++-multilib \
|
||||
clang \
|
||||
lld \
|
||||
binutils-dev \
|
||||
libudev-dev \
|
||||
libdbus-1-dev
|
||||
|
||||
|
||||
cat > /etc/systemd/system/system.service <<EOF
|
||||
[Unit]
|
||||
Description=General Bots System Service
|
||||
|
|
41
src/scripts/containers/tables.sh
Executable file → Normal file
41
src/scripts/containers/tables.sh
Executable file → Normal file
|
@ -1,3 +1,4 @@
|
|||
|
||||
HOST_BASE="/opt/gbo/tenants/$PARAM_TENANT/tables"
|
||||
HOST_DATA="$HOST_BASE/data"
|
||||
HOST_CONF="$HOST_BASE/conf"
|
||||
|
@ -23,29 +24,7 @@ sudo apt install -y postgresql-common
|
|||
sudo /usr/share/postgresql-common/pgdg/apt.postgresql.org.sh
|
||||
apt install -y postgresql
|
||||
|
||||
systemctl stop postgresql
|
||||
|
||||
mkdir -p /etc/systemd/system/postgresql.service.d/
|
||||
cat > /etc/systemd/system/postgresql.service.d/override.conf <<EOF
|
||||
[Service]
|
||||
Environment=\"PGPORT=$PARAM_TABLES_PORT\"
|
||||
EOF
|
||||
|
||||
CONF_FILE=\$(find /etc/postgresql -name postgresql.conf | head -1)
|
||||
if [ -f \"\$CONF_FILE\" ]; then
|
||||
cp \"\$CONF_FILE\" \"\${CONF_FILE}.bak\"
|
||||
sed -i \"s/^#*port *=.*/port = $PARAM_TABLES_PORT/\" \"\$CONF_FILE\"
|
||||
sed -i \"s/^#*listen_addresses *=.*/listen_addresses = '*'/\" \"\$CONF_FILE\"
|
||||
|
||||
HBA_FILE=\$(find /etc/postgresql -name pg_hba.conf | head -1)
|
||||
if [ -f \"\$HBA_FILE\" ]; then
|
||||
echo 'host all all 0.0.0.0/0 md5' >> \"\$HBA_FILE\"
|
||||
fi
|
||||
fi
|
||||
|
||||
systemctl daemon-reload
|
||||
systemctl start postgresql
|
||||
systemctl enable postgresql
|
||||
# TODO: Open listener on *.
|
||||
|
||||
until sudo -u postgres psql -p $PARAM_TABLES_PORT -c '\q' 2>/dev/null; do
|
||||
echo \"Waiting for PostgreSQL to start on port $PARAM_TABLES_PORT...\"
|
||||
|
@ -56,24 +35,8 @@ sudo -u postgres psql -p $PARAM_TABLES_PORT -c \"CREATE USER $PARAM_TENANT WITH
|
|||
sudo -u postgres psql -p $PARAM_TABLES_PORT -c \"CREATE DATABASE ${PARAM_TENANT}_db OWNER $PARAM_TENANT;\"
|
||||
sudo -u postgres psql -p $PARAM_TABLES_PORT -c \"GRANT ALL PRIVILEGES ON DATABASE ${PARAM_TENANT}_db TO $PARAM_TENANT;\"
|
||||
|
||||
systemctl restart postgresql
|
||||
"
|
||||
|
||||
lxc exec "$PARAM_TENANT"-tables -- systemctl stop postgresql
|
||||
|
||||
PG_DATA_DIR=$(lxc exec "$PARAM_TENANT"-tables -- bash -c "find /var/lib/postgresql -name main -type d | head -1")
|
||||
PG_CONF_DIR=$(lxc exec "$PARAM_TENANT"-tables -- bash -c "find /etc/postgresql -name main -type d | head -1")
|
||||
PG_LOGS_DIR=$(lxc exec "$PARAM_TENANT"-tables -- bash -c "find /var/log/postgresql -name postgresql-*.log -o -name postgresql.log | head -1 | xargs dirname 2>/dev/null || echo /var/log/postgresql")
|
||||
|
||||
lxc config device add "$PARAM_TENANT"-tables pgdata disk source="$HOST_DATA" path="$PG_DATA_DIR"
|
||||
lxc config device add "$PARAM_TENANT"-tables pgconf disk source="$HOST_CONF" path="$PG_CONF_DIR"
|
||||
lxc config device add "$PARAM_TENANT"-tables pglogs disk source="$HOST_LOGS" path="$PG_LOGS_DIR"
|
||||
|
||||
lxc exec "$PARAM_TENANT"-tables -- chown -R postgres:postgres "$PG_DATA_DIR"
|
||||
lxc exec "$PARAM_TENANT"-tables -- chown -R postgres:postgres "$PG_CONF_DIR"
|
||||
lxc exec "$PARAM_TENANT"-tables -- chown -R postgres:postgres "$PG_LOGS_DIR"
|
||||
|
||||
lxc exec "$PARAM_TENANT"-tables -- systemctl start postgresql
|
||||
|
||||
lxc config device remove "$PARAM_TENANT"-tables postgres-proxy 2>/dev/null || true
|
||||
lxc config device add "$PARAM_TENANT"-tables postgres-proxy proxy \
|
||||
|
|
|
@ -3,15 +3,10 @@ use dotenv::dotenv;
|
|||
use reqwest::Client;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::env;
|
||||
use std::process::{Command, Stdio};
|
||||
use std::sync::{Arc, Mutex};
|
||||
use tokio::io::{AsyncBufReadExt, BufReader};
|
||||
use tokio::process::Command as TokioCommand;
|
||||
use tokio::time::{sleep, Duration};
|
||||
|
||||
// Global process handle
|
||||
static mut LLAMA_PROCESS: Option<Arc<Mutex<Option<tokio::process::Child>>>> = None;
|
||||
|
||||
// OpenAI-compatible request/response structures
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
struct ChatMessage {
|
||||
|
@ -381,10 +376,11 @@ struct LlamaCppEmbeddingRequest {
|
|||
pub content: String,
|
||||
}
|
||||
|
||||
// Llama.cpp Embedding Response
|
||||
// Handles llama.cpp's nested embedding format: the response is a list of items whose `embedding` is an array of arrays
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct LlamaCppEmbeddingResponse {
|
||||
pub embedding: Vec<f32>,
|
||||
struct LlamaCppEmbeddingResponseItem {
|
||||
pub index: usize,
|
||||
pub embedding: Vec<Vec<f32>>, // This is the fucked up part - embedding is an array of arrays
|
||||
}
|
||||
|
||||
// Proxy endpoint for embeddings
|
||||
|
@ -396,7 +392,8 @@ pub async fn embeddings_local(
|
|||
dotenv().ok();
|
||||
|
||||
// Get llama.cpp server URL
|
||||
let llama_url = env::var("LLM_URL").unwrap_or_else(|_| "http://localhost:8082".to_string());
|
||||
let llama_url =
|
||||
env::var("EMBEDDING_URL").unwrap_or_else(|_| "http://localhost:8082".to_string());
|
||||
|
||||
let client = Client::builder()
|
||||
.timeout(Duration::from_secs(120))
|
||||
|
@ -431,22 +428,50 @@ pub async fn embeddings_local(
|
|||
let status = response.status();
|
||||
|
||||
if status.is_success() {
|
||||
let llama_response: LlamaCppEmbeddingResponse = response.json().await.map_err(|e| {
|
||||
eprintln!("Error parsing llama.cpp embedding response: {}", e);
|
||||
actix_web::error::ErrorInternalServerError(
|
||||
"Failed to parse llama.cpp embedding response",
|
||||
)
|
||||
// First, get the raw response text for debugging
|
||||
let raw_response = response.text().await.map_err(|e| {
|
||||
eprintln!("Error reading response text: {}", e);
|
||||
actix_web::error::ErrorInternalServerError("Failed to read response")
|
||||
})?;
|
||||
|
||||
// Estimate token count (this is approximate since llama.cpp doesn't return token count for embeddings)
|
||||
let estimated_tokens = (input_text.len() as f32 / 4.0).ceil() as u32;
|
||||
total_tokens += estimated_tokens;
|
||||
// Parse the response as a vector of items with nested arrays
|
||||
let llama_response: Vec<LlamaCppEmbeddingResponseItem> =
|
||||
serde_json::from_str(&raw_response).map_err(|e| {
|
||||
eprintln!("Error parsing llama.cpp embedding response: {}", e);
|
||||
eprintln!("Raw response: {}", raw_response);
|
||||
actix_web::error::ErrorInternalServerError(
|
||||
"Failed to parse llama.cpp embedding response",
|
||||
)
|
||||
})?;
|
||||
|
||||
embeddings_data.push(EmbeddingData {
|
||||
object: "embedding".to_string(),
|
||||
embedding: llama_response.embedding,
|
||||
index,
|
||||
});
|
||||
// Extract the embedding vector from the nested array
|
||||
if let Some(item) = llama_response.get(0) {
|
||||
// The embedding field contains Vec<Vec<f32>>, so we need to flatten it
|
||||
// If it's [[0.1, 0.2, 0.3]], we want [0.1, 0.2, 0.3]
|
||||
let flattened_embedding = if !item.embedding.is_empty() {
|
||||
item.embedding[0].clone() // Take the first (and probably only) inner array
|
||||
} else {
|
||||
vec![] // Empty if no embedding data
|
||||
};
|
||||
|
||||
// Estimate token count
|
||||
let estimated_tokens = (input_text.len() as f32 / 4.0).ceil() as u32;
|
||||
total_tokens += estimated_tokens;
|
||||
|
||||
embeddings_data.push(EmbeddingData {
|
||||
object: "embedding".to_string(),
|
||||
embedding: flattened_embedding,
|
||||
index,
|
||||
});
|
||||
} else {
|
||||
eprintln!("No embedding data returned for input: {}", input_text);
|
||||
return Ok(HttpResponse::InternalServerError().json(serde_json::json!({
|
||||
"error": {
|
||||
"message": format!("No embedding data returned for input {}", index),
|
||||
"type": "server_error"
|
||||
}
|
||||
})));
|
||||
}
|
||||
} else {
|
||||
let error_text = response
|
||||
.text()
|
||||
|
|
Loading…
Add table
Reference in a new issue