- LLM local fixes.
This commit is contained in:
parent
909f2ae5f1
commit
c54904b18b
4 changed files with 68 additions and 66 deletions
|
@ -17,9 +17,6 @@ apt-get update && apt-get install -y wget
|
||||||
wget https://dl.min.io/server/minio/release/linux-amd64/minio -O /usr/local/bin/minio
|
wget https://dl.min.io/server/minio/release/linux-amd64/minio -O /usr/local/bin/minio
|
||||||
chmod +x /usr/local/bin/minio
|
chmod +x /usr/local/bin/minio
|
||||||
|
|
||||||
wget https://dl.min.io/client/mc/release/linux-amd64/mc
|
|
||||||
chmod +x /usr/local/bin/mc
|
|
||||||
|
|
||||||
useradd -r -s /bin/false minio-user || true
|
useradd -r -s /bin/false minio-user || true
|
||||||
mkdir -p /var/log/minio /data
|
mkdir -p /var/log/minio /data
|
||||||
chown -R minio-user:minio-user /var/log/minio /data
|
chown -R minio-user:minio-user /var/log/minio /data
|
||||||
|
@ -35,7 +32,7 @@ User=minio-user
|
||||||
Group=minio-user
|
Group=minio-user
|
||||||
Environment="MINIO_ROOT_USER='"${PARAM_DRIVE_USER}"'"
|
Environment="MINIO_ROOT_USER='"${PARAM_DRIVE_USER}"'"
|
||||||
Environment="MINIO_ROOT_PASSWORD='"${PARAM_DRIVE_PASSWORD}"'"
|
Environment="MINIO_ROOT_PASSWORD='"${PARAM_DRIVE_PASSWORD}"'"
|
||||||
ExecStart=/usr/local/bin/minio server --console-address ":'"${PARAM_DRIVE_PORT}"'" /data
|
ExecStart=/usr/local/bin/minio server --address ":'"${PARAM_DRIVE_PORT}"'" --console-address ":'"${PARAM_PORT}"'" /data
|
||||||
StandardOutput=append:/var/log/minio/output.log
|
StandardOutput=append:/var/log/minio/output.log
|
||||||
StandardError=append:/var/log/minio/error.log
|
StandardError=append:/var/log/minio/error.log
|
||||||
|
|
||||||
|
|
|
@ -17,7 +17,7 @@ sleep 15
|
||||||
|
|
||||||
lxc exec $CONTAINER_NAME -- bash -c '
|
lxc exec $CONTAINER_NAME -- bash -c '
|
||||||
|
|
||||||
apt-get update && apt-get install -y wget unzip
|
apt-get update && apt-get install -y wget curl unzip git
|
||||||
|
|
||||||
|
|
||||||
useradd -r -s /bin/false gbuser || true
|
useradd -r -s /bin/false gbuser || true
|
||||||
|
@ -36,6 +36,23 @@ rm llama-b6148-bin-ubuntu-x64.zip
|
||||||
wget https://huggingface.co/bartowski/DeepSeek-R1-Distill-Qwen-1.5B-GGUF/resolve/main/DeepSeek-R1-Distill-Qwen-1.5B-Q3_K_M.gguf
|
wget https://huggingface.co/bartowski/DeepSeek-R1-Distill-Qwen-1.5B-GGUF/resolve/main/DeepSeek-R1-Distill-Qwen-1.5B-Q3_K_M.gguf
|
||||||
wget https://huggingface.co/CompendiumLabs/bge-small-en-v1.5-gguf/resolve/main/bge-small-en-v1.5-f32.gguf
|
wget https://huggingface.co/CompendiumLabs/bge-small-en-v1.5-gguf/resolve/main/bge-small-en-v1.5-f32.gguf
|
||||||
|
|
||||||
|
|
||||||
|
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
|
||||||
|
source "$HOME/.cargo/env"
|
||||||
|
git clone https://alm.pragmatismo.com.br/generalbots/gbserver
|
||||||
|
|
||||||
|
apt install -y build-essential \
|
||||||
|
pkg-config \
|
||||||
|
libssl-dev \
|
||||||
|
gcc-multilib \
|
||||||
|
g++-multilib \
|
||||||
|
clang \
|
||||||
|
lld \
|
||||||
|
binutils-dev \
|
||||||
|
libudev-dev \
|
||||||
|
libdbus-1-dev
|
||||||
|
|
||||||
|
|
||||||
cat > /etc/systemd/system/system.service <<EOF
|
cat > /etc/systemd/system/system.service <<EOF
|
||||||
[Unit]
|
[Unit]
|
||||||
Description=General Bots System Service
|
Description=General Bots System Service
|
||||||
|
|
41
src/scripts/containers/tables.sh
Executable file → Normal file
41
src/scripts/containers/tables.sh
Executable file → Normal file
|
@ -1,3 +1,4 @@
|
||||||
|
|
||||||
HOST_BASE="/opt/gbo/tenants/$PARAM_TENANT/tables"
|
HOST_BASE="/opt/gbo/tenants/$PARAM_TENANT/tables"
|
||||||
HOST_DATA="$HOST_BASE/data"
|
HOST_DATA="$HOST_BASE/data"
|
||||||
HOST_CONF="$HOST_BASE/conf"
|
HOST_CONF="$HOST_BASE/conf"
|
||||||
|
@ -23,29 +24,7 @@ sudo apt install -y postgresql-common
|
||||||
sudo /usr/share/postgresql-common/pgdg/apt.postgresql.org.sh
|
sudo /usr/share/postgresql-common/pgdg/apt.postgresql.org.sh
|
||||||
apt install -y postgresql
|
apt install -y postgresql
|
||||||
|
|
||||||
systemctl stop postgresql
|
# TODO: Configure PostgreSQL to listen on all interfaces (listen_addresses = *).
|
||||||
|
|
||||||
mkdir -p /etc/systemd/system/postgresql.service.d/
|
|
||||||
cat > /etc/systemd/system/postgresql.service.d/override.conf <<EOF
|
|
||||||
[Service]
|
|
||||||
Environment=\"PGPORT=$PARAM_TABLES_PORT\"
|
|
||||||
EOF
|
|
||||||
|
|
||||||
CONF_FILE=\$(find /etc/postgresql -name postgresql.conf | head -1)
|
|
||||||
if [ -f \"\$CONF_FILE\" ]; then
|
|
||||||
cp \"\$CONF_FILE\" \"\${CONF_FILE}.bak\"
|
|
||||||
sed -i \"s/^#*port *=.*/port = $PARAM_TABLES_PORT/\" \"\$CONF_FILE\"
|
|
||||||
sed -i \"s/^#*listen_addresses *=.*/listen_addresses = '*'/\" \"\$CONF_FILE\"
|
|
||||||
|
|
||||||
HBA_FILE=\$(find /etc/postgresql -name pg_hba.conf | head -1)
|
|
||||||
if [ -f \"\$HBA_FILE\" ]; then
|
|
||||||
echo 'host all all 0.0.0.0/0 md5' >> \"\$HBA_FILE\"
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
|
|
||||||
systemctl daemon-reload
|
|
||||||
systemctl start postgresql
|
|
||||||
systemctl enable postgresql
|
|
||||||
|
|
||||||
until sudo -u postgres psql -p $PARAM_TABLES_PORT -c '\q' 2>/dev/null; do
|
until sudo -u postgres psql -p $PARAM_TABLES_PORT -c '\q' 2>/dev/null; do
|
||||||
echo \"Waiting for PostgreSQL to start on port $PARAM_TABLES_PORT...\"
|
echo \"Waiting for PostgreSQL to start on port $PARAM_TABLES_PORT...\"
|
||||||
|
@ -56,24 +35,8 @@ sudo -u postgres psql -p $PARAM_TABLES_PORT -c \"CREATE USER $PARAM_TENANT WITH
|
||||||
sudo -u postgres psql -p $PARAM_TABLES_PORT -c \"CREATE DATABASE ${PARAM_TENANT}_db OWNER $PARAM_TENANT;\"
|
sudo -u postgres psql -p $PARAM_TABLES_PORT -c \"CREATE DATABASE ${PARAM_TENANT}_db OWNER $PARAM_TENANT;\"
|
||||||
sudo -u postgres psql -p $PARAM_TABLES_PORT -c \"GRANT ALL PRIVILEGES ON DATABASE ${PARAM_TENANT}_db TO $PARAM_TENANT;\"
|
sudo -u postgres psql -p $PARAM_TABLES_PORT -c \"GRANT ALL PRIVILEGES ON DATABASE ${PARAM_TENANT}_db TO $PARAM_TENANT;\"
|
||||||
|
|
||||||
systemctl restart postgresql
|
|
||||||
"
|
"
|
||||||
|
|
||||||
lxc exec "$PARAM_TENANT"-tables -- systemctl stop postgresql
|
|
||||||
|
|
||||||
PG_DATA_DIR=$(lxc exec "$PARAM_TENANT"-tables -- bash -c "find /var/lib/postgresql -name main -type d | head -1")
|
|
||||||
PG_CONF_DIR=$(lxc exec "$PARAM_TENANT"-tables -- bash -c "find /etc/postgresql -name main -type d | head -1")
|
|
||||||
PG_LOGS_DIR=$(lxc exec "$PARAM_TENANT"-tables -- bash -c "find /var/log/postgresql -name postgresql-*.log -o -name postgresql.log | head -1 | xargs dirname 2>/dev/null || echo /var/log/postgresql")
|
|
||||||
|
|
||||||
lxc config device add "$PARAM_TENANT"-tables pgdata disk source="$HOST_DATA" path="$PG_DATA_DIR"
|
|
||||||
lxc config device add "$PARAM_TENANT"-tables pgconf disk source="$HOST_CONF" path="$PG_CONF_DIR"
|
|
||||||
lxc config device add "$PARAM_TENANT"-tables pglogs disk source="$HOST_LOGS" path="$PG_LOGS_DIR"
|
|
||||||
|
|
||||||
lxc exec "$PARAM_TENANT"-tables -- chown -R postgres:postgres "$PG_DATA_DIR"
|
|
||||||
lxc exec "$PARAM_TENANT"-tables -- chown -R postgres:postgres "$PG_CONF_DIR"
|
|
||||||
lxc exec "$PARAM_TENANT"-tables -- chown -R postgres:postgres "$PG_LOGS_DIR"
|
|
||||||
|
|
||||||
lxc exec "$PARAM_TENANT"-tables -- systemctl start postgresql
|
|
||||||
|
|
||||||
lxc config device remove "$PARAM_TENANT"-tables postgres-proxy 2>/dev/null || true
|
lxc config device remove "$PARAM_TENANT"-tables postgres-proxy 2>/dev/null || true
|
||||||
lxc config device add "$PARAM_TENANT"-tables postgres-proxy proxy \
|
lxc config device add "$PARAM_TENANT"-tables postgres-proxy proxy \
|
||||||
|
|
|
@ -3,15 +3,10 @@ use dotenv::dotenv;
|
||||||
use reqwest::Client;
|
use reqwest::Client;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use std::env;
|
use std::env;
|
||||||
use std::process::{Command, Stdio};
|
|
||||||
use std::sync::{Arc, Mutex};
|
use std::sync::{Arc, Mutex};
|
||||||
use tokio::io::{AsyncBufReadExt, BufReader};
|
use tokio::io::{AsyncBufReadExt, BufReader};
|
||||||
use tokio::process::Command as TokioCommand;
|
|
||||||
use tokio::time::{sleep, Duration};
|
use tokio::time::{sleep, Duration};
|
||||||
|
|
||||||
// Global process handle
|
|
||||||
static mut LLAMA_PROCESS: Option<Arc<Mutex<Option<tokio::process::Child>>>> = None;
|
|
||||||
|
|
||||||
// OpenAI-compatible request/response structures
|
// OpenAI-compatible request/response structures
|
||||||
#[derive(Debug, Serialize, Deserialize)]
|
#[derive(Debug, Serialize, Deserialize)]
|
||||||
struct ChatMessage {
|
struct ChatMessage {
|
||||||
|
@ -381,10 +376,11 @@ struct LlamaCppEmbeddingRequest {
|
||||||
pub content: String,
|
pub content: String,
|
||||||
}
|
}
|
||||||
|
|
||||||
// Llama.cpp Embedding Response
|
// Llama.cpp embedding response item: the `embedding` field is a nested array (array of arrays)
|
||||||
#[derive(Debug, Deserialize)]
|
#[derive(Debug, Deserialize)]
|
||||||
struct LlamaCppEmbeddingResponse {
|
struct LlamaCppEmbeddingResponseItem {
|
||||||
pub embedding: Vec<f32>,
|
pub index: usize,
|
||||||
|
pub embedding: Vec<Vec<f32>>, // Note: llama.cpp returns the embedding as an array of arrays
|
||||||
}
|
}
|
||||||
|
|
||||||
// Proxy endpoint for embeddings
|
// Proxy endpoint for embeddings
|
||||||
|
@ -396,7 +392,8 @@ pub async fn embeddings_local(
|
||||||
dotenv().ok();
|
dotenv().ok();
|
||||||
|
|
||||||
// Get llama.cpp server URL
|
// Get llama.cpp server URL
|
||||||
let llama_url = env::var("LLM_URL").unwrap_or_else(|_| "http://localhost:8082".to_string());
|
let llama_url =
|
||||||
|
env::var("EMBEDDING_URL").unwrap_or_else(|_| "http://localhost:8082".to_string());
|
||||||
|
|
||||||
let client = Client::builder()
|
let client = Client::builder()
|
||||||
.timeout(Duration::from_secs(120))
|
.timeout(Duration::from_secs(120))
|
||||||
|
@ -431,22 +428,50 @@ pub async fn embeddings_local(
|
||||||
let status = response.status();
|
let status = response.status();
|
||||||
|
|
||||||
if status.is_success() {
|
if status.is_success() {
|
||||||
let llama_response: LlamaCppEmbeddingResponse = response.json().await.map_err(|e| {
|
// First, get the raw response text for debugging
|
||||||
|
let raw_response = response.text().await.map_err(|e| {
|
||||||
|
eprintln!("Error reading response text: {}", e);
|
||||||
|
actix_web::error::ErrorInternalServerError("Failed to read response")
|
||||||
|
})?;
|
||||||
|
|
||||||
|
// Parse the response as a vector of items with nested arrays
|
||||||
|
let llama_response: Vec<LlamaCppEmbeddingResponseItem> =
|
||||||
|
serde_json::from_str(&raw_response).map_err(|e| {
|
||||||
eprintln!("Error parsing llama.cpp embedding response: {}", e);
|
eprintln!("Error parsing llama.cpp embedding response: {}", e);
|
||||||
|
eprintln!("Raw response: {}", raw_response);
|
||||||
actix_web::error::ErrorInternalServerError(
|
actix_web::error::ErrorInternalServerError(
|
||||||
"Failed to parse llama.cpp embedding response",
|
"Failed to parse llama.cpp embedding response",
|
||||||
)
|
)
|
||||||
})?;
|
})?;
|
||||||
|
|
||||||
// Estimate token count (this is approximate since llama.cpp doesn't return token count for embeddings)
|
// Extract the embedding from the nested array
|
||||||
|
if let Some(item) = llama_response.get(0) {
|
||||||
|
// The embedding field contains Vec<Vec<f32>>, so we need to flatten it
|
||||||
|
// If it's [[0.1, 0.2, 0.3]], we want [0.1, 0.2, 0.3]
|
||||||
|
let flattened_embedding = if !item.embedding.is_empty() {
|
||||||
|
item.embedding[0].clone() // Take the first (and probably only) inner array
|
||||||
|
} else {
|
||||||
|
vec![] // Empty if no embedding data
|
||||||
|
};
|
||||||
|
|
||||||
|
// Estimate token count
|
||||||
let estimated_tokens = (input_text.len() as f32 / 4.0).ceil() as u32;
|
let estimated_tokens = (input_text.len() as f32 / 4.0).ceil() as u32;
|
||||||
total_tokens += estimated_tokens;
|
total_tokens += estimated_tokens;
|
||||||
|
|
||||||
embeddings_data.push(EmbeddingData {
|
embeddings_data.push(EmbeddingData {
|
||||||
object: "embedding".to_string(),
|
object: "embedding".to_string(),
|
||||||
embedding: llama_response.embedding,
|
embedding: flattened_embedding,
|
||||||
index,
|
index,
|
||||||
});
|
});
|
||||||
|
} else {
|
||||||
|
eprintln!("No embedding data returned for input: {}", input_text);
|
||||||
|
return Ok(HttpResponse::InternalServerError().json(serde_json::json!({
|
||||||
|
"error": {
|
||||||
|
"message": format!("No embedding data returned for input {}", index),
|
||||||
|
"type": "server_error"
|
||||||
|
}
|
||||||
|
})));
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
let error_text = response
|
let error_text = response
|
||||||
.text()
|
.text()
|
||||||
|
|
Loading…
Add table
Reference in a new issue