- LLM local fixes.

2025-09-09 15:09:28 -03:00 · 2025-09-09 15:09:28 -03:00 · c54904b18b
commit c54904b18b
parent 909f2ae5f1
4 changed files with 68 additions and 66 deletions
--- a/src/scripts/containers/drive.sh
+++ b/src/scripts/containers/drive.sh
@ -17,9 +17,6 @@ apt-get update && apt-get install -y wget
 wget https://dl.min.io/server/minio/release/linux-amd64/minio -O /usr/local/bin/minio
 chmod +x /usr/local/bin/minio

-wget https://dl.min.io/client/mc/release/linux-amd64/mc
-chmod +x /usr/local/bin/mc
-
 useradd -r -s /bin/false minio-user || true
 mkdir -p /var/log/minio /data
 chown -R minio-user:minio-user /var/log/minio /data
@ -35,7 +32,7 @@ User=minio-user
 Group=minio-user
 Environment="MINIO_ROOT_USER='"${PARAM_DRIVE_USER}"'"
 Environment="MINIO_ROOT_PASSWORD='"${PARAM_DRIVE_PASSWORD}"'"
-ExecStart=/usr/local/bin/minio server --console-address ":'"${PARAM_DRIVE_PORT}"'" /data
+ExecStart=/usr/local/bin/minio server --address ":'"${PARAM_DRIVE_PORT}"'" --console-address ":'"${PARAM_PORT}"'" /data
 StandardOutput=append:/var/log/minio/output.log
 StandardError=append:/var/log/minio/error.log

--- a/src/scripts/containers/system.sh
+++ b/src/scripts/containers/system.sh
@ -17,7 +17,7 @@ sleep 15

 lxc exec $CONTAINER_NAME -- bash -c '

-apt-get update && apt-get install -y wget unzip
+apt-get update && apt-get install -y wget curl unzip git


 useradd -r -s /bin/false gbuser || true
@ -36,6 +36,23 @@ rm llama-b6148-bin-ubuntu-x64.zip
 wget https://huggingface.co/bartowski/DeepSeek-R1-Distill-Qwen-1.5B-GGUF/resolve/main/DeepSeek-R1-Distill-Qwen-1.5B-Q3_K_M.gguf
 wget https://huggingface.co/CompendiumLabs/bge-small-en-v1.5-gguf/resolve/main/bge-small-en-v1.5-f32.gguf

+
+curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
+source "$HOME/.cargo/env"
+git clone https://alm.pragmatismo.com.br/generalbots/gbserver
+
+apt install -y build-essential \
+    pkg-config \
+    libssl-dev \
+    gcc-multilib \
+    g++-multilib \
+    clang \
+    lld \
+    binutils-dev \
+    libudev-dev \
+    libdbus-1-dev
+
+
 cat > /etc/systemd/system/system.service <<EOF
 [Unit]
 Description=General Bots System Service
--- a/src/scripts/containers/tables.sh
+++ b/src/scripts/containers/tables.sh
@ -1,3 +1,4 @@
+
 HOST_BASE="/opt/gbo/tenants/$PARAM_TENANT/tables"
 HOST_DATA="$HOST_BASE/data"
 HOST_CONF="$HOST_BASE/conf"
@ -23,29 +24,7 @@ sudo apt install -y postgresql-common
 sudo /usr/share/postgresql-common/pgdg/apt.postgresql.org.sh
 apt install -y postgresql

-systemctl stop postgresql
-
-mkdir -p /etc/systemd/system/postgresql.service.d/
-cat > /etc/systemd/system/postgresql.service.d/override.conf <<EOF
-[Service]
-Environment=\"PGPORT=$PARAM_TABLES_PORT\"
-EOF
-
-CONF_FILE=\$(find /etc/postgresql -name postgresql.conf | head -1)
-if [ -f \"\$CONF_FILE\" ]; then
-    cp \"\$CONF_FILE\" \"\${CONF_FILE}.bak\"
-    sed -i \"s/^#*port *=.*/port = $PARAM_TABLES_PORT/\" \"\$CONF_FILE\"
-    sed -i \"s/^#*listen_addresses *=.*/listen_addresses = '*'/\" \"\$CONF_FILE\"
-
-    HBA_FILE=\$(find /etc/postgresql -name pg_hba.conf | head -1)
-    if [ -f \"\$HBA_FILE\" ]; then
-        echo 'host    all             all             0.0.0.0/0               md5' >> \"\$HBA_FILE\"
-    fi
-fi
-
-systemctl daemon-reload
-systemctl start postgresql
-systemctl enable postgresql
+# TODO: Configure PostgreSQL to listen on all interfaces (listen_addresses = '*').

 until sudo -u postgres psql -p $PARAM_TABLES_PORT -c '\q' 2>/dev/null; do
    echo \"Waiting for PostgreSQL to start on port $PARAM_TABLES_PORT...\"
@ -56,24 +35,8 @@ sudo -u postgres psql -p $PARAM_TABLES_PORT -c \"CREATE USER $PARAM_TENANT WITH
 sudo -u postgres psql -p $PARAM_TABLES_PORT -c \"CREATE DATABASE ${PARAM_TENANT}_db OWNER $PARAM_TENANT;\"
 sudo -u postgres psql -p $PARAM_TABLES_PORT -c \"GRANT ALL PRIVILEGES ON DATABASE ${PARAM_TENANT}_db TO $PARAM_TENANT;\"

-systemctl restart postgresql
 "

-lxc exec "$PARAM_TENANT"-tables -- systemctl stop postgresql
-
-PG_DATA_DIR=$(lxc exec "$PARAM_TENANT"-tables -- bash -c "find /var/lib/postgresql -name main -type d | head -1")
-PG_CONF_DIR=$(lxc exec "$PARAM_TENANT"-tables -- bash -c "find /etc/postgresql -name main -type d | head -1")
-PG_LOGS_DIR=$(lxc exec "$PARAM_TENANT"-tables -- bash -c "find /var/log/postgresql -name postgresql-*.log -o -name postgresql.log | head -1 | xargs dirname 2>/dev/null || echo /var/log/postgresql")
-
-lxc config device add "$PARAM_TENANT"-tables pgdata disk source="$HOST_DATA" path="$PG_DATA_DIR"
-lxc config device add "$PARAM_TENANT"-tables pgconf disk source="$HOST_CONF" path="$PG_CONF_DIR"
-lxc config device add "$PARAM_TENANT"-tables pglogs disk source="$HOST_LOGS" path="$PG_LOGS_DIR"
-
-lxc exec "$PARAM_TENANT"-tables -- chown -R postgres:postgres "$PG_DATA_DIR"
-lxc exec "$PARAM_TENANT"-tables -- chown -R postgres:postgres "$PG_CONF_DIR"
-lxc exec "$PARAM_TENANT"-tables -- chown -R postgres:postgres "$PG_LOGS_DIR"
-
-lxc exec "$PARAM_TENANT"-tables -- systemctl start postgresql

 lxc config device remove "$PARAM_TENANT"-tables postgres-proxy 2>/dev/null || true
 lxc config device add "$PARAM_TENANT"-tables postgres-proxy proxy \
--- a/src/services/llm_local.rs
+++ b/src/services/llm_local.rs
@ -3,15 +3,10 @@ use dotenv::dotenv;
 use reqwest::Client;
 use serde::{Deserialize, Serialize};
 use std::env;
-use std::process::{Command, Stdio};
 use std::sync::{Arc, Mutex};
 use tokio::io::{AsyncBufReadExt, BufReader};
-use tokio::process::Command as TokioCommand;
 use tokio::time::{sleep, Duration};

-// Global process handle
-static mut LLAMA_PROCESS: Option<Arc<Mutex<Option<tokio::process::Child>>>> = None;
-
 // OpenAI-compatible request/response structures
 #[derive(Debug, Serialize, Deserialize)]
 struct ChatMessage {
@ -381,10 +376,11 @@ struct LlamaCppEmbeddingRequest {
    pub content: String,
 }

-// Llama.cpp Embedding Response
+// Llama.cpp embedding response item; the server returns the embedding in a nested array format
 #[derive(Debug, Deserialize)]
-struct LlamaCppEmbeddingResponse {
-    pub embedding: Vec<f32>,
+struct LlamaCppEmbeddingResponseItem {
+    pub index: usize,
+    pub embedding: Vec<Vec<f32>>, // Note: the embedding arrives as an array of arrays, not a flat array
 }

 // Proxy endpoint for embeddings
@ -396,7 +392,8 @@ pub async fn embeddings_local(
    dotenv().ok();

    // Get llama.cpp server URL
-    let llama_url = env::var("LLM_URL").unwrap_or_else(|_| "http://localhost:8082".to_string());
+    let llama_url =
+        env::var("EMBEDDING_URL").unwrap_or_else(|_| "http://localhost:8082".to_string());

    let client = Client::builder()
        .timeout(Duration::from_secs(120))
@ -431,22 +428,50 @@ pub async fn embeddings_local(
        let status = response.status();

        if status.is_success() {
-            let llama_response: LlamaCppEmbeddingResponse = response.json().await.map_err(|e| {
-                eprintln!("Error parsing llama.cpp embedding response: {}", e);
-                actix_web::error::ErrorInternalServerError(
-                    "Failed to parse llama.cpp embedding response",
-                )
+            // First, get the raw response text for debugging
+            let raw_response = response.text().await.map_err(|e| {
+                eprintln!("Error reading response text: {}", e);
+                actix_web::error::ErrorInternalServerError("Failed to read response")
            })?;

-            // Estimate token count (this is approximate since llama.cpp doesn't return token count for embeddings)
-            let estimated_tokens = (input_text.len() as f32 / 4.0).ceil() as u32;
-            total_tokens += estimated_tokens;
+            // Parse the response as a vector of items with nested arrays
+            let llama_response: Vec<LlamaCppEmbeddingResponseItem> =
+                serde_json::from_str(&raw_response).map_err(|e| {
+                    eprintln!("Error parsing llama.cpp embedding response: {}", e);
+                    eprintln!("Raw response: {}", raw_response);
+                    actix_web::error::ErrorInternalServerError(
+                        "Failed to parse llama.cpp embedding response",
+                    )
+                })?;

-            embeddings_data.push(EmbeddingData {
-                object: "embedding".to_string(),
-                embedding: llama_response.embedding,
-                index,
-            });
+            // Extract the embedding from the nested array
+            if let Some(item) = llama_response.get(0) {
+                // The embedding field contains Vec<Vec<f32>>, so we need to flatten it
+                // If it's [[0.1, 0.2, 0.3]], we want [0.1, 0.2, 0.3]
+                let flattened_embedding = if !item.embedding.is_empty() {
+                    item.embedding[0].clone() // Take the first (and probably only) inner array
+                } else {
+                    vec![] // Empty if no embedding data
+                };
+
+                // Estimate token count
+                let estimated_tokens = (input_text.len() as f32 / 4.0).ceil() as u32;
+                total_tokens += estimated_tokens;
+
+                embeddings_data.push(EmbeddingData {
+                    object: "embedding".to_string(),
+                    embedding: flattened_embedding,
+                    index,
+                });
+            } else {
+                eprintln!("No embedding data returned for input: {}", input_text);
+                return Ok(HttpResponse::InternalServerError().json(serde_json::json!({
+                    "error": {
+                        "message": format!("No embedding data returned for input {}", index),
+                        "type": "server_error"
+                    }
+                })));
+            }
        } else {
            let error_text = response
                .text()