diff --git a/src/scripts/containers/drive.sh b/src/scripts/containers/drive.sh
index 5bd7f45..0bb2abf 100644
--- a/src/scripts/containers/drive.sh
+++ b/src/scripts/containers/drive.sh
@@ -17,9 +17,6 @@ apt-get update && apt-get install -y wget
 wget https://dl.min.io/server/minio/release/linux-amd64/minio -O /usr/local/bin/minio
 chmod +x /usr/local/bin/minio
-wget https://dl.min.io/client/mc/release/linux-amd64/mc
-chmod +x /usr/local/bin/mc
-
 useradd -r -s /bin/false minio-user || true
 mkdir -p /var/log/minio /data
 chown -R minio-user:minio-user /var/log/minio /data
 
@@ -35,7 +32,7 @@ User=minio-user
 Group=minio-user
 Environment="MINIO_ROOT_USER='"${PARAM_DRIVE_USER}"'"
 Environment="MINIO_ROOT_PASSWORD='"${PARAM_DRIVE_PASSWORD}"'"
-ExecStart=/usr/local/bin/minio server --console-address ":'"${PARAM_DRIVE_PORT}"'" /data
+ExecStart=/usr/local/bin/minio server --address ":'"${PARAM_DRIVE_PORT}"'" --console-address ":'"${PARAM_PORT}"'" /data
 StandardOutput=append:/var/log/minio/output.log
 StandardError=append:/var/log/minio/error.log
 
diff --git a/src/scripts/containers/system.sh b/src/scripts/containers/system.sh
index 94da09c..52714c3 100644
--- a/src/scripts/containers/system.sh
+++ b/src/scripts/containers/system.sh
@@ -17,7 +17,7 @@ sleep 15
 lxc exec $CONTAINER_NAME -- bash -c '
-apt-get update && apt-get install -y wget unzip
+apt-get update && apt-get install -y wget curl unzip git
 
 useradd -r -s /bin/false gbuser || true
 
@@ -36,6 +36,23 @@ rm llama-b6148-bin-ubuntu-x64.zip
 wget https://huggingface.co/bartowski/DeepSeek-R1-Distill-Qwen-1.5B-GGUF/resolve/main/DeepSeek-R1-Distill-Qwen-1.5B-Q3_K_M.gguf
 wget https://huggingface.co/CompendiumLabs/bge-small-en-v1.5-gguf/resolve/main/bge-small-en-v1.5-f32.gguf
+
+curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
+source "$HOME/.cargo/env"
+git clone https://alm.pragmatismo.com.br/generalbots/gbserver
+
+apt install -y build-essential \
+    pkg-config \
+    libssl-dev \
+    gcc-multilib \
+    g++-multilib \
+    clang \
+    lld \
+    binutils-dev \
+    libudev-dev \
+    libdbus-1-dev
+
+
 cat > /etc/systemd/system/system.service <
 /etc/systemd/system/postgresql.service.d/override.conf <
> \"\$HBA_FILE\"
-    fi
-fi
-
-systemctl daemon-reload
-systemctl start postgresql
-systemctl enable postgresql
+# TODO: Open listener on *.
 until sudo -u postgres psql -p $PARAM_TABLES_PORT -c '\q' 2>/dev/null; do
     echo \"Waiting for PostgreSQL to start on port $PARAM_TABLES_PORT...\"
@@ -56,24 +35,8 @@ sudo -u postgres psql -p $PARAM_TABLES_PORT -c \"CREATE USER $PARAM_TENANT WITH
 sudo -u postgres psql -p $PARAM_TABLES_PORT -c \"CREATE DATABASE ${PARAM_TENANT}_db OWNER $PARAM_TENANT;\"
 sudo -u postgres psql -p $PARAM_TABLES_PORT -c \"GRANT ALL PRIVILEGES ON DATABASE ${PARAM_TENANT}_db TO $PARAM_TENANT;\"
-systemctl restart postgresql
 "
-lxc exec "$PARAM_TENANT"-tables -- systemctl stop postgresql
-
-PG_DATA_DIR=$(lxc exec "$PARAM_TENANT"-tables -- bash -c "find /var/lib/postgresql -name main -type d | head -1")
-PG_CONF_DIR=$(lxc exec "$PARAM_TENANT"-tables -- bash -c "find /etc/postgresql -name main -type d | head -1")
-PG_LOGS_DIR=$(lxc exec "$PARAM_TENANT"-tables -- bash -c "find /var/log/postgresql -name postgresql-*.log -o -name postgresql.log | head -1 | xargs dirname 2>/dev/null || echo /var/log/postgresql")
-
-lxc config device add "$PARAM_TENANT"-tables pgdata disk source="$HOST_DATA" path="$PG_DATA_DIR"
-lxc config device add "$PARAM_TENANT"-tables pgconf disk source="$HOST_CONF" path="$PG_CONF_DIR"
-lxc config device add "$PARAM_TENANT"-tables pglogs disk source="$HOST_LOGS" path="$PG_LOGS_DIR"
-
-lxc exec "$PARAM_TENANT"-tables -- chown -R postgres:postgres "$PG_DATA_DIR"
-lxc exec "$PARAM_TENANT"-tables -- chown -R postgres:postgres "$PG_CONF_DIR"
-lxc exec "$PARAM_TENANT"-tables -- chown -R postgres:postgres "$PG_LOGS_DIR"
-
-lxc exec "$PARAM_TENANT"-tables -- systemctl start postgresql
 
 lxc config device remove "$PARAM_TENANT"-tables postgres-proxy 2>/dev/null || true
 lxc config device add "$PARAM_TENANT"-tables postgres-proxy proxy \
diff --git a/src/services/llm_local.rs b/src/services/llm_local.rs
index 8f56d13..c018708 100644
--- a/src/services/llm_local.rs
+++ b/src/services/llm_local.rs
@@ -3,15 +3,10 @@ use dotenv::dotenv;
 use reqwest::Client;
 use serde::{Deserialize, Serialize};
 use std::env;
-use std::process::{Command, Stdio};
 use std::sync::{Arc, Mutex};
 use tokio::io::{AsyncBufReadExt, BufReader};
-use tokio::process::Command as TokioCommand;
 use tokio::time::{sleep, Duration};
 
-// Global process handle
-static mut LLAMA_PROCESS: Option>>> = None;
-
 // OpenAI-compatible request/response structures
 #[derive(Debug, Serialize, Deserialize)]
 struct ChatMessage {
@@ -381,10 +376,11 @@ struct LlamaCppEmbeddingRequest {
     pub content: String,
 }
 
-// Llama.cpp Embedding Response
+// FIXED: Handle llama.cpp's nested array response format
 #[derive(Debug, Deserialize)]
-struct LlamaCppEmbeddingResponse {
-    pub embedding: Vec<f32>,
+struct LlamaCppEmbeddingResponseItem {
+    pub index: usize,
+    pub embedding: Vec<Vec<f32>>, // llama.cpp returns the embedding as an array of arrays
 }
 
 // Proxy endpoint for embeddings
@@ -396,7 +392,8 @@ pub async fn embeddings_local(
     dotenv().ok();
 
     // Get llama.cpp server URL
-    let llama_url = env::var("LLM_URL").unwrap_or_else(|_| "http://localhost:8082".to_string());
+    let llama_url =
+        env::var("EMBEDDING_URL").unwrap_or_else(|_| "http://localhost:8082".to_string());
 
     let client = Client::builder()
         .timeout(Duration::from_secs(120))
@@ -431,22 +428,50 @@ pub async fn embeddings_local(
     let status = response.status();
 
     if status.is_success() {
-        let llama_response: LlamaCppEmbeddingResponse = response.json().await.map_err(|e| {
-            eprintln!("Error parsing llama.cpp embedding response: {}", e);
-            actix_web::error::ErrorInternalServerError(
-                "Failed to parse llama.cpp embedding response",
-            )
+        // First, get the raw response text for debugging
+        let raw_response = response.text().await.map_err(|e| {
+            eprintln!("Error reading response text: {}", e);
+            actix_web::error::ErrorInternalServerError("Failed to read response")
         })?;
 
-        // Estimate token count (this is approximate since llama.cpp doesn't return token count for embeddings)
-        let estimated_tokens = (input_text.len() as f32 / 4.0).ceil() as u32;
-        total_tokens += estimated_tokens;
+        // Parse the response as a vector of items with nested arrays
+        let llama_response: Vec<LlamaCppEmbeddingResponseItem> =
+            serde_json::from_str(&raw_response).map_err(|e| {
+                eprintln!("Error parsing llama.cpp embedding response: {}", e);
+                eprintln!("Raw response: {}", raw_response);
+                actix_web::error::ErrorInternalServerError(
+                    "Failed to parse llama.cpp embedding response",
+                )
+            })?;
 
-        embeddings_data.push(EmbeddingData {
-            object: "embedding".to_string(),
-            embedding: llama_response.embedding,
-            index,
-        });
+        // Extract the embedding from the nested array format
+        if let Some(item) = llama_response.get(0) {
+            // The embedding field contains Vec<Vec<f32>>, so we need to flatten it
+            // If it's [[0.1, 0.2, 0.3]], we want [0.1, 0.2, 0.3]
+            let flattened_embedding = if !item.embedding.is_empty() {
+                item.embedding[0].clone() // Take the first (and probably only) inner array
+            } else {
+                vec![] // Empty if no embedding data
+            };
+
+            // Estimate token count
+            let estimated_tokens = (input_text.len() as f32 / 4.0).ceil() as u32;
+            total_tokens += estimated_tokens;
+
+            embeddings_data.push(EmbeddingData {
+                object: "embedding".to_string(),
+                embedding: flattened_embedding,
+                index,
+            });
+        } else {
+            eprintln!("No embedding data returned for input: {}", input_text);
+            return Ok(HttpResponse::InternalServerError().json(serde_json::json!({
+                "error": {
+                    "message": format!("No embedding data returned for input {}", index),
+                    "type": "server_error"
+                }
+            })));
+        }
     } else {
         let error_text = response
            .text()