Add llama.cpp support and embedded config for Orange Pi / Raspberry Pi
This commit is contained in:
parent fed35e116a
commit 59a74fa3ec

2 changed files with 191 additions and 0 deletions

.env.embedded (new file, 52 lines)
@@ -0,0 +1,52 @@
# BotServer Embedded Configuration
# For Orange Pi, Raspberry Pi, and other ARM SBCs

# Server
HOST=0.0.0.0
PORT=8088
RUST_LOG=info

# Database (SQLite for embedded, no PostgreSQL needed)
DATABASE_URL=sqlite:///opt/botserver/data/botserver.db

# LLM Configuration - Local llama.cpp
LLM_PROVIDER=llamacpp
LLM_API_URL=http://127.0.0.1:8080
LLM_MODEL=tinyllama

# Alternative: Use remote API
# LLM_PROVIDER=openai
# LLM_API_URL=https://api.openai.com/v1
# LLM_API_KEY=sk-...

# Alternative: Ollama (if installed)
# LLM_PROVIDER=ollama
# LLM_API_URL=http://127.0.0.1:11434
# LLM_MODEL=tinyllama

# Memory limits for embedded
MAX_CONTEXT_TOKENS=2048
MAX_RESPONSE_TOKENS=512
STREAMING_ENABLED=true

# Embedded UI
STATIC_FILES_PATH=/opt/botserver/ui
DEFAULT_UI=embedded

# WebSocket
WS_PING_INTERVAL=30
WS_TIMEOUT=300

# Security (change in production!)
JWT_SECRET=embedded-change-me-in-production
CORS_ORIGINS=*

# Logging
LOG_FILE=/opt/botserver/data/botserver.log
LOG_MAX_SIZE=10M
LOG_RETENTION=7

# Performance tuning for low-memory devices
# Uncomment for <2GB RAM devices
# RUST_BACKTRACE=0
# MALLOC_ARENA_MAX=2
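
Once deployed, the local llama.cpp endpoint configured above can be smoke-tested from the device. A minimal check, assuming llama-server's OpenAI-compatible routes (response shape may vary between llama.cpp versions):

# List the loaded model (also suggested at the end of the deploy script)
curl -s http://127.0.0.1:8080/v1/models

# One short chat completion, kept within the MAX_RESPONSE_TOKENS budget above
curl -s http://127.0.0.1:8080/v1/chat/completions \
  -H 'Content-Type: application/json' \
  -d '{"model": "tinyllama", "messages": [{"role": "user", "content": "Hello"}], "max_tokens": 64}'
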
@@ -27,9 +27,12 @@ NC='\033[0m'
# Default values
TARGET_HOST=""
WITH_UI=false
WITH_LLAMA=false
LOCAL_INSTALL=false
ARCH=""
SERVICE_NAME="botserver"
LLAMA_MODEL="tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"
LLAMA_URL="https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main"

print_banner() {
    echo -e "${BLUE}"
@@ -271,12 +274,129 @@ deploy_local() {
    fi
}

install_llama_cpp() {
    local host=$1
    local is_local=$2

    echo -e "${YELLOW}Installing llama.cpp...${NC}"

    local commands='
        # Install dependencies
        sudo apt-get update
        sudo apt-get install -y build-essential cmake git

        # Clone and build llama.cpp
        cd /opt
        if [ ! -d "llama.cpp" ]; then
            sudo git clone https://github.com/ggerganov/llama.cpp.git
            sudo chown -R $(whoami):$(whoami) llama.cpp
        fi
        cd llama.cpp

        # Build with optimizations for ARM
        mkdir -p build && cd build
        cmake .. -DLLAMA_NATIVE=ON -DCMAKE_BUILD_TYPE=Release
        make -j$(nproc)

        # Create models directory
        mkdir -p /opt/llama.cpp/models
    '

    if [ "$is_local" = true ]; then
        eval "$commands"
    else
        ssh $host "$commands"
    fi
}
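
A quick way to confirm the build produced the server binary before continuing (a sketch, not part of this commit; paths follow the ones used above):

ls -lh /opt/llama.cpp/build/bin/llama-server
file /opt/llama.cpp/build/bin/llama-server   # expect an ARM/aarch64 ELF executable on the SBC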

download_model() {
    local host=$1
    local is_local=$2

    echo -e "${YELLOW}Downloading model: $LLAMA_MODEL...${NC}"

    local commands="
        cd /opt/llama.cpp/models
        if [ ! -f '$LLAMA_MODEL' ]; then
            wget -c '$LLAMA_URL/$LLAMA_MODEL'
        fi
        ls -lh /opt/llama.cpp/models/
    "

    if [ "$is_local" = true ]; then
        eval "$commands"
    else
        ssh $host "$commands"
    fi
}

create_llama_service() {
    cat > /tmp/llama-server.service << 'EOF'
[Unit]
Description=llama.cpp Server - Local LLM Inference
After=network.target

[Service]
Type=simple
User=root
WorkingDirectory=/opt/llama.cpp
ExecStart=/opt/llama.cpp/build/bin/llama-server \
    -m /opt/llama.cpp/models/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf \
    --host 0.0.0.0 \
    --port 8080 \
    -c 2048 \
    -ngl 0 \
    --threads 4
Restart=always
RestartSec=5
Environment=LLAMA_LOG_LEVEL=info

[Install]
WantedBy=multi-user.target
EOF
}

setup_llama_service() {
    local host=$1
    local is_local=$2

    echo -e "${YELLOW}Setting up llama.cpp systemd service...${NC}"

    create_llama_service

    if [ "$is_local" = true ]; then
        sudo mv /tmp/llama-server.service /etc/systemd/system/
        sudo systemctl daemon-reload
        sudo systemctl enable llama-server
        sudo systemctl start llama-server
    else
        scp /tmp/llama-server.service "$host:/tmp/"
        ssh $host "sudo mv /tmp/llama-server.service /etc/systemd/system/"
        ssh $host "sudo systemctl daemon-reload"
        ssh $host "sudo systemctl enable llama-server"
        ssh $host "sudo systemctl start llama-server"
    fi

    echo -e "${GREEN}llama.cpp server configured on port 8080${NC}"
}
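
After setup_llama_service runs, the unit can be verified with standard systemd tooling (assuming systemd on the target, as the service file above already does):

systemctl status llama-server            # should report active (running)
journalctl -u llama-server -f            # follow model-load and inference logs
curl -s http://127.0.0.1:8080/v1/models  # confirm the server answers on port 8080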

deploy_llama() {
    local host=$1
    local is_local=${2:-false}

    install_llama_cpp "$host" "$is_local"
    download_model "$host" "$is_local"
    setup_llama_service "$host" "$is_local"
}

show_help() {
    echo "Usage: $0 [target-host] [options]"
    echo ""
    echo "Options:"
    echo "  --local        Install on this machine"
    echo "  --with-ui      Also deploy embedded UI with kiosk mode"
    echo "  --with-llama   Install llama.cpp for local LLM inference"
    echo "  --model NAME   Specify GGUF model (default: TinyLlama 1.1B Q4)"
    echo "  --arch ARCH    Force target architecture"
    echo "  -h, --help     Show this help"
    echo ""
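
For example, a remote deployment with local inference might be invoked like this (hypothetical script name and host; the actual filename is not shown in this diff):

./deploy-embedded.sh pi@192.168.1.50 --with-llama --with-ui
./deploy-embedded.sh --local --with-llama --model tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf
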
@@ -305,6 +425,14 @@ while [[ $# -gt 0 ]]; do
            WITH_UI=true
            shift
            ;;
        --with-llama)
            WITH_LLAMA=true
            shift
            ;;
        --model)
            LLAMA_MODEL="$2"
            shift 2
            ;;
        --arch)
            ARCH="$2"
            shift 2
@@ -328,6 +456,9 @@ print_banner
if [ "$LOCAL_INSTALL" = true ]; then
    detect_arch
    deploy_local
    if [ "$WITH_LLAMA" = true ]; then
        deploy_llama "" true
    fi
elif [ -n "$TARGET_HOST" ]; then
    # Get remote arch
    echo "Detecting remote architecture..."
@@ -349,6 +480,9 @@ elif [ -n "$TARGET_HOST" ]; then
    install_cross_compiler
    build_botserver
    deploy_remote $TARGET_HOST
    if [ "$WITH_LLAMA" = true ]; then
        deploy_llama $TARGET_HOST false
    fi
else
    show_help
    exit 1
@@ -366,3 +500,8 @@ echo ""
if [ "$WITH_UI" = true ]; then
    echo "Access UI at: http://$TARGET_HOST:8088/embedded/"
fi
if [ "$WITH_LLAMA" = true ]; then
    echo ""
    echo "llama.cpp server running at: http://$TARGET_HOST:8080"
    echo "Test: curl http://$TARGET_HOST:8080/v1/models"
fi