Add llama.cpp support and embedded config for Orange Pi / Raspberry Pi

Rodrigo Rodriguez (Pragmatismo) 2025-12-12 13:46:04 -03:00
parent fed35e116a
commit 59a74fa3ec
2 changed files with 191 additions and 0 deletions

.env.embedded (new file, 52 lines added)

@@ -0,0 +1,52 @@
# BotServer Embedded Configuration
# For Orange Pi, Raspberry Pi, and other ARM SBCs
# Server
HOST=0.0.0.0
PORT=8088
RUST_LOG=info
# Database (SQLite for embedded, no PostgreSQL needed)
DATABASE_URL=sqlite:///opt/botserver/data/botserver.db
# LLM Configuration - Local llama.cpp
LLM_PROVIDER=llamacpp
LLM_API_URL=http://127.0.0.1:8080
LLM_MODEL=tinyllama
# Alternative: Use remote API
# LLM_PROVIDER=openai
# LLM_API_URL=https://api.openai.com/v1
# LLM_API_KEY=sk-...
# Alternative: Ollama (if installed)
# LLM_PROVIDER=ollama
# LLM_API_URL=http://127.0.0.1:11434
# LLM_MODEL=tinyllama
# Memory limits for embedded
MAX_CONTEXT_TOKENS=2048
MAX_RESPONSE_TOKENS=512
STREAMING_ENABLED=true
# Embedded UI
STATIC_FILES_PATH=/opt/botserver/ui
DEFAULT_UI=embedded
# WebSocket
WS_PING_INTERVAL=30
WS_TIMEOUT=300
# Security (change in production!)
JWT_SECRET=embedded-change-me-in-production
CORS_ORIGINS=*
# Logging
LOG_FILE=/opt/botserver/data/botserver.log
LOG_MAX_SIZE=10M
LOG_RETENTION=7
# Performance tuning for low-memory devices
# Uncomment for <2GB RAM devices
# RUST_BACKTRACE=0
# MALLOC_ARENA_MAX=2
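
A quick way to confirm that the LLM settings above point at a live endpoint is to call the llama.cpp server directly. The sketch below assumes llama-server is running on port 8080 and uses its OpenAI-compatible chat completions route; the model name and prompt are only placeholders:

curl http://127.0.0.1:8080/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{"model": "tinyllama", "messages": [{"role": "user", "content": "Hello"}], "max_tokens": 64}'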

deploy script (139 lines added)

@@ -27,9 +27,12 @@ NC='\033[0m'
# Default values
TARGET_HOST=""
WITH_UI=false
WITH_LLAMA=false
LOCAL_INSTALL=false
ARCH=""
SERVICE_NAME="botserver"
LLAMA_MODEL="tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"
LLAMA_URL="https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main"
print_banner() {
echo -e "${BLUE}"
@@ -271,12 +274,129 @@ deploy_local() {
fi
}
install_llama_cpp() {
local host=$1
local is_local=$2
echo -e "${YELLOW}Installing llama.cpp...${NC}"
local commands='
# Install dependencies
sudo apt-get update
sudo apt-get install -y build-essential cmake git
# Clone and build llama.cpp
cd /opt
if [ ! -d "llama.cpp" ]; then
sudo git clone https://github.com/ggerganov/llama.cpp.git
sudo chown -R $(whoami):$(whoami) llama.cpp
fi
cd llama.cpp
# Build with optimizations for ARM
mkdir -p build && cd build
cmake .. -DLLAMA_NATIVE=ON -DCMAKE_BUILD_TYPE=Release
make -j$(nproc)
# Create models directory
mkdir -p /opt/llama.cpp/models
'
if [ "$is_local" = true ]; then
eval "$commands"
else
ssh $host "$commands"
fi
}
download_model() {
local host=$1
local is_local=$2
echo -e "${YELLOW}Downloading model: $LLAMA_MODEL...${NC}"
local commands="
cd /opt/llama.cpp/models
if [ ! -f '$LLAMA_MODEL' ]; then
wget -c '$LLAMA_URL/$LLAMA_MODEL'
fi
ls -lh /opt/llama.cpp/models/
"
if [ "$is_local" = true ]; then
eval "$commands"
else
ssh $host "$commands"
fi
}
create_llama_service() {
# Unquoted delimiter so ${LLAMA_MODEL} is expanded into the unit file
cat > /tmp/llama-server.service << EOF
[Unit]
Description=llama.cpp Server - Local LLM Inference
After=network.target
[Service]
Type=simple
User=root
WorkingDirectory=/opt/llama.cpp
ExecStart=/opt/llama.cpp/build/bin/llama-server \\
-m /opt/llama.cpp/models/${LLAMA_MODEL} \\
--host 0.0.0.0 \\
--port 8080 \\
-c 2048 \\
-ngl 0 \\
--threads 4
Restart=always
RestartSec=5
Environment=LLAMA_LOG_LEVEL=info
[Install]
WantedBy=multi-user.target
EOF
}
setup_llama_service() {
local host=$1
local is_local=$2
echo -e "${YELLOW}Setting up llama.cpp systemd service...${NC}"
create_llama_service
if [ "$is_local" = true ]; then
sudo mv /tmp/llama-server.service /etc/systemd/system/
sudo systemctl daemon-reload
sudo systemctl enable llama-server
sudo systemctl start llama-server
else
scp /tmp/llama-server.service "$host:/tmp/"
ssh $host "sudo mv /tmp/llama-server.service /etc/systemd/system/"
ssh $host "sudo systemctl daemon-reload"
ssh $host "sudo systemctl enable llama-server"
ssh $host "sudo systemctl start llama-server"
fi
echo -e "${GREEN}llama.cpp server configured on port 8080${NC}"
}
deploy_llama() {
local host=$1
local is_local=${2:-false}
install_llama_cpp "$host" "$is_local"
download_model "$host" "$is_local"
setup_llama_service "$host" "$is_local"
}
show_help() {
echo "Usage: $0 [target-host] [options]"
echo ""
echo "Options:"
echo " --local Install on this machine"
echo " --with-ui Also deploy embedded UI with kiosk mode"
echo " --with-llama Install llama.cpp for local LLM inference"
echo " --model NAME Specify GGUF model (default: TinyLlama 1.1B Q4)"
echo " --arch ARCH Force target architecture" echo " --arch ARCH Force target architecture"
echo " -h, --help Show this help" echo " -h, --help Show this help"
echo "" echo ""
@@ -305,6 +425,14 @@ while [[ $# -gt 0 ]]; do
WITH_UI=true
shift
;;
--with-llama)
WITH_LLAMA=true
shift
;;
--model)
LLAMA_MODEL="$2"
shift 2
;;
--arch)
ARCH="$2"
shift 2
@@ -328,6 +456,9 @@ print_banner
if [ "$LOCAL_INSTALL" = true ]; then if [ "$LOCAL_INSTALL" = true ]; then
detect_arch detect_arch
deploy_local deploy_local
if [ "$WITH_LLAMA" = true ]; then
deploy_llama "" true
fi
elif [ -n "$TARGET_HOST" ]; then elif [ -n "$TARGET_HOST" ]; then
# Get remote arch # Get remote arch
echo "Detecting remote architecture..." echo "Detecting remote architecture..."
@@ -349,6 +480,9 @@ elif [ -n "$TARGET_HOST" ]; then
install_cross_compiler
build_botserver
deploy_remote $TARGET_HOST
if [ "$WITH_LLAMA" = true ]; then
deploy_llama $TARGET_HOST false
fi
else
show_help
exit 1
@@ -366,3 +500,8 @@ echo ""
if [ "$WITH_UI" = true ]; then if [ "$WITH_UI" = true ]; then
echo "Access UI at: http://$TARGET_HOST:8088/embedded/" echo "Access UI at: http://$TARGET_HOST:8088/embedded/"
fi fi
if [ "$WITH_LLAMA" = true ]; then
echo ""
echo "llama.cpp server running at: http://$TARGET_HOST:8080"
echo "Test: curl http://$TARGET_HOST:8080/v1/models"
fi
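
If the curl test printed at the end of the deploy fails, a few standard checks (a sketch, assuming systemd and the paths configured above) usually show whether the service, the model file, or the port is the problem; the /health route is provided by recent llama-server builds:

sudo systemctl status llama-server
journalctl -u llama-server -n 50
ls -lh /opt/llama.cpp/models/
curl http://127.0.0.1:8080/health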