diff --git a/.env.embedded b/.env.embedded
new file mode 100644
index 00000000..2555996f
--- /dev/null
+++ b/.env.embedded
@@ -0,0 +1,52 @@
+# BotServer Embedded Configuration
+# For Orange Pi, Raspberry Pi, and other ARM SBCs
+
+# Server
+HOST=0.0.0.0
+PORT=8088
+RUST_LOG=info
+
+# Database (SQLite for embedded, no PostgreSQL needed)
+DATABASE_URL=sqlite:///opt/botserver/data/botserver.db
+
+# LLM Configuration - Local llama.cpp
+LLM_PROVIDER=llamacpp
+LLM_API_URL=http://127.0.0.1:8080
+LLM_MODEL=tinyllama
+
+# Alternative: Use remote API
+# LLM_PROVIDER=openai
+# LLM_API_URL=https://api.openai.com/v1
+# LLM_API_KEY=sk-...
+
+# Alternative: Ollama (if installed)
+# LLM_PROVIDER=ollama
+# LLM_API_URL=http://127.0.0.1:11434
+# LLM_MODEL=tinyllama
+
+# Memory limits for embedded
+MAX_CONTEXT_TOKENS=2048
+MAX_RESPONSE_TOKENS=512
+STREAMING_ENABLED=true
+
+# Embedded UI
+STATIC_FILES_PATH=/opt/botserver/ui
+DEFAULT_UI=embedded
+
+# WebSocket
+WS_PING_INTERVAL=30
+WS_TIMEOUT=300
+
+# Security (change in production!)
+JWT_SECRET=embedded-change-me-in-production
+CORS_ORIGINS=*
+
+# Logging
+LOG_FILE=/opt/botserver/data/botserver.log
+LOG_MAX_SIZE=10M
+LOG_RETENTION=7
+
+# Performance tuning for low-memory devices
+# Uncomment for <2GB RAM devices
+# RUST_BACKTRACE=0
+# MALLOC_ARENA_MAX=2
diff --git a/scripts/deploy-embedded.sh b/scripts/deploy-embedded.sh
index e75a4897..8c5b233b 100644
--- a/scripts/deploy-embedded.sh
+++ b/scripts/deploy-embedded.sh
@@ -27,9 +27,12 @@ NC='\033[0m'
 # Default values
 TARGET_HOST=""
 WITH_UI=false
+WITH_LLAMA=false
 LOCAL_INSTALL=false
 ARCH=""
 SERVICE_NAME="botserver"
+LLAMA_MODEL="tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"
+LLAMA_URL="https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main"
 
 print_banner() {
     echo -e "${BLUE}"
@@ -271,12 +274,129 @@ deploy_local() {
     fi
 }
 
+install_llama_cpp() {
+    local host=$1
+    local is_local=$2
+
+    echo -e "${YELLOW}Installing llama.cpp...${NC}"
+
+    local commands='
+        # Install dependencies
+        sudo apt-get update
+        sudo apt-get install -y build-essential cmake git
+
+        # Clone and build llama.cpp
+        cd /opt
+        if [ ! -d "llama.cpp" ]; then
+            sudo git clone https://github.com/ggerganov/llama.cpp.git
+            sudo chown -R $(whoami):$(whoami) llama.cpp
+        fi
+        cd llama.cpp
+
+        # Build with optimizations for ARM
+        mkdir -p build && cd build
+        cmake .. -DLLAMA_NATIVE=ON -DCMAKE_BUILD_TYPE=Release
+        make -j$(nproc)
+
+        # Create models directory
+        mkdir -p /opt/llama.cpp/models
+    '
+
+    if [ "$is_local" = true ]; then
+        eval "$commands"
+    else
+        ssh $host "$commands"
+    fi
+}
+
+download_model() {
+    local host=$1
+    local is_local=$2
+
+    echo -e "${YELLOW}Downloading model: $LLAMA_MODEL...${NC}"
+
+    local commands="
+        cd /opt/llama.cpp/models
+        if [ ! -f '$LLAMA_MODEL' ]; then
+            wget -c '$LLAMA_URL/$LLAMA_MODEL'
+        fi
+        ls -lh /opt/llama.cpp/models/
+    "
+
+    if [ "$is_local" = true ]; then
+        eval "$commands"
+    else
+        ssh $host "$commands"
+    fi
+}
+
+create_llama_service() {
+    cat > /tmp/llama-server.service << 'EOF'
+[Unit]
+Description=llama.cpp Server - Local LLM Inference
+After=network.target
+
+[Service]
+Type=simple
+User=root
+WorkingDirectory=/opt/llama.cpp
+ExecStart=/opt/llama.cpp/build/bin/llama-server \
+    -m /opt/llama.cpp/models/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf \
+    --host 0.0.0.0 \
+    --port 8080 \
+    -c 2048 \
+    -ngl 0 \
+    --threads 4
+Restart=always
+RestartSec=5
+Environment=LLAMA_LOG_LEVEL=info
+
+[Install]
+WantedBy=multi-user.target
+EOF
+}
+
+setup_llama_service() {
+    local host=$1
+    local is_local=$2
+
+    echo -e "${YELLOW}Setting up llama.cpp systemd service...${NC}"
+
+    create_llama_service
+
+    if [ "$is_local" = true ]; then
+        sudo mv /tmp/llama-server.service /etc/systemd/system/
+        sudo systemctl daemon-reload
+        sudo systemctl enable llama-server
+        sudo systemctl start llama-server
+    else
+        scp /tmp/llama-server.service "$host:/tmp/"
+        ssh $host "sudo mv /tmp/llama-server.service /etc/systemd/system/"
+        ssh $host "sudo systemctl daemon-reload"
+        ssh $host "sudo systemctl enable llama-server"
+        ssh $host "sudo systemctl start llama-server"
+    fi
+
+    echo -e "${GREEN}llama.cpp server configured on port 8080${NC}"
+}
+
+deploy_llama() {
+    local host=$1
+    local is_local=${2:-false}
+
+    install_llama_cpp "$host" "$is_local"
+    download_model "$host" "$is_local"
+    setup_llama_service "$host" "$is_local"
+}
+
 show_help() {
     echo "Usage: $0 [target-host] [options]"
     echo ""
     echo "Options:"
     echo "  --local          Install on this machine"
     echo "  --with-ui        Also deploy embedded UI with kiosk mode"
+    echo "  --with-llama     Install llama.cpp for local LLM inference"
+    echo "  --model NAME     Specify GGUF model (default: TinyLlama 1.1B Q4)"
     echo "  --arch ARCH      Force target architecture"
     echo "  -h, --help       Show this help"
     echo ""
@@ -305,6 +425,14 @@ while [[ $# -gt 0 ]]; do
            WITH_UI=true
            shift
            ;;
+        --with-llama)
+            WITH_LLAMA=true
+            shift
+            ;;
+        --model)
+            LLAMA_MODEL="$2"
+            shift 2
+            ;;
        --arch)
            ARCH="$2"
            shift 2
@@ -328,6 +456,9 @@ print_banner
 if [ "$LOCAL_INSTALL" = true ]; then
     detect_arch
     deploy_local
+    if [ "$WITH_LLAMA" = true ]; then
+        deploy_llama "" true
+    fi
 elif [ -n "$TARGET_HOST" ]; then
     # Get remote arch
     echo "Detecting remote architecture..."
@@ -349,6 +480,9 @@ elif [ -n "$TARGET_HOST" ]; then
     install_cross_compiler
     build_botserver
     deploy_remote $TARGET_HOST
+    if [ "$WITH_LLAMA" = true ]; then
+        deploy_llama $TARGET_HOST false
+    fi
 else
     show_help
     exit 1
@@ -366,3 +500,8 @@ echo ""
 if [ "$WITH_UI" = true ]; then
     echo "Access UI at: http://$TARGET_HOST:8088/embedded/"
 fi
+if [ "$WITH_LLAMA" = true ]; then
+    echo ""
+    echo "llama.cpp server running at: http://$TARGET_HOST:8080"
+    echo "Test: curl http://$TARGET_HOST:8080/v1/models"
+fi
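Post-deploy smoke test, a minimal sketch (not part of the patch): it assumes llama-server's stock OpenAI-compatible endpoints and the host/port set in the systemd unit above, which is also the LLM_API_URL that .env.embedded points BotServer at.

# Check that llama-server is up; replace 127.0.0.1 with the target host
# when the deployment is remote.
curl -s http://127.0.0.1:8080/health
curl -s http://127.0.0.1:8080/v1/models

# Send a small chat completion through the same OpenAI-compatible endpoint
# BotServer will use (model name here mirrors LLM_MODEL in .env.embedded).
curl -s http://127.0.0.1:8080/v1/chat/completions \
    -H "Content-Type: application/json" \
    -d '{
          "model": "tinyllama",
          "messages": [{"role": "user", "content": "Say hello in five words."}],
          "max_tokens": 64
        }'

On a <2GB board, expect the first request to be slow while the model pages in; subsequent requests should respond within a few seconds at the 2048-token context configured above.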