Add llama.cpp support and embedded config for Orange Pi / Raspberry Pi

Rodrigo Rodriguez (Pragmatismo) 2025-12-12 13:46:04 -03:00
parent fed35e116a
commit 59a74fa3ec
2 changed files with 191 additions and 0 deletions

.env.embedded (new file, 52 lines added)

@@ -0,0 +1,52 @@
# BotServer Embedded Configuration
# For Orange Pi, Raspberry Pi, and other ARM SBCs
# Server
HOST=0.0.0.0
PORT=8088
RUST_LOG=info
# Database (SQLite for embedded, no PostgreSQL needed)
DATABASE_URL=sqlite:///opt/botserver/data/botserver.db
# LLM Configuration - Local llama.cpp
LLM_PROVIDER=llamacpp
LLM_API_URL=http://127.0.0.1:8080
LLM_MODEL=tinyllama
# Alternative: Use remote API
# LLM_PROVIDER=openai
# LLM_API_URL=https://api.openai.com/v1
# LLM_API_KEY=sk-...
# Alternative: Ollama (if installed)
# LLM_PROVIDER=ollama
# LLM_API_URL=http://127.0.0.1:11434
# LLM_MODEL=tinyllama
# Memory limits for embedded
MAX_CONTEXT_TOKENS=2048
MAX_RESPONSE_TOKENS=512
STREAMING_ENABLED=true
# Embedded UI
STATIC_FILES_PATH=/opt/botserver/ui
DEFAULT_UI=embedded
# WebSocket
WS_PING_INTERVAL=30
WS_TIMEOUT=300
# Security (change in production!)
JWT_SECRET=embedded-change-me-in-production
CORS_ORIGINS=*
# Logging
LOG_FILE=/opt/botserver/data/botserver.log
LOG_MAX_SIZE=10M
LOG_RETENTION=7
# Performance tuning for low-memory devices
# Uncomment for <2GB RAM devices
# RUST_BACKTRACE=0
# MALLOC_ARENA_MAX=2
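
A quick way to confirm that the LLM settings above point at a live endpoint is to call the llama.cpp server directly. The sketch below assumes llama-server is running on port 8080 and uses its OpenAI-compatible chat completions route; the model name and prompt are only placeholders:

curl http://127.0.0.1:8080/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{"model": "tinyllama", "messages": [{"role": "user", "content": "Hello"}], "max_tokens": 64}'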

deploy script (139 lines added)

@@ -27,9 +27,12 @@ NC='\033[0m'
# Default values
TARGET_HOST=""
WITH_UI=false
WITH_LLAMA=false
LOCAL_INSTALL=false
ARCH=""
SERVICE_NAME="botserver"
LLAMA_MODEL="tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"
LLAMA_URL="https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main"
print_banner() {
echo -e "${BLUE}"
@@ -271,12 +274,129 @@ deploy_local() {
fi
}
install_llama_cpp() {
local host=$1
local is_local=$2
echo -e "${YELLOW}Installing llama.cpp...${NC}"
local commands='
# Install dependencies
sudo apt-get update
sudo apt-get install -y build-essential cmake git
# Clone and build llama.cpp
cd /opt
if [ ! -d "llama.cpp" ]; then
sudo git clone https://github.com/ggerganov/llama.cpp.git
sudo chown -R $(whoami):$(whoami) llama.cpp
fi
cd llama.cpp
# Build with optimizations for ARM
mkdir -p build && cd build
cmake .. -DLLAMA_NATIVE=ON -DCMAKE_BUILD_TYPE=Release
make -j$(nproc)
# Create models directory
mkdir -p /opt/llama.cpp/models
'
if [ "$is_local" = true ]; then
eval "$commands"
else
ssh $host "$commands"
fi
}
download_model() {
local host=$1
local is_local=$2
echo -e "${YELLOW}Downloading model: $LLAMA_MODEL...${NC}"
local commands="
cd /opt/llama.cpp/models
if [ ! -f '$LLAMA_MODEL' ]; then
wget -c '$LLAMA_URL/$LLAMA_MODEL'
fi
ls -lh /opt/llama.cpp/models/
"
if [ "$is_local" = true ]; then
eval "$commands"
else
ssh $host "$commands"
fi
}
create_llama_service() {
# Unquoted delimiter so ${LLAMA_MODEL} is expanded into the unit file
cat > /tmp/llama-server.service << EOF
[Unit]
Description=llama.cpp Server - Local LLM Inference
After=network.target
[Service]
Type=simple
User=root
WorkingDirectory=/opt/llama.cpp
ExecStart=/opt/llama.cpp/build/bin/llama-server \\
-m /opt/llama.cpp/models/${LLAMA_MODEL} \\
--host 0.0.0.0 \\
--port 8080 \\
-c 2048 \\
-ngl 0 \\
--threads 4
Restart=always
RestartSec=5
Environment=LLAMA_LOG_LEVEL=info
[Install]
WantedBy=multi-user.target
EOF
}
setup_llama_service() {
local host=$1
local is_local=$2
echo -e "${YELLOW}Setting up llama.cpp systemd service...${NC}"
create_llama_service
if [ "$is_local" = true ]; then
sudo mv /tmp/llama-server.service /etc/systemd/system/
sudo systemctl daemon-reload
sudo systemctl enable llama-server
sudo systemctl start llama-server
else
scp /tmp/llama-server.service "$host:/tmp/"
ssh $host "sudo mv /tmp/llama-server.service /etc/systemd/system/"
ssh $host "sudo systemctl daemon-reload"
ssh $host "sudo systemctl enable llama-server"
ssh $host "sudo systemctl start llama-server"
fi
echo -e "${GREEN}llama.cpp server configured on port 8080${NC}"
}
deploy_llama() {
local host=$1
local is_local=${2:-false}
install_llama_cpp "$host" "$is_local"
download_model "$host" "$is_local"
setup_llama_service "$host" "$is_local"
}
show_help() {
echo "Usage: $0 [target-host] [options]"
echo ""
echo "Options:"
echo " --local Install on this machine"
echo " --with-ui Also deploy embedded UI with kiosk mode"
echo " --with-llama Install llama.cpp for local LLM inference"
echo " --model NAME Specify GGUF model (default: TinyLlama 1.1B Q4)"
echo " --arch ARCH Force target architecture" echo " --arch ARCH Force target architecture"
echo " -h, --help Show this help" echo " -h, --help Show this help"
echo "" echo ""
@@ -305,6 +425,14 @@ while [[ $# -gt 0 ]]; do
WITH_UI=true
shift
;;
--with-llama)
WITH_LLAMA=true
shift
;;
--model)
LLAMA_MODEL="$2"
shift 2
;;
--arch)
ARCH="$2"
shift 2
@@ -328,6 +456,9 @@ print_banner
if [ "$LOCAL_INSTALL" = true ]; then if [ "$LOCAL_INSTALL" = true ]; then
detect_arch detect_arch
deploy_local deploy_local
if [ "$WITH_LLAMA" = true ]; then
deploy_llama "" true
fi
elif [ -n "$TARGET_HOST" ]; then elif [ -n "$TARGET_HOST" ]; then
# Get remote arch # Get remote arch
echo "Detecting remote architecture..." echo "Detecting remote architecture..."
@@ -349,6 +480,9 @@ elif [ -n "$TARGET_HOST" ]; then
install_cross_compiler
build_botserver
deploy_remote $TARGET_HOST
if [ "$WITH_LLAMA" = true ]; then
deploy_llama $TARGET_HOST false
fi
else
show_help
exit 1
@@ -366,3 +500,8 @@ echo ""
if [ "$WITH_UI" = true ]; then if [ "$WITH_UI" = true ]; then
echo "Access UI at: http://$TARGET_HOST:8088/embedded/" echo "Access UI at: http://$TARGET_HOST:8088/embedded/"
fi fi
if [ "$WITH_LLAMA" = true ]; then
echo ""
echo "llama.cpp server running at: http://$TARGET_HOST:8080"
echo "Test: curl http://$TARGET_HOST:8080/v1/models"
fi
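
If the curl test printed at the end of the deploy fails, a few standard checks (a sketch, assuming systemd and the paths configured above) usually show whether the service, the model file, or the port is the problem; the /health route is provided by recent llama-server builds:

sudo systemctl status llama-server
journalctl -u llama-server -n 50
ls -lh /opt/llama.cpp/models/
curl http://127.0.0.1:8080/health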