# botserver/deploy/kubernetes/deployment.yaml
# General Bots Kubernetes Deployment Configuration
# This file contains the core deployment resources for running General Bots
# in a Kubernetes cluster.
#
# Usage:
# kubectl apply -f deployment.yaml
#
# Prerequisites:
# - Kubernetes cluster 1.24+
# - kubectl configured
# - Secrets created (see secrets.yaml)
# - PersistentVolumeClaim for data (optional)
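#
# The botserver Deployment reads DATABASE_URL, QDRANT_URL, and LLM_KEY from a
# Secret named botserver-secrets. The sketch below is for reference only and the
# values are placeholders (assumptions); the canonical definition lives in
# secrets.yaml.
#
#   apiVersion: v1
#   kind: Secret
#   metadata:
#     name: botserver-secrets
#     namespace: generalbots
#   type: Opaque
#   stringData:
#     database-url: "postgres://USER:PASSWORD@postgres-service:5432/DBNAME"
#     qdrant-url: "http://qdrant-service:6333"
#     llm-api-key: "CHANGE_ME"   # marked optional in the Deployment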
---
apiVersion: v1
kind: Namespace
metadata:
  name: generalbots
  labels:
    app.kubernetes.io/name: generalbots
    app.kubernetes.io/component: namespace
---
# ConfigMap for non-sensitive configuration
apiVersion: v1
kind: ConfigMap
metadata:
  name: botserver-config
  namespace: generalbots
  labels:
    app.kubernetes.io/name: generalbots
    app.kubernetes.io/component: config
data:
  # Server configuration
  SERVER_HOST: "0.0.0.0"
  SERVER_PORT: "8080"
  # LLM configuration
  LLM_SERVER_HOST: "0.0.0.0"
  LLM_SERVER_PORT: "8081"
  LLM_SERVER_CTX_SIZE: "4096"
  LLM_SERVER_N_PREDICT: "1024"
  LLM_SERVER_PARALLEL: "6"
  LLM_SERVER_CONT_BATCHING: "true"
  LLM_CACHE: "true"
  LLM_CACHE_TTL: "3600"
  # Embedding configuration
  EMBEDDING_PORT: "8082"
  # Multi-agent configuration
  A2A_ENABLED: "true"
  A2A_TIMEOUT: "30"
  A2A_MAX_HOPS: "5"
  # Memory configuration
  USER_MEMORY_ENABLED: "true"
  USER_MEMORY_MAX_KEYS: "1000"
  EPISODIC_MEMORY_ENABLED: "true"
  # Hybrid RAG configuration
  RAG_HYBRID_ENABLED: "true"
  RAG_DENSE_WEIGHT: "0.7"
  RAG_SPARSE_WEIGHT: "0.3"
  # Observability
  OBSERVABILITY_ENABLED: "true"
  OBSERVABILITY_METRICS_INTERVAL: "60"
  # Sandbox configuration
  SANDBOX_RUNTIME: "process"  # Use 'lxc' or 'docker' if available
  SANDBOX_TIMEOUT: "30"
  SANDBOX_MEMORY_MB: "512"
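
# All keys above are injected into the botserver container via envFrom (see the
# Deployment below); running pods only pick up ConfigMap changes after a
# restart, e.g.:
#   kubectl -n generalbots rollout restart deployment/botserver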
---
# Main botserver Deployment
apiVersion: apps/v1
kind: Deployment
metadata:
  name: botserver
  namespace: generalbots
  labels:
    app.kubernetes.io/name: generalbots
    app.kubernetes.io/component: botserver
    app.kubernetes.io/version: "6.1.1"
spec:
  replicas: 3
  selector:
    matchLabels:
      app: botserver
  strategy:
    type: RollingUpdate
    rollingUpdate:
      maxSurge: 1
      maxUnavailable: 0
  template:
    metadata:
      labels:
        app: botserver
        app.kubernetes.io/name: generalbots
        app.kubernetes.io/component: botserver
      annotations:
        prometheus.io/scrape: "true"
        prometheus.io/port: "9090"
        prometheus.io/path: "/metrics"
    spec:
      serviceAccountName: botserver
      securityContext:
        runAsNonRoot: true
        runAsUser: 1000
        fsGroup: 1000
      # Init containers to wait for dependencies
      initContainers:
        - name: wait-for-postgres
          image: busybox:1.35
          command: ['sh', '-c', 'until nc -z postgres-service 5432; do echo waiting for postgres; sleep 2; done']
        - name: wait-for-qdrant
          image: busybox:1.35
          command: ['sh', '-c', 'until nc -z qdrant-service 6333; do echo waiting for qdrant; sleep 2; done']
      containers:
        - name: botserver
          image: generalbots/botserver:latest
          imagePullPolicy: Always
          ports:
            - name: http
              containerPort: 8080
              protocol: TCP
            - name: metrics
              containerPort: 9090
              protocol: TCP
          envFrom:
            - configMapRef:
                name: botserver-config
          env:
            - name: DATABASE_URL
              valueFrom:
                secretKeyRef:
                  name: botserver-secrets
                  key: database-url
            - name: QDRANT_URL
              valueFrom:
                secretKeyRef:
                  name: botserver-secrets
                  key: qdrant-url
            - name: LLM_KEY
              valueFrom:
                secretKeyRef:
                  name: botserver-secrets
                  key: llm-api-key
                  optional: true
            - name: POD_NAME
              valueFrom:
                fieldRef:
                  fieldPath: metadata.name
            - name: POD_NAMESPACE
              valueFrom:
                fieldRef:
                  fieldPath: metadata.namespace
          resources:
            requests:
              memory: "512Mi"
              cpu: "250m"
            limits:
              memory: "2Gi"
              cpu: "2000m"
          livenessProbe:
            httpGet:
              path: /health
              port: http
            initialDelaySeconds: 30
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 3
          readinessProbe:
            httpGet:
              path: /ready
              port: http
            initialDelaySeconds: 10
            periodSeconds: 5
            timeoutSeconds: 3
            failureThreshold: 3
          startupProbe:
            httpGet:
              path: /health
              port: http
            initialDelaySeconds: 10
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 30
          volumeMounts:
            - name: data
              mountPath: /data
            - name: models
              mountPath: /models
              readOnly: true
            - name: gbai-packages
              mountPath: /packages
      volumes:
        - name: data
          persistentVolumeClaim:
            claimName: botserver-data
        - name: models
          persistentVolumeClaim:
            claimName: llm-models
        - name: gbai-packages
          persistentVolumeClaim:
            claimName: gbai-packages
      affinity:
        podAntiAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 100
              podAffinityTerm:
                labelSelector:
                  matchExpressions:
                    - key: app
                      operator: In
                      values:
                        - botserver
                topologyKey: kubernetes.io/hostname
      topologySpreadConstraints:
        - maxSkew: 1
          topologyKey: topology.kubernetes.io/zone
          whenUnsatisfiable: ScheduleAnyway
          labelSelector:
            matchLabels:
              app: botserver
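
# Rollout check for the botserver Deployment (standard kubectl, shown as an
# example):
#   kubectl -n generalbots rollout status deployment/botserver
#   kubectl -n generalbots get pods -l app=botserver -o wide
# With maxUnavailable: 0, replacement pods must pass the /ready probe before old
# ones are removed; the startupProbe tolerates up to ~300s (30 x 10s) of startup.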
---
# LLM Server Deployment (for local model inference)
apiVersion: apps/v1
kind: Deployment
metadata:
  name: llm-server
  namespace: generalbots
  labels:
    app.kubernetes.io/name: generalbots
    app.kubernetes.io/component: llm-server
spec:
  replicas: 2
  selector:
    matchLabels:
      app: llm-server
  template:
    metadata:
      labels:
        app: llm-server
        app.kubernetes.io/name: generalbots
        app.kubernetes.io/component: llm-server
    spec:
      containers:
        - name: llm-server
          image: generalbots/llm-server:latest
          imagePullPolicy: Always
          ports:
            - name: http
              containerPort: 8081
              protocol: TCP
          env:
            - name: MODEL_PATH
              value: "/models/DeepSeek-R1-Distill-Qwen-7B-Q4_K_M.gguf"
            - name: CTX_SIZE
              value: "4096"
            - name: N_PREDICT
              value: "1024"
            - name: PARALLEL
              value: "6"
            - name: CONT_BATCHING
              value: "true"
            - name: GPU_LAYERS
              value: "35"  # Adjust based on available GPU memory
          resources:
            requests:
              memory: "8Gi"
              cpu: "2000m"
              # Uncomment for GPU support
              # nvidia.com/gpu: 1
            limits:
              memory: "24Gi"
              cpu: "8000m"
              # nvidia.com/gpu: 1
          volumeMounts:
            - name: models
              mountPath: /models
              readOnly: true
          livenessProbe:
            httpGet:
              path: /health
              port: http
            initialDelaySeconds: 120
            periodSeconds: 30
            timeoutSeconds: 10
          readinessProbe:
            httpGet:
              path: /health
              port: http
            initialDelaySeconds: 60
            periodSeconds: 10
            timeoutSeconds: 5
      volumes:
        - name: models
          persistentVolumeClaim:
            claimName: llm-models
      # Schedule on nodes with GPU
      # nodeSelector:
      #   nvidia.com/gpu.present: "true"
      tolerations:
        - key: "nvidia.com/gpu"
          operator: "Exists"
          effect: "NoSchedule"
---
# Service for botserver
apiVersion: v1
kind: Service
metadata:
  name: botserver-service
  namespace: generalbots
  labels:
    app.kubernetes.io/name: generalbots
    app.kubernetes.io/component: service
spec:
  type: ClusterIP
  selector:
    app: botserver
  ports:
    - name: http
      port: 80
      targetPort: 8080
      protocol: TCP
    - name: metrics
      port: 9090
      targetPort: 9090
      protocol: TCP
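
# Local smoke test against this Service, using the same /health endpoint as the
# probes (example only):
#   kubectl -n generalbots port-forward svc/botserver-service 8080:80
#   curl http://localhost:8080/health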
---
# Service for LLM server
apiVersion: v1
kind: Service
metadata:
  name: llm-server-service
  namespace: generalbots
  labels:
    app.kubernetes.io/name: generalbots
    app.kubernetes.io/component: llm-service
spec:
  type: ClusterIP
  selector:
    app: llm-server
  ports:
    - name: http
      port: 8081
      targetPort: 8081
      protocol: TCP
---
# Headless service for StatefulSet-like DNS (if needed)
apiVersion: v1
kind: Service
metadata:
  name: botserver-headless
  namespace: generalbots
  labels:
    app.kubernetes.io/name: generalbots
    app.kubernetes.io/component: headless-service
spec:
  clusterIP: None
  selector:
    app: botserver
  ports:
    - name: http
      port: 8080
      targetPort: 8080
---
# Ingress for external access
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: botserver-ingress
  namespace: generalbots
  labels:
    app.kubernetes.io/name: generalbots
    app.kubernetes.io/component: ingress
  annotations:
    kubernetes.io/ingress.class: nginx
    nginx.ingress.kubernetes.io/ssl-redirect: "true"
    nginx.ingress.kubernetes.io/proxy-body-size: "50m"
    nginx.ingress.kubernetes.io/proxy-read-timeout: "300"
    nginx.ingress.kubernetes.io/proxy-send-timeout: "300"
    nginx.ingress.kubernetes.io/websocket-services: "botserver-service"
    cert-manager.io/cluster-issuer: "letsencrypt-prod"
spec:
  tls:
    - hosts:
        - bot.example.com
      secretName: botserver-tls
  rules:
    - host: bot.example.com
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: botserver-service
                port:
                  number: 80
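
# External check once DNS for bot.example.com points at the ingress controller
# and cert-manager has issued the botserver-tls certificate (example only):
#   curl -I https://bot.example.com/health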
---
# ServiceAccount
apiVersion: v1
kind: ServiceAccount
metadata:
  name: botserver
  namespace: generalbots
  labels:
    app.kubernetes.io/name: generalbots
    app.kubernetes.io/component: serviceaccount
---
# Role for botserver
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: botserver-role
  namespace: generalbots
rules:
  - apiGroups: [""]
    resources: ["configmaps", "secrets"]
    verbs: ["get", "list", "watch"]
  - apiGroups: [""]
    resources: ["pods"]
    verbs: ["get", "list"]
---
# RoleBinding
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: botserver-rolebinding
  namespace: generalbots
subjects:
  - kind: ServiceAccount
    name: botserver
    namespace: generalbots
roleRef:
  kind: Role
  name: botserver-role
  apiGroup: rbac.authorization.k8s.io
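
# RBAC sanity check for the Role/RoleBinding above (standard kubectl):
#   kubectl auth can-i get configmaps -n generalbots \
#     --as=system:serviceaccount:generalbots:botserver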
---
# PodDisruptionBudget for high availability
apiVersion: policy/v1
kind: PodDisruptionBudget
metadata:
  name: botserver-pdb
  namespace: generalbots
  labels:
    app.kubernetes.io/name: generalbots
    app.kubernetes.io/component: pdb
spec:
  minAvailable: 2
  selector:
    matchLabels:
      app: botserver
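
# With replicas: 3 on the botserver Deployment and minAvailable: 2 here, at most
# one pod may be evicted at a time during voluntary disruptions (node drains,
# upgrades). Status check:
#   kubectl -n generalbots get pdb botserver-pdb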
---
# PersistentVolumeClaim for botserver data
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: botserver-data
  namespace: generalbots
  labels:
    app.kubernetes.io/name: generalbots
    app.kubernetes.io/component: storage
spec:
  accessModes:
    - ReadWriteMany
  storageClassName: standard
  resources:
    requests:
      storage: 50Gi
---
# PersistentVolumeClaim for LLM models
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: llm-models
  namespace: generalbots
  labels:
    app.kubernetes.io/name: generalbots
    app.kubernetes.io/component: storage
spec:
  accessModes:
    - ReadOnlyMany
  storageClassName: standard
  resources:
    requests:
      storage: 100Gi
---
# PersistentVolumeClaim for .gbai packages
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: gbai-packages
  namespace: generalbots
  labels:
    app.kubernetes.io/name: generalbots
    app.kubernetes.io/component: storage
spec:
  accessModes:
    - ReadWriteMany
  storageClassName: standard
  resources:
    requests:
      storage: 20Gi
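
# Storage note: ReadWriteMany (botserver-data, gbai-packages) and ReadOnlyMany
# (llm-models) need a storage class that supports shared access (e.g. NFS or
# CephFS); on many clusters the "standard" class is ReadWriteOnce-only, so
# adjust storageClassName to match your environment. Binding check:
#   kubectl -n generalbots get pvc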