This commit introduces comprehensive documentation and implementation for multi-agent orchestration capabilities: - Add IMPLEMENTATION-PLAN.md with 4-phase roadmap - Add Kubernetes deployment manifests (deployment.yaml, hpa.yaml) - Add database migrations for multi-agent tables (6.1.1, 6.1.2) - Implement A2A protocol for agent-to-agent communication - Implement user memory keywords for cross-session persistence - Implement model routing for dynamic L
539 lines
12 KiB
YAML
539 lines
12 KiB
YAML
# General Bots Kubernetes Deployment Configuration
|
|
# This file contains the core deployment resources for running General Bots
|
|
# in a Kubernetes cluster.
|
|
#
|
|
# Usage:
|
|
# kubectl apply -f deployment.yaml
|
|
#
|
|
# Prerequisites:
|
|
# - Kubernetes cluster 1.24+
|
|
# - kubectl configured
|
|
# - Secrets created (see secrets.yaml)
|
|
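# - Storage able to satisfy the PersistentVolumeClaims defined in this file
#   (StorageClass "standard"; ReadWriteMany and ReadOnlyMany access modes)
# - postgres-service and qdrant-service resolvable from the generalbots
#   namespace (the botserver init containers block until both accept
#   TCP connections)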
|
|
|
|
---
|
|
# Namespace that the namespaced resources in this file are created in.
apiVersion: v1
kind: Namespace
metadata:
  name: generalbots
  labels:
    app.kubernetes.io/name: generalbots
    app.kubernetes.io/component: namespace
|
|
|
|
---
|
|
# ConfigMap holding the non-sensitive botserver settings.
# Every value is a quoted string: ConfigMap data values must be strings, and
# quoting stops YAML from retyping "true", "8080", "0.7" and similar scalars.
apiVersion: v1
kind: ConfigMap
metadata:
  name: botserver-config
  namespace: generalbots
  labels:
    app.kubernetes.io/name: generalbots
    app.kubernetes.io/component: config
data:
  # --- Server ---
  SERVER_HOST: "0.0.0.0"
  SERVER_PORT: "8080"

  # --- LLM ---
  LLM_SERVER_HOST: "0.0.0.0"
  LLM_SERVER_PORT: "8081"
  LLM_SERVER_CTX_SIZE: "4096"
  LLM_SERVER_N_PREDICT: "1024"
  LLM_SERVER_PARALLEL: "6"
  LLM_SERVER_CONT_BATCHING: "true"
  LLM_CACHE: "true"
  LLM_CACHE_TTL: "3600"

  # --- Embedding ---
  EMBEDDING_PORT: "8082"

  # --- Multi-agent (A2A) ---
  A2A_ENABLED: "true"
  A2A_TIMEOUT: "30"
  A2A_MAX_HOPS: "5"

  # --- Memory ---
  USER_MEMORY_ENABLED: "true"
  USER_MEMORY_MAX_KEYS: "1000"
  EPISODIC_MEMORY_ENABLED: "true"

  # --- Hybrid RAG ---
  RAG_HYBRID_ENABLED: "true"
  RAG_DENSE_WEIGHT: "0.7"
  RAG_SPARSE_WEIGHT: "0.3"

  # --- Observability ---
  OBSERVABILITY_ENABLED: "true"
  OBSERVABILITY_METRICS_INTERVAL: "60"

  # --- Sandbox ---
  # Use 'lxc' or 'docker' for SANDBOX_RUNTIME if available.
  SANDBOX_RUNTIME: "process"
  SANDBOX_TIMEOUT: "30"
  SANDBOX_MEMORY_MB: "512"
|
|
|
|
---
|
|
# botserver Deployment: the main application workload.
# Runs 3 replicas and rolls them one surge pod at a time without ever
# dropping below the desired count (maxSurge 1 / maxUnavailable 0).
apiVersion: apps/v1
kind: Deployment
metadata:
  name: botserver
  namespace: generalbots
  labels:
    app.kubernetes.io/name: generalbots
    app.kubernetes.io/component: botserver
    app.kubernetes.io/version: "6.1.1"
spec:
  replicas: 3
  selector:
    matchLabels:
      app: botserver
  strategy:
    type: RollingUpdate
    rollingUpdate:
      maxSurge: 1
      maxUnavailable: 0
  template:
    metadata:
      labels:
        app: botserver
        app.kubernetes.io/name: generalbots
        app.kubernetes.io/component: botserver
      annotations:
        # Scrape hints pointing at the "metrics" container port below.
        prometheus.io/scrape: "true"
        prometheus.io/port: "9090"
        prometheus.io/path: "/metrics"
    spec:
      serviceAccountName: botserver
      # Pod-wide: refuse to run as root; run as UID 1000 with fsGroup 1000.
      securityContext:
        runAsNonRoot: true
        runAsUser: 1000
        fsGroup: 1000

      # Init containers: each loops until a TCP connection to its dependency
      # succeeds (nc -z), so the main container starts only afterwards.
      initContainers:
        - name: wait-for-postgres
          image: busybox:1.35
          command:
            - 'sh'
            - '-c'
            - 'until nc -z postgres-service 5432; do echo waiting for postgres; sleep 2; done'
        - name: wait-for-qdrant
          image: busybox:1.35
          command:
            - 'sh'
            - '-c'
            - 'until nc -z qdrant-service 6333; do echo waiting for qdrant; sleep 2; done'

      containers:
        - name: botserver
          # NOTE(review): floating ":latest" tag while the Deployment label
          # advertises version "6.1.1" - consider pinning; confirm the tag.
          image: generalbots/botserver:latest
          imagePullPolicy: Always
          ports:
            - name: http
              containerPort: 8080
              protocol: TCP
            - name: metrics
              containerPort: 9090
              protocol: TCP

          # All keys of the botserver-config ConfigMap become env vars.
          envFrom:
            - configMapRef:
                name: botserver-config

          env:
            # Connection settings and credentials come from botserver-secrets.
            - name: DATABASE_URL
              valueFrom:
                secretKeyRef:
                  name: botserver-secrets
                  key: database-url
            - name: QDRANT_URL
              valueFrom:
                secretKeyRef:
                  name: botserver-secrets
                  key: qdrant-url
            # optional: the pod still starts when llm-api-key is absent.
            - name: LLM_KEY
              valueFrom:
                secretKeyRef:
                  name: botserver-secrets
                  key: llm-api-key
                  optional: true
            # Downward API: expose the pod's own name and namespace.
            - name: POD_NAME
              valueFrom:
                fieldRef:
                  fieldPath: metadata.name
            - name: POD_NAMESPACE
              valueFrom:
                fieldRef:
                  fieldPath: metadata.namespace

          resources:
            requests:
              memory: "512Mi"
              cpu: "250m"
            limits:
              memory: "2Gi"
              cpu: "2000m"

          # Restart the container after 3 consecutive failed /health checks.
          livenessProbe:
            httpGet:
              path: /health
              port: http
            initialDelaySeconds: 30
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 3

          # Withhold traffic while /ready is failing.
          readinessProbe:
            httpGet:
              path: /ready
              port: http
            initialDelaySeconds: 10
            periodSeconds: 5
            timeoutSeconds: 3
            failureThreshold: 3

          # Slow-start allowance: up to 30 failed checks, 10s apart.
          startupProbe:
            httpGet:
              path: /health
              port: http
            initialDelaySeconds: 10
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 30

          volumeMounts:
            - name: data
              mountPath: /data
            - name: models
              mountPath: /models
              readOnly: true
            - name: gbai-packages
              mountPath: /packages

      # Each volume is backed by a PersistentVolumeClaim of the same purpose.
      volumes:
        - name: data
          persistentVolumeClaim:
            claimName: botserver-data
        - name: models
          persistentVolumeClaim:
            claimName: llm-models
        - name: gbai-packages
          persistentVolumeClaim:
            claimName: gbai-packages

      # Soft preference: avoid placing two botserver pods on the same node.
      affinity:
        podAntiAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 100
              podAffinityTerm:
                labelSelector:
                  matchExpressions:
                    - key: app
                      operator: In
                      values:
                        - botserver
                topologyKey: kubernetes.io/hostname

      # Best-effort even spread across zones (ScheduleAnyway never blocks).
      topologySpreadConstraints:
        - maxSkew: 1
          topologyKey: topology.kubernetes.io/zone
          whenUnsatisfiable: ScheduleAnyway
          labelSelector:
            matchLabels:
              app: botserver
|
|
|
|
---
|
|
# llm-server Deployment: local model inference, 2 replicas.
# GPU scheduling is left opt-in: the GPU resource lines and the nodeSelector
# are commented out, while the GPU-taint toleration is always present.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: llm-server
  namespace: generalbots
  labels:
    app.kubernetes.io/name: generalbots
    app.kubernetes.io/component: llm-server
spec:
  replicas: 2
  selector:
    matchLabels:
      app: llm-server
  template:
    metadata:
      labels:
        app: llm-server
        app.kubernetes.io/name: generalbots
        app.kubernetes.io/component: llm-server
    spec:
      containers:
        - name: llm-server
          image: generalbots/llm-server:latest
          imagePullPolicy: Always
          ports:
            - name: http
              containerPort: 8081
              protocol: TCP

          env:
            # Model file is read from the read-only /models mount below.
            - name: MODEL_PATH
              value: "/models/DeepSeek-R1-Distill-Qwen-7B-Q4_K_M.gguf"
            - name: CTX_SIZE
              value: "4096"
            - name: N_PREDICT
              value: "1024"
            - name: PARALLEL
              value: "6"
            - name: CONT_BATCHING
              value: "true"
            # Adjust based on available GPU memory.
            - name: GPU_LAYERS
              value: "35"

          resources:
            requests:
              memory: "8Gi"
              cpu: "2000m"
              # Uncomment for GPU support
              # nvidia.com/gpu: 1
            limits:
              memory: "24Gi"
              cpu: "8000m"
              # nvidia.com/gpu: 1

          volumeMounts:
            - name: models
              mountPath: /models
              readOnly: true

          # First liveness check only after 120s, then every 30s.
          livenessProbe:
            httpGet:
              path: /health
              port: http
            initialDelaySeconds: 120
            periodSeconds: 30
            timeoutSeconds: 10

          # Readiness uses the same /health endpoint, starting after 60s.
          readinessProbe:
            httpGet:
              path: /health
              port: http
            initialDelaySeconds: 60
            periodSeconds: 10
            timeoutSeconds: 5

      volumes:
        - name: models
          persistentVolumeClaim:
            claimName: llm-models

      # Schedule on nodes with GPU
      # nodeSelector:
      #   nvidia.com/gpu.present: "true"

      # Allow scheduling onto nodes tainted nvidia.com/gpu:NoSchedule.
      tolerations:
        - key: "nvidia.com/gpu"
          operator: "Exists"
          effect: "NoSchedule"
|
|
|
|
---
|
|
# ClusterIP Service in front of the botserver pods.
# Port 80 forwards to container port 8080; 9090 passes through for metrics.
apiVersion: v1
kind: Service
metadata:
  name: botserver-service
  namespace: generalbots
  labels:
    app.kubernetes.io/name: generalbots
    app.kubernetes.io/component: service
spec:
  type: ClusterIP
  selector:
    app: botserver
  ports:
    - name: http
      port: 80
      targetPort: 8080
      protocol: TCP
    - name: metrics
      port: 9090
      targetPort: 9090
      protocol: TCP
|
|
|
|
---
|
|
# ClusterIP Service in front of the llm-server pods (8081 -> 8081).
apiVersion: v1
kind: Service
metadata:
  name: llm-server-service
  namespace: generalbots
  labels:
    app.kubernetes.io/name: generalbots
    app.kubernetes.io/component: llm-service
spec:
  type: ClusterIP
  selector:
    app: llm-server
  ports:
    - name: http
      port: 8081
      targetPort: 8081
      protocol: TCP
|
|
|
|
---
|
|
# Headless Service (clusterIP: None) selecting the botserver pods, for
# StatefulSet-like per-pod DNS (if needed).
apiVersion: v1
kind: Service
metadata:
  name: botserver-headless
  namespace: generalbots
  labels:
    app.kubernetes.io/name: generalbots
    app.kubernetes.io/component: headless-service
spec:
  clusterIP: None
  selector:
    app: botserver
  ports:
    - name: http
      port: 8080
      targetPort: 8080
|
|
|
|
---
|
|
# Ingress for external access.
# Terminates TLS for bot.example.com (certificate stored in botserver-tls,
# issuer named by the cert-manager annotation) and routes every path to
# botserver-service on port 80.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: botserver-ingress
  namespace: generalbots
  labels:
    app.kubernetes.io/name: generalbots
    app.kubernetes.io/component: ingress
  annotations:
    nginx.ingress.kubernetes.io/ssl-redirect: "true"
    nginx.ingress.kubernetes.io/proxy-body-size: "50m"
    # Long read/send timeouts keep long-lived connections from being cut.
    nginx.ingress.kubernetes.io/proxy-read-timeout: "300"
    nginx.ingress.kubernetes.io/proxy-send-timeout: "300"
    # NOTE(review): ingress-nginx proxies WebSockets without an annotation;
    # this key may be a no-op for that controller - confirm before relying
    # on it.
    nginx.ingress.kubernetes.io/websocket-services: "botserver-service"
    cert-manager.io/cluster-issuer: "letsencrypt-prod"
spec:
  # Replaces the deprecated "kubernetes.io/ingress.class" annotation; an
  # IngressClass named "nginx" must exist in the cluster.
  ingressClassName: nginx
  tls:
    - hosts:
        - bot.example.com
      secretName: botserver-tls
  rules:
    - host: bot.example.com
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: botserver-service
                port:
                  number: 80
|
|
|
|
---
|
|
# ServiceAccount named "botserver" in the generalbots namespace.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: botserver
  namespace: generalbots
  labels:
    app.kubernetes.io/name: generalbots
    app.kubernetes.io/component: serviceaccount
|
|
|
|
---
|
|
# Role for botserver: namespace-scoped, read-only access.
# Carries the same app.kubernetes.io/* labels as the other resources so
# label selectors (e.g. -l app.kubernetes.io/name=generalbots) include it.
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: botserver-role
  namespace: generalbots
  labels:
    app.kubernetes.io/name: generalbots
    app.kubernetes.io/component: rbac
rules:
  # NOTE(review): this allows reading every Secret in the namespace;
  # narrow it if botserver only needs its own - confirm against the app.
  - apiGroups: [""]
    resources: ["configmaps", "secrets"]
    verbs: ["get", "list", "watch"]
  # Pods may be read and listed but not watched.
  - apiGroups: [""]
    resources: ["pods"]
    verbs: ["get", "list"]
|
|
|
|
---
|
|
# RoleBinding: grants botserver-role to the botserver ServiceAccount.
# Carries the same app.kubernetes.io/* labels as the other resources so
# label selectors (e.g. -l app.kubernetes.io/name=generalbots) include it.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: botserver-rolebinding
  namespace: generalbots
  labels:
    app.kubernetes.io/name: generalbots
    app.kubernetes.io/component: rbac
subjects:
  - kind: ServiceAccount
    name: botserver
    namespace: generalbots
roleRef:
  kind: Role
  name: botserver-role
  apiGroup: rbac.authorization.k8s.io
|
|
|
|
---
|
|
# PodDisruptionBudget for high availability: voluntary disruptions must
# leave at least 2 pods labelled app=botserver available.
apiVersion: policy/v1
kind: PodDisruptionBudget
metadata:
  name: botserver-pdb
  namespace: generalbots
  labels:
    app.kubernetes.io/name: generalbots
    app.kubernetes.io/component: pdb
spec:
  minAvailable: 2
  selector:
    matchLabels:
      app: botserver
|
|
|
|
---
|
|
# PersistentVolumeClaim for botserver data: 50Gi, ReadWriteMany.
# NOTE(review): the "standard" StorageClass must support ReadWriteMany for
# this claim to bind - confirm for the target cluster.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: botserver-data
  namespace: generalbots
  labels:
    app.kubernetes.io/name: generalbots
    app.kubernetes.io/component: storage
spec:
  accessModes:
    - ReadWriteMany
  storageClassName: standard
  resources:
    requests:
      storage: 50Gi
|
|
|
|
---
|
|
# PersistentVolumeClaim for LLM models: 100Gi, ReadOnlyMany.
# NOTE(review): the claim only requests read-only access, so the model files
# presumably have to be placed on the volume by some other means - confirm
# how it is populated.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: llm-models
  namespace: generalbots
  labels:
    app.kubernetes.io/name: generalbots
    app.kubernetes.io/component: storage
spec:
  accessModes:
    - ReadOnlyMany
  storageClassName: standard
  resources:
    requests:
      storage: 100Gi
|
|
|
|
---
|
|
# PersistentVolumeClaim for .gbai packages: 20Gi, ReadWriteMany.
# NOTE(review): the "standard" StorageClass must support ReadWriteMany for
# this claim to bind - confirm for the target cluster.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: gbai-packages
  namespace: generalbots
  labels:
    app.kubernetes.io/name: generalbots
    app.kubernetes.io/component: storage
spec:
  accessModes:
    - ReadWriteMany
  storageClassName: standard
  resources:
    requests:
      storage: 20Gi
|