6 changed files with 872 additions and 232 deletions
--- a/.forgejo/workflows/botserver.yaml
+++ b/.forgejo/workflows/botserver.yaml
@ -7,7 +7,7 @@ on:
    branches: ["main"]
 env:
-  CARGO_BUILD_JOBS: 8
+  CARGO_BUILD_JOBS: 5
  CARGO_NET_RETRY: 10
 jobs:
--- a/Cargo.toml
+++ b/Cargo.toml
@ -10,7 +10,7 @@ features = ["database", "i18n"]
 [features]
 # ===== DEFAULT =====
-default = ["chat", "automation", "drive", "tasks", "cache", "directory", "llm", "crawler", "embed-ui"]
+default = ["chat", "automation", "drive", "tasks", "cache", "directory", "llm", "crawler"]
 # ===== CORE INFRASTRUCTURE (Can be used standalone) =====
 scripting = ["dep:rhai"]
--- a/deploy/README.md
+++ b/deploy/README.md
@ -1,214 +0,0 @@
 # Deployment Guide
 ## Overview
 This directory contains deployment configurations and scripts for General Bots in production environments.
 ## Deployment Methods
 ### 1. Traditional Server Deployment
 #### Prerequisites
 - Server with Linux (Ubuntu 20.04+ recommended)
 - Rust 1.70+ toolchain
 - PostgreSQL, Redis, Qdrant installed or managed by botserver
 - At least 4GB RAM, 2 CPU cores
 #### Steps
 1. **Build Release Binaries:**
 ```bash
 cargo build --release -p botserver -p botui
 ```
 2. **Deploy to Production:**
 ```bash
 # Copy binaries
 sudo cp target/release/botserver /opt/gbo/bin/
 sudo cp target/release/botui /opt/gbo/bin/
 # Deploy UI files
 ./botserver/deploy/deploy-ui.sh /opt/gbo
 # Set permissions
 sudo chmod +x /opt/gbo/bin/botserver
 sudo chmod +x /opt/gbo/bin/botui
 ```
 3. **Configure Environment:**
 ```bash
 # Copy and edit environment file
 cp botserver/.env.example /opt/gbo/.env
 nano /opt/gbo/.env
 ```
 4. **Start Services:**
 ```bash
 # Using systemd (recommended)
 sudo systemctl start botserver
 sudo systemctl start botui
 # Or manually
 /opt/gbo/bin/botserver --noconsole
 /opt/gbo/bin/botui
 ```
 ### 2. Kubernetes Deployment
 #### Prerequisites
 - Kubernetes cluster 1.24+
 - kubectl configured
 - Persistent volumes provisioned
 #### Steps
 1. **Create Namespace:**
 ```bash
 kubectl create namespace generalbots
 ```
 2. **Deploy UI Files:**
 ```bash
 # Create ConfigMap with UI files
 kubectl create configmap botui-files \
  --from-file=botui/ui/suite/ \
  -n generalbots
 ```
 3. **Apply Deployment:**
 ```bash
 kubectl apply -f botserver/deploy/kubernetes/deployment.yaml
 ```
 4. **Verify Deployment:**
 ```bash
 kubectl get pods -n generalbots
 kubectl logs -f deployment/botserver -n generalbots
 ```
 ## Troubleshooting
 ### UI Files Not Found Error
 **Symptom:**
 ```
 Asset 'suite/index.html' not found in embedded binary, falling back to filesystem
 Failed to load suite UI: No such file or directory
 ```
 **Solution:**
 **For Traditional Deployment:**
 ```bash
 # Run the deployment script
 ./botserver/deploy/deploy-ui.sh /opt/gbo
 # Verify files exist
 ls -la /opt/gbo/bin/ui/suite/index.html
 ```
 **For Kubernetes:**
 ```bash
 # Recreate UI ConfigMap
 kubectl delete configmap botui-files -n generalbots
 kubectl create configmap botui-files \
  --from-file=botui/ui/suite/ \
  -n generalbots
 # Restart pods
 kubectl rollout restart deployment/botserver -n generalbots
 ```
 ### Port Already in Use
 ```bash
 # Force-kill whatever is listening on the botserver (8088) and botui (3000) ports
 lsof -ti:8088 | xargs kill -9
 lsof -ti:3000 | xargs kill -9
 ```
 ### Permission Denied
 ```bash
 # Fix ownership and permissions
 sudo chown -R gbo:gbo /opt/gbo
 sudo chmod -R 755 /opt/gbo/bin
 ```
 ## Maintenance
 ### Update UI Files
 **Traditional:**
 ```bash
 ./botserver/deploy/deploy-ui.sh /opt/gbo
 sudo systemctl restart botui
 ```
 **Kubernetes:**
 ```bash
 kubectl create configmap botui-files \
  --from-file=botui/ui/suite/ \
  -n generalbots \
  --dry-run=client -o yaml | kubectl apply -f -
 kubectl rollout restart deployment/botserver -n generalbots
 ```
 ### Update Binaries
 1. Build new release
 2. Stop services
 3. Replace binaries
 4. Start services
 ### Backup
 ```bash
 # Backup database
 pg_dump -U postgres -d gb > backup.sql
 # Backup UI files (if customized)
 tar -czf ui-backup.tar.gz /opt/gbo/bin/ui/
 # Backup configuration
 cp /opt/gbo/.env /opt/gbo/.env.backup
 ```
 ## Monitoring
 ### Check Logs
 **Traditional:**
 ```bash
 tail -f /opt/gbo/logs/botserver.log
 tail -f /opt/gbo/logs/botui.log
 ```
 **Kubernetes:**
 ```bash
 kubectl logs -f deployment/botserver -n generalbots
 ```
 ### Health Checks
 ```bash
 # Check server health
 curl http://localhost:8088/health
 # Check botui health
 curl http://localhost:3000/health
 ```
 ## Security
 - Always use HTTPS in production
 - Rotate secrets regularly
 - Update dependencies monthly
 - Review logs for suspicious activity
 - Use firewall to restrict access
 ## Support
 For issues or questions:
 - Documentation: https://docs.pragmatismo.com.br
 - GitHub Issues: https://github.com/GeneralBots/BotServer/issues
--- a/deploy/deploy-ui.sh
+++ b/deploy/deploy-ui.sh
@ -1,16 +0,0 @@
 #!/bin/bash
 # Copies the botui "suite" UI assets from the source tree into
 # <DEPLOY_DIR>/bin/ui/suite.
 #
 # Usage: deploy-ui.sh [DEPLOY_DIR]   (DEPLOY_DIR defaults to /opt/gbo)
 set -e
 DEPLOY_DIR="${1:-/opt/gbo}"
 # The source tree root is two levels above the directory holding this script.
 SRC_DIR="$(dirname "$0")/../.."
 UI_SRC="$SRC_DIR/botui/ui/suite"
 # Fail with a clear message instead of an unexpanded-glob error from cp.
 if [ ! -d "$UI_SRC" ]; then
   echo "UI source directory not found: $UI_SRC" >&2
   exit 1
 fi
 echo "Deploying UI files to $DEPLOY_DIR"
 mkdir -p "$DEPLOY_DIR/bin/ui/suite"
 # "/." copies the directory contents including dot-files, which "/*" would skip,
 # and also works when the directory is empty.
 cp -r "$UI_SRC/." "$DEPLOY_DIR/bin/ui/suite/"
 echo "UI files deployed successfully"
 echo "Location: $DEPLOY_DIR/bin/ui/suite"
 # Show a short sample of what was deployed.
 ls -la "$DEPLOY_DIR/bin/ui/suite" | head -20
--- a/deploy/kubernetes/deployment.yaml
+++ b/deploy/kubernetes/deployment.yaml
@ -0,0 +1,539 @@
 # General Bots Kubernetes Deployment Configuration
 # This file contains the core deployment resources for running General Bots
 # in a Kubernetes cluster.
 #
 # Usage:
 #   kubectl apply -f deployment.yaml
 #
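 # Prerequisites:
 #   - Kubernetes cluster 1.24+
 #   - kubectl configured
 #   - Secret "botserver-secrets" in the generalbots namespace with the keys
 #     database-url and qdrant-url (llm-api-key is optional); see secrets.yaml
 #   - A storage class that can satisfy the PersistentVolumeClaims declared at
 #     the end of this file (the botserver and llm-server pods mount them)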
 ---
 # Namespace that scopes all General Bots resources defined in this file.
 apiVersion: v1
 kind: Namespace
 metadata:
  labels:
    app.kubernetes.io/component: namespace
    app.kubernetes.io/name: generalbots
  name: generalbots
 ---
 # ConfigMap for non-sensitive configuration.
 # Every key below is injected as an environment variable into the botserver
 # container through its envFrom/configMapRef. ConfigMap data values must be
 # strings, which is why numbers and booleans are quoted.
 apiVersion: v1
 kind: ConfigMap
 metadata:
  name: botserver-config
  namespace: generalbots
  labels:
    app.kubernetes.io/name: generalbots
    app.kubernetes.io/component: config
 data:
  # Server configuration (SERVER_PORT equals the botserver "http" containerPort)
  SERVER_HOST: "0.0.0.0"
  SERVER_PORT: "8080"
  # LLM configuration
  # NOTE(review): CTX_SIZE / N_PREDICT / PARALLEL / CONT_BATCHING are repeated as
  # literal env values on the llm-server Deployment; keep the two in sync.
  LLM_SERVER_HOST: "0.0.0.0"
  LLM_SERVER_PORT: "8081"
  LLM_SERVER_CTX_SIZE: "4096"
  LLM_SERVER_N_PREDICT: "1024"
  LLM_SERVER_PARALLEL: "6"
  LLM_SERVER_CONT_BATCHING: "true"
  LLM_CACHE: "true"
  LLM_CACHE_TTL: "3600"
  # Embedding configuration
  EMBEDDING_PORT: "8082"
  # Multi-agent configuration
  A2A_ENABLED: "true"
  A2A_TIMEOUT: "30"
  A2A_MAX_HOPS: "5"
  # Memory configuration
  USER_MEMORY_ENABLED: "true"
  USER_MEMORY_MAX_KEYS: "1000"
  EPISODIC_MEMORY_ENABLED: "true"
  # Hybrid RAG configuration (the dense and sparse weights sum to 1.0)
  RAG_HYBRID_ENABLED: "true"
  RAG_DENSE_WEIGHT: "0.7"
  RAG_SPARSE_WEIGHT: "0.3"
  # Observability
  OBSERVABILITY_ENABLED: "true"
  OBSERVABILITY_METRICS_INTERVAL: "60"
  # Sandbox configuration
  SANDBOX_RUNTIME: "process"  # Use 'lxc' or 'docker' if available
  SANDBOX_TIMEOUT: "30"
  SANDBOX_MEMORY_MB: "512"
 ---
 # Main botserver Deployment.
 # Runs the botserver container behind botserver-service, configured from the
 # botserver-config ConfigMap and the botserver-secrets Secret, with three
 # PersistentVolumeClaims mounted for data, models and packages.
 apiVersion: apps/v1
 kind: Deployment
 metadata:
  name: botserver
  namespace: generalbots
  labels:
    app.kubernetes.io/name: generalbots
    app.kubernetes.io/component: botserver
    app.kubernetes.io/version: "6.1.1"
 spec:
  # NOTE(review): botserver-hpa in hpa.yaml also manages this replica count
  # (minReplicas 3); re-applying this manifest resets the count to 3.
  replicas: 3
  # The Services, the PodDisruptionBudget and the HPA-related selectors in these
  # manifests all key on the same "app: botserver" pod label.
  selector:
    matchLabels:
      app: botserver
  strategy:
    type: RollingUpdate
    rollingUpdate:
      # Add one new pod at a time and never remove an old one before its
      # replacement is ready.
      maxSurge: 1
      maxUnavailable: 0
  template:
    metadata:
      labels:
        app: botserver
        app.kubernetes.io/name: generalbots
        app.kubernetes.io/component: botserver
      # Annotation-based scrape hints pointing at the "metrics" container port.
      annotations:
        prometheus.io/scrape: "true"
        prometheus.io/port: "9090"
        prometheus.io/path: "/metrics"
    spec:
      serviceAccountName: botserver
      # Pod-level security context: applies to the init containers as well.
      securityContext:
        runAsNonRoot: true
        runAsUser: 1000
        fsGroup: 1000
      # Init container to wait for dependencies
      # Each one loops until a TCP connection to the dependency succeeds.
      initContainers:
        - name: wait-for-postgres
          image: busybox:1.35
          command: ['sh', '-c', 'until nc -z postgres-service 5432; do echo waiting for postgres; sleep 2; done']
        - name: wait-for-qdrant
          image: busybox:1.35
          command: ['sh', '-c', 'until nc -z qdrant-service 6333; do echo waiting for qdrant; sleep 2; done']
      containers:
        - name: botserver
          # NOTE(review): a floating "latest" tag with pullPolicy Always means
          # each pod start may pull a different build; consider pinning a version.
          image: generalbots/botserver:latest
          imagePullPolicy: Always
          ports:
            - name: http
              containerPort: 8080
              protocol: TCP
            - name: metrics
              containerPort: 9090
              protocol: TCP
          # Non-sensitive settings: every key of the ConfigMap becomes an env var.
          envFrom:
            - configMapRef:
                name: botserver-config
          env:
            # Sensitive settings come from the botserver-secrets Secret.
            - name: DATABASE_URL
              valueFrom:
                secretKeyRef:
                  name: botserver-secrets
                  key: database-url
            - name: QDRANT_URL
              valueFrom:
                secretKeyRef:
                  name: botserver-secrets
                  key: qdrant-url
            # optional: the pod still starts when the llm-api-key key is absent.
            - name: LLM_KEY
              valueFrom:
                secretKeyRef:
                  name: botserver-secrets
                  key: llm-api-key
                  optional: true
            # Downward API: expose the pod's own name and namespace.
            - name: POD_NAME
              valueFrom:
                fieldRef:
                  fieldPath: metadata.name
            - name: POD_NAMESPACE
              valueFrom:
                fieldRef:
                  fieldPath: metadata.namespace
          resources:
            requests:
              memory: "512Mi"
              cpu: "250m"
            limits:
              memory: "2Gi"
              cpu: "2000m"
          # Restart the container after 3 consecutive failed /health checks.
          livenessProbe:
            httpGet:
              path: /health
              port: http
            initialDelaySeconds: 30
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 3
          # Remove the pod from Service endpoints while /ready is failing.
          readinessProbe:
            httpGet:
              path: /ready
              port: http
            initialDelaySeconds: 10
            periodSeconds: 5
            timeoutSeconds: 3
            failureThreshold: 3
          # Liveness and readiness checks are held back until this probe passes;
          # it tolerates up to 30 failures at 10 s intervals (about 5 minutes).
          startupProbe:
            httpGet:
              path: /health
              port: http
            initialDelaySeconds: 10
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 30
          volumeMounts:
            - name: data
              mountPath: /data
            - name: models
              mountPath: /models
              readOnly: true
            - name: gbai-packages
              mountPath: /packages
      # All three volumes are backed by PVCs declared at the end of this file.
      volumes:
        - name: data
          persistentVolumeClaim:
            claimName: botserver-data
        - name: models
          persistentVolumeClaim:
            claimName: llm-models
        - name: gbai-packages
          persistentVolumeClaim:
            claimName: gbai-packages
      # Soft preference: spread botserver pods across different nodes.
      affinity:
        podAntiAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 100
              podAffinityTerm:
                labelSelector:
                  matchExpressions:
                    - key: app
                      operator: In
                      values:
                        - botserver
                topologyKey: kubernetes.io/hostname
      # Soft preference: keep the per-zone pod count within 1 of each other.
      topologySpreadConstraints:
        - maxSkew: 1
          topologyKey: topology.kubernetes.io/zone
          whenUnsatisfiable: ScheduleAnyway
          labelSelector:
            matchLabels:
              app: botserver
 ---
 # LLM Server Deployment (for local model inference).
 # Serves a GGUF model read from the shared llm-models volume on port 8081.
 apiVersion: apps/v1
 kind: Deployment
 metadata:
  name: llm-server
  namespace: generalbots
  labels:
    app.kubernetes.io/name: generalbots
    app.kubernetes.io/component: llm-server
 spec:
  # NOTE(review): llm-server-hpa in hpa.yaml also manages this replica count
  # (minReplicas 2); re-applying this manifest resets the count to 2.
  replicas: 2
  selector:
    matchLabels:
      app: llm-server
  template:
    metadata:
      labels:
        app: llm-server
        app.kubernetes.io/name: generalbots
        app.kubernetes.io/component: llm-server
    spec:
      containers:
        - name: llm-server
          image: generalbots/llm-server:latest
          imagePullPolicy: Always
          ports:
            - name: http
              containerPort: 8081
              protocol: TCP
          # NOTE(review): CTX_SIZE / N_PREDICT / PARALLEL / CONT_BATCHING repeat
          # the LLM_SERVER_* values in the botserver-config ConfigMap; keep in sync.
          env:
            - name: MODEL_PATH
              value: "/models/DeepSeek-R1-Distill-Qwen-7B-Q4_K_M.gguf"
            - name: CTX_SIZE
              value: "4096"
            - name: N_PREDICT
              value: "1024"
            - name: PARALLEL
              value: "6"
            - name: CONT_BATCHING
              value: "true"
            - name: GPU_LAYERS
              value: "35"  # Adjust based on available GPU memory
          resources:
            requests:
              memory: "8Gi"
              cpu: "2000m"
              # Uncomment for GPU support
              # nvidia.com/gpu: 1
            limits:
              memory: "24Gi"
              cpu: "8000m"
              # nvidia.com/gpu: 1
          volumeMounts:
            - name: models
              mountPath: /models
              readOnly: true
          # Long initial delays leave time for the model to load before the
          # first health check; no startupProbe is defined for this container.
          livenessProbe:
            httpGet:
              path: /health
              port: http
            initialDelaySeconds: 120
            periodSeconds: 30
            timeoutSeconds: 10
          readinessProbe:
            httpGet:
              path: /health
              port: http
            initialDelaySeconds: 60
            periodSeconds: 10
            timeoutSeconds: 5
      volumes:
        - name: models
          persistentVolumeClaim:
            claimName: llm-models
      # Schedule on nodes with GPU
      # nodeSelector:
      #   nvidia.com/gpu.present: "true"
      # The toleration only permits scheduling onto GPU-tainted nodes; with the
      # nodeSelector above commented out, it does not require a GPU node.
      tolerations:
        - key: "nvidia.com/gpu"
          operator: "Exists"
          effect: "NoSchedule"
 ---
 # ClusterIP Service in front of the botserver pods.
 # Port 80 forwards to the container's HTTP port (8080); 9090 exposes metrics.
 apiVersion: v1
 kind: Service
 metadata:
  labels:
    app.kubernetes.io/component: service
    app.kubernetes.io/name: generalbots
  name: botserver-service
  namespace: generalbots
 spec:
  ports:
    - name: http
      protocol: TCP
      port: 80
      targetPort: 8080
    - name: metrics
      protocol: TCP
      port: 9090
      targetPort: 9090
  selector:
    app: botserver
  type: ClusterIP
 ---
 # ClusterIP Service in front of the llm-server pods (HTTP on 8081).
 apiVersion: v1
 kind: Service
 metadata:
  labels:
    app.kubernetes.io/component: llm-service
    app.kubernetes.io/name: generalbots
  name: llm-server-service
  namespace: generalbots
 spec:
  ports:
    - name: http
      protocol: TCP
      port: 8081
      targetPort: 8081
  selector:
    app: llm-server
  type: ClusterIP
 ---
 # Headless Service (clusterIP: None) for the botserver pods: DNS resolves to
 # the individual pod IPs instead of a single virtual IP (use if needed).
 apiVersion: v1
 kind: Service
 metadata:
  labels:
    app.kubernetes.io/component: headless-service
    app.kubernetes.io/name: generalbots
  name: botserver-headless
  namespace: generalbots
 spec:
  ports:
    - name: http
      port: 8080
      targetPort: 8080
  selector:
    app: botserver
  clusterIP: None
 ---
 # Ingress for external access.
 # Routes all traffic for bot.example.com to botserver-service:80 and terminates
 # TLS with a certificate issued by cert-manager into the botserver-tls Secret.
 # The controller is selected through spec.ingressClassName; this replaces the
 # deprecated kubernetes.io/ingress.class annotation.
 apiVersion: networking.k8s.io/v1
 kind: Ingress
 metadata:
  name: botserver-ingress
  namespace: generalbots
  labels:
    app.kubernetes.io/name: generalbots
    app.kubernetes.io/component: ingress
  annotations:
    nginx.ingress.kubernetes.io/ssl-redirect: "true"
    # Allow uploads up to 50 MB and long-lived requests (300 s read/send).
    nginx.ingress.kubernetes.io/proxy-body-size: "50m"
    nginx.ingress.kubernetes.io/proxy-read-timeout: "300"
    nginx.ingress.kubernetes.io/proxy-send-timeout: "300"
    # NOTE(review): confirm the installed controller recognises this annotation;
    # it may be a no-op depending on which NGINX ingress controller is in use.
    nginx.ingress.kubernetes.io/websocket-services: "botserver-service"
    cert-manager.io/cluster-issuer: "letsencrypt-prod"
 spec:
  # Requires an IngressClass named "nginx" to exist in the cluster.
  ingressClassName: nginx
  tls:
    - hosts:
        - bot.example.com
      secretName: botserver-tls
  rules:
    - host: bot.example.com
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: botserver-service
                port:
                  number: 80
 ---
 # ServiceAccount the botserver pods run as (referenced by serviceAccountName
 # in the botserver Deployment and bound to botserver-role).
 apiVersion: v1
 kind: ServiceAccount
 metadata:
  labels:
    app.kubernetes.io/component: serviceaccount
    app.kubernetes.io/name: generalbots
  name: botserver
  namespace: generalbots
 ---
 # Role for botserver: read-only access to core-API objects in this namespace.
 # NOTE(review): get/list/watch on "secrets" covers every Secret in the
 # namespace; confirm botserver really reads Secrets through the API, since the
 # values it consumes via secretKeyRef do not need this permission.
 apiVersion: rbac.authorization.k8s.io/v1
 kind: Role
 metadata:
  namespace: generalbots
  name: botserver-role
 rules:
  - apiGroups:
      - ""
    resources:
      - configmaps
      - secrets
    verbs:
      - get
      - list
      - watch
  - apiGroups:
      - ""
    resources:
      - pods
    verbs:
      - get
      - list
 ---
 # RoleBinding: grants botserver-role to the botserver ServiceAccount.
 apiVersion: rbac.authorization.k8s.io/v1
 kind: RoleBinding
 metadata:
  namespace: generalbots
  name: botserver-rolebinding
 roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: botserver-role
 subjects:
  - kind: ServiceAccount
    namespace: generalbots
    name: botserver
 ---
 # PodDisruptionBudget for high availability: voluntary disruptions (such as
 # node drains) must leave at least 2 botserver pods available.
 apiVersion: policy/v1
 kind: PodDisruptionBudget
 metadata:
  labels:
    app.kubernetes.io/component: pdb
    app.kubernetes.io/name: generalbots
  name: botserver-pdb
  namespace: generalbots
 spec:
  selector:
    matchLabels:
      app: botserver
  minAvailable: 2
 ---
 # PersistentVolumeClaim for botserver data (mounted at /data by botserver).
 # ReadWriteMany because every botserver replica mounts the same claim.
 # NOTE(review): confirm the "standard" storage class supports ReadWriteMany
 # in the target cluster.
 apiVersion: v1
 kind: PersistentVolumeClaim
 metadata:
  name: botserver-data
  namespace: generalbots
  labels:
    app.kubernetes.io/name: generalbots
    app.kubernetes.io/component: storage
 spec:
  accessModes:
    - ReadWriteMany
  storageClassName: standard
  resources:
    requests:
      storage: 50Gi
 ---
 # PersistentVolumeClaim for LLM models (mounted read-only at /models by both
 # the botserver and llm-server pods).
 # NOTE(review): the claim only requests ReadOnlyMany, so nothing in these
 # manifests can write model files into it; confirm how the volume is
 # populated and that the "standard" storage class supports this access mode.
 apiVersion: v1
 kind: PersistentVolumeClaim
 metadata:
  name: llm-models
  namespace: generalbots
  labels:
    app.kubernetes.io/name: generalbots
    app.kubernetes.io/component: storage
 spec:
  accessModes:
    - ReadOnlyMany
  storageClassName: standard
  resources:
    requests:
      storage: 100Gi
 ---
 # PersistentVolumeClaim for .gbai packages (mounted at /packages by botserver).
 # ReadWriteMany because every botserver replica mounts the same claim.
 apiVersion: v1
 kind: PersistentVolumeClaim
 metadata:
  name: gbai-packages
  namespace: generalbots
  labels:
    app.kubernetes.io/name: generalbots
    app.kubernetes.io/component: storage
 spec:
  accessModes:
    - ReadWriteMany
  storageClassName: standard
  resources:
    requests:
      storage: 20Gi
--- a/deploy/kubernetes/hpa.yaml
+++ b/deploy/kubernetes/hpa.yaml
@ -0,0 +1,331 @@
 # General Bots Kubernetes HorizontalPodAutoscaler Configuration
 # This file contains autoscaling configurations for General Bots components.
 #
 # Usage:
 #   kubectl apply -f hpa.yaml
 #
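 # Prerequisites:
 #   - Metrics Server installed in cluster
 #   - deployment.yaml already applied
 #   - Vertical Pod Autoscaler CRDs installed (for the VerticalPodAutoscaler objects)
 #   - Prometheus Operator CRDs installed (for ServiceMonitor and PrometheusRule)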
 ---
 # HPA for botserver - scales based on CPU and memory.
 # Keeps the botserver Deployment between 3 and 20 replicas. Utilization
 # targets are percentages of the containers' resource *requests*, and the HPA
 # follows whichever metric asks for the most replicas.
 apiVersion: autoscaling/v2
 kind: HorizontalPodAutoscaler
 metadata:
  name: botserver-hpa
  namespace: generalbots
  labels:
    app.kubernetes.io/name: generalbots
    app.kubernetes.io/component: hpa
 spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: botserver
  # minReplicas matches the Deployment's replicas (3) and stays above the
  # PodDisruptionBudget's minAvailable (2).
  minReplicas: 3
  maxReplicas: 20
  metrics:
    # Scale based on CPU utilization
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 70
    # Scale based on memory utilization
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: 80
    # Scale based on requests per second (requires custom metrics)
    # Uncomment if using Prometheus Adapter
    # - type: Pods
    #   pods:
    #     metric:
    #       name: http_requests_per_second
    #     target:
    #       type: AverageValue
    #       averageValue: 100
  behavior:
    scaleDown:
      stabilizationWindowSeconds: 300  # 5 minutes cooldown before scaling down
      # Per 60 s: remove at most 10% of the pods or 2 pods, whichever is fewer.
      policies:
        - type: Percent
          value: 10
          periodSeconds: 60
        - type: Pods
          value: 2
          periodSeconds: 60
      selectPolicy: Min  # Use the most conservative policy
    scaleUp:
      stabilizationWindowSeconds: 60  # 1 minute before scaling up
      # Per 30 s: double the pod count or add 4 pods, whichever is more.
      policies:
        - type: Percent
          value: 100
          periodSeconds: 30
        - type: Pods
          value: 4
          periodSeconds: 30
      selectPolicy: Max  # Scale up aggressively when needed
 ---
 # HPA for LLM server - scales based on CPU (inference is CPU/GPU bound).
 # Keeps the llm-server Deployment between 2 and 10 replicas; a memory target
 # is configured as well, and utilization is measured against resource requests.
 apiVersion: autoscaling/v2
 kind: HorizontalPodAutoscaler
 metadata:
  name: llm-server-hpa
  namespace: generalbots
  labels:
    app.kubernetes.io/name: generalbots
    app.kubernetes.io/component: hpa
 spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: llm-server
  minReplicas: 2
  maxReplicas: 10
  metrics:
    # Scale based on CPU utilization
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 60  # Lower threshold for LLM - inference is expensive
    # Scale based on memory utilization
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: 75
    # Scale based on inference queue length (requires custom metrics)
    # Uncomment if using Prometheus Adapter
    # - type: Pods
    #   pods:
    #     metric:
    #       name: llm_inference_queue_length
    #     target:
    #       type: AverageValue
    #       averageValue: 5
  behavior:
    scaleDown:
      stabilizationWindowSeconds: 600  # 10 minutes - LLM pods are expensive to recreate
      # Remove at most 1 pod every 2 minutes.
      policies:
        - type: Pods
          value: 1
          periodSeconds: 120
      selectPolicy: Min
    scaleUp:
      stabilizationWindowSeconds: 120  # 2 minutes
      # Add at most 2 pods per minute.
      policies:
        - type: Pods
          value: 2
          periodSeconds: 60
      selectPolicy: Max
 ---
 # HPA for embedding server (if deployed separately).
 # NOTE(review): deployment.yaml defines no Deployment named "embedding-server";
 # until one exists in this namespace the HPA has no scale target to act on.
 apiVersion: autoscaling/v2
 kind: HorizontalPodAutoscaler
 metadata:
  name: embedding-server-hpa
  namespace: generalbots
  labels:
    app.kubernetes.io/name: generalbots
    app.kubernetes.io/component: hpa
 spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: embedding-server
  minReplicas: 2
  maxReplicas: 8
  metrics:
    # Same CPU (70%) and memory (80%) targets as botserver-hpa.
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 70
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: 80
  behavior:
    scaleDown:
      # 5 minute cooldown, then remove at most 1 pod per minute.
      stabilizationWindowSeconds: 300
      policies:
        - type: Pods
          value: 1
          periodSeconds: 60
      selectPolicy: Min
    scaleUp:
      # 1 minute window, then add at most 2 pods per 30 s.
      stabilizationWindowSeconds: 60
      policies:
        - type: Pods
          value: 2
          periodSeconds: 30
      selectPolicy: Max
 ---
 # Vertical Pod Autoscaler for botserver (optional - requires VPA installed).
 # Runs in recommendation-only mode: botserver-hpa already scales this
 # Deployment on CPU and memory utilization, and letting the VPA rewrite the
 # requests those percentages are computed from makes the two autoscalers fight.
 # Read the recommendations with `kubectl describe vpa botserver-vpa` and apply
 # them to the Deployment manually.
 apiVersion: autoscaling.k8s.io/v1
 kind: VerticalPodAutoscaler
 metadata:
  name: botserver-vpa
  namespace: generalbots
  labels:
    app.kubernetes.io/name: generalbots
    app.kubernetes.io/component: vpa
 spec:
  targetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: botserver
  updatePolicy:
    # Options: Off, Initial, Recreate, Auto. Only switch away from "Off" if the
    # HPA is moved off CPU/memory metrics (e.g. to custom metrics).
    updateMode: "Off"
  resourcePolicy:
    containerPolicies:
      - containerName: botserver
        # Bounds for recommendations; the lower bound equals the Deployment's
        # current requests.
        minAllowed:
          cpu: 250m
          memory: 512Mi
        maxAllowed:
          cpu: 4000m
          memory: 8Gi
        controlledResources: ["cpu", "memory"]
        controlledValues: RequestsAndLimits
 ---
 # Vertical Pod Autoscaler for LLM server.
 # Recommendation-only (updateMode "Off"): it computes suggested requests for
 # the llm-server container but never changes running pods.
 apiVersion: autoscaling.k8s.io/v1
 kind: VerticalPodAutoscaler
 metadata:
  name: llm-server-vpa
  namespace: generalbots
  labels:
    app.kubernetes.io/name: generalbots
    app.kubernetes.io/component: vpa
 spec:
  targetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: llm-server
  updatePolicy:
    updateMode: "Off"  # Manual for LLM - too disruptive to auto-update
  resourcePolicy:
    containerPolicies:
      - containerName: llm-server
        # The lower bound equals the Deployment's current requests (2 CPU / 8Gi).
        minAllowed:
          cpu: 2000m
          memory: 8Gi
        maxAllowed:
          cpu: 16000m
          memory: 64Gi
        controlledResources: ["cpu", "memory"]
        controlledValues: RequestsOnly  # Only adjust requests, not limits
 ---
 # Custom metrics for HPA (requires Prometheus + Prometheus Adapter).
 # This ServiceMonitor tells Prometheus to scrape botserver metrics.
 #
 # A ServiceMonitor selects *Service* objects by the Service's own labels, not
 # pods. The botserver Services carry no "app" label, so the selector uses the
 # app.kubernetes.io/* labels set on botserver-service, the Service that
 # exposes the port named "metrics".
 apiVersion: monitoring.coreos.com/v1
 kind: ServiceMonitor
 metadata:
  name: botserver-metrics
  namespace: generalbots
  labels:
    app.kubernetes.io/name: generalbots
    app.kubernetes.io/component: monitoring
 spec:
  selector:
    matchLabels:
      app.kubernetes.io/name: generalbots
      app.kubernetes.io/component: service
  endpoints:
    # "metrics" is the Service port name (9090 -> container port 9090).
    - port: metrics
      interval: 30s
      path: /metrics
  namespaceSelector:
    matchNames:
      - generalbots
 ---
 # PrometheusRule for alerting on scaling events.
 # All expressions read kube-state-metrics HPA gauges, so kube-state-metrics
 # must be scraped by the same Prometheus that loads these rules.
 apiVersion: monitoring.coreos.com/v1
 kind: PrometheusRule
 metadata:
  name: botserver-scaling-alerts
  namespace: generalbots
  labels:
    app.kubernetes.io/name: generalbots
    app.kubernetes.io/component: alerts
 spec:
  groups:
    - name: botserver-scaling
      rules:
        # Alert when approaching max replicas (more than 80% of maxReplicas)
        - alert: BotserverNearMaxReplicas
          expr: |
            kube_horizontalpodautoscaler_status_current_replicas{horizontalpodautoscaler="botserver-hpa"}
            / kube_horizontalpodautoscaler_spec_max_replicas{horizontalpodautoscaler="botserver-hpa"}
            > 0.8
          for: 5m
          labels:
            severity: warning
          annotations:
            summary: "Botserver near maximum replicas"
            description: "Botserver HPA is at {{ $value | humanizePercentage }} of max replicas"
        # Alert when at max replicas
        - alert: BotserverAtMaxReplicas
          expr: |
            kube_horizontalpodautoscaler_status_current_replicas{horizontalpodautoscaler="botserver-hpa"}
            == kube_horizontalpodautoscaler_spec_max_replicas{horizontalpodautoscaler="botserver-hpa"}
          for: 10m
          labels:
            severity: critical
          annotations:
            summary: "Botserver at maximum replicas"
            description: "Botserver HPA has been at max replicas for 10 minutes - consider increasing max"
        # Alert on rapid scaling
        # current_replicas is a gauge, so delta() is used: increase() is meant
        # for counters and treats every scale-down as a counter reset, which
        # inflates the result.
        - alert: BotserverRapidScaling
          expr: |
            delta(kube_horizontalpodautoscaler_status_current_replicas{horizontalpodautoscaler="botserver-hpa"}[10m])
            > 5
          for: 1m
          labels:
            severity: warning
          annotations:
            summary: "Botserver scaling rapidly"
            description: "Botserver has scaled by {{ $value }} replicas in 10 minutes"
        # Alert on LLM server max replicas
        - alert: LLMServerAtMaxReplicas
          expr: |
            kube_horizontalpodautoscaler_status_current_replicas{horizontalpodautoscaler="llm-server-hpa"}
            == kube_horizontalpodautoscaler_spec_max_replicas{horizontalpodautoscaler="llm-server-hpa"}
          for: 5m
          labels:
            severity: critical
          annotations:
            summary: "LLM Server at maximum replicas"
            description: "LLM Server HPA is at max - inference capacity may be constrained"