Add implementation plan and multi-agent features

This commit introduces comprehensive documentation and implementation
for multi-agent orchestration capabilities:

- Add IMPLEMENTATION-PLAN.md with 4-phase roadmap
- Add Kubernetes deployment manifests (deployment.yaml, hpa.yaml)
- Add database migrations for multi-agent tables (6.1.1, 6.1.2)
- Implement A2A protocol for agent-to-agent communication
- Implement user memory keywords for cross-session persistence
- Implement model routing for dynamic LLM model selection
This commit is contained in:
Rodrigo Rodriguez (Pragmatismo) 2025-11-30 19:18:23 -03:00
parent 430ec12357
commit 5165131b06
37 changed files with 17286 additions and 0 deletions

1768
IMPLEMENTATION-PLAN.md Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,539 @@
# General Bots Kubernetes Deployment Configuration
# This file contains the core deployment resources for running General Bots
# in a Kubernetes cluster.
#
# Usage:
#   kubectl apply -f deployment.yaml
#
# Prerequisites:
#   - Kubernetes cluster 1.24+
#   - kubectl configured
#   - Secrets created (see secrets.yaml)
#   - PersistentVolumeClaim for data (optional)
---
# Dedicated namespace so all General Bots resources can be managed
# (and deleted) as a unit.
apiVersion: v1
kind: Namespace
metadata:
  name: generalbots
  labels:
    app.kubernetes.io/name: generalbots
    app.kubernetes.io/component: namespace
---
# ConfigMap for non-sensitive configuration.
# All values are quoted strings: ConfigMap data must be strings, and quoting
# also prevents YAML from re-typing boolean/number lookalikes ("true", "0.7").
apiVersion: v1
kind: ConfigMap
metadata:
  name: botserver-config
  namespace: generalbots
  labels:
    app.kubernetes.io/name: generalbots
    app.kubernetes.io/component: config
data:
  # Server configuration
  SERVER_HOST: "0.0.0.0"
  SERVER_PORT: "8080"
  # LLM configuration
  LLM_SERVER_HOST: "0.0.0.0"
  LLM_SERVER_PORT: "8081"
  LLM_SERVER_CTX_SIZE: "4096"
  LLM_SERVER_N_PREDICT: "1024"
  LLM_SERVER_PARALLEL: "6"
  LLM_SERVER_CONT_BATCHING: "true"
  LLM_CACHE: "true"
  LLM_CACHE_TTL: "3600"
  # Embedding configuration
  EMBEDDING_PORT: "8082"
  # Multi-agent configuration
  A2A_ENABLED: "true"
  A2A_TIMEOUT: "30"
  A2A_MAX_HOPS: "5"
  # Memory configuration
  USER_MEMORY_ENABLED: "true"
  USER_MEMORY_MAX_KEYS: "1000"
  EPISODIC_MEMORY_ENABLED: "true"
  # Hybrid RAG configuration
  RAG_HYBRID_ENABLED: "true"
  RAG_DENSE_WEIGHT: "0.7"
  RAG_SPARSE_WEIGHT: "0.3"
  # Observability
  OBSERVABILITY_ENABLED: "true"
  OBSERVABILITY_METRICS_INTERVAL: "60"
  # Sandbox configuration
  SANDBOX_RUNTIME: "process" # Use 'lxc' or 'docker' if available
  SANDBOX_TIMEOUT: "30"
  SANDBOX_MEMORY_MB: "512"
---
# Main botserver Deployment
apiVersion: apps/v1
kind: Deployment
metadata:
  name: botserver
  namespace: generalbots
  labels:
    app.kubernetes.io/name: generalbots
    app.kubernetes.io/component: botserver
    app.kubernetes.io/version: "6.1.1"
spec:
  replicas: 3
  selector:
    matchLabels:
      app: botserver
  strategy:
    type: RollingUpdate
    rollingUpdate:
      # Zero-downtime rollout: add one new pod at a time, never go below
      # the desired replica count.
      maxSurge: 1
      maxUnavailable: 0
  template:
    metadata:
      labels:
        app: botserver
        app.kubernetes.io/name: generalbots
        app.kubernetes.io/component: botserver
      annotations:
        # Scrape hints for Prometheus (annotation-based discovery).
        prometheus.io/scrape: "true"
        prometheus.io/port: "9090"
        prometheus.io/path: "/metrics"
    spec:
      serviceAccountName: botserver
      securityContext:
        runAsNonRoot: true
        runAsUser: 1000
        fsGroup: 1000
      # Init containers block pod startup until backing services are
      # reachable, avoiding crash-loops while dependencies come up.
      initContainers:
        - name: wait-for-postgres
          image: busybox:1.35
          command: ['sh', '-c', 'until nc -z postgres-service 5432; do echo waiting for postgres; sleep 2; done']
        - name: wait-for-qdrant
          image: busybox:1.35
          command: ['sh', '-c', 'until nc -z qdrant-service 6333; do echo waiting for qdrant; sleep 2; done']
      containers:
        - name: botserver
          # NOTE(review): ":latest" with imagePullPolicy Always makes rollouts
          # non-reproducible; consider pinning a versioned tag in production.
          image: generalbots/botserver:latest
          imagePullPolicy: Always
          ports:
            - name: http
              containerPort: 8080
              protocol: TCP
            - name: metrics
              containerPort: 9090
              protocol: TCP
          # Non-sensitive settings come from the ConfigMap; secrets are
          # injected individually below.
          envFrom:
            - configMapRef:
                name: botserver-config
          env:
            - name: DATABASE_URL
              valueFrom:
                secretKeyRef:
                  name: botserver-secrets
                  key: database-url
            - name: QDRANT_URL
              valueFrom:
                secretKeyRef:
                  name: botserver-secrets
                  key: qdrant-url
            - name: LLM_KEY
              valueFrom:
                secretKeyRef:
                  name: botserver-secrets
                  key: llm-api-key
                  # Pod still starts if no external LLM API key is configured.
                  optional: true
            # Downward API: expose the pod's own identity to the process.
            - name: POD_NAME
              valueFrom:
                fieldRef:
                  fieldPath: metadata.name
            - name: POD_NAMESPACE
              valueFrom:
                fieldRef:
                  fieldPath: metadata.namespace
          resources:
            requests:
              memory: "512Mi"
              cpu: "250m"
            limits:
              memory: "2Gi"
              cpu: "2000m"
          livenessProbe:
            httpGet:
              path: /health
              port: http
            initialDelaySeconds: 30
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 3
          readinessProbe:
            httpGet:
              path: /ready
              port: http
            initialDelaySeconds: 10
            periodSeconds: 5
            timeoutSeconds: 3
            failureThreshold: 3
          # Startup probe gives slow cold starts up to ~5 minutes
          # (30 failures x 10s) before the liveness probe takes over.
          startupProbe:
            httpGet:
              path: /health
              port: http
            initialDelaySeconds: 10
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 30
          volumeMounts:
            - name: data
              mountPath: /data
            - name: models
              mountPath: /models
              readOnly: true
            - name: gbai-packages
              mountPath: /packages
      volumes:
        - name: data
          persistentVolumeClaim:
            claimName: botserver-data
        - name: models
          persistentVolumeClaim:
            claimName: llm-models
        - name: gbai-packages
          persistentVolumeClaim:
            claimName: gbai-packages
      # Prefer spreading replicas across nodes...
      affinity:
        podAntiAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 100
              podAffinityTerm:
                labelSelector:
                  matchExpressions:
                    - key: app
                      operator: In
                      values:
                        - botserver
                topologyKey: kubernetes.io/hostname
      # ...and across availability zones (best-effort).
      topologySpreadConstraints:
        - maxSkew: 1
          topologyKey: topology.kubernetes.io/zone
          whenUnsatisfiable: ScheduleAnyway
          labelSelector:
            matchLabels:
              app: botserver
---
# LLM Server Deployment (for local model inference)
apiVersion: apps/v1
kind: Deployment
metadata:
  name: llm-server
  namespace: generalbots
  labels:
    app.kubernetes.io/name: generalbots
    app.kubernetes.io/component: llm-server
spec:
  replicas: 2
  selector:
    matchLabels:
      app: llm-server
  template:
    metadata:
      labels:
        app: llm-server
        app.kubernetes.io/name: generalbots
        app.kubernetes.io/component: llm-server
    spec:
      containers:
        - name: llm-server
          image: generalbots/llm-server:latest
          imagePullPolicy: Always
          ports:
            - name: http
              containerPort: 8081
              protocol: TCP
          env:
            - name: MODEL_PATH
              value: "/models/DeepSeek-R1-Distill-Qwen-7B-Q4_K_M.gguf"
            - name: CTX_SIZE
              value: "4096"
            - name: N_PREDICT
              value: "1024"
            - name: PARALLEL
              value: "6"
            - name: CONT_BATCHING
              value: "true"
            - name: GPU_LAYERS
              value: "35" # Adjust based on available GPU memory
          resources:
            requests:
              memory: "8Gi"
              cpu: "2000m"
              # Uncomment for GPU support
              # nvidia.com/gpu: 1
            limits:
              memory: "24Gi"
              cpu: "8000m"
              # nvidia.com/gpu: 1
          volumeMounts:
            - name: models
              mountPath: /models
              readOnly: true
          # Generous initial delays: loading a multi-GB GGUF model into
          # memory can take minutes.
          livenessProbe:
            httpGet:
              path: /health
              port: http
            initialDelaySeconds: 120
            periodSeconds: 30
            timeoutSeconds: 10
          readinessProbe:
            httpGet:
              path: /health
              port: http
            initialDelaySeconds: 60
            periodSeconds: 10
            timeoutSeconds: 5
      volumes:
        - name: models
          persistentVolumeClaim:
            claimName: llm-models
      # Schedule on nodes with GPU
      # nodeSelector:
      #   nvidia.com/gpu.present: "true"
      # Tolerate GPU-node taints so pods may land there when GPUs exist.
      tolerations:
        - key: "nvidia.com/gpu"
          operator: "Exists"
          effect: "NoSchedule"
---
# Service for botserver: cluster-internal entry point on port 80,
# forwarding to the pods' HTTP port (8080); also exposes metrics (9090).
apiVersion: v1
kind: Service
metadata:
  name: botserver-service
  namespace: generalbots
  labels:
    app.kubernetes.io/name: generalbots
    app.kubernetes.io/component: service
spec:
  type: ClusterIP
  selector:
    app: botserver
  ports:
    - name: http
      port: 80
      targetPort: 8080
      protocol: TCP
    - name: metrics
      port: 9090
      targetPort: 9090
      protocol: TCP
---
# Service for LLM server (internal only; consumed by botserver).
apiVersion: v1
kind: Service
metadata:
  name: llm-server-service
  namespace: generalbots
  labels:
    app.kubernetes.io/name: generalbots
    app.kubernetes.io/component: llm-service
spec:
  type: ClusterIP
  selector:
    app: llm-server
  ports:
    - name: http
      port: 8081
      targetPort: 8081
      protocol: TCP
---
# Headless service for StatefulSet-like DNS (if needed).
# clusterIP: None makes DNS return individual pod IPs instead of a VIP.
apiVersion: v1
kind: Service
metadata:
  name: botserver-headless
  namespace: generalbots
  labels:
    app.kubernetes.io/name: generalbots
    app.kubernetes.io/component: headless-service
spec:
  clusterIP: None
  selector:
    app: botserver
  ports:
    - name: http
      port: 8080
      targetPort: 8080
---
# Ingress for external access (TLS via cert-manager, nginx ingress class).
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: botserver-ingress
  namespace: generalbots
  labels:
    app.kubernetes.io/name: generalbots
    app.kubernetes.io/component: ingress
  annotations:
    nginx.ingress.kubernetes.io/ssl-redirect: "true"
    nginx.ingress.kubernetes.io/proxy-body-size: "50m"
    # Long proxy timeouts keep WebSocket / SSE connections alive.
    nginx.ingress.kubernetes.io/proxy-read-timeout: "300"
    nginx.ingress.kubernetes.io/proxy-send-timeout: "300"
    # NOTE(review): "websocket-services" is an NGINX Inc. (nginx.org)
    # controller annotation; the community ingress-nginx controller supports
    # WebSockets by default once the proxy timeouts above are raised.
    # Harmless if unrecognized — confirm which controller is deployed.
    nginx.ingress.kubernetes.io/websocket-services: "botserver-service"
    cert-manager.io/cluster-issuer: "letsencrypt-prod"
spec:
  # Replaces the deprecated "kubernetes.io/ingress.class" annotation,
  # which is ignored by networking.k8s.io/v1-era controllers in favor of
  # spec.ingressClassName.
  ingressClassName: nginx
  tls:
    - hosts:
        - bot.example.com
      secretName: botserver-tls
  rules:
    - host: bot.example.com
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: botserver-service
                port:
                  number: 80
---
# ServiceAccount the botserver pods run under (referenced by the
# Deployment's serviceAccountName).
apiVersion: v1
kind: ServiceAccount
metadata:
  name: botserver
  namespace: generalbots
  labels:
    app.kubernetes.io/name: generalbots
    app.kubernetes.io/component: serviceaccount
---
# Role for botserver: read-only access to config and pod metadata
# within the namespace (least privilege; no write verbs).
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: botserver-role
  namespace: generalbots
rules:
  - apiGroups: [""]
    resources: ["configmaps", "secrets"]
    verbs: ["get", "list", "watch"]
  - apiGroups: [""]
    resources: ["pods"]
    verbs: ["get", "list"]
---
# RoleBinding: grants botserver-role to the botserver ServiceAccount.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: botserver-rolebinding
  namespace: generalbots
subjects:
  - kind: ServiceAccount
    name: botserver
    namespace: generalbots
roleRef:
  kind: Role
  name: botserver-role
  apiGroup: rbac.authorization.k8s.io
---
# PodDisruptionBudget for high availability: with 3 replicas, voluntary
# disruptions (node drains, upgrades) may evict at most one pod at a time.
apiVersion: policy/v1
kind: PodDisruptionBudget
metadata:
  name: botserver-pdb
  namespace: generalbots
  labels:
    app.kubernetes.io/name: generalbots
    app.kubernetes.io/component: pdb
spec:
  minAvailable: 2
  selector:
    matchLabels:
      app: botserver
---
# PersistentVolumeClaim for botserver data.
# ReadWriteMany because multiple botserver replicas mount it concurrently;
# requires a storage class that supports RWX (e.g. NFS, CephFS).
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: botserver-data
  namespace: generalbots
  labels:
    app.kubernetes.io/name: generalbots
    app.kubernetes.io/component: storage
spec:
  accessModes:
    - ReadWriteMany
  storageClassName: standard
  resources:
    requests:
      storage: 50Gi
---
# PersistentVolumeClaim for LLM models (mounted read-only by both the
# botserver and llm-server deployments).
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: llm-models
  namespace: generalbots
  labels:
    app.kubernetes.io/name: generalbots
    app.kubernetes.io/component: storage
spec:
  accessModes:
    - ReadOnlyMany
  storageClassName: standard
  resources:
    requests:
      storage: 100Gi
---
# PersistentVolumeClaim for .gbai packages (shared across replicas).
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: gbai-packages
  namespace: generalbots
  labels:
    app.kubernetes.io/name: generalbots
    app.kubernetes.io/component: storage
spec:
  accessModes:
    - ReadWriteMany
  storageClassName: standard
  resources:
    requests:
      storage: 20Gi

331
deploy/kubernetes/hpa.yaml Normal file
View file

@ -0,0 +1,331 @@
# General Bots Kubernetes HorizontalPodAutoscaler Configuration
# This file contains autoscaling configurations for General Bots components.
#
# Usage:
#   kubectl apply -f hpa.yaml
#
# Prerequisites:
#   - Metrics Server installed in cluster
#   - deployment.yaml already applied
---
# HPA for botserver - scales based on CPU and memory
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: botserver-hpa
  namespace: generalbots
  labels:
    app.kubernetes.io/name: generalbots
    app.kubernetes.io/component: hpa
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: botserver
  # minReplicas matches the Deployment's replicas and the PDB's
  # minAvailable+1, so disruption budgets remain satisfiable.
  minReplicas: 3
  maxReplicas: 20
  metrics:
    # Scale based on CPU utilization
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 70
    # Scale based on memory utilization
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: 80
    # Scale based on requests per second (requires custom metrics)
    # Uncomment if using Prometheus Adapter
    # - type: Pods
    #   pods:
    #     metric:
    #       name: http_requests_per_second
    #     target:
    #       type: AverageValue
    #       averageValue: 100
  behavior:
    scaleDown:
      stabilizationWindowSeconds: 300 # 5 minutes cooldown before scaling down
      policies:
        - type: Percent
          value: 10
          periodSeconds: 60
        - type: Pods
          value: 2
          periodSeconds: 60
      selectPolicy: Min # Use the most conservative policy
    scaleUp:
      stabilizationWindowSeconds: 60 # 1 minute before scaling up
      policies:
        - type: Percent
          value: 100
          periodSeconds: 30
        - type: Pods
          value: 4
          periodSeconds: 30
      selectPolicy: Max # Scale up aggressively when needed
---
# HPA for LLM server - scales based on CPU (inference is CPU/GPU bound)
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: llm-server-hpa
  namespace: generalbots
  labels:
    app.kubernetes.io/name: generalbots
    app.kubernetes.io/component: hpa
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: llm-server
  minReplicas: 2
  maxReplicas: 10
  metrics:
    # Scale based on CPU utilization
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 60 # Lower threshold for LLM - inference is expensive
    # Scale based on memory utilization
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: 75
    # Scale based on inference queue length (requires custom metrics)
    # Uncomment if using Prometheus Adapter
    # - type: Pods
    #   pods:
    #     metric:
    #       name: llm_inference_queue_length
    #     target:
    #       type: AverageValue
    #       averageValue: 5
  behavior:
    scaleDown:
      stabilizationWindowSeconds: 600 # 10 minutes - LLM pods are expensive to recreate
      policies:
        - type: Pods
          value: 1
          periodSeconds: 120
      selectPolicy: Min
    scaleUp:
      stabilizationWindowSeconds: 120 # 2 minutes
      policies:
        - type: Pods
          value: 2
          periodSeconds: 60
      selectPolicy: Max
---
# HPA for embedding server (if deployed separately).
# NOTE(review): the "embedding-server" Deployment is not defined in
# deployment.yaml — this HPA only takes effect once that Deployment exists.
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: embedding-server-hpa
  namespace: generalbots
  labels:
    app.kubernetes.io/name: generalbots
    app.kubernetes.io/component: hpa
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: embedding-server
  minReplicas: 2
  maxReplicas: 8
  metrics:
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 70
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: 80
  behavior:
    scaleDown:
      stabilizationWindowSeconds: 300
      policies:
        - type: Pods
          value: 1
          periodSeconds: 60
      selectPolicy: Min
    scaleUp:
      stabilizationWindowSeconds: 60
      policies:
        - type: Pods
          value: 2
          periodSeconds: 30
      selectPolicy: Max
---
# Vertical Pod Autoscaler for botserver (optional - requires VPA installed)
# Automatically adjusts resource requests/limits.
#
# NOTE: botserver is also scaled by an HPA (botserver-hpa) driven by CPU and
# memory utilization. Running VPA in "Auto" mode alongside such an HPA makes
# the two autoscalers fight: VPA resizes requests, which changes the
# utilization ratio the HPA scales on. updateMode is therefore set to "Off"
# so the VPA only publishes recommendations; switch back to "Auto" only if
# the HPA is moved to custom/external metrics (e.g. requests per second).
apiVersion: autoscaling.k8s.io/v1
kind: VerticalPodAutoscaler
metadata:
  name: botserver-vpa
  namespace: generalbots
  labels:
    app.kubernetes.io/name: generalbots
    app.kubernetes.io/component: vpa
spec:
  targetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: botserver
  updatePolicy:
    updateMode: "Off" # Options: Off, Initial, Recreate, Auto (see note above)
  resourcePolicy:
    containerPolicies:
      - containerName: botserver
        minAllowed:
          cpu: 250m
          memory: 512Mi
        maxAllowed:
          cpu: 4000m
          memory: 8Gi
        controlledResources: ["cpu", "memory"]
        controlledValues: RequestsAndLimits
---
# Vertical Pod Autoscaler for LLM server.
# Recommendation-only: automatic pod recreation is too disruptive for
# pods that take minutes to reload a model.
apiVersion: autoscaling.k8s.io/v1
kind: VerticalPodAutoscaler
metadata:
  name: llm-server-vpa
  namespace: generalbots
  labels:
    app.kubernetes.io/name: generalbots
    app.kubernetes.io/component: vpa
spec:
  targetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: llm-server
  updatePolicy:
    updateMode: "Off" # Manual for LLM - too disruptive to auto-update
  resourcePolicy:
    containerPolicies:
      - containerName: llm-server
        minAllowed:
          cpu: 2000m
          memory: 8Gi
        maxAllowed:
          cpu: 16000m
          memory: 64Gi
        controlledResources: ["cpu", "memory"]
        controlledValues: RequestsOnly # Only adjust requests, not limits
---
# Custom metrics for HPA (requires Prometheus + Prometheus Adapter).
# This ServiceMonitor tells Prometheus Operator to scrape botserver metrics
# from the "metrics" service port every 30s.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: botserver-metrics
  namespace: generalbots
  labels:
    app.kubernetes.io/name: generalbots
    app.kubernetes.io/component: monitoring
spec:
  selector:
    matchLabels:
      app: botserver
  endpoints:
    - port: metrics
      interval: 30s
      path: /metrics
  namespaceSelector:
    matchNames:
      - generalbots
---
# PrometheusRule for alerting on scaling events
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: botserver-scaling-alerts
  namespace: generalbots
  labels:
    app.kubernetes.io/name: generalbots
    app.kubernetes.io/component: alerts
spec:
  groups:
    - name: botserver-scaling
      rules:
        # Alert when approaching max replicas (>80% of spec.maxReplicas)
        - alert: BotserverNearMaxReplicas
          expr: |
            kube_horizontalpodautoscaler_status_current_replicas{horizontalpodautoscaler="botserver-hpa"}
            / kube_horizontalpodautoscaler_spec_max_replicas{horizontalpodautoscaler="botserver-hpa"}
            > 0.8
          for: 5m
          labels:
            severity: warning
          annotations:
            summary: "Botserver near maximum replicas"
            description: "Botserver HPA is at {{ $value | humanizePercentage }} of max replicas"
        # Alert when at max replicas
        - alert: BotserverAtMaxReplicas
          expr: |
            kube_horizontalpodautoscaler_status_current_replicas{horizontalpodautoscaler="botserver-hpa"}
            == kube_horizontalpodautoscaler_spec_max_replicas{horizontalpodautoscaler="botserver-hpa"}
          for: 10m
          labels:
            severity: critical
          annotations:
            summary: "Botserver at maximum replicas"
            description: "Botserver HPA has been at max replicas for 10 minutes - consider increasing max"
        # Alert on rapid scaling (more than 5 replicas added in 10 minutes)
        - alert: BotserverRapidScaling
          expr: |
            increase(kube_horizontalpodautoscaler_status_current_replicas{horizontalpodautoscaler="botserver-hpa"}[10m])
            > 5
          for: 1m
          labels:
            severity: warning
          annotations:
            summary: "Botserver scaling rapidly"
            description: "Botserver has scaled by {{ $value }} replicas in 10 minutes"
        # Alert on LLM server max replicas
        - alert: LLMServerAtMaxReplicas
          expr: |
            kube_horizontalpodautoscaler_status_current_replicas{horizontalpodautoscaler="llm-server-hpa"}
            == kube_horizontalpodautoscaler_spec_max_replicas{horizontalpodautoscaler="llm-server-hpa"}
          for: 5m
          labels:
            severity: critical
          annotations:
            summary: "LLM Server at maximum replicas"
            description: "LLM Server HPA is at max - inference capacity may be constrained"

View file

@ -68,6 +68,12 @@
- [SET CONTEXT](./chapter-06-gbdialog/keyword-set-context.md) - [SET CONTEXT](./chapter-06-gbdialog/keyword-set-context.md)
- [GET BOT MEMORY](./chapter-06-gbdialog/keyword-get-bot-memory.md) - [GET BOT MEMORY](./chapter-06-gbdialog/keyword-get-bot-memory.md)
- [SET BOT MEMORY](./chapter-06-gbdialog/keyword-set-bot-memory.md) - [SET BOT MEMORY](./chapter-06-gbdialog/keyword-set-bot-memory.md)
- [GET USER MEMORY](./chapter-06-gbdialog/keyword-get-user-memory.md)
- [SET USER MEMORY](./chapter-06-gbdialog/keyword-set-user-memory.md)
- [USE MODEL](./chapter-06-gbdialog/keyword-use-model.md)
- [DELEGATE TO BOT](./chapter-06-gbdialog/keyword-delegate-to-bot.md)
- [BOT REFLECTION](./chapter-06-gbdialog/keyword-bot-reflection.md)
- [RUN PYTHON / JAVASCRIPT / BASH](./chapter-06-gbdialog/keyword-run-code.md)
- [USE KB](./chapter-06-gbdialog/keyword-use-kb.md) - [USE KB](./chapter-06-gbdialog/keyword-use-kb.md)
- [CLEAR KB](./chapter-06-gbdialog/keyword-clear-kb.md) - [CLEAR KB](./chapter-06-gbdialog/keyword-clear-kb.md)
- [USE WEBSITE](./chapter-06-gbdialog/keyword-use-website.md) - [USE WEBSITE](./chapter-06-gbdialog/keyword-use-website.md)
@ -101,6 +107,7 @@
- [KB DOCUMENTS ADDED SINCE](./chapter-06-gbdialog/keyword-kb-documents-added-since.md) - [KB DOCUMENTS ADDED SINCE](./chapter-06-gbdialog/keyword-kb-documents-added-since.md)
- [KB LIST COLLECTIONS](./chapter-06-gbdialog/keyword-kb-list-collections.md) - [KB LIST COLLECTIONS](./chapter-06-gbdialog/keyword-kb-list-collections.md)
- [KB STORAGE SIZE](./chapter-06-gbdialog/keyword-kb-storage-size.md) - [KB STORAGE SIZE](./chapter-06-gbdialog/keyword-kb-storage-size.md)
- [Multi-Agent Keywords](./chapter-06-gbdialog/keywords-multi-agent.md)
- [Social Media Keywords](./chapter-06-gbdialog/keywords-social-media.md) - [Social Media Keywords](./chapter-06-gbdialog/keywords-social-media.md)
- [Lead Scoring Keywords](./chapter-06-gbdialog/keywords-lead-scoring.md) - [Lead Scoring Keywords](./chapter-06-gbdialog/keywords-lead-scoring.md)
- [HTTP & API Operations](./chapter-06-gbdialog/keywords-http.md) - [HTTP & API Operations](./chapter-06-gbdialog/keywords-http.md)
@ -221,6 +228,10 @@
- [Enterprise Platform Migration](./chapter-11-features/m365-comparison.md) - [Enterprise Platform Migration](./chapter-11-features/m365-comparison.md)
- [Projects](./chapter-11-features/projects.md) - [Projects](./chapter-11-features/projects.md)
- [Multi-Agent Office Suite Design](./chapter-11-features/multi-agent-design.md) - [Multi-Agent Office Suite Design](./chapter-11-features/multi-agent-design.md)
- [What's New: Multi-Agent Features](./chapter-11-features/whats-new.md)
- [Multi-Agent Orchestration](./chapter-11-features/multi-agent-orchestration.md)
- [Memory Management](./chapter-11-features/memory-management.md)
- [Hybrid RAG Search](./chapter-11-features/hybrid-search.md)
# Part XI - Security # Part XI - Security

View file

@ -0,0 +1,240 @@
# BOT REFLECTION
Enables agent self-analysis and improvement by using LLM to evaluate conversation quality, identify issues, and suggest improvements. This is a key feature for continuous agent optimization.
## Syntax
```basic
BOT REFLECTION enabled
BOT REFLECTION ON "metric"
insights = BOT REFLECTION INSIGHTS()
```
## Parameters
| Parameter | Type | Description |
|-----------|------|-------------|
| `enabled` | Boolean | `true` to enable, `false` to disable reflection |
| `metric` | String | Specific metric to analyze (e.g., "conversation_quality", "response_accuracy") |
## Description
`BOT REFLECTION` activates the agent self-improvement system, which periodically analyzes conversations and provides actionable insights. When enabled, the system:
- **Analyzes conversation quality** - Tone, clarity, helpfulness
- **Identifies issues** - Misunderstandings, incomplete answers, user frustration
- **Suggests improvements** - Better responses, missing information, tone adjustments
- **Tracks metrics over time** - Quality scores, resolution rates
This creates a continuous improvement loop where agents learn from their interactions.
## Examples
### Enable Basic Reflection
```basic
' Enable reflection for this bot session
BOT REFLECTION true
' Normal conversation proceeds
TALK "Hello! How can I help you today?"
HEAR userquery
response = LLM userquery
TALK response
' Reflection runs automatically in background
```
### Monitor Specific Metrics
```basic
' Enable reflection on conversation quality
BOT REFLECTION ON "conversation_quality"
' Enable reflection on response accuracy
BOT REFLECTION ON "response_accuracy"
' Enable reflection on user satisfaction
BOT REFLECTION ON "user_satisfaction"
```
### Retrieve Reflection Insights
```basic
' Get insights from reflection analysis
insights = BOT REFLECTION INSIGHTS()
IF insights <> "" THEN
PRINT "Reflection Insights:"
PRINT insights.summary
PRINT "Quality Score: " + insights.qualityScore
PRINT "Issues Found: " + insights.issuesCount
FOR EACH suggestion IN insights.suggestions
PRINT "Suggestion: " + suggestion
NEXT suggestion
END IF
```
### Use Insights for Self-Improvement
```basic
' Periodic reflection check
BOT REFLECTION true
' After conversation ends, check insights
insights = BOT REFLECTION INSIGHTS()
IF insights.qualityScore < 0.7 THEN
' Log for review
PRINT "Low quality conversation detected"
PRINT "Issues: " + insights.issues
' Store for analysis
SET BOT MEMORY "reflection_" + conversationid, insights
END IF
```
### Admin Dashboard Integration
```basic
' Script for admin to review bot performance
insights = BOT REFLECTION INSIGHTS()
BEGIN TALK
**Bot Performance Report**
📊 **Quality Score:** {insights.qualityScore}/1.0
📈 **Metrics:**
- Response Accuracy: {insights.responseAccuracy}%
- User Satisfaction: {insights.userSatisfaction}%
- Resolution Rate: {insights.resolutionRate}%
⚠️ **Issues Identified:**
{insights.issues}
💡 **Improvement Suggestions:**
{insights.suggestions}
END TALK
```
### Conditional Reflection
```basic
' Only reflect on complex conversations
messageCount = GET BOT MEMORY("messageCount")
IF messageCount > 5 THEN
' Enable reflection for longer conversations
BOT REFLECTION true
BOT REFLECTION ON "conversation_quality"
END IF
```
### Reflection with Alerts
```basic
' Enable reflection with alerting
BOT REFLECTION true
' Check for critical issues periodically
insights = BOT REFLECTION INSIGHTS()
IF insights.criticalIssues > 0 THEN
' Alert admin
SEND MAIL admin, "Bot Alert: Critical Issues Detected", insights.summary
END IF
```
## Reflection Metrics
| Metric | Description | Score Range |
|--------|-------------|-------------|
| `conversation_quality` | Overall conversation effectiveness | 0.0 - 1.0 |
| `response_accuracy` | How accurate/correct responses are | 0.0 - 1.0 |
| `user_satisfaction` | Estimated user satisfaction | 0.0 - 1.0 |
| `tone_appropriateness` | Whether tone matches context | 0.0 - 1.0 |
| `resolution_rate` | Whether user issues were resolved | 0.0 - 1.0 |
| `response_time` | Average response latency | milliseconds |
## Insights Object Structure
```basic
insights = BOT REFLECTION INSIGHTS()
' Available properties:
insights.qualityScore ' Overall quality (0-1)
insights.summary ' Text summary of analysis
insights.issues ' Array of identified issues
insights.issuesCount ' Number of issues found
insights.suggestions ' Array of improvement suggestions
insights.metrics ' Object with detailed metrics
insights.criticalIssues ' Count of critical problems
insights.conversationId ' ID of analyzed conversation
insights.timestamp ' When analysis was performed
```
## Config.csv Options
```csv
name,value
reflection-enabled,true
reflection-interval,10
reflection-min-messages,3
reflection-model,quality
reflection-store-insights,true
```
| Option | Default | Description |
|--------|---------|-------------|
| `reflection-enabled` | `true` | Enable/disable reflection globally |
| `reflection-interval` | `10` | Messages between reflection runs |
| `reflection-min-messages` | `3` | Minimum messages before reflecting |
| `reflection-model` | `quality` | LLM model for reflection analysis |
| `reflection-store-insights` | `true` | Store insights in database |
## How Reflection Works
1. **Collection** - Conversation history is collected
2. **Analysis** - LLM analyzes the conversation against metrics
3. **Scoring** - Quality scores are calculated
4. **Identification** - Issues and patterns are identified
5. **Suggestion** - Improvement suggestions are generated
6. **Storage** - Results stored for dashboards and trends
## Related Keywords
| Keyword | Description |
|---------|-------------|
| [`LLM`](./keyword-llm.md) | Query the language model |
| [`SET BOT MEMORY`](./keyword-set-bot-memory.md) | Store bot-level data |
| [`PRINT`](./keyword-print.md) | Debug output |
## Performance Considerations
- Reflection uses LLM calls (affects cost/latency)
- Run reflection periodically, not on every message
- Use smaller models for reflection when possible
- Consider async reflection for production
## Best Practices
1. **Enable for complex bots** - Most valuable for customer-facing agents
2. **Review insights regularly** - Use dashboards to spot trends
3. **Act on suggestions** - Update prompts and tools based on insights
4. **Set appropriate intervals** - Balance insight freshness vs cost
5. **Store for analysis** - Track improvements over time
## Limitations
- Reflection adds LLM cost per analysis
- Analysis quality depends on model capability
- Cannot analyze real-time user emotions
- Historical only (not predictive)
## See Also
- [Multi-Agent Orchestration](../chapter-11-features/multi-agent-orchestration.md) - Multi-agent systems
- [Observability](../chapter-11-features/observability.md) - Monitoring and metrics
- [LLM Configuration](../chapter-08-config/llm-config.md) - Model setup

View file

@ -0,0 +1,219 @@
# DELEGATE TO BOT
Delegates a task or message to another bot in a multi-agent system. This enables agent-to-agent communication using the A2A (Agent-to-Agent) protocol.
## Syntax
```basic
DELEGATE "message" TO BOT "botname"
DELEGATE "message" TO BOT "botname" TIMEOUT seconds
result = DELEGATE "message" TO BOT "botname"
```
## Parameters
| Parameter | Type | Description |
|-----------|------|-------------|
| `message` | String | The task or message to send to the target bot |
| `botname` | String | Name of the target bot to delegate to |
| `seconds` | Number | Optional timeout in seconds (default: 30) |
## Description
`DELEGATE TO BOT` sends a message or task to another bot and optionally waits for a response. This is the core keyword for multi-agent orchestration, enabling:
- **Task specialization** - Route tasks to specialized bots
- **Agent collaboration** - Multiple bots working together
- **Workload distribution** - Spread tasks across agents
- **Expert consultation** - Query domain-specific bots
The delegation uses the A2A (Agent-to-Agent) protocol which handles:
- Message routing between agents
- Correlation IDs for request/response matching
- Timeout handling
- Error propagation
## Examples
### Basic Delegation
```basic
' Delegate a translation task to a specialized bot
DELEGATE "Translate 'Hello World' to Portuguese" TO BOT "translator-bot"
TALK "Translation request sent!"
```
### Get Response from Delegated Bot
```basic
' Ask the finance bot for a calculation
result = DELEGATE "Calculate ROI for investment of $10000 with 12% annual return over 5 years" TO BOT "finance-bot"
TALK "The finance expert says: " + result
```
### Delegation with Timeout
```basic
' Long-running task with extended timeout
result = DELEGATE "Analyze this quarterly report and provide insights" TO BOT "analyst-bot" TIMEOUT 120
TALK result
```
### Multi-Bot Workflow
```basic
' Customer support escalation workflow
issue = "Customer reports billing discrepancy"
' First, check with billing bot
billingInfo = DELEGATE "Check account status for customer " + customerid TO BOT "billing-bot" TIMEOUT 30
IF INSTR(billingInfo, "discrepancy") > 0 THEN
' Escalate to senior support
resolution = DELEGATE "Priority: " + issue + " Details: " + billingInfo TO BOT "senior-support-bot" TIMEOUT 60
TALK "A senior agent is handling your case: " + resolution
ELSE
TALK "Your account looks fine: " + billingInfo
END IF
```
### Parallel Expert Consultation
```basic
' Get opinions from multiple specialized bots
question = "What's the best approach for this investment portfolio?"
' Delegate to multiple experts
stockAnalysis = DELEGATE question TO BOT "stock-analyst"
bondAnalysis = DELEGATE question TO BOT "bond-analyst"
riskAssessment = DELEGATE question TO BOT "risk-assessor"
' Combine insights
BEGIN TALK
**Investment Analysis Summary**
📈 **Stock Analysis:** {stockAnalysis}
📊 **Bond Analysis:** {bondAnalysis}
⚠️ **Risk Assessment:** {riskAssessment}
END TALK
```
### Conditional Routing
```basic
' Route to appropriate specialist based on query type
HEAR userquery
' Use LLM to classify the query
category = LLM "Classify this query into one of: billing, technical, sales, general. Query: " + userquery
SWITCH category
CASE "billing"
response = DELEGATE userquery TO BOT "billing-bot"
CASE "technical"
response = DELEGATE userquery TO BOT "tech-support-bot"
CASE "sales"
response = DELEGATE userquery TO BOT "sales-bot"
CASE ELSE
response = DELEGATE userquery TO BOT "general-assistant"
END SWITCH
TALK response
```
### Chain of Delegation
```basic
' Research assistant that coordinates multiple bots
topic = "renewable energy trends 2025"
' Step 1: Gather data
rawData = DELEGATE "Search for recent data on " + topic TO BOT "research-bot" TIMEOUT 60
' Step 2: Analyze data
analysis = DELEGATE "Analyze this data and identify key trends: " + rawData TO BOT "analyst-bot" TIMEOUT 45
' Step 3: Generate report
report = DELEGATE "Create an executive summary from this analysis: " + analysis TO BOT "writer-bot" TIMEOUT 30
TALK report
```
## A2A Protocol Details
When you use `DELEGATE TO BOT`, the system creates an A2A message with:
| Field | Description |
|-------|-------------|
| `from_agent` | The current bot's identifier |
| `to_agent` | The target bot name |
| `message_type` | `Delegate` for task delegation |
| `payload` | The message content |
| `correlation_id` | Unique ID to match response |
| `timestamp` | When the message was sent |
## Error Handling
```basic
' Handle delegation failures gracefully
ON ERROR RESUME NEXT
result = DELEGATE "Process payment" TO BOT "payment-bot" TIMEOUT 30
IF ERROR THEN
TALK "I'm having trouble reaching our payment system. Please try again in a moment."
' Log the error
PRINT "Delegation failed: " + ERROR_MESSAGE
ELSE
TALK result
END IF
```
## Related Keywords
| Keyword | Description |
|---------|-------------|
| [`ADD BOT`](./keyword-add-bot.md) | Add a bot to the current session |
| [`BROADCAST TO BOTS`](./keyword-broadcast-to-bots.md) | Send message to all bots |
| [`TRANSFER CONVERSATION`](./keyword-transfer-conversation.md) | Hand off conversation to another bot |
## Config.csv Options
```csv
name,value
a2a-enabled,true
a2a-timeout,30
a2a-max-hops,5
a2a-retry-count,3
```
| Option | Default | Description |
|--------|---------|-------------|
| `a2a-enabled` | `true` | Enable agent-to-agent communication |
| `a2a-timeout` | `30` | Default timeout in seconds |
| `a2a-max-hops` | `5` | Maximum delegation chain depth |
| `a2a-retry-count` | `3` | Number of retry attempts on failure |
## Best Practices
1. **Set appropriate timeouts** - Long tasks need longer timeouts
2. **Handle failures gracefully** - Always have a fallback
3. **Avoid circular delegation** - Bot A → Bot B → Bot A
4. **Keep delegation chains short** - Max 3-4 hops recommended
5. **Log delegations** - Helps with debugging multi-agent flows
6. **Use descriptive bot names** - `billing-bot` not `bot2`
## Limitations
- Maximum message size: 1MB
- Maximum timeout: 300 seconds (5 minutes)
- Maximum concurrent delegations: 10 per session
- Target bot must be registered and active
## See Also
- [Multi-Agent Orchestration](../chapter-11-features/multi-agent-orchestration.md) - Complete multi-agent guide
- [A2A Protocol](../chapter-11-features/a2a-protocol.md) - Technical protocol details
- [Bot Configuration](../chapter-08-config/parameters.md) - Bot setup

View file

@ -0,0 +1,191 @@
# GET USER MEMORY
Retrieves data stored at the user level, accessible across sessions and bots. This is the companion to `SET USER MEMORY` for reading persistent user data.
## Syntax
```basic
value = GET USER MEMORY("key")
```
## Parameters
| Parameter | Type | Description |
|-----------|------|-------------|
| `key` | String | The identifier for the stored value |
## Returns
The stored value, or empty string (`""`) if the key doesn't exist.
## Description
`GET USER MEMORY` retrieves persistent data associated with a specific user. This data:
- **Persists across sessions** - Available when user returns days/weeks later
- **Persists across bots** - Accessible from any bot the user interacts with
- **Returns original type** - Objects, arrays, strings, numbers preserved
- **Returns empty on miss** - No error if key doesn't exist
## Examples
### Basic Usage
```basic
' Retrieve user preferences
language = GET USER MEMORY("language")
timezone = GET USER MEMORY("timezone")
theme = GET USER MEMORY("theme")
TALK "Your settings: " + language + ", " + timezone + ", " + theme
```
### Check If User Is Returning
```basic
' Personalized greeting based on stored name
name = GET USER MEMORY("name")
IF name = "" THEN
TALK "Hello! I don't think we've met. What's your name?"
HEAR name
SET USER MEMORY "name", name
ELSE
TALK "Welcome back, " + name + "! How can I help you today?"
END IF
```
### Retrieve Complex Objects
```basic
' Get stored user profile
profile = GET USER MEMORY("profile")
IF profile <> "" THEN
TALK "Hello " + profile.name + "!"
TALK "Your plan: " + profile.plan
TALK "Member since: " + profile.signupDate
ELSE
TALK "Please complete your profile first."
END IF
```
### Cross-Bot Data Access
```basic
' Support bot accessing sales data
lastPurchase = GET USER MEMORY("lastPurchase")
IF lastPurchase <> "" THEN
TALK "I can see your recent order #" + lastPurchase.orderId
TALK "Purchased on: " + lastPurchase.date
TALK "Amount: $" + lastPurchase.amount
TALK "How can I help with this order?"
ELSE
TALK "I don't see any recent purchases. How can I help?"
END IF
```
### Retrieve User Facts for AI Context
```basic
' Load user facts into context for personalization
occupation = GET USER MEMORY("fact_occupation")
interests = GET USER MEMORY("fact_interests")
company = GET USER MEMORY("fact_company")
IF occupation <> "" THEN
SET CONTEXT "user_occupation" AS occupation
END IF
IF interests <> "" THEN
SET CONTEXT "user_interests" AS interests
END IF
' Now AI responses will be personalized based on these facts
```
### Default Values Pattern
```basic
' Get with fallback to default
language = GET USER MEMORY("language")
IF language = "" THEN
language = "en-US"
END IF
' Or use inline default
theme = GET USER MEMORY("theme")
IF theme = "" THEN theme = "light"
TALK "Using language: " + language + ", theme: " + theme
```
### Session Continuity
```basic
' Resume conversation from previous session
lastTopic = GET USER MEMORY("lastTopic")
lastQuestion = GET USER MEMORY("lastQuestion")
IF lastTopic <> "" THEN
TALK "Last time we were discussing " + lastTopic
TALK "You asked: " + lastQuestion
TALK "Would you like to continue from there?"
HEAR continueChoice AS BOOLEAN
IF continueChoice THEN
' Resume previous conversation
SET CONTEXT "topic" AS lastTopic
END IF
END IF
```
## Related Keywords
| Keyword | Description |
|---------|-------------|
| [`SET USER MEMORY`](./keyword-set-user-memory.md) | Store user-level persistent data |
| [`GET BOT MEMORY`](./keyword-get-bot-memory.md) | Retrieve bot-level data |
| [`SET BOT MEMORY`](./keyword-set-bot-memory.md) | Store data at bot level |
| [`USER FACTS`](./keyword-user-facts.md) | Get all stored user facts |
## Comparison: User Memory vs Bot Memory
| Aspect | User Memory | Bot Memory |
|--------|-------------|------------|
| **Scope** | Per user, across all bots | Per bot, across all users |
| **Use case** | User preferences, profile | Bot state, counters |
| **Access** | Any bot can read/write | Only owning bot |
| **Example** | `language`, `name`, `timezone` | `totalOrders`, `lastDeployed` |
## Error Handling
```basic
' GET USER MEMORY never throws - returns empty on missing key
value = GET USER MEMORY("nonexistent_key")
' value = ""
' Always check for empty before using
data = GET USER MEMORY("important_data")
IF data = "" THEN
TALK "Data not found. Please provide it."
' Handle missing data case
ELSE
' Use the data
END IF
```
## Best Practices
1. **Always check for empty** - Keys may not exist for new users
2. **Use consistent key naming** - Pick one convention (e.g. `user_name`) and stick with it; don't mix `user_name`, `userName`, and `name`
3. **Document your keys** - Keep track of what data you're storing
4. **Handle missing gracefully** - New users won't have stored data
5. **Don't assume structure** - Stored objects might have missing fields
## See Also
- [Memory Management](../chapter-11-features/memory-management.md) - Complete memory architecture
- [Multi-Agent Orchestration](../chapter-11-features/multi-agent-orchestration.md) - Cross-bot data sharing
- [User Context](../chapter-12-auth/user-system-context.md) - User vs system context

View file

@ -0,0 +1,340 @@
# RUN PYTHON / RUN JAVASCRIPT / RUN BASH
Executes code in a sandboxed environment. Enables safe execution of dynamic code for data processing, calculations, and automation tasks.
## Syntax
```basic
result = RUN PYTHON "code"
result = RUN JAVASCRIPT "code"
result = RUN BASH "code"
```
```basic
result = RUN PYTHON WITH FILE "script.py"
result = RUN JAVASCRIPT WITH FILE "script.js"
result = RUN BASH WITH FILE "script.sh"
```
## Parameters
| Parameter | Type | Description |
|-----------|------|-------------|
| `code` | String | Inline code to execute |
| `filepath` | String | Path to script file (with `WITH FILE` variant) |
## Returns
The output (stdout) from the executed code as a string.
## Description
The `RUN` keywords execute code in isolated, sandboxed environments. This provides:
- **Security** - Code runs in isolated containers (LXC, Docker, or Firecracker)
- **Flexibility** - Use the right language for the task
- **Safety** - Resource limits prevent runaway processes
- **Integration** - Pass data between BASIC and other languages
The sandbox prevents:
- File system access outside designated areas
- Network access (unless explicitly enabled)
- System calls and privilege escalation
- Excessive CPU or memory usage
## Examples
### Basic Python Execution
```basic
' Simple calculation
result = RUN PYTHON "print(2 + 2)"
TALK "2 + 2 = " + result
' Data processing
code = "
import json
data = [1, 2, 3, 4, 5]
print(json.dumps({'sum': sum(data), 'avg': sum(data)/len(data)}))
"
stats = RUN PYTHON code
TALK "Statistics: " + stats
```
### JavaScript for JSON Processing
```basic
' Parse and transform JSON
jsonData = GET "https://api.example.com/data"
code = "
const data = JSON.parse('" + jsonData + "');
const transformed = data.items.map(i => ({
id: i.id,
name: i.name.toUpperCase()
}));
console.log(JSON.stringify(transformed));
"
result = RUN JAVASCRIPT code
TALK result
```
### Bash for System Tasks
```basic
' List files and get disk usage
result = RUN BASH "ls -la /data && df -h"
TALK "System info:\n" + result
```
### Run Script from File
```basic
' Execute a Python script from .gbdrive
result = RUN PYTHON WITH FILE "scripts/analyze_data.py"
TALK "Analysis complete: " + result
' Run a bash script
output = RUN BASH WITH FILE "scripts/backup.sh"
PRINT "Backup output: " + output
```
### Data Pipeline
```basic
' Fetch data, process with Python, store result
rawData = GET "https://api.example.com/sales"
pythonCode = "
import json
import statistics
data = json.loads('''" + rawData + "''')
sales = [item['amount'] for item in data]
result = {
'total': sum(sales),
'average': statistics.mean(sales),
'median': statistics.median(sales),
'std_dev': statistics.stdev(sales) if len(sales) > 1 else 0
}
print(json.dumps(result))
"
analysis = RUN PYTHON pythonCode
SAVE "sales_analysis.csv", analysis
TALK "Sales analysis saved!"
```
### Machine Learning Inference
```basic
' Run ML model for prediction
inputData = #{ features: [1.5, 2.3, 4.1, 0.8] }
code = "
import json
import pickle
# Load pre-trained model (stored in sandbox)
with open('/data/model.pkl', 'rb') as f:
model = pickle.load(f)
input_data = " + JSON(inputData) + "
prediction = model.predict([input_data['features']])[0]
print(json.dumps({'prediction': float(prediction)}))
"
result = RUN PYTHON code
prediction = JSON_PARSE(result)
TALK "Predicted value: " + prediction.prediction
```
### Image Processing
```basic
' Process an uploaded image
imagePath = UPLOAD userImage, "uploads/"
code = "
from PIL import Image
import json
img = Image.open('/data/" + imagePath + "')
width, height = img.size
format = img.format
# Resize if too large
if width > 1920:
ratio = 1920 / width
new_size = (1920, int(height * ratio))
img = img.resize(new_size)
img.save('/data/resized_" + imagePath + "')
print(json.dumps({
'original_size': [width, height],
'format': format,
'resized': width > 1920
}))
"
result = RUN PYTHON code
TALK "Image processed: " + result
```
### Multi-Language Pipeline
```basic
' Use different languages for different strengths
data = GET "https://api.example.com/raw-data"
' Step 1: Clean data with Python (pandas)
cleanCode = "
import pandas as pd
import json
df = pd.read_json('''" + data + "''')
df = df.dropna()
df = df[df['value'] > 0]
print(df.to_json(orient='records'))
"
cleanedData = RUN PYTHON cleanCode
' Step 2: Transform with JavaScript (fast JSON manipulation)
transformCode = "
const data = JSON.parse('" + cleanedData + "');
const result = data.reduce((acc, item) => {
acc[item.category] = (acc[item.category] || 0) + item.value;
return acc;
}, {});
console.log(JSON.stringify(result));
"
aggregated = RUN JAVASCRIPT transformCode
TALK "Results: " + aggregated
```
## Sandbox Configuration
### Runtime Options
The sandbox supports multiple isolation backends:
| Runtime | Security | Performance | Requirements |
|---------|----------|-------------|--------------|
| `LXC` | High | Excellent | LXC installed |
| `Docker` | High | Good | Docker daemon |
| `Firecracker` | Highest | Good | Firecracker binary |
| `Process` | Low | Best | None (fallback) |
### Config.csv Options
```csv
name,value
sandbox-runtime,lxc
sandbox-timeout,30
sandbox-memory-mb,512
sandbox-cpu-percent,50
sandbox-network,false
sandbox-python-packages,"numpy,pandas,pillow"
sandbox-allowed-paths,"/data,/tmp"
```
| Option | Default | Description |
|--------|---------|-------------|
| `sandbox-runtime` | `lxc` | Isolation backend to use |
| `sandbox-timeout` | `30` | Maximum execution time (seconds) |
| `sandbox-memory-mb` | `512` | Memory limit in MB |
| `sandbox-cpu-percent` | `50` | CPU usage limit |
| `sandbox-network` | `false` | Allow network access |
| `sandbox-python-packages` | (none) | Pre-installed Python packages |
| `sandbox-allowed-paths` | `/data,/tmp` | Accessible filesystem paths |
## Security Considerations
### What's Blocked
- Direct file system access outside sandbox
- Network connections (unless `sandbox-network=true`)
- System calls (fork, exec, etc.)
- Environment variable access
- Process spawning
### What's Allowed
- Standard library operations
- File I/O within `/data` and `/tmp`
- Computation up to resource limits
- Pre-approved packages
### Input Sanitization
```basic
' IMPORTANT: Always sanitize user input before embedding in code
HEAR userInput
' Remove potential code injection
safeInput = REPLACE(userInput, "'", "\'")
safeInput = REPLACE(safeInput, '"', '\"')
code = "print('User said: " + safeInput + "')"
result = RUN PYTHON code
```
## Error Handling
```basic
' Handle execution errors
ON ERROR RESUME NEXT
result = RUN PYTHON "
import nonexistent_module
print('hello')
"
IF ERROR THEN
TALK "Code execution failed: " + ERROR_MESSAGE
' Fall back to alternative approach
ELSE
TALK result
END IF
```
## Resource Limits
| Resource | Default | Maximum |
|----------|---------|---------|
| Execution time | 30s | 300s |
| Memory | 512 MB | 4096 MB |
| CPU | 50% | 100% |
| Output size | 1 MB | 10 MB |
| File writes | 10 MB | 100 MB |
## Related Keywords
| Keyword | Description |
|---------|-------------|
| [`LLM`](./keyword-llm.md) | AI-generated code execution |
| [`GET`](./keyword-get.md) | Fetch data for processing |
| [`SAVE`](./keyword-save.md) | Store processed results |
## Best Practices
1. **Keep code snippets small** - Large scripts should use `WITH FILE`
2. **Sanitize all inputs** - Never trust user data in code strings
3. **Set appropriate timeouts** - Match timeout to expected execution time
4. **Use the right language** - Python for data, JS for JSON, Bash for files
5. **Handle errors gracefully** - Code can fail for many reasons
6. **Pre-install packages** - Don't pip install in every execution
7. **Log execution times** - Monitor for performance issues
## Limitations
- No persistent state between executions
- No GPU access (use dedicated ML endpoints instead)
- No interactive input (stdin)
- No graphical output (use file output instead)
- Package installation not allowed at runtime
## See Also
- [Code Sandbox Architecture](../chapter-07-gbapp/containers.md) - Technical details
- [Security Features](../chapter-12-auth/security-features.md) - Sandbox security model
- [Data Operations](./keywords-data.md) - Alternative data processing keywords

View file

@ -0,0 +1,150 @@
# SET USER MEMORY
Persists data at the user level, accessible across sessions and bots. Unlike `SET BOT MEMORY` which stores data per-bot, user memory follows the user wherever they go.
## Syntax
```basic
SET USER MEMORY "key", value
```
## Parameters
| Parameter | Type | Description |
|-----------|------|-------------|
| `key` | String | Unique identifier for the stored value |
| `value` | Any | The value to store (string, number, object) |
## Description
`SET USER MEMORY` stores persistent data associated with a specific user. This data:
- **Persists across sessions** - Available when user returns days/weeks later
- **Persists across bots** - Accessible from any bot the user interacts with
- **Survives restarts** - Stored in the database, not just memory
- **Supports TTL** - Optional time-to-live for automatic expiration
This is ideal for user preferences, profile data, and cross-bot personalization.
## Examples
### Basic Usage
```basic
' Store user preferences
SET USER MEMORY "language", "pt-BR"
SET USER MEMORY "timezone", "America/Sao_Paulo"
SET USER MEMORY "theme", "dark"
TALK "Preferences saved!"
```
### Store Complex Objects
```basic
' Store user profile
profile = #{
name: username,
email: useremail,
plan: "premium",
signupDate: NOW()
}
SET USER MEMORY "profile", profile
TALK "Profile updated successfully!"
```
### Cross-Bot Data Sharing
```basic
' In sales-bot: Store purchase history
purchase = #{
orderId: orderid,
amount: total,
date: NOW()
}
SET USER MEMORY "lastPurchase", purchase
' In support-bot: Access the same data
lastPurchase = GET USER MEMORY("lastPurchase")
TALK "I see your last order was #" + lastPurchase.orderId
```
### User Preferences for Personalization
```basic
' Check if returning user
name = GET USER MEMORY("name")
IF name = "" THEN
TALK "Welcome! What's your name?"
HEAR name
SET USER MEMORY "name", name
TALK "Nice to meet you, " + name + "!"
ELSE
TALK "Welcome back, " + name + "!"
END IF
```
### Store User Facts
```basic
' Store facts about the user for AI context
SET USER MEMORY "fact_occupation", "software engineer"
SET USER MEMORY "fact_interests", "AI, automation, productivity"
SET USER MEMORY "fact_company", "Acme Corp"
' These can be used to personalize AI responses
```
## Related Keywords
| Keyword | Description |
|---------|-------------|
| [`GET USER MEMORY`](./keyword-get-user-memory.md) | Retrieve user-level persisted data |
| [`SET BOT MEMORY`](./keyword-set-bot-memory.md) | Store data at bot level |
| [`GET BOT MEMORY`](./keyword-get-bot-memory.md) | Retrieve bot-level data |
| [`USER FACTS`](./keyword-user-facts.md) | Get all stored user facts |
## Database Storage
User memory is stored in the `user_memory` table with the following structure:
| Column | Description |
|--------|-------------|
| `user_id` | The user's unique identifier |
| `key` | The memory key |
| `value` | JSON-encoded value |
| `memory_type` | Type classification (preference, fact, context) |
| `ttl` | Optional expiration timestamp |
| `created_at` | When the memory was created |
| `updated_at` | Last modification time |
## Config.csv Options
```csv
name,value
user-memory-enabled,true
user-memory-max-keys,1000
user-memory-default-ttl,0
```
| Option | Default | Description |
|--------|---------|-------------|
| `user-memory-enabled` | `true` | Enable/disable user memory |
| `user-memory-max-keys` | `1000` | Maximum keys per user |
| `user-memory-default-ttl` | `0` | Default TTL in seconds (0 = no expiry) |
## Best Practices
1. **Use descriptive keys** - `user_language` not `lang`
2. **Prefix related keys** - `pref_theme`, `pref_language`, `fact_name`
3. **Don't store sensitive data** - No passwords or tokens
4. **Consider TTL for temporary data** - Session-specific data should expire
5. **Keep values reasonable size** - Don't store large files or blobs
## See Also
- [Memory Management](../chapter-11-features/memory-management.md) - Complete memory architecture
- [Multi-Agent Orchestration](../chapter-11-features/multi-agent-orchestration.md) - Cross-bot data sharing
- [User Context](../chapter-12-auth/user-system-context.md) - User vs system context

View file

@ -0,0 +1,185 @@
# USE MODEL
Dynamically switches the LLM model used for AI operations within a script. Enables model routing based on task requirements, cost optimization, or performance needs.
## Syntax
```basic
USE MODEL "modelname"
```
```basic
USE MODEL "auto"
```
## Parameters
| Parameter | Type | Description |
|-----------|------|-------------|
| `modelname` | String | Name of the model to use, or "auto" for automatic routing |
## Description
`USE MODEL` allows scripts to dynamically select which language model to use for subsequent AI operations. This is essential for:
- **Cost optimization** - Use smaller/cheaper models for simple tasks
- **Quality control** - Use powerful models for complex reasoning
- **Speed optimization** - Use fast models for real-time responses
- **Specialized tasks** - Use code-specific models for programming
When set to `"auto"`, the system automatically routes queries to the most appropriate model based on task complexity, latency requirements, and cost considerations.
## Examples
### Basic Model Selection
```basic
' Use a fast model for simple queries
USE MODEL "fast"
response = LLM "What time is it in New York?"
TALK response
' Switch to quality model for complex analysis
USE MODEL "quality"
analysis = LLM "Analyze the market trends for Q4 and provide recommendations"
TALK analysis
```
### Automatic Model Routing
```basic
' Let the system choose the best model
USE MODEL "auto"
' Simple query -> routes to fast model
greeting = LLM "Say hello"
' Complex query -> routes to quality model
report = LLM "Generate a detailed financial analysis with projections"
```
### Code Generation
```basic
' Use code-specialized model
USE MODEL "code"
code = LLM "Write a Python function to calculate fibonacci numbers"
TALK code
```
### Cost-Aware Processing
```basic
' Process bulk items with cheap model
USE MODEL "fast"
FOR EACH item IN items
summary = LLM "Summarize in one sentence: " + item.text
item.summary = summary
NEXT item
' Final review with quality model
USE MODEL "quality"
review = LLM "Review these summaries for accuracy: " + summaries
```
### Model Fallback Pattern
```basic
' Try preferred model first
USE MODEL "gpt-4"
ON ERROR GOTO fallback
response = LLM prompt
GOTO done
fallback:
' Fall back to local model if API fails
USE MODEL "local"
response = LLM prompt
done:
TALK response
```
## Model Routing Strategies
The system supports several routing strategies configured in `config.csv`:
| Strategy | Description |
|----------|-------------|
| `manual` | Explicit model selection only |
| `auto` | Automatic routing based on query analysis |
| `load-balanced` | Distribute across models for throughput |
| `fallback` | Try models in order until one succeeds |
## Built-in Model Aliases
| Alias | Description | Use Case |
|-------|-------------|----------|
| `fast` | Optimized for speed | Simple queries, real-time chat |
| `quality` | Optimized for accuracy | Complex reasoning, analysis |
| `code` | Code-specialized model | Programming tasks |
| `local` | Local GGUF model | Offline/private operation |
| `auto` | System-selected | Let routing decide |
## Config.csv Options
```csv
name,value
model-routing-strategy,auto
model-default,fast
model-fast,DeepSeek-R1-Distill-Qwen-1.5B-Q3_K_M.gguf
model-quality,gpt-4
model-code,codellama-7b.gguf
model-fallback-enabled,true
model-fallback-order,"quality,fast,local"
```
| Option | Default | Description |
|--------|---------|-------------|
| `model-routing-strategy` | `auto` | Routing strategy to use |
| `model-default` | `fast` | Default model when not specified |
| `model-fast` | (configured) | Model for fast/simple tasks |
| `model-quality` | (configured) | Model for quality/complex tasks |
| `model-code` | (configured) | Model for code generation |
| `model-fallback-enabled` | `true` | Enable automatic fallback |
| `model-fallback-order` | `quality,fast,local` | Order to try on failure |
## Auto-Routing Criteria
When `USE MODEL "auto"` is active, the system considers:
1. **Query complexity** - Token count, reasoning required
2. **Task type** - Code, analysis, chat, translation
3. **Latency requirements** - Real-time vs batch
4. **Cost budget** - Per-query and daily limits
5. **Model availability** - Health checks, rate limits
## Related Keywords
| Keyword | Description |
|---------|-------------|
| [`LLM`](./keyword-llm.md) | Query the language model |
| [`SET CONTEXT`](./keyword-set-context.md) | Add context for LLM |
| [`BEGIN SYSTEM PROMPT`](./prompt-blocks.md) | Define AI persona |
## Performance Considerations
- Model switching has minimal overhead
- Auto-routing adds ~10ms for classification
- Consider batching similar queries under one model
- Local models avoid network latency
## Best Practices
1. **Start with auto** - Let the system optimize, then tune
2. **Batch by model** - Group similar tasks to reduce switching
3. **Monitor costs** - Track per-model usage in analytics
4. **Test fallbacks** - Ensure graceful degradation
5. **Profile your queries** - Understand which need quality vs speed
## See Also
- [LLM Configuration](../chapter-08-config/llm-config.md) - Model setup
- [Multi-Agent Orchestration](../chapter-11-features/multi-agent-orchestration.md) - Model routing in multi-agent systems
- [Cost Tracking](../chapter-11-features/observability.md#cost-tracking) - Monitor model costs

View file

@ -0,0 +1,276 @@
# Multi-Agent Keywords
This section covers keywords for building multi-agent systems where multiple specialized bots collaborate to handle complex tasks.
## Overview
Multi-agent orchestration enables:
- **Task specialization** - Each bot focuses on what it does best
- **Collaborative problem-solving** - Bots work together on complex tasks
- **Scalable architectures** - Add new specialists without modifying existing bots
- **Resilient systems** - Failures are isolated and handled gracefully
## Keyword Summary
| Keyword | Syntax | Description |
|---------|--------|-------------|
| `ADD BOT` | `ADD BOT "name" TRIGGER ON "keywords"` | Add bot to session with triggers |
| `DELEGATE TO BOT` | `result = DELEGATE "msg" TO BOT "name"` | Send task to another bot |
| `BROADCAST TO BOTS` | `BROADCAST "message" TO BOTS` | Send message to all bots |
| `TRANSFER CONVERSATION` | `TRANSFER CONVERSATION TO "botname"` | Hand off conversation |
| `BOT REFLECTION` | `BOT REFLECTION true` | Enable agent self-analysis |
| `BOT REFLECTION INSIGHTS` | `insights = BOT REFLECTION INSIGHTS()` | Get reflection results |
## ADD BOT
Adds a bot to the current session with optional triggers, tools, and schedules.
```basic
' Add bot with keyword triggers
ADD BOT "billing-bot" TRIGGER ON "billing,invoice,payment"
' Add bot with tool access
ADD BOT "analyst-bot" TOOLS "calculate,forecast,report"
' Add bot with scheduled execution
ADD BOT "monitor-bot" SCHEDULE "0 */1 * * *"
' Add bot with multiple configurations
ADD BOT "support-bot" TRIGGER ON "help,support" TOOLS "ticket,escalate"
```
### Trigger Types
| Type | Description | Example |
|------|-------------|---------|
| `TRIGGER ON` | Keyword-based activation | `TRIGGER ON "billing,payment"` |
| `TOOLS` | Tool-based activation | `TOOLS "calculate,search"` |
| `SCHEDULE` | Cron-based activation | `SCHEDULE "0 9 * * *"` |
## DELEGATE TO BOT
Sends a task to another bot and optionally waits for a response.
```basic
' Fire-and-forget delegation
DELEGATE "Process this order" TO BOT "order-processor"
' Get response from delegation
result = DELEGATE "Calculate ROI" TO BOT "finance-bot"
TALK "Result: " + result
' Delegation with timeout
result = DELEGATE "Analyze report" TO BOT "analyst-bot" TIMEOUT 60
```
### Parameters
| Parameter | Type | Description |
|-----------|------|-------------|
| `message` | String | Task or message to send |
| `botname` | String | Target bot name |
| `TIMEOUT` | Number | Optional timeout in seconds (default: 30) |
## BROADCAST TO BOTS
Sends a message to all bots in the current session.
```basic
' Notify all bots of an event
BROADCAST "New customer signup: " + customerid TO BOTS
' Emergency signal
BROADCAST "MAINTENANCE_MODE" TO BOTS
' Data update notification
BROADCAST "PRICE_UPDATE:" + JSON(prices) TO BOTS
```
## TRANSFER CONVERSATION
Hands off the entire conversation to another bot. The current bot exits.
```basic
' Simple transfer
TALK "Let me connect you with our billing specialist."
TRANSFER CONVERSATION TO "billing-bot"
' Transfer with context
SET CONTEXT "issue" AS "refund request"
SET CONTEXT "amount" AS "$150"
TRANSFER CONVERSATION TO "refunds-bot"
' Conditional transfer
IF issueType = "technical" THEN
TRANSFER CONVERSATION TO "tech-support-bot"
ELSE
TRANSFER CONVERSATION TO "general-support-bot"
END IF
```
## BOT REFLECTION
Enables agent self-analysis for continuous improvement.
```basic
' Enable reflection
BOT REFLECTION true
' Disable reflection
BOT REFLECTION false
' Monitor specific metric
BOT REFLECTION ON "conversation_quality"
BOT REFLECTION ON "response_accuracy"
BOT REFLECTION ON "user_satisfaction"
```
### Reflection Metrics
| Metric | Description |
|--------|-------------|
| `conversation_quality` | Overall conversation effectiveness |
| `response_accuracy` | Correctness of responses |
| `user_satisfaction` | Estimated user satisfaction |
| `tone_appropriateness` | Whether tone matches context |
| `resolution_rate` | Whether issues were resolved |
## BOT REFLECTION INSIGHTS
Retrieves the results of reflection analysis.
```basic
' Get insights
insights = BOT REFLECTION INSIGHTS()
' Access properties
PRINT "Quality Score: " + insights.qualityScore
PRINT "Issues: " + insights.issuesCount
' Iterate suggestions
FOR EACH suggestion IN insights.suggestions
PRINT "Suggestion: " + suggestion
NEXT suggestion
' Use for alerting
IF insights.qualityScore < 0.5 THEN
SEND MAIL admin, "Low Quality Alert", insights.summary
END IF
```
### Insights Object
| Property | Type | Description |
|----------|------|-------------|
| `qualityScore` | Number | Overall quality (0-1) |
| `summary` | String | Text summary |
| `issues` | Array | Identified issues |
| `issuesCount` | Number | Count of issues |
| `suggestions` | Array | Improvement suggestions |
| `criticalIssues` | Number | Critical problem count |
| `timestamp` | DateTime | When analyzed |
## Common Patterns
### Router Pattern
A central bot routes queries to specialists.
```basic
' router-bot/start.bas
HEAR userquery
' Classify the query
category = LLM "Classify into: billing, technical, sales, general. Query: " + userquery
SWITCH category
CASE "billing"
result = DELEGATE userquery TO BOT "billing-bot"
CASE "technical"
result = DELEGATE userquery TO BOT "tech-bot"
CASE "sales"
result = DELEGATE userquery TO BOT "sales-bot"
CASE ELSE
result = LLM userquery
END SWITCH
TALK result
```
### Expert Panel Pattern
Multiple bots provide perspectives.
```basic
question = "Should we expand into Europe?"
' Get multiple expert opinions
marketView = DELEGATE question TO BOT "market-analyst"
financeView = DELEGATE question TO BOT "finance-expert"
riskView = DELEGATE question TO BOT "risk-assessor"
' Synthesize
synthesis = LLM "Combine these expert views: " + marketView + financeView + riskView
TALK synthesis
```
### Escalation Pattern
Automatic escalation when confidence is low.
```basic
' First-line bot
confidence = LLM "Rate confidence (0-100) for: " + userquery
IF confidence < 50 THEN
TALK "Let me connect you with a specialist."
SET CONTEXT "escalation_reason" AS "low_confidence"
TRANSFER CONVERSATION TO "senior-support-bot"
ELSE
response = LLM userquery
TALK response
END IF
```
## Configuration
### config.csv Options
```csv
name,value
a2a-enabled,true
a2a-timeout,30
a2a-max-hops,5
a2a-retry-count,3
reflection-enabled,true
reflection-interval,10
reflection-min-messages,3
```
| Option | Default | Description |
|--------|---------|-------------|
| `a2a-enabled` | `true` | Enable agent-to-agent communication |
| `a2a-timeout` | `30` | Default delegation timeout (seconds) |
| `a2a-max-hops` | `5` | Maximum delegation chain depth |
| `a2a-retry-count` | `3` | Retry attempts on failure |
| `reflection-enabled` | `true` | Enable bot reflection |
| `reflection-interval` | `10` | Messages between reflections |
## Best Practices
1. **Use descriptive bot names** - `billing-bot` not `bot2`
2. **Set appropriate timeouts** - Long tasks need longer timeouts
3. **Handle failures gracefully** - Always have fallback paths
4. **Avoid circular delegation** - Bot A → Bot B → Bot A
5. **Keep chains short** - Max 3-4 delegation hops
6. **Log delegations** - Helps debug multi-agent flows
7. **Review reflection insights** - Act on improvement suggestions
## See Also
- [ADD BOT](./keyword-add-bot.md) - Detailed ADD BOT reference
- [DELEGATE TO BOT](./keyword-delegate-to-bot.md) - Delegation details
- [BOT REFLECTION](./keyword-bot-reflection.md) - Reflection details
- [Multi-Agent Orchestration](../chapter-11-features/multi-agent-orchestration.md) - Complete guide
- [A2A Protocol](../chapter-11-features/a2a-protocol.md) - Protocol details

View file

@ -52,11 +52,14 @@ See [Script Execution Flow](./script-execution-flow.md) for complete details.
| Keyword | Category | Description |
|---------|----------|-------------|
| `ADD BOT` | Multi-Agent | Add a bot to the current session with triggers |
| `ADD MEMBER` | Communication | Add member to a group |
| `ADD SUGGESTION` | UI | Add clickable suggestion button |
| `ADD TOOL` | Tools | Register a tool for the session |
| `AGGREGATE` | Data | Perform SUM, AVG, COUNT, MIN, MAX operations |
| `BOOK` | Special | Book an appointment |
| `BOT REFLECTION` | Multi-Agent | Enable agent self-analysis and improvement |
| `BROADCAST TO BOTS` | Multi-Agent | Send message to all bots in session |
| `CLEAR HEADERS` | HTTP | Clear all HTTP headers |
| `CLEAR KB` | Knowledge | Unload knowledge base from session |
| `CLEAR SUGGESTIONS` | UI | Remove all suggestion buttons |
@ -66,6 +69,7 @@ See [Script Execution Flow](./script-execution-flow.md) for complete details.
| `CREATE DRAFT` | Communication | Create email draft | | `CREATE DRAFT` | Communication | Create email draft |
| `CREATE SITE` | Tools | Generate a website | | `CREATE SITE` | Tools | Generate a website |
| `CREATE TASK` | Tools | Create a task | | `CREATE TASK` | Tools | Create a task |
| `DELEGATE TO BOT` | Multi-Agent | Send task to another bot |
| `DELETE` | Data | Delete records from table | | `DELETE` | Data | Delete records from table |
| `DELETE FILE` | Files | Delete a file | | `DELETE FILE` | Files | Delete a file |
| `DELETE HTTP` | HTTP | Send HTTP DELETE request | | `DELETE HTTP` | HTTP | Send HTTP DELETE request |
@ -81,6 +85,7 @@ See [Script Execution Flow](./script-execution-flow.md) for complete details.
| `GENERATE PDF` | Files | Generate PDF from template | | `GENERATE PDF` | Files | Generate PDF from template |
| `GET` | Variables | Get variable or API data | | `GET` | Variables | Get variable or API data |
| `GET BOT MEMORY` | Memory | Retrieve bot-level persisted data | | `GET BOT MEMORY` | Memory | Retrieve bot-level persisted data |
| `GET USER MEMORY` | Memory | Retrieve user-level persisted data (cross-bot) |
| `GRAPHQL` | HTTP | Execute GraphQL query | | `GRAPHQL` | HTTP | Execute GraphQL query |
| `GROUP BY` | Data | Group data by field | | `GROUP BY` | Data | Group data by field |
| `HEAR` | Dialog | Get input from user | | `HEAR` | Dialog | Get input from user |
@ -104,6 +109,9 @@ See [Script Execution Flow](./script-execution-flow.md) for complete details.
| `PUT` | HTTP | Send HTTP PUT request | | `PUT` | HTTP | Send HTTP PUT request |
| `READ` | Files | Read file content | | `READ` | Files | Read file content |
| `REMEMBER` | Memory | Store user-specific memory | | `REMEMBER` | Memory | Store user-specific memory |
| `RUN BASH` | Code Execution | Execute Bash script in sandbox |
| `RUN JAVASCRIPT` | Code Execution | Execute JavaScript in sandbox |
| `RUN PYTHON` | Code Execution | Execute Python code in sandbox |
| `SAVE` | Data | Save data to table (upsert) | | `SAVE` | Data | Save data to table (upsert) |
| `SAVE FROM UNSTRUCTURED` | Data | Extract structured data from text | | `SAVE FROM UNSTRUCTURED` | Data | Extract structured data from text |
| `SEND MAIL` | Communication | Send email | | `SEND MAIL` | Communication | Send email |
@ -113,11 +121,16 @@ See [Script Execution Flow](./script-execution-flow.md) for complete details.
| `SET HEADER` | HTTP | Set HTTP header | | `SET HEADER` | HTTP | Set HTTP header |
| `SET SCHEDULE` | Events | Schedule script execution | | `SET SCHEDULE` | Events | Schedule script execution |
| `SET USER` | Session | Set user context | | `SET USER` | Session | Set user context |
| `SET USER FACT` | Memory | Store a fact about the user |
| `SET USER MEMORY` | Memory | Persist data at user level (cross-bot) |
| `SOAP` | HTTP | Execute SOAP API call | | `SOAP` | HTTP | Execute SOAP API call |
| `SWITCH ... CASE ... END SWITCH` | Control | Switch statement | | `SWITCH ... CASE ... END SWITCH` | Control | Switch statement |
| `SYNCHRONIZE` | Data | Sync API data to table (planned) | | `SYNCHRONIZE` | Data | Sync API data to table (planned) |
| `TALK` | Dialog | Send message to user | | `TALK` | Dialog | Send message to user |
| `TRANSFER CONVERSATION` | Multi-Agent | Hand off conversation to another bot |
| `UPDATE` | Data | Update existing records | | `UPDATE` | Data | Update existing records |
| `USE MODEL` | AI | Switch LLM model for subsequent operations |
| `USER FACTS` | Memory | Get all stored user facts |
| `UPLOAD` | Files | Upload file to storage | | `UPLOAD` | Files | Upload file to storage |
| `USE KB` | Knowledge | Load knowledge base | | `USE KB` | Knowledge | Load knowledge base |
| `USE TOOL` | Tools | Register tool definition | | `USE TOOL` | Tools | Register tool definition |
@ -149,6 +162,10 @@ See [Script Execution Flow](./script-execution-flow.md) for complete details.
| GET | `result = GET "path"` | Get variable or fetch data | | GET | `result = GET "path"` | Get variable or fetch data |
| SET BOT MEMORY | `SET BOT MEMORY "key", value` | Persist data at bot level | | SET BOT MEMORY | `SET BOT MEMORY "key", value` | Persist data at bot level |
| GET BOT MEMORY | `value = GET BOT MEMORY("key")` | Retrieve persisted data | | GET BOT MEMORY | `value = GET BOT MEMORY("key")` | Retrieve persisted data |
| SET USER MEMORY | `SET USER MEMORY "key", value` | Persist data at user level (cross-bot) |
| GET USER MEMORY | `value = GET USER MEMORY("key")` | Retrieve user-level data |
| SET USER FACT | `SET USER FACT "key", value` | Store fact about user |
| USER FACTS | `facts = USER FACTS()` | Get all user facts |
| REMEMBER | `REMEMBER "key", value` | Store user-specific memory | | REMEMBER | `REMEMBER "key", value` | Store user-specific memory |
### AI & Context ### AI & Context
@ -158,6 +175,27 @@ See [Script Execution Flow](./script-execution-flow.md) for complete details.
| LLM | `result = LLM "prompt"` | Query language model | | LLM | `result = LLM "prompt"` | Query language model |
| SET CONTEXT | `SET CONTEXT "name" AS "value"` | Add context for LLM | | SET CONTEXT | `SET CONTEXT "name" AS "value"` | Add context for LLM |
| SET USER | `SET USER userid` | Set user context | | SET USER | `SET USER userid` | Set user context |
| USE MODEL | `USE MODEL "modelname"` | Switch LLM model (fast/quality/code/auto) |
### Multi-Agent Orchestration
| Keyword | Syntax | Description |
|---------|--------|-------------|
| ADD BOT | `ADD BOT "name" TRIGGER ON "keywords"` | Add bot with triggers |
| DELEGATE TO BOT | `result = DELEGATE "message" TO BOT "name"` | Send task to bot |
| BROADCAST TO BOTS | `BROADCAST "message" TO BOTS` | Message all bots |
| TRANSFER CONVERSATION | `TRANSFER CONVERSATION TO "botname"` | Hand off to bot |
| BOT REFLECTION | `BOT REFLECTION true` | Enable self-analysis |
| BOT REFLECTION INSIGHTS | `insights = BOT REFLECTION INSIGHTS()` | Get analysis results |
### Code Execution (Sandboxed)
| Keyword | Syntax | Description |
|---------|--------|-------------|
| RUN PYTHON | `result = RUN PYTHON "code"` | Execute Python in sandbox |
| RUN JAVASCRIPT | `result = RUN JAVASCRIPT "code"` | Execute JS in sandbox |
| RUN BASH | `result = RUN BASH "code"` | Execute Bash in sandbox |
| RUN ... WITH FILE | `result = RUN PYTHON WITH FILE "script.py"` | Run script file |
### Knowledge Base ### Knowledge Base

View file

@ -137,6 +137,106 @@ llm-model,mixtral-8x7b-32768
| `custom-username` | Database user | Not set | String | | `custom-username` | Database user | Not set | String |
| `custom-password` | Database password | Not set | String | | `custom-password` | Database password | Not set | String |
## Multi-Agent Parameters
### Agent-to-Agent (A2A) Communication
| Parameter | Description | Default | Type |
|-----------|-------------|---------|------|
| `a2a-enabled` | Enable agent-to-agent communication | `true` | Boolean |
| `a2a-timeout` | Default delegation timeout | `30` | Seconds |
| `a2a-max-hops` | Maximum delegation chain depth | `5` | Number |
| `a2a-retry-count` | Retry attempts on failure | `3` | Number |
| `a2a-queue-size` | Maximum pending messages | `100` | Number |
### Bot Reflection
| Parameter | Description | Default | Type |
|-----------|-------------|---------|------|
| `reflection-enabled` | Enable bot self-analysis | `true` | Boolean |
| `reflection-interval` | Messages between reflections | `10` | Number |
| `reflection-min-messages` | Minimum messages before reflecting | `3` | Number |
| `reflection-model` | LLM model for reflection | `quality` | String |
| `reflection-store-insights` | Store insights in database | `true` | Boolean |
## Memory Parameters
### User Memory (Cross-Bot)
| Parameter | Description | Default | Type |
|-----------|-------------|---------|------|
| `user-memory-enabled` | Enable user-level memory | `true` | Boolean |
| `user-memory-max-keys` | Maximum keys per user | `1000` | Number |
| `user-memory-default-ttl` | Default time-to-live (0=no expiry) | `0` | Seconds |
### Episodic Memory
| Parameter | Description | Default | Type |
|-----------|-------------|---------|------|
| `episodic-memory-enabled` | Enable conversation summaries | `true` | Boolean |
| `episodic-summary-model` | Model for summarization | `fast` | String |
| `episodic-max-episodes` | Maximum episodes per user | `100` | Number |
| `episodic-retention-days` | Days to retain episodes | `365` | Number |
## Model Routing Parameters
| Parameter | Description | Default | Type |
|-----------|-------------|---------|------|
| `model-routing-strategy` | Routing strategy (manual/auto/load-balanced/fallback) | `auto` | String |
| `model-default` | Default model alias | `fast` | String |
| `model-fast` | Model for fast/simple tasks | (configured) | Path/String |
| `model-quality` | Model for quality/complex tasks | (configured) | Path/String |
| `model-code` | Model for code generation | (configured) | Path/String |
| `model-fallback-enabled` | Enable automatic fallback | `true` | Boolean |
| `model-fallback-order` | Order to try on failure | `quality,fast,local` | String |
## Hybrid RAG Search Parameters
| Parameter | Description | Default | Type |
|-----------|-------------|---------|------|
| `rag-hybrid-enabled` | Enable hybrid dense+sparse search | `true` | Boolean |
| `rag-dense-weight` | Weight for semantic results | `0.7` | Float (0-1) |
| `rag-sparse-weight` | Weight for keyword results | `0.3` | Float (0-1) |
| `rag-reranker-enabled` | Enable LLM reranking | `false` | Boolean |
| `rag-reranker-model` | Model for reranking | `quality` | String |
| `rag-reranker-top-n` | Candidates for reranking | `20` | Number |
| `rag-top-k` | Results to return | `10` | Number |
| `rag-rrf-k` | RRF smoothing constant | `60` | Number |
| `rag-cache-enabled` | Enable search result caching | `true` | Boolean |
| `rag-cache-ttl` | Cache time-to-live | `3600` | Seconds |
### BM25 (Sparse Search) Tuning
| Parameter | Description | Default | Type |
|-----------|-------------|---------|------|
| `bm25-k1` | Term saturation parameter | `1.2` | Float |
| `bm25-b` | Length normalization | `0.75` | Float |
| `bm25-stemming` | Enable word stemming | `true` | Boolean |
| `bm25-stopwords` | Filter common words | `true` | Boolean |
## Code Sandbox Parameters
| Parameter | Description | Default | Type |
|-----------|-------------|---------|------|
| `sandbox-runtime` | Isolation backend (lxc/docker/firecracker/process) | `lxc` | String |
| `sandbox-timeout` | Maximum execution time | `30` | Seconds |
| `sandbox-memory-mb` | Memory limit | `512` | MB |
| `sandbox-cpu-percent` | CPU usage limit | `50` | Percent |
| `sandbox-network` | Allow network access | `false` | Boolean |
| `sandbox-python-packages` | Pre-installed Python packages | (none) | Comma-separated |
| `sandbox-allowed-paths` | Accessible filesystem paths | `/data,/tmp` | Comma-separated |
## SSE Streaming Parameters
| Parameter | Description | Default | Type |
|-----------|-------------|---------|------|
| `sse-enabled` | Enable Server-Sent Events | `true` | Boolean |
| `sse-heartbeat` | Heartbeat interval | `30` | Seconds |
| `sse-max-connections` | Maximum concurrent connections | `1000` | Number |
## OpenAPI Tool Generation Parameters
| Parameter | Description | Default | Type |
|-----------|-------------|---------|------|
| `openapi-server` | OpenAPI spec URL for auto tool generation | Not set | URL |
| `openapi-auth-header` | Authentication header name | `Authorization` | String |
| `openapi-auth-value` | Authentication header value | Not set | String |
## Parameter Types ## Parameter Types
### Boolean ### Boolean
@ -223,6 +323,35 @@ llm-server-no-mmap,false
llm-cache,false llm-cache,false
``` ```
### For Multi-Agent Systems
```csv
a2a-enabled,true
a2a-timeout,30
a2a-max-hops,5
model-routing-strategy,auto
reflection-enabled,true
reflection-interval,10
user-memory-enabled,true
```
### For Hybrid RAG
```csv
rag-hybrid-enabled,true
rag-dense-weight,0.7
rag-sparse-weight,0.3
rag-reranker-enabled,true
rag-cache-enabled,true
```
### For Code Execution
```csv
sandbox-runtime,lxc
sandbox-timeout,30
sandbox-memory-mb,512
sandbox-network,false
sandbox-python-packages,"numpy,pandas,requests"
```
## Validation Rules ## Validation Rules
1. **Paths**: Model files must exist 1. **Paths**: Model files must exist

View file

@ -0,0 +1,409 @@
# Hybrid RAG Search
General Bots implements a hybrid search system that combines dense (semantic) and sparse (keyword) retrieval methods for improved search quality. This approach, known as RAG 2.0, provides better recall and precision than either method alone.
## Overview
Traditional RAG systems use either:
- **Dense retrieval** - Semantic similarity via vector embeddings
- **Sparse retrieval** - Keyword matching (BM25, TF-IDF)
Hybrid search combines both approaches using Reciprocal Rank Fusion (RRF), getting the best of both worlds:
- **Dense** excels at understanding meaning and synonyms
- **Sparse** excels at exact matches and rare terms
## How It Works
```
┌─────────────────────────────────────────────────────────────┐
│ User Query │
│ "customer refund policy" │
└─────────────────────┬───────────────────────────────────────┘
┌───────────┴───────────┐
▼ ▼
┌─────────────────┐ ┌─────────────────┐
│ Dense Search │ │ Sparse Search │
│ (Semantic) │ │ (BM25) │
│ │ │ │
│ Weight: 0.7 │ │ Weight: 0.3 │
└────────┬────────┘ └────────┬────────┘
│ │
│ Results + Scores │
└───────────┬───────────┘
┌───────────────────┐
│ Reciprocal Rank │
│ Fusion │
│ │
│ RRF(d) = Σ 1/(k+r)│
└─────────┬─────────┘
┌───────────────────┐
│ Optional LLM │
│ Reranking │
└─────────┬─────────┘
┌───────────────────┐
│ Final Results │
└───────────────────┘
```
## Configuration
### Enable Hybrid Search
```csv
name,value
rag-hybrid-enabled,true
rag-dense-weight,0.7
rag-sparse-weight,0.3
rag-reranker-enabled,true
rag-reranker-model,quality
rag-top-k,10
rag-rrf-k,60
```
### Configuration Options
| Option | Default | Description |
|--------|---------|-------------|
| `rag-hybrid-enabled` | `true` | Enable hybrid search |
| `rag-dense-weight` | `0.7` | Weight for semantic results (0-1) |
| `rag-sparse-weight` | `0.3` | Weight for keyword results (0-1) |
| `rag-reranker-enabled` | `false` | Enable LLM reranking |
| `rag-reranker-model` | `quality` | Model for reranking |
| `rag-top-k` | `10` | Number of results to return |
| `rag-rrf-k` | `60` | RRF smoothing constant |
## Usage in BASIC
Hybrid search is automatic when enabled. No code changes needed.
### Basic Search
```basic
' Load knowledge base
USE KB "company-policies"
' Search uses hybrid method automatically
result = FIND "refund policy for damaged items"
TALK result
```
### Search with Context
```basic
' Multiple KBs for comprehensive search
USE KB "product-docs"
USE KB "support-articles"
USE KB "faq"
' Query searches all KBs with hybrid method
answer = FIND "how to reset password"
TALK answer
```
### Contextual Search
```basic
' Set context to improve search relevance
SET CONTEXT "department" AS "billing"
SET CONTEXT "customer_tier" AS "premium"
' Search considers context
result = FIND "payment options"
' Results prioritized for billing + premium context
```
## Reciprocal Rank Fusion (RRF)
RRF combines rankings from multiple retrieval methods:
```
RRF_score(d) = Σ 1 / (k + rank_i(d))
```
Where:
- `d` = document
- `k` = smoothing constant (default: 60)
- `rank_i(d)` = rank of document in result list i
### Why RRF?
- **Rank-based** - Works regardless of score scales
- **Robust** - Handles missing documents gracefully
- **Simple** - No training required
- **Effective** - Proven in information retrieval research
### Example
Document appears at:
- Dense search: rank 2
- Sparse search: rank 5
RRF score = 1/(60+2) + 1/(60+5) = 0.0161 + 0.0154 = 0.0315
## Dense Search (Semantic)
Uses vector embeddings to find semantically similar content.
### Strengths
- Understands synonyms ("car" matches "automobile")
- Captures semantic meaning
- Handles paraphrasing
- Works across languages (with multilingual models)
### Configuration
```csv
name,value
embedding-model,all-MiniLM-L6-v2
embedding-dimension,384
vector-db,qdrant
vector-similarity,cosine
```
### When Dense Excels
- "What's your return policy?" matches "refund guidelines"
- "How do I contact support?" matches "reach customer service"
- Conceptual queries without exact keywords
## Sparse Search (BM25)
Uses keyword matching with term frequency weighting.
### Strengths
- Exact term matching
- Handles rare/unique terms
- Fast and efficient
- No embedding computation needed
### BM25 Formula
```
score(D,Q) = Σ IDF(qi) · (f(qi,D) · (k1 + 1)) / (f(qi,D) + k1 · (1 - b + b · |D|/avgdl))
```
Where:
- `IDF(qi)` = Inverse document frequency of term
- `f(qi,D)` = Term frequency in document
- `k1`, `b` = Tuning parameters
- `|D|` = Document length
- `avgdl` = Average document length
### Configuration
```csv
name,value
bm25-k1,1.2
bm25-b,0.75
bm25-stemming,true
bm25-stopwords,true
```
### When BM25 Excels
- Product codes: "SKU-12345"
- Technical terms: "NullPointerException"
- Proper nouns: "John Smith"
- Exact phrases: "Terms of Service"
## Reranking
Optional LLM-based reranking for highest quality results.
### How It Works
1. Hybrid search returns top-N candidates
2. LLM scores each candidate for query relevance
3. Results reordered by LLM scores
### Enable Reranking
```csv
name,value
rag-reranker-enabled,true
rag-reranker-model,quality
rag-reranker-top-n,20
```
### Trade-offs
| Aspect | Without Reranking | With Reranking |
|--------|-------------------|----------------|
| Latency | ~50ms | ~500ms |
| Quality | Good | Excellent |
| Cost | None | LLM API cost |
| Use case | Real-time chat | Quality-critical |
### When to Use Reranking
- Legal/compliance queries
- Medical information
- Financial advice
- Any high-stakes answers
## Tuning Weights
### Default (Balanced)
```csv
rag-dense-weight,0.7
rag-sparse-weight,0.3
```
Good for general-purpose search.
### Semantic-Heavy
```csv
rag-dense-weight,0.9
rag-sparse-weight,0.1
```
Best for:
- Conversational queries
- Concept-based search
- Multilingual content
### Keyword-Heavy
```csv
rag-dense-weight,0.4
rag-sparse-weight,0.6
```
Best for:
- Technical documentation
- Code search
- Product catalogs
### Equal Weight
```csv
rag-dense-weight,0.5
rag-sparse-weight,0.5
```
When you're unsure which method works better.
## Performance Optimization
### Index Configuration
```csv
name,value
vector-index-type,hnsw
vector-ef-construct,200
vector-m,16
bm25-index-shards,4
```
### Caching
```csv
name,value
rag-cache-enabled,true
rag-cache-ttl,3600
rag-cache-max-size,10000
```
### Batch Processing
```basic
' Process multiple queries efficiently
queries = ["policy question", "pricing info", "support contact"]
FOR EACH query IN queries
result = FIND query
SAVE "results.csv", query, result
NEXT query
```
## Monitoring
### Search Quality Metrics
| Metric | Description | Target |
|--------|-------------|--------|
| MRR | Mean Reciprocal Rank | > 0.7 |
| Recall@10 | Relevant docs in top 10 | > 0.9 |
| Latency P95 | 95th percentile latency | < 200ms |
| Cache Hit Rate | Queries served from cache | > 40% |
### Logging
```csv
name,value
rag-logging-enabled,true
rag-log-queries,true
rag-log-scores,true
```
### Debug Mode
```basic
' Enable debug output for search
PRINT "Searching: " + query
result = FIND query
PRINT "Dense score: " + result.dense_score
PRINT "Sparse score: " + result.sparse_score
PRINT "Final score: " + result.rrf_score
```
## Comparison with Pure Methods
| Aspect | Dense Only | Sparse Only | Hybrid |
|--------|------------|-------------|--------|
| Semantic understanding | ✅ Excellent | ❌ Poor | ✅ Excellent |
| Exact matching | ❌ Poor | ✅ Excellent | ✅ Excellent |
| Rare terms | ❌ Poor | ✅ Excellent | ✅ Good |
| Synonyms | ✅ Excellent | ❌ Poor | ✅ Excellent |
| Latency | Medium | Fast | Medium |
| Overall quality | Good | Good | Best |
## Troubleshooting
### Poor Search Results
1. Check weights match your content type
2. Verify embeddings are generated correctly
3. Test with both pure dense and pure sparse
4. Enable reranking for critical queries
### High Latency
1. Reduce `rag-top-k` value
2. Enable caching
3. Use faster embedding model
4. Consider disabling reranking
### Missing Expected Results
1. Check document is indexed
2. Verify no filters excluding it
3. Test with exact keyword match
4. Check chunk size isn't too large
## Best Practices
1. **Start with defaults** - 0.7/0.3 works well for most cases
2. **Monitor and tune** - Use metrics to guide weight adjustments
3. **Use reranking selectively** - Only for quality-critical paths
4. **Cache aggressively** - Many queries repeat
5. **Test both methods** - Understand where each excels
6. **Keep chunks reasonable** - 500-1000 tokens optimal
7. **Update indices regularly** - Fresh content needs reindexing
## See Also
- [Knowledge Base](./knowledge-base.md) - KB setup and management
- [Vector Collections](../chapter-03/vector-collections.md) - Vector DB details
- [Semantic Search](../chapter-03/semantic-search.md) - Dense search deep dive
- [Document Indexing](../chapter-03/indexing.md) - How documents are indexed
- [LLM Configuration](../chapter-08-config/llm-config.md) - Reranker model setup

View file

@ -0,0 +1,454 @@
# Memory Management
General Bots provides a comprehensive memory system that enables persistent storage, cross-session continuity, and multi-agent data sharing. This chapter covers all memory types, their use cases, and best practices.
## Overview
The memory system supports four distinct scopes:
| Memory Type | Scope | Persistence | Use Case |
|-------------|-------|-------------|----------|
| **User Memory** | Per user, all bots | Permanent | Preferences, profile, facts |
| **Bot Memory** | Per bot, all users | Permanent | Bot state, counters, config |
| **Session Memory** | Per session | Session lifetime | Current conversation context |
| **Episodic Memory** | Per conversation | Permanent | Conversation summaries |
## User Memory
User memory follows users across all bots and sessions, enabling personalization and continuity.
### Keywords
```basic
' Store user data
SET USER MEMORY "key", value
' Retrieve user data
value = GET USER MEMORY("key")
' Store a fact about the user
SET USER FACT "occupation", "software engineer"
' Get all user facts
facts = USER FACTS()
```
### Examples
#### Personalized Greeting
```basic
' Check if returning user
name = GET USER MEMORY("name")
IF name = "" THEN
TALK "Hello! What's your name?"
HEAR name
SET USER MEMORY "name", name
TALK "Nice to meet you, " + name + "!"
ELSE
TALK "Welcome back, " + name + "!"
END IF
```
#### Cross-Bot Preferences
```basic
' In any bot - store preference
SET USER MEMORY "language", "pt-BR"
SET USER MEMORY "timezone", "America/Sao_Paulo"
' In any other bot - use preference
language = GET USER MEMORY("language")
IF language = "pt-BR" THEN
TALK "Olá! Como posso ajudar?"
ELSE
TALK "Hello! How can I help?"
END IF
```
#### User Facts for AI Context
```basic
' Store facts about the user
SET USER FACT "company", "Acme Corp"
SET USER FACT "role", "Product Manager"
SET USER FACT "interests", "AI, automation, productivity"
' Later, use facts to personalize AI responses
facts = USER FACTS()
SET CONTEXT "user_profile" AS facts
response = LLM "Help me draft a product roadmap"
' AI now knows user's role and interests
```
### Database Schema
User memory is stored in the `user_memory` table:
| Column | Type | Description |
|--------|------|-------------|
| `id` | UUID | Primary key |
| `user_id` | UUID | User identifier |
| `key` | VARCHAR(255) | Memory key |
| `value` | JSONB | Stored value (any type) |
| `memory_type` | VARCHAR(50) | preference, fact, context |
| `ttl` | TIMESTAMP | Optional expiration |
| `created_at` | TIMESTAMP | Creation time |
| `updated_at` | TIMESTAMP | Last update |
### Configuration
```csv
name,value
user-memory-enabled,true
user-memory-max-keys,1000
user-memory-default-ttl,0
```
| Option | Default | Description |
|--------|---------|-------------|
| `user-memory-enabled` | `true` | Enable user memory |
| `user-memory-max-keys` | `1000` | Max keys per user |
| `user-memory-default-ttl` | `0` | Default TTL (0 = no expiry) |
## Bot Memory
Bot memory stores data at the bot level, shared across all users but isolated per bot.
### Keywords
```basic
' Store bot data
SET BOT MEMORY "key", value
' Retrieve bot data
value = GET BOT MEMORY("key")
```
### Examples
#### Bot Statistics
```basic
' Track bot usage
conversations = GET BOT MEMORY("total_conversations")
conversations = conversations + 1
SET BOT MEMORY "total_conversations", conversations
PRINT "This bot has had " + conversations + " conversations"
```
#### Feature Flags
```basic
' Store feature configuration
SET BOT MEMORY "enable_voice", true
SET BOT MEMORY "max_retries", 3
SET BOT MEMORY "welcome_message", "Hello! I'm your assistant."
' Use in logic
enableVoice = GET BOT MEMORY("enable_voice")
IF enableVoice THEN
' Enable voice features
END IF
```
#### Cache API Results
```basic
' Cache expensive API calls
cachedRates = GET BOT MEMORY("exchange_rates")
cachedTime = GET BOT MEMORY("exchange_rates_time")
IF cachedRates = "" OR (NOW() - cachedTime) > 3600 THEN
' Refresh cache
rates = GET "https://api.exchangerate.host/latest"
SET BOT MEMORY "exchange_rates", rates
SET BOT MEMORY "exchange_rates_time", NOW()
ELSE
rates = cachedRates
END IF
```
### Use Cases
| Use Case | Example Key | Description |
|----------|-------------|-------------|
| Counters | `total_orders` | Track bot-wide metrics |
| Config | `max_items` | Runtime configuration |
| Cache | `api_cache_products` | Cached API responses |
| State | `last_sync_time` | Operational state |
## Session Memory
Session memory is temporary storage for the current conversation session.
### Keywords
```basic
' Store in session
SET "key", value
' Retrieve from session
value = GET "key"
' Set context for AI
SET CONTEXT "topic" AS "billing inquiry"
```
### Examples
#### Conversation State
```basic
' Track conversation flow
SET "current_step", "collecting_info"
SET "collected_name", username
SET "collected_email", useremail
' Later in conversation
step = GET "current_step"
IF step = "collecting_info" THEN
' Continue collecting
END IF
```
#### Multi-Turn Context
```basic
' Build context through conversation
SET CONTEXT "customer_id" AS customerid
SET CONTEXT "issue_type" AS "refund"
SET CONTEXT "order_id" AS orderid
' AI has full context for responses
response = LLM "Help resolve this customer issue"
```
### Session Lifetime
- Created when user starts conversation
- Persists across messages in same conversation
- Cleared when conversation ends or times out
- Default timeout: 30 minutes of inactivity
## Episodic Memory
Episodic memory stores summaries of past conversations for long-term context.
### How It Works
1. **Conversation Ends** - System detects conversation completion
2. **Summary Generated** - LLM creates concise summary
3. **Stored** - Summary saved with metadata
4. **Retrieved** - Used in future conversations for context
### Example
```basic
' System automatically creates episode summaries
' Example summary stored:
' {
' "conversation_id": "abc123",
' "summary": "User asked about refund policy, was satisfied with explanation",
' "topics": ["refunds", "policy"],
' "sentiment": "positive",
' "resolution": "resolved",
' "created_at": "2025-01-15T10:30:00Z"
' }
' In future conversations, retrieve relevant episodes
episodes = GET USER MEMORY("recent_episodes")
SET CONTEXT "previous_interactions" AS episodes
```
### Configuration
```csv
name,value
episodic-memory-enabled,true
episodic-summary-model,fast
episodic-max-episodes,100
episodic-retention-days,365
```
## Memory Patterns
### Profile Builder Pattern
Build user profile progressively through conversations.
```basic
' Check what we know
profile = GET USER MEMORY("profile")
IF profile = "" THEN
profile = #{ }
END IF
' Fill in missing information naturally
IF profile.name = "" THEN
' Ask for name when appropriate
END IF
IF profile.preferences = "" THEN
' Learn preferences from behavior
END IF
' Update profile
SET USER MEMORY "profile", profile
```
### Preference Learning Pattern
Learn preferences from user behavior.
```basic
' Track user choices
choice = HEAR selection
choices = GET USER MEMORY("choices_history")
IF choices = "" THEN choices = []
' Add new choice
choices = APPEND(choices, #{
choice: choice,
context: currentContext,
timestamp: NOW()
})
SET USER MEMORY "choices_history", choices
' Analyze patterns periodically
IF LEN(choices) >= 10 THEN
preferences = LLM "Analyze these choices and identify preferences: " + JSON(choices)
SET USER MEMORY "learned_preferences", preferences
END IF
```
### Context Handoff Pattern
Pass context between bots in multi-agent scenarios.
```basic
' Sending bot: Store context for receiving bot
handoffContext = #{
topic: currentTopic,
userIntent: detectedIntent,
conversationSummary: summary,
relevantFacts: facts
}
SET USER MEMORY "handoff_context", handoffContext
' Transfer to specialist
TRANSFER CONVERSATION TO "specialist-bot"
' Receiving bot: Retrieve context
context = GET USER MEMORY("handoff_context")
SET CONTEXT "background" AS context.conversationSummary
SET CONTEXT "intent" AS context.userIntent
' Clear handoff context after use
SET USER MEMORY "handoff_context", ""
```
### TTL Pattern
Use time-to-live for temporary data.
```basic
' Store with expiration (implementation depends on memory type)
' For session-like data in user memory:
SET USER MEMORY "temp_auth_code", #{
code: authCode,
expires: NOW() + 300 ' 5 minutes
}
' Check expiration
stored = GET USER MEMORY("temp_auth_code")
IF stored <> "" AND stored.expires > NOW() THEN
' Valid
ELSE
' Expired or not found
SET USER MEMORY "temp_auth_code", ""
END IF
```
## Best Practices
### Key Naming Conventions
```basic
' Use consistent prefixes
SET USER MEMORY "pref_language", "en" ' Preferences
SET USER MEMORY "pref_timezone", "UTC"
SET USER MEMORY "fact_name", "John" ' Facts
SET USER MEMORY "fact_company", "Acme"
SET USER MEMORY "ctx_last_topic", "sales" ' Context
SET USER MEMORY "cache_products", data ' Cached data
```
### Don't Store Sensitive Data
```basic
' ❌ DON'T: Store sensitive data
SET USER MEMORY "password", userPassword
SET USER MEMORY "ssn", socialSecurityNumber
SET USER MEMORY "credit_card", cardNumber
' ✅ DO: Store references only
SET USER MEMORY "payment_method_id", paymentId
SET USER MEMORY "verified", true
```
### Handle Missing Data Gracefully
```basic
' Always check for empty/missing
name = GET USER MEMORY("name")
IF name = "" THEN
name = "there" ' Default value
END IF
TALK "Hello, " + name + "!"
```
### Clean Up Old Data
```basic
' Periodic cleanup of old data
lastCleanup = GET BOT MEMORY("last_memory_cleanup")
IF lastCleanup = "" OR (NOW() - lastCleanup) > 86400 THEN
' Run cleanup logic
' Remove expired entries, old cache, etc.
SET BOT MEMORY "last_memory_cleanup", NOW()
END IF
```
## Troubleshooting
### Memory Not Persisting
1. Check memory type - session memory doesn't persist
2. Verify database connection
3. Check for key name typos (keys are case-sensitive)
4. Review memory limits
### Cross-Bot Memory Not Sharing
1. Ensure using `USER MEMORY` not `BOT MEMORY`
2. Verify same user identity
3. Check `user-memory-enabled` config
### Memory Full Errors
1. Clean up old/unused keys
2. Increase `user-memory-max-keys`
3. Use TTL for temporary data
4. Consolidate related keys into objects
## See Also
- [SET USER MEMORY](../chapter-06-gbdialog/keyword-set-user-memory.md) - Store user memory
- [GET USER MEMORY](../chapter-06-gbdialog/keyword-get-user-memory.md) - Retrieve user memory
- [SET BOT MEMORY](../chapter-06-gbdialog/keyword-set-bot-memory.md) - Store bot memory
- [GET BOT MEMORY](../chapter-06-gbdialog/keyword-get-bot-memory.md) - Retrieve bot memory
- [Multi-Agent Orchestration](./multi-agent-orchestration.md) - Cross-bot data sharing

View file

@ -0,0 +1,561 @@
# Multi-Agent Orchestration
General Bots supports sophisticated multi-agent systems where multiple specialized bots collaborate to handle complex tasks. This chapter covers the architecture, keywords, and best practices for building multi-agent solutions.
## Overview
Multi-agent orchestration enables:
- **Task specialization** - Each bot focuses on what it does best
- **Collaborative problem-solving** - Bots work together on complex tasks
- **Scalable architectures** - Add new specialists without modifying existing bots
- **Resilient systems** - Failures are isolated and handled gracefully
## Architecture
```
┌─────────────────────────────────────────────────────────────┐
│ Multi-Agent System │
├─────────────────────────────────────────────────────────────┤
│ │
│ ┌──────────┐ A2A Protocol ┌──────────┐ │
│ │ │◄──────────────────►│ │ │
│ │ Sales │ │ Support │ │
│ │ Bot │ ┌──────────┐ │ Bot │ │
│ │ │◄──►│ │◄──►│ │ │
│ └──────────┘ │ Billing │ └──────────┘ │
│ │ Bot │ │
│ ┌──────────┐ │ │ ┌──────────┐ │
│ │ │◄──►└──────────┘◄──►│ │ │
│ │ Research │ │ Analytics│ │
│ │ Bot │ │ Bot │ │
│ │ │ │ │ │
│ └──────────┘ └──────────┘ │
│ │
└─────────────────────────────────────────────────────────────┘
```
## Core Keywords
### ADD BOT
Adds a bot to the current session with optional triggers and tools.
```basic
' Add a bot with keyword triggers
ADD BOT "billing-bot" TRIGGER ON "billing,invoice,payment"
' Add a bot with tool access
ADD BOT "analyst-bot" TOOLS "calculate,forecast,report"
' Add a bot with scheduled tasks
ADD BOT "monitor-bot" SCHEDULE "0 */1 * * *"
```
### DELEGATE TO BOT
Sends a task to another bot and optionally waits for response.
```basic
' Fire-and-forget delegation
DELEGATE "Process this order" TO BOT "order-processor"
' Get response from delegation
result = DELEGATE "Calculate total for items" TO BOT "calculator-bot"
TALK "Total: " + result
' Delegation with timeout
result = DELEGATE "Analyze report" TO BOT "analyst-bot" TIMEOUT 60
```
### BROADCAST TO BOTS
Sends a message to all bots in the session.
```basic
' Notify all bots of an event
BROADCAST "New customer signup: " + customerid TO BOTS
' Emergency shutdown signal
BROADCAST "SHUTDOWN" TO BOTS
```
### TRANSFER CONVERSATION
Hands off the entire conversation to another bot.
```basic
' Transfer to specialist
TALK "Let me connect you with our billing specialist."
TRANSFER CONVERSATION TO "billing-bot"
' Transfer with context
SET CONTEXT "issue" AS "refund request"
SET CONTEXT "amount" AS "$150"
TRANSFER CONVERSATION TO "refunds-bot"
```
## A2A Protocol
The Agent-to-Agent (A2A) protocol handles all inter-bot communication.
### Message Types
| Type | Description | Use Case |
|------|-------------|----------|
| `Request` | Ask bot to perform task | Task delegation |
| `Response` | Reply to a request | Return results |
| `Broadcast` | Message to all bots | Notifications |
| `Delegate` | Hand off task | Specialization |
| `Collaborate` | Joint task | Team work |
### Message Structure
```basic
' A2A messages contain:
' - from_agent: Source bot ID
' - to_agent: Target bot ID
' - message_type: Request, Response, etc.
' - payload: The actual content
' - correlation_id: Links request/response
' - timestamp: When sent
```
### Configuration
```csv
name,value
a2a-enabled,true
a2a-timeout,30
a2a-max-hops,5
a2a-retry-count,3
a2a-queue-size,100
```
| Option | Default | Description |
|--------|---------|-------------|
| `a2a-enabled` | `true` | Enable A2A communication |
| `a2a-timeout` | `30` | Default timeout (seconds) |
| `a2a-max-hops` | `5` | Maximum delegation chain depth |
| `a2a-retry-count` | `3` | Retries on failure |
| `a2a-queue-size` | `100` | Max pending messages |
## Memory Management
### User Memory (Cross-Bot)
User memory is accessible across all bots, enabling seamless personalization.
```basic
' In any bot - store user preference
SET USER MEMORY "language", "pt-BR"
SET USER MEMORY "timezone", "America/Sao_Paulo"
' In any other bot - retrieve preference
language = GET USER MEMORY("language")
IF language = "pt-BR" THEN
    TALK "Olá! Como posso ajudar?"
END IF
```
### Bot Memory (Per-Bot)
Bot memory is isolated to each bot for bot-specific state.
```basic
' In sales-bot
SET BOT MEMORY "deals_closed", dealscount
' In support-bot (different memory space)
SET BOT MEMORY "tickets_resolved", ticketcount
```
### Session Memory (Temporary)
Session memory is shared within a conversation session.
```basic
' Store in session
SET "current_topic", "billing"
' Available to all bots in session
topic = GET "current_topic"
```
### Memory Scope Comparison
| Memory Type | Scope | Persistence | Use Case |
|-------------|-------|-------------|----------|
| User Memory | Per user, all bots | Permanent | Preferences, profile |
| Bot Memory | Per bot, all users | Permanent | Bot state, counters |
| Session Memory | Per session | Session lifetime | Current context |
## Model Routing
Different bots can use different models optimized for their tasks.
### USE MODEL Keyword
```basic
' In customer service bot - use quality model
USE MODEL "quality"
' In quick-answer bot - use fast model
USE MODEL "fast"
' In code helper bot - use code model
USE MODEL "code"
' Let system decide
USE MODEL "auto"
```
### Model Routing Strategies
| Strategy | Description |
|----------|-------------|
| `manual` | Explicit model selection only |
| `auto` | System chooses based on query |
| `load-balanced` | Distribute for throughput |
| `fallback` | Try models in order |
### Configuration
```csv
name,value
model-routing-strategy,auto
model-default,fast
model-fast,DeepSeek-R1-Distill-Qwen-1.5B-Q3_K_M.gguf
model-quality,gpt-4
model-code,codellama-7b.gguf
```
## Hybrid RAG Search
Multi-agent systems benefit from shared knowledge bases with advanced search.
### Configuration
```csv
name,value
rag-hybrid-enabled,true
rag-dense-weight,0.7
rag-sparse-weight,0.3
rag-reranker-enabled,true
```
### How It Works
1. **Dense Search** - Semantic/vector similarity (0.7 weight)
2. **Sparse Search** - BM25 keyword matching (0.3 weight)
3. **Fusion** - Reciprocal Rank Fusion combines results
4. **Reranking** - Optional LLM reranking for quality
```basic
' Hybrid search is automatic when enabled
USE KB "company-knowledge"
result = FIND "customer refund policy"
' Returns results using both semantic and keyword matching
```
## Code Sandbox
Bots can execute code in isolated sandboxes for data processing.
### Supported Languages
```basic
' Python for data science
result = RUN PYTHON "
import pandas as pd
df = pd.DataFrame({'a': [1,2,3]})
print(df.sum().to_json())
"
' JavaScript for JSON processing
result = RUN JAVASCRIPT "
const data = {items: [1,2,3]};
console.log(JSON.stringify(data.items.map(x => x * 2)));
"
' Bash for system tasks
result = RUN BASH "ls -la /data"
```
### Sandbox Configuration
```csv
name,value
sandbox-runtime,lxc
sandbox-timeout,30
sandbox-memory-mb,512
sandbox-cpu-percent,50
sandbox-network,false
```
### Runtimes
| Runtime | Security | Performance | Requirements |
|---------|----------|-------------|--------------|
| LXC | High | Excellent | LXC installed |
| Docker | High | Good | Docker daemon |
| Firecracker | Highest | Good | Firecracker |
| Process | Low | Best | None (fallback) |
## Agent Reflection
Bots can self-analyze and improve through reflection.
### Enable Reflection
```basic
' Enable self-reflection
BOT REFLECTION true
' Monitor specific metrics
BOT REFLECTION ON "conversation_quality"
BOT REFLECTION ON "response_accuracy"
```
### Get Insights
```basic
' Retrieve reflection analysis
insights = BOT REFLECTION INSIGHTS()
PRINT "Quality Score: " + insights.qualityScore
PRINT "Issues: " + insights.issuesCount
FOR EACH suggestion IN insights.suggestions
PRINT "Suggestion: " + suggestion
NEXT suggestion
```
### Reflection Metrics
| Metric | Description |
|--------|-------------|
| `conversation_quality` | Overall conversation effectiveness |
| `response_accuracy` | Correctness of responses |
| `user_satisfaction` | Estimated user satisfaction |
| `tone_appropriateness` | Tone matches context |
| `resolution_rate` | Issues successfully resolved |
## SSE Streaming
Real-time streaming for responsive multi-agent UIs.
### Enable Streaming
```csv
name,value
sse-enabled,true
sse-heartbeat,30
sse-max-connections,1000
```
### Client Integration
```javascript
// Connect to SSE endpoint
const eventSource = new EventSource('/api/chat/stream?session=' + sessionId);
eventSource.onmessage = (event) => {
const data = JSON.parse(event.data);
if (data.type === 'token') {
// Streaming token
appendToMessage(data.content);
} else if (data.type === 'bot_switch') {
// Different bot responding
showBotIndicator(data.botName);
} else if (data.type === 'complete') {
// Response complete
finalizeMessage();
}
};
```
## Patterns and Best Practices
### Router Pattern
A central router bot directs queries to specialists.
```basic
' router-bot/start.bas
HEAR userquery
' Classify the query
category = LLM "Classify into: billing, technical, sales, general. Query: " + userquery
SWITCH category
CASE "billing"
result = DELEGATE userquery TO BOT "billing-bot"
CASE "technical"
result = DELEGATE userquery TO BOT "tech-bot"
CASE "sales"
result = DELEGATE userquery TO BOT "sales-bot"
CASE ELSE
result = LLM userquery
END SWITCH
TALK result
```
### Expert Panel Pattern
Multiple bots provide perspectives on complex questions.
```basic
' Get input from multiple experts
question = "Should we expand into the European market?"
marketAnalysis = DELEGATE question TO BOT "market-analyst"
financialView = DELEGATE question TO BOT "finance-expert"
riskAssessment = DELEGATE question TO BOT "risk-assessor"
' Synthesize responses
synthesis = LLM "Synthesize these expert opinions into a recommendation:
Market: " + marketAnalysis + "
Finance: " + financialView + "
Risk: " + riskAssessment
BEGIN TALK
**Expert Panel Summary**
📊 **Market Analysis:** {marketAnalysis}
💰 **Financial View:** {financialView}
⚠️ **Risk Assessment:** {riskAssessment}
📋 **Recommendation:** {synthesis}
END TALK
```
### Escalation Pattern
Automatic escalation when bot can't handle query.
```basic
' First-line support bot
confidence = LLM "Rate your confidence (0-100) in answering: " + userquery
IF confidence < 50 THEN
' Escalate to specialist
TALK "Let me connect you with a specialist who can better help."
SET CONTEXT "escalation_reason" AS "low_confidence"
SET CONTEXT "original_query" AS userquery
TRANSFER CONVERSATION TO "senior-support-bot"
ELSE
' Handle normally
response = LLM userquery
TALK response
END IF
```
### Supervisor Pattern
A supervisor bot monitors and coordinates workers.
```basic
' supervisor-bot/monitor.bas
SET SCHEDULE "*/5 * * * *" ' Run every 5 minutes
' Check all worker bots
workers = ["processor-1", "processor-2", "processor-3"]
FOR EACH worker IN workers
status = DELEGATE "HEALTH_CHECK" TO BOT worker TIMEOUT 10
IF status = "" OR status = "ERROR" THEN
' Worker unresponsive
SEND MAIL admin, "Bot Alert", worker + " is unresponsive"
DELEGATE "RESTART" TO BOT "bot-manager"
END IF
NEXT worker
```
## Database Schema
Multi-agent systems use several database tables:
### a2a_messages
Stores inter-agent communication.
| Column | Type | Description |
|--------|------|-------------|
| `id` | UUID | Message ID |
| `session_id` | UUID | Session the message belongs to |
| `from_agent` | VARCHAR | Sender bot ID |
| `to_agent` | VARCHAR | Recipient bot ID (NULL for broadcasts) |
| `message_type` | VARCHAR | Request, Response, etc. |
| `payload` | TEXT | Message content (JSON) |
| `correlation_id` | UUID | Links request/response |
| `processed` | BOOLEAN | Whether the message was handled |
| `timestamp` | TIMESTAMP | When sent |
### user_memories
Stores cross-bot user data.
| Column | Type | Description |
|--------|------|-------------|
| `id` | UUID | Memory row ID |
| `user_id` | UUID | User identifier |
| `key` | VARCHAR | Memory key |
| `value` | TEXT | Stored value |
| `memory_type` | VARCHAR | preference, fact, context |
| `created_at` | TIMESTAMP | When created |
| `updated_at` | TIMESTAMP | Last update |
### bot_reflections
Stores reflection analysis results.
| Column | Type | Description |
|--------|------|-------------|
| `id` | UUID | Reflection ID |
| `bot_id` | UUID | Bot that was analyzed |
| `conversation_id` | UUID | Analyzed conversation |
| `quality_score` | FLOAT | Overall quality (0-1) |
| `insights` | JSONB | Analysis details |
| `created_at` | TIMESTAMP | When analyzed |
## Troubleshooting
### Bot Not Responding to Delegation
1. Check bot is registered: `LIST BOTS`
2. Verify A2A is enabled: `a2a-enabled,true`
3. Check timeout is sufficient
4. Review bot logs for errors
### Memory Not Sharing Between Bots
1. Ensure using `SET USER MEMORY` not `SET BOT MEMORY`
2. Check `user-memory-enabled,true`
3. Verify same user identity across bots
### Circular Delegation Detected
1. Review delegation chains
2. Increase `a2a-max-hops` if legitimately deep
3. Add guards to prevent loops:
```basic
hops = GET "delegation_hops"
IF hops = "" THEN
    hops = 0 ' First hop - initialize counter
END IF
IF hops > 3 THEN
    TALK "I'll handle this directly."
    ' Don't delegate further
ELSE
    SET "delegation_hops", hops + 1
    DELEGATE task TO BOT "specialist"
END IF
```
## See Also
- [ADD BOT Keyword](../chapter-06-gbdialog/keyword-add-bot.md)
- [DELEGATE TO BOT Keyword](../chapter-06-gbdialog/keyword-delegate-to-bot.md)
- [Memory Management](./memory-management.md)
- [Model Routing](../chapter-08-config/llm-config.md)
- [Code Sandbox](../chapter-07-gbapp/containers.md)
- [SSE Streaming](./streaming.md)

View file

@ -0,0 +1,301 @@
# What's New: Multi-Agent Features
General Bots has been enhanced with powerful multi-agent orchestration capabilities. This document summarizes the new features, keywords, and configuration options.
## Overview
The multi-agent update introduces:
- **Agent-to-Agent (A2A) Protocol** - Bots communicate and delegate tasks
- **Cross-Session User Memory** - User data persists across bots and sessions
- **Dynamic Model Routing** - Switch LLM models based on task requirements
- **Hybrid RAG Search** - Combined semantic + keyword search with RRF
- **Code Sandbox** - Safe Python/JavaScript/Bash execution
- **Agent Reflection** - Self-analysis for continuous improvement
- **SSE Streaming** - Real-time response streaming
## New BASIC Keywords
### Multi-Agent Keywords
| Keyword | Description |
|---------|-------------|
| `ADD BOT` | Add a bot with triggers, tools, or schedules |
| `DELEGATE TO BOT` | Send task to another bot and get response |
| `BROADCAST TO BOTS` | Send message to all session bots |
| `TRANSFER CONVERSATION` | Hand off conversation to another bot |
| `BOT REFLECTION` | Enable agent self-analysis |
| `BOT REFLECTION INSIGHTS` | Get reflection analysis results |
### Memory Keywords
| Keyword | Description |
|---------|-------------|
| `SET USER MEMORY` | Store data at user level (cross-bot) |
| `GET USER MEMORY` | Retrieve user-level data |
| `SET USER FACT` | Store a fact about the user |
| `USER FACTS` | Get all stored user facts |
### Model Routing Keywords
| Keyword | Description |
|---------|-------------|
| `USE MODEL` | Switch LLM model (fast/quality/code/auto) |
### Code Execution Keywords
| Keyword | Description |
|---------|-------------|
| `RUN PYTHON` | Execute Python in sandbox |
| `RUN JAVASCRIPT` | Execute JavaScript in sandbox |
| `RUN BASH` | Execute Bash script in sandbox |
| `RUN ... WITH FILE` | Run script from file |
## Quick Examples
### Multi-Agent Routing
```basic
' Router bot directing queries to specialists
HEAR userquery
category = LLM "Classify into billing, technical, sales: " + userquery
SWITCH category
CASE "billing"
result = DELEGATE userquery TO BOT "billing-bot"
CASE "technical"
result = DELEGATE userquery TO BOT "tech-bot"
CASE "sales"
result = DELEGATE userquery TO BOT "sales-bot"
END SWITCH
TALK result
```
### Cross-Bot User Memory
```basic
' Store user preference (accessible from any bot)
SET USER MEMORY "language", "pt-BR"
SET USER MEMORY "timezone", "America/Sao_Paulo"
' In another bot - retrieve preference
language = GET USER MEMORY("language")
IF language = "pt-BR" THEN
TALK "Olá! Como posso ajudar?"
END IF
```
### Dynamic Model Selection
```basic
' Use fast model for simple queries
USE MODEL "fast"
greeting = LLM "Say hello"
' Switch to quality model for complex analysis
USE MODEL "quality"
analysis = LLM "Analyze market trends and provide recommendations"
' Let system decide automatically
USE MODEL "auto"
```
### Code Sandbox
```basic
' Execute Python for data processing
code = "
import json
data = [1, 2, 3, 4, 5]
print(json.dumps({'sum': sum(data), 'avg': sum(data)/len(data)}))
"
result = RUN PYTHON code
TALK "Statistics: " + result
```
### Agent Reflection
```basic
' Enable self-analysis
BOT REFLECTION true
BOT REFLECTION ON "conversation_quality"
' Later, check insights
insights = BOT REFLECTION INSIGHTS()
IF insights.qualityScore < 0.7 THEN
SEND MAIL admin, "Low Quality Alert", insights.summary
END IF
```
## New Configuration Options
Add these to your `config.csv`:
### Multi-Agent (A2A)
```csv
name,value
a2a-enabled,true
a2a-timeout,30
a2a-max-hops,5
a2a-retry-count,3
```
### User Memory
```csv
name,value
user-memory-enabled,true
user-memory-max-keys,1000
user-memory-default-ttl,0
```
### Model Routing
```csv
name,value
model-routing-strategy,auto
model-default,fast
model-fast,DeepSeek-R1-Distill-Qwen-1.5B-Q3_K_M.gguf
model-quality,gpt-4
model-code,codellama-7b.gguf
```
### Hybrid RAG Search
```csv
name,value
rag-hybrid-enabled,true
rag-dense-weight,0.7
rag-sparse-weight,0.3
rag-reranker-enabled,true
```
### Code Sandbox
```csv
name,value
sandbox-runtime,lxc
sandbox-timeout,30
sandbox-memory-mb,512
sandbox-cpu-percent,50
sandbox-network,false
sandbox-python-packages,numpy,pandas,pillow
```
### Bot Reflection
```csv
name,value
reflection-enabled,true
reflection-interval,10
reflection-min-messages,3
reflection-model,quality
```
### SSE Streaming
```csv
name,value
sse-enabled,true
sse-heartbeat,30
sse-max-connections,1000
```
## Database Migrations
Run migrations to create the new tables:
```bash
cd botserver
cargo run -- migrate
```
### New Tables
| Table | Purpose |
|-------|---------|
| `user_memories` | Cross-session user preferences and facts |
| `session_preferences` | Per-session user settings |
| `a2a_messages` | Agent-to-Agent protocol messages |
| `bot_memory_extended` | Enhanced bot memory with TTL |
| `kg_entities` | Knowledge graph entities |
| `kg_relationships` | Knowledge graph relationships |
| `episodic_memories` | Episodic memory summaries |
| `conversation_costs` | LLM cost tracking |
| `generated_api_tools` | OpenAPI tool tracking |
| `bot_reflections` | Agent self-analysis results |
| `conversation_messages` | Conversation history |
| `session_bots` | Multi-agent session tracking |
## Architecture
```
┌─────────────────────────────────────────────────────────────┐
│ Multi-Agent System │
├─────────────────────────────────────────────────────────────┤
│ │
│ ┌──────────┐ A2A Protocol ┌──────────┐ │
│ │ Router │◄──────────────────►│ Billing │ │
│ │ Bot │ │ Bot │ │
│ └────┬─────┘ ┌──────────┐ └──────────┘ │
│ │ │ Support │ │
│ └─────────►│ Bot │◄──────────────────┐ │
│ └──────────┘ │ │
│ │ │
│ ┌──────────────────────────────────────────────┼──────┐ │
│ │ Shared Resources │ │ │
│ │ ┌────────────┐ ┌────────────┐ ┌──────────┴─┐ │ │
│ │ │ User │ │ Hybrid │ │ Model │ │ │
│ │ │ Memory │ │ RAG Search │ │ Router │ │ │
│ │ └────────────┘ └────────────┘ └────────────┘ │ │
│ └───────────────────────────────────────────────────────┘ │
│ │
└─────────────────────────────────────────────────────────────┘
```
## Design Principles
These features follow General Bots' core principles:
1. **BASIC-First** - All features accessible via simple BASIC keywords
2. **KISS** - Simple syntax, predictable behavior
3. **Pragmatismo** - Real-world utility over theoretical purity
4. **No Lock-in** - Local deployment, own your data
## Performance Considerations
| Feature | Impact | Mitigation |
|---------|--------|------------|
| A2A Protocol | Adds network latency | Use timeouts, local bots |
| User Memory | Database queries | Caching, indexing |
| Hybrid Search | Dual search paths | Results cached |
| Code Sandbox | Container startup | Warm containers |
| Reflection | LLM calls | Run periodically, not per-message |
| SSE Streaming | Connection overhead | Connection pooling |
## Migration Guide
### From Single-Bot to Multi-Agent
1. **Identify Specializations** - What tasks need dedicated bots?
2. **Create Specialist Bots** - Each with focused config
3. **Build Router** - Central bot to direct traffic
4. **Share Memory** - Move shared data to User Memory
5. **Test Delegation** - Verify communication paths
### Upgrading Existing Bots
1. Run database migrations
2. Add new config options as needed
3. Existing keywords continue to work unchanged
4. Gradually adopt new features
## See Also
- [Multi-Agent Orchestration](./multi-agent-orchestration.md) - Complete guide
- [Memory Management](./memory-management.md) - Memory deep dive
- [Hybrid RAG Search](./hybrid-search.md) - Search configuration
- [Keywords Reference](../chapter-06-gbdialog/keywords.md) - All keywords
- [Configuration Parameters](../chapter-08-config/parameters.md) - All config options

View file

@ -0,0 +1,64 @@
-- Migration: 6.1.1 Multi-Agent Memory Support (DOWN)
-- Description: Rollback for user memory, session preferences, and A2A protocol messaging
-- NOTE(review): DROP TRIGGER IF EXISTS still errors when the *table* itself is
-- missing; this script assumes the UP migration was fully applied.
-- Drop triggers first
DROP TRIGGER IF EXISTS update_user_memories_updated_at ON user_memories;
DROP TRIGGER IF EXISTS update_bot_memory_extended_updated_at ON bot_memory_extended;
DROP TRIGGER IF EXISTS update_kg_entities_updated_at ON kg_entities;
-- Drop functions
-- NOTE(review): update_updated_at_column() is a generically named helper; if
-- triggers created by other migrations also use it, dropping it here will
-- break them -- confirm it is owned exclusively by migration 6.1.1.
DROP FUNCTION IF EXISTS update_updated_at_column();
DROP FUNCTION IF EXISTS cleanup_expired_bot_memory();
DROP FUNCTION IF EXISTS cleanup_expired_a2a_messages();
-- Drop indexes (will be dropped with tables, but explicit for clarity)
DROP INDEX IF EXISTS idx_session_bots_active;
DROP INDEX IF EXISTS idx_session_bots_session;
DROP INDEX IF EXISTS idx_gen_api_tools_bot;
DROP INDEX IF EXISTS idx_conv_costs_time;
DROP INDEX IF EXISTS idx_conv_costs_bot;
DROP INDEX IF EXISTS idx_conv_costs_user;
DROP INDEX IF EXISTS idx_conv_costs_session;
DROP INDEX IF EXISTS idx_episodic_time;
DROP INDEX IF EXISTS idx_episodic_session;
DROP INDEX IF EXISTS idx_episodic_user;
DROP INDEX IF EXISTS idx_episodic_bot;
DROP INDEX IF EXISTS idx_kg_rel_type;
DROP INDEX IF EXISTS idx_kg_rel_to;
DROP INDEX IF EXISTS idx_kg_rel_from;
DROP INDEX IF EXISTS idx_kg_rel_bot;
DROP INDEX IF EXISTS idx_kg_entities_name;
DROP INDEX IF EXISTS idx_kg_entities_type;
DROP INDEX IF EXISTS idx_kg_entities_bot;
DROP INDEX IF EXISTS idx_bot_memory_ext_expires;
DROP INDEX IF EXISTS idx_bot_memory_ext_type;
DROP INDEX IF EXISTS idx_bot_memory_ext_session;
DROP INDEX IF EXISTS idx_bot_memory_ext_bot;
DROP INDEX IF EXISTS idx_a2a_messages_timestamp;
DROP INDEX IF EXISTS idx_a2a_messages_pending;
DROP INDEX IF EXISTS idx_a2a_messages_correlation;
DROP INDEX IF EXISTS idx_a2a_messages_to_agent;
DROP INDEX IF EXISTS idx_a2a_messages_session;
DROP INDEX IF EXISTS idx_session_preferences_session;
DROP INDEX IF EXISTS idx_user_memories_type;
DROP INDEX IF EXISTS idx_user_memories_user_id;
DROP INDEX IF EXISTS idx_bot_reflections_bot;
DROP INDEX IF EXISTS idx_bot_reflections_session;
DROP INDEX IF EXISTS idx_bot_reflections_time;
DROP INDEX IF EXISTS idx_conv_messages_session;
DROP INDEX IF EXISTS idx_conv_messages_time;
DROP INDEX IF EXISTS idx_conv_messages_bot;
-- Drop tables (order matters due to foreign keys)
-- kg_relationships must be dropped before kg_entities, which it references.
DROP TABLE IF EXISTS conversation_messages;
DROP TABLE IF EXISTS bot_reflections;
DROP TABLE IF EXISTS session_bots;
DROP TABLE IF EXISTS generated_api_tools;
DROP TABLE IF EXISTS conversation_costs;
DROP TABLE IF EXISTS episodic_memories;
DROP TABLE IF EXISTS kg_relationships;
DROP TABLE IF EXISTS kg_entities;
DROP TABLE IF EXISTS bot_memory_extended;
DROP TABLE IF EXISTS a2a_messages;
DROP TABLE IF EXISTS session_preferences;
DROP TABLE IF EXISTS user_memories;

View file

@ -0,0 +1,315 @@
-- Migration: 6.1.1 Multi-Agent Memory Support
-- Description: Adds tables for user memory, session preferences, and A2A protocol messaging
-- ============================================================================
-- User Memories Table
-- Cross-session memory that persists for users across all sessions and bots
-- ============================================================================
-- Cross-session key/value memory shared by all bots for a given user.
-- One row per (user_id, key); writers are expected to upsert on that pair.
CREATE TABLE IF NOT EXISTS user_memories (
id UUID PRIMARY KEY, -- NOTE(review): no DEFAULT gen_random_uuid() here, unlike session_preferences -- confirm the application supplies ids
user_id UUID NOT NULL,
key VARCHAR(255) NOT NULL, -- keys are case-sensitive
value TEXT NOT NULL,
memory_type VARCHAR(50) NOT NULL DEFAULT 'preference', -- e.g. preference, fact, context
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
CONSTRAINT user_memories_unique_key UNIQUE (user_id, key)
);
CREATE INDEX IF NOT EXISTS idx_user_memories_user_id ON user_memories(user_id);
-- Composite index supports "all memories of a given type for a user" queries.
CREATE INDEX IF NOT EXISTS idx_user_memories_type ON user_memories(user_id, memory_type);
-- ============================================================================
-- Session Preferences Table
-- Stores per-session configuration like current model, routing strategy, etc.
-- ============================================================================
-- Per-session settings such as the currently selected model or routing
-- strategy. One row per (session_id, preference_key); upsert on that pair.
CREATE TABLE IF NOT EXISTS session_preferences (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
session_id UUID NOT NULL,
preference_key VARCHAR(255) NOT NULL,
preference_value TEXT NOT NULL,
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
CONSTRAINT session_preferences_unique UNIQUE (session_id, preference_key)
);
CREATE INDEX IF NOT EXISTS idx_session_preferences_session ON session_preferences(session_id);
-- ============================================================================
-- A2A Messages Table
-- Agent-to-Agent protocol messages for multi-agent orchestration
-- Based on https://a2a-protocol.org/latest/
-- ============================================================================
-- Inter-agent message queue for the A2A protocol. Consumers poll for rows
-- with processed = FALSE addressed to them (or broadcasts with to_agent NULL);
-- cleanup_expired_a2a_messages() purges rows once their per-row TTL elapses.
CREATE TABLE IF NOT EXISTS a2a_messages (
id UUID PRIMARY KEY, -- NOTE(review): no DEFAULT; presumably application-generated -- confirm
session_id UUID NOT NULL,
from_agent VARCHAR(255) NOT NULL,
to_agent VARCHAR(255), -- NULL for broadcast messages
message_type VARCHAR(50) NOT NULL, -- Request, Response, Broadcast, Delegate, Collaborate
payload TEXT NOT NULL, -- NOTE(review): TEXT while other tables use JSONB -- confirm intentional
correlation_id UUID NOT NULL, -- links a Response back to its Request
timestamp TIMESTAMPTZ NOT NULL DEFAULT NOW(),
metadata TEXT DEFAULT '{}', -- JSON-encoded extras, stored as TEXT
ttl_seconds INTEGER NOT NULL DEFAULT 30, -- 0 or less means "never expires" (see cleanup function)
hop_count INTEGER NOT NULL DEFAULT 0, -- delegation chain depth, bounded by a2a-max-hops
processed BOOLEAN NOT NULL DEFAULT FALSE,
processed_at TIMESTAMPTZ,
error_message TEXT
);
CREATE INDEX IF NOT EXISTS idx_a2a_messages_session ON a2a_messages(session_id);
CREATE INDEX IF NOT EXISTS idx_a2a_messages_to_agent ON a2a_messages(session_id, to_agent);
CREATE INDEX IF NOT EXISTS idx_a2a_messages_correlation ON a2a_messages(correlation_id);
-- Partial index keeps the hot "pending inbox" lookup small.
CREATE INDEX IF NOT EXISTS idx_a2a_messages_pending ON a2a_messages(session_id, to_agent, processed) WHERE processed = FALSE;
CREATE INDEX IF NOT EXISTS idx_a2a_messages_timestamp ON a2a_messages(timestamp);
-- ============================================================================
-- Extended Bot Memory Table
-- Enhanced memory with TTL and different memory types
-- ============================================================================
-- Extended bot memory with TTL support and memory-type classification.
-- session_id NULL marks long-term (cross-session) memory; otherwise the row
-- is scoped to a single session.
CREATE TABLE IF NOT EXISTS bot_memory_extended (
id UUID PRIMARY KEY,
bot_id UUID NOT NULL,
session_id UUID, -- NULL for long-term memory
memory_type VARCHAR(20) NOT NULL CHECK (memory_type IN ('short', 'long', 'episodic')),
key VARCHAR(255) NOT NULL,
value TEXT NOT NULL,
ttl_seconds INTEGER, -- NULL means no TTL
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
expires_at TIMESTAMPTZ, -- precomputed expiry, used by the cleanup function
CONSTRAINT bot_memory_extended_unique UNIQUE (bot_id, session_id, key)
);
-- Fix: in PostgreSQL, NULLs are distinct within a UNIQUE constraint, so the
-- constraint above does NOT prevent duplicate (bot_id, key) rows when
-- session_id IS NULL (i.e. long-term memory). Enforce uniqueness for that
-- case with a partial unique index so long-term upserts have a stable target.
CREATE UNIQUE INDEX IF NOT EXISTS idx_bot_memory_ext_longterm_unique ON bot_memory_extended(bot_id, key) WHERE session_id IS NULL;
CREATE INDEX IF NOT EXISTS idx_bot_memory_ext_bot ON bot_memory_extended(bot_id);
CREATE INDEX IF NOT EXISTS idx_bot_memory_ext_session ON bot_memory_extended(bot_id, session_id);
CREATE INDEX IF NOT EXISTS idx_bot_memory_ext_type ON bot_memory_extended(bot_id, memory_type);
-- Partial index keeps TTL sweeps cheap: only rows that can actually expire.
CREATE INDEX IF NOT EXISTS idx_bot_memory_ext_expires ON bot_memory_extended(expires_at) WHERE expires_at IS NOT NULL;
-- ============================================================================
-- Knowledge Graph Entities Table
-- For graph-based memory and entity relationships
-- ============================================================================
-- Knowledge-graph entity nodes, scoped per bot. Referenced by
-- kg_relationships with ON DELETE CASCADE.
CREATE TABLE IF NOT EXISTS kg_entities (
id UUID PRIMARY KEY,
bot_id UUID NOT NULL,
entity_type VARCHAR(100) NOT NULL,
entity_name VARCHAR(500) NOT NULL,
properties JSONB DEFAULT '{}',
embedding_vector BYTEA, -- For vector similarity search -- NOTE(review): raw bytes, not a pgvector column; similarity is presumably computed in application code -- confirm
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
CONSTRAINT kg_entities_unique UNIQUE (bot_id, entity_type, entity_name)
);
CREATE INDEX IF NOT EXISTS idx_kg_entities_bot ON kg_entities(bot_id);
CREATE INDEX IF NOT EXISTS idx_kg_entities_type ON kg_entities(bot_id, entity_type);
CREATE INDEX IF NOT EXISTS idx_kg_entities_name ON kg_entities(entity_name);
-- ============================================================================
-- Knowledge Graph Relationships Table
-- For storing relationships between entities
-- ============================================================================
-- Directed, typed edges between kg_entities nodes. Deleting either endpoint
-- entity cascades to its edges.
CREATE TABLE IF NOT EXISTS kg_relationships (
id UUID PRIMARY KEY,
bot_id UUID NOT NULL, -- denormalized from the entities for per-bot queries
from_entity_id UUID NOT NULL REFERENCES kg_entities(id) ON DELETE CASCADE,
to_entity_id UUID NOT NULL REFERENCES kg_entities(id) ON DELETE CASCADE,
relationship_type VARCHAR(100) NOT NULL,
properties JSONB DEFAULT '{}',
weight FLOAT DEFAULT 1.0, -- edge strength/confidence
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
-- bot_id is omitted here: entity ids are globally unique, so the edge
-- triple already identifies the row.
CONSTRAINT kg_relationships_unique UNIQUE (from_entity_id, to_entity_id, relationship_type)
);
CREATE INDEX IF NOT EXISTS idx_kg_rel_bot ON kg_relationships(bot_id);
CREATE INDEX IF NOT EXISTS idx_kg_rel_from ON kg_relationships(from_entity_id);
CREATE INDEX IF NOT EXISTS idx_kg_rel_to ON kg_relationships(to_entity_id);
CREATE INDEX IF NOT EXISTS idx_kg_rel_type ON kg_relationships(bot_id, relationship_type);
-- ============================================================================
-- Episodic Memory Table
-- For storing conversation summaries and episodes
-- ============================================================================
-- Conversation episode summaries (episodic memory). One row summarizes a
-- span of messages between start_timestamp and end_timestamp.
CREATE TABLE IF NOT EXISTS episodic_memories (
id UUID PRIMARY KEY,
bot_id UUID NOT NULL,
user_id UUID NOT NULL,
session_id UUID, -- NULL when the episode is not tied to one session
summary TEXT NOT NULL,
key_topics JSONB DEFAULT '[]',
decisions JSONB DEFAULT '[]',
action_items JSONB DEFAULT '[]',
message_count INTEGER NOT NULL DEFAULT 0, -- messages covered by this summary
start_timestamp TIMESTAMPTZ NOT NULL,
end_timestamp TIMESTAMPTZ NOT NULL,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS idx_episodic_bot ON episodic_memories(bot_id);
CREATE INDEX IF NOT EXISTS idx_episodic_user ON episodic_memories(user_id);
CREATE INDEX IF NOT EXISTS idx_episodic_session ON episodic_memories(session_id);
-- Supports chronological recall of a user's episodes with a given bot.
CREATE INDEX IF NOT EXISTS idx_episodic_time ON episodic_memories(bot_id, user_id, created_at);
-- ============================================================================
-- Conversation Cost Tracking Table
-- For monitoring LLM usage and costs
-- ============================================================================
-- Append-only ledger of LLM usage: one row per billed call/turn.
CREATE TABLE IF NOT EXISTS conversation_costs (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
session_id UUID NOT NULL,
user_id UUID NOT NULL,
bot_id UUID NOT NULL,
model_used VARCHAR(100), -- NULL when the model is unknown/not applicable
input_tokens INTEGER NOT NULL DEFAULT 0,
output_tokens INTEGER NOT NULL DEFAULT 0,
cost_usd DECIMAL(10, 6) NOT NULL DEFAULT 0, -- micro-dollar precision
timestamp TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS idx_conv_costs_session ON conversation_costs(session_id);
CREATE INDEX IF NOT EXISTS idx_conv_costs_user ON conversation_costs(user_id);
CREATE INDEX IF NOT EXISTS idx_conv_costs_bot ON conversation_costs(bot_id);
CREATE INDEX IF NOT EXISTS idx_conv_costs_time ON conversation_costs(timestamp);
-- ============================================================================
-- Generated API Tools Table
-- For tracking tools generated from OpenAPI specs
-- ============================================================================
-- Registry of tool sets generated from OpenAPI specs, one row per
-- (bot, api). spec_hash lets the sync job detect upstream spec changes.
CREATE TABLE IF NOT EXISTS generated_api_tools (
id UUID PRIMARY KEY,
bot_id UUID NOT NULL,
api_name VARCHAR(255) NOT NULL,
spec_url TEXT NOT NULL,
spec_hash VARCHAR(64) NOT NULL, -- 64 hex chars; presumably SHA-256 of the spec -- confirm
tool_count INTEGER NOT NULL DEFAULT 0, -- tools generated on the last sync
last_synced_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
CONSTRAINT generated_api_tools_unique UNIQUE (bot_id, api_name)
);
CREATE INDEX IF NOT EXISTS idx_gen_api_tools_bot ON generated_api_tools(bot_id);
-- ============================================================================
-- Session Bots Junction Table (if not exists)
-- For multi-agent sessions
-- ============================================================================
CREATE TABLE IF NOT EXISTS session_bots (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
session_id UUID NOT NULL,
bot_id UUID NOT NULL,
bot_name VARCHAR(255) NOT NULL,
-- Per-session routing/trigger configuration for this bot.
trigger_config JSONB DEFAULT '{}',
priority INTEGER NOT NULL DEFAULT 0,
is_active BOOLEAN NOT NULL DEFAULT TRUE,
added_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
-- Uniqueness is by name (not bot_id) within a session.
CONSTRAINT session_bots_unique UNIQUE (session_id, bot_name)
);
CREATE INDEX IF NOT EXISTS idx_session_bots_session ON session_bots(session_id);
-- Partial-style lookup for active bots in a session.
CREATE INDEX IF NOT EXISTS idx_session_bots_active ON session_bots(session_id, is_active);
-- ============================================================================
-- Cleanup function for expired A2A messages
-- ============================================================================
-- Deletes a2a_messages rows whose TTL has elapsed. A ttl_seconds of 0 means
-- "never expires" and is excluded. Returns the number of rows deleted.
CREATE OR REPLACE FUNCTION cleanup_expired_a2a_messages()
RETURNS INTEGER AS $$
DECLARE
deleted_count INTEGER;
BEGIN
DELETE FROM a2a_messages
WHERE ttl_seconds > 0
-- Expiry = creation timestamp plus TTL, built as a text interval.
AND timestamp + (ttl_seconds || ' seconds')::INTERVAL < NOW();
GET DIAGNOSTICS deleted_count = ROW_COUNT;
RETURN deleted_count;
END;
$$ LANGUAGE plpgsql;
-- ============================================================================
-- Cleanup function for expired bot memory
-- ============================================================================
-- Deletes bot_memory_extended rows past their expires_at; NULL expires_at
-- means "keep forever". Returns the number of rows deleted.
CREATE OR REPLACE FUNCTION cleanup_expired_bot_memory()
RETURNS INTEGER AS $$
DECLARE
deleted_count INTEGER;
BEGIN
DELETE FROM bot_memory_extended
WHERE expires_at IS NOT NULL AND expires_at < NOW();
GET DIAGNOSTICS deleted_count = ROW_COUNT;
RETURN deleted_count;
END;
$$ LANGUAGE plpgsql;
-- ============================================================================
-- Trigger to update updated_at timestamp
-- ============================================================================
-- Generic row-level trigger function: stamps NEW.updated_at with the current
-- time before an UPDATE is written. Shared by the tables below.
CREATE OR REPLACE FUNCTION update_updated_at_column()
RETURNS TRIGGER AS $$
BEGIN
NEW.updated_at = NOW();
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
-- Apply trigger to tables with updated_at
-- (DROP before CREATE keeps this migration re-runnable.)
DROP TRIGGER IF EXISTS update_user_memories_updated_at ON user_memories;
CREATE TRIGGER update_user_memories_updated_at
BEFORE UPDATE ON user_memories
FOR EACH ROW
EXECUTE FUNCTION update_updated_at_column();
DROP TRIGGER IF EXISTS update_bot_memory_extended_updated_at ON bot_memory_extended;
CREATE TRIGGER update_bot_memory_extended_updated_at
BEFORE UPDATE ON bot_memory_extended
FOR EACH ROW
EXECUTE FUNCTION update_updated_at_column();
DROP TRIGGER IF EXISTS update_kg_entities_updated_at ON kg_entities;
CREATE TRIGGER update_kg_entities_updated_at
BEFORE UPDATE ON kg_entities
FOR EACH ROW
EXECUTE FUNCTION update_updated_at_column();
-- ============================================================================
-- Bot Reflections Table
-- For storing agent self-reflection analysis results
-- ============================================================================
CREATE TABLE IF NOT EXISTS bot_reflections (
id UUID PRIMARY KEY,
bot_id UUID NOT NULL,
session_id UUID NOT NULL,
reflection_type TEXT NOT NULL,
score FLOAT NOT NULL DEFAULT 0.0,
-- NOTE(review): the '[]' defaults suggest these hold JSON-encoded arrays
-- stored as TEXT (not JSONB, unlike sibling tables) — confirm with the
-- application layer before querying them as JSON.
insights TEXT NOT NULL DEFAULT '[]',
improvements TEXT NOT NULL DEFAULT '[]',
positive_patterns TEXT NOT NULL DEFAULT '[]',
concerns TEXT NOT NULL DEFAULT '[]',
-- Unparsed LLM output kept for debugging/auditing.
raw_response TEXT NOT NULL DEFAULT '',
messages_analyzed INTEGER NOT NULL DEFAULT 0,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS idx_bot_reflections_bot ON bot_reflections(bot_id);
CREATE INDEX IF NOT EXISTS idx_bot_reflections_session ON bot_reflections(session_id);
CREATE INDEX IF NOT EXISTS idx_bot_reflections_time ON bot_reflections(bot_id, created_at);
-- ============================================================================
-- Conversation Messages Table
-- For storing conversation history (if not already exists)
-- ============================================================================
CREATE TABLE IF NOT EXISTS conversation_messages (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
session_id UUID NOT NULL,
bot_id UUID NOT NULL,
-- Nullable: system/bot-originated messages may have no user.
user_id UUID,
-- Chat role (e.g. user/assistant/system — confirm the exact set with the
-- application layer).
role VARCHAR(50) NOT NULL,
content TEXT NOT NULL,
metadata JSONB DEFAULT '{}',
-- Nullable: token count may not be known at insert time.
token_count INTEGER,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS idx_conv_messages_session ON conversation_messages(session_id);
-- Chronological paging within a session.
CREATE INDEX IF NOT EXISTS idx_conv_messages_time ON conversation_messages(session_id, created_at);
CREATE INDEX IF NOT EXISTS idx_conv_messages_bot ON conversation_messages(bot_id);

View file

@ -0,0 +1,124 @@
-- Migration Rollback: 6.1.2_phase3_phase4
-- Description: Rollback Phase 3 and Phase 4 multi-agent features
-- WARNING: This will delete all data in the affected tables!
-- ============================================
-- DROP VIEWS
-- ============================================
-- Views first: they read from the tables dropped further below.
DROP VIEW IF EXISTS v_llm_usage_24h;
DROP VIEW IF EXISTS v_approval_summary;
DROP VIEW IF EXISTS v_kg_stats;
DROP VIEW IF EXISTS v_recent_episodes;
-- ============================================
-- DROP FUNCTIONS
-- ============================================
DROP FUNCTION IF EXISTS cleanup_old_observability_data(INTEGER);
DROP FUNCTION IF EXISTS reset_monthly_budgets();
DROP FUNCTION IF EXISTS reset_daily_budgets();
DROP FUNCTION IF EXISTS aggregate_llm_metrics_hourly();
-- ============================================
-- DROP TRIGGERS
-- ============================================
-- NOTE(review): DROP TRIGGER IF EXISTS still errors when the *table* itself
-- is missing; this is fine on a first run (tables still exist here) but a
-- partially-applied rollback cannot be re-run past this point.
DROP TRIGGER IF EXISTS update_llm_budget_updated_at ON llm_budget;
DROP TRIGGER IF EXISTS update_workflow_definitions_updated_at ON workflow_definitions;
DROP TRIGGER IF EXISTS update_kg_entities_updated_at ON kg_entities;
-- Note: We don't drop the update_updated_at_column() function as it may be used by other tables
-- ============================================
-- DROP WORKFLOW TABLES
-- ============================================
-- Children before parents (step executions -> executions -> definitions).
DROP TABLE IF EXISTS workflow_step_executions CASCADE;
DROP TABLE IF EXISTS workflow_executions CASCADE;
DROP TABLE IF EXISTS workflow_definitions CASCADE;
-- ============================================
-- DROP LLM OBSERVABILITY TABLES
-- ============================================
DROP TABLE IF EXISTS llm_traces CASCADE;
DROP TABLE IF EXISTS llm_budget CASCADE;
DROP TABLE IF EXISTS llm_metrics_hourly CASCADE;
DROP TABLE IF EXISTS llm_metrics CASCADE;
-- ============================================
-- DROP APPROVAL TABLES
-- ============================================
DROP TABLE IF EXISTS approval_tokens CASCADE;
DROP TABLE IF EXISTS approval_audit_log CASCADE;
DROP TABLE IF EXISTS approval_chains CASCADE;
DROP TABLE IF EXISTS approval_requests CASCADE;
-- ============================================
-- DROP KNOWLEDGE GRAPH TABLES
-- ============================================
DROP TABLE IF EXISTS kg_relationships CASCADE;
DROP TABLE IF EXISTS kg_entities CASCADE;
-- ============================================
-- DROP EPISODIC MEMORY TABLES
-- ============================================
DROP TABLE IF EXISTS conversation_episodes CASCADE;
-- ============================================
-- DROP INDEXES (if any remain)
-- ============================================
-- Belt-and-braces: these indexes belong to the tables dropped above, so the
-- IF EXISTS forms below are normally no-ops.
-- Episodic memory indexes
DROP INDEX IF EXISTS idx_episodes_user_id;
DROP INDEX IF EXISTS idx_episodes_bot_id;
DROP INDEX IF EXISTS idx_episodes_session_id;
DROP INDEX IF EXISTS idx_episodes_created_at;
DROP INDEX IF EXISTS idx_episodes_key_topics;
DROP INDEX IF EXISTS idx_episodes_resolution;
DROP INDEX IF EXISTS idx_episodes_summary_fts;
-- Knowledge graph indexes
DROP INDEX IF EXISTS idx_kg_entities_bot_id;
DROP INDEX IF EXISTS idx_kg_entities_type;
DROP INDEX IF EXISTS idx_kg_entities_name;
DROP INDEX IF EXISTS idx_kg_entities_name_lower;
DROP INDEX IF EXISTS idx_kg_entities_aliases;
DROP INDEX IF EXISTS idx_kg_entities_name_fts;
DROP INDEX IF EXISTS idx_kg_relationships_bot_id;
DROP INDEX IF EXISTS idx_kg_relationships_from;
DROP INDEX IF EXISTS idx_kg_relationships_to;
DROP INDEX IF EXISTS idx_kg_relationships_type;
-- Approval indexes
DROP INDEX IF EXISTS idx_approval_requests_bot_id;
DROP INDEX IF EXISTS idx_approval_requests_session_id;
DROP INDEX IF EXISTS idx_approval_requests_status;
DROP INDEX IF EXISTS idx_approval_requests_expires_at;
DROP INDEX IF EXISTS idx_approval_requests_pending;
DROP INDEX IF EXISTS idx_approval_audit_request_id;
DROP INDEX IF EXISTS idx_approval_audit_timestamp;
DROP INDEX IF EXISTS idx_approval_tokens_token;
DROP INDEX IF EXISTS idx_approval_tokens_request_id;
-- Observability indexes
DROP INDEX IF EXISTS idx_llm_metrics_bot_id;
DROP INDEX IF EXISTS idx_llm_metrics_session_id;
DROP INDEX IF EXISTS idx_llm_metrics_timestamp;
DROP INDEX IF EXISTS idx_llm_metrics_model;
DROP INDEX IF EXISTS idx_llm_metrics_hourly_bot_id;
DROP INDEX IF EXISTS idx_llm_metrics_hourly_hour;
DROP INDEX IF EXISTS idx_llm_traces_trace_id;
DROP INDEX IF EXISTS idx_llm_traces_start_time;
DROP INDEX IF EXISTS idx_llm_traces_component;
-- Workflow indexes
DROP INDEX IF EXISTS idx_workflow_definitions_bot_id;
DROP INDEX IF EXISTS idx_workflow_executions_workflow_id;
DROP INDEX IF EXISTS idx_workflow_executions_bot_id;
DROP INDEX IF EXISTS idx_workflow_executions_status;
DROP INDEX IF EXISTS idx_workflow_step_executions_execution_id;

View file

@ -0,0 +1,538 @@
-- Migration: 6.1.2_phase3_phase4
-- Description: Phase 3 and Phase 4 multi-agent features
-- Features:
-- - Episodic memory (conversation summaries)
-- - Knowledge graphs (entity relationships)
-- - Human-in-the-loop approvals
-- - LLM observability and cost tracking
-- ============================================
-- EPISODIC MEMORY TABLES
-- ============================================
-- Conversation episodes (summaries)
CREATE TABLE IF NOT EXISTS conversation_episodes (
id UUID PRIMARY KEY,
user_id UUID NOT NULL,
bot_id UUID NOT NULL,
session_id UUID NOT NULL,
summary TEXT NOT NULL,
key_topics JSONB NOT NULL DEFAULT '[]',
decisions JSONB NOT NULL DEFAULT '[]',
action_items JSONB NOT NULL DEFAULT '[]',
-- Structured sentiment: {"score", "label", "confidence"}.
sentiment JSONB NOT NULL DEFAULT '{"score": 0, "label": "neutral", "confidence": 0.5}',
resolution VARCHAR(50) NOT NULL DEFAULT 'unknown',
message_count INTEGER NOT NULL DEFAULT 0,
-- IDs of the messages this episode summarizes.
message_ids JSONB NOT NULL DEFAULT '[]',
conversation_start TIMESTAMP WITH TIME ZONE NOT NULL,
conversation_end TIMESTAMP WITH TIME ZONE NOT NULL,
metadata JSONB NOT NULL DEFAULT '{}',
created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW()
);
-- Indexes for episodic memory
CREATE INDEX IF NOT EXISTS idx_episodes_user_id ON conversation_episodes(user_id);
CREATE INDEX IF NOT EXISTS idx_episodes_bot_id ON conversation_episodes(bot_id);
CREATE INDEX IF NOT EXISTS idx_episodes_session_id ON conversation_episodes(session_id);
CREATE INDEX IF NOT EXISTS idx_episodes_created_at ON conversation_episodes(created_at DESC);
-- GIN index supports JSONB containment queries on key_topics.
CREATE INDEX IF NOT EXISTS idx_episodes_key_topics ON conversation_episodes USING GIN(key_topics);
CREATE INDEX IF NOT EXISTS idx_episodes_resolution ON conversation_episodes(resolution);
-- Full-text search on summaries
CREATE INDEX IF NOT EXISTS idx_episodes_summary_fts ON conversation_episodes
USING GIN(to_tsvector('english', summary));
-- ============================================
-- KNOWLEDGE GRAPH TABLES
-- ============================================
-- Knowledge graph entities
CREATE TABLE IF NOT EXISTS kg_entities (
id UUID PRIMARY KEY,
bot_id UUID NOT NULL,
entity_type VARCHAR(100) NOT NULL,
entity_name VARCHAR(500) NOT NULL,
-- Alternate names for fuzzy matching, as a JSONB array.
aliases JSONB NOT NULL DEFAULT '[]',
properties JSONB NOT NULL DEFAULT '{}',
-- Extraction confidence in [0, 1]; 1.0 for manual entries.
confidence DOUBLE PRECISION NOT NULL DEFAULT 1.0,
source VARCHAR(50) NOT NULL DEFAULT 'manual',
created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),
updated_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),
-- An entity name is unique within its type for a given bot.
UNIQUE(bot_id, entity_type, entity_name)
);
-- Knowledge graph relationships
-- Directed edges between entities; removed automatically with either endpoint.
CREATE TABLE IF NOT EXISTS kg_relationships (
id UUID PRIMARY KEY,
bot_id UUID NOT NULL,
from_entity_id UUID NOT NULL REFERENCES kg_entities(id) ON DELETE CASCADE,
to_entity_id UUID NOT NULL REFERENCES kg_entities(id) ON DELETE CASCADE,
relationship_type VARCHAR(100) NOT NULL,
properties JSONB NOT NULL DEFAULT '{}',
confidence DOUBLE PRECISION NOT NULL DEFAULT 1.0,
-- When true the edge is traversable in both directions.
bidirectional BOOLEAN NOT NULL DEFAULT false,
source VARCHAR(50) NOT NULL DEFAULT 'manual',
created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),
UNIQUE(bot_id, from_entity_id, to_entity_id, relationship_type)
);
-- Indexes for knowledge graph
CREATE INDEX IF NOT EXISTS idx_kg_entities_bot_id ON kg_entities(bot_id);
CREATE INDEX IF NOT EXISTS idx_kg_entities_type ON kg_entities(entity_type);
CREATE INDEX IF NOT EXISTS idx_kg_entities_name ON kg_entities(entity_name);
-- Expression index for case-insensitive name lookups.
CREATE INDEX IF NOT EXISTS idx_kg_entities_name_lower ON kg_entities(LOWER(entity_name));
CREATE INDEX IF NOT EXISTS idx_kg_entities_aliases ON kg_entities USING GIN(aliases);
CREATE INDEX IF NOT EXISTS idx_kg_relationships_bot_id ON kg_relationships(bot_id);
CREATE INDEX IF NOT EXISTS idx_kg_relationships_from ON kg_relationships(from_entity_id);
CREATE INDEX IF NOT EXISTS idx_kg_relationships_to ON kg_relationships(to_entity_id);
CREATE INDEX IF NOT EXISTS idx_kg_relationships_type ON kg_relationships(relationship_type);
-- Full-text search on entity names
CREATE INDEX IF NOT EXISTS idx_kg_entities_name_fts ON kg_entities
USING GIN(to_tsvector('english', entity_name));
-- ============================================
-- HUMAN-IN-THE-LOOP APPROVAL TABLES
-- ============================================
-- Approval requests
CREATE TABLE IF NOT EXISTS approval_requests (
id UUID PRIMARY KEY,
bot_id UUID NOT NULL,
session_id UUID NOT NULL,
initiated_by UUID NOT NULL,
approval_type VARCHAR(100) NOT NULL,
status VARCHAR(50) NOT NULL DEFAULT 'pending',
-- Delivery channel and where the approval prompt was sent.
channel VARCHAR(50) NOT NULL,
recipient VARCHAR(500) NOT NULL,
context JSONB NOT NULL DEFAULT '{}',
message TEXT NOT NULL,
timeout_seconds INTEGER NOT NULL DEFAULT 3600,
-- What happens on timeout (nullable = no automatic action).
default_action VARCHAR(50),
-- Position within a multi-level approval chain.
current_level INTEGER NOT NULL DEFAULT 1,
total_levels INTEGER NOT NULL DEFAULT 1,
created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),
expires_at TIMESTAMP WITH TIME ZONE NOT NULL,
reminders_sent JSONB NOT NULL DEFAULT '[]',
-- Outcome fields, populated once a decision is made.
decision VARCHAR(50),
decided_by VARCHAR(500),
decided_at TIMESTAMP WITH TIME ZONE,
comments TEXT
);
-- Approval chains
-- Named, per-bot multi-level approval configurations.
CREATE TABLE IF NOT EXISTS approval_chains (
id UUID PRIMARY KEY,
name VARCHAR(200) NOT NULL,
bot_id UUID NOT NULL,
levels JSONB NOT NULL DEFAULT '[]',
stop_on_reject BOOLEAN NOT NULL DEFAULT true,
require_all BOOLEAN NOT NULL DEFAULT false,
description TEXT,
created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),
UNIQUE(bot_id, name)
);
-- Approval audit log
-- Append-only record of every action on a request; removed with the request.
CREATE TABLE IF NOT EXISTS approval_audit_log (
id UUID PRIMARY KEY,
request_id UUID NOT NULL REFERENCES approval_requests(id) ON DELETE CASCADE,
action VARCHAR(50) NOT NULL,
actor VARCHAR(500) NOT NULL,
details JSONB NOT NULL DEFAULT '{}',
timestamp TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),
ip_address VARCHAR(50),
user_agent TEXT
);
-- Approval tokens (for secure links)
-- Single-use tokens embedded in approve/reject links.
CREATE TABLE IF NOT EXISTS approval_tokens (
id UUID PRIMARY KEY,
request_id UUID NOT NULL REFERENCES approval_requests(id) ON DELETE CASCADE,
token VARCHAR(100) NOT NULL UNIQUE,
action VARCHAR(50) NOT NULL,
used BOOLEAN NOT NULL DEFAULT false,
used_at TIMESTAMP WITH TIME ZONE,
expires_at TIMESTAMP WITH TIME ZONE NOT NULL,
created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW()
);
-- Indexes for approval tables
CREATE INDEX IF NOT EXISTS idx_approval_requests_bot_id ON approval_requests(bot_id);
CREATE INDEX IF NOT EXISTS idx_approval_requests_session_id ON approval_requests(session_id);
CREATE INDEX IF NOT EXISTS idx_approval_requests_status ON approval_requests(status);
CREATE INDEX IF NOT EXISTS idx_approval_requests_expires_at ON approval_requests(expires_at);
-- Partial index: the hot path only scans still-pending requests.
CREATE INDEX IF NOT EXISTS idx_approval_requests_pending ON approval_requests(status, expires_at)
WHERE status = 'pending';
CREATE INDEX IF NOT EXISTS idx_approval_audit_request_id ON approval_audit_log(request_id);
CREATE INDEX IF NOT EXISTS idx_approval_audit_timestamp ON approval_audit_log(timestamp DESC);
CREATE INDEX IF NOT EXISTS idx_approval_tokens_token ON approval_tokens(token);
CREATE INDEX IF NOT EXISTS idx_approval_tokens_request_id ON approval_tokens(request_id);
-- ============================================
-- LLM OBSERVABILITY TABLES
-- ============================================
-- LLM request metrics
-- One row per LLM call; raw data feeding the hourly rollup below.
CREATE TABLE IF NOT EXISTS llm_metrics (
id UUID PRIMARY KEY,
request_id UUID NOT NULL,
session_id UUID NOT NULL,
bot_id UUID NOT NULL,
model VARCHAR(200) NOT NULL,
request_type VARCHAR(50) NOT NULL,
input_tokens BIGINT NOT NULL DEFAULT 0,
output_tokens BIGINT NOT NULL DEFAULT 0,
total_tokens BIGINT NOT NULL DEFAULT 0,
latency_ms BIGINT NOT NULL DEFAULT 0,
-- Time-to-first-token; nullable for non-streaming requests.
ttft_ms BIGINT,
cached BOOLEAN NOT NULL DEFAULT false,
success BOOLEAN NOT NULL DEFAULT true,
error TEXT,
estimated_cost DOUBLE PRECISION NOT NULL DEFAULT 0,
timestamp TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),
metadata JSONB NOT NULL DEFAULT '{}'
);
-- Aggregated metrics (hourly rollup)
-- Populated by aggregate_llm_metrics_hourly(); one row per bot per hour.
CREATE TABLE IF NOT EXISTS llm_metrics_hourly (
id UUID PRIMARY KEY,
bot_id UUID NOT NULL,
hour TIMESTAMP WITH TIME ZONE NOT NULL,
total_requests BIGINT NOT NULL DEFAULT 0,
successful_requests BIGINT NOT NULL DEFAULT 0,
failed_requests BIGINT NOT NULL DEFAULT 0,
cache_hits BIGINT NOT NULL DEFAULT 0,
cache_misses BIGINT NOT NULL DEFAULT 0,
total_input_tokens BIGINT NOT NULL DEFAULT 0,
total_output_tokens BIGINT NOT NULL DEFAULT 0,
total_tokens BIGINT NOT NULL DEFAULT 0,
total_cost DOUBLE PRECISION NOT NULL DEFAULT 0,
avg_latency_ms DOUBLE PRECISION NOT NULL DEFAULT 0,
p50_latency_ms DOUBLE PRECISION NOT NULL DEFAULT 0,
p95_latency_ms DOUBLE PRECISION NOT NULL DEFAULT 0,
p99_latency_ms DOUBLE PRECISION NOT NULL DEFAULT 0,
max_latency_ms BIGINT NOT NULL DEFAULT 0,
min_latency_ms BIGINT NOT NULL DEFAULT 0,
-- Per-model breakdowns keyed by model name.
requests_by_model JSONB NOT NULL DEFAULT '{}',
tokens_by_model JSONB NOT NULL DEFAULT '{}',
cost_by_model JSONB NOT NULL DEFAULT '{}',
created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),
-- Conflict target for the rollup upsert.
UNIQUE(bot_id, hour)
);
-- Budget tracking
-- One row per bot; spend counters are reset by reset_daily/monthly_budgets().
CREATE TABLE IF NOT EXISTS llm_budget (
id UUID PRIMARY KEY,
bot_id UUID NOT NULL UNIQUE,
daily_limit DOUBLE PRECISION NOT NULL DEFAULT 100,
monthly_limit DOUBLE PRECISION NOT NULL DEFAULT 2000,
-- Fraction of the limit (0..1) at which an alert fires.
alert_threshold DOUBLE PRECISION NOT NULL DEFAULT 0.8,
daily_spend DOUBLE PRECISION NOT NULL DEFAULT 0,
monthly_spend DOUBLE PRECISION NOT NULL DEFAULT 0,
daily_reset_date DATE NOT NULL DEFAULT CURRENT_DATE,
monthly_reset_date DATE NOT NULL DEFAULT DATE_TRUNC('month', CURRENT_DATE)::DATE,
daily_alert_sent BOOLEAN NOT NULL DEFAULT false,
monthly_alert_sent BOOLEAN NOT NULL DEFAULT false,
updated_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW()
);
-- Trace events
-- Hierarchical spans (parent_id links child to parent within a trace_id).
CREATE TABLE IF NOT EXISTS llm_traces (
id UUID PRIMARY KEY,
parent_id UUID,
trace_id UUID NOT NULL,
name VARCHAR(200) NOT NULL,
component VARCHAR(100) NOT NULL,
event_type VARCHAR(50) NOT NULL,
-- Nullable until the span completes.
duration_ms BIGINT,
start_time TIMESTAMP WITH TIME ZONE NOT NULL,
end_time TIMESTAMP WITH TIME ZONE,
attributes JSONB NOT NULL DEFAULT '{}',
status VARCHAR(50) NOT NULL DEFAULT 'in_progress',
error TEXT,
created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW()
);
-- Indexes for observability tables
CREATE INDEX IF NOT EXISTS idx_llm_metrics_bot_id ON llm_metrics(bot_id);
CREATE INDEX IF NOT EXISTS idx_llm_metrics_session_id ON llm_metrics(session_id);
CREATE INDEX IF NOT EXISTS idx_llm_metrics_timestamp ON llm_metrics(timestamp DESC);
CREATE INDEX IF NOT EXISTS idx_llm_metrics_model ON llm_metrics(model);
CREATE INDEX IF NOT EXISTS idx_llm_metrics_hourly_bot_id ON llm_metrics_hourly(bot_id);
CREATE INDEX IF NOT EXISTS idx_llm_metrics_hourly_hour ON llm_metrics_hourly(hour DESC);
CREATE INDEX IF NOT EXISTS idx_llm_traces_trace_id ON llm_traces(trace_id);
CREATE INDEX IF NOT EXISTS idx_llm_traces_start_time ON llm_traces(start_time DESC);
CREATE INDEX IF NOT EXISTS idx_llm_traces_component ON llm_traces(component);
-- ============================================
-- WORKFLOW TABLES
-- ============================================
-- Workflow definitions
-- Declarative workflow templates; steps/triggers stored as JSONB.
CREATE TABLE IF NOT EXISTS workflow_definitions (
id UUID PRIMARY KEY,
bot_id UUID NOT NULL,
name VARCHAR(200) NOT NULL,
description TEXT,
steps JSONB NOT NULL DEFAULT '[]',
triggers JSONB NOT NULL DEFAULT '[]',
error_handling JSONB NOT NULL DEFAULT '{}',
enabled BOOLEAN NOT NULL DEFAULT true,
created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),
updated_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),
UNIQUE(bot_id, name)
);
-- Workflow executions
-- One row per run of a definition; removed if the definition is deleted.
CREATE TABLE IF NOT EXISTS workflow_executions (
id UUID PRIMARY KEY,
workflow_id UUID NOT NULL REFERENCES workflow_definitions(id) ON DELETE CASCADE,
bot_id UUID NOT NULL,
session_id UUID,
initiated_by UUID,
status VARCHAR(50) NOT NULL DEFAULT 'pending',
-- Zero-based pointer into the definition's steps array.
current_step INTEGER NOT NULL DEFAULT 0,
input_data JSONB NOT NULL DEFAULT '{}',
output_data JSONB NOT NULL DEFAULT '{}',
step_results JSONB NOT NULL DEFAULT '[]',
error TEXT,
started_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),
completed_at TIMESTAMP WITH TIME ZONE,
metadata JSONB NOT NULL DEFAULT '{}'
);
-- Workflow step executions
-- Fine-grained per-step record within an execution.
CREATE TABLE IF NOT EXISTS workflow_step_executions (
id UUID PRIMARY KEY,
execution_id UUID NOT NULL REFERENCES workflow_executions(id) ON DELETE CASCADE,
step_name VARCHAR(200) NOT NULL,
step_index INTEGER NOT NULL,
status VARCHAR(50) NOT NULL DEFAULT 'pending',
input_data JSONB NOT NULL DEFAULT '{}',
output_data JSONB NOT NULL DEFAULT '{}',
error TEXT,
started_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),
completed_at TIMESTAMP WITH TIME ZONE,
duration_ms BIGINT
);
-- Indexes for workflow tables
CREATE INDEX IF NOT EXISTS idx_workflow_definitions_bot_id ON workflow_definitions(bot_id);
CREATE INDEX IF NOT EXISTS idx_workflow_executions_workflow_id ON workflow_executions(workflow_id);
CREATE INDEX IF NOT EXISTS idx_workflow_executions_bot_id ON workflow_executions(bot_id);
CREATE INDEX IF NOT EXISTS idx_workflow_executions_status ON workflow_executions(status);
CREATE INDEX IF NOT EXISTS idx_workflow_step_executions_execution_id ON workflow_step_executions(execution_id);
-- ============================================
-- FUNCTIONS AND TRIGGERS
-- ============================================
-- Function to update updated_at timestamp
-- Generic row-level trigger function: stamps NEW.updated_at with the current
-- time before an UPDATE is written. (Also defined in migration 6.1.1; CREATE
-- OR REPLACE makes the redefinition harmless.)
CREATE OR REPLACE FUNCTION update_updated_at_column()
RETURNS TRIGGER AS $$
BEGIN
NEW.updated_at = NOW();
RETURN NEW;
END;
-- Use the unquoted LANGUAGE form: 'plpgsql' as a quoted string is a
-- deprecated spelling, and every other function in these migrations uses
-- "LANGUAGE plpgsql".
$$ LANGUAGE plpgsql;
-- Triggers for updated_at
-- (DROP before CREATE keeps this migration re-runnable.)
DROP TRIGGER IF EXISTS update_kg_entities_updated_at ON kg_entities;
CREATE TRIGGER update_kg_entities_updated_at
BEFORE UPDATE ON kg_entities
FOR EACH ROW EXECUTE FUNCTION update_updated_at_column();
DROP TRIGGER IF EXISTS update_workflow_definitions_updated_at ON workflow_definitions;
CREATE TRIGGER update_workflow_definitions_updated_at
BEFORE UPDATE ON workflow_definitions
FOR EACH ROW EXECUTE FUNCTION update_updated_at_column();
DROP TRIGGER IF EXISTS update_llm_budget_updated_at ON llm_budget;
CREATE TRIGGER update_llm_budget_updated_at
BEFORE UPDATE ON llm_budget
FOR EACH ROW EXECUTE FUNCTION update_updated_at_column();
-- Function to aggregate hourly metrics
-- Rolls the previous clock hour of llm_metrics up into one llm_metrics_hourly
-- row per bot. Idempotent: re-running for the same hour overwrites the
-- existing rollup via the ON CONFLICT upsert. Presumably invoked by a
-- scheduler shortly after each hour boundary — confirm with the deployment.
CREATE OR REPLACE FUNCTION aggregate_llm_metrics_hourly()
RETURNS void AS $$
DECLARE
last_hour TIMESTAMP WITH TIME ZONE;
BEGIN
-- Aggregate the hour that just finished, not the current (partial) hour.
last_hour := DATE_TRUNC('hour', NOW() - INTERVAL '1 hour');
INSERT INTO llm_metrics_hourly (
id, bot_id, hour, total_requests, successful_requests, failed_requests,
cache_hits, cache_misses, total_input_tokens, total_output_tokens,
total_tokens, total_cost, avg_latency_ms, p50_latency_ms, p95_latency_ms,
p99_latency_ms, max_latency_ms, min_latency_ms, requests_by_model,
tokens_by_model, cost_by_model
)
SELECT
gen_random_uuid(),
bot_id,
last_hour,
COUNT(*),
COUNT(*) FILTER (WHERE success = true),
COUNT(*) FILTER (WHERE success = false),
COUNT(*) FILTER (WHERE cached = true),
COUNT(*) FILTER (WHERE cached = false),
SUM(input_tokens),
SUM(output_tokens),
SUM(total_tokens),
SUM(estimated_cost),
AVG(latency_ms),
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY latency_ms),
PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY latency_ms),
PERCENTILE_CONT(0.99) WITHIN GROUP (ORDER BY latency_ms),
MAX(latency_ms),
MIN(latency_ms),
-- Per-model maps: every row of a given model carries identical window
-- totals (computed in the subquery), so aggregating duplicated keys into
-- a jsonb object is harmless.
jsonb_object_agg(model, model_count) FILTER (WHERE model IS NOT NULL),
jsonb_object_agg(model, model_tokens) FILTER (WHERE model IS NOT NULL),
jsonb_object_agg(model, model_cost) FILTER (WHERE model IS NOT NULL)
FROM (
-- Window functions pre-compute per-(bot, model) totals over the target
-- hour's rows; the outer query then groups by bot only.
SELECT
bot_id, model, success, cached, input_tokens, output_tokens,
total_tokens, estimated_cost, latency_ms,
COUNT(*) OVER (PARTITION BY bot_id, model) as model_count,
SUM(total_tokens) OVER (PARTITION BY bot_id, model) as model_tokens,
SUM(estimated_cost) OVER (PARTITION BY bot_id, model) as model_cost
FROM llm_metrics
WHERE timestamp >= last_hour
AND timestamp < last_hour + INTERVAL '1 hour'
) sub
GROUP BY bot_id
-- Upsert: replace the rollup if this hour was already aggregated.
ON CONFLICT (bot_id, hour) DO UPDATE SET
total_requests = EXCLUDED.total_requests,
successful_requests = EXCLUDED.successful_requests,
failed_requests = EXCLUDED.failed_requests,
cache_hits = EXCLUDED.cache_hits,
cache_misses = EXCLUDED.cache_misses,
total_input_tokens = EXCLUDED.total_input_tokens,
total_output_tokens = EXCLUDED.total_output_tokens,
total_tokens = EXCLUDED.total_tokens,
total_cost = EXCLUDED.total_cost,
avg_latency_ms = EXCLUDED.avg_latency_ms,
p50_latency_ms = EXCLUDED.p50_latency_ms,
p95_latency_ms = EXCLUDED.p95_latency_ms,
p99_latency_ms = EXCLUDED.p99_latency_ms,
max_latency_ms = EXCLUDED.max_latency_ms,
min_latency_ms = EXCLUDED.min_latency_ms,
requests_by_model = EXCLUDED.requests_by_model,
tokens_by_model = EXCLUDED.tokens_by_model,
cost_by_model = EXCLUDED.cost_by_model;
END;
$$ LANGUAGE plpgsql;
-- Function to reset daily budget
-- Zeroes daily spend counters for any bot whose last reset was before today.
-- Safe to call repeatedly (e.g. from a scheduler): rows already reset today
-- are not matched by the WHERE clause.
CREATE OR REPLACE FUNCTION reset_daily_budgets()
RETURNS void AS $$
BEGIN
UPDATE llm_budget
SET daily_spend = 0,
daily_reset_date = CURRENT_DATE,
daily_alert_sent = false
WHERE daily_reset_date < CURRENT_DATE;
END;
$$ LANGUAGE plpgsql;
-- Function to reset monthly budget
-- Same idea, per calendar month (anchored to the first of the month).
CREATE OR REPLACE FUNCTION reset_monthly_budgets()
RETURNS void AS $$
BEGIN
UPDATE llm_budget
SET monthly_spend = 0,
monthly_reset_date = DATE_TRUNC('month', CURRENT_DATE)::DATE,
monthly_alert_sent = false
WHERE monthly_reset_date < DATE_TRUNC('month', CURRENT_DATE)::DATE;
END;
$$ LANGUAGE plpgsql;
-- ============================================
-- VIEWS
-- ============================================
-- View for recent episode summaries with user info
-- NOTE(review): despite the comment above, no user table is joined here —
-- the view only exposes conversation_episodes columns, newest first.
CREATE OR REPLACE VIEW v_recent_episodes AS
SELECT
e.id,
e.user_id,
e.bot_id,
e.session_id,
e.summary,
e.key_topics,
e.sentiment,
e.resolution,
e.message_count,
e.created_at,
e.conversation_start,
e.conversation_end
FROM conversation_episodes e
ORDER BY e.created_at DESC;
-- View for knowledge graph statistics
-- One row per bot: entity count, distinct entity types, and a correlated
-- subquery counting that bot's relationships.
CREATE OR REPLACE VIEW v_kg_stats AS
SELECT
bot_id,
COUNT(DISTINCT id) as total_entities,
COUNT(DISTINCT entity_type) as entity_types,
(SELECT COUNT(*) FROM kg_relationships r WHERE r.bot_id = e.bot_id) as total_relationships
FROM kg_entities e
GROUP BY bot_id;
-- View for approval status summary
-- avg_resolution_seconds substitutes NOW() for undecided requests, so
-- pending rows report their current age rather than NULL.
CREATE OR REPLACE VIEW v_approval_summary AS
SELECT
bot_id,
status,
COUNT(*) as count,
AVG(EXTRACT(EPOCH FROM (COALESCE(decided_at, NOW()) - created_at))) as avg_resolution_seconds
FROM approval_requests
GROUP BY bot_id, status;
-- View for LLM usage summary (last 24 hours)
CREATE OR REPLACE VIEW v_llm_usage_24h AS
SELECT
bot_id,
model,
COUNT(*) as request_count,
SUM(total_tokens) as total_tokens,
SUM(estimated_cost) as total_cost,
AVG(latency_ms) as avg_latency_ms,
-- Fractions in [0, 1] over all requests in the window.
SUM(CASE WHEN cached THEN 1 ELSE 0 END)::FLOAT / COUNT(*) as cache_hit_rate,
SUM(CASE WHEN success THEN 0 ELSE 1 END)::FLOAT / COUNT(*) as error_rate
FROM llm_metrics
WHERE timestamp > NOW() - INTERVAL '24 hours'
GROUP BY bot_id, model;
-- ============================================
-- CLEANUP POLICIES (retention)
-- ============================================
-- Create a cleanup function for old data
-- Purges raw observability rows older than retention_days (default 30).
-- Audit logs are kept three times as long; hourly rollups are untouched.
CREATE OR REPLACE FUNCTION cleanup_old_observability_data(retention_days INTEGER DEFAULT 30)
RETURNS void AS $$
BEGIN
-- Delete old LLM metrics (keep hourly aggregates longer)
DELETE FROM llm_metrics WHERE timestamp < NOW() - (retention_days || ' days')::INTERVAL;
-- Delete old traces
DELETE FROM llm_traces WHERE start_time < NOW() - (retention_days || ' days')::INTERVAL;
-- Delete old approval audit logs
DELETE FROM approval_audit_log WHERE timestamp < NOW() - (retention_days * 3 || ' days')::INTERVAL;
-- Delete expired approval tokens
DELETE FROM approval_tokens WHERE expires_at < NOW() - INTERVAL '1 day';
END;
$$ LANGUAGE plpgsql;

View file

@ -0,0 +1,937 @@
use crate::shared::models::UserSession;
use crate::shared::state::AppState;
use diesel::prelude::*;
use log::{error, info, trace, warn};
use rhai::{Dynamic, Engine};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::sync::Arc;
use uuid::Uuid;
/// A2A Protocol Message Types
/// Based on https://a2a-protocol.org/latest/
///
/// Rendered as lowercase names by the `Display` impl and parsed leniently by
/// `From<&str>` (unknown strings fall back to `Request`).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum A2AMessageType {
/// Agent requesting action from another agent
Request,
/// Agent responding to a request
Response,
/// Message broadcast to all agents in session
Broadcast,
/// Hand off conversation to another agent
Delegate,
/// Request collaboration on a task
Collaborate,
/// Acknowledge receipt of message
Ack,
/// Error response
Error,
}
impl Default for A2AMessageType {
fn default() -> Self {
A2AMessageType::Request
}
}
impl std::fmt::Display for A2AMessageType {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
A2AMessageType::Request => write!(f, "request"),
A2AMessageType::Response => write!(f, "response"),
A2AMessageType::Broadcast => write!(f, "broadcast"),
A2AMessageType::Delegate => write!(f, "delegate"),
A2AMessageType::Collaborate => write!(f, "collaborate"),
A2AMessageType::Ack => write!(f, "ack"),
A2AMessageType::Error => write!(f, "error"),
}
}
}
impl From<&str> for A2AMessageType {
fn from(s: &str) -> Self {
match s.to_lowercase().as_str() {
"request" => A2AMessageType::Request,
"response" => A2AMessageType::Response,
"broadcast" => A2AMessageType::Broadcast,
"delegate" => A2AMessageType::Delegate,
"collaborate" => A2AMessageType::Collaborate,
"ack" => A2AMessageType::Ack,
"error" => A2AMessageType::Error,
_ => A2AMessageType::Request,
}
}
}
/// A2A Protocol Message
///
/// The envelope exchanged between agents in a session; see the `A2AMessage`
/// impl for construction and expiry helpers.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct A2AMessage {
/// Unique message identifier
pub id: Uuid,
/// Source agent identifier
pub from_agent: String,
/// Target agent identifier (None for broadcasts)
pub to_agent: Option<String>,
/// Message type
pub message_type: A2AMessageType,
/// Message payload (JSON)
pub payload: serde_json::Value,
/// Correlation ID for request-response matching; a response created via
/// `create_response` carries its request's correlation ID.
pub correlation_id: Uuid,
/// Session ID
pub session_id: Uuid,
/// Creation timestamp (UTC); message expiry is measured from here.
pub timestamp: chrono::DateTime<chrono::Utc>,
/// Optional metadata
pub metadata: HashMap<String, String>,
/// TTL in seconds (0 = no expiry)
pub ttl_seconds: u32,
/// Hop count for preventing infinite loops; incremented on each relay and
/// checked against a configured maximum (see `max_hops_exceeded`).
pub hop_count: u32,
}
impl A2AMessage {
    /// Builds a fresh message with generated message/correlation ids, the
    /// current UTC timestamp, a 30-second TTL and a hop count of zero.
    pub fn new(
        from_agent: &str,
        to_agent: Option<&str>,
        message_type: A2AMessageType,
        payload: serde_json::Value,
        session_id: Uuid,
    ) -> Self {
        Self {
            id: Uuid::new_v4(),
            correlation_id: Uuid::new_v4(),
            session_id,
            from_agent: from_agent.to_owned(),
            to_agent: to_agent.map(str::to_owned),
            message_type,
            payload,
            timestamp: chrono::Utc::now(),
            metadata: HashMap::new(),
            ttl_seconds: 30,
            hop_count: 0,
        }
    }

    /// Creates a `Response` addressed back to this message's sender.
    ///
    /// The reply keeps the same correlation id and session, starts a fresh
    /// (empty) metadata map, and advances the hop counter by one.
    pub fn create_response(&self, from_agent: &str, payload: serde_json::Value) -> Self {
        Self {
            id: Uuid::new_v4(),
            correlation_id: self.correlation_id,
            session_id: self.session_id,
            from_agent: from_agent.to_owned(),
            to_agent: Some(self.from_agent.clone()),
            message_type: A2AMessageType::Response,
            payload,
            timestamp: chrono::Utc::now(),
            metadata: HashMap::new(),
            ttl_seconds: 30,
            hop_count: self.hop_count + 1,
        }
    }

    /// Returns true once the TTL measured from `timestamp` has elapsed.
    /// A TTL of zero means the message never expires.
    pub fn is_expired(&self) -> bool {
        match self.ttl_seconds {
            0 => false,
            ttl => {
                let deadline = self.timestamp + chrono::Duration::seconds(i64::from(ttl));
                chrono::Utc::now() > deadline
            }
        }
    }

    /// Returns true when the message has been relayed `max_hops` times or more.
    pub fn max_hops_exceeded(&self, max_hops: u32) -> bool {
        self.hop_count >= max_hops
    }
}
/// A2A Protocol Configuration
///
/// Per-bot settings, loaded from `bot_configuration` rows with an `a2a-`
/// key prefix (see `load_a2a_config`); any missing key keeps its default.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct A2AConfig {
    /// Whether A2A protocol is enabled
    pub enabled: bool,
    /// Default timeout in seconds
    pub timeout_seconds: u32,
    /// Maximum hops to prevent infinite loops
    pub max_hops: u32,
    /// Protocol version
    pub protocol_version: String,
    /// Enable message persistence
    pub persist_messages: bool,
}
impl Default for A2AConfig {
fn default() -> Self {
Self {
enabled: true,
timeout_seconds: 30,
max_hops: 5,
protocol_version: "1.0".to_string(),
persist_messages: true,
}
}
}
/// Load A2A configuration from bot config
///
/// Reads all `a2a-*` keys from `bot_configuration` for `bot_id` and overlays
/// them onto `A2AConfig::default()`. Unparseable numeric values fall back to
/// the built-in defaults; a failed DB connection or query silently yields the
/// defaults unchanged.
pub fn load_a2a_config(state: &AppState, bot_id: Uuid) -> A2AConfig {
    let mut config = A2AConfig::default();
    if let Ok(mut conn) = state.conn.get() {
        // Row shape for the raw key/value query below.
        #[derive(QueryableByName)]
        struct ConfigRow {
            #[diesel(sql_type = diesel::sql_types::Text)]
            config_key: String,
            #[diesel(sql_type = diesel::sql_types::Text)]
            config_value: String,
        }
        let configs: Vec<ConfigRow> = diesel::sql_query(
            "SELECT config_key, config_value FROM bot_configuration \
             WHERE bot_id = $1 AND config_key LIKE 'a2a-%'",
        )
        .bind::<diesel::sql_types::Uuid, _>(bot_id)
        .load(&mut conn)
        .unwrap_or_default();
        for row in configs {
            match row.config_key.as_str() {
                "a2a-enabled" => {
                    // Booleans are stored as text; anything but "true" is false.
                    config.enabled = row.config_value.to_lowercase() == "true";
                }
                "a2a-timeout" => {
                    config.timeout_seconds = row.config_value.parse().unwrap_or(30);
                }
                "a2a-max-hops" => {
                    config.max_hops = row.config_value.parse().unwrap_or(5);
                }
                "a2a-protocol-version" => {
                    config.protocol_version = row.config_value;
                }
                "a2a-persist-messages" => {
                    config.persist_messages = row.config_value.to_lowercase() == "true";
                }
                // Unknown a2a-* keys are ignored.
                _ => {}
            }
        }
    }
    config
}
/// Register all A2A protocol keywords
pub fn register_a2a_keywords(state: Arc<AppState>, user: UserSession, engine: &mut Engine) {
send_to_bot_keyword(state.clone(), user.clone(), engine);
broadcast_message_keyword(state.clone(), user.clone(), engine);
collaborate_with_keyword(state.clone(), user.clone(), engine);
wait_for_bot_keyword(state.clone(), user.clone(), engine);
delegate_conversation_keyword(state.clone(), user.clone(), engine);
get_a2a_messages_keyword(state.clone(), user.clone(), engine);
}
/// SEND TO BOT "bot_name" MESSAGE "message_content"
/// Send a message to a specific bot
///
/// Registers the custom Rhai syntax `SEND TO BOT <expr> MESSAGE <expr>`.
/// Persists an A2A `Request` addressed to the target bot and returns the new
/// message id as a string, or a runtime error on failure/timeout.
pub fn send_to_bot_keyword(state: Arc<AppState>, user: UserSession, engine: &mut Engine) {
    let state_clone = Arc::clone(&state);
    let user_clone = user.clone();
    engine
        .register_custom_syntax(
            &["SEND", "TO", "BOT", "$expr$", "MESSAGE", "$expr$"],
            false,
            move |context, inputs| {
                // Evaluate both $expr$ slots; surrounding double quotes are
                // stripped so literals and variables behave the same.
                let target_bot = context
                    .eval_expression_tree(&inputs[0])?
                    .to_string()
                    .trim_matches('"')
                    .to_string();
                let message_content = context
                    .eval_expression_tree(&inputs[1])?
                    .to_string()
                    .trim_matches('"')
                    .to_string();
                trace!(
                    "SEND TO BOT '{}' MESSAGE for session: {}",
                    target_bot,
                    user_clone.id
                );
                let state_for_task = Arc::clone(&state_clone);
                let session_id = user_clone.id;
                let bot_id = user_clone.bot_id;
                // Agent name convention: "bot_<uuid>".
                let from_bot = format!("bot_{}", bot_id);
                // Rhai callbacks are synchronous: run the async DB insert on a
                // dedicated thread with its own Tokio runtime and wait on a
                // channel for the outcome.
                let (tx, rx) = std::sync::mpsc::channel();
                std::thread::spawn(move || {
                    let rt = tokio::runtime::Runtime::new().expect("Failed to create runtime");
                    let result = rt.block_on(async {
                        send_a2a_message(
                            &state_for_task,
                            session_id,
                            &from_bot,
                            Some(&target_bot),
                            A2AMessageType::Request,
                            serde_json::json!({ "content": message_content }),
                        )
                        .await
                    });
                    let _ = tx.send(result);
                });
                // Give up after 30 s if the worker never reports back.
                match rx.recv_timeout(std::time::Duration::from_secs(30)) {
                    Ok(Ok(msg_id)) => Ok(Dynamic::from(msg_id.to_string())),
                    Ok(Err(e)) => Err(Box::new(rhai::EvalAltResult::ErrorRuntime(
                        e.into(),
                        rhai::Position::NONE,
                    ))),
                    Err(_) => Err(Box::new(rhai::EvalAltResult::ErrorRuntime(
                        "SEND TO BOT timed out".into(),
                        rhai::Position::NONE,
                    ))),
                }
            },
        )
        .expect("Failed to register SEND TO BOT syntax");
}
/// BROADCAST MESSAGE "message_content"
/// Broadcast a message to all bots in the session
///
/// Registers the custom Rhai syntax `BROADCAST MESSAGE <expr>`. Persists an
/// A2A `Broadcast` message with no target (`to_agent = None`) and returns the
/// new message id as a string.
pub fn broadcast_message_keyword(state: Arc<AppState>, user: UserSession, engine: &mut Engine) {
    let state_clone = Arc::clone(&state);
    let user_clone = user.clone();
    engine
        .register_custom_syntax(
            &["BROADCAST", "MESSAGE", "$expr$"],
            false,
            move |context, inputs| {
                // Strip surrounding quotes so literals and variables match.
                let message_content = context
                    .eval_expression_tree(&inputs[0])?
                    .to_string()
                    .trim_matches('"')
                    .to_string();
                trace!("BROADCAST MESSAGE for session: {}", user_clone.id);
                let state_for_task = Arc::clone(&state_clone);
                let session_id = user_clone.id;
                let bot_id = user_clone.bot_id;
                let from_bot = format!("bot_{}", bot_id);
                // Bridge the sync Rhai callback to the async DB insert via a
                // worker thread with its own runtime.
                let (tx, rx) = std::sync::mpsc::channel();
                std::thread::spawn(move || {
                    let rt = tokio::runtime::Runtime::new().expect("Failed to create runtime");
                    let result = rt.block_on(async {
                        send_a2a_message(
                            &state_for_task,
                            session_id,
                            &from_bot,
                            None, // No target = broadcast
                            A2AMessageType::Broadcast,
                            serde_json::json!({ "content": message_content }),
                        )
                        .await
                    });
                    let _ = tx.send(result);
                });
                match rx.recv_timeout(std::time::Duration::from_secs(30)) {
                    Ok(Ok(msg_id)) => Ok(Dynamic::from(msg_id.to_string())),
                    Ok(Err(e)) => Err(Box::new(rhai::EvalAltResult::ErrorRuntime(
                        e.into(),
                        rhai::Position::NONE,
                    ))),
                    Err(_) => Err(Box::new(rhai::EvalAltResult::ErrorRuntime(
                        "BROADCAST MESSAGE timed out".into(),
                        rhai::Position::NONE,
                    ))),
                }
            },
        )
        .expect("Failed to register BROADCAST MESSAGE syntax");
}
/// COLLABORATE WITH "bot1", "bot2" ON "task"
/// Request collaboration from multiple bots on a task
///
/// Registers the custom Rhai syntax `COLLABORATE WITH <expr> ON <expr>`.
/// The first expression is a comma-separated bot list; a `Collaborate`
/// message is sent to each bot individually. Per-bot send failures are
/// logged and skipped, and the ids of the successfully sent messages are
/// returned as a Rhai array of strings.
pub fn collaborate_with_keyword(state: Arc<AppState>, user: UserSession, engine: &mut Engine) {
    let state_clone = Arc::clone(&state);
    let user_clone = user.clone();
    engine
        .register_custom_syntax(
            &["COLLABORATE", "WITH", "$expr$", "ON", "$expr$"],
            false,
            move |context, inputs| {
                let bots_str = context
                    .eval_expression_tree(&inputs[0])?
                    .to_string()
                    .trim_matches('"')
                    .to_string();
                let task = context
                    .eval_expression_tree(&inputs[1])?
                    .to_string()
                    .trim_matches('"')
                    .to_string();
                // Split the bot list on commas, trimming whitespace and any
                // per-item quotes; empty entries are dropped.
                let bots: Vec<String> = bots_str
                    .split(',')
                    .map(|s| s.trim().trim_matches('"').to_string())
                    .filter(|s| !s.is_empty())
                    .collect();
                trace!(
                    "COLLABORATE WITH {:?} ON '{}' for session: {}",
                    bots,
                    task,
                    user_clone.id
                );
                let state_for_task = Arc::clone(&state_clone);
                let session_id = user_clone.id;
                let bot_id = user_clone.bot_id;
                let from_bot = format!("bot_{}", bot_id);
                // Bridge the sync Rhai callback to async sends via a worker
                // thread with its own runtime.
                let (tx, rx) = std::sync::mpsc::channel();
                std::thread::spawn(move || {
                    let rt = tokio::runtime::Runtime::new().expect("Failed to create runtime");
                    let result = rt.block_on(async {
                        let mut message_ids = Vec::new();
                        for target_bot in &bots {
                            match send_a2a_message(
                                &state_for_task,
                                session_id,
                                &from_bot,
                                Some(target_bot),
                                A2AMessageType::Collaborate,
                                // Every collaborator sees the full roster.
                                serde_json::json!({
                                    "task": task,
                                    "collaborators": bots.clone()
                                }),
                            )
                            .await
                            {
                                Ok(id) => message_ids.push(id.to_string()),
                                Err(e) => {
                                    // Best-effort fan-out: log and continue.
                                    warn!(
                                        "Failed to send collaboration request to {}: {}",
                                        target_bot, e
                                    );
                                }
                            }
                        }
                        Ok::<Vec<String>, String>(message_ids)
                    });
                    let _ = tx.send(result);
                });
                match rx.recv_timeout(std::time::Duration::from_secs(30)) {
                    Ok(Ok(ids)) => {
                        let array: rhai::Array = ids.into_iter().map(Dynamic::from).collect();
                        Ok(Dynamic::from(array))
                    }
                    Ok(Err(e)) => Err(Box::new(rhai::EvalAltResult::ErrorRuntime(
                        e.into(),
                        rhai::Position::NONE,
                    ))),
                    Err(_) => Err(Box::new(rhai::EvalAltResult::ErrorRuntime(
                        "COLLABORATE WITH timed out".into(),
                        rhai::Position::NONE,
                    ))),
                }
            },
        )
        .expect("Failed to register COLLABORATE WITH syntax");
}
/// response = WAIT FOR BOT "bot_name" TIMEOUT seconds
/// Wait for a response from a specific bot
///
/// Registers the custom Rhai syntax `WAIT FOR BOT <expr> TIMEOUT <expr>`.
/// Polls for an unprocessed `response` message from the target bot and
/// returns its content. Unlike the other A2A keywords, failures and timeouts
/// are returned as plain strings rather than Rhai runtime errors, so scripts
/// can inspect the result.
pub fn wait_for_bot_keyword(state: Arc<AppState>, user: UserSession, engine: &mut Engine) {
    let state_clone = Arc::clone(&state);
    let user_clone = user.clone();
    engine
        .register_custom_syntax(
            &["WAIT", "FOR", "BOT", "$expr$", "TIMEOUT", "$expr$"],
            false,
            move |context, inputs| {
                let target_bot = context
                    .eval_expression_tree(&inputs[0])?
                    .to_string()
                    .trim_matches('"')
                    .to_string();
                // Non-integer timeout expressions fall back to 30 seconds.
                let timeout_secs: i64 = context
                    .eval_expression_tree(&inputs[1])?
                    .as_int()
                    .unwrap_or(30);
                trace!(
                    "WAIT FOR BOT '{}' TIMEOUT {} for session: {}",
                    target_bot,
                    timeout_secs,
                    user_clone.id
                );
                let state_for_task = Arc::clone(&state_clone);
                let session_id = user_clone.id;
                let bot_id = user_clone.bot_id;
                let current_bot = format!("bot_{}", bot_id);
                // Poll asynchronously on a worker thread with its own runtime.
                let (tx, rx) = std::sync::mpsc::channel();
                std::thread::spawn(move || {
                    let rt = tokio::runtime::Runtime::new().expect("Failed to create runtime");
                    let result = rt.block_on(async {
                        wait_for_bot_response(
                            &state_for_task,
                            session_id,
                            &target_bot,
                            &current_bot,
                            timeout_secs as u64,
                        )
                        .await
                    });
                    let _ = tx.send(result);
                });
                // Channel deadline is the poll timeout plus 5 s of slack so
                // the inner timeout normally fires first.
                let timeout_duration = std::time::Duration::from_secs(timeout_secs as u64 + 5);
                match rx.recv_timeout(timeout_duration) {
                    Ok(Ok(response)) => Ok(Dynamic::from(response)),
                    Ok(Err(e)) => Ok(Dynamic::from(format!("Error: {}", e))),
                    Err(_) => Ok(Dynamic::from("Timeout waiting for response".to_string())),
                }
            },
        )
        .expect("Failed to register WAIT FOR BOT syntax");
}
/// DELEGATE CONVERSATION TO "bot_name"
/// Hand off the entire conversation to another bot
///
/// Registers the custom Rhai syntax `DELEGATE CONVERSATION TO <expr>`.
/// Sends a `Delegate` message to the target bot, then records it as the
/// session's active bot; returns the confirmation string from
/// `set_session_active_bot`.
pub fn delegate_conversation_keyword(state: Arc<AppState>, user: UserSession, engine: &mut Engine) {
    let state_clone = Arc::clone(&state);
    let user_clone = user.clone();
    engine
        .register_custom_syntax(
            &["DELEGATE", "CONVERSATION", "TO", "$expr$"],
            false,
            move |context, inputs| {
                let target_bot = context
                    .eval_expression_tree(&inputs[0])?
                    .to_string()
                    .trim_matches('"')
                    .to_string();
                trace!(
                    "DELEGATE CONVERSATION TO '{}' for session: {}",
                    target_bot,
                    user_clone.id
                );
                let state_for_task = Arc::clone(&state_clone);
                let session_id = user_clone.id;
                let bot_id = user_clone.bot_id;
                let from_bot = format!("bot_{}", bot_id);
                // Bridge the sync Rhai callback to async DB work via a worker
                // thread with its own runtime.
                let (tx, rx) = std::sync::mpsc::channel();
                std::thread::spawn(move || {
                    let rt = tokio::runtime::Runtime::new().expect("Failed to create runtime");
                    let result = rt.block_on(async {
                        // Send delegate message
                        send_a2a_message(
                            &state_for_task,
                            session_id,
                            &from_bot,
                            Some(&target_bot),
                            A2AMessageType::Delegate,
                            serde_json::json!({
                                "action": "take_over",
                                "reason": "delegation_request"
                            }),
                        )
                        .await?;
                        // Update session to set new active bot
                        set_session_active_bot(&state_for_task, session_id, &target_bot).await
                    });
                    let _ = tx.send(result);
                });
                match rx.recv_timeout(std::time::Duration::from_secs(30)) {
                    Ok(Ok(msg)) => Ok(Dynamic::from(msg)),
                    Ok(Err(e)) => Err(Box::new(rhai::EvalAltResult::ErrorRuntime(
                        e.into(),
                        rhai::Position::NONE,
                    ))),
                    Err(_) => Err(Box::new(rhai::EvalAltResult::ErrorRuntime(
                        "DELEGATE CONVERSATION timed out".into(),
                        rhai::Position::NONE,
                    ))),
                }
            },
        )
        .expect("Failed to register DELEGATE CONVERSATION syntax");
}
/// GET A2A MESSAGES()
/// Get all A2A messages for this bot in current session
///
/// Registers a Rhai function returning the pending (unprocessed, unexpired)
/// messages addressed to this bot or broadcast, each serialized to a JSON
/// string. Messages that fail to serialize become empty strings; a failed
/// DB connection yields an empty array.
pub fn get_a2a_messages_keyword(state: Arc<AppState>, user: UserSession, engine: &mut Engine) {
    let state_clone = Arc::clone(&state);
    let user_clone = user.clone();
    engine.register_fn("GET A2A MESSAGES", move || -> rhai::Array {
        let state = Arc::clone(&state_clone);
        let session_id = user_clone.id;
        let bot_id = user_clone.bot_id;
        let current_bot = format!("bot_{}", bot_id);
        if let Ok(mut conn) = state.conn.get() {
            get_pending_messages_sync(&mut conn, session_id, &current_bot)
                .unwrap_or_default()
                .into_iter()
                .map(|msg| Dynamic::from(serde_json::to_string(&msg).unwrap_or_default()))
                .collect()
        } else {
            rhai::Array::new()
        }
    });
}
// ============================================================================
// Database Operations
// ============================================================================
/// Send an A2A message
///
/// Builds an `A2AMessage` and inserts it into `a2a_messages` with
/// `processed = false`. Returns the new message id.
///
/// # Errors
/// Returns a descriptive `String` if the connection, payload/metadata
/// serialization, or the insert fails.
async fn send_a2a_message(
    state: &AppState,
    session_id: Uuid,
    from_agent: &str,
    to_agent: Option<&str>,
    message_type: A2AMessageType,
    payload: serde_json::Value,
) -> Result<Uuid, String> {
    let mut conn = state
        .conn
        .get()
        .map_err(|e| format!("Failed to acquire database connection: {}", e))?;
    let message = A2AMessage::new(from_agent, to_agent, message_type, payload, session_id);
    let message_id = message.id;
    // Payload and metadata are stored as JSON text columns.
    let payload_str = serde_json::to_string(&message.payload)
        .map_err(|e| format!("Failed to serialize payload: {}", e))?;
    let metadata_str = serde_json::to_string(&message.metadata)
        .map_err(|e| format!("Failed to serialize metadata: {}", e))?;
    diesel::sql_query(
        "INSERT INTO a2a_messages \
         (id, session_id, from_agent, to_agent, message_type, payload, correlation_id, \
         timestamp, metadata, ttl_seconds, hop_count, processed) \
         VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, false)",
    )
    .bind::<diesel::sql_types::Uuid, _>(message.id)
    .bind::<diesel::sql_types::Uuid, _>(message.session_id)
    .bind::<diesel::sql_types::Text, _>(&message.from_agent)
    .bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(message.to_agent.as_deref())
    .bind::<diesel::sql_types::Text, _>(message.message_type.to_string())
    .bind::<diesel::sql_types::Text, _>(&payload_str)
    .bind::<diesel::sql_types::Uuid, _>(message.correlation_id)
    .bind::<diesel::sql_types::Timestamptz, _>(message.timestamp)
    .bind::<diesel::sql_types::Text, _>(&metadata_str)
    .bind::<diesel::sql_types::Integer, _>(message.ttl_seconds as i32)
    .bind::<diesel::sql_types::Integer, _>(message.hop_count as i32)
    .execute(&mut conn)
    .map_err(|e| format!("Failed to insert A2A message: {}", e))?;
    info!(
        "A2A message sent: {} -> {:?} (type: {})",
        from_agent, to_agent, message.message_type
    );
    Ok(message_id)
}
/// Wait for a response from a specific bot
///
/// Polls `a2a_messages` every 500 ms for the newest unprocessed `response`
/// row sent by `from_bot` (the bot being awaited) to `to_bot` (the caller).
/// When found, the row is marked processed and the payload's `content`
/// string is returned (or the raw payload text if `content` is absent or
/// the payload is not valid JSON).
///
/// # Errors
/// Returns an error string on query failure or when `timeout_secs` elapses
/// without a response.
async fn wait_for_bot_response(
    state: &AppState,
    session_id: Uuid,
    from_bot: &str,
    to_bot: &str,
    timeout_secs: u64,
) -> Result<String, String> {
    let start = std::time::Instant::now();
    let timeout = std::time::Duration::from_secs(timeout_secs);
    loop {
        if start.elapsed() > timeout {
            return Err("Timeout waiting for bot response".to_string());
        }
        // Check for response. A failed connection this round just falls
        // through to the sleep and retries.
        if let Ok(mut conn) = state.conn.get() {
            #[derive(QueryableByName)]
            struct MessageRow {
                #[diesel(sql_type = diesel::sql_types::Uuid)]
                id: Uuid,
                #[diesel(sql_type = diesel::sql_types::Text)]
                payload: String,
            }
            let result: Option<MessageRow> = diesel::sql_query(
                "SELECT id, payload FROM a2a_messages \
                 WHERE session_id = $1 AND from_agent = $2 AND to_agent = $3 \
                 AND message_type = 'response' AND processed = false \
                 ORDER BY timestamp DESC LIMIT 1",
            )
            .bind::<diesel::sql_types::Uuid, _>(session_id)
            .bind::<diesel::sql_types::Text, _>(from_bot)
            .bind::<diesel::sql_types::Text, _>(to_bot)
            .get_result(&mut conn)
            .optional()
            .map_err(|e| format!("Failed to query messages: {}", e))?;
            if let Some(msg) = result {
                // Mark as processed (best effort; failure is ignored).
                let _ = diesel::sql_query("UPDATE a2a_messages SET processed = true WHERE id = $1")
                    .bind::<diesel::sql_types::Uuid, _>(msg.id)
                    .execute(&mut conn);
                // Extract content from payload
                if let Ok(payload) = serde_json::from_str::<serde_json::Value>(&msg.payload) {
                    if let Some(content) = payload.get("content").and_then(|c| c.as_str()) {
                        return Ok(content.to_string());
                    }
                }
                // Fallback: return the raw payload text.
                return Ok(msg.payload);
            }
        }
        // Sleep before next check
        tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;
    }
}
/// Set the active bot for a session
///
/// Upserts the `active_bot` preference in `session_preferences` and returns
/// a human-readable confirmation string.
///
/// # Errors
/// Returns an error string if the connection or the upsert fails.
async fn set_session_active_bot(
    state: &AppState,
    session_id: Uuid,
    bot_name: &str,
) -> Result<String, String> {
    let mut conn = state
        .conn
        .get()
        .map_err(|e| format!("Failed to acquire database connection: {}", e))?;
    let now = chrono::Utc::now();
    // Upsert keyed on (session_id, preference_key).
    diesel::sql_query(
        "INSERT INTO session_preferences (session_id, preference_key, preference_value, updated_at) \
         VALUES ($1, 'active_bot', $2, $3) \
         ON CONFLICT (session_id, preference_key) DO UPDATE SET \
         preference_value = EXCLUDED.preference_value, \
         updated_at = EXCLUDED.updated_at",
    )
    .bind::<diesel::sql_types::Uuid, _>(session_id)
    .bind::<diesel::sql_types::Text, _>(bot_name)
    .bind::<diesel::sql_types::Timestamptz, _>(now)
    .execute(&mut conn)
    .map_err(|e| format!("Failed to set active bot: {}", e))?;
    info!("Session {} delegated to bot: {}", session_id, bot_name);
    Ok(format!("Conversation delegated to {}", bot_name))
}
/// Get pending messages for a bot (sync version)
///
/// Loads all unprocessed messages in the session that are either addressed
/// to `to_agent` or are broadcasts (`to_agent IS NULL`), oldest first, and
/// filters out expired ones in memory.
///
/// NOTE(review): the `metadata` column is not selected here, so every
/// returned message carries an empty metadata map even if metadata was
/// persisted by `send_a2a_message` — confirm whether that is intentional.
fn get_pending_messages_sync(
    conn: &mut diesel::PgConnection,
    session_id: Uuid,
    to_agent: &str,
) -> Result<Vec<A2AMessage>, String> {
    // Row shape for the raw query below.
    #[derive(QueryableByName)]
    struct MessageRow {
        #[diesel(sql_type = diesel::sql_types::Uuid)]
        id: Uuid,
        #[diesel(sql_type = diesel::sql_types::Uuid)]
        session_id: Uuid,
        #[diesel(sql_type = diesel::sql_types::Text)]
        from_agent: String,
        #[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Text>)]
        to_agent: Option<String>,
        #[diesel(sql_type = diesel::sql_types::Text)]
        message_type: String,
        #[diesel(sql_type = diesel::sql_types::Text)]
        payload: String,
        #[diesel(sql_type = diesel::sql_types::Uuid)]
        correlation_id: Uuid,
        #[diesel(sql_type = diesel::sql_types::Timestamptz)]
        timestamp: chrono::DateTime<chrono::Utc>,
        #[diesel(sql_type = diesel::sql_types::Integer)]
        ttl_seconds: i32,
        #[diesel(sql_type = diesel::sql_types::Integer)]
        hop_count: i32,
    }
    let rows: Vec<MessageRow> = diesel::sql_query(
        "SELECT id, session_id, from_agent, to_agent, message_type, payload, \
         correlation_id, timestamp, ttl_seconds, hop_count \
         FROM a2a_messages \
         WHERE session_id = $1 AND (to_agent = $2 OR to_agent IS NULL) AND processed = false \
         ORDER BY timestamp ASC",
    )
    .bind::<diesel::sql_types::Uuid, _>(session_id)
    .bind::<diesel::sql_types::Text, _>(to_agent)
    .load(conn)
    .map_err(|e| format!("Failed to get pending messages: {}", e))?;
    let messages: Vec<A2AMessage> = rows
        .into_iter()
        .map(|row| A2AMessage {
            id: row.id,
            session_id: row.session_id,
            from_agent: row.from_agent,
            to_agent: row.to_agent,
            message_type: A2AMessageType::from(row.message_type.as_str()),
            // Unparseable payload text degrades to an empty JSON object.
            payload: serde_json::from_str(&row.payload).unwrap_or(serde_json::json!({})),
            correlation_id: row.correlation_id,
            timestamp: row.timestamp,
            metadata: HashMap::new(),
            ttl_seconds: row.ttl_seconds as u32,
            hop_count: row.hop_count as u32,
        })
        // TTL filtering happens here rather than in SQL.
        .filter(|msg| !msg.is_expired())
        .collect();
    Ok(messages)
}
/// Respond to an A2A message (helper for bots to use)
///
/// Thin wrapper over `send_a2a_message` that sends a `Response` back to the
/// original sender, in the same session, with the text wrapped in a
/// `{"content": ...}` payload. Returns the new message id.
///
/// NOTE(review): this does not copy `correlation_id` or increment
/// `hop_count` the way `A2AMessage::create_response` does — confirm whether
/// correlation tracking is required here.
pub async fn respond_to_a2a_message(
    state: &AppState,
    original_message: &A2AMessage,
    from_agent: &str,
    response_content: &str,
) -> Result<Uuid, String> {
    send_a2a_message(
        state,
        original_message.session_id,
        from_agent,
        Some(&original_message.from_agent),
        A2AMessageType::Response,
        serde_json::json!({ "content": response_content }),
    )
    .await
}
// ============================================================================
// Tests
// ============================================================================
#[cfg(test)]
mod tests {
    use super::*;
    // New messages start with zero hops and fresh ids.
    #[test]
    fn test_a2a_message_creation() {
        let msg = A2AMessage::new(
            "bot_a",
            Some("bot_b"),
            A2AMessageType::Request,
            serde_json::json!({"test": "data"}),
            Uuid::new_v4(),
        );
        assert_eq!(msg.from_agent, "bot_a");
        assert_eq!(msg.to_agent, Some("bot_b".to_string()));
        assert_eq!(msg.message_type, A2AMessageType::Request);
        assert_eq!(msg.hop_count, 0);
    }
    // A response swaps sender/recipient, keeps the correlation id, and
    // increments the hop count.
    #[test]
    fn test_a2a_message_response() {
        let original = A2AMessage::new(
            "bot_a",
            Some("bot_b"),
            A2AMessageType::Request,
            serde_json::json!({"question": "test"}),
            Uuid::new_v4(),
        );
        let response = original.create_response("bot_b", serde_json::json!({"answer": "result"}));
        assert_eq!(response.from_agent, "bot_b");
        assert_eq!(response.to_agent, Some("bot_a".to_string()));
        assert_eq!(response.message_type, A2AMessageType::Response);
        assert_eq!(response.correlation_id, original.correlation_id);
        assert_eq!(response.hop_count, 1);
    }
    // Display renders lowercase wire names.
    #[test]
    fn test_message_type_display() {
        assert_eq!(A2AMessageType::Request.to_string(), "request");
        assert_eq!(A2AMessageType::Response.to_string(), "response");
        assert_eq!(A2AMessageType::Broadcast.to_string(), "broadcast");
        assert_eq!(A2AMessageType::Delegate.to_string(), "delegate");
    }
    // Parsing is case-insensitive and defaults unknown input to Request.
    #[test]
    fn test_message_type_from_str() {
        assert_eq!(A2AMessageType::from("request"), A2AMessageType::Request);
        assert_eq!(A2AMessageType::from("RESPONSE"), A2AMessageType::Response);
        assert_eq!(A2AMessageType::from("unknown"), A2AMessageType::Request);
    }
    #[test]
    fn test_a2a_config_default() {
        let config = A2AConfig::default();
        assert!(config.enabled);
        assert_eq!(config.timeout_seconds, 30);
        assert_eq!(config.max_hops, 5);
        assert_eq!(config.protocol_version, "1.0");
    }
    // A freshly created message (30 s TTL) is not yet expired.
    #[test]
    fn test_message_not_expired() {
        let msg = A2AMessage::new(
            "bot_a",
            Some("bot_b"),
            A2AMessageType::Request,
            serde_json::json!({}),
            Uuid::new_v4(),
        );
        assert!(!msg.is_expired());
    }
    #[test]
    fn test_max_hops_not_exceeded() {
        let msg = A2AMessage::new(
            "bot_a",
            Some("bot_b"),
            A2AMessageType::Request,
            serde_json::json!({}),
            Uuid::new_v4(),
        );
        assert!(!msg.max_hops_exceeded(5));
    }
    // The limit check is inclusive: hop_count == max_hops counts as exceeded.
    #[test]
    fn test_max_hops_exceeded() {
        let mut msg = A2AMessage::new(
            "bot_a",
            Some("bot_b"),
            A2AMessageType::Request,
            serde_json::json!({}),
            Uuid::new_v4(),
        );
        msg.hop_count = 5;
        assert!(msg.max_hops_exceeded(5));
    }
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,819 @@
//! API Tool Generator
//!
//! Automatically generates BASIC tools from OpenAPI/Swagger specifications.
//! Tools are created in the bot's .gbdialog folder and become immediately available.
//!
//! Config.csv format:
//! ```csv
//! name,value
//! myweather-api-server,https://api.weather.com/openapi.json
//! payment-api-server,https://api.stripe.com/v3/spec
//! ```
use crate::shared::state::AppState;
use diesel::prelude::*;
use log::{error, info, trace, warn};
use serde::{Deserialize, Serialize};
use sha2::{Digest, Sha256};
use std::collections::HashMap;
use std::sync::Arc;
use uuid::Uuid;
/// OpenAPI specification (simplified)
///
/// Minimal subset of an OpenAPI/Swagger document — only the fields the tool
/// generator reads. Either `openapi` (3.x) or `swagger` (2.x) holds the
/// document version.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OpenAPISpec {
    pub openapi: Option<String>,
    pub swagger: Option<String>,
    pub info: OpenAPIInfo,
    /// Server list; the first entry's URL is taken as the API base URL.
    pub servers: Option<Vec<OpenAPIServer>>,
    /// path -> (lowercase HTTP method -> operation)
    pub paths: HashMap<String, HashMap<String, OpenAPIOperation>>,
}
/// The `info` object of an OpenAPI document: title, optional description,
/// and API version.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OpenAPIInfo {
    pub title: String,
    pub description: Option<String>,
    pub version: String,
}
/// A single entry of the OpenAPI `servers` array.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OpenAPIServer {
    /// Base URL of the server (may be relative per the OpenAPI spec).
    pub url: String,
    pub description: Option<String>,
}
/// One HTTP operation (method + path) of an OpenAPI document.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OpenAPIOperation {
    /// Spec-provided unique id; when absent, one is synthesized from the
    /// method and path (see `extract_endpoints`).
    #[serde(rename = "operationId")]
    pub operation_id: Option<String>,
    pub summary: Option<String>,
    pub description: Option<String>,
    /// Path/query/header parameters.
    pub parameters: Option<Vec<OpenAPIParameter>>,
    #[serde(rename = "requestBody")]
    pub request_body: Option<OpenAPIRequestBody>,
    pub responses: Option<HashMap<String, OpenAPIResponse>>,
    pub tags: Option<Vec<String>>,
}
/// An OpenAPI operation parameter.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OpenAPIParameter {
    pub name: String,
    /// Where the parameter lives: "path", "query", "header", etc.
    /// Renamed from the spec's `in` keyword, which is reserved in Rust.
    #[serde(rename = "in")]
    pub location: String,
    pub description: Option<String>,
    pub required: Option<bool>,
    pub schema: Option<OpenAPISchema>,
    pub example: Option<serde_json::Value>,
}
/// A (recursive) OpenAPI schema object — only the parts the generator uses.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OpenAPISchema {
    /// JSON-schema type name ("string", "integer", "object", ...).
    #[serde(rename = "type")]
    pub schema_type: Option<String>,
    pub format: Option<String>,
    #[serde(rename = "enum")]
    pub enum_values: Option<Vec<String>>,
    pub default: Option<serde_json::Value>,
    pub example: Option<serde_json::Value>,
    /// Object properties (for type "object").
    pub properties: Option<HashMap<String, OpenAPISchema>>,
    /// Names of required properties.
    pub required: Option<Vec<String>>,
    /// Element schema (for type "array"); boxed to break the recursion.
    pub items: Option<Box<OpenAPISchema>>,
}
/// An OpenAPI request body: media-type map keyed by content type
/// (e.g. "application/json").
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OpenAPIRequestBody {
    pub description: Option<String>,
    pub required: Option<bool>,
    pub content: Option<HashMap<String, OpenAPIMediaType>>,
}
/// Schema + example for one media type of a request body or response.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OpenAPIMediaType {
    pub schema: Option<OpenAPISchema>,
    pub example: Option<serde_json::Value>,
}
/// An OpenAPI response object (parsed but not currently consumed by the
/// generator).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OpenAPIResponse {
    pub description: Option<String>,
    pub content: Option<HashMap<String, OpenAPIMediaType>>,
}
/// Generated endpoint information
///
/// Flattened, generator-ready view of one API operation, produced by
/// `extract_endpoints` and consumed by `generate_bas_file`.
#[derive(Debug, Clone)]
pub struct GeneratedEndpoint {
    /// Sanitized operation id; also used as the .bas file name.
    pub operation_id: String,
    /// Uppercase HTTP method.
    pub method: String,
    /// Path template, possibly containing `{param}` placeholders.
    pub path: String,
    pub description: String,
    pub parameters: Vec<EndpointParameter>,
    /// Base URL taken from the spec's first server entry.
    pub base_url: String,
}
/// One parameter of a generated endpoint, normalized from the OpenAPI form.
#[derive(Debug, Clone)]
pub struct EndpointParameter {
    pub name: String,
    /// BASIC-side type name (mapped via `map_openapi_type`).
    pub param_type: String,
    pub location: String, // "path", "query", "header", "body"
    pub description: String,
    pub required: bool,
    /// Example value rendered to a string, when the spec provides one.
    pub example: Option<String>,
}
/// API Tool Generator
///
/// Generates BASIC tool files (.bas) from OpenAPI specs for a single bot,
/// writing them under `<work_path>/<bot_id>.gbai/.gbdialog/<api_name>/`.
pub struct ApiToolGenerator {
    state: Arc<AppState>,
    bot_id: Uuid,
    /// Root of the on-disk work area the .gbai folders live under.
    work_path: String,
}
impl ApiToolGenerator {
    /// Create a generator bound to one bot and its on-disk work area.
    pub fn new(state: Arc<AppState>, bot_id: Uuid, work_path: &str) -> Self {
        Self {
            state,
            bot_id,
            work_path: work_path.to_string(),
        }
    }
/// Sync all API tools from config.csv
/// Looks for entries like: myweather-api-server,https://api.weather.com/openapi.json
pub async fn sync_all_api_tools(&self) -> Result<SyncResult, String> {
let api_configs = self.get_api_configs().await?;
let mut result = SyncResult::default();
for (api_name, spec_url) in api_configs {
info!("Processing API: {} from {}", api_name, spec_url);
match self.sync_api_tools(&api_name, &spec_url).await {
Ok(count) => {
result.apis_synced += 1;
result.tools_generated += count;
info!("Generated {} tools for API: {}", count, api_name);
}
Err(e) => {
result.errors.push(format!("{}: {}", api_name, e));
error!("Failed to sync API {}: {}", api_name, e);
}
}
}
// Clean up removed APIs
let removed = self.cleanup_removed_apis(&api_configs).await?;
result.tools_removed = removed;
Ok(result)
}
    /// Sync tools for a single API
    ///
    /// Fetches the spec, short-circuits if its SHA-256 hash matches the one
    /// stored from the previous sync, otherwise regenerates one .bas file per
    /// endpoint and records the new hash. Returns the number of files written
    /// (0 when the spec is unchanged).
    pub async fn sync_api_tools(&self, api_name: &str, spec_url: &str) -> Result<usize, String> {
        // Fetch the OpenAPI spec
        let spec_content = self.fetch_spec(spec_url).await?;
        let spec_hash = self.calculate_hash(&spec_content);
        // Check if spec has changed
        if !self.has_spec_changed(api_name, &spec_hash).await? {
            trace!("API spec unchanged for {}, skipping", api_name);
            return Ok(0);
        }
        // Parse the spec (JSON only; YAML specs would fail here).
        let spec: OpenAPISpec = serde_json::from_str(&spec_content)
            .map_err(|e| format!("Failed to parse OpenAPI spec: {}", e))?;
        // Generate endpoints
        let endpoints = self.extract_endpoints(&spec)?;
        // Create .gbdialog folder for this API
        let api_folder = format!(
            "{}/{}.gbai/.gbdialog/{}",
            self.work_path, self.bot_id, api_name
        );
        std::fs::create_dir_all(&api_folder)
            .map_err(|e| format!("Failed to create API folder: {}", e))?;
        // Generate .bas files, one per endpoint, named by operation id.
        let mut generated_count = 0;
        for endpoint in &endpoints {
            let bas_content = self.generate_bas_file(&api_name, endpoint)?;
            let file_path = format!("{}/{}.bas", api_folder, endpoint.operation_id);
            std::fs::write(&file_path, &bas_content)
                .map_err(|e| format!("Failed to write .bas file: {}", e))?;
            generated_count += 1;
        }
        // Update database record
        self.update_api_record(api_name, spec_url, &spec_hash, generated_count)
            .await?;
        Ok(generated_count)
    }
    /// Fetch OpenAPI spec from URL
    ///
    /// Accepts either a local path (starting with "./" or "/") or an HTTP(S)
    /// URL; remote fetches use a 30-second timeout and require a 2xx status.
    /// Returns the raw spec text.
    async fn fetch_spec(&self, spec_url: &str) -> Result<String, String> {
        // Handle local file paths
        if spec_url.starts_with("./") || spec_url.starts_with("/") {
            return std::fs::read_to_string(spec_url)
                .map_err(|e| format!("Failed to read local spec file: {}", e));
        }
        // Fetch from URL
        let client = reqwest::Client::builder()
            .timeout(std::time::Duration::from_secs(30))
            .build()
            .map_err(|e| format!("Failed to create HTTP client: {}", e))?;
        let response = client
            .get(spec_url)
            .header("Accept", "application/json")
            .send()
            .await
            .map_err(|e| format!("Failed to fetch spec: {}", e))?;
        if !response.status().is_success() {
            return Err(format!("Failed to fetch spec: HTTP {}", response.status()));
        }
        response
            .text()
            .await
            .map_err(|e| format!("Failed to read spec body: {}", e))
    }
    /// Extract endpoints from OpenAPI spec
    ///
    /// Flattens every (path, method) pair into a `GeneratedEndpoint`,
    /// gathering path/query/header parameters and the top-level fields of a
    /// JSON request body. The base URL is the first `servers` entry, falling
    /// back to "http://localhost".
    fn extract_endpoints(&self, spec: &OpenAPISpec) -> Result<Vec<GeneratedEndpoint>, String> {
        let mut endpoints = Vec::new();
        // Determine base URL
        let base_url = spec
            .servers
            .as_ref()
            .and_then(|s| s.first())
            .map(|s| s.url.clone())
            .unwrap_or_else(|| "http://localhost".to_string());
        for (path, methods) in &spec.paths {
            for (method, operation) in methods {
                // Use the spec's operationId when present (sanitized);
                // otherwise synthesize one from the method and path.
                let operation_id = match &operation.operation_id {
                    Some(id) => self.sanitize_operation_id(id),
                    None => self.generate_operation_id(&method, &path),
                };
                // Build description: summary, else description, else
                // "<METHOD> <path>".
                let description = operation
                    .summary
                    .clone()
                    .or_else(|| operation.description.clone())
                    .unwrap_or_else(|| format!("{} {}", method.to_uppercase(), path));
                // Extract parameters
                let mut parameters = Vec::new();
                // Path and query parameters
                if let Some(params) = &operation.parameters {
                    for param in params {
                        parameters.push(self.convert_parameter(param));
                    }
                }
                // Request body parameters: only the "application/json"
                // media type is considered.
                if let Some(body) = &operation.request_body {
                    if let Some(content) = &body.content {
                        if let Some(json_content) = content.get("application/json") {
                            if let Some(schema) = &json_content.schema {
                                let body_params = self.extract_body_parameters(
                                    schema,
                                    body.required.unwrap_or(false),
                                );
                                parameters.extend(body_params);
                            }
                        }
                    }
                }
                endpoints.push(GeneratedEndpoint {
                    operation_id,
                    method: method.to_uppercase(),
                    path: path.clone(),
                    description,
                    parameters,
                    base_url: base_url.clone(),
                });
            }
        }
        Ok(endpoints)
    }
/// Convert OpenAPI parameter to our format
fn convert_parameter(&self, param: &OpenAPIParameter) -> EndpointParameter {
let param_type = param
.schema
.as_ref()
.and_then(|s| s.schema_type.clone())
.unwrap_or_else(|| "string".to_string());
let example = param
.example
.as_ref()
.or_else(|| param.schema.as_ref().and_then(|s| s.example.as_ref()))
.map(|v| self.value_to_string(v));
EndpointParameter {
name: param.name.clone(),
param_type: self.map_openapi_type(&param_type),
location: param.location.clone(),
description: param.description.clone().unwrap_or_default(),
required: param.required.unwrap_or(false),
example,
}
}
/// Extract parameters from request body schema
fn extract_body_parameters(
&self,
schema: &OpenAPISchema,
required: bool,
) -> Vec<EndpointParameter> {
let mut params = Vec::new();
if let Some(properties) = &schema.properties {
let required_fields = schema.required.clone().unwrap_or_default();
for (name, prop_schema) in properties {
let param_type = prop_schema
.schema_type
.clone()
.unwrap_or_else(|| "string".to_string());
let example = prop_schema
.example
.as_ref()
.map(|v| self.value_to_string(v));
params.push(EndpointParameter {
name: name.clone(),
param_type: self.map_openapi_type(&param_type),
location: "body".to_string(),
description: String::new(),
required: required && required_fields.contains(name),
example,
});
}
}
params
}
    /// Generate BASIC file for an endpoint
    ///
    /// Emits a complete .bas tool: a header comment, one PARAM line per
    /// parameter, a DESCRIPTION line, optional query-string and body-building
    /// sections, the HTTP call, and a RETURN.
    ///
    /// NOTE(review): `example` and the interpolated parameter names are
    /// escaped only via `escape_description` for descriptions; example
    /// strings containing double quotes would break the generated PARAM
    /// line — confirm upstream sanitization.
    fn generate_bas_file(
        &self,
        api_name: &str,
        endpoint: &GeneratedEndpoint,
    ) -> Result<String, String> {
        let mut bas = String::new();
        // Header comment
        bas.push_str(&format!("' Auto-generated tool for {} API\n", api_name));
        bas.push_str(&format!(
            "' Endpoint: {} {}\n",
            endpoint.method, endpoint.path
        ));
        bas.push_str(&format!(
            "' Generated at: {}\n\n",
            chrono::Utc::now().to_rfc3339()
        ));
        // PARAM declarations — one per parameter, optional ones annotated
        // with a trailing comment.
        for param in &endpoint.parameters {
            let example = param.example.as_deref().unwrap_or("");
            let required_marker = if param.required { "" } else { " ' optional" };
            bas.push_str(&format!(
                "PARAM {} AS {} LIKE \"{}\" DESCRIPTION \"{}\"{}\n",
                self.sanitize_param_name(&param.name),
                param.param_type,
                example,
                self.escape_description(&param.description),
                required_marker
            ));
        }
        // DESCRIPTION
        bas.push_str(&format!(
            "\nDESCRIPTION \"{}\"\n\n",
            self.escape_description(&endpoint.description)
        ));
        // Build URL with path parameters: each "{name}" placeholder becomes
        // a string-concatenation with the sanitized variable.
        let mut url = format!("{}{}", endpoint.base_url, endpoint.path);
        let path_params: Vec<&EndpointParameter> = endpoint
            .parameters
            .iter()
            .filter(|p| p.location == "path")
            .collect();
        for param in &path_params {
            url = url.replace(
                &format!("{{{}}}", param.name),
                &format!("\" + {} + \"", self.sanitize_param_name(&param.name)),
            );
        }
        // Build query string: each non-empty query parameter appends
        // "?name=value" or "&name=value".
        let query_params: Vec<&EndpointParameter> = endpoint
            .parameters
            .iter()
            .filter(|p| p.location == "query")
            .collect();
        if !query_params.is_empty() {
            bas.push_str("' Build query string\n");
            bas.push_str("query_params = \"\"\n");
            for (i, param) in query_params.iter().enumerate() {
                let name = self.sanitize_param_name(&param.name);
                let sep = if i == 0 { "?" } else { "&" };
                bas.push_str(&format!(
                    "IF NOT ISEMPTY({}) THEN query_params = query_params + \"{}{}=\" + {}\n",
                    name, sep, param.name, name
                ));
            }
            bas.push('\n');
        }
        // Build request body: one field assignment per non-empty body param.
        let body_params: Vec<&EndpointParameter> = endpoint
            .parameters
            .iter()
            .filter(|p| p.location == "body")
            .collect();
        if !body_params.is_empty() {
            bas.push_str("' Build request body\n");
            bas.push_str("body = {}\n");
            for param in &body_params {
                let name = self.sanitize_param_name(&param.name);
                bas.push_str(&format!(
                    "IF NOT ISEMPTY({}) THEN body.{} = {}\n",
                    name, param.name, name
                ));
            }
            bas.push('\n');
        }
        // Make HTTP request — "WITH body" only when body params exist.
        bas.push_str("' Make API request\n");
        let full_url = if query_params.is_empty() {
            format!("\"{}\"", url)
        } else {
            format!("\"{}\" + query_params", url)
        };
        if body_params.is_empty() {
            bas.push_str(&format!("result = {} HTTP {}\n", endpoint.method, full_url));
        } else {
            bas.push_str(&format!(
                "result = {} HTTP {} WITH body\n",
                endpoint.method, full_url
            ));
        }
        // Return result
        bas.push_str("\n' Return result\n");
        bas.push_str("RETURN result\n");
        Ok(bas)
    }
/// Get API configurations from bot config
async fn get_api_configs(&self) -> Result<Vec<(String, String)>, String> {
let mut conn = self
.state
.conn
.get()
.map_err(|e| format!("Failed to acquire database connection: {}", e))?;
#[derive(QueryableByName)]
struct ConfigRow {
#[diesel(sql_type = diesel::sql_types::Text)]
config_key: String,
#[diesel(sql_type = diesel::sql_types::Text)]
config_value: String,
}
let configs: Vec<ConfigRow> = diesel::sql_query(
"SELECT config_key, config_value FROM bot_configuration \
WHERE bot_id = $1 AND config_key LIKE '%-api-server'",
)
.bind::<diesel::sql_types::Uuid, _>(self.bot_id)
.load(&mut conn)
.map_err(|e| format!("Failed to query API configs: {}", e))?;
let result: Vec<(String, String)> = configs
.into_iter()
.map(|c| {
let api_name = c.config_key.trim_end_matches("-api-server").to_string();
(api_name, c.config_value)
})
.collect();
Ok(result)
}
/// Check if spec has changed since last sync
async fn has_spec_changed(&self, api_name: &str, current_hash: &str) -> Result<bool, String> {
let mut conn = self
.state
.conn
.get()
.map_err(|e| format!("Failed to acquire database connection: {}", e))?;
#[derive(QueryableByName)]
struct HashRow {
#[diesel(sql_type = diesel::sql_types::Text)]
spec_hash: String,
}
let result: Option<HashRow> = diesel::sql_query(
"SELECT spec_hash FROM generated_api_tools \
WHERE bot_id = $1 AND api_name = $2 LIMIT 1",
)
.bind::<diesel::sql_types::Uuid, _>(self.bot_id)
.bind::<diesel::sql_types::Text, _>(api_name)
.get_result(&mut conn)
.optional()
.map_err(|e| format!("Failed to check spec hash: {}", e))?;
match result {
Some(row) => Ok(row.spec_hash != current_hash),
None => Ok(true), // No record exists, need to sync
}
}
/// Update API record in database
async fn update_api_record(
&self,
api_name: &str,
spec_url: &str,
spec_hash: &str,
tool_count: usize,
) -> Result<(), String> {
let mut conn = self
.state
.conn
.get()
.map_err(|e| format!("Failed to acquire database connection: {}", e))?;
let now = chrono::Utc::now();
let new_id = Uuid::new_v4();
diesel::sql_query(
"INSERT INTO generated_api_tools \
(id, bot_id, api_name, spec_url, spec_hash, tool_count, last_synced_at, created_at) \
VALUES ($1, $2, $3, $4, $5, $6, $7, $7) \
ON CONFLICT (bot_id, api_name) DO UPDATE SET \
spec_url = EXCLUDED.spec_url, \
spec_hash = EXCLUDED.spec_hash, \
tool_count = EXCLUDED.tool_count, \
last_synced_at = EXCLUDED.last_synced_at",
)
.bind::<diesel::sql_types::Uuid, _>(new_id)
.bind::<diesel::sql_types::Uuid, _>(self.bot_id)
.bind::<diesel::sql_types::Text, _>(api_name)
.bind::<diesel::sql_types::Text, _>(spec_url)
.bind::<diesel::sql_types::Text, _>(spec_hash)
.bind::<diesel::sql_types::Integer, _>(tool_count as i32)
.bind::<diesel::sql_types::Timestamptz, _>(now)
.execute(&mut conn)
.map_err(|e| format!("Failed to update API record: {}", e))?;
Ok(())
}
/// Cleanup APIs that have been removed from config
async fn cleanup_removed_apis(
&self,
current_apis: &[(String, String)],
) -> Result<usize, String> {
let mut conn = self
.state
.conn
.get()
.map_err(|e| format!("Failed to acquire database connection: {}", e))?;
#[derive(QueryableByName)]
struct ApiRow {
#[diesel(sql_type = diesel::sql_types::Text)]
api_name: String,
}
let existing: Vec<ApiRow> =
diesel::sql_query("SELECT api_name FROM generated_api_tools WHERE bot_id = $1")
.bind::<diesel::sql_types::Uuid, _>(self.bot_id)
.load(&mut conn)
.map_err(|e| format!("Failed to query existing APIs: {}", e))?;
let current_names: Vec<&str> = current_apis.iter().map(|(n, _)| n.as_str()).collect();
let mut removed_count = 0;
for api in existing {
if !current_names.contains(&api.api_name.as_str()) {
// Remove from database
diesel::sql_query(
"DELETE FROM generated_api_tools WHERE bot_id = $1 AND api_name = $2",
)
.bind::<diesel::sql_types::Uuid, _>(self.bot_id)
.bind::<diesel::sql_types::Text, _>(&api.api_name)
.execute(&mut conn)
.ok();
// Remove folder
let api_folder = format!(
"{}/{}.gbai/.gbdialog/{}",
self.work_path, self.bot_id, api.api_name
);
if let Err(e) = std::fs::remove_dir_all(&api_folder) {
warn!("Failed to remove API folder {}: {}", api_folder, e);
} else {
info!("Removed API folder: {}", api_folder);
removed_count += 1;
}
}
}
Ok(removed_count)
}
// Helper functions
fn calculate_hash(&self, content: &str) -> String {
let mut hasher = Sha256::new();
hasher.update(content.as_bytes());
format!("{:x}", hasher.finalize())
}
fn sanitize_operation_id(&self, id: &str) -> String {
id.chars()
.map(|c| {
if c.is_alphanumeric() || c == '_' {
c
} else {
'_'
}
})
.collect::<String>()
.to_lowercase()
}
fn generate_operation_id(&self, method: &str, path: &str) -> String {
let path_part = path
.trim_matches('/')
.replace('/', "_")
.replace('{', "")
.replace('}', "");
format!("{}_{}", method.to_lowercase(), path_part)
}
fn sanitize_param_name(&self, name: &str) -> String {
name.chars()
.map(|c| {
if c.is_alphanumeric() || c == '_' {
c
} else {
'_'
}
})
.collect::<String>()
.to_lowercase()
}
fn map_openapi_type(&self, openapi_type: &str) -> String {
match openapi_type.to_lowercase().as_str() {
"integer" | "number" => "number".to_string(),
"boolean" => "boolean".to_string(),
"array" => "array".to_string(),
"object" => "object".to_string(),
_ => "string".to_string(),
}
}
fn value_to_string(&self, value: &serde_json::Value) -> String {
match value {
serde_json::Value::String(s) => s.clone(),
serde_json::Value::Number(n) => n.to_string(),
serde_json::Value::Bool(b) => b.to_string(),
_ => serde_json::to_string(value).unwrap_or_default(),
}
}
fn escape_description(&self, desc: &str) -> String {
desc.replace('"', "'").replace('\n', " ").trim().to_string()
}
}
/// Result of API sync operation
#[derive(Debug, Default)]
pub struct SyncResult {
    /// Number of APIs whose specs were processed in this run.
    pub apis_synced: usize,
    /// Number of BASIC tool files generated.
    pub tools_generated: usize,
    /// Number of stale API entries cleaned up.
    pub tools_removed: usize,
    /// Human-readable errors accumulated during the sync (empty on success).
    pub errors: Vec<String>,
}
impl SyncResult {
    /// A sync is considered successful when no errors were recorded,
    /// regardless of how many tools were generated or removed.
    pub fn is_success(&self) -> bool {
        self.errors.is_empty()
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a generator backed by throwaway state; the helpers under test
    /// are pure string functions and never touch the database.
    fn test_generator() -> ApiToolGenerator {
        ApiToolGenerator {
            state: Arc::new(AppState::default_for_tests()),
            bot_id: Uuid::new_v4(),
            work_path: "/tmp".to_string(),
        }
    }

    #[test]
    fn test_sanitize_operation_id() {
        let generator = test_generator();
        assert_eq!(
            generator.sanitize_operation_id("getUserById"),
            "getuserbyid"
        );
        assert_eq!(
            generator.sanitize_operation_id("get-user-by-id"),
            "get_user_by_id"
        );
    }

    #[test]
    fn test_generate_operation_id() {
        let generator = test_generator();
        assert_eq!(
            generator.generate_operation_id("get", "/users/{id}"),
            "get_users_id"
        );
        assert_eq!(
            generator.generate_operation_id("post", "/users"),
            "post_users"
        );
    }

    #[test]
    fn test_map_openapi_type() {
        let generator = test_generator();
        assert_eq!(generator.map_openapi_type("integer"), "number");
        assert_eq!(generator.map_openapi_type("string"), "string");
        assert_eq!(generator.map_openapi_type("boolean"), "boolean");
        assert_eq!(generator.map_openapi_type("array"), "array");
    }

    #[test]
    fn test_escape_description() {
        let generator = test_generator();
        assert_eq!(
            generator.escape_description("Test \"description\" here"),
            "Test 'description' here"
        );
        assert_eq!(
            generator.escape_description("Line 1\nLine 2"),
            "Line 1 Line 2"
        );
    }

    #[test]
    fn test_calculate_hash() {
        let generator = test_generator();
        // Hashing must be deterministic and input-sensitive.
        let hash1 = generator.calculate_hash("test content");
        let hash2 = generator.calculate_hash("test content");
        let hash3 = generator.calculate_hash("different content");
        assert_eq!(hash1, hash2);
        assert_ne!(hash1, hash3);
    }
}

View file

@ -0,0 +1,979 @@
//! Code Sandbox Module
//!
//! Provides sandboxed execution of Python and JavaScript code using LXC containers.
//! Supports the RUN keyword for executing arbitrary code safely.
//!
//! BASIC Keywords:
//! - RUN PYTHON "code"
//! - RUN JAVASCRIPT "code"
//! - RUN PYTHON WITH FILE "script.py"
//!
//! Config.csv properties:
//! ```csv
//! sandbox-enabled,true
//! sandbox-timeout,30
//! sandbox-memory-limit,256
//! sandbox-cpu-limit,50
//! sandbox-network-enabled,false
//! sandbox-runtime,lxc
//! ```
use crate::shared::models::UserSession;
use crate::shared::state::AppState;
use diesel::prelude::*;
use log::{debug, error, info, trace, warn};
use rhai::{Dynamic, Engine};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::io::Write;
use std::process::{Command, Stdio};
use std::sync::Arc;
use std::time::Duration;
use tokio::time::timeout;
use uuid::Uuid;
/// Supported sandbox runtimes, selected via the `sandbox-runtime`
/// config key.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum SandboxRuntime {
    /// LXC containers (recommended for security)
    LXC,
    /// Docker containers
    Docker,
    /// Firecracker microVMs (highest security; currently falls back to
    /// process isolation — see `execute_firecracker`)
    Firecracker,
    /// Direct process isolation (fallback, least secure)
    Process,
}
impl Default for SandboxRuntime {
fn default() -> Self {
SandboxRuntime::Process
}
}
impl From<&str> for SandboxRuntime {
fn from(s: &str) -> Self {
match s.to_lowercase().as_str() {
"lxc" => SandboxRuntime::LXC,
"docker" => SandboxRuntime::Docker,
"firecracker" => SandboxRuntime::Firecracker,
_ => SandboxRuntime::Process,
}
}
}
/// Programming language for code execution
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum CodeLanguage {
    /// Executed with `python3`.
    Python,
    /// Executed with `node`.
    JavaScript,
    /// Executed with `bash`.
    Bash,
}
impl From<&str> for CodeLanguage {
fn from(s: &str) -> Self {
match s.to_lowercase().as_str() {
"python" | "py" => CodeLanguage::Python,
"javascript" | "js" | "node" => CodeLanguage::JavaScript,
"bash" | "sh" | "shell" => CodeLanguage::Bash,
_ => CodeLanguage::Python,
}
}
}
impl CodeLanguage {
pub fn file_extension(&self) -> &str {
match self {
CodeLanguage::Python => "py",
CodeLanguage::JavaScript => "js",
CodeLanguage::Bash => "sh",
}
}
pub fn interpreter(&self) -> &str {
match self {
CodeLanguage::Python => "python3",
CodeLanguage::JavaScript => "node",
CodeLanguage::Bash => "bash",
}
}
pub fn lxc_image(&self) -> &str {
match self {
CodeLanguage::Python => "gb-sandbox-python",
CodeLanguage::JavaScript => "gb-sandbox-node",
CodeLanguage::Bash => "gb-sandbox-base",
}
}
}
/// Sandbox configuration
///
/// Populated from the bot's `sandbox-*` config keys (see module docs);
/// keys that are absent keep the `Default` values.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SandboxConfig {
    /// Whether sandbox execution is enabled
    pub enabled: bool,
    /// Sandbox runtime to use
    pub runtime: SandboxRuntime,
    /// Maximum execution time in seconds
    pub timeout_seconds: u64,
    /// Memory limit in MB
    pub memory_limit_mb: u64,
    /// CPU limit as percentage (0-100)
    pub cpu_limit_percent: u32,
    /// Whether network access is allowed
    pub network_enabled: bool,
    /// Working directory for execution
    pub work_dir: String,
    /// Additional environment variables
    /// NOTE(review): not applied by any executor visible in this module —
    /// `execute_process` clears the env and sets fixed vars; confirm intent.
    pub env_vars: HashMap<String, String>,
    /// Allowed file paths for read access
    /// NOTE(review): not enforced by the executors visible here — TODO confirm.
    pub allowed_read_paths: Vec<String>,
    /// Allowed file paths for write access
    /// NOTE(review): not enforced by the executors visible here — TODO confirm.
    pub allowed_write_paths: Vec<String>,
}
impl Default for SandboxConfig {
fn default() -> Self {
Self {
enabled: true,
runtime: SandboxRuntime::Process,
timeout_seconds: 30,
memory_limit_mb: 256,
cpu_limit_percent: 50,
network_enabled: false,
work_dir: "/tmp/gb-sandbox".to_string(),
env_vars: HashMap::new(),
allowed_read_paths: vec![],
allowed_write_paths: vec![],
}
}
}
impl SandboxConfig {
    /// Load configuration from bot settings
    ///
    /// Reads all `sandbox-%` keys from `bot_configuration` for `bot_id`
    /// and overlays them on `Default`. Best-effort: a failed connection,
    /// failed query, or unparsable value silently keeps the default for
    /// that field.
    pub fn from_bot_config(state: &AppState, bot_id: Uuid) -> Self {
        let mut config = Self::default();
        if let Ok(mut conn) = state.conn.get() {
            #[derive(QueryableByName)]
            struct ConfigRow {
                #[diesel(sql_type = diesel::sql_types::Text)]
                config_key: String,
                #[diesel(sql_type = diesel::sql_types::Text)]
                config_value: String,
            }
            let configs: Vec<ConfigRow> = diesel::sql_query(
                "SELECT config_key, config_value FROM bot_configuration \
                WHERE bot_id = $1 AND config_key LIKE 'sandbox-%'",
            )
            .bind::<diesel::sql_types::Uuid, _>(bot_id)
            .load(&mut conn)
            .unwrap_or_default();
            for row in configs {
                match row.config_key.as_str() {
                    // Booleans: anything other than "true" (case-insensitive) is false.
                    "sandbox-enabled" => {
                        config.enabled = row.config_value.to_lowercase() == "true";
                    }
                    "sandbox-runtime" => {
                        config.runtime = SandboxRuntime::from(row.config_value.as_str());
                    }
                    // Numeric keys fall back to the documented defaults on parse failure.
                    "sandbox-timeout" => {
                        config.timeout_seconds = row.config_value.parse().unwrap_or(30);
                    }
                    "sandbox-memory-limit" => {
                        config.memory_limit_mb = row.config_value.parse().unwrap_or(256);
                    }
                    "sandbox-cpu-limit" => {
                        config.cpu_limit_percent = row.config_value.parse().unwrap_or(50);
                    }
                    "sandbox-network-enabled" => {
                        config.network_enabled = row.config_value.to_lowercase() == "true";
                    }
                    // Unknown sandbox-* keys are ignored.
                    _ => {}
                }
            }
        }
        config
    }
}
/// Result of code execution
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExecutionResult {
    /// Standard output
    pub stdout: String,
    /// Standard error
    pub stderr: String,
    /// Exit code (0 = success)
    pub exit_code: i32,
    /// Execution time in milliseconds
    pub execution_time_ms: u64,
    /// Whether execution timed out
    pub timed_out: bool,
    /// Whether execution was killed due to resource limits
    /// NOTE(review): the container executors set this for any non-zero
    /// exit status, not only resource-limit kills — confirm intent.
    pub killed: bool,
    /// Any error message
    pub error: Option<String>,
}
impl ExecutionResult {
    /// Successful run: exit code 0, captured output, no error flags.
    pub fn success(stdout: String, stderr: String, execution_time_ms: u64) -> Self {
        Self {
            stdout,
            stderr,
            exit_code: 0,
            execution_time_ms,
            timed_out: false,
            killed: false,
            error: None,
        }
    }

    /// Failed run that produced no process output (setup or infra error).
    pub fn error(message: &str) -> Self {
        Self {
            stdout: String::new(),
            stderr: String::new(),
            exit_code: -1,
            execution_time_ms: 0,
            timed_out: false,
            killed: false,
            error: Some(message.to_string()),
        }
    }

    /// Run that exceeded the configured time limit and was killed.
    pub fn timeout() -> Self {
        Self {
            stdout: String::new(),
            stderr: "Execution timed out".to_string(),
            exit_code: -1,
            execution_time_ms: 0,
            timed_out: true,
            killed: true,
            error: Some("Execution exceeded time limit".to_string()),
        }
    }

    /// True only for a clean exit (code 0) with no error and no timeout.
    pub fn is_success(&self) -> bool {
        self.exit_code == 0 && self.error.is_none() && !self.timed_out
    }

    /// Human-readable summary: stdout on success, otherwise a formatted
    /// error line that includes stderr.
    pub fn output(&self) -> String {
        if !self.is_success() {
            return match &self.error {
                Some(err) => format!("Error: {}\n{}", err, self.stderr),
                None => format!("Error (exit code {}): {}", self.exit_code, self.stderr),
            };
        }
        self.stdout.clone()
    }
}
/// Code Sandbox for safe execution
///
/// Wraps a `SandboxConfig` plus the owning session id and dispatches
/// execution to the configured runtime.
pub struct CodeSandbox {
    // Runtime selection, resource limits, timeout, work dir.
    config: SandboxConfig,
    // Used in the temp code-file name written under `work_dir`.
    session_id: Uuid,
}
impl CodeSandbox {
pub fn new(config: SandboxConfig, session_id: Uuid) -> Self {
Self { config, session_id }
}
/// Execute code in the sandbox
pub async fn execute(&self, code: &str, language: CodeLanguage) -> ExecutionResult {
if !self.config.enabled {
return ExecutionResult::error("Sandbox execution is disabled");
}
let start_time = std::time::Instant::now();
let result = match self.config.runtime {
SandboxRuntime::LXC => self.execute_lxc(code, &language).await,
SandboxRuntime::Docker => self.execute_docker(code, &language).await,
SandboxRuntime::Firecracker => self.execute_firecracker(code, &language).await,
SandboxRuntime::Process => self.execute_process(code, &language).await,
};
let execution_time_ms = start_time.elapsed().as_millis() as u64;
match result {
Ok(mut exec_result) => {
exec_result.execution_time_ms = execution_time_ms;
exec_result
}
Err(e) => {
let mut err_result = ExecutionResult::error(&e);
err_result.execution_time_ms = execution_time_ms;
err_result
}
}
}
/// Execute code in LXC container
async fn execute_lxc(
&self,
code: &str,
language: &CodeLanguage,
) -> Result<ExecutionResult, String> {
// Create unique container name
let container_name = format!("gb-sandbox-{}", Uuid::new_v4());
// Ensure work directory exists
std::fs::create_dir_all(&self.config.work_dir)
.map_err(|e| format!("Failed to create work dir: {}", e))?;
// Write code to temp file
let code_file = format!(
"{}/{}.{}",
self.config.work_dir,
self.session_id,
language.file_extension()
);
std::fs::write(&code_file, code)
.map_err(|e| format!("Failed to write code file: {}", e))?;
// Build LXC command
let mut cmd = Command::new("lxc-execute");
cmd.arg("-n")
.arg(&container_name)
.arg("-f")
.arg(format!("/etc/lxc/{}.conf", language.lxc_image()))
.arg("--")
.arg(language.interpreter())
.arg(&code_file);
// Set resource limits via cgroups
cmd.env(
"LXC_CGROUP_MEMORY_LIMIT",
format!("{}M", self.config.memory_limit_mb),
)
.env(
"LXC_CGROUP_CPU_QUOTA",
format!("{}", self.config.cpu_limit_percent * 1000),
);
// Execute with timeout
let timeout_duration = Duration::from_secs(self.config.timeout_seconds);
let output = timeout(timeout_duration, async {
tokio::process::Command::new("lxc-execute")
.arg("-n")
.arg(&container_name)
.arg("-f")
.arg(format!("/etc/lxc/{}.conf", language.lxc_image()))
.arg("--")
.arg(language.interpreter())
.arg(&code_file)
.output()
.await
})
.await;
// Cleanup temp file
let _ = std::fs::remove_file(&code_file);
match output {
Ok(Ok(output)) => {
let stdout = String::from_utf8_lossy(&output.stdout).to_string();
let stderr = String::from_utf8_lossy(&output.stderr).to_string();
let exit_code = output.status.code().unwrap_or(-1);
Ok(ExecutionResult {
stdout,
stderr,
exit_code,
execution_time_ms: 0,
timed_out: false,
killed: !output.status.success(),
error: None,
})
}
Ok(Err(e)) => Err(format!("LXC execution failed: {}", e)),
Err(_) => Ok(ExecutionResult::timeout()),
}
}
/// Execute code in Docker container
async fn execute_docker(
&self,
code: &str,
language: &CodeLanguage,
) -> Result<ExecutionResult, String> {
// Determine Docker image
let image = match language {
CodeLanguage::Python => "python:3.11-slim",
CodeLanguage::JavaScript => "node:20-slim",
CodeLanguage::Bash => "alpine:latest",
};
// Build Docker command
let mut args = vec![
"run".to_string(),
"--rm".to_string(),
"--network".to_string(),
if self.config.network_enabled {
"bridge"
} else {
"none"
}
.to_string(),
"--memory".to_string(),
format!("{}m", self.config.memory_limit_mb),
"--cpus".to_string(),
format!("{:.2}", self.config.cpu_limit_percent as f64 / 100.0),
"--read-only".to_string(),
"--security-opt".to_string(),
"no-new-privileges".to_string(),
image.to_string(),
language.interpreter().to_string(),
"-c".to_string(),
code.to_string(),
];
// Execute with timeout
let timeout_duration = Duration::from_secs(self.config.timeout_seconds);
let output = timeout(timeout_duration, async {
tokio::process::Command::new("docker")
.args(&args)
.output()
.await
})
.await;
match output {
Ok(Ok(output)) => {
let stdout = String::from_utf8_lossy(&output.stdout).to_string();
let stderr = String::from_utf8_lossy(&output.stderr).to_string();
let exit_code = output.status.code().unwrap_or(-1);
Ok(ExecutionResult {
stdout,
stderr,
exit_code,
execution_time_ms: 0,
timed_out: false,
killed: !output.status.success(),
error: None,
})
}
Ok(Err(e)) => Err(format!("Docker execution failed: {}", e)),
Err(_) => Ok(ExecutionResult::timeout()),
}
}
/// Execute code in Firecracker microVM
async fn execute_firecracker(
&self,
code: &str,
language: &CodeLanguage,
) -> Result<ExecutionResult, String> {
// Firecracker requires more complex setup
// For now, fall back to process-based execution with a warning
warn!("Firecracker runtime not yet implemented, falling back to process isolation");
self.execute_process(code, language).await
}
/// Execute code in isolated process (fallback)
async fn execute_process(
&self,
code: &str,
language: &CodeLanguage,
) -> Result<ExecutionResult, String> {
// Create temp directory
let temp_dir = format!("{}/{}", self.config.work_dir, Uuid::new_v4());
std::fs::create_dir_all(&temp_dir)
.map_err(|e| format!("Failed to create temp dir: {}", e))?;
// Write code to temp file
let code_file = format!("{}/code.{}", temp_dir, language.file_extension());
std::fs::write(&code_file, code)
.map_err(|e| format!("Failed to write code file: {}", e))?;
// Build command based on language
let (cmd_name, cmd_args): (&str, Vec<&str>) = match language {
CodeLanguage::Python => ("python3", vec![&code_file]),
CodeLanguage::JavaScript => ("node", vec![&code_file]),
CodeLanguage::Bash => ("bash", vec![&code_file]),
};
// Execute with timeout
let timeout_duration = Duration::from_secs(self.config.timeout_seconds);
let output = timeout(timeout_duration, async {
tokio::process::Command::new(cmd_name)
.args(&cmd_args)
.current_dir(&temp_dir)
.env_clear()
.env("PATH", "/usr/local/bin:/usr/bin:/bin")
.env("HOME", &temp_dir)
.env("TMPDIR", &temp_dir)
.output()
.await
})
.await;
// Cleanup temp directory
let _ = std::fs::remove_dir_all(&temp_dir);
match output {
Ok(Ok(output)) => {
let stdout = String::from_utf8_lossy(&output.stdout).to_string();
let stderr = String::from_utf8_lossy(&output.stderr).to_string();
let exit_code = output.status.code().unwrap_or(-1);
Ok(ExecutionResult {
stdout,
stderr,
exit_code,
execution_time_ms: 0,
timed_out: false,
killed: false,
error: None,
})
}
Ok(Err(e)) => Err(format!("Process execution failed: {}", e)),
Err(_) => Ok(ExecutionResult::timeout()),
}
}
/// Execute code from a file path
pub async fn execute_file(&self, file_path: &str, language: CodeLanguage) -> ExecutionResult {
match std::fs::read_to_string(file_path) {
Ok(code) => self.execute(&code, language).await,
Err(e) => ExecutionResult::error(&format!("Failed to read file: {}", e)),
}
}
}
/// Register code sandbox keywords with the engine
///
/// Wires RUN PYTHON, RUN JAVASCRIPT (plus its RUN JS alias), RUN BASH,
/// and the RUN ... WITH FILE variants into the given Rhai engine.
pub fn register_sandbox_keywords(state: Arc<AppState>, user: UserSession, engine: &mut Engine) {
    run_python_keyword(Arc::clone(&state), user.clone(), engine);
    run_javascript_keyword(Arc::clone(&state), user.clone(), engine);
    run_bash_keyword(Arc::clone(&state), user.clone(), engine);
    // Last registration can consume the originals instead of cloning.
    run_file_keyword(state, user, engine);
}
/// RUN PYTHON "code"
pub fn run_python_keyword(state: Arc<AppState>, user: UserSession, engine: &mut Engine) {
let state_clone = Arc::clone(&state);
let user_clone = user.clone();
engine
.register_custom_syntax(
&["RUN", "PYTHON", "$expr$"],
false,
move |context, inputs| {
let code = context
.eval_expression_tree(&inputs[0])?
.to_string()
.trim_matches('"')
.to_string();
trace!("RUN PYTHON for session: {}", user_clone.id);
let state_for_task = Arc::clone(&state_clone);
let session_id = user_clone.id;
let bot_id = user_clone.bot_id;
let (tx, rx) = std::sync::mpsc::channel();
std::thread::spawn(move || {
let rt = tokio::runtime::Runtime::new().expect("Failed to create runtime");
let result = rt.block_on(async {
let config = SandboxConfig::from_bot_config(&state_for_task, bot_id);
let sandbox = CodeSandbox::new(config, session_id);
sandbox.execute(&code, CodeLanguage::Python).await
});
let _ = tx.send(result);
});
match rx.recv_timeout(std::time::Duration::from_secs(60)) {
Ok(result) => Ok(Dynamic::from(result.output())),
Err(_) => Err(Box::new(rhai::EvalAltResult::ErrorRuntime(
"RUN PYTHON timed out".into(),
rhai::Position::NONE,
))),
}
},
)
.expect("Failed to register RUN PYTHON syntax");
}
/// RUN JAVASCRIPT "code"
pub fn run_javascript_keyword(state: Arc<AppState>, user: UserSession, engine: &mut Engine) {
let state_clone = Arc::clone(&state);
let user_clone = user.clone();
engine
.register_custom_syntax(
&["RUN", "JAVASCRIPT", "$expr$"],
false,
move |context, inputs| {
let code = context
.eval_expression_tree(&inputs[0])?
.to_string()
.trim_matches('"')
.to_string();
trace!("RUN JAVASCRIPT for session: {}", user_clone.id);
let state_for_task = Arc::clone(&state_clone);
let session_id = user_clone.id;
let bot_id = user_clone.bot_id;
let (tx, rx) = std::sync::mpsc::channel();
std::thread::spawn(move || {
let rt = tokio::runtime::Runtime::new().expect("Failed to create runtime");
let result = rt.block_on(async {
let config = SandboxConfig::from_bot_config(&state_for_task, bot_id);
let sandbox = CodeSandbox::new(config, session_id);
sandbox.execute(&code, CodeLanguage::JavaScript).await
});
let _ = tx.send(result);
});
match rx.recv_timeout(std::time::Duration::from_secs(60)) {
Ok(result) => Ok(Dynamic::from(result.output())),
Err(_) => Err(Box::new(rhai::EvalAltResult::ErrorRuntime(
"RUN JAVASCRIPT timed out".into(),
rhai::Position::NONE,
))),
}
},
)
.expect("Failed to register RUN JAVASCRIPT syntax");
// Also register JS as alias
let state_clone2 = Arc::clone(&state);
let user_clone2 = user.clone();
engine
.register_custom_syntax(&["RUN", "JS", "$expr$"], false, move |context, inputs| {
let code = context
.eval_expression_tree(&inputs[0])?
.to_string()
.trim_matches('"')
.to_string();
let state_for_task = Arc::clone(&state_clone2);
let session_id = user_clone2.id;
let bot_id = user_clone2.bot_id;
let (tx, rx) = std::sync::mpsc::channel();
std::thread::spawn(move || {
let rt = tokio::runtime::Runtime::new().expect("Failed to create runtime");
let result = rt.block_on(async {
let config = SandboxConfig::from_bot_config(&state_for_task, bot_id);
let sandbox = CodeSandbox::new(config, session_id);
sandbox.execute(&code, CodeLanguage::JavaScript).await
});
let _ = tx.send(result);
});
match rx.recv_timeout(std::time::Duration::from_secs(60)) {
Ok(result) => Ok(Dynamic::from(result.output())),
Err(_) => Err(Box::new(rhai::EvalAltResult::ErrorRuntime(
"RUN JS timed out".into(),
rhai::Position::NONE,
))),
}
})
.expect("Failed to register RUN JS syntax");
}
/// RUN BASH "code"
pub fn run_bash_keyword(state: Arc<AppState>, user: UserSession, engine: &mut Engine) {
let state_clone = Arc::clone(&state);
let user_clone = user.clone();
engine
.register_custom_syntax(&["RUN", "BASH", "$expr$"], false, move |context, inputs| {
let code = context
.eval_expression_tree(&inputs[0])?
.to_string()
.trim_matches('"')
.to_string();
trace!("RUN BASH for session: {}", user_clone.id);
let state_for_task = Arc::clone(&state_clone);
let session_id = user_clone.id;
let bot_id = user_clone.bot_id;
let (tx, rx) = std::sync::mpsc::channel();
std::thread::spawn(move || {
let rt = tokio::runtime::Runtime::new().expect("Failed to create runtime");
let result = rt.block_on(async {
let config = SandboxConfig::from_bot_config(&state_for_task, bot_id);
let sandbox = CodeSandbox::new(config, session_id);
sandbox.execute(&code, CodeLanguage::Bash).await
});
let _ = tx.send(result);
});
match rx.recv_timeout(std::time::Duration::from_secs(60)) {
Ok(result) => Ok(Dynamic::from(result.output())),
Err(_) => Err(Box::new(rhai::EvalAltResult::ErrorRuntime(
"RUN BASH timed out".into(),
rhai::Position::NONE,
))),
}
})
.expect("Failed to register RUN BASH syntax");
}
/// RUN PYTHON WITH FILE "script.py"
pub fn run_file_keyword(state: Arc<AppState>, user: UserSession, engine: &mut Engine) {
let state_clone = Arc::clone(&state);
let user_clone = user.clone();
engine
.register_custom_syntax(
&["RUN", "PYTHON", "WITH", "FILE", "$expr$"],
false,
move |context, inputs| {
let file_path = context
.eval_expression_tree(&inputs[0])?
.to_string()
.trim_matches('"')
.to_string();
trace!(
"RUN PYTHON WITH FILE {} for session: {}",
file_path,
user_clone.id
);
let state_for_task = Arc::clone(&state_clone);
let session_id = user_clone.id;
let bot_id = user_clone.bot_id;
let (tx, rx) = std::sync::mpsc::channel();
std::thread::spawn(move || {
let rt = tokio::runtime::Runtime::new().expect("Failed to create runtime");
let result = rt.block_on(async {
let config = SandboxConfig::from_bot_config(&state_for_task, bot_id);
let sandbox = CodeSandbox::new(config, session_id);
sandbox.execute_file(&file_path, CodeLanguage::Python).await
});
let _ = tx.send(result);
});
match rx.recv_timeout(std::time::Duration::from_secs(60)) {
Ok(result) => Ok(Dynamic::from(result.output())),
Err(_) => Err(Box::new(rhai::EvalAltResult::ErrorRuntime(
"RUN PYTHON WITH FILE timed out".into(),
rhai::Position::NONE,
))),
}
},
)
.expect("Failed to register RUN PYTHON WITH FILE syntax");
// JavaScript file execution
let state_clone2 = Arc::clone(&state);
let user_clone2 = user.clone();
engine
.register_custom_syntax(
&["RUN", "JAVASCRIPT", "WITH", "FILE", "$expr$"],
false,
move |context, inputs| {
let file_path = context
.eval_expression_tree(&inputs[0])?
.to_string()
.trim_matches('"')
.to_string();
let state_for_task = Arc::clone(&state_clone2);
let session_id = user_clone2.id;
let bot_id = user_clone2.bot_id;
let (tx, rx) = std::sync::mpsc::channel();
std::thread::spawn(move || {
let rt = tokio::runtime::Runtime::new().expect("Failed to create runtime");
let result = rt.block_on(async {
let config = SandboxConfig::from_bot_config(&state_for_task, bot_id);
let sandbox = CodeSandbox::new(config, session_id);
sandbox
.execute_file(&file_path, CodeLanguage::JavaScript)
.await
});
let _ = tx.send(result);
});
match rx.recv_timeout(std::time::Duration::from_secs(60)) {
Ok(result) => Ok(Dynamic::from(result.output())),
Err(_) => Err(Box::new(rhai::EvalAltResult::ErrorRuntime(
"RUN JAVASCRIPT WITH FILE timed out".into(),
rhai::Position::NONE,
))),
}
},
)
.expect("Failed to register RUN JAVASCRIPT WITH FILE syntax");
}
// ============================================================================
// LXC Container Setup Templates
// ============================================================================
/// Generate LXC configuration for Python sandbox
///
/// Returns the literal contents of the raw string below: an isolated
/// (no-network) container config with cgroup v2 memory/CPU caps and
/// read-only bind mounts of the host python3 install.
///
/// NOTE(review): lines inside the raw string carry the Rust source
/// indentation, so the emitted config lines have leading spaces — confirm
/// the LXC config parser tolerates this, or dedent the template.
pub fn generate_python_lxc_config() -> String {
    r#"
    # LXC configuration for Python sandbox
    lxc.include = /usr/share/lxc/config/common.conf
    lxc.arch = linux64
    # Container name template
    lxc.uts.name = gb-sandbox-python
    # Root filesystem
    lxc.rootfs.path = dir:/var/lib/lxc/gb-sandbox-python/rootfs
    # Network - isolated by default
    lxc.net.0.type = empty
    # Resource limits
    lxc.cgroup2.memory.max = 256M
    lxc.cgroup2.cpu.max = 50000 100000
    # Security
    lxc.cap.drop = sys_admin sys_boot sys_module sys_time
    lxc.apparmor.profile = generated
    lxc.seccomp.profile = /usr/share/lxc/config/common.seccomp
    # Mount points - minimal
    lxc.mount.auto = proc:mixed sys:ro
    lxc.mount.entry = /usr/bin/python3 usr/bin/python3 none ro,bind 0 0
    lxc.mount.entry = /usr/lib/python3 usr/lib/python3 none ro,bind 0 0
    lxc.mount.entry = tmpfs tmp tmpfs defaults 0 0
    "#
    .to_string()
}
/// Generate LXC configuration for Node.js sandbox
///
/// Same shape as the Python template: isolated network, cgroup v2 caps,
/// read-only bind mounts of the host node install.
///
/// NOTE(review): lines inside the raw string carry the Rust source
/// indentation, so the emitted config lines have leading spaces — confirm
/// the LXC config parser tolerates this, or dedent the template.
pub fn generate_node_lxc_config() -> String {
    r#"
    # LXC configuration for Node.js sandbox
    lxc.include = /usr/share/lxc/config/common.conf
    lxc.arch = linux64
    # Container name template
    lxc.uts.name = gb-sandbox-node
    # Root filesystem
    lxc.rootfs.path = dir:/var/lib/lxc/gb-sandbox-node/rootfs
    # Network - isolated by default
    lxc.net.0.type = empty
    # Resource limits
    lxc.cgroup2.memory.max = 256M
    lxc.cgroup2.cpu.max = 50000 100000
    # Security
    lxc.cap.drop = sys_admin sys_boot sys_module sys_time
    lxc.apparmor.profile = generated
    lxc.seccomp.profile = /usr/share/lxc/config/common.seccomp
    # Mount points - minimal
    lxc.mount.auto = proc:mixed sys:ro
    lxc.mount.entry = /usr/bin/node usr/bin/node none ro,bind 0 0
    lxc.mount.entry = /usr/lib/node_modules usr/lib/node_modules none ro,bind 0 0
    lxc.mount.entry = tmpfs tmp tmpfs defaults 0 0
    "#
    .to_string()
}
// ============================================================================
// Tests
// ============================================================================
#[cfg(test)]
mod tests {
    use super::*;
    // Defaults: sandboxing on, 30 s timeout, 256 MB memory, no network.
    #[test]
    fn test_sandbox_config_default() {
        let config = SandboxConfig::default();
        assert!(config.enabled);
        assert_eq!(config.timeout_seconds, 30);
        assert_eq!(config.memory_limit_mb, 256);
        assert!(!config.network_enabled);
    }
    // A success result exposes its stdout via output().
    #[test]
    fn test_execution_result_success() {
        let result = ExecutionResult::success("Hello, World!".to_string(), String::new(), 100);
        assert!(result.is_success());
        assert_eq!(result.output(), "Hello, World!");
    }
    // An error result is not a success and its output mentions "Error".
    #[test]
    fn test_execution_result_error() {
        let result = ExecutionResult::error("Something went wrong");
        assert!(!result.is_success());
        assert!(result.output().contains("Error"));
    }
    // A timeout result sets the timed_out flag and is not a success.
    #[test]
    fn test_execution_result_timeout() {
        let result = ExecutionResult::timeout();
        assert!(!result.is_success());
        assert!(result.timed_out);
    }
    // Language parsing accepts common aliases, case-insensitively.
    #[test]
    fn test_code_language_from_str() {
        assert_eq!(CodeLanguage::from("python"), CodeLanguage::Python);
        assert_eq!(CodeLanguage::from("PYTHON"), CodeLanguage::Python);
        assert_eq!(CodeLanguage::from("py"), CodeLanguage::Python);
        assert_eq!(CodeLanguage::from("javascript"), CodeLanguage::JavaScript);
        assert_eq!(CodeLanguage::from("js"), CodeLanguage::JavaScript);
        assert_eq!(CodeLanguage::from("node"), CodeLanguage::JavaScript);
        assert_eq!(CodeLanguage::from("bash"), CodeLanguage::Bash);
    }
    #[test]
    fn test_code_language_file_extension() {
        assert_eq!(CodeLanguage::Python.file_extension(), "py");
        assert_eq!(CodeLanguage::JavaScript.file_extension(), "js");
        assert_eq!(CodeLanguage::Bash.file_extension(), "sh");
    }
    #[test]
    fn test_code_language_interpreter() {
        assert_eq!(CodeLanguage::Python.interpreter(), "python3");
        assert_eq!(CodeLanguage::JavaScript.interpreter(), "node");
        assert_eq!(CodeLanguage::Bash.interpreter(), "bash");
    }
    // Unrecognized runtimes fall back to the plain Process runtime.
    #[test]
    fn test_sandbox_runtime_from_str() {
        assert_eq!(SandboxRuntime::from("lxc"), SandboxRuntime::LXC);
        assert_eq!(SandboxRuntime::from("docker"), SandboxRuntime::Docker);
        assert_eq!(
            SandboxRuntime::from("firecracker"),
            SandboxRuntime::Firecracker
        );
        assert_eq!(SandboxRuntime::from("unknown"), SandboxRuntime::Process);
    }
    // Smoke-checks key lines of both generated LXC configs.
    #[test]
    fn test_lxc_config_generation() {
        let python_config = generate_python_lxc_config();
        assert!(python_config.contains("gb-sandbox-python"));
        assert!(python_config.contains("memory.max"));
        let node_config = generate_node_lxc_config();
        assert!(node_config.contains("gb-sandbox-node"));
        assert!(node_config.contains("/usr/bin/node"));
    }
}

View file

@ -0,0 +1,752 @@
//! Episodic Memory - Conversation Summaries
//!
//! This module provides episodic memory capabilities that compress long conversations
//! into summaries for efficient context management. Episodic memory enables:
//!
//! - Automatic conversation summarization
//! - Key topic extraction
//! - Decision and action item tracking
//! - Long-term memory without context overflow
//!
//! ## BASIC Keywords
//!
//! ```basic
//! ' Create episode summary manually
//! summary = CREATE EPISODE SUMMARY
//!
//! ' Get recent episodes for a user
//! episodes = GET EPISODES(10)
//!
//! ' Search episodes by topic
//! related = SEARCH EPISODES "billing issues"
//!
//! ' Clear old episodes
//! CLEAR EPISODES OLDER THAN 30
//! ```
//!
//! ## Config.csv Properties
//!
//! ```csv
//! name,value
//! episodic-memory-enabled,true
//! episodic-summary-threshold,20
//! episodic-summary-model,fast
//! episodic-max-episodes,100
//! episodic-retention-days,365
//! episodic-auto-summarize,true
//! ```
use chrono::{DateTime, Duration, Utc};
use rhai::{Dynamic, Engine, EvalAltResult, Map, Array};
use serde::{Deserialize, Serialize};
use std::sync::Arc;
use tokio::sync::RwLock;
use tracing::{debug, error, info, warn};
use uuid::Uuid;
use crate::state::AppState;
/// Episode summary structure
///
/// One compressed record of a conversation, produced by
/// `EpisodicMemoryManager::parse_summary_response` and persisted via the
/// statements in the `sql` module.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Episode {
    /// Unique episode identifier
    pub id: Uuid,
    /// User ID this episode belongs to
    pub user_id: Uuid,
    /// Bot ID that created the episode
    pub bot_id: Uuid,
    /// Session/conversation ID
    pub session_id: Uuid,
    /// Condensed summary of the conversation
    pub summary: String,
    /// Key topics discussed
    pub key_topics: Vec<String>,
    /// Decisions made during conversation
    pub decisions: Vec<String>,
    /// Action items identified
    pub action_items: Vec<ActionItem>,
    /// Sentiment analysis result
    pub sentiment: Sentiment,
    /// Resolution status
    pub resolution: ResolutionStatus,
    /// Number of messages summarized
    pub message_count: usize,
    /// Original message IDs (for reference)
    pub message_ids: Vec<Uuid>,
    /// When the episode was created
    pub created_at: DateTime<Utc>,
    /// Time range of original conversation
    pub conversation_start: DateTime<Utc>,
    pub conversation_end: DateTime<Utc>,
    /// Free-form metadata (arbitrary JSON; `{}` when parsed from an LLM reply)
    pub metadata: serde_json::Value,
}
/// Action item extracted from conversation
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ActionItem {
    /// Description of the action
    pub description: String,
    /// Who is responsible
    pub assignee: Option<String>,
    /// Due date if mentioned (currently never populated by extraction)
    pub due_date: Option<DateTime<Utc>>,
    /// Priority level
    pub priority: Priority,
    /// Completion status
    pub completed: bool,
}
/// Priority levels for action items.
///
/// Serialized in lowercase (e.g. "medium") to match the labels requested
/// from the LLM in `EpisodicMemoryManager::generate_summary_prompt`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default)]
#[serde(rename_all = "lowercase")]
pub enum Priority {
    Low,
    /// Default when the model omits or mislabels a priority.
    #[default]
    Medium,
    High,
    Critical,
}
/// Sentiment analysis result
///
/// Produced by the LLM summarization step; values outside the stated
/// ranges are passed through as-is (no clamping is performed here).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Sentiment {
    /// Overall sentiment score (-1.0 to 1.0)
    pub score: f64,
    /// Sentiment label
    pub label: SentimentLabel,
    /// Confidence in the assessment (0.0 to 1.0)
    pub confidence: f64,
}
/// Sentiment labels.
///
/// Serialized in lowercase; note the LLM prompt uses snake_case labels
/// ("very_negative" etc.) which are mapped manually in
/// `EpisodicMemoryManager::parse_summary_response`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default)]
#[serde(rename_all = "lowercase")]
pub enum SentimentLabel {
    VeryNegative,
    Negative,
    /// Default when sentiment cannot be determined.
    #[default]
    Neutral,
    Positive,
    VeryPositive,
}
impl Default for Sentiment {
    /// Neutral sentiment with middling (0.5) confidence — used when the
    /// model reply omits or garbles the sentiment block.
    fn default() -> Self {
        Sentiment {
            score: 0.0,
            label: SentimentLabel::Neutral,
            confidence: 0.5,
        }
    }
}
/// Resolution status of the conversation.
///
/// Serialized in lowercase to match both the LLM prompt contract and the
/// `resolution` column default in `EPISODIC_MEMORY_SCHEMA`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default)]
#[serde(rename_all = "lowercase")]
pub enum ResolutionStatus {
    Resolved,
    Unresolved,
    Escalated,
    Pending,
    /// Default when the model does not report a resolution.
    #[default]
    Unknown,
}
/// Configuration for episodic memory
///
/// Populated from Config.csv `episodic-*` keys via
/// `EpisodicMemoryManager::from_config`; see the module docs for the key
/// names and defaults.
#[derive(Debug, Clone)]
pub struct EpisodicMemoryConfig {
    /// Whether episodic memory is enabled
    pub enabled: bool,
    /// Message count threshold before auto-summarization
    pub summary_threshold: usize,
    /// Model to use for summarization
    pub summary_model: String,
    /// Maximum episodes to keep per user
    pub max_episodes: usize,
    /// Days to retain episodes
    pub retention_days: u32,
    /// Whether to auto-summarize conversations
    pub auto_summarize: bool,
}
impl Default for EpisodicMemoryConfig {
    // Defaults mirror the example Config.csv in the module docs.
    fn default() -> Self {
        EpisodicMemoryConfig {
            enabled: true,
            summary_threshold: 20,
            summary_model: "fast".to_string(),
            max_episodes: 100,
            retention_days: 365,
            auto_summarize: true,
        }
    }
}
/// Message structure for summarization
///
/// Minimal view of a conversation message — just enough to render the
/// "[HH:MM] role: content" lines in the summarization prompt and to record
/// `message_ids` / conversation time bounds on the resulting `Episode`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConversationMessage {
    pub id: Uuid,
    pub role: String,
    pub content: String,
    pub timestamp: DateTime<Utc>,
}
/// Episodic Memory Manager
///
/// Stateless helper that builds summarization prompts and parses LLM
/// responses into `Episode` records. It does not perform any I/O itself;
/// persistence is handled elsewhere using the statements in the `sql`
/// module.
pub struct EpisodicMemoryManager {
    // Effective configuration (defaults overridden by Config.csv values).
    config: EpisodicMemoryConfig,
}
impl EpisodicMemoryManager {
    /// Create a new episodic memory manager
    pub fn new(config: EpisodicMemoryConfig) -> Self {
        EpisodicMemoryManager { config }
    }
    /// Create from config map
    ///
    /// Reads the `episodic-*` keys documented in the module header; any
    /// missing or unparsable value falls back to its default. Note that
    /// boolean flags compare against the exact string "true", so values
    /// like "True" or "1" are treated as false.
    pub fn from_config(config_map: &std::collections::HashMap<String, String>) -> Self {
        let config = EpisodicMemoryConfig {
            enabled: config_map
                .get("episodic-memory-enabled")
                .map(|v| v == "true")
                .unwrap_or(true),
            summary_threshold: config_map
                .get("episodic-summary-threshold")
                .and_then(|v| v.parse().ok())
                .unwrap_or(20),
            summary_model: config_map
                .get("episodic-summary-model")
                .cloned()
                .unwrap_or_else(|| "fast".to_string()),
            max_episodes: config_map
                .get("episodic-max-episodes")
                .and_then(|v| v.parse().ok())
                .unwrap_or(100),
            retention_days: config_map
                .get("episodic-retention-days")
                .and_then(|v| v.parse().ok())
                .unwrap_or(365),
            auto_summarize: config_map
                .get("episodic-auto-summarize")
                .map(|v| v == "true")
                .unwrap_or(true),
        };
        EpisodicMemoryManager::new(config)
    }
    /// Check if auto-summarization should trigger: enabled + auto-summarize
    /// + message count at or above the configured threshold.
    pub fn should_summarize(&self, message_count: usize) -> bool {
        self.config.enabled
            && self.config.auto_summarize
            && message_count >= self.config.summary_threshold
    }
    /// Generate the summarization prompt
    ///
    /// Messages are rendered as "[HH:MM] role: content" lines; the prompt
    /// asks the model to reply with exactly the JSON shape that
    /// `parse_summary_response` expects.
    pub fn generate_summary_prompt(&self, messages: &[ConversationMessage]) -> String {
        let formatted_messages = messages
            .iter()
            .map(|m| format!("[{}] {}: {}", m.timestamp.format("%H:%M"), m.role, m.content))
            .collect::<Vec<_>>()
            .join("\n");
        format!(
            r#"Analyze and summarize this conversation. Extract key information.
CONVERSATION:
{}
Respond with valid JSON only:
{{
"summary": "A concise 2-3 sentence summary of the conversation",
"key_topics": ["topic1", "topic2"],
"decisions": ["decision1", "decision2"],
"action_items": [
{{"description": "action description", "assignee": "user/bot/null", "priority": "low/medium/high/critical"}}
],
"sentiment": {{
"score": 0.0,
"label": "very_negative/negative/neutral/positive/very_positive",
"confidence": 0.8
}},
"resolution": "resolved/unresolved/escalated/pending/unknown"
}}"#,
            formatted_messages
        )
    }
    /// Parse LLM response into episode data
    ///
    /// Tolerant by design: every field falls back to a neutral default when
    /// missing or malformed, so a sloppy model reply still yields a usable
    /// `Episode`. Fails only when no JSON object can be located or parsed.
    pub fn parse_summary_response(
        &self,
        response: &str,
        messages: &[ConversationMessage],
        user_id: Uuid,
        bot_id: Uuid,
        session_id: Uuid,
    ) -> Result<Episode, String> {
        // Try to extract JSON from response
        let json_str = extract_json(response)?;
        let parsed: serde_json::Value = serde_json::from_str(&json_str)
            .map_err(|e| format!("Failed to parse JSON: {}", e))?;
        let summary = parsed["summary"]
            .as_str()
            .unwrap_or("Conversation summary unavailable")
            .to_string();
        let key_topics: Vec<String> = parsed["key_topics"]
            .as_array()
            .map(|arr| {
                arr.iter()
                    .filter_map(|v| v.as_str().map(String::from))
                    .collect()
            })
            .unwrap_or_default();
        let decisions: Vec<String> = parsed["decisions"]
            .as_array()
            .map(|arr| {
                arr.iter()
                    .filter_map(|v| v.as_str().map(String::from))
                    .collect()
            })
            .unwrap_or_default();
        // Action items missing a description are skipped entirely
        // (the `?` inside filter_map drops the whole entry).
        let action_items: Vec<ActionItem> = parsed["action_items"]
            .as_array()
            .map(|arr| {
                arr.iter()
                    .filter_map(|v| {
                        Some(ActionItem {
                            description: v["description"].as_str()?.to_string(),
                            assignee: v["assignee"].as_str().map(String::from),
                            // Not requested from the model yet.
                            due_date: None,
                            priority: match v["priority"].as_str().unwrap_or("medium") {
                                "low" => Priority::Low,
                                "high" => Priority::High,
                                "critical" => Priority::Critical,
                                _ => Priority::Medium,
                            },
                            completed: false,
                        })
                    })
                    .collect()
            })
            .unwrap_or_default();
        // Unrecognized labels degrade to Neutral / Unknown rather than error.
        let sentiment = Sentiment {
            score: parsed["sentiment"]["score"].as_f64().unwrap_or(0.0),
            label: match parsed["sentiment"]["label"].as_str().unwrap_or("neutral") {
                "very_negative" => SentimentLabel::VeryNegative,
                "negative" => SentimentLabel::Negative,
                "positive" => SentimentLabel::Positive,
                "very_positive" => SentimentLabel::VeryPositive,
                _ => SentimentLabel::Neutral,
            },
            confidence: parsed["sentiment"]["confidence"].as_f64().unwrap_or(0.5),
        };
        let resolution = match parsed["resolution"].as_str().unwrap_or("unknown") {
            "resolved" => ResolutionStatus::Resolved,
            "unresolved" => ResolutionStatus::Unresolved,
            "escalated" => ResolutionStatus::Escalated,
            "pending" => ResolutionStatus::Pending,
            _ => ResolutionStatus::Unknown,
        };
        // Conversation bounds come from the first/last message timestamps;
        // an empty slice falls back to "now".
        let conversation_start = messages
            .first()
            .map(|m| m.timestamp)
            .unwrap_or_else(Utc::now);
        let conversation_end = messages
            .last()
            .map(|m| m.timestamp)
            .unwrap_or_else(Utc::now);
        Ok(Episode {
            id: Uuid::new_v4(),
            user_id,
            bot_id,
            session_id,
            summary,
            key_topics,
            decisions,
            action_items,
            sentiment,
            resolution,
            message_count: messages.len(),
            message_ids: messages.iter().map(|m| m.id).collect(),
            created_at: Utc::now(),
            conversation_start,
            conversation_end,
            metadata: serde_json::json!({}),
        })
    }
    /// Get retention cutoff date: now minus the configured `retention_days`.
    /// Episodes older than this are eligible for `DELETE_OLD_EPISODES`.
    pub fn get_retention_cutoff(&self) -> DateTime<Utc> {
        Utc::now() - Duration::days(self.config.retention_days as i64)
    }
}
/// Extract a JSON payload from an LLM response.
///
/// Tries, in order: a fenced ```json block, any generic fenced block
/// (skipping an optional language tag on the opening line), and finally the
/// outermost `{...}` span. Returns an error when none of these match.
fn extract_json(response: &str) -> Result<String, String> {
    // 1. Fenced block explicitly tagged as JSON.
    if let Some(tag_pos) = response.find("```json") {
        let body = &response[tag_pos + 7..];
        if let Some(close) = body.find("```") {
            return Ok(body[..close].trim().to_string());
        }
    }
    // 2. Any fenced block; drop the language identifier line if present.
    if let Some(fence) = response.find("```") {
        let after = fence + 3;
        let body_start = match response[after..].find('\n') {
            Some(nl) => after + nl + 1,
            None => after,
        };
        if let Some(close) = response[body_start..].find("```") {
            return Ok(response[body_start..body_start + close].trim().to_string());
        }
    }
    // 3. Last resort: the widest `{` .. `}` span in the raw text.
    if let (Some(open), Some(close)) = (response.find('{'), response.rfind('}')) {
        if close > open {
            return Ok(response[open..=close].to_string());
        }
    }
    Err("No JSON found in response".to_string())
}
/// Convert Episode to Rhai Dynamic
impl Episode {
pub fn to_dynamic(&self) -> Dynamic {
let mut map = Map::new();
map.insert("id".into(), self.id.to_string().into());
map.insert("user_id".into(), self.user_id.to_string().into());
map.insert("bot_id".into(), self.bot_id.to_string().into());
map.insert("session_id".into(), self.session_id.to_string().into());
map.insert("summary".into(), self.summary.clone().into());
let topics: Array = self.key_topics
.iter()
.map(|t| Dynamic::from(t.clone()))
.collect();
map.insert("key_topics".into(), topics.into());
let decisions: Array = self.decisions
.iter()
.map(|d| Dynamic::from(d.clone()))
.collect();
map.insert("decisions".into(), decisions.into());
let action_items: Array = self.action_items
.iter()
.map(|a| {
let mut item_map = Map::new();
item_map.insert("description".into(), a.description.clone().into());
item_map.insert("assignee".into(), a.assignee.clone().unwrap_or_default().into());
item_map.insert("priority".into(), format!("{:?}", a.priority).to_lowercase().into());
item_map.insert("completed".into(), a.completed.into());
Dynamic::from(item_map)
})
.collect();
map.insert("action_items".into(), action_items.into());
let mut sentiment_map = Map::new();
sentiment_map.insert("score".into(), self.sentiment.score.into());
sentiment_map.insert("label".into(), format!("{:?}", self.sentiment.label).to_lowercase().into());
sentiment_map.insert("confidence".into(), self.sentiment.confidence.into());
map.insert("sentiment".into(), sentiment_map.into());
map.insert("resolution".into(), format!("{:?}", self.resolution).to_lowercase().into());
map.insert("message_count".into(), (self.message_count as i64).into());
map.insert("created_at".into(), self.created_at.to_rfc3339().into());
map.insert("conversation_start".into(), self.conversation_start.to_rfc3339().into());
map.insert("conversation_end".into(), self.conversation_end.to_rfc3339().into());
Dynamic::from(map)
}
}
/// Register episodic memory keywords with Rhai engine
///
/// Registers read-only accessor functions that extract fields from an
/// episode `Map` (as produced by `Episode::to_dynamic`). Each accessor
/// returns a neutral default (empty string/array, 0.0, false) when the
/// field is missing or has an unexpected type, so scripts never fault on
/// a partial episode.
pub fn register_episodic_memory_keywords(engine: &mut Engine) {
    // CREATE EPISODE SUMMARY - creates a summary of current conversation
    // This is typically called from the runtime with state access
    // Helper functions for working with episodes in scripts
    engine.register_fn("episode_summary", |episode: Map| -> String {
        episode
            .get("summary")
            .and_then(|v| v.clone().try_cast::<String>())
            .unwrap_or_default()
    });
    engine.register_fn("episode_topics", |episode: Map| -> Array {
        episode
            .get("key_topics")
            .and_then(|v| v.clone().try_cast::<Array>())
            .unwrap_or_default()
    });
    engine.register_fn("episode_decisions", |episode: Map| -> Array {
        episode
            .get("decisions")
            .and_then(|v| v.clone().try_cast::<Array>())
            .unwrap_or_default()
    });
    engine.register_fn("episode_action_items", |episode: Map| -> Array {
        episode
            .get("action_items")
            .and_then(|v| v.clone().try_cast::<Array>())
            .unwrap_or_default()
    });
    // Reaches into the nested sentiment map; 0.0 when absent or mistyped.
    engine.register_fn("episode_sentiment_score", |episode: Map| -> f64 {
        episode
            .get("sentiment")
            .and_then(|v| v.clone().try_cast::<Map>())
            .and_then(|m| m.get("score").and_then(|s| s.clone().try_cast::<f64>()))
            .unwrap_or(0.0)
    });
    // True only for the exact lowercase string "resolved".
    engine.register_fn("episode_was_resolved", |episode: Map| -> bool {
        episode
            .get("resolution")
            .and_then(|v| v.clone().try_cast::<String>())
            .map(|s| s == "resolved")
            .unwrap_or(false)
    });
    info!("Episodic memory keywords registered");
}
/// SQL for creating episodic memory tables
///
/// Idempotent PostgreSQL DDL (`IF NOT EXISTS` throughout): the
/// `conversation_episodes` table plus btree/GIN indexes and a full-text
/// index over `summary`. JSONB column defaults mirror the neutral defaults
/// used by `parse_summary_response`.
pub const EPISODIC_MEMORY_SCHEMA: &str = r#"
-- Conversation episodes (summaries)
CREATE TABLE IF NOT EXISTS conversation_episodes (
    id UUID PRIMARY KEY,
    user_id UUID NOT NULL,
    bot_id UUID NOT NULL,
    session_id UUID NOT NULL,
    summary TEXT NOT NULL,
    key_topics JSONB NOT NULL DEFAULT '[]',
    decisions JSONB NOT NULL DEFAULT '[]',
    action_items JSONB NOT NULL DEFAULT '[]',
    sentiment JSONB NOT NULL DEFAULT '{"score": 0, "label": "neutral", "confidence": 0.5}',
    resolution VARCHAR(50) NOT NULL DEFAULT 'unknown',
    message_count INTEGER NOT NULL DEFAULT 0,
    message_ids JSONB NOT NULL DEFAULT '[]',
    conversation_start TIMESTAMP WITH TIME ZONE NOT NULL,
    conversation_end TIMESTAMP WITH TIME ZONE NOT NULL,
    metadata JSONB NOT NULL DEFAULT '{}',
    created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW()
);
-- Indexes for efficient querying
CREATE INDEX IF NOT EXISTS idx_episodes_user_id ON conversation_episodes(user_id);
CREATE INDEX IF NOT EXISTS idx_episodes_bot_id ON conversation_episodes(bot_id);
CREATE INDEX IF NOT EXISTS idx_episodes_session_id ON conversation_episodes(session_id);
CREATE INDEX IF NOT EXISTS idx_episodes_created_at ON conversation_episodes(created_at DESC);
CREATE INDEX IF NOT EXISTS idx_episodes_key_topics ON conversation_episodes USING GIN(key_topics);
CREATE INDEX IF NOT EXISTS idx_episodes_resolution ON conversation_episodes(resolution);
-- Full-text search on summaries
CREATE INDEX IF NOT EXISTS idx_episodes_summary_fts ON conversation_episodes
    USING GIN(to_tsvector('english', summary));
"#;
/// SQL for episode operations
///
/// Parameterized statements ($1, $2, ...) used by the storage layer; kept
/// next to the schema so the two evolve together.
pub mod sql {
    /// Insert one episode row (16 positional parameters, in column order).
    pub const INSERT_EPISODE: &str = r#"
    INSERT INTO conversation_episodes (
        id, user_id, bot_id, session_id, summary, key_topics, decisions,
        action_items, sentiment, resolution, message_count, message_ids,
        conversation_start, conversation_end, metadata, created_at
    ) VALUES (
        $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16
    )
    "#;
    /// Newest-first episodes for a user; $2 caps the row count.
    pub const GET_EPISODES_BY_USER: &str = r#"
    SELECT * FROM conversation_episodes
    WHERE user_id = $1
    ORDER BY created_at DESC
    LIMIT $2
    "#;
    /// All episodes for one session, newest first.
    pub const GET_EPISODES_BY_SESSION: &str = r#"
    SELECT * FROM conversation_episodes
    WHERE session_id = $1
    ORDER BY created_at DESC
    "#;
    /// Full-text match on summary OR topic containment ($3 is a JSONB array).
    pub const SEARCH_EPISODES: &str = r#"
    SELECT * FROM conversation_episodes
    WHERE user_id = $1
    AND (
        to_tsvector('english', summary) @@ plainto_tsquery('english', $2)
        OR key_topics @> $3::jsonb
    )
    ORDER BY created_at DESC
    LIMIT $4
    "#;
    /// Purge episodes created before $1 (see `get_retention_cutoff`).
    pub const DELETE_OLD_EPISODES: &str = r#"
    DELETE FROM conversation_episodes
    WHERE created_at < $1
    "#;
    /// Episode count for one user (used to enforce `max_episodes`).
    pub const COUNT_USER_EPISODES: &str = r#"
    SELECT COUNT(*) FROM conversation_episodes
    WHERE user_id = $1
    "#;
    /// Drop the user's $2 oldest episodes (cap enforcement).
    pub const DELETE_OLDEST_EPISODES: &str = r#"
    DELETE FROM conversation_episodes
    WHERE id IN (
        SELECT id FROM conversation_episodes
        WHERE user_id = $1
        ORDER BY created_at ASC
        LIMIT $2
    )
    "#;
}
#[cfg(test)]
mod tests {
    use super::*;
    // Defaults match the example Config.csv in the module docs.
    #[test]
    fn test_default_config() {
        let config = EpisodicMemoryConfig::default();
        assert!(config.enabled);
        assert_eq!(config.summary_threshold, 20);
        assert_eq!(config.max_episodes, 100);
    }
    // Threshold is inclusive: exactly `summary_threshold` messages trigger.
    #[test]
    fn test_should_summarize() {
        let manager = EpisodicMemoryManager::new(EpisodicMemoryConfig {
            enabled: true,
            summary_threshold: 10,
            auto_summarize: true,
            ..Default::default()
        });
        assert!(!manager.should_summarize(5));
        assert!(manager.should_summarize(10));
        assert!(manager.should_summarize(15));
    }
    // JSON extraction works for both fenced blocks and inline objects.
    #[test]
    fn test_extract_json() {
        // Test with code block
        let response = "Here's the summary:\n```json\n{\"summary\": \"test\"}\n```\n";
        assert!(extract_json(response).is_ok());
        // Test with raw JSON
        let response = "The result is {\"summary\": \"test\"}";
        assert!(extract_json(response).is_ok());
    }
    // Prompt embeds the rendered messages under the CONVERSATION: header.
    #[test]
    fn test_generate_summary_prompt() {
        let manager = EpisodicMemoryManager::new(EpisodicMemoryConfig::default());
        let messages = vec![
            ConversationMessage {
                id: Uuid::new_v4(),
                role: "user".to_string(),
                content: "Hello".to_string(),
                timestamp: Utc::now(),
            },
        ];
        let prompt = manager.generate_summary_prompt(&messages);
        assert!(prompt.contains("CONVERSATION:"));
        assert!(prompt.contains("Hello"));
    }
    // A well-formed model reply round-trips into a populated Episode.
    #[test]
    fn test_parse_summary_response() {
        let manager = EpisodicMemoryManager::new(EpisodicMemoryConfig::default());
        let response = r#"{
            "summary": "User asked about billing",
            "key_topics": ["billing", "payment"],
            "decisions": [],
            "action_items": [],
            "sentiment": {"score": 0.5, "label": "positive", "confidence": 0.8},
            "resolution": "resolved"
        }"#;
        let messages = vec![
            ConversationMessage {
                id: Uuid::new_v4(),
                role: "user".to_string(),
                content: "What's my balance?".to_string(),
                timestamp: Utc::now(),
            },
        ];
        let episode = manager.parse_summary_response(
            response,
            &messages,
            Uuid::new_v4(),
            Uuid::new_v4(),
            Uuid::new_v4(),
        );
        assert!(episode.is_ok());
        let ep = episode.unwrap();
        assert_eq!(ep.summary, "User asked about billing");
        assert_eq!(ep.key_topics, vec!["billing", "payment"]);
        assert_eq!(ep.resolution, ResolutionStatus::Resolved);
    }
    // to_dynamic yields a Rhai Map wrapped in a Dynamic.
    #[test]
    fn test_episode_to_dynamic() {
        let episode = Episode {
            id: Uuid::new_v4(),
            user_id: Uuid::new_v4(),
            bot_id: Uuid::new_v4(),
            session_id: Uuid::new_v4(),
            summary: "Test summary".to_string(),
            key_topics: vec!["topic1".to_string()],
            decisions: vec![],
            action_items: vec![],
            sentiment: Sentiment::default(),
            resolution: ResolutionStatus::Resolved,
            message_count: 5,
            message_ids: vec![],
            created_at: Utc::now(),
            conversation_start: Utc::now(),
            conversation_end: Utc::now(),
            metadata: serde_json::json!({}),
        };
        let dynamic = episode.to_dynamic();
        assert!(dynamic.is::<Map>());
    }
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,929 @@
//! Knowledge Graph - Entity Relationship Management
//!
//! This module provides knowledge graph capabilities for tracking relationships
//! between entities mentioned in conversations. It enables:
//!
//! - Entity extraction from text
//! - Relationship mapping between entities
//! - Graph queries for complex questions
//! - Integration with RAG for context enrichment
//!
//! ## BASIC Keywords
//!
//! ```basic
//! ' Extract entities from text
//! EXTRACT ENTITIES FROM text INTO KNOWLEDGE GRAPH
//!
//! ' Query the knowledge graph
//! results = QUERY GRAPH "people who work on Project Alpha"
//!
//! ' Add entity manually
//! ADD ENTITY "John Smith" TYPE "person" WITH {"department": "Sales"}
//!
//! ' Add relationship
//! ADD RELATIONSHIP "John Smith" -> "works_on" -> "Project Alpha"
//!
//! ' Get entity details
//! entity = GET ENTITY "John Smith"
//!
//! ' Find related entities
//! related = GET RELATED "Project Alpha" BY "works_on"
//!
//! ' Delete entity
//! DELETE ENTITY "John Smith"
//! ```
//!
//! ## Config.csv Properties
//!
//! ```csv
//! name,value
//! knowledge-graph-enabled,true
//! knowledge-graph-backend,postgresql
//! knowledge-graph-extract-entities,true
//! knowledge-graph-extraction-model,quality
//! knowledge-graph-max-entities,10000
//! knowledge-graph-max-relationships,50000
//! ```
use chrono::{DateTime, Utc};
use rhai::{Array, Dynamic, Engine, Map};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use tracing::{debug, error, info, warn};
use uuid::Uuid;
/// Entity in the knowledge graph
///
/// A node scoped to one bot; relationships between entities are stored
/// separately as `KgRelationship` edges.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct KgEntity {
    /// Unique identifier
    pub id: Uuid,
    /// Bot ID this entity belongs to
    pub bot_id: Uuid,
    /// Entity type (person, organization, project, product, etc.)
    pub entity_type: String,
    /// Entity name (canonical form)
    pub entity_name: String,
    /// Alternative names/aliases
    pub aliases: Vec<String>,
    /// Entity properties (free-form JSON)
    pub properties: serde_json::Value,
    /// Confidence score (0-1) if extracted automatically
    pub confidence: f64,
    /// Source of the entity (manual, extracted, imported)
    pub source: EntitySource,
    /// When the entity was created
    pub created_at: DateTime<Utc>,
    /// When the entity was last updated
    pub updated_at: DateTime<Utc>,
}
/// Source of entity creation.
///
/// Serialized in lowercase; shared by `KgEntity` and `KgRelationship`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default)]
#[serde(rename_all = "lowercase")]
pub enum EntitySource {
    /// Default: created explicitly by a user or script.
    #[default]
    Manual,
    Extracted,
    Imported,
    Inferred,
}
/// Relationship between two entities
///
/// A directed edge between two `KgEntity` nodes (set `bidirectional` for
/// symmetric relations), scoped to one bot.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct KgRelationship {
    /// Unique identifier
    pub id: Uuid,
    /// Bot ID this relationship belongs to
    pub bot_id: Uuid,
    /// Source entity ID
    pub from_entity_id: Uuid,
    /// Target entity ID
    pub to_entity_id: Uuid,
    /// Relationship type (works_on, reports_to, owns, etc.)
    pub relationship_type: String,
    /// Relationship properties (strength, since, etc.)
    pub properties: serde_json::Value,
    /// Confidence score (0-1) if extracted automatically
    pub confidence: f64,
    /// Whether this is a bidirectional relationship
    pub bidirectional: bool,
    /// Source of the relationship
    pub source: EntitySource,
    /// When the relationship was created
    pub created_at: DateTime<Utc>,
}
/// Entity extraction result from text
///
/// Intermediate value produced by `parse_extraction_response` before the
/// entity is persisted as a `KgEntity`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExtractedEntity {
    /// Entity name as found in text
    pub name: String,
    /// Normalized/canonical name (falls back to `name` when absent)
    pub canonical_name: String,
    /// Entity type
    pub entity_type: String,
    /// Start position in text (currently always 0 — not reported by the LLM)
    pub start_pos: usize,
    /// End position in text (currently always 0 — not reported by the LLM)
    pub end_pos: usize,
    /// Confidence score
    pub confidence: f64,
    /// Additional properties extracted
    pub properties: serde_json::Value,
}
/// Extracted relationship from text
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExtractedRelationship {
    /// Source entity name
    pub from_entity: String,
    /// Target entity name
    pub to_entity: String,
    /// Relationship type
    pub relationship_type: String,
    /// Confidence score
    pub confidence: f64,
    /// Supporting text snippet
    pub evidence: String,
}
/// Knowledge graph extraction result
///
/// Entities and relationships already filtered by the configured minimum
/// confidence threshold.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExtractionResult {
    /// Extracted entities
    pub entities: Vec<ExtractedEntity>,
    /// Extracted relationships
    pub relationships: Vec<ExtractedRelationship>,
    /// Processing metadata
    pub metadata: ExtractionMetadata,
}
/// Metadata about the extraction process
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExtractionMetadata {
    /// Model used for extraction
    pub model: String,
    /// Processing time in milliseconds
    pub processing_time_ms: u64,
    /// Number of tokens processed (rough estimate: text length / 4)
    pub tokens_processed: usize,
    /// Source text length
    pub text_length: usize,
}
/// Query result from the knowledge graph
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GraphQueryResult {
    /// Matching entities
    pub entities: Vec<KgEntity>,
    /// Relationships between matched entities
    pub relationships: Vec<KgRelationship>,
    /// Query explanation
    pub explanation: String,
    /// Confidence in the result
    pub confidence: f64,
}
/// Configuration for knowledge graph
///
/// Populated from Config.csv `knowledge-graph-*` keys via
/// `KnowledgeGraphManager::from_config`.
#[derive(Debug, Clone)]
pub struct KnowledgeGraphConfig {
    /// Whether knowledge graph is enabled
    pub enabled: bool,
    /// Backend storage (postgresql, neo4j, etc.)
    pub backend: String,
    /// Whether to auto-extract entities from conversations
    pub extract_entities: bool,
    /// Model to use for entity extraction
    pub extraction_model: String,
    /// Maximum entities per bot
    pub max_entities: usize,
    /// Maximum relationships per bot
    pub max_relationships: usize,
    /// Minimum confidence threshold for extraction; lower-confidence
    /// entities/relationships are dropped in `parse_extraction_response`
    pub min_confidence: f64,
    /// Entity types to extract (embedded into the extraction prompt)
    pub entity_types: Vec<String>,
}
impl Default for KnowledgeGraphConfig {
    // Defaults mirror the example Config.csv in the module docs.
    fn default() -> Self {
        KnowledgeGraphConfig {
            enabled: true,
            backend: "postgresql".to_string(),
            extract_entities: true,
            extraction_model: "quality".to_string(),
            max_entities: 10000,
            max_relationships: 50000,
            min_confidence: 0.7,
            entity_types: vec![
                "person".to_string(),
                "organization".to_string(),
                "project".to_string(),
                "product".to_string(),
                "location".to_string(),
                "event".to_string(),
                "concept".to_string(),
            ],
        }
    }
}
/// Knowledge Graph Manager
pub struct KnowledgeGraphManager {
config: KnowledgeGraphConfig,
}
impl KnowledgeGraphManager {
/// Create a new knowledge graph manager
pub fn new(config: KnowledgeGraphConfig) -> Self {
KnowledgeGraphManager { config }
}
/// Create from config map
pub fn from_config(config_map: &HashMap<String, String>) -> Self {
let config = KnowledgeGraphConfig {
enabled: config_map
.get("knowledge-graph-enabled")
.map(|v| v == "true")
.unwrap_or(true),
backend: config_map
.get("knowledge-graph-backend")
.cloned()
.unwrap_or_else(|| "postgresql".to_string()),
extract_entities: config_map
.get("knowledge-graph-extract-entities")
.map(|v| v == "true")
.unwrap_or(true),
extraction_model: config_map
.get("knowledge-graph-extraction-model")
.cloned()
.unwrap_or_else(|| "quality".to_string()),
max_entities: config_map
.get("knowledge-graph-max-entities")
.and_then(|v| v.parse().ok())
.unwrap_or(10000),
max_relationships: config_map
.get("knowledge-graph-max-relationships")
.and_then(|v| v.parse().ok())
.unwrap_or(50000),
min_confidence: config_map
.get("knowledge-graph-min-confidence")
.and_then(|v| v.parse().ok())
.unwrap_or(0.7),
entity_types: config_map
.get("knowledge-graph-entity-types")
.map(|v| v.split(',').map(|s| s.trim().to_string()).collect())
.unwrap_or_else(|| KnowledgeGraphConfig::default().entity_types),
};
KnowledgeGraphManager::new(config)
}
/// Generate entity extraction prompt
pub fn generate_extraction_prompt(&self, text: &str) -> String {
let entity_types = self.config.entity_types.join(", ");
format!(
r#"Extract entities and relationships from the following text.
ENTITY TYPES TO EXTRACT: {entity_types}
TEXT:
{text}
Respond with valid JSON only:
{{
"entities": [
{{
"name": "exact name as in text",
"canonical_name": "normalized name",
"entity_type": "person|organization|project|product|location|event|concept",
"confidence": 0.95,
"properties": {{"key": "value"}}
}}
],
"relationships": [
{{
"from_entity": "entity name",
"to_entity": "entity name",
"relationship_type": "works_on|reports_to|owns|part_of|located_in|related_to",
"confidence": 0.9,
"evidence": "text snippet supporting this relationship"
}}
]
}}"#
)
}
/// Generate graph query prompt
pub fn generate_query_prompt(&self, query: &str, context: &str) -> String {
format!(
r#"Answer this question using the knowledge graph context.
QUESTION: {query}
KNOWLEDGE GRAPH CONTEXT:
{context}
Provide a natural language answer based on the entities and relationships.
If the information is not available, say so clearly.
"#
)
}
    /// Parse extraction response from LLM.
    ///
    /// `response` is the raw LLM output (possibly wrapped in code fences);
    /// `text_length` is the source text length (used for a rough token
    /// estimate) and `processing_time_ms` is recorded in the metadata.
    ///
    /// Entities/relationships missing required string fields are skipped,
    /// and anything below `config.min_confidence` is filtered out.
    /// Returns an error string when no parseable JSON is found.
    pub fn parse_extraction_response(
        &self,
        response: &str,
        text_length: usize,
        processing_time_ms: u64,
    ) -> Result<ExtractionResult, String> {
        // Strip code fences / surrounding prose before JSON parsing.
        let json_str = extract_json(response)?;
        let parsed: serde_json::Value =
            serde_json::from_str(&json_str).map_err(|e| format!("Failed to parse JSON: {}", e))?;
        let entities: Vec<ExtractedEntity> = parsed["entities"]
            .as_array()
            .map(|arr| {
                arr.iter()
                    .filter_map(|v| {
                        Some(ExtractedEntity {
                            name: v["name"].as_str()?.to_string(),
                            // Fall back to the surface name when the LLM
                            // gives no canonical form.
                            canonical_name: v["canonical_name"]
                                .as_str()
                                .unwrap_or(v["name"].as_str()?)
                                .to_string(),
                            entity_type: v["entity_type"].as_str()?.to_string(),
                            // The LLM does not report character offsets,
                            // so positions are left at 0.
                            start_pos: 0,
                            end_pos: 0,
                            confidence: v["confidence"].as_f64().unwrap_or(0.8),
                            properties: v["properties"].clone(),
                        })
                    })
                    .filter(|e| e.confidence >= self.config.min_confidence)
                    .collect()
            })
            .unwrap_or_default();
        let relationships: Vec<ExtractedRelationship> = parsed["relationships"]
            .as_array()
            .map(|arr| {
                arr.iter()
                    .filter_map(|v| {
                        Some(ExtractedRelationship {
                            from_entity: v["from_entity"].as_str()?.to_string(),
                            to_entity: v["to_entity"].as_str()?.to_string(),
                            relationship_type: v["relationship_type"].as_str()?.to_string(),
                            confidence: v["confidence"].as_f64().unwrap_or(0.8),
                            evidence: v["evidence"].as_str().unwrap_or("").to_string(),
                        })
                    })
                    .filter(|r| r.confidence >= self.config.min_confidence)
                    .collect()
            })
            .unwrap_or_default();
        Ok(ExtractionResult {
            entities,
            relationships,
            metadata: ExtractionMetadata {
                model: self.config.extraction_model.clone(),
                processing_time_ms,
                tokens_processed: text_length / 4, // Rough estimate
                text_length,
            },
        })
    }
    /// Check if extraction should run: requires the knowledge-graph feature
    /// to be enabled AND entity extraction to be switched on in the config.
    pub fn should_extract(&self) -> bool {
        self.config.enabled && self.config.extract_entities
    }
/// Validate entity type
pub fn is_valid_entity_type(&self, entity_type: &str) -> bool {
self.config
.entity_types
.iter()
.any(|t| t.eq_ignore_ascii_case(entity_type))
}
}
/// Extract a JSON payload from an LLM response.
///
/// Tries three strategies in order:
/// 1. an explicit ```json fenced code block,
/// 2. any generic ``` fenced block (content starts after the tag line),
/// 3. the outermost `{ ... }` span in the raw text.
///
/// Returns an error string when none of the strategies find JSON.
fn extract_json(response: &str) -> Result<String, String> {
    // Strategy 1: explicit ```json fence.
    if let Some(fence) = response.find("```json") {
        let body = &response[fence + 7..];
        if let Some(close) = body.find("```") {
            return Ok(body[..close].trim().to_string());
        }
    }
    // Strategy 2: generic ``` fence; skip the language-tag line (everything
    // up to and including the first newline after the opening fence).
    if let Some(fence) = response.find("```") {
        let after_fence = fence + 3;
        let body_start = match response[after_fence..].find('\n') {
            Some(nl) => after_fence + nl + 1,
            None => after_fence,
        };
        if let Some(close) = response[body_start..].find("```") {
            return Ok(response[body_start..body_start + close].trim().to_string());
        }
    }
    // Strategy 3: outermost brace pair in the raw text.
    if let (Some(open), Some(close)) = (response.find('{'), response.rfind('}')) {
        if close > open {
            return Ok(response[open..=close].to_string());
        }
    }
    Err("No JSON found in response".to_string())
}
/// Convert KgEntity to Rhai Dynamic
impl KgEntity {
    /// Render this entity as a Rhai map for use in scripts.
    ///
    /// UUIDs and timestamps are stringified (RFC 3339 for dates), aliases
    /// become a Rhai array, JSON properties are converted recursively via
    /// `json_to_dynamic`, and the source enum is lowercased via its Debug
    /// representation.
    pub fn to_dynamic(&self) -> Dynamic {
        let mut map = Map::new();
        map.insert("id".into(), self.id.to_string().into());
        map.insert("bot_id".into(), self.bot_id.to_string().into());
        map.insert("entity_type".into(), self.entity_type.clone().into());
        map.insert("entity_name".into(), self.entity_name.clone().into());
        // Aliases become a Rhai array of strings.
        let aliases: Array = self
            .aliases
            .iter()
            .map(|a| Dynamic::from(a.clone()))
            .collect();
        map.insert("aliases".into(), aliases.into());
        map.insert("properties".into(), json_to_dynamic(&self.properties));
        map.insert("confidence".into(), self.confidence.into());
        map.insert(
            "source".into(),
            format!("{:?}", self.source).to_lowercase().into(),
        );
        map.insert("created_at".into(), self.created_at.to_rfc3339().into());
        map.insert("updated_at".into(), self.updated_at.to_rfc3339().into());
        Dynamic::from(map)
    }
}
/// Convert KgRelationship to Rhai Dynamic
impl KgRelationship {
    /// Render this relationship as a Rhai map for use in scripts.
    ///
    /// Mirrors `KgEntity::to_dynamic`: UUIDs and timestamps stringified,
    /// JSON properties converted recursively, and the source enum
    /// lowercased via its Debug representation.
    pub fn to_dynamic(&self) -> Dynamic {
        let mut map = Map::new();
        map.insert("id".into(), self.id.to_string().into());
        map.insert("bot_id".into(), self.bot_id.to_string().into());
        map.insert(
            "from_entity_id".into(),
            self.from_entity_id.to_string().into(),
        );
        map.insert("to_entity_id".into(), self.to_entity_id.to_string().into());
        map.insert(
            "relationship_type".into(),
            self.relationship_type.clone().into(),
        );
        map.insert("properties".into(), json_to_dynamic(&self.properties));
        map.insert("confidence".into(), self.confidence.into());
        map.insert("bidirectional".into(), self.bidirectional.into());
        map.insert(
            "source".into(),
            format!("{:?}", self.source).to_lowercase().into(),
        );
        map.insert("created_at".into(), self.created_at.to_rfc3339().into());
        Dynamic::from(map)
    }
}
/// Convert JSON value to Rhai Dynamic.
///
/// Recursive, lossless for bool/string/array/object. Numbers prefer i64,
/// falling back to f64; a number representable as neither (e.g. a u64 above
/// i64::MAX) collapses to UNIT, as does JSON null.
fn json_to_dynamic(value: &serde_json::Value) -> Dynamic {
    match value {
        serde_json::Value::Null => Dynamic::UNIT,
        serde_json::Value::Bool(b) => Dynamic::from(*b),
        serde_json::Value::Number(n) => {
            // Integer representation wins when possible; floats are a
            // fallback so 42 stays an i64 rather than becoming 42.0.
            if let Some(i) = n.as_i64() {
                Dynamic::from(i)
            } else if let Some(f) = n.as_f64() {
                Dynamic::from(f)
            } else {
                Dynamic::UNIT
            }
        }
        serde_json::Value::String(s) => Dynamic::from(s.clone()),
        serde_json::Value::Array(arr) => {
            let array: Array = arr.iter().map(json_to_dynamic).collect();
            Dynamic::from(array)
        }
        serde_json::Value::Object(obj) => {
            let mut map = Map::new();
            for (k, v) in obj {
                map.insert(k.clone().into(), json_to_dynamic(v));
            }
            Dynamic::from(map)
        }
    }
}
/// Register knowledge graph keywords with Rhai engine.
///
/// Installs accessor functions that let scripts read fields from the maps
/// produced by `to_dynamic` without manual Dynamic casting. Each accessor
/// returns a default (empty string / empty map / false) when the key is
/// missing or has an unexpected type.
pub fn register_knowledge_graph_keywords(engine: &mut Engine) {
    // Helper functions for working with entities in scripts
    engine.register_fn("entity_name", |entity: Map| -> String {
        entity
            .get("entity_name")
            .and_then(|v| v.clone().try_cast::<String>())
            .unwrap_or_default()
    });
    engine.register_fn("entity_type", |entity: Map| -> String {
        entity
            .get("entity_type")
            .and_then(|v| v.clone().try_cast::<String>())
            .unwrap_or_default()
    });
    engine.register_fn("entity_properties", |entity: Map| -> Map {
        entity
            .get("properties")
            .and_then(|v| v.clone().try_cast::<Map>())
            .unwrap_or_default()
    });
    engine.register_fn("relationship_type", |rel: Map| -> String {
        rel.get("relationship_type")
            .and_then(|v| v.clone().try_cast::<String>())
            .unwrap_or_default()
    });
    engine.register_fn("is_bidirectional", |rel: Map| -> bool {
        rel.get("bidirectional")
            .and_then(|v| v.clone().try_cast::<bool>())
            .unwrap_or(false)
    });
    info!("Knowledge graph keywords registered");
}
/// SQL for creating knowledge graph tables.
///
/// Idempotent (IF NOT EXISTS throughout) so it can be run on every startup.
/// Entities are unique per (bot_id, entity_type, entity_name); relationships
/// are unique per (bot_id, from, to, type) and cascade-delete when either
/// endpoint entity is removed. GIN indexes cover alias containment and
/// full-text search on entity names.
pub const KNOWLEDGE_GRAPH_SCHEMA: &str = r#"
-- Knowledge graph entities
CREATE TABLE IF NOT EXISTS kg_entities (
id UUID PRIMARY KEY,
bot_id UUID NOT NULL,
entity_type VARCHAR(100) NOT NULL,
entity_name VARCHAR(500) NOT NULL,
aliases JSONB NOT NULL DEFAULT '[]',
properties JSONB NOT NULL DEFAULT '{}',
confidence DOUBLE PRECISION NOT NULL DEFAULT 1.0,
source VARCHAR(50) NOT NULL DEFAULT 'manual',
created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),
updated_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),
UNIQUE(bot_id, entity_type, entity_name)
);
-- Knowledge graph relationships
CREATE TABLE IF NOT EXISTS kg_relationships (
id UUID PRIMARY KEY,
bot_id UUID NOT NULL,
from_entity_id UUID NOT NULL REFERENCES kg_entities(id) ON DELETE CASCADE,
to_entity_id UUID NOT NULL REFERENCES kg_entities(id) ON DELETE CASCADE,
relationship_type VARCHAR(100) NOT NULL,
properties JSONB NOT NULL DEFAULT '{}',
confidence DOUBLE PRECISION NOT NULL DEFAULT 1.0,
bidirectional BOOLEAN NOT NULL DEFAULT false,
source VARCHAR(50) NOT NULL DEFAULT 'manual',
created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),
UNIQUE(bot_id, from_entity_id, to_entity_id, relationship_type)
);
-- Indexes for efficient querying
CREATE INDEX IF NOT EXISTS idx_kg_entities_bot_id ON kg_entities(bot_id);
CREATE INDEX IF NOT EXISTS idx_kg_entities_type ON kg_entities(entity_type);
CREATE INDEX IF NOT EXISTS idx_kg_entities_name ON kg_entities(entity_name);
CREATE INDEX IF NOT EXISTS idx_kg_entities_name_lower ON kg_entities(LOWER(entity_name));
CREATE INDEX IF NOT EXISTS idx_kg_entities_aliases ON kg_entities USING GIN(aliases);
CREATE INDEX IF NOT EXISTS idx_kg_relationships_bot_id ON kg_relationships(bot_id);
CREATE INDEX IF NOT EXISTS idx_kg_relationships_from ON kg_relationships(from_entity_id);
CREATE INDEX IF NOT EXISTS idx_kg_relationships_to ON kg_relationships(to_entity_id);
CREATE INDEX IF NOT EXISTS idx_kg_relationships_type ON kg_relationships(relationship_type);
-- Full-text search on entity names
CREATE INDEX IF NOT EXISTS idx_kg_entities_name_fts ON kg_entities
USING GIN(to_tsvector('english', entity_name));
"#;
/// SQL for knowledge graph operations.
///
/// All queries are bot-scoped (every statement filters on `bot_id`) and use
/// positional bind parameters suitable for `diesel::sql_query`.
pub mod sql {
    /// Upsert an entity. On conflict with the (bot_id, type, name) unique
    /// key, aliases and properties are JSONB-merged (`||`) and the highest
    /// confidence is kept. Returns the entity id.
    pub const INSERT_ENTITY: &str = r#"
INSERT INTO kg_entities (
id, bot_id, entity_type, entity_name, aliases, properties,
confidence, source, created_at, updated_at
) VALUES (
$1, $2, $3, $4, $5, $6, $7, $8, $9, $10
)
ON CONFLICT (bot_id, entity_type, entity_name)
DO UPDATE SET
aliases = kg_entities.aliases || $5,
properties = kg_entities.properties || $6,
confidence = GREATEST(kg_entities.confidence, $7),
updated_at = $10
RETURNING id
"#;
    /// Upsert a relationship; same merge semantics as INSERT_ENTITY but
    /// keyed on (bot_id, from, to, type). Returns the relationship id.
    pub const INSERT_RELATIONSHIP: &str = r#"
INSERT INTO kg_relationships (
id, bot_id, from_entity_id, to_entity_id, relationship_type,
properties, confidence, bidirectional, source, created_at
) VALUES (
$1, $2, $3, $4, $5, $6, $7, $8, $9, $10
)
ON CONFLICT (bot_id, from_entity_id, to_entity_id, relationship_type)
DO UPDATE SET
properties = kg_relationships.properties || $6,
confidence = GREATEST(kg_relationships.confidence, $7)
RETURNING id
"#;
    /// Look up one entity by case-insensitive name or alias containment
    /// ($3 is a JSONB value matched with `@>`).
    pub const GET_ENTITY_BY_NAME: &str = r#"
SELECT * FROM kg_entities
WHERE bot_id = $1
AND (
LOWER(entity_name) = LOWER($2)
OR aliases @> $3::jsonb
)
LIMIT 1
"#;
    /// Fetch a single entity by primary key.
    pub const GET_ENTITY_BY_ID: &str = r#"
SELECT * FROM kg_entities WHERE id = $1
"#;
    /// Search entities by full-text match ($2) or LIKE pattern ($3),
    /// highest confidence first, limited to $4 rows.
    pub const SEARCH_ENTITIES: &str = r#"
SELECT * FROM kg_entities
WHERE bot_id = $1
AND (
to_tsvector('english', entity_name) @@ plainto_tsquery('english', $2)
OR LOWER(entity_name) LIKE LOWER($3)
)
ORDER BY confidence DESC
LIMIT $4
"#;
    /// List entities of a given type, ordered by name, limited to $3 rows.
    pub const GET_ENTITIES_BY_TYPE: &str = r#"
SELECT * FROM kg_entities
WHERE bot_id = $1 AND entity_type = $2
ORDER BY entity_name
LIMIT $3
"#;
    /// Entities one hop away from entity $1, following outgoing edges and
    /// the reverse direction of bidirectional edges.
    pub const GET_RELATED_ENTITIES: &str = r#"
SELECT e.*, r.relationship_type, r.confidence as rel_confidence
FROM kg_entities e
JOIN kg_relationships r ON (
(r.from_entity_id = $1 AND r.to_entity_id = e.id)
OR (r.bidirectional AND r.to_entity_id = $1 AND r.from_entity_id = e.id)
)
WHERE r.bot_id = $2
ORDER BY r.confidence DESC
LIMIT $3
"#;
    /// Same as GET_RELATED_ENTITIES but restricted to one relationship
    /// type ($3).
    pub const GET_RELATED_BY_TYPE: &str = r#"
SELECT e.*, r.relationship_type, r.confidence as rel_confidence
FROM kg_entities e
JOIN kg_relationships r ON (
(r.from_entity_id = $1 AND r.to_entity_id = e.id)
OR (r.bidirectional AND r.to_entity_id = $1 AND r.from_entity_id = e.id)
)
WHERE r.bot_id = $2 AND r.relationship_type = $3
ORDER BY r.confidence DESC
LIMIT $4
"#;
    /// Fetch one specific relationship by (bot, from, to, type).
    pub const GET_RELATIONSHIP: &str = r#"
SELECT * FROM kg_relationships
WHERE bot_id = $1
AND from_entity_id = $2
AND to_entity_id = $3
AND relationship_type = $4
"#;
    /// All relationships touching entity $2 (either direction), with both
    /// endpoints' names/types joined in for display.
    pub const GET_ALL_RELATIONSHIPS_FOR_ENTITY: &str = r#"
SELECT r.*,
e1.entity_name as from_name, e1.entity_type as from_type,
e2.entity_name as to_name, e2.entity_type as to_type
FROM kg_relationships r
JOIN kg_entities e1 ON r.from_entity_id = e1.id
JOIN kg_entities e2 ON r.to_entity_id = e2.id
WHERE r.bot_id = $1
AND (r.from_entity_id = $2 OR r.to_entity_id = $2)
ORDER BY r.confidence DESC
"#;
    /// Delete an entity (relationships cascade via the FK constraint).
    pub const DELETE_ENTITY: &str = r#"
DELETE FROM kg_entities WHERE id = $1 AND bot_id = $2
"#;
    /// Delete a single relationship.
    pub const DELETE_RELATIONSHIP: &str = r#"
DELETE FROM kg_relationships WHERE id = $1 AND bot_id = $2
"#;
    /// Total entity count for a bot.
    pub const COUNT_ENTITIES: &str = r#"
SELECT COUNT(*) FROM kg_entities WHERE bot_id = $1
"#;
    /// Total relationship count for a bot.
    pub const COUNT_RELATIONSHIPS: &str = r#"
SELECT COUNT(*) FROM kg_relationships WHERE bot_id = $1
"#;
    /// Distinct entity types with usage counts, most common first.
    pub const GET_ENTITY_TYPES: &str = r#"
SELECT DISTINCT entity_type, COUNT(*) as count
FROM kg_entities
WHERE bot_id = $1
GROUP BY entity_type
ORDER BY count DESC
"#;
    /// Distinct relationship types with usage counts, most common first.
    pub const GET_RELATIONSHIP_TYPES: &str = r#"
SELECT DISTINCT relationship_type, COUNT(*) as count
FROM kg_relationships
WHERE bot_id = $1
GROUP BY relationship_type
ORDER BY count DESC
"#;
    /// Graph traversal query (find path between two entities).
    ///
    /// Recursive CTE from source $2 toward target $3, cycle-guarded via a
    /// visited-node array and bounded by max depth $4; returns the first
    /// (shallowest) hit only. NOTE(review): only directed `from -> to`
    /// edges are followed here — bidirectional edges are not traversed in
    /// reverse, unlike GET_RELATED_ENTITIES; confirm intended.
    pub const FIND_PATH: &str = r#"
WITH RECURSIVE path_finder AS (
-- Base case: start from source entity
SELECT
from_entity_id,
to_entity_id,
relationship_type,
ARRAY[from_entity_id] as path,
1 as depth
FROM kg_relationships
WHERE bot_id = $1 AND from_entity_id = $2
UNION ALL
-- Recursive case: follow relationships
SELECT
r.from_entity_id,
r.to_entity_id,
r.relationship_type,
pf.path || r.from_entity_id,
pf.depth + 1
FROM kg_relationships r
JOIN path_finder pf ON r.from_entity_id = pf.to_entity_id
WHERE r.bot_id = $1
AND NOT r.from_entity_id = ANY(pf.path) -- Prevent cycles
AND pf.depth < $4 -- Max depth
)
SELECT * FROM path_finder
WHERE to_entity_id = $3
ORDER BY depth
LIMIT 1
"#;
}
/// Common relationship types.
///
/// Canonical string values stored in `kg_relationships.relationship_type`;
/// using these constants avoids typo'd free-form strings in callers.
pub mod relationship_types {
    pub const WORKS_ON: &str = "works_on";
    pub const REPORTS_TO: &str = "reports_to";
    pub const MANAGES: &str = "manages";
    pub const OWNS: &str = "owns";
    pub const PART_OF: &str = "part_of";
    pub const LOCATED_IN: &str = "located_in";
    pub const RELATED_TO: &str = "related_to";
    pub const CREATED_BY: &str = "created_by";
    pub const DEPENDS_ON: &str = "depends_on";
    pub const CONNECTED_TO: &str = "connected_to";
    pub const MEMBER_OF: &str = "member_of";
    pub const SUCCESSOR_OF: &str = "successor_of";
    pub const PREDECESSOR_OF: &str = "predecessor_of";
    pub const ALIAS_OF: &str = "alias_of";
}
/// Common entity types.
///
/// Canonical string values stored in `kg_entities.entity_type`; matching
/// against config is case-insensitive (see `is_valid_entity_type`).
pub mod entity_types {
    pub const PERSON: &str = "person";
    pub const ORGANIZATION: &str = "organization";
    pub const PROJECT: &str = "project";
    pub const PRODUCT: &str = "product";
    pub const LOCATION: &str = "location";
    pub const EVENT: &str = "event";
    pub const CONCEPT: &str = "concept";
    pub const DOCUMENT: &str = "document";
    pub const TEAM: &str = "team";
    pub const ROLE: &str = "role";
    pub const SKILL: &str = "skill";
    pub const TECHNOLOGY: &str = "technology";
}
#[cfg(test)]
mod tests {
    use super::*;
    /// Default config is enabled, PostgreSQL-backed, and includes "person".
    #[test]
    fn test_default_config() {
        let config = KnowledgeGraphConfig::default();
        assert!(config.enabled);
        assert_eq!(config.backend, "postgresql");
        assert!(config.entity_types.contains(&"person".to_string()));
    }
    /// The extraction prompt embeds both the input text and the
    /// entity-type instruction header.
    #[test]
    fn test_extraction_prompt() {
        let manager = KnowledgeGraphManager::new(KnowledgeGraphConfig::default());
        let prompt = manager.generate_extraction_prompt("John works at Acme Corp.");
        assert!(prompt.contains("John works at Acme Corp."));
        assert!(prompt.contains("ENTITY TYPES TO EXTRACT"));
    }
    /// A well-formed raw-JSON response yields one entity and one
    /// relationship (both above the default min_confidence).
    #[test]
    fn test_parse_extraction_response() {
        let manager = KnowledgeGraphManager::new(KnowledgeGraphConfig::default());
        let response = r#"{
"entities": [
{
"name": "John",
"canonical_name": "John Smith",
"entity_type": "person",
"confidence": 0.9,
"properties": {}
}
],
"relationships": [
{
"from_entity": "John",
"to_entity": "Acme Corp",
"relationship_type": "works_on",
"confidence": 0.85,
"evidence": "John works at Acme Corp"
}
]
}"#;
        let result = manager.parse_extraction_response(response, 100, 50);
        assert!(result.is_ok());
        let extraction = result.unwrap();
        assert_eq!(extraction.entities.len(), 1);
        assert_eq!(extraction.relationships.len(), 1);
    }
    /// to_dynamic produces a Rhai Map for a fully-populated entity.
    #[test]
    fn test_entity_to_dynamic() {
        let entity = KgEntity {
            id: Uuid::new_v4(),
            bot_id: Uuid::new_v4(),
            entity_type: "person".to_string(),
            entity_name: "John Smith".to_string(),
            aliases: vec!["John".to_string()],
            properties: serde_json::json!({"department": "Sales"}),
            confidence: 0.95,
            source: EntitySource::Manual,
            created_at: Utc::now(),
            updated_at: Utc::now(),
        };
        let dynamic = entity.to_dynamic();
        assert!(dynamic.is::<Map>());
    }
    /// Entity-type validation is case-insensitive and rejects unknowns.
    #[test]
    fn test_is_valid_entity_type() {
        let manager = KnowledgeGraphManager::new(KnowledgeGraphConfig::default());
        assert!(manager.is_valid_entity_type("person"));
        assert!(manager.is_valid_entity_type("PERSON"));
        assert!(manager.is_valid_entity_type("organization"));
        assert!(!manager.is_valid_entity_type("unknown_type"));
    }
    /// Mixed-type JSON objects convert to a Rhai Map.
    #[test]
    fn test_json_to_dynamic() {
        let json = serde_json::json!({
            "name": "test",
            "count": 42,
            "active": true,
            "tags": ["a", "b"]
        });
        let dynamic = json_to_dynamic(&json);
        assert!(dynamic.is::<Map>());
    }
}

View file

@ -1,11 +1,15 @@
pub mod a2a_protocol;
pub mod add_bot; pub mod add_bot;
pub mod add_member; pub mod add_member;
pub mod add_suggestion; pub mod add_suggestion;
pub mod agent_reflection;
pub mod api_tool_generator;
pub mod arrays; pub mod arrays;
pub mod book; pub mod book;
pub mod bot_memory; pub mod bot_memory;
pub mod clear_kb; pub mod clear_kb;
pub mod clear_tools; pub mod clear_tools;
pub mod code_sandbox;
pub mod core_functions; pub mod core_functions;
pub mod create_draft; pub mod create_draft;
pub mod create_site; pub mod create_site;
@ -13,6 +17,7 @@ pub mod create_task;
pub mod crm; pub mod crm;
pub mod data_operations; pub mod data_operations;
pub mod datetime; pub mod datetime;
pub mod episodic_memory;
pub mod errors; pub mod errors;
pub mod file_operations; pub mod file_operations;
pub mod find; pub mod find;
@ -22,14 +27,17 @@ pub mod format;
pub mod get; pub mod get;
pub mod hear_talk; pub mod hear_talk;
pub mod http_operations; pub mod http_operations;
pub mod human_approval;
pub mod import_export; pub mod import_export;
pub mod kb_statistics; pub mod kb_statistics;
pub mod knowledge_graph;
pub mod last; pub mod last;
pub mod lead_scoring; pub mod lead_scoring;
pub mod llm_keyword; pub mod llm_keyword;
pub mod llm_macros; pub mod llm_macros;
pub mod math; pub mod math;
pub mod messaging; pub mod messaging;
pub mod model_routing;
pub mod multimodal; pub mod multimodal;
pub mod on; pub mod on;
pub mod on_form_submit; pub mod on_form_submit;
@ -55,6 +63,7 @@ pub mod universal_messaging;
pub mod use_kb; pub mod use_kb;
pub mod use_tool; pub mod use_tool;
pub mod use_website; pub mod use_website;
pub mod user_memory;
pub mod validation; pub mod validation;
pub mod wait; pub mod wait;
pub mod weather; pub mod weather;

View file

@ -0,0 +1,638 @@
use crate::shared::models::UserSession;
use crate::shared::state::AppState;
use diesel::prelude::*;
use log::{error, info, trace};
use rhai::{Dynamic, Engine};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::sync::Arc;
use uuid::Uuid;
/// Model routing configuration.
///
/// One entry per configured LLM backend, loaded from the bot configuration
/// table (see `load_model_config`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModelConfig {
    pub name: String,            // Logical name, e.g. "default", "fast", "quality", "code"
    pub url: String,             // Endpoint URL of the model server (llm-url-{name})
    pub model_path: String,      // Model file path/identifier (llm-model-{name})
    pub api_key: Option<String>, // Optional API key; a config value of "none" maps to None
    pub max_tokens: Option<u32>, // Generation cap — not populated by the config loader
    pub temperature: Option<f32>, // Sampling temperature — not populated by the config loader
}
/// Routing strategy for automatic model selection.
///
/// Persisted per session as a lowercase string ("manual", "auto",
/// "load-balanced", "fallback") in session_preferences.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum RoutingStrategy {
    Manual,       // User explicitly specifies model
    Auto,         // Route based on query analysis
    LoadBalanced, // Distribute across available models
    Fallback,     // Try models in order until success
}

impl Default for RoutingStrategy {
    /// Manual is the default: the session's model never changes unless the
    /// user (or script) explicitly requests it.
    fn default() -> Self {
        RoutingStrategy::Manual
    }
}
/// Model router for managing multiple LLM configurations.
///
/// Holds the set of named model configs, the name of the default model,
/// and the strategy used by `route_query` to pick among them.
#[derive(Debug, Clone)]
pub struct ModelRouter {
    pub models: HashMap<String, ModelConfig>, // Keyed by logical model name
    pub default_model: String,                // Name used when no rule matches
    pub routing_strategy: RoutingStrategy,    // How route_query selects a model
}
impl ModelRouter {
    /// Create an empty router: no models, "default" as the default model
    /// name, and manual routing.
    pub fn new() -> Self {
        Self {
            models: HashMap::new(),
            default_model: "default".to_string(),
            routing_strategy: RoutingStrategy::Manual,
        }
    }
    /// Load models from config.csv llm-models property
    /// Format: llm-models,default;fast;quality;code
    ///
    /// Each semicolon-separated name is looked up in the bot configuration
    /// and added to `models` when its config resolves.
    pub fn from_config(config_models: &str, bot_id: Uuid, state: &AppState) -> Self {
        let mut router = Self::new();
        let model_names: Vec<&str> = config_models.split(';').collect();
        for name in model_names {
            let name = name.trim();
            if name.is_empty() {
                continue;
            }
            // Try to load model config from bot configuration
            // Looking for: llm-model-{name}, llm-url-{name}, llm-key-{name}
            if let Ok(mut conn) = state.conn.get() {
                let model_config = load_model_config(&mut conn, bot_id, name);
                if let Some(config) = model_config {
                    router.models.insert(name.to_string(), config);
                }
            }
        }
        // Set first model as default if available
        // NOTE(review): the default is set to the FIRST configured name even
        // when that model's config failed to load (get_default would then
        // return None) — confirm this is intended.
        if let Some(first_name) = config_models.split(';').next() {
            router.default_model = first_name.trim().to_string();
        }
        router
    }
    /// Get model configuration by name.
    pub fn get_model(&self, name: &str) -> Option<&ModelConfig> {
        self.models.get(name)
    }
    /// Get the default model configuration (None when the default name was
    /// never loaded into `models`).
    pub fn get_default(&self) -> Option<&ModelConfig> {
        self.models.get(&self.default_model)
    }
    /// Route a query to the optimal model based on strategy. Returns the
    /// chosen model NAME; falls back to `default_model` when no rule fires.
    pub fn route_query(&self, query: &str) -> &str {
        match self.routing_strategy {
            RoutingStrategy::Auto => self.auto_route(query),
            RoutingStrategy::LoadBalanced => self.load_balanced_route(),
            // Fallback ordering is not implemented here; it currently
            // behaves the same as Manual.
            RoutingStrategy::Fallback => &self.default_model,
            RoutingStrategy::Manual => &self.default_model,
        }
    }
    /// Automatic routing based on query analysis.
    ///
    /// Keyword/length heuristics checked in priority order:
    /// code > quality > fast. Each rule only fires when a model with the
    /// corresponding name is actually configured.
    fn auto_route(&self, query: &str) -> &str {
        let query_lower = query.to_lowercase();
        // Code-related queries
        if query_lower.contains("code")
            || query_lower.contains("program")
            || query_lower.contains("function")
            || query_lower.contains("debug")
            || query_lower.contains("error")
            || query_lower.contains("syntax")
        {
            if self.models.contains_key("code") {
                return "code";
            }
        }
        // Complex reasoning queries (or anything over 500 chars)
        if query_lower.contains("analyze")
            || query_lower.contains("explain")
            || query_lower.contains("compare")
            || query_lower.contains("evaluate")
            || query.len() > 500
        {
            if self.models.contains_key("quality") {
                return "quality";
            }
        }
        // Simple/fast queries (short, or common lookup phrasings)
        if query.len() < 100
            || query_lower.contains("what is")
            || query_lower.contains("define")
            || query_lower.contains("hello")
        {
            if self.models.contains_key("fast") {
                return "fast";
            }
        }
        &self.default_model
    }
    /// Simple round-robin load balancing
    fn load_balanced_route(&self) -> &str {
        // For simplicity, return default - a full implementation would track usage
        &self.default_model
    }
}
/// Load model configuration from database.
///
/// Reads the bot_configuration rows for keys `llm-model{-name}`,
/// `llm-url{-name}` and `llm-key{-name}` (no suffix for the "default"
/// model). Returns None when the query fails or when neither a model path
/// nor a URL is configured. An api key value of "none" is treated as absent.
fn load_model_config(
    conn: &mut diesel::PgConnection,
    bot_id: Uuid,
    model_name: &str,
) -> Option<ModelConfig> {
    // Row shape for the raw sql_query below.
    #[derive(QueryableByName)]
    struct ConfigRow {
        #[diesel(sql_type = diesel::sql_types::Text)]
        config_key: String,
        #[diesel(sql_type = diesel::sql_types::Text)]
        config_value: String,
    }
    // Query for model-specific config: "default" uses the unsuffixed keys.
    let suffix = if model_name == "default" {
        "".to_string()
    } else {
        format!("-{}", model_name)
    };
    let model_key = format!("llm-model{}", suffix);
    let url_key = format!("llm-url{}", suffix);
    let key_key = format!("llm-key{}", suffix);
    let configs: Vec<ConfigRow> = diesel::sql_query(
        "SELECT config_key, config_value FROM bot_configuration \
         WHERE bot_id = $1 AND config_key IN ($2, $3, $4)",
    )
    .bind::<diesel::sql_types::Uuid, _>(bot_id)
    .bind::<diesel::sql_types::Text, _>(&model_key)
    .bind::<diesel::sql_types::Text, _>(&url_key)
    .bind::<diesel::sql_types::Text, _>(&key_key)
    .load(conn)
    .ok()?;
    // Fold the key/value rows into the three fields we care about.
    let mut model_path = String::new();
    let mut url = String::new();
    let mut api_key = None;
    for config in configs {
        if config.config_key == model_key {
            model_path = config.config_value;
        } else if config.config_key == url_key {
            url = config.config_value;
        } else if config.config_key == key_key && config.config_value != "none" {
            api_key = Some(config.config_value);
        }
    }
    // A model needs at least a path or a URL to be usable.
    if model_path.is_empty() && url.is_empty() {
        return None;
    }
    Some(ModelConfig {
        name: model_name.to_string(),
        url,
        model_path,
        api_key,
        max_tokens: None,
        temperature: None,
    })
}
/// Registers model routing keywords
pub fn register_model_routing_keywords(
state: Arc<AppState>,
user: UserSession,
engine: &mut Engine,
) {
use_model_keyword(state.clone(), user.clone(), engine);
set_model_routing_keyword(state.clone(), user.clone(), engine);
get_current_model_keyword(state.clone(), user.clone(), engine);
list_models_keyword(state.clone(), user.clone(), engine);
}
/// USE MODEL "model_name"
/// Switch the LLM model for the current conversation.
///
/// Registers a Rhai custom syntax that persists the chosen model name into
/// session_preferences. Because the Rhai callback is synchronous, the async
/// DB write runs on a freshly spawned thread with its own Tokio runtime and
/// is awaited over a channel with a 10s timeout.
pub fn use_model_keyword(state: Arc<AppState>, user: UserSession, engine: &mut Engine) {
    let state_clone = Arc::clone(&state);
    let user_clone = user.clone();
    engine
        .register_custom_syntax(
            &["USE", "MODEL", "$expr$"],
            false, // does not introduce new variables into the scope
            move |context, inputs| {
                // Evaluate the expression and strip surrounding quotes.
                let model_name = context
                    .eval_expression_tree(&inputs[0])?
                    .to_string()
                    .trim_matches('"')
                    .to_string();
                trace!(
                    "USE MODEL '{}' for session: {}",
                    model_name,
                    user_clone.id
                );
                let state_for_task = Arc::clone(&state_clone);
                let session_id = user_clone.id;
                let model_name_clone = model_name.clone();
                // Bridge sync Rhai -> async DB write via thread + channel.
                let (tx, rx) = std::sync::mpsc::channel();
                std::thread::spawn(move || {
                    let rt = tokio::runtime::Runtime::new().expect("Failed to create runtime");
                    let result = rt.block_on(async {
                        set_session_model(&state_for_task, session_id, &model_name_clone).await
                    });
                    let _ = tx.send(result);
                });
                match rx.recv_timeout(std::time::Duration::from_secs(10)) {
                    Ok(Ok(msg)) => Ok(Dynamic::from(msg)),
                    Ok(Err(e)) => Err(Box::new(rhai::EvalAltResult::ErrorRuntime(
                        e.into(),
                        rhai::Position::NONE,
                    ))),
                    Err(_) => Err(Box::new(rhai::EvalAltResult::ErrorRuntime(
                        "USE MODEL timed out".into(),
                        rhai::Position::NONE,
                    ))),
                }
            },
        )
        .expect("Failed to register USE MODEL syntax");
}
/// SET MODEL ROUTING "strategy"
/// Set the model routing strategy: "manual", "auto", "load-balanced", "fallback".
///
/// Unrecognized strategy strings silently fall back to Manual. Persistence
/// uses the same thread + runtime + channel bridge (10s timeout) as
/// `use_model_keyword`.
pub fn set_model_routing_keyword(state: Arc<AppState>, user: UserSession, engine: &mut Engine) {
    let state_clone = Arc::clone(&state);
    let user_clone = user.clone();
    engine
        .register_custom_syntax(
            &["SET", "MODEL", "ROUTING", "$expr$"],
            false, // does not introduce new variables into the scope
            move |context, inputs| {
                // Evaluate, strip quotes, and normalize to lowercase.
                let strategy_str = context
                    .eval_expression_tree(&inputs[0])?
                    .to_string()
                    .trim_matches('"')
                    .to_lowercase();
                let strategy = match strategy_str.as_str() {
                    "auto" => RoutingStrategy::Auto,
                    "load-balanced" | "loadbalanced" => RoutingStrategy::LoadBalanced,
                    "fallback" => RoutingStrategy::Fallback,
                    // Anything else (including typos) means Manual.
                    _ => RoutingStrategy::Manual,
                };
                trace!(
                    "SET MODEL ROUTING {:?} for session: {}",
                    strategy,
                    user_clone.id
                );
                let state_for_task = Arc::clone(&state_clone);
                let session_id = user_clone.id;
                // Bridge sync Rhai -> async DB write via thread + channel.
                let (tx, rx) = std::sync::mpsc::channel();
                std::thread::spawn(move || {
                    let rt = tokio::runtime::Runtime::new().expect("Failed to create runtime");
                    let result = rt.block_on(async {
                        set_session_routing_strategy(&state_for_task, session_id, strategy).await
                    });
                    let _ = tx.send(result);
                });
                match rx.recv_timeout(std::time::Duration::from_secs(10)) {
                    Ok(Ok(msg)) => Ok(Dynamic::from(msg)),
                    Ok(Err(e)) => Err(Box::new(rhai::EvalAltResult::ErrorRuntime(
                        e.into(),
                        rhai::Position::NONE,
                    ))),
                    Err(_) => Err(Box::new(rhai::EvalAltResult::ErrorRuntime(
                        "SET MODEL ROUTING timed out".into(),
                        rhai::Position::NONE,
                    ))),
                }
            },
        )
        .expect("Failed to register SET MODEL ROUTING syntax");
}
/// GET CURRENT MODEL()
/// Returns the name of the currently active model for this session.
///
/// Falls back to "default" when the preference is unset or the DB is
/// unreachable. NOTE(review): the registered function name contains
/// spaces, which standard Rhai identifiers cannot call directly —
/// presumably a custom tokenizer maps the keyword; confirm.
pub fn get_current_model_keyword(state: Arc<AppState>, user: UserSession, engine: &mut Engine) {
    let state_clone = Arc::clone(&state);
    let user_clone = user.clone();
    engine.register_fn("GET CURRENT MODEL", move || -> String {
        let state = Arc::clone(&state_clone);
        if let Ok(mut conn) = state.conn.get() {
            get_session_model_sync(&mut conn, user_clone.id).unwrap_or_else(|_| "default".to_string())
        } else {
            "default".to_string()
        }
    });
}
/// LIST MODELS()
/// Returns an array of available model names.
///
/// Reads the bot's `llm-models` configuration; returns an empty array when
/// the DB connection cannot be acquired. NOTE(review): function name with
/// spaces — see get_current_model_keyword.
pub fn list_models_keyword(state: Arc<AppState>, user: UserSession, engine: &mut Engine) {
    let state_clone = Arc::clone(&state);
    let user_clone = user.clone();
    engine.register_fn("LIST MODELS", move || -> rhai::Array {
        let state = Arc::clone(&state_clone);
        if let Ok(mut conn) = state.conn.get() {
            list_available_models_sync(&mut conn, user_clone.bot_id)
                .unwrap_or_default()
                .into_iter()
                .map(Dynamic::from)
                .collect()
        } else {
            rhai::Array::new()
        }
    });
}
// ============================================================================
// Database Operations
// ============================================================================
/// Set the model for a session.
///
/// Upserts the 'current_model' row in session_preferences for `session_id`
/// and returns a user-facing confirmation message. Errors are returned as
/// strings (connection or query failure).
async fn set_session_model(
    state: &AppState,
    session_id: Uuid,
    model_name: &str,
) -> Result<String, String> {
    let mut conn = state
        .conn
        .get()
        .map_err(|e| format!("Failed to acquire database connection: {}", e))?;
    let now = chrono::Utc::now();
    // Update or insert session model preference
    diesel::sql_query(
        "INSERT INTO session_preferences (session_id, preference_key, preference_value, updated_at) \
         VALUES ($1, 'current_model', $2, $3) \
         ON CONFLICT (session_id, preference_key) DO UPDATE SET \
         preference_value = EXCLUDED.preference_value, \
         updated_at = EXCLUDED.updated_at",
    )
    .bind::<diesel::sql_types::Uuid, _>(session_id)
    .bind::<diesel::sql_types::Text, _>(model_name)
    .bind::<diesel::sql_types::Timestamptz, _>(now)
    .execute(&mut conn)
    .map_err(|e| format!("Failed to set session model: {}", e))?;
    info!("Session {} now using model: {}", session_id, model_name);
    Ok(format!("Now using model: {}", model_name))
}
/// Set the routing strategy for a session.
///
/// Serializes the strategy to its canonical lowercase string and upserts
/// the 'model_routing' row in session_preferences (same table/shape as
/// `set_session_model`). Returns a user-facing confirmation message.
async fn set_session_routing_strategy(
    state: &AppState,
    session_id: Uuid,
    strategy: RoutingStrategy,
) -> Result<String, String> {
    let mut conn = state
        .conn
        .get()
        .map_err(|e| format!("Failed to acquire database connection: {}", e))?;
    let now = chrono::Utc::now();
    // Canonical string form — must stay in sync with the parsing in
    // get_session_routing_strategy.
    let strategy_str = match strategy {
        RoutingStrategy::Manual => "manual",
        RoutingStrategy::Auto => "auto",
        RoutingStrategy::LoadBalanced => "load-balanced",
        RoutingStrategy::Fallback => "fallback",
    };
    diesel::sql_query(
        "INSERT INTO session_preferences (session_id, preference_key, preference_value, updated_at) \
         VALUES ($1, 'model_routing', $2, $3) \
         ON CONFLICT (session_id, preference_key) DO UPDATE SET \
         preference_value = EXCLUDED.preference_value, \
         updated_at = EXCLUDED.updated_at",
    )
    .bind::<diesel::sql_types::Uuid, _>(session_id)
    .bind::<diesel::sql_types::Text, _>(strategy_str)
    .bind::<diesel::sql_types::Timestamptz, _>(now)
    .execute(&mut conn)
    .map_err(|e| format!("Failed to set routing strategy: {}", e))?;
    info!(
        "Session {} routing strategy set to: {}",
        session_id, strategy_str
    );
    Ok(format!("Model routing set to: {}", strategy_str))
}
/// Get the current model for a session (sync version).
///
/// Reads the 'current_model' preference row; a missing row yields
/// "default", a query failure yields Err.
fn get_session_model_sync(
    conn: &mut diesel::PgConnection,
    session_id: Uuid,
) -> Result<String, String> {
    // Row shape for the raw sql_query below.
    #[derive(QueryableByName)]
    struct PrefValue {
        #[diesel(sql_type = diesel::sql_types::Text)]
        preference_value: String,
    }
    let result: Option<PrefValue> = diesel::sql_query(
        "SELECT preference_value FROM session_preferences \
         WHERE session_id = $1 AND preference_key = 'current_model' LIMIT 1",
    )
    .bind::<diesel::sql_types::Uuid, _>(session_id)
    .get_result(conn)
    .optional()
    .map_err(|e| format!("Failed to get session model: {}", e))?;
    Ok(result.map(|r| r.preference_value).unwrap_or_else(|| "default".to_string()))
}
/// List available models for a bot (sync version).
///
/// Splits the bot's 'llm-models' configuration value on ';', trimming and
/// dropping empty entries. When the config row is absent, returns the
/// single entry "default".
fn list_available_models_sync(
    conn: &mut diesel::PgConnection,
    bot_id: Uuid,
) -> Result<Vec<String>, String> {
    // Row shape for the raw sql_query below.
    #[derive(QueryableByName)]
    struct ConfigRow {
        #[diesel(sql_type = diesel::sql_types::Text)]
        config_value: String,
    }
    // Get llm-models config
    let result: Option<ConfigRow> = diesel::sql_query(
        "SELECT config_value FROM bot_configuration \
         WHERE bot_id = $1 AND config_key = 'llm-models' LIMIT 1",
    )
    .bind::<diesel::sql_types::Uuid, _>(bot_id)
    .get_result(conn)
    .optional()
    .map_err(|e| format!("Failed to list models: {}", e))?;
    if let Some(config) = result {
        Ok(config
            .config_value
            .split(';')
            .map(|s| s.trim().to_string())
            .filter(|s| !s.is_empty())
            .collect())
    } else {
        // Return default model if no models configured
        Ok(vec!["default".to_string()])
    }
}
/// Get the current model name for a session (public helper).
///
/// Infallible wrapper around `get_session_model_sync`: any connection or
/// query failure collapses to "default".
pub fn get_session_model(state: &AppState, session_id: Uuid) -> String {
    match state.conn.get() {
        Ok(mut conn) => get_session_model_sync(&mut conn, session_id)
            .unwrap_or_else(|_| "default".to_string()),
        Err(_) => "default".to_string(),
    }
}
/// Get the routing strategy for a session.
///
/// Reads the 'model_routing' preference row and parses the stored string
/// (see `set_session_routing_strategy` for the canonical forms). Any
/// failure — no connection, query error, missing row, or an unknown
/// string — yields RoutingStrategy::Manual.
pub fn get_session_routing_strategy(state: &AppState, session_id: Uuid) -> RoutingStrategy {
    if let Ok(mut conn) = state.conn.get() {
        // Row shape for the raw sql_query below.
        #[derive(QueryableByName)]
        struct PrefValue {
            #[diesel(sql_type = diesel::sql_types::Text)]
            preference_value: String,
        }
        let result: Option<PrefValue> = diesel::sql_query(
            "SELECT preference_value FROM session_preferences \
             WHERE session_id = $1 AND preference_key = 'model_routing' LIMIT 1",
        )
        .bind::<diesel::sql_types::Uuid, _>(session_id)
        .get_result(&mut conn)
        .optional()
        .ok()
        .flatten();
        if let Some(pref) = result {
            match pref.preference_value.as_str() {
                "auto" => RoutingStrategy::Auto,
                "load-balanced" => RoutingStrategy::LoadBalanced,
                "fallback" => RoutingStrategy::Fallback,
                _ => RoutingStrategy::Manual,
            }
        } else {
            RoutingStrategy::Manual
        }
    } else {
        RoutingStrategy::Manual
    }
}
// ============================================================================
// Tests
// ============================================================================
#[cfg(test)]
mod tests {
    use super::*;
    /// A fresh router has no models, "default" as default, manual routing.
    #[test]
    fn test_model_router_new() {
        let router = ModelRouter::new();
        assert_eq!(router.default_model, "default");
        assert!(router.models.is_empty());
        assert_eq!(router.routing_strategy, RoutingStrategy::Manual);
    }
    /// Auto routing picks the "code" model for code-related keywords.
    #[test]
    fn test_auto_routing_code() {
        let mut router = ModelRouter::new();
        router.models.insert(
            "code".to_string(),
            ModelConfig {
                name: "code".to_string(),
                url: "http://localhost:8081".to_string(),
                model_path: "codellama.gguf".to_string(),
                api_key: None,
                max_tokens: None,
                temperature: None,
            },
        );
        router.routing_strategy = RoutingStrategy::Auto;
        let result = router.route_query("Help me debug this code");
        assert_eq!(result, "code");
    }
    /// Auto routing picks the "quality" model for analysis-style queries.
    #[test]
    fn test_auto_routing_quality() {
        let mut router = ModelRouter::new();
        router.models.insert(
            "quality".to_string(),
            ModelConfig {
                name: "quality".to_string(),
                url: "http://localhost:8081".to_string(),
                model_path: "large-model.gguf".to_string(),
                api_key: None,
                max_tokens: None,
                temperature: None,
            },
        );
        router.routing_strategy = RoutingStrategy::Auto;
        let result = router.route_query("Please analyze and compare these two approaches in detail");
        assert_eq!(result, "quality");
    }
    /// Auto routing picks the "fast" model for short lookup queries.
    #[test]
    fn test_auto_routing_fast() {
        let mut router = ModelRouter::new();
        router.models.insert(
            "fast".to_string(),
            ModelConfig {
                name: "fast".to_string(),
                url: "http://localhost:8081".to_string(),
                model_path: "small-model.gguf".to_string(),
                api_key: None,
                max_tokens: None,
                temperature: None,
            },
        );
        router.routing_strategy = RoutingStrategy::Auto;
        let result = router.route_query("What is AI?");
        assert_eq!(result, "fast");
    }
    /// The default strategy is Manual.
    #[test]
    fn test_routing_strategy_default() {
        let strategy = RoutingStrategy::default();
        assert_eq!(strategy, RoutingStrategy::Manual);
    }
}

View file

@ -0,0 +1,310 @@
use crate::shared::models::UserSession;
use crate::shared::state::AppState;
use diesel::prelude::*;
use log::{error, trace};
use rhai::{Dynamic, Engine};
use std::sync::Arc;
use uuid::Uuid;
/// Registers user memory keywords for cross-session memory persistence.
/// Unlike bot memory, user memory persists across all sessions and bots for a specific user.
pub fn register_user_memory_keywords(state: Arc<AppState>, user: UserSession, engine: &mut Engine) {
set_user_memory_keyword(state.clone(), user.clone(), engine);
get_user_memory_keyword(state.clone(), user.clone(), engine);
remember_user_fact_keyword(state.clone(), user.clone(), engine);
get_user_facts_keyword(state.clone(), user.clone(), engine);
clear_user_memory_keyword(state.clone(), user.clone(), engine);
}
/// SET USER MEMORY key, value
/// Stores a key-value pair that persists across all sessions for this user
pub fn set_user_memory_keyword(state: Arc<AppState>, user: UserSession, engine: &mut Engine) {
    let state_clone = Arc::clone(&state);
    let user_clone = user.clone();
    engine
        .register_custom_syntax(
            // Token stream: SET USER MEMORY <key-expr> , <value-expr>
            &["SET", "USER", "MEMORY", "$expr$", ",", "$expr$"],
            false, // does not introduce new variables into the script scope
            move |context, inputs| {
                // Evaluate both expressions eagerly in the script's context;
                // whatever they yield is stringified for storage.
                let key = context.eval_expression_tree(&inputs[0])?.to_string();
                let value = context.eval_expression_tree(&inputs[1])?.to_string();
                // Fire-and-forget: the DB write happens on a spawned task so
                // the script is never blocked; failures are only logged.
                let state_for_spawn = Arc::clone(&state_clone);
                let user_clone_spawn = user_clone.clone();
                let key_clone = key.clone();
                let value_clone = value.clone();
                tokio::spawn(async move {
                    if let Err(e) = set_user_memory_async(
                        &state_for_spawn,
                        user_clone_spawn.user_id,
                        &key_clone,
                        &value_clone,
                        "preference", // SET USER MEMORY always stores type 'preference'
                    )
                    .await
                    {
                        error!("Failed to set user memory: {}", e);
                    } else {
                        trace!(
                            "Set user memory for key: {} with value length: {}",
                            key_clone,
                            value_clone.len()
                        );
                    }
                });
                Ok(Dynamic::UNIT)
            },
        )
        .expect("Failed to register SET USER MEMORY syntax");
}
/// GET USER MEMORY("key")
/// Retrieves a value from user's cross-session memory.
///
/// Returns an empty string when the pool has no connection or the key is
/// absent. NOTE(review): the registered name contains spaces — presumably a
/// pre-processing layer maps the BASIC-style keyword onto it; confirm.
pub fn get_user_memory_keyword(state: Arc<AppState>, user: UserSession, engine: &mut Engine) {
    let shared_state = Arc::clone(&state);
    let session_user = user.clone();
    engine.register_fn("GET USER MEMORY", move |key_param: String| -> String {
        let app = Arc::clone(&shared_state);
        match app.conn.get() {
            Ok(mut db) => get_user_memory_sync(&mut db, session_user.user_id, &key_param)
                .unwrap_or_default(),
            Err(_) => String::new(),
        }
    });
}
/// REMEMBER USER FACT "fact about user"
/// Stores a learned fact about the user for future reference
pub fn remember_user_fact_keyword(state: Arc<AppState>, user: UserSession, engine: &mut Engine) {
    let state_clone = Arc::clone(&state);
    let user_clone = user.clone();
    engine
        .register_custom_syntax(
            // Token stream: REMEMBER USER FACT <fact-expr>
            &["REMEMBER", "USER", "FACT", "$expr$"],
            false, // no new script-scope variables
            move |context, inputs| {
                let fact = context.eval_expression_tree(&inputs[0])?.to_string();
                // Fire-and-forget insert; script execution continues without
                // waiting for the DB (errors are only logged).
                let state_for_spawn = Arc::clone(&state_clone);
                let user_clone_spawn = user_clone.clone();
                let fact_clone = fact.clone();
                tokio::spawn(async move {
                    if let Err(e) = add_user_fact_async(
                        &state_for_spawn,
                        user_clone_spawn.user_id,
                        &fact_clone,
                    )
                    .await
                    {
                        error!("Failed to remember user fact: {}", e);
                    } else {
                        trace!("Remembered user fact: {}", fact_clone);
                    }
                });
                Ok(Dynamic::UNIT)
            },
        )
        .expect("Failed to register REMEMBER USER FACT syntax");
}
/// GET USER FACTS()
/// Retrieves all learned facts about the user.
///
/// Yields an empty rhai array when no connection is available or the
/// lookup fails.
pub fn get_user_facts_keyword(state: Arc<AppState>, user: UserSession, engine: &mut Engine) {
    let shared_state = Arc::clone(&state);
    let session_user = user.clone();
    engine.register_fn("GET USER FACTS", move || -> rhai::Array {
        let app = Arc::clone(&shared_state);
        match app.conn.get() {
            Ok(mut db) => {
                let facts =
                    get_user_facts_sync(&mut db, session_user.user_id).unwrap_or_default();
                facts.into_iter().map(Dynamic::from).collect()
            }
            Err(_) => rhai::Array::new(),
        }
    });
}
/// CLEAR USER MEMORY
/// Clears all user memory (preferences and facts)
pub fn clear_user_memory_keyword(state: Arc<AppState>, user: UserSession, engine: &mut Engine) {
    let state_clone = Arc::clone(&state);
    let user_clone = user.clone();
    engine
        .register_custom_syntax(&["CLEAR", "USER", "MEMORY"], false, move |_context, _inputs| {
            // Fire-and-forget: the DELETE runs on a spawned task so the
            // script is not blocked; failures are only logged.
            let state_for_spawn = Arc::clone(&state_clone);
            let user_clone_spawn = user_clone.clone();
            tokio::spawn(async move {
                if let Err(e) = clear_user_memory_async(&state_for_spawn, user_clone_spawn.user_id).await {
                    error!("Failed to clear user memory: {}", e);
                } else {
                    trace!("Cleared all user memory for user: {}", user_clone_spawn.user_id);
                }
            });
            Ok(Dynamic::UNIT)
        })
        .expect("Failed to register CLEAR USER MEMORY syntax");
}
// ============================================================================
// Database Operations
// ============================================================================
/// Async function to set user memory
///
/// Upserts one `(user_id, key)` row in `user_memories`; `memory_type`
/// distinguishes preferences from facts. On conflict the original row id
/// and `created_at` are kept — only value, type and `updated_at` change.
/// Errors are returned as human-readable strings.
async fn set_user_memory_async(
    state: &AppState,
    user_id: Uuid,
    key: &str,
    value: &str,
    memory_type: &str,
) -> Result<(), String> {
    let mut conn = state
        .conn
        .get()
        .map_err(|e| format!("Failed to acquire database connection: {}", e))?;
    let now = chrono::Utc::now();
    let new_id = Uuid::new_v4();
    // Use raw SQL for upsert since we need to handle the user_memories table
    // (bind order is positional and must match the $1..$7 placeholders).
    diesel::sql_query(
        "INSERT INTO user_memories (id, user_id, key, value, memory_type, created_at, updated_at) \
         VALUES ($1, $2, $3, $4, $5, $6, $7) \
         ON CONFLICT (user_id, key) DO UPDATE SET \
         value = EXCLUDED.value, \
         memory_type = EXCLUDED.memory_type, \
         updated_at = EXCLUDED.updated_at",
    )
    .bind::<diesel::sql_types::Uuid, _>(new_id)
    .bind::<diesel::sql_types::Uuid, _>(user_id)
    .bind::<diesel::sql_types::Text, _>(key)
    .bind::<diesel::sql_types::Text, _>(value)
    .bind::<diesel::sql_types::Text, _>(memory_type)
    .bind::<diesel::sql_types::Timestamptz, _>(now)
    .bind::<diesel::sql_types::Timestamptz, _>(now)
    .execute(&mut conn)
    .map_err(|e| format!("Failed to set user memory: {}", e))?;
    Ok(())
}
/// Sync function to get user memory (for use in registered functions).
///
/// Returns the stored value for `(user_id, key)`, or an empty string when
/// no row exists; query failures surface as `Err(String)`.
fn get_user_memory_sync(
    conn: &mut diesel::PgConnection,
    user_id: Uuid,
    key: &str,
) -> Result<String, String> {
    #[derive(QueryableByName)]
    struct StoredValue {
        #[diesel(sql_type = diesel::sql_types::Text)]
        value: String,
    }

    let row: Option<StoredValue> = diesel::sql_query(
        "SELECT value FROM user_memories WHERE user_id = $1 AND key = $2 LIMIT 1",
    )
    .bind::<diesel::sql_types::Uuid, _>(user_id)
    .bind::<diesel::sql_types::Text, _>(key)
    .get_result(conn)
    .optional()
    .map_err(|e| format!("Failed to get user memory: {}", e))?;

    match row {
        Some(found) => Ok(found.value),
        None => Ok(String::new()),
    }
}
/// Async function to add a user fact
///
/// Each fact is stored as its own row with `memory_type = 'fact'` and a
/// synthetic unique key, so facts accumulate instead of upserting over
/// each other.
async fn add_user_fact_async(
    state: &AppState,
    user_id: Uuid,
    fact: &str,
) -> Result<(), String> {
    let mut conn = state
        .conn
        .get()
        .map_err(|e| format!("Failed to acquire database connection: {}", e))?;
    let now = chrono::Utc::now();
    let new_id = Uuid::new_v4();
    // Random key (distinct from the row id) sidesteps the (user_id, key)
    // conflict path used by preference upserts.
    let fact_key = format!("fact_{}", Uuid::new_v4());
    diesel::sql_query(
        "INSERT INTO user_memories (id, user_id, key, value, memory_type, created_at, updated_at) \
         VALUES ($1, $2, $3, $4, 'fact', $5, $6)",
    )
    .bind::<diesel::sql_types::Uuid, _>(new_id)
    .bind::<diesel::sql_types::Uuid, _>(user_id)
    .bind::<diesel::sql_types::Text, _>(&fact_key)
    .bind::<diesel::sql_types::Text, _>(fact)
    .bind::<diesel::sql_types::Timestamptz, _>(now)
    .bind::<diesel::sql_types::Timestamptz, _>(now)
    .execute(&mut conn)
    .map_err(|e| format!("Failed to add user fact: {}", e))?;
    Ok(())
}
/// Sync function to get all user facts.
///
/// Returns the user's fact values, newest first (ordered by `created_at`).
fn get_user_facts_sync(
    conn: &mut diesel::PgConnection,
    user_id: Uuid,
) -> Result<Vec<String>, String> {
    #[derive(QueryableByName)]
    struct FactRow {
        #[diesel(sql_type = diesel::sql_types::Text)]
        value: String,
    }

    let rows: Vec<FactRow> = diesel::sql_query(
        "SELECT value FROM user_memories WHERE user_id = $1 AND memory_type = 'fact' ORDER BY created_at DESC",
    )
    .bind::<diesel::sql_types::Uuid, _>(user_id)
    .load(conn)
    .map_err(|e| format!("Failed to get user facts: {}", e))?;

    let mut facts = Vec::with_capacity(rows.len());
    for row in rows {
        facts.push(row.value);
    }
    Ok(facts)
}
/// Async function to clear all user memory.
///
/// Deletes every `user_memories` row (preferences and facts) for the user.
async fn clear_user_memory_async(state: &AppState, user_id: Uuid) -> Result<(), String> {
    let mut conn = match state.conn.get() {
        Ok(pooled) => pooled,
        Err(e) => return Err(format!("Failed to acquire database connection: {}", e)),
    };
    diesel::sql_query("DELETE FROM user_memories WHERE user_id = $1")
        .bind::<diesel::sql_types::Uuid, _>(user_id)
        .execute(&mut conn)
        .map_err(|e| format!("Failed to clear user memory: {}", e))
        .map(|_| ())
}
// ============================================================================
// Tests
// ============================================================================
#[cfg(test)]
mod tests {
    use super::*;

    /// The synthetic fact key must carry the "fact_" prefix plus a UUID.
    #[test]
    fn test_fact_key_generation() {
        let generated = format!("fact_{}", Uuid::new_v4());
        assert!(generated.len() > 5);
        assert!(generated.starts_with("fact_"));
    }
}

View file

@ -9,6 +9,7 @@ pub mod cache;
pub mod compact_prompt; pub mod compact_prompt;
pub mod llm_models; pub mod llm_models;
pub mod local; pub mod local;
pub mod observability;
#[async_trait] #[async_trait]
pub trait LLMProvider: Send + Sync { pub trait LLMProvider: Send + Sync {
async fn generate( async fn generate(

1208
src/llm/observability.rs Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,881 @@
//! Hybrid Search Module for RAG 2.0
//!
//! Implements hybrid search combining sparse (BM25) and dense (embedding) retrieval
//! with Reciprocal Rank Fusion (RRF) for optimal results.
//!
//! Config.csv properties:
//! ```csv
//! rag-hybrid-enabled,true
//! rag-dense-weight,0.7
//! rag-sparse-weight,0.3
//! rag-reranker-enabled,true
//! rag-reranker-model,cross-encoder/ms-marco-MiniLM-L-6-v2
//! ```
use log::{debug, error, info, trace, warn};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::sync::Arc;
use uuid::Uuid;
use crate::shared::state::AppState;
/// Configuration for hybrid search
///
/// Loaded per-bot by `from_bot_config`, which also re-normalizes the
/// dense/sparse weights so they sum to 1.0.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HybridSearchConfig {
    /// Weight for dense (embedding) search results (0.0 - 1.0)
    pub dense_weight: f32,
    /// Weight for sparse (BM25) search results (0.0 - 1.0)
    pub sparse_weight: f32,
    /// Whether to use reranker for final results
    pub reranker_enabled: bool,
    /// Reranker model name/path
    pub reranker_model: String,
    /// Maximum number of results to return
    pub max_results: usize,
    /// Minimum score threshold (0.0 - 1.0)
    pub min_score: f32,
    /// K parameter for RRF (typically 60)
    pub rrf_k: u32,
}
impl Default for HybridSearchConfig {
fn default() -> Self {
Self {
dense_weight: 0.7,
sparse_weight: 0.3,
reranker_enabled: false,
reranker_model: "cross-encoder/ms-marco-MiniLM-L-6-v2".to_string(),
max_results: 10,
min_score: 0.0,
rrf_k: 60,
}
}
}
impl HybridSearchConfig {
    /// Load config from bot configuration
    ///
    /// Reads every `rag-*` row from `bot_configuration` for this bot and
    /// overlays it on `Self::default()`. Unknown keys are ignored and
    /// unparsable values fall back to the per-key default. Dense/sparse
    /// weights are re-normalized to sum to 1.0 afterwards.
    /// NOTE(review): `rag-hybrid-enabled` (listed in the module docs) is
    /// never read here — presumably checked by the caller; confirm.
    pub fn from_bot_config(state: &AppState, bot_id: Uuid) -> Self {
        use diesel::prelude::*;
        let mut config = Self::default();
        // No connection -> silently use defaults (same as no rows found).
        if let Ok(mut conn) = state.conn.get() {
            #[derive(QueryableByName)]
            struct ConfigRow {
                #[diesel(sql_type = diesel::sql_types::Text)]
                config_key: String,
                #[diesel(sql_type = diesel::sql_types::Text)]
                config_value: String,
            }
            let configs: Vec<ConfigRow> = diesel::sql_query(
                "SELECT config_key, config_value FROM bot_configuration \
                 WHERE bot_id = $1 AND config_key LIKE 'rag-%'",
            )
            .bind::<diesel::sql_types::Uuid, _>(bot_id)
            .load(&mut conn)
            .unwrap_or_default();
            for row in configs {
                match row.config_key.as_str() {
                    "rag-dense-weight" => {
                        config.dense_weight = row.config_value.parse().unwrap_or(0.7);
                    }
                    "rag-sparse-weight" => {
                        config.sparse_weight = row.config_value.parse().unwrap_or(0.3);
                    }
                    "rag-reranker-enabled" => {
                        config.reranker_enabled = row.config_value.to_lowercase() == "true";
                    }
                    "rag-reranker-model" => {
                        config.reranker_model = row.config_value;
                    }
                    "rag-max-results" => {
                        config.max_results = row.config_value.parse().unwrap_or(10);
                    }
                    "rag-min-score" => {
                        config.min_score = row.config_value.parse().unwrap_or(0.0);
                    }
                    "rag-rrf-k" => {
                        config.rrf_k = row.config_value.parse().unwrap_or(60);
                    }
                    _ => {}
                }
            }
        }
        // Normalize weights (skipped when both are zero to avoid div-by-zero)
        let total = config.dense_weight + config.sparse_weight;
        if total > 0.0 {
            config.dense_weight /= total;
            config.sparse_weight /= total;
        }
        config
    }
}
/// Search result from any retrieval method
///
/// Produced by sparse, dense and hybrid paths alike; `search_method`
/// records which one, and for hybrid results `score` is the RRF score
/// normalized against the top hit.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchResult {
    /// Unique document identifier
    pub doc_id: String,
    /// Document content
    pub content: String,
    /// Source file/email/etc path
    pub source: String,
    /// Relevance score (0.0 - 1.0)
    pub score: f32,
    /// Additional metadata
    pub metadata: HashMap<String, String>,
    /// Search method that produced this result
    pub search_method: SearchMethod,
}
/// Search method used to retrieve a result
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum SearchMethod {
    /// Embedding similarity via Qdrant
    Dense,
    /// BM25 keyword match
    Sparse,
    /// RRF fusion of dense + sparse rankings
    Hybrid,
    /// Result re-scored by the reranking pass
    Reranked,
}
/// BM25 search index for sparse retrieval
pub struct BM25Index {
    /// Document frequency for each term
    doc_freq: HashMap<String, usize>,
    /// Total number of documents
    doc_count: usize,
    /// Average document length
    avg_doc_len: f32,
    /// Document lengths
    doc_lengths: HashMap<String, usize>,
    /// Term frequencies per document
    term_freqs: HashMap<String, HashMap<String, usize>>,
    /// BM25 parameters
    k1: f32,
    b: f32,
}

impl BM25Index {
    /// Create an empty index with standard BM25 parameters (k1 = 1.2, b = 0.75).
    pub fn new() -> Self {
        Self {
            doc_freq: HashMap::new(),
            doc_count: 0,
            avg_doc_len: 0.0,
            doc_lengths: HashMap::new(),
            term_freqs: HashMap::new(),
            k1: 1.2,
            b: 0.75,
        }
    }

    /// Add a document to the index.
    ///
    /// Re-adding an existing `doc_id` replaces the previous postings instead
    /// of double-counting them.
    pub fn add_document(&mut self, doc_id: &str, content: &str) {
        // FIX: make indexing idempotent per doc_id. Previously a re-add
        // inflated doc_count and doc_freq (the old postings were never
        // retired), skewing IDF after any re-index.
        if self.term_freqs.contains_key(doc_id) {
            self.remove_document(doc_id);
        }
        let terms = self.tokenize(content);
        let doc_len = terms.len();
        // Update document length
        self.doc_lengths.insert(doc_id.to_string(), doc_len);
        // Calculate term frequencies
        let mut term_freq: HashMap<String, usize> = HashMap::new();
        let mut seen_terms: std::collections::HashSet<String> = std::collections::HashSet::new();
        for term in &terms {
            *term_freq.entry(term.clone()).or_insert(0) += 1;
            // Update document frequency (only once per document per term)
            if !seen_terms.contains(term) {
                *self.doc_freq.entry(term.clone()).or_insert(0) += 1;
                seen_terms.insert(term.clone());
            }
        }
        self.term_freqs.insert(doc_id.to_string(), term_freq);
        self.doc_count += 1;
        // Update average document length
        let total_len: usize = self.doc_lengths.values().sum();
        self.avg_doc_len = total_len as f32 / self.doc_count as f32;
    }

    /// Remove a document from the index (no-op for unknown ids).
    pub fn remove_document(&mut self, doc_id: &str) {
        if let Some(term_freq) = self.term_freqs.remove(doc_id) {
            // Update document frequencies, dropping terms no doc contains
            for term in term_freq.keys() {
                if let Some(freq) = self.doc_freq.get_mut(term) {
                    *freq = freq.saturating_sub(1);
                    if *freq == 0 {
                        self.doc_freq.remove(term);
                    }
                }
            }
        }
        self.doc_lengths.remove(doc_id);
        self.doc_count = self.doc_count.saturating_sub(1);
        // Update average document length
        if self.doc_count > 0 {
            let total_len: usize = self.doc_lengths.values().sum();
            self.avg_doc_len = total_len as f32 / self.doc_count as f32;
        } else {
            self.avg_doc_len = 0.0;
        }
    }

    /// Search the index with BM25 scoring.
    ///
    /// Returns up to `max_results` `(doc_id, score)` pairs, best first.
    /// Query terms absent from every document are skipped.
    pub fn search(&self, query: &str, max_results: usize) -> Vec<(String, f32)> {
        let query_terms = self.tokenize(query);
        let mut scores: HashMap<String, f32> = HashMap::new();
        for term in &query_terms {
            let df = *self.doc_freq.get(term).unwrap_or(&0);
            if df == 0 {
                continue;
            }
            // IDF calculation (BM25+ style, never negative thanks to +1.0)
            let idf = ((self.doc_count as f32 - df as f32 + 0.5) / (df as f32 + 0.5) + 1.0).ln();
            for (doc_id, term_freqs) in &self.term_freqs {
                if let Some(&tf) = term_freqs.get(term) {
                    let doc_len = *self.doc_lengths.get(doc_id).unwrap_or(&1) as f32;
                    // Length-normalized term frequency
                    let tf_normalized = (tf as f32 * (self.k1 + 1.0))
                        / (tf as f32
                            + self.k1 * (1.0 - self.b + self.b * (doc_len / self.avg_doc_len)));
                    *scores.entry(doc_id.clone()).or_insert(0.0) += idf * tf_normalized;
                }
            }
        }
        // Sort by score and return top results
        let mut results: Vec<(String, f32)> = scores.into_iter().collect();
        results.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
        results.truncate(max_results);
        results
    }

    /// Tokenize text into lowercase alphanumeric terms (length > 2).
    fn tokenize(&self, text: &str) -> Vec<String> {
        text.to_lowercase()
            .split(|c: char| !c.is_alphanumeric())
            .filter(|s| s.len() > 2) // Filter out very short tokens
            .map(|s| s.to_string())
            .collect()
    }

    /// Get index statistics
    pub fn stats(&self) -> BM25Stats {
        BM25Stats {
            doc_count: self.doc_count,
            unique_terms: self.doc_freq.len(),
            avg_doc_len: self.avg_doc_len,
        }
    }
}

impl Default for BM25Index {
    fn default() -> Self {
        Self::new()
    }
}

/// BM25 index statistics
#[derive(Debug, Clone)]
pub struct BM25Stats {
    pub doc_count: usize,
    pub unique_terms: usize,
    pub avg_doc_len: f32,
}
/// Hybrid search engine combining dense and sparse retrieval
///
/// Owns an in-memory BM25 index and document store; dense vectors live in
/// an external Qdrant collection reached over HTTP.
pub struct HybridSearchEngine {
    /// BM25 sparse index
    bm25_index: BM25Index,
    /// Document store for content retrieval
    documents: HashMap<String, DocumentEntry>,
    /// Configuration
    config: HybridSearchConfig,
    /// Qdrant URL for dense search
    qdrant_url: String,
    /// Collection name
    collection_name: String,
}
/// Document entry in the store
#[derive(Debug, Clone)]
struct DocumentEntry {
    /// Full document text copied into search results
    pub content: String,
    /// Origin path (file/email/etc) copied into search results
    pub source: String,
    /// Arbitrary string metadata copied into search results
    pub metadata: HashMap<String, String>,
}
impl HybridSearchEngine {
    /// Build an engine with empty indexes, targeting the given Qdrant
    /// collection for dense retrieval.
    pub fn new(config: HybridSearchConfig, qdrant_url: &str, collection_name: &str) -> Self {
        Self {
            bm25_index: BM25Index::new(),
            documents: HashMap::new(),
            config,
            qdrant_url: qdrant_url.to_string(),
            collection_name: collection_name.to_string(),
        }
    }

    /// Index a document for both dense and sparse search
    ///
    /// The embedding is optional; without it the document is only reachable
    /// through BM25.
    pub async fn index_document(
        &mut self,
        doc_id: &str,
        content: &str,
        source: &str,
        metadata: HashMap<String, String>,
        embedding: Option<Vec<f32>>,
    ) -> Result<(), String> {
        // Add to BM25 index
        self.bm25_index.add_document(doc_id, content);
        // Store document
        self.documents.insert(
            doc_id.to_string(),
            DocumentEntry {
                content: content.to_string(),
                source: source.to_string(),
                metadata,
            },
        );
        // If embedding provided, add to Qdrant
        if let Some(emb) = embedding {
            self.upsert_to_qdrant(doc_id, &emb).await?;
        }
        Ok(())
    }

    /// Remove a document from all indexes
    pub async fn remove_document(&mut self, doc_id: &str) -> Result<(), String> {
        self.bm25_index.remove_document(doc_id);
        self.documents.remove(doc_id);
        self.delete_from_qdrant(doc_id).await?;
        Ok(())
    }

    /// Perform hybrid search
    ///
    /// Pipeline: BM25 + (optional) dense retrieval -> RRF fusion ->
    /// min-score filter + truncation -> optional rerank.
    pub async fn search(
        &self,
        query: &str,
        query_embedding: Option<Vec<f32>>,
    ) -> Result<Vec<SearchResult>, String> {
        let fetch_count = self.config.max_results * 3; // Fetch more for fusion
        // Sparse search (BM25)
        let sparse_results = self.bm25_index.search(query, fetch_count);
        trace!(
            "BM25 search returned {} results for query: {}",
            sparse_results.len(),
            query
        );
        // Dense search (Qdrant) — skipped entirely when no embedding is given
        let dense_results = if let Some(embedding) = query_embedding {
            self.search_qdrant(&embedding, fetch_count).await?
        } else {
            Vec::new()
        };
        trace!(
            "Dense search returned {} results for query: {}",
            dense_results.len(),
            query
        );
        // Reciprocal Rank Fusion
        let fused_results = self.reciprocal_rank_fusion(&sparse_results, &dense_results);
        trace!("RRF produced {} fused results", fused_results.len());
        // Convert to SearchResult (ids missing from the store are dropped)
        let mut results: Vec<SearchResult> = fused_results
            .into_iter()
            .filter_map(|(doc_id, score)| {
                self.documents.get(&doc_id).map(|doc| SearchResult {
                    doc_id,
                    content: doc.content.clone(),
                    source: doc.source.clone(),
                    score,
                    metadata: doc.metadata.clone(),
                    search_method: SearchMethod::Hybrid,
                })
            })
            .filter(|r| r.score >= self.config.min_score)
            .take(self.config.max_results)
            .collect();
        // Optional reranking
        // NOTE(review): reranking runs after truncation to max_results, so it
        // reorders but cannot recover documents cut by the fusion step.
        if self.config.reranker_enabled && !results.is_empty() {
            results = self.rerank(query, results).await?;
        }
        Ok(results)
    }

    /// Perform only sparse (BM25) search
    pub fn sparse_search(&self, query: &str) -> Vec<SearchResult> {
        let results = self.bm25_index.search(query, self.config.max_results);
        results
            .into_iter()
            .filter_map(|(doc_id, score)| {
                self.documents.get(&doc_id).map(|doc| SearchResult {
                    doc_id,
                    content: doc.content.clone(),
                    source: doc.source.clone(),
                    score,
                    metadata: doc.metadata.clone(),
                    search_method: SearchMethod::Sparse,
                })
            })
            .collect()
    }

    /// Perform only dense (embedding) search
    pub async fn dense_search(
        &self,
        query_embedding: Vec<f32>,
    ) -> Result<Vec<SearchResult>, String> {
        let results = self
            .search_qdrant(&query_embedding, self.config.max_results)
            .await?;
        let search_results: Vec<SearchResult> = results
            .into_iter()
            .filter_map(|(doc_id, score)| {
                self.documents.get(&doc_id).map(|doc| SearchResult {
                    doc_id,
                    content: doc.content.clone(),
                    source: doc.source.clone(),
                    score,
                    metadata: doc.metadata.clone(),
                    search_method: SearchMethod::Dense,
                })
            })
            .collect();
        Ok(search_results)
    }

    /// Reciprocal Rank Fusion algorithm
    ///
    /// Scores each doc as weight / (k + rank + 1), summed over both lists;
    /// only the rank matters — the raw input scores are ignored. The final
    /// scores are normalized so the top document is 1.0.
    fn reciprocal_rank_fusion(
        &self,
        sparse: &[(String, f32)],
        dense: &[(String, f32)],
    ) -> Vec<(String, f32)> {
        let k = self.config.rrf_k as f32;
        let mut scores: HashMap<String, f32> = HashMap::new();
        // Score from sparse results
        for (rank, (doc_id, _)) in sparse.iter().enumerate() {
            let rrf_score = self.config.sparse_weight / (k + rank as f32 + 1.0);
            *scores.entry(doc_id.clone()).or_insert(0.0) += rrf_score;
        }
        // Score from dense results
        for (rank, (doc_id, _)) in dense.iter().enumerate() {
            let rrf_score = self.config.dense_weight / (k + rank as f32 + 1.0);
            *scores.entry(doc_id.clone()).or_insert(0.0) += rrf_score;
        }
        // Sort by combined score
        let mut results: Vec<(String, f32)> = scores.into_iter().collect();
        results.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
        // Normalize scores to 0-1 range
        // NOTE(review): `max_score` borrows `results` and is held across the
        // mutable loop below — verify this compiles as written (a copied
        // binding may be needed).
        if let Some((_, max_score)) = results.first() {
            if *max_score > 0.0 {
                for (_, score) in &mut results {
                    *score /= max_score;
                }
            }
        }
        results
    }

    /// Rerank results using cross-encoder model
    ///
    /// NOTE(review): no cross-encoder is actually invoked yet; this is a
    /// term-overlap heuristic blended 70/30 with the fusion score.
    async fn rerank(
        &self,
        query: &str,
        results: Vec<SearchResult>,
    ) -> Result<Vec<SearchResult>, String> {
        // In a full implementation, this would call a cross-encoder model
        // For now, we'll use a simple relevance heuristic
        let mut reranked = results;
        for result in &mut reranked {
            // Simple reranking based on query term overlap
            // NOTE(review): the HashSet borrows a temporary lowercased String
            // — verify lifetime/compilation as written.
            let query_terms: std::collections::HashSet<&str> =
                query.to_lowercase().split_whitespace().collect();
            let content_lower = result.content.to_lowercase();
            let mut overlap_score = 0.0;
            for term in &query_terms {
                if content_lower.contains(term) {
                    overlap_score += 1.0;
                }
            }
            // Combine original score with overlap
            let overlap_normalized = overlap_score / query_terms.len().max(1) as f32;
            result.score = result.score * 0.7 + overlap_normalized * 0.3;
            result.search_method = SearchMethod::Reranked;
        }
        // Re-sort by new scores
        reranked.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap_or(std::cmp::Ordering::Equal));
        Ok(reranked)
    }

    /// Search Qdrant for similar vectors
    ///
    /// Returns `(point_id, similarity)` pairs from the REST search endpoint.
    async fn search_qdrant(
        &self,
        embedding: &[f32],
        limit: usize,
    ) -> Result<Vec<(String, f32)>, String> {
        let client = reqwest::Client::new();
        let search_request = serde_json::json!({
            "vector": embedding,
            "limit": limit,
            "with_payload": false
        });
        let response = client
            .post(&format!(
                "{}/collections/{}/points/search",
                self.qdrant_url, self.collection_name
            ))
            .json(&search_request)
            .send()
            .await
            .map_err(|e| format!("Qdrant search failed: {}", e))?;
        if !response.status().is_success() {
            let error_text = response.text().await.unwrap_or_default();
            return Err(format!("Qdrant search error: {}", error_text));
        }
        let result: serde_json::Value = response
            .json()
            .await
            .map_err(|e| format!("Failed to parse Qdrant response: {}", e))?;
        let points = result["result"]
            .as_array()
            .ok_or("Invalid Qdrant response format")?;
        // NOTE(review): assumes point ids are strings; integer ids would be
        // silently dropped by as_str() — confirm the id scheme used at upsert.
        let results: Vec<(String, f32)> = points
            .iter()
            .filter_map(|p| {
                let id = p["id"].as_str().map(|s| s.to_string())?;
                let score = p["score"].as_f64()? as f32;
                Some((id, score))
            })
            .collect();
        Ok(results)
    }

    /// Upsert vector to Qdrant
    async fn upsert_to_qdrant(&self, doc_id: &str, embedding: &[f32]) -> Result<(), String> {
        let client = reqwest::Client::new();
        let upsert_request = serde_json::json!({
            "points": [{
                "id": doc_id,
                "vector": embedding
            }]
        });
        let response = client
            .put(&format!(
                "{}/collections/{}/points",
                self.qdrant_url, self.collection_name
            ))
            .json(&upsert_request)
            .send()
            .await
            .map_err(|e| format!("Qdrant upsert failed: {}", e))?;
        if !response.status().is_success() {
            let error_text = response.text().await.unwrap_or_default();
            return Err(format!("Qdrant upsert error: {}", error_text));
        }
        Ok(())
    }

    /// Delete vector from Qdrant
    ///
    /// Failures are logged but not propagated (best-effort cleanup).
    async fn delete_from_qdrant(&self, doc_id: &str) -> Result<(), String> {
        let client = reqwest::Client::new();
        let delete_request = serde_json::json!({
            "points": [doc_id]
        });
        let response = client
            .post(&format!(
                "{}/collections/{}/points/delete",
                self.qdrant_url, self.collection_name
            ))
            .json(&delete_request)
            .send()
            .await
            .map_err(|e| format!("Qdrant delete failed: {}", e))?;
        if !response.status().is_success() {
            warn!(
                "Qdrant delete may have failed for {}: {}",
                doc_id,
                response.status()
            );
        }
        Ok(())
    }

    /// Get engine statistics
    pub fn stats(&self) -> HybridSearchStats {
        let bm25_stats = self.bm25_index.stats();
        HybridSearchStats {
            total_documents: self.documents.len(),
            bm25_doc_count: bm25_stats.doc_count,
            unique_terms: bm25_stats.unique_terms,
            avg_doc_len: bm25_stats.avg_doc_len,
            config: self.config.clone(),
        }
    }
}
/// Hybrid search engine statistics
#[derive(Debug, Clone)]
pub struct HybridSearchStats {
    /// Documents held in the in-memory store
    pub total_documents: usize,
    /// Documents indexed in BM25 (should track `total_documents`)
    pub bm25_doc_count: usize,
    /// Distinct terms in the BM25 vocabulary
    pub unique_terms: usize,
    /// Mean tokenized document length
    pub avg_doc_len: f32,
    /// Snapshot of the active configuration
    pub config: HybridSearchConfig,
}
/// Query decomposition for complex questions
///
/// NOTE(review): `llm_endpoint` / `api_key` are stored but unused by the
/// current heuristic implementation — presumably reserved for an LLM-backed
/// decomposer; confirm before removing.
pub struct QueryDecomposer {
    llm_endpoint: String,
    api_key: String,
}
impl QueryDecomposer {
    /// Build a decomposer. The endpoint/key are retained for a future
    /// LLM-backed implementation; the current logic is purely heuristic.
    pub fn new(llm_endpoint: &str, api_key: &str) -> Self {
        Self {
            llm_endpoint: llm_endpoint.to_string(),
            api_key: api_key.to_string(),
        }
    }

    /// Decompose a complex query into simpler sub-queries
    ///
    /// Heuristic only (no LLM call yet); always returns at least one entry.
    pub async fn decompose(&self, query: &str) -> Result<Vec<String>, String> {
        Ok(self.split_into_sub_queries(query))
    }

    /// Synchronous heuristic behind `decompose`.
    ///
    /// First splits on word-bounded conjunctions; failing that, splits on
    /// '?' when multiple question words are present; failing that, returns
    /// the original query unchanged.
    fn split_into_sub_queries(&self, query: &str) -> Vec<String> {
        let mut sub_queries: Vec<String> = Vec::new();
        // FIX: conjunction patterns are space-padded so words that merely
        // contain them ("command", "also" inside "balsamic") are not split.
        let conjunctions = [" and ", " also ", " as well as ", " in addition to "];
        let mut parts: Vec<String> = vec![query.to_string()];
        for conj in &conjunctions {
            parts = parts
                .iter()
                .flat_map(|p| p.split(conj))
                .map(|s| s.trim().to_string())
                .filter(|s| !s.is_empty())
                .collect();
        }
        if parts.len() > 1 {
            sub_queries.extend(parts);
        } else {
            // Try question word splitting
            let question_words = ["what", "how", "why", "when", "where", "who"];
            let lower = query.to_lowercase();
            let has_multiple_questions = question_words
                .iter()
                .any(|qw| lower.matches(qw).count() > 1);
            if has_multiple_questions {
                // Split on question marks, re-appending one per fragment
                for part in query.split('?') {
                    let trimmed = part.trim();
                    if !trimmed.is_empty() {
                        sub_queries.push(format!("{}?", trimmed));
                    }
                }
            }
        }
        // If no decomposition happened, return original query
        if sub_queries.is_empty() {
            sub_queries.push(query.to_string());
        }
        sub_queries
    }

    /// Synthesize answers from multiple sub-query results
    ///
    /// A single answer is returned verbatim; multiple answers are numbered
    /// under a header naming the original question.
    pub fn synthesize(&self, query: &str, sub_answers: &[String]) -> String {
        if sub_answers.len() == 1 {
            return sub_answers[0].clone();
        }
        // Simple concatenation with context
        let mut synthesis = format!("Based on your question about \"{}\", here's what I found:\n\n", query);
        for (i, answer) in sub_answers.iter().enumerate() {
            synthesis.push_str(&format!("{}. {}\n\n", i + 1, answer));
        }
        synthesis
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_bm25_index_basic() {
        let mut idx = BM25Index::new();
        idx.add_document("doc1", "The quick brown fox jumps over the lazy dog");
        idx.add_document("doc2", "A quick brown dog runs in the park");
        idx.add_document("doc3", "The lazy cat sleeps all day");
        let snapshot = idx.stats();
        assert_eq!(snapshot.doc_count, 3);
        assert!(snapshot.avg_doc_len > 0.0);
    }

    #[test]
    fn test_bm25_search() {
        let mut idx = BM25Index::new();
        idx.add_document("doc1", "machine learning artificial intelligence");
        idx.add_document("doc2", "natural language processing NLP");
        idx.add_document("doc3", "computer vision image recognition");
        let hits = idx.search("machine learning", 10);
        assert!(!hits.is_empty());
        // The document containing both query terms must rank first.
        assert_eq!(hits[0].0, "doc1");
    }

    #[test]
    fn test_bm25_remove_document() {
        let mut idx = BM25Index::new();
        idx.add_document("doc1", "test document one");
        idx.add_document("doc2", "test document two");
        assert_eq!(idx.stats().doc_count, 2);
        idx.remove_document("doc1");
        assert_eq!(idx.stats().doc_count, 1);
        let hits = idx.search("one", 10);
        assert!(hits.is_empty() || hits[0].0 != "doc1");
    }

    #[test]
    fn test_hybrid_config_default() {
        let cfg = HybridSearchConfig::default();
        assert_eq!(cfg.dense_weight, 0.7);
        assert_eq!(cfg.sparse_weight, 0.3);
        assert_eq!(cfg.max_results, 10);
        assert!(!cfg.reranker_enabled);
    }

    #[test]
    fn test_reciprocal_rank_fusion() {
        let eng =
            HybridSearchEngine::new(HybridSearchConfig::default(), "http://localhost:6333", "test");
        let sparse = vec![
            ("doc1".to_string(), 0.9),
            ("doc2".to_string(), 0.7),
            ("doc3".to_string(), 0.5),
        ];
        let dense = vec![
            ("doc2".to_string(), 0.95),
            ("doc1".to_string(), 0.8),
            ("doc4".to_string(), 0.6),
        ];
        let fused = eng.reciprocal_rank_fusion(&sparse, &dense);
        assert!(!fused.is_empty());
        // doc1 and doc2 appear in both lists, so one of them must lead.
        let leaders: Vec<&str> = fused.iter().take(2).map(|(id, _)| id.as_str()).collect();
        assert!(leaders.contains(&"doc1") || leaders.contains(&"doc2"));
    }

    #[test]
    fn test_query_decomposer_simple() {
        let decomposer = QueryDecomposer::new("http://localhost:8081", "none");
        // Use tokio runtime for async test
        let rt = tokio::runtime::Runtime::new().unwrap();
        let sub_queries = rt
            .block_on(decomposer.decompose("What is machine learning and how does it work?"))
            .unwrap();
        assert!(!sub_queries.is_empty());
    }

    #[test]
    fn test_search_result_serialization() {
        let original = SearchResult {
            doc_id: "test123".to_string(),
            content: "Test content".to_string(),
            source: "/path/to/file".to_string(),
            score: 0.85,
            metadata: HashMap::new(),
            search_method: SearchMethod::Hybrid,
        };
        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: SearchResult = serde_json::from_str(&encoded).unwrap();
        assert_eq!(decoded.doc_id, "test123");
    }
}

View file

@ -1,3 +1,8 @@
pub mod hybrid_search;
pub mod vectordb_indexer; pub mod vectordb_indexer;
pub use hybrid_search::{
BM25Index, BM25Stats, HybridSearchConfig, HybridSearchEngine, HybridSearchStats,
QueryDecomposer, SearchMethod, SearchResult,
};
pub use vectordb_indexer::{IndexingStats, IndexingStatus, VectorDBIndexer}; pub use vectordb_indexer::{IndexingStats, IndexingStatus, VectorDBIndexer};

View file

@ -23,6 +23,7 @@ use crate::shared::state::AppState;
pub mod auth;
pub mod auth_handlers;
pub mod chat_handlers;
pub mod stream_handlers;
// Module stubs - to be implemented with full HTMX
pub mod drive {
@ -464,6 +465,7 @@ pub fn create_router(app_state: AppState) -> Router {
.route("/api/auth/check", get(auth_handlers::check_session))
// API endpoints
.merge(chat_handlers::routes())
.merge(stream_handlers::routes())
.merge(drive::routes())
.merge(mail::routes())
.merge(meet::routes())

433
src/web/stream_handlers.rs Normal file
View file

@ -0,0 +1,433 @@
//! Server-Sent Events (SSE) streaming handlers for chat responses
//!
//! This module provides real-time streaming of LLM responses using SSE,
//! enabling token-by-token delivery to the client for a responsive chat experience.
use axum::{
extract::{Query, State},
response::{
sse::{Event, KeepAlive, Sse},
IntoResponse,
},
Json,
};
use futures::stream::Stream;
use log::{error, info, trace};
use serde::{Deserialize, Serialize};
use std::{convert::Infallible, sync::Arc, time::Duration};
use tokio::sync::mpsc;
use tokio_stream::wrappers::ReceiverStream;
use uuid::Uuid;
use crate::llm::{LLMProvider, OpenAIClient};
use crate::shared::state::AppState;
/// Request payload for streaming chat
///
/// Deserialized from the JSON body of `POST /api/chat/stream` and
/// `POST /api/v1/stream` (see `routes`).
#[derive(Debug, Deserialize)]
pub struct StreamChatRequest {
/// Session ID
pub session_id: String,
/// User message content
pub message: String,
/// Optional system prompt override
pub system_prompt: Option<String>,
/// Optional model name override
pub model: Option<String>,
/// Optional bot ID
/// (UUID string; invalid values are treated as absent by the handler)
pub bot_id: Option<String>,
}
/// Query parameters for SSE connection
///
/// NOTE(review): not referenced by any handler or route in this module —
/// presumably reserved for a future GET-based SSE endpoint; verify before
/// removing.
#[derive(Debug, Deserialize)]
pub struct StreamQuery {
pub session_id: String,
}
/// SSE event types
///
/// Serialized as `{"type": "...", "data": {...}}` via serde's adjacently
/// tagged representation; `to_sse_event` additionally sets the SSE event
/// name to the lowercase variant tag.
#[derive(Debug, Clone, Serialize)]
#[serde(tag = "type", content = "data")]
pub enum StreamEvent {
/// Token chunk
Token { content: String },
/// Thinking/reasoning content (for models that support it)
Thinking { content: String },
/// Tool call request
ToolCall { name: String, arguments: String },
/// Error occurred
Error { message: String },
/// Stream completed
Done { total_tokens: Option<u32> },
/// Stream started
Start { session_id: String, model: String },
/// Metadata update
Meta { key: String, value: String },
}
impl StreamEvent {
    /// Convert this event into an SSE `Event`.
    ///
    /// The SSE event name is the variant's wire tag (e.g. `"token"`,
    /// `"tool_call"`), and the data payload is the JSON serialization of
    /// the whole enum. Fails only if JSON serialization fails.
    pub fn to_sse_event(&self) -> Result<Event, serde_json::Error> {
        let event_type = match self {
            Self::Token { .. } => "token",
            Self::Thinking { .. } => "thinking",
            Self::ToolCall { .. } => "tool_call",
            Self::Error { .. } => "error",
            Self::Done { .. } => "done",
            Self::Start { .. } => "start",
            Self::Meta { .. } => "meta",
        };
        let payload = serde_json::to_string(self)?;
        Ok(Event::default().event(event_type).data(payload))
    }
}
/// Stream a chat response using SSE
///
/// POST handler: accepts a `StreamChatRequest` JSON body and returns an SSE
/// stream. Generation runs in a spawned task; events flow through a bounded
/// mpsc channel (capacity 100) into the response stream.
pub async fn stream_chat_response(
State(state): State<AppState>,
Json(payload): Json<StreamChatRequest>,
) -> Sse<impl Stream<Item = Result<Event, Infallible>>> {
let (tx, rx) = mpsc::channel::<Result<Event, Infallible>>(100);
// Clone state for the spawned task
let state_clone = state.clone();
let session_id = payload.session_id.clone();
let message = payload.message.clone();
let model = payload.model.clone();
let system_prompt = payload.system_prompt.clone();
// Invalid bot_id strings are silently treated as "no bot" (None).
let bot_id = payload
.bot_id
.as_ref()
.and_then(|id| Uuid::parse_str(id).ok());
// Spawn the streaming task
tokio::spawn(async move {
if let Err(e) = handle_stream_generation(
state_clone,
tx.clone(),
session_id,
message,
model,
system_prompt,
bot_id,
)
.await
{
// Failures are reported to the client as an Error event rather
// than an HTTP error, since headers are already sent.
error!("Stream generation error: {}", e);
let error_event = StreamEvent::Error {
message: e.to_string(),
};
if let Ok(event) = error_event.to_sse_event() {
let _ = tx.send(Ok(event)).await;
}
}
// Send done event
// Done is emitted even after an error so clients can close cleanly.
let done_event = StreamEvent::Done { total_tokens: None };
if let Ok(event) = done_event.to_sse_event() {
let _ = tx.send(Ok(event)).await;
}
});
// 15s keep-alive comments prevent proxies from dropping idle connections.
Sse::new(ReceiverStream::new(rx)).keep_alive(
KeepAlive::new()
.interval(Duration::from_secs(15))
.text("keep-alive"),
)
}
/// Handle the actual stream generation
///
/// Resolves LLM settings for `bot_id`, emits a `Start` event, then spawns
/// the LLM call and forwards each produced token to `tx` as a `Token` SSE
/// event. Returns once the token channel closes or the client disconnects.
/// The caller (`stream_chat_response`) is responsible for the `Done` event.
async fn handle_stream_generation(
state: AppState,
tx: mpsc::Sender<Result<Event, Infallible>>,
session_id: String,
message: String,
model_override: Option<String>,
system_prompt_override: Option<String>,
bot_id: Option<Uuid>,
) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
// Get LLM configuration
let (llm_url, llm_model, llm_key) = get_llm_config(&state, bot_id).await?;
// Request-level model override wins over the bot's configured model.
let model = model_override.unwrap_or(llm_model);
// Send start event
let start_event = StreamEvent::Start {
session_id: session_id.clone(),
model: model.clone(),
};
if let Ok(event) = start_event.to_sse_event() {
tx.send(Ok(event)).await?;
}
info!(
"Starting SSE stream for session: {}, model: {}",
session_id, model
);
// Build messages
let system_prompt = system_prompt_override.unwrap_or_else(|| {
"You are a helpful AI assistant powered by General Bots.".to_string()
});
// Single-turn: only the current user message, no prior history.
let messages = OpenAIClient::build_messages(&system_prompt, "", &[("user".to_string(), message)]);
// Create LLM client
let client = OpenAIClient::new(llm_key.clone(), Some(llm_url.clone()));
// Create channel for token streaming
let (token_tx, mut token_rx) = mpsc::channel::<String>(100);
// Spawn LLM streaming task
let client_clone = client;
let messages_clone = messages.clone();
let model_clone = model.clone();
let key_clone = llm_key.clone();
tokio::spawn(async move {
// NOTE(review): the first argument is an empty `&""` — presumably an
// unused prompt/prefix slot; confirm against generate_stream's signature.
if let Err(e) = client_clone
.generate_stream(&"", &messages_clone, token_tx, &model_clone, &key_clone)
.await
{
error!("LLM stream error: {}", e);
}
});
// Forward tokens as SSE events
while let Some(token) = token_rx.recv().await {
trace!("Streaming token: {}", token);
let token_event = StreamEvent::Token {
content: token.clone(),
};
if let Ok(event) = token_event.to_sse_event() {
if tx.send(Ok(event)).await.is_err() {
// Client disconnected
info!("Client disconnected from SSE stream");
break;
}
}
}
Ok(())
}
/// Get LLM configuration for a bot
async fn get_llm_config(
state: &AppState,
bot_id: Option<Uuid>,
) -> Result<(String, String, String), Box<dyn std::error::Error + Send + Sync>> {
use diesel::prelude::*;
let mut conn = state
.conn
.get()
.map_err(|e| format!("Failed to acquire database connection: {}", e))?;
let target_bot_id = bot_id.unwrap_or(Uuid::nil());
#[derive(QueryableByName)]
struct ConfigRow {
#[diesel(sql_type = diesel::sql_types::Text)]
config_key: String,
#[diesel(sql_type = diesel::sql_types::Text)]
config_value: String,
}
let configs: Vec<ConfigRow> = diesel::sql_query(
"SELECT config_key, config_value FROM bot_configuration \
WHERE bot_id = $1 AND config_key IN ('llm-url', 'llm-model', 'llm-key')",
)
.bind::<diesel::sql_types::Uuid, _>(target_bot_id)
.load(&mut conn)
.unwrap_or_default();
let mut llm_url = "http://localhost:8081".to_string();
let mut llm_model = "default".to_string();
let mut llm_key = "none".to_string();
for config in configs {
match config.config_key.as_str() {
"llm-url" => llm_url = config.config_value,
"llm-model" => llm_model = config.config_value,
"llm-key" => llm_key = config.config_value,
_ => {}
}
}
Ok((llm_url, llm_model, llm_key))
}
/// Create routes for streaming endpoints.
///
/// Both paths dispatch to the same handler so the versioned
/// (`/api/v1/stream`) and legacy (`/api/chat/stream`) clients are served
/// identically.
pub fn routes() -> axum::Router<AppState> {
    use axum::routing::post;
    axum::Router::new()
        .route("/api/v1/stream", post(stream_chat_response))
        .route("/api/chat/stream", post(stream_chat_response))
}
/// Streaming chat with conversation history
///
/// Request payload for `stream_chat_with_history`.
#[derive(Debug, Deserialize)]
pub struct StreamChatWithHistoryRequest {
// Session identifier echoed back in the Start event.
pub session_id: String,
// The new user message, appended after `history`.
pub message: String,
// Prior conversation turns, oldest first.
pub history: Option<Vec<HistoryMessage>>,
// Optional system prompt override.
pub system_prompt: Option<String>,
// Optional model name override.
pub model: Option<String>,
// Optional bot UUID string; invalid values are treated as absent.
pub bot_id: Option<String>,
// NOTE(review): accepted but not read by handle_stream_with_history in
// this module — presumably reserved for future use; confirm.
pub temperature: Option<f32>,
// NOTE(review): accepted but not read by handle_stream_with_history in
// this module — presumably reserved for future use; confirm.
pub max_tokens: Option<u32>,
}
/// A single turn of prior conversation history.
#[derive(Debug, Deserialize, Serialize, Clone)]
pub struct HistoryMessage {
// Speaker role (e.g. "user" or "assistant") — passed through to the LLM.
pub role: String,
// The turn's text content.
pub content: String,
}
/// Stream chat with full conversation history
///
/// POST handler mirroring `stream_chat_response`, but the request carries
/// prior turns that are replayed to the LLM before the new message.
pub async fn stream_chat_with_history(
State(state): State<AppState>,
Json(payload): Json<StreamChatWithHistoryRequest>,
) -> Sse<impl Stream<Item = Result<Event, Infallible>>> {
let (tx, rx) = mpsc::channel::<Result<Event, Infallible>>(100);
let state_clone = state.clone();
tokio::spawn(async move {
if let Err(e) =
handle_stream_with_history(state_clone, tx.clone(), payload).await
{
// Surface failures to the client as an Error event; HTTP headers
// are already sent at this point.
error!("Stream with history error: {}", e);
let error_event = StreamEvent::Error {
message: e.to_string(),
};
if let Ok(event) = error_event.to_sse_event() {
let _ = tx.send(Ok(event)).await;
}
}
// Done is emitted even after an error so clients can close cleanly.
let done_event = StreamEvent::Done { total_tokens: None };
if let Ok(event) = done_event.to_sse_event() {
let _ = tx.send(Ok(event)).await;
}
});
// 15s keep-alive comments prevent proxies from dropping idle connections.
Sse::new(ReceiverStream::new(rx)).keep_alive(
KeepAlive::new()
.interval(Duration::from_secs(15))
.text("keep-alive"),
)
}
/// Drive history-aware stream generation for `stream_chat_with_history`.
///
/// Resolves LLM settings, emits a `Start` event, builds the message list
/// from `payload.history` plus the new user message, then forwards each
/// generated token to `tx` as a `Token` SSE event. Returns once the token
/// channel closes or the client disconnects; the caller sends `Done`.
async fn handle_stream_with_history(
state: AppState,
tx: mpsc::Sender<Result<Event, Infallible>>,
payload: StreamChatWithHistoryRequest,
) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
// Invalid bot_id strings are silently treated as "no bot" (None).
let bot_id = payload
.bot_id
.as_ref()
.and_then(|id| Uuid::parse_str(id).ok());
let (llm_url, llm_model, llm_key) = get_llm_config(&state, bot_id).await?;
// Request-level model override wins over the bot's configured model.
let model = payload.model.unwrap_or(llm_model);
// Send start event
let start_event = StreamEvent::Start {
session_id: payload.session_id.clone(),
model: model.clone(),
};
if let Ok(event) = start_event.to_sse_event() {
tx.send(Ok(event)).await?;
}
// Build history
// Prior turns (oldest first) followed by the new user message.
let history: Vec<(String, String)> = payload
.history
.unwrap_or_default()
.into_iter()
.map(|h| (h.role, h.content))
.chain(std::iter::once(("user".to_string(), payload.message)))
.collect();
let system_prompt = payload.system_prompt.unwrap_or_else(|| {
"You are a helpful AI assistant powered by General Bots.".to_string()
});
let messages = OpenAIClient::build_messages(&system_prompt, "", &history);
let client = OpenAIClient::new(llm_key.clone(), Some(llm_url.clone()));
let (token_tx, mut token_rx) = mpsc::channel::<String>(100);
let client_clone = client;
let messages_clone = messages.clone();
let model_clone = model.clone();
let key_clone = llm_key.clone();
tokio::spawn(async move {
// NOTE(review): the first argument is an empty `&""` — presumably an
// unused prompt/prefix slot; confirm against generate_stream's signature.
if let Err(e) = client_clone
.generate_stream(&"", &messages_clone, token_tx, &model_clone, &key_clone)
.await
{
error!("LLM stream error: {}", e);
}
});
// Forward tokens until the LLM finishes or the client disconnects.
while let Some(token) = token_rx.recv().await {
let token_event = StreamEvent::Token {
content: token.clone(),
};
if let Ok(event) = token_event.to_sse_event() {
if tx.send(Ok(event)).await.is_err() {
break;
}
}
}
Ok(())
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Shared helper: a StreamEvent must convert to an SSE event cleanly.
    fn assert_converts(event: StreamEvent) {
        assert!(event.to_sse_event().is_ok());
    }

    #[test]
    fn test_stream_event_to_sse() {
        assert_converts(StreamEvent::Token {
            content: "Hello".to_string(),
        });
    }

    #[test]
    fn test_stream_event_done() {
        assert_converts(StreamEvent::Done {
            total_tokens: Some(100),
        });
    }

    #[test]
    fn test_stream_event_error() {
        assert_converts(StreamEvent::Error {
            message: "Test error".to_string(),
        });
    }

    #[test]
    fn test_history_message_serialization() {
        // A HistoryMessage must serialize to JSON without error.
        let message = HistoryMessage {
            role: "user".to_string(),
            content: "Hello".to_string(),
        };
        assert!(serde_json::to_string(&message).is_ok());
    }
}