Simplify hallucination detector: only stop if 50+ repetitions
All checks were successful
BotServer CI / build (push) Successful in 9m8s

- Simple: count pattern repetitions, stop at 50
- Async API with in-memory counting (a HashMap behind a tokio Mutex; no Redis is involved)
- 60-second window: a pattern's count is discarded 60 seconds after that pattern is first seen
This commit is contained in:
Rodrigo Rodriguez (Pragmatismo) 2026-03-09 20:02:29 -03:00
parent 032de108fd
commit 9f35863bff
2 changed files with 69 additions and 255 deletions

View file

@ -1,67 +1,36 @@
//! Hallucination Loop Detector //! Simple Hallucination Loop Detector
//! //!
//! Detects when an LLM gets stuck in a repetition loop (hallucination). //! Detects when an LLM gets stuck in a repetition loop.
//! This module provides detection for all channels (web, WhatsApp, Telegram, etc.). //! By default, triggers when the same pattern is seen 50+ times within a 60-second window (occurrences need not be consecutive).
use std::collections::hash_map::DefaultHasher; use std::collections::HashMap;
use std::hash::{Hash, Hasher}; use std::sync::Arc;
use std::time::{Duration, Instant};
use tokio::sync::Mutex;
use log::warn;
const THRESHOLD: usize = 50;
const WINDOW: Duration = Duration::from_secs(60);
/// Configuration for hallucination detection
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct HallucinationConfig { pub struct HallucinationConfig {
/// Minimum text length before detection starts pub threshold: usize,
pub min_text_length: usize, pub window: Duration,
/// Pattern lengths to check (in characters)
pub pattern_lengths: Vec<usize>,
/// Number of consecutive repetitions to trigger detection
pub consecutive_threshold: usize,
/// Number of total occurrences in recent text to trigger detection
pub occurrence_threshold: usize,
/// Recent text window size for occurrence counting
pub recent_text_window: usize,
/// Number of identical tokens to trigger detection
pub identical_token_threshold: usize,
/// Common words to ignore (won't trigger detection when repeated)
pub ignore_words: Vec<String>,
} }
/// Default list of common words that shouldn't trigger hallucination detection
const DEFAULT_IGNORE_WORDS: &[&str] = &[
"the", "a", "an", "is", "are", "was", "were", "be", "been", "being",
"have", "has", "had", "do", "does", "did", "will", "would", "could", "should",
"may", "might", "must", "shall", "can", "need", "dare", "ought", "used",
"to", "of", "in", "for", "on", "with", "at", "by", "from", "as",
"into", "through", "during", "before", "after", "above", "below", "between",
"and", "but", "or", "nor", "so", "yet", "both", "either", "neither",
"not", "only", "own", "same", "than", "too", "very", "just",
"de", "da", "do", "das", "dos", "e", "é", "em", "no", "na", "nos", "nas",
"para", "por", "com", "sem", "sobre", "entre", "após", "antes", "depois",
"que", "se", "ou", "mas", "porém", "como", "assim", "também", "ainda",
"um", "uma", "uns", "umas", "o", "a", "os", "as",
];
impl Default for HallucinationConfig { impl Default for HallucinationConfig {
fn default() -> Self { fn default() -> Self {
Self { Self {
min_text_length: 50, threshold: THRESHOLD,
pattern_lengths: vec![3, 4, 5, 6, 8, 10, 15, 20], window: WINDOW,
consecutive_threshold: 10, // Increased from 5 to 10
occurrence_threshold: 15, // Increased from 8 to 15
recent_text_window: 500,
identical_token_threshold: 15, // Increased from 10 to 15
ignore_words: DEFAULT_IGNORE_WORDS.iter().map(|s| s.to_string()).collect(),
} }
} }
} }
/// State for tracking hallucination during streaming #[derive(Debug, Clone)]
#[derive(Debug)]
pub struct HallucinationDetector { pub struct HallucinationDetector {
config: HallucinationConfig, config: HallucinationConfig,
last_content_hash: u64, pattern_counts: Arc<Mutex<HashMap<String, (usize, Instant)>>>,
identical_count: usize,
detected: bool,
detected_pattern: Option<String>,
} }
impl Default for HallucinationDetector { impl Default for HallucinationDetector {
@ -71,166 +40,41 @@ impl Default for HallucinationDetector {
} }
impl HallucinationDetector { impl HallucinationDetector {
/// Create a new detector with custom configuration
pub fn new(config: HallucinationConfig) -> Self { pub fn new(config: HallucinationConfig) -> Self {
Self { Self {
config, config,
last_content_hash: 0, pattern_counts: Arc::new(Mutex::new(HashMap::new())),
identical_count: 0,
detected: false,
detected_pattern: None,
} }
} }
/// Check if hallucination has been detected /// Check if a pattern is hallucinating (repeating 50+ times)
pub fn is_detected(&self) -> bool { pub async fn check(&self, pattern: &str) -> bool {
self.detected if pattern.trim().is_empty() || pattern.len() < 3 {
return false;
} }
/// Get the detected pattern if any let mut counts = self.pattern_counts.lock().await;
pub fn get_detected_pattern(&self) -> Option<&str> { let now = Instant::now();
self.detected_pattern.as_deref()
}
/// Get the detected pattern as owned String // Clean old entries
pub fn get_detected_pattern_owned(&self) -> Option<String> { counts.retain(|_, (_, time)| now.duration_since(*time) < self.config.window);
self.detected_pattern.clone()
}
/// Check a new token/chunk for hallucination patterns // Increment count for this pattern
/// Returns true if hallucination is detected let (count, _) = counts.entry(pattern.to_string()).or_insert((0, now));
pub fn check_token(&mut self, token: &str) -> bool { *count += 1;
if self.detected {
return true;
}
// Check for identical token repetition if *count >= self.config.threshold {
if !token.trim().is_empty() { warn!("Hallucination detected: pattern {:?} repeated {} times", pattern, count);
let mut hasher = DefaultHasher::new(); true
token.hash(&mut hasher);
let content_hash = hasher.finish();
if content_hash == self.last_content_hash {
self.identical_count += 1;
if self.identical_count >= self.config.identical_token_threshold {
log::warn!(
"LLM hallucination detected: identical token repeated {} times: {:?}",
self.identical_count,
token
);
self.detected = true;
self.detected_pattern = Some(format!("{} ({}x)", token.trim(), self.identical_count));
return true;
}
} else { } else {
self.identical_count = 0;
}
self.last_content_hash = content_hash;
}
false false
} }
/// Check accumulated text for repetition patterns
/// Returns Some(pattern) if hallucination is detected
pub fn check_text(&mut self, text: &str) -> Option<String> {
if self.detected {
return self.detected_pattern.clone();
} }
// Skip detection for short texts /// Reset all counts
if text.len() < self.config.min_text_length { pub async fn reset(&self) {
return None; let mut counts = self.pattern_counts.lock().await;
} counts.clear();
// Check for repeated patterns of various lengths
for pattern_len in &self.config.pattern_lengths {
if text.len() < *pattern_len * 5 {
continue;
}
// Get the last pattern to check
let chars: Vec<char> = text.chars().collect();
let start = chars.len().saturating_sub(*pattern_len);
let pattern: String = chars[start..].iter().collect();
let pattern_str = pattern.trim();
if pattern_str.is_empty() || pattern_str.len() < 2 {
continue;
}
// Ignore common Markdown separators
if pattern_str == "---" || pattern_str == "***" || pattern_str == "___" {
continue;
}
// Count how many times this pattern appears consecutively at the end
let mut count = 0;
let mut search_text = text;
while search_text.ends_with(pattern_str) || search_text.ends_with(&pattern) {
count += 1;
if count >= self.config.consecutive_threshold {
// Found threshold repetitions - likely hallucination
log::warn!(
"LLM hallucination loop detected: pattern {:?} repeated {} times consecutively",
pattern_str,
count
);
self.detected = true;
self.detected_pattern = Some(pattern_str.to_string());
return self.detected_pattern.clone();
}
// Remove one occurrence and continue checking
if search_text.ends_with(pattern_str) {
search_text = &search_text[..search_text.len().saturating_sub(pattern_str.len())];
} else {
search_text = &search_text[..search_text.len().saturating_sub(pattern.len())];
}
}
// Alternative: count total occurrences in recent text
let recent_start = chars.len().saturating_sub(self.config.recent_text_window);
let recent_text: String = chars[recent_start..].iter().collect();
let total_count = recent_text.matches(pattern_str).count();
if total_count >= self.config.occurrence_threshold && pattern_str.len() >= 3 {
log::warn!(
"LLM hallucination loop detected: pattern {:?} appears {} times in recent {} chars",
pattern_str,
total_count,
self.config.recent_text_window
);
self.detected = true;
self.detected_pattern = Some(format!("{} ({}x)", pattern_str, total_count));
return self.detected_pattern.clone();
}
}
None
}
/// Combined check: both token and accumulated text
/// Returns true if hallucination detected
pub fn check(&mut self, token: &str, accumulated_text: &str) -> bool {
// First check token repetition
if self.check_token(token) {
return true;
}
// Then check accumulated text for patterns
if self.check_text(accumulated_text).is_some() {
return true;
}
false
}
/// Reset the detector state (for new conversations)
pub fn reset(&mut self) {
self.last_content_hash = 0;
self.identical_count = 0;
self.detected = false;
self.detected_pattern = None;
} }
} }
@ -238,55 +82,30 @@ impl HallucinationDetector {
mod tests { mod tests {
use super::*; use super::*;
#[test] #[tokio::test]
fn test_identical_token_detection() { async fn test_no_hallucination_below_threshold() {
let mut detector = HallucinationDetector::default(); let detector = HallucinationDetector::default();
for _ in 0..49 {
// Same token repeated assert!(!detector.check("test_pattern").await);
for _ in 0..9 { }
assert!(!detector.check_token("GBJ2KP")); }
}
// 10th repetition should trigger #[tokio::test]
assert!(detector.check_token("GBJ2KP")); async fn test_hallucination_at_threshold() {
} let detector = HallucinationDetector::default();
for _ in 0..50 {
#[test] detector.check("test_pattern").await;
fn test_pattern_repetition() { }
let mut detector = HallucinationDetector::default(); assert!(detector.check("test_pattern").await);
}
// Build text with repeated pattern
let repeated = "XYZ123 ".repeat(6); #[tokio::test]
let result = detector.check_text(&repeated); async fn test_reset() {
let detector = HallucinationDetector::default();
assert!(result.is_some()); for _ in 0..50 {
assert!(detector.is_detected()); detector.check("pattern").await;
} }
detector.reset().await;
#[test] assert!(!detector.check("pattern").await);
fn test_normal_text_not_detected() {
let mut detector = HallucinationDetector::default();
let normal_text = "This is a normal response without any repetition patterns. \
The LLM is generating coherent text that makes sense.";
assert!(!detector.check_token("normal"));
assert!(detector.check_text(normal_text).is_none());
assert!(!detector.is_detected());
}
#[test]
fn test_reset() {
let mut detector = HallucinationDetector::default();
// Trigger detection
for _ in 0..10 {
detector.check_token("REPEAT");
}
assert!(detector.is_detected());
// Reset
detector.reset();
assert!(!detector.is_detected());
assert!(detector.get_detected_pattern().is_none());
} }
} }

View file

@ -1107,21 +1107,16 @@ async fn route_to_bot(
// Rate limiting is handled by WhatsAppAdapter::send_whatsapp_message // Rate limiting is handled by WhatsAppAdapter::send_whatsapp_message
} }
// Use the shared LLM hallucination detector // Use the shared LLM hallucination detector (simple: 50+ repetitions = hallucination)
let mut hallucination_detector = crate::llm::hallucination_detector::HallucinationDetector::default(); let detector = crate::llm::hallucination_detector::HallucinationDetector::default();
while let Some(response) = rx.recv().await { while let Some(response) = rx.recv().await {
let is_final = response.is_complete; let is_final = response.is_complete;
if !response.content.is_empty() { if !response.content.is_empty() {
buffer.push_str(&response.content); // Check for hallucination (50+ repetitions of same pattern)
if detector.check(&response.content).await {
// Check for hallucination using the shared LLM detector warn!("WA hallucination detected: {:?}, stopping stream", response.content);
if hallucination_detector.check(&response.content, &buffer) {
warn!(
"WA hallucination detected: {:?}, stopping stream",
hallucination_detector.get_detected_pattern()
);
// Send what we have and stop // Send what we have and stop
if !buffer.trim().is_empty() { if !buffer.trim().is_empty() {
let clean_buffer = buffer.trim_end(); let clean_buffer = buffer.trim_end();