Relax hallucination detector: ignore Markdown separators, increase thresholds

- Ignore the patterns `---`, `***`, and `___` (legitimate Markdown separators / thematic breaks)
- Increase consecutive-repeat threshold (`consecutive_threshold`): 5 → 10
- Increase occurrence threshold (`occurrence_threshold`): 8 → 15
- Increase identical-token threshold (`identical_token_threshold`): 10 → 15
This commit is contained in:
Rodrigo Rodriguez (Pragmatismo) 2026-03-09 19:50:34 -03:00
parent 47e1013700
commit 032de108fd

View file

@@ -45,10 +45,10 @@ impl Default for HallucinationConfig {
Self { Self {
min_text_length: 50, min_text_length: 50,
pattern_lengths: vec![3, 4, 5, 6, 8, 10, 15, 20], pattern_lengths: vec![3, 4, 5, 6, 8, 10, 15, 20],
consecutive_threshold: 5, consecutive_threshold: 10, // Increased from 5 to 10
occurrence_threshold: 8, occurrence_threshold: 15, // Increased from 8 to 15
recent_text_window: 500, recent_text_window: 500,
identical_token_threshold: 10, identical_token_threshold: 15, // Increased from 10 to 15
ignore_words: DEFAULT_IGNORE_WORDS.iter().map(|s| s.to_string()).collect(), ignore_words: DEFAULT_IGNORE_WORDS.iter().map(|s| s.to_string()).collect(),
} }
} }
@@ -159,6 +159,11 @@ impl HallucinationDetector {
continue; continue;
} }
// Ignore common Markdown separators
if pattern_str == "---" || pattern_str == "***" || pattern_str == "___" {
continue;
}
// Count how many times this pattern appears consecutively at the end // Count how many times this pattern appears consecutively at the end
let mut count = 0; let mut count = 0;
let mut search_text = text; let mut search_text = text;