feat(whatsapp): isolate lists as single messages and remove code blocks

- Split list detection into numbered and bullet list items - Add looks_like_list_start() to detect when list is beginning - Add looks_like_list_end() to detect when list has ended - Add split_text_before_list() to separate text before list - Add split_list_from_text() to separate list from text after - Update streaming logic to send lists as isolated messages - Add code block removal (triple backticks and inline backticks) - Add comprehensive unit tests for list detection functions Resolves: Lists being mixed with other text in WhatsApp messages Resolves: JavaScript/C# code leaking into WhatsApp messages
2026-03-08 14:52:59 -03:00 · 2026-03-08 14:52:59 -03:00 · 97661d75e2
commit 97661d75e2
parent c5d69f9752
2 changed files with 799 additions and 25 deletions
--- a/src/core/bot/channels/whatsapp.rs
+++ b/src/core/bot/channels/whatsapp.rs
@ -50,6 +50,65 @@ impl WhatsAppAdapter {
        }
    }

+    /// Sanitize Markdown text for WhatsApp compatibility
+    /// WhatsApp only supports: *bold*, _italic_, ~strikethrough~, ```monospace```
+    /// Does NOT support: headers (###), links [text](url), checkboxes, etc.
+    pub fn sanitize_for_whatsapp(text: &str) -> String {
+        let mut result = text.to_string();
+
+        // Remove Markdown headers (### ## # at start of lines)
+        result = regex::Regex::new(r"(?m)^#{1,6}\s*")
+            .map(|re| re.replace_all(&result, "").to_string())
+            .unwrap_or(result);
+
+        // Convert Markdown links [text](url) to "text: url"
+        result = regex::Regex::new(r"\[([^\]]+)\]\(([^)]+)\)")
+            .map(|re| re.replace_all(&result, "$1: $2").to_string())
+            .unwrap_or(result);
+
+        // Remove image syntax ![alt](url) - just keep alt text
+        result = regex::Regex::new(r"!\[([^\]]*)\]\([^)]+\)")
+            .map(|re| re.replace_all(&result, "$1").to_string())
+            .unwrap_or(result);
+
+        // Remove checkbox syntax [ ] and [x]
+        result = regex::Regex::new(r"\[[ x]\]")
+            .map(|re| re.replace_all(&result, "•").to_string())
+            .unwrap_or(result);
+
+        // Remove horizontal rules (--- or ***)
+        result = regex::Regex::new(r"(?m)^[-*]{3,}\s*$")
+            .map(|re| re.replace_all(&result, "").to_string())
+            .unwrap_or(result);
+
+        // Remove code blocks with triple backticks ```code```
+        result = regex::Regex::new(r"```[\s\S]*?```")
+            .map(|re| re.replace_all(&result, "").to_string())
+            .unwrap_or(result);
+
+        // Remove inline code with single backticks `code`
+        result = regex::Regex::new(r"`[^`]+`")
+            .map(|re| re.replace_all(&result, "").to_string())
+            .unwrap_or(result);
+
+        // Remove HTML tags if any
+        result = regex::Regex::new(r"<[^>]+>")
+            .map(|re| re.replace_all(&result, "").to_string())
+            .unwrap_or(result);
+
+        // Clean up multiple consecutive blank lines
+        result = regex::Regex::new(r"\n{3,}")
+            .map(|re| re.replace_all(&result, "\n\n").to_string())
+            .unwrap_or(result);
+
+        // Clean up trailing whitespace on lines
+        result = regex::Regex::new(r"[ \t]+$")
+            .map(|re| re.replace_all(&result, "").to_string())
+            .unwrap_or(result);
+
+        result.trim().to_string()
+    }
+
    async fn send_whatsapp_message(
        &self,
        to: &str,
@ -368,6 +427,154 @@ impl WhatsAppAdapter {
        }
    }

+    /// Smart message splitting for WhatsApp's character limit.
+    /// Splits at paragraph boundaries, keeping lists together.
+    /// Groups up to 3 paragraphs per message when possible.
+    pub fn split_message_smart(&self, content: &str, max_length: usize) -> Vec<String> {
+        let mut parts = Vec::new();
+        let mut current_part = String::new();
+        let mut paragraph_count = 0;
+
+        // Split content into blocks (paragraphs or list items)
+        let lines: Vec<&str> = content.lines().collect();
+        let mut i = 0;
+
+        while i < lines.len() {
+            let line = lines[i];
+            let is_list_item = line.trim().starts_with("- ")
+                || line.trim().starts_with("* ")
+                || line.trim().starts_with("• ")
+                || line.trim().starts_with(|c: char| c.is_numeric());
+
+            // Check if this is the start of a list
+            if is_list_item {
+                // Flush current part if it has content and adding list would exceed limit
+                if !current_part.is_empty() {
+                    // If we have 3+ paragraphs, flush
+                    if paragraph_count >= 3 || current_part.len() + line.len() > max_length {
+                        parts.push(current_part.trim().to_string());
+                        current_part = String::new();
+                        paragraph_count = 0;
+                    }
+                }
+
+                // Collect entire list as one block
+                let mut list_block = String::new();
+                while i < lines.len() {
+                    let list_line = lines[i];
+                    let is_still_list = list_line.trim().starts_with("- ")
+                        || list_line.trim().starts_with("* ")
+                        || list_line.trim().starts_with("• ")
+                        || list_line.trim().starts_with(|c: char| c.is_numeric())
+                        || (list_line.trim().is_empty() && i + 1 < lines.len() && {
+                            let next = lines[i + 1];
+                            next.trim().starts_with("- ")
+                                || next.trim().starts_with("* ")
+                                || next.trim().starts_with("• ")
+                        });
+
+                    if is_still_list || (list_line.trim().is_empty() && !list_block.is_empty()) {
+                        if list_block.len() + list_line.len() + 1 > max_length {
+                            // List is too long, split it
+                            if !list_block.is_empty() {
+                                if !current_part.is_empty() {
+                                    parts.push(current_part.trim().to_string());
+                                    current_part = String::new();
+                                }
+                                parts.push(list_block.trim().to_string());
+                                list_block = String::new();
+                            }
+                        }
+                        if !list_line.trim().is_empty() {
+                            if !list_block.is_empty() {
+                                list_block.push('\n');
+                            }
+                            list_block.push_str(list_line);
+                        }
+                        i += 1;
+                    } else {
+                        break;
+                    }
+                }
+
+                if !list_block.is_empty() {
+                    if !current_part.is_empty() && current_part.len() + list_block.len() + 1 <= max_length {
+                        current_part.push('\n');
+                        current_part.push_str(&list_block);
+                    } else {
+                        if !current_part.is_empty() {
+                            parts.push(current_part.trim().to_string());
+                        }
+                        parts.push(list_block.trim().to_string());
+                        current_part = String::new();
+                        paragraph_count = 0;
+                    }
+                }
+                continue;
+            }
+
+            // Regular paragraph
+            if !line.trim().is_empty() {
+                if !current_part.is_empty() {
+                    current_part.push('\n');
+                }
+                current_part.push_str(line);
+                paragraph_count += 1;
+
+                // Flush if we have 3 paragraphs or exceeded max length
+                if paragraph_count >= 3 || current_part.len() > max_length {
+                    parts.push(current_part.trim().to_string());
+                    current_part = String::new();
+                    paragraph_count = 0;
+                }
+            } else if !current_part.is_empty() {
+                // Empty line marks paragraph end
+                paragraph_count += 1;
+                if paragraph_count >= 3 {
+                    parts.push(current_part.trim().to_string());
+                    current_part = String::new();
+                    paragraph_count = 0;
+                }
+            }
+
+            i += 1;
+        }
+
+        // Don't forget the last part
+        if !current_part.trim().is_empty() {
+            parts.push(current_part.trim().to_string());
+        }
+
+        // Handle edge case: if a single part exceeds max_length, force split
+        let mut final_parts = Vec::new();
+        for part in parts {
+            if part.len() <= max_length {
+                final_parts.push(part);
+            } else {
+                // Hard split at max_length, trying to break at word boundary
+                let mut remaining = part.as_str();
+                while !remaining.is_empty() {
+                    if remaining.len() <= max_length {
+                        final_parts.push(remaining.to_string());
+                        break;
+                    }
+                    // Find last space before max_length
+                    let split_pos = remaining[..max_length]
+                        .rfind(' ')
+                        .unwrap_or(max_length);
+                    final_parts.push(remaining[..split_pos].to_string());
+                    remaining = remaining[split_pos..].trim();
+                }
+            }
+        }
+
+        if final_parts.is_empty() {
+            final_parts.push(content.to_string());
+        }
+
+        final_parts
+    }
+
    pub fn verify_webhook(&self, token: &str) -> bool {
        token == self.webhook_verify_token
    }
@ -405,14 +612,43 @@ impl ChannelAdapter for WhatsAppAdapter {
            return Err("WhatsApp not configured".into());
        }

-        let message_id = self
-            .send_whatsapp_message(&response.user_id, &response.content)
-            .await?;
+        // WhatsApp has a 4096 character limit per message
+        // Split message at paragraph/list boundaries
+        const MAX_WHATSAPP_LENGTH: usize = 4000; // Leave some buffer

-        info!(
-            "WhatsApp message sent to {}: {} (message_id: {})",
-            response.user_id, response.content, message_id
-        );
+        // Sanitize Markdown for WhatsApp compatibility
+        let sanitized_content = Self::sanitize_for_whatsapp(&response.content);
+
+        if sanitized_content.len() <= MAX_WHATSAPP_LENGTH {
+            // Message fits in one part
+            let message_id = self
+                .send_whatsapp_message(&response.user_id, &sanitized_content)
+                .await?;
+
+            info!(
+                "WhatsApp message sent to {}: {} (message_id: {})",
+                response.user_id, &sanitized_content.chars().take(100).collect::<String>(), message_id
+            );
+        } else {
+            // Split message at appropriate boundaries
+            let parts = self.split_message_smart(&sanitized_content, MAX_WHATSAPP_LENGTH);
+
+            for (i, part) in parts.iter().enumerate() {
+                let message_id = self
+                    .send_whatsapp_message(&response.user_id, part)
+                    .await?;
+
+                info!(
+                    "WhatsApp message part {}/{} sent to {}: {} (message_id: {})",
+                    i + 1, parts.len(), response.user_id, &part.chars().take(50).collect::<String>(), message_id
+                );
+
+                // Small delay between messages to avoid rate limiting
+                if i < parts.len() - 1 {
+                    tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;
+                }
+            }
+        }

        Ok(())
    }
--- a/src/whatsapp/mod.rs
+++ b/src/whatsapp/mod.rs
@ -699,13 +699,30 @@ async fn route_to_bot(
        const MAX_WHATSAPP_LENGTH: usize = 4000;
        const MIN_FLUSH_PARAGRAPHS: usize = 3;

-        /// Check if a line is a list item
-        fn is_list_item(line: &str) -> bool {
+        /// Check if a line is a list item (numbered: "1. ", "10. ", etc.)
+        fn is_numbered_list_item(line: &str) -> bool {
            let trimmed = line.trim();
-            trimmed.starts_with("- ")
-                || trimmed.starts_with("* ")
-                || trimmed.starts_with("• ")
-                || trimmed.chars().next().map(|c| c.is_numeric()).unwrap_or(false)
+            // Must start with digit(s) followed by '.' or ')' and then space or end
+            let chars: Vec<char> = trimmed.chars().collect();
+            let mut i = 0;
+            // Skip digits
+            while i < chars.len() && chars[i].is_numeric() {
+                i += 1;
+            }
+            // Must have at least one digit and be followed by '.' or ')' then space
+            i > 0 && i < chars.len() && (chars[i] == '.' || chars[i] == ')')
+                && (i + 1 >= chars.len() || chars[i + 1] == ' ')
+        }
+
+        /// Check if a line is a bullet list item
+        fn is_bullet_list_item(line: &str) -> bool {
+            let trimmed = line.trim();
+            trimmed.starts_with("- ") || trimmed.starts_with("* ") || trimmed.starts_with("• ")
+        }
+
+        /// Check if a line is any type of list item
+        fn is_list_item(line: &str) -> bool {
+            is_numbered_list_item(line) || is_bullet_list_item(line)
        }

        /// Check if buffer contains a list (any line starting with list marker)
@ -713,6 +730,104 @@ async fn route_to_bot(
            text.lines().any(is_list_item)
        }

+        /// Check if buffer looks like it might be starting a list
+        /// (header followed by blank line, or ends with partial list item)
+        fn looks_like_list_start(text: &str) -> bool {
+            let lines: Vec<&str> = text.lines().collect();
+            if lines.len() < 2 {
+                return false;
+            }
+            // Check if last non-empty line looks like a header (short, ends with ':')
+            let last_content = lines.iter().rev().find(|l| !l.trim().is_empty());
+            if let Some(line) = last_content {
+                let trimmed = line.trim();
+                // Header pattern: short line ending with ':'
+                if trimmed.len() < 50 && trimmed.ends_with(':') {
+                    return true;
+                }
+                // Partial list item: starts with number but incomplete
+                if trimmed.chars().next().map(|c| c.is_numeric()).unwrap_or(false) {
+                    return true;
+                }
+            }
+            false
+        }
+
+        /// Check if a list appears to have ended (had list items, but last lines are not list items)
+        fn looks_like_list_end(text: &str) -> bool {
+            let lines: Vec<&str> = text.lines().collect();
+            if lines.len() < 3 {
+                return false;
+            }
+
+            // Check if there's at least one list item in the text
+            let has_list_items = lines.iter().any(|l| is_list_item(l));
+            if !has_list_items {
+                return false;
+            }
+
+            // Check if the last 2+ non-empty lines are NOT list items
+            let non_empty_lines: Vec<&str> = lines.iter().rev()
+                .copied()
+                .filter(|l| !l.trim().is_empty())
+                .take(2)
+                .collect();
+
+            if non_empty_lines.len() < 2 {
+                return false;
+            }
+
+            // If the last 2 non-empty lines are not list items, the list has likely ended
+            non_empty_lines.iter().all(|l| !is_list_item(l))
+        }
+
+        /// Split text into (before_list, list_and_after)
+        /// Returns everything before the first list item, and everything from the list item onwards
+        fn split_text_before_list(text: &str) -> (String, String) {
+            let lines: Vec<&str> = text.lines().collect();
+            let mut list_start_idx = None;
+
+            // Find the first list item
+            for (idx, line) in lines.iter().enumerate() {
+                if is_list_item(line) {
+                    list_start_idx = Some(idx);
+                    break;
+                }
+            }
+
+            match list_start_idx {
+                Some(idx) => {
+                    let before = lines[..idx].join("\n");
+                    let rest = lines[idx..].join("\n");
+                    (before, rest)
+                }
+                None => (text.to_string(), String::new())
+            }
+        }
+
+        /// Split text into (list, after_list)
+        /// Extracts the list portion and any text after it
+        fn split_list_from_text(text: &str) -> (String, String) {
+            let lines: Vec<&str> = text.lines().collect();
+            let mut list_end_idx = lines.len();
+
+            // Find where the list ends (first non-list item after list starts)
+            let mut found_list = false;
+            for (idx, line) in lines.iter().enumerate() {
+                if is_list_item(line) {
+                    found_list = true;
+                } else if found_list && !line.trim().is_empty() {
+                    // Found non-empty, non-list line after list items
+                    list_end_idx = idx;
+                    break;
+                }
+            }
+
+            let list = lines[..list_end_idx].join("\n");
+            let after = lines[list_end_idx..].join("\n");
+            (list, after)
+        }
+
        /// Send a WhatsApp message part
        async fn send_part(
            adapter: &crate::core::bot::channels::whatsapp::WhatsAppAdapter,
@ -753,22 +868,51 @@ async fn route_to_bot(
                buffer.push_str(&response.content);
            }

-            // SIMPLE LOGIC:
-            // 1. If buffer contains a list, ONLY flush when is_final or too long
-            // 2. If no list, use normal paragraph-based flushing
+            // IMPROVED LOGIC:
+            // 1. If buffer contains a list OR looks like list is starting, wait for final/too long
+            // 2. Otherwise, use normal paragraph-based flushing

            let has_list = contains_list(&buffer);
+            let maybe_list_start = !has_list && looks_like_list_start(&buffer);
+            let list_ended = has_list && looks_like_list_end(&buffer);

-            debug!(
-                "WA stream: is_final={}, has_list={}, buffer_len={}, buffer_preview={:?}",
-                is_final, has_list, buffer.len(), &buffer.chars().take(100).collect::<String>()
+            info!(
+                "WA stream: is_final={}, has_list={}, maybe_start={}, list_ended={}, len={}, preview={:?}",
+                is_final, has_list, maybe_list_start, list_ended, buffer.len(), &buffer.chars().take(80).collect::<String>()
            );

-            if has_list {
-                // With lists: only flush when final or too long
-                // This ensures the ENTIRE list is sent as one message
-                if is_final || buffer.len() >= MAX_WHATSAPP_LENGTH {
-                    info!("WA sending list message, len={}", buffer.len());
+            if has_list || maybe_list_start {
+                // With lists: isolate them as separate messages
+                if list_ended {
+                    info!("WA list ended, isolating list message");
+
+                    // Step 1: Split text before list
+                    let (text_before, rest) = split_text_before_list(&buffer);
+
+                    // Step 2: Send text before list (if not empty)
+                    if !text_before.trim().is_empty() {
+                        info!("WA sending text before list, len={}", text_before.len());
+                        send_part(&adapter_for_send, &phone, text_before, false).await;
+                    }
+
+                    // Step 3: Split list from text after
+                    let (list, text_after) = split_list_from_text(&rest);
+
+                    // Step 4: Send list (isolated)
+                    if !list.trim().is_empty() {
+                        info!("WA sending isolated list, len={}", list.len());
+                        send_part(&adapter_for_send, &phone, list, false).await;
+                    }
+
+                    // Step 5: Keep text after in buffer
+                    buffer = text_after;
+
+                    if !buffer.trim().is_empty() {
+                        debug!("WA keeping text after list in buffer, len={}", buffer.len());
+                    }
+                } else if is_final || buffer.len() >= MAX_WHATSAPP_LENGTH {
+                    // Final message or buffer too long - send everything
+                    info!("WA sending list message (final/overflow), len={}, has_list={}", buffer.len(), has_list);
                    if buffer.len() > MAX_WHATSAPP_LENGTH {
                        let parts = adapter_for_send.split_message_smart(&buffer, MAX_WHATSAPP_LENGTH);
                        for part in parts {
@ -781,7 +925,6 @@ async fn route_to_bot(
                } else {
                    debug!("WA waiting for more list content (buffer len={})", buffer.len());
                }
-                // Otherwise: wait for more content (don't flush mid-list)
            } else {
                // No list: use normal paragraph-based flushing
                let paragraph_count = buffer
@ -1726,4 +1869,399 @@ mod tests {
        assert_eq!(value.statuses.len(), 1);
        assert_eq!(value.statuses[0].status, "sent");
    }
+
+    // ==================== List Detection Tests ====================
+
+    /// Helper function to test numbered list item detection
+    fn is_numbered_list_item(line: &str) -> bool {
+        let trimmed = line.trim();
+        let chars: Vec<char> = trimmed.chars().collect();
+        let mut i = 0;
+        while i < chars.len() && chars[i].is_numeric() {
+            i += 1;
+        }
+        i > 0 && i < chars.len() && (chars[i] == '.' || chars[i] == ')')
+            && (i + 1 >= chars.len() || chars[i + 1] == ' ')
+    }
+
+    fn is_bullet_list_item(line: &str) -> bool {
+        let trimmed = line.trim();
+        trimmed.starts_with("- ") || trimmed.starts_with("* ") || trimmed.starts_with("• ")
+    }
+
+    fn is_list_item(line: &str) -> bool {
+        is_numbered_list_item(line) || is_bullet_list_item(line)
+    }
+
+    fn contains_list(text: &str) -> bool {
+        text.lines().any(is_list_item)
+    }
+
+    fn looks_like_list_start(text: &str) -> bool {
+        let lines: Vec<&str> = text.lines().collect();
+        if lines.len() < 2 {
+            return false;
+        }
+        let last_content = lines.iter().rev().find(|l| !l.trim().is_empty());
+        if let Some(line) = last_content {
+            let trimmed = line.trim();
+            if trimmed.len() < 50 && trimmed.ends_with(':') {
+                return true;
+            }
+            if trimmed.chars().next().map(|c| c.is_numeric()).unwrap_or(false) {
+                return true;
+            }
+        }
+        false
+    }
+
+    fn looks_like_list_end(text: &str) -> bool {
+        let lines: Vec<&str> = text.lines().collect();
+        if lines.len() < 3 {
+            return false;
+        }
+
+        // Check if there's at least one list item in the text
+        let has_list_items = lines.iter().any(|l| is_list_item(l));
+        if !has_list_items {
+            return false;
+        }
+
+        // Check if the last 2+ non-empty lines are NOT list items
+        let non_empty_lines: Vec<&str> = lines.iter().rev()
+            .copied()
+            .filter(|l| !l.trim().is_empty())
+            .take(2)
+            .collect();
+
+        if non_empty_lines.len() < 2 {
+            return false;
+        }
+
+        // If the last 2 non-empty lines are not list items, the list has likely ended
+        non_empty_lines.iter().all(|l| !is_list_item(l))
+    }
+
+    /// Split text into (before_list, list_and_after)
+    /// Returns everything before the first list item, and everything from the list item onwards
+    fn split_text_before_list(text: &str) -> (String, String) {
+        let lines: Vec<&str> = text.lines().collect();
+        let mut list_start_idx = None;
+
+        // Find the first list item
+        for (idx, line) in lines.iter().enumerate() {
+            if is_list_item(line) {
+                list_start_idx = Some(idx);
+                break;
+            }
+        }
+
+        match list_start_idx {
+            Some(idx) => {
+                let before = lines[..idx].join("\n");
+                let rest = lines[idx..].join("\n");
+                (before, rest)
+            }
+            None => (text.to_string(), String::new())
+        }
+    }
+
+    /// Split text into (list, after_list)
+    /// Extracts the list portion and any text after it
+    fn split_list_from_text(text: &str) -> (String, String) {
+        let lines: Vec<&str> = text.lines().collect();
+        let mut list_end_idx = lines.len();
+
+        // Find where the list ends (first non-list item after list starts)
+        let mut found_list = false;
+        for (idx, line) in lines.iter().enumerate() {
+            if is_list_item(line) {
+                found_list = true;
+            } else if found_list && !line.trim().is_empty() {
+                // Found non-empty, non-list line after list items
+                list_end_idx = idx;
+                break;
+            }
+        }
+
+        let list = lines[..list_end_idx].join("\n");
+        let after = lines[list_end_idx..].join("\n");
+        (list, after)
+    }
+
+    #[test]
+    fn test_numbered_list_detection() {
+        // Valid numbered list items
+        assert!(is_numbered_list_item("1. Item"));
+        assert!(is_numbered_list_item("1. Item with text"));
+        assert!(is_numbered_list_item("10. Tenth item"));
+        assert!(is_numbered_list_item("1) Item with parenthesis"));
+        assert!(is_numbered_list_item("  1. Indented item")); // trim works
+
+        // Invalid - not numbered list items
+        assert!(!is_numbered_list_item("Item 1")); // number at end
+        assert!(!is_numbered_list_item("2024 was a year")); // year in sentence
+        assert!(!is_numbered_list_item("1.Item")); // no space after dot
+        assert!(!is_numbered_list_item("Item")); // no number
+        assert!(!is_numbered_list_item("")); // empty
+    }
+
+    #[test]
+    fn test_bullet_list_detection() {
+        // Valid bullet list items
+        assert!(is_bullet_list_item("- Item"));
+        assert!(is_bullet_list_item("* Item"));
+        assert!(is_bullet_list_item("• Item"));
+        assert!(is_bullet_list_item("  - Indented item"));
+
+        // Invalid
+        assert!(!is_bullet_list_item("Item - with dash"));
+        assert!(!is_bullet_list_item("-Item")); // no space after dash
+    }
+
+    #[test]
+    fn test_contains_list() {
+        // Contains numbered list
+        assert!(contains_list("Some text\n1. First item\n2. Second item"));
+
+        // Contains bullet list
+        assert!(contains_list("- Item 1\n- Item 2"));
+
+        // No list
+        assert!(!contains_list("Just regular text"));
+        assert!(!contains_list("2024 was a great year")); // year should not trigger
+
+        // Mixed content with list
+        assert!(contains_list("Here are the options:\n\n1. Option A\n2. Option B"));
+    }
+
+    #[test]
+    fn test_looks_like_list_start() {
+        // Header followed by content looks like list start
+        assert!(looks_like_list_start("Aulas Disponíveis:\n\n"));
+        assert!(looks_like_list_start("Options:\n\nSome content"));
+
+        // Number at start looks like potential list
+        assert!(looks_like_list_start("Some text\n1"));
+
+        // Regular text doesn't look like list start
+        assert!(!looks_like_list_start("Just regular text"));
+        assert!(!looks_like_list_start("Single line"));
+    }
+
+    #[test]
+    fn test_full_list_scenario() {
+        // Simulate the exact scenario from the bug report
+        let content = r#"Aulas Disponíveis:
+
+1. Aula de Violão - Aprenda a tocar violão do básico ao avançado
+2. Aula de Piano - Desenvolva suas habilidades no piano
+3. Aula de Canto - Técnicas vocais para todos os níveis
+4. Aula de Teatro - Expressão corporal e interpretação
+5. Aula de Dança - Diversos estilos de dança
+6. Aula de Desenho - Técnicas de desenho e pintura
+7. Aula de Inglês - Aprenda inglês de forma dinâmica
+8. Aula de Robótica - Introdução à robótica e programação
+
+Estou à disposição para ajudar com mais informações!"#;
+
+        // Should detect list
+        assert!(contains_list(content), "Should detect numbered list in content");
+
+        // Count list items
+        let list_items: Vec<&str> = content.lines().filter(|l| is_list_item(l)).collect();
+        assert_eq!(list_items.len(), 8, "Should detect all 8 list items");
+
+        // Verify each item is detected
+        for (i, item) in list_items.iter().enumerate() {
+            assert!(item.starts_with(&format!("{}.", i + 1)),
+                "Item {} should start with '{}.'", i + 1, i + 1);
+        }
+
+        // NEW: Should detect that list has ended (content after list)
+        assert!(looks_like_list_end(content), "Should detect list has ended");
+    }
+
+    #[test]
+    fn test_looks_like_list_end() {
+        // List with content after - should detect as ended
+        let with_content_after = r#"Cursos disponíveis:
+
+1. Ensino Fundamental I
+2. Ensino Fundamental II
+3. Ensino Médio
+
+Entre em contato para mais informações."#;
+        assert!(looks_like_list_end(with_content_after), "Should detect list ended with content after");
+
+        // List with multiple paragraphs after
+        let with_multiple_after = r#"Opções:
+
+1. Opção A
+2. Opção B
+3. Opção C
+
+Texto adicional aqui.
+
+Mais um parágrafo."#;
+        assert!(looks_like_list_end(with_multiple_after), "Should detect list ended with multiple paragraphs");
+
+        // List still in progress - should NOT detect as ended
+        let in_progress = r#"Cursos:
+
+1. Curso A
+2. Curso B
+3."#;
+        assert!(!looks_like_list_end(in_progress), "Should NOT detect list ended (still in progress)");
+
+        // List with only one line after (need 2+ to confirm end)
+        let one_line_after = r#"Cursos:
+
+1. Curso A
+2. Curso B
+
+Uma linha apenas."#;
+        // This has 2 non-empty lines after (empty line + text), so it should detect as ended
+        assert!(looks_like_list_end(one_line_after), "Should detect list ended with 2+ non-empty lines after");
+
+        // No list at all
+        let no_list = "Apenas texto normal sem lista.";
+        assert!(!looks_like_list_end(no_list), "Should NOT detect list ended (no list present)");
+
+        // List with blank lines after (but no content)
+        let list_with_blanks = r#"Lista:
+
+1. Item 1
+2. Item 2
+
+
+"#;
+        assert!(!looks_like_list_end(list_with_blanks), "Should NOT detect list ended (only blank lines after)");
+    }
+
+    #[test]
+    fn test_split_text_before_list() {
+        // Text with list in the middle
+        let text1 = "Texto antes da lista\n\n1. Primeiro item\n2. Segundo item\n\nTexto depois";
+        let (before, rest) = split_text_before_list(text1);
+        assert_eq!(before, "Texto antes da lista\n");
+        assert!(rest.starts_with("1. Primeiro item"));
+
+        // List at the start (no text before)
+        let text2 = "1. Item 1\n2. Item 2";
+        let (before, rest) = split_text_before_list(text2);
+        assert_eq!(before, "");
+        assert_eq!(rest, "1. Item 1\n2. Item 2");
+
+        // No list at all
+        let text3 = "Apenas texto sem lista";
+        let (before, rest) = split_text_before_list(text3);
+        assert_eq!(before, "Apenas texto sem lista");
+        assert_eq!(rest, "");
+
+        // Multiple paragraphs before list
+        let text4 = "Parágrafo 1\n\nParágrafo 2\n\n1. Item";
+        let (before, rest) = split_text_before_list(text4);
+        assert_eq!(before, "Parágrafo 1\n\nParágrafo 2\n");
+        assert_eq!(rest, "1. Item");
+
+        // Bullet list
+        let text5 = "Introdução\n- Item 1\n- Item 2";
+        let (before, rest) = split_text_before_list(text5);
+        assert_eq!(before, "Introdução");
+        assert!(rest.starts_with("- Item 1"));
+    }
+
+    #[test]
+    fn test_split_list_from_text() {
+        // List with text after
+        let text1 = "1. Primeiro item\n2. Segundo item\n\nTexto depois da lista";
+        let (list, after) = split_list_from_text(text1);
+        assert_eq!(list, "1. Primeiro item\n2. Segundo item\n");
+        assert_eq!(after, "Texto depois da lista");
+
+        // List at the end (no text after)
+        let text2 = "1. Item 1\n2. Item 2";
+        let (list, after) = split_list_from_text(text2);
+        assert_eq!(list, "1. Item 1\n2. Item 2");
+        assert_eq!(after, "");
+
+        // List only
+        let text3 = "1. Item";
+        let (list, after) = split_list_from_text(text3);
+        assert_eq!(list, "1. Item");
+        assert_eq!(after, "");
+
+        // List with blank lines after
+        let text4 = "1. Item 1\n2. Item 2\n\n\nTexto";
+        let (list, after) = split_list_from_text(text4);
+        assert_eq!(list, "1. Item 1\n2. Item 2\n\n");
+        assert_eq!(after, "Texto");
+
+        // Bullet list with text after
+        let text5 = "- Item 1\n- Item 2\n\nConclusão";
+        let (list, after) = split_list_from_text(text5);
+        assert_eq!(list, "- Item 1\n- Item 2\n");
+        assert_eq!(after, "Conclusão");
+
+        // Multiple paragraphs after list
+        let text6 = "1. Item\n\nTexto 1\n\nTexto 2";
+        let (list, after) = split_list_from_text(text6);
+        assert_eq!(list, "1. Item\n");
+        assert_eq!(after, "Texto 1\n\nTexto 2");
+    }
+
+    #[test]
+    fn test_list_isolation_scenario() {
+        // Test the complete scenario from zap.md example
+        let full_text = r#"Olá! 😊
+
+Infelizmente, não tenho a informação específica sobre o horário de funcionamento da Escola Salesiana no momento.
+
+Para obter essa informação, você pode:
+1. *Entrar em contato com a secretaria* - Posso te ajudar
+2. *Agendar uma visita* - Assim você conhece a escola
+
+Gostaria que eu te ajudasse?"#;
+
+        // Step 1: Split text before list
+        let (text_before, rest) = split_text_before_list(full_text);
+        assert!(text_before.contains("Olá!"));
+        assert!(text_before.contains("Para obter essa informação, você pode:"));
+        assert!(rest.starts_with("1. *Entrar em contato"));
+
+        // Step 2: Split list from text after
+        let (list, text_after) = split_list_from_text(&rest);
+        assert!(list.starts_with("1. *Entrar em contato"));
+        assert!(list.contains("2. *Agendar uma visita"));
+        assert!(text_after.contains("Gostaria que eu te ajudasse?"));
+    }
+
+    #[test]
+    fn test_partial_list_streaming() {
+        // Simulate streaming chunks arriving
+        let chunks = vec![
+            "Aulas Disponíveis:\n\n",
+            "1. Aula de Violão\n\n",
+            "2. Aula de Piano\n\n",
+            "3. Aula de Canto\n\n",
+        ];
+
+        let mut buffer = String::new();
+        for (i, chunk) in chunks.iter().enumerate() {
+            buffer.push_str(chunk);
+
+            // After first chunk, should detect potential list start
+            if i == 0 {
+                assert!(looks_like_list_start(&buffer) || contains_list(&buffer),
+                    "After chunk 0, should detect list start or contain list");
+            }
+
+            // After second chunk onwards, should detect list
+            if i >= 1 {
+                assert!(contains_list(&buffer),
+                    "After chunk {}, should detect list", i);
+            }
+        }
+    }
 }