botserver/src/llm/mod.rs


use async_trait::async_trait;
use futures::StreamExt;
use serde_json::Value;
use tokio::sync::mpsc;
pub mod local;
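
/// Common interface for text-generation backends (OpenAI-compatible HTTP APIs,
/// the local models under `local`, etc.). Implementors provide blocking and
/// streaming generation plus job cancellation; `summarize` has a default
/// implementation built on top of `generate`.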
#[async_trait]
pub trait LLMProvider: Send + Sync {
    async fn generate(
        &self,
        prompt: &str,
        config: &Value,
    ) -> Result<String, Box<dyn std::error::Error + Send + Sync>>;

    async fn generate_stream(
        &self,
        prompt: &str,
        config: &Value,
        tx: mpsc::Sender<String>,
    ) -> Result<(), Box<dyn std::error::Error + Send + Sync>>;

    async fn summarize(
        &self,
        text: &str,
    ) -> Result<String, Box<dyn std::error::Error + Send + Sync>> {
        let prompt = format!(
            "Summarize the following conversation while preserving key details:\n\n{}",
            text
        );
        self.generate(&prompt, &serde_json::json!({"max_tokens": 500}))
            .await
    }

    async fn cancel_job(
        &self,
        session_id: &str,
    ) -> Result<(), Box<dyn std::error::Error + Send + Sync>>;
}
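
/// Client for OpenAI-style chat completion endpoints. `base_url` is the API
/// root; the `/v1/chat/completions` path is appended per request.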
pub struct OpenAIClient {
    client: reqwest::Client,
    api_key: String,
    base_url: String,
}
#[async_trait]
impl LLMProvider for OpenAIClient {
    async fn generate(
        &self,
        prompt: &str,
        _config: &Value,
    ) -> Result<String, Box<dyn std::error::Error + Send + Sync>> {
        let messages = self.parse_messages(prompt);
        let response = self
            .client
            .post(format!("{}/v1/chat/completions", self.base_url))
            .header("Authorization", format!("Bearer {}", self.api_key))
            .json(&serde_json::json!({
                "model": "gpt-3.5-turbo",
                "messages": messages,
                "max_tokens": 1000
            }))
            .send()
            .await?;

        let result: Value = response.json().await?;
        let raw_content = result["choices"][0]["message"]["content"]
            .as_str()
            .unwrap_or("");

        // Some backends prefix the answer with reasoning tokens; keep only the
        // text that follows the final-message marker when it is present.
        let end_token = "final<|message|>";
        let content = if let Some(pos) = raw_content.find(end_token) {
            raw_content[(pos + end_token.len())..].to_string()
        } else {
            raw_content.to_string()
        };

        Ok(content)
    }
    async fn generate_stream(
        &self,
        prompt: &str,
        _config: &Value,
        tx: mpsc::Sender<String>,
    ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
        let messages = self.parse_messages(prompt);
        let response = self
            .client
            .post(format!("{}/v1/chat/completions", self.base_url))
            .header("Authorization", format!("Bearer {}", self.api_key))
            .json(&serde_json::json!({
                "model": "gpt-3.5-turbo",
                "messages": messages,
                "stream": true
            }))
            .send()
            .await?;

        // The streaming endpoint returns server-sent events: each `data: {...}`
        // line carries a JSON delta, and `data: [DONE]` terminates the stream.
        let mut stream = response.bytes_stream();
        let mut buffer = String::new();
        while let Some(chunk) = stream.next().await {
            let chunk = chunk?;
            let chunk_str = String::from_utf8_lossy(&chunk);
            for line in chunk_str.lines() {
                if line.starts_with("data: ") && !line.contains("[DONE]") {
                    if let Ok(data) = serde_json::from_str::<Value>(&line[6..]) {
                        if let Some(content) = data["choices"][0]["delta"]["content"].as_str() {
                            buffer.push_str(content);
                            let _ = tx.send(content.to_string()).await;
                        }
                    }
                }
            }
        }
        Ok(())
    }
    async fn cancel_job(
        &self,
        _session_id: &str,
    ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
        // No server-side cancellation is issued for this backend.
        Ok(())
    }
}
impl OpenAIClient {
    pub fn new(api_key: String, base_url: Option<String>) -> Self {
        Self {
            client: reqwest::Client::new(),
            api_key,
            // Default to the public OpenAI API root when no base URL is supplied.
            base_url: base_url.unwrap_or_else(|| "https://api.openai.com".to_string()),
        }
    }
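
    /// Splits a transcript-style prompt into chat messages. A line prefixed with
    /// `Human:`, `Bot:`, or `Compact:` starts a new message with role `user`,
    /// `assistant`, or `system`; plain continuation lines (no `:`) are appended
    /// to the current message, and anything before the first role marker is skipped.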
    fn parse_messages(&self, prompt: &str) -> Vec<Value> {
        let mut messages = Vec::new();
        let mut current_role = None;
        let mut current_content = String::new();

        for line in prompt.lines() {
            if let Some(role_end) = line.find(':') {
                let role_part = line[..role_end].trim().to_lowercase();
                let role = match role_part.as_str() {
                    "human" => "user",
                    "bot" => "assistant",
                    "compact" => "system",
                    _ => continue,
                };
                if let Some(r) = current_role.take() {
                    if !current_content.is_empty() {
                        messages.push(serde_json::json!({
                            "role": r,
                            "content": current_content.trim()
                        }));
                    }
                }
                current_role = Some(role);
                current_content = line[role_end + 1..].trim_start().to_string();
                continue;
            }
            if current_role.is_some() {
                if !current_content.is_empty() {
                    current_content.push('\n');
                }
                current_content.push_str(line);
            }
        }

        if let Some(role) = current_role {
            if !current_content.is_empty() {
                messages.push(serde_json::json!({
                    "role": role,
                    "content": current_content.trim()
                }));
            }
        }
        messages
    }
}
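
// A minimal usage sketch of `parse_messages`, written as a unit test so it runs
// without network access or an API key; the "test-key" value and the example
// transcript below are illustrative only.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn parse_messages_maps_transcript_roles() {
        let client = OpenAIClient::new("test-key".to_string(), None);
        let prompt = "Human: hello\nBot: hi there\nHuman: how are you?";
        let messages = client.parse_messages(prompt);

        // Each role prefix becomes one chat message with the mapped role.
        assert_eq!(messages.len(), 3);
        assert_eq!(messages[0]["role"], "user");
        assert_eq!(messages[0]["content"], "hello");
        assert_eq!(messages[1]["role"], "assistant");
        assert_eq!(messages[1]["content"], "hi there");
        assert_eq!(messages[2]["role"], "user");
        assert_eq!(messages[2]["content"], "how are you?");
    }
}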