botserver/src/llm/mod.rs

use async_trait::async_trait;
use futures::StreamExt;
use serde_json::Value;
use tokio::sync::mpsc;

pub mod local;
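
/// Common interface over chat-completion backends (OpenAI-compatible HTTP
/// APIs, local runtimes, and so on).
///
/// A minimal usage sketch, assuming a Tokio runtime and a valid API key
/// (variable names are illustrative):
///
/// ```ignore
/// let provider = OpenAIClient::new(api_key, None);
/// let reply = provider.generate("Hello!", &serde_json::json!({})).await?;
/// ```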
#[async_trait]
pub trait LLMProvider: Send + Sync {
    /// Generate a complete response for `prompt` and return it as a single string.
    async fn generate(
        &self,
        prompt: &str,
        config: &Value,
    ) -> Result<String, Box<dyn std::error::Error + Send + Sync>>;

    /// Generate a response for `prompt`, sending tokens over `tx` as they arrive.
    async fn generate_stream(
        &self,
        prompt: &str,
        config: &Value,
        tx: mpsc::Sender<String>,
    ) -> Result<(), Box<dyn std::error::Error + Send + Sync>>;

    /// Cancel the in-flight job for `session_id`, if the backend supports it.
    async fn cancel_job(
        &self,
        session_id: &str,
    ) -> Result<(), Box<dyn std::error::Error + Send + Sync>>;
}
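
/// Client for the OpenAI chat-completions API, or any OpenAI-compatible endpoint.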
pub struct OpenAIClient {
    client: reqwest::Client,
    api_key: String,
    base_url: String,
}

impl OpenAIClient {
    /// Create a client. `base_url` defaults to the public OpenAI endpoint and
    /// should include the `/v1` prefix.
    pub fn new(api_key: String, base_url: Option<String>) -> Self {
        Self {
            client: reqwest::Client::new(),
            api_key,
            base_url: base_url.unwrap_or_else(|| "https://api.openai.com/v1".to_string()),
        }
    }
}
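
// A minimal construction sketch (illustrative, not part of this module); the
// localhost URL assumes some OpenAI-compatible server, such as llama.cpp or
// vLLM, listening there:
//
//     let hosted = OpenAIClient::new(key, None); // https://api.openai.com/v1
//     let local = OpenAIClient::new(key, Some("http://localhost:8080/v1".into()));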

#[async_trait]
impl LLMProvider for OpenAIClient {
    // NOTE: `_config` is currently ignored; the model and token limit are
    // hardcoded below.
    async fn generate(
        &self,
        prompt: &str,
        _config: &Value,
    ) -> Result<String, Box<dyn std::error::Error + Send + Sync>> {
        let response = self
            .client
            // `base_url` already includes the `/v1` prefix, so only the
            // resource path is appended here.
            .post(format!("{}/chat/completions", self.base_url))
            .header("Authorization", format!("Bearer {}", self.api_key))
            .json(&serde_json::json!({
                "model": "gpt-3.5-turbo",
                "messages": [{"role": "user", "content": prompt}],
                "max_tokens": 1000
            }))
            .send()
            .await?;

        let result: Value = response.json().await?;
        let raw_content = result["choices"][0]["message"]["content"]
            .as_str()
            .unwrap_or("");

        // Some backends prefix the answer with reasoning terminated by an end
        // token; keep only the text after it, e.g.
        // "reasoning...final<|message|>Hello" yields "Hello". Adjust the token
        // string if the backend uses a different marker.
        let end_token = "final<|message|>";
        let content = if let Some(pos) = raw_content.find(end_token) {
            // Skip everything up to and including the end token.
            raw_content[(pos + end_token.len())..].to_string()
        } else {
            // If the token is not found, return the full content.
            raw_content.to_string()
        };

        Ok(content)
    }

    async fn generate_stream(
        &self,
        prompt: &str,
        _config: &Value,
        tx: mpsc::Sender<String>,
    ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
        let response = self
            .client
            .post(format!("{}/chat/completions", self.base_url))
            .header("Authorization", format!("Bearer {}", self.api_key))
            .json(&serde_json::json!({
                "model": "gpt-3.5-turbo",
                "messages": [{"role": "user", "content": prompt}],
                "max_tokens": 1000,
                "stream": true
            }))
            .send()
            .await?;

        let mut stream = response.bytes_stream();
        // Accumulate raw bytes here: a network chunk can end mid-line, so only
        // complete SSE lines are parsed.
        let mut buffer = String::new();
        while let Some(chunk) = stream.next().await {
            let chunk = chunk?;
            buffer.push_str(&String::from_utf8_lossy(&chunk));
            while let Some(newline_pos) = buffer.find('\n') {
                let line: String = buffer.drain(..=newline_pos).collect();
                let line = line.trim_end();
                if let Some(data) = line.strip_prefix("data: ") {
                    // The stream terminates with a literal "[DONE]" sentinel.
                    if data == "[DONE]" {
                        continue;
                    }
                    if let Ok(value) = serde_json::from_str::<Value>(data) {
                        if let Some(content) = value["choices"][0]["delta"]["content"].as_str() {
                            let _ = tx.send(content.to_string()).await;
                        }
                    }
                }
            }
        }
        Ok(())
    }
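
    // A minimal consumer sketch for `generate_stream` (illustrative, not part
    // of this module); assumes a Tokio runtime:
    //
    //     let (tx, mut rx) = mpsc::channel::<String>(32);
    //     tokio::spawn(async move {
    //         while let Some(token) = rx.recv().await {
    //             print!("{token}");
    //         }
    //     });
    //     client.generate_stream("Hello!", &serde_json::json!({}), tx).await?;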

    async fn cancel_job(
        &self,
        _session_id: &str,
    ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
        // OpenAI doesn't support job cancellation, so this is a no-op.
        Ok(())
    }
}