fix: eliminate LLM keyword deadlock with isolated worker thread
All checks were successful
BotServer CI/CD / build (push) Successful in 3m32s

The previous fix used Handle::current().block_on() which deadlocks when
the Rhai engine runs on a Tokio worker thread — it blocks the very
thread the async task needs to make progress.

New approach: spawn a dedicated background thread with its own
single-threaded Tokio runtime, communicate via mpsc channel with a
45s timeout. This completely isolates the LLM runtime from the
caller's runtime, eliminating any possibility of thread starvation
or nested-runtime deadlock.

Also remove unused 'trace' import from llm/mod.rs.

Co-authored-by: Qwen-Coder <qwen-coder@alibabacloud.com>
This commit is contained in:
Rodrigo Rodriguez (Pragmatismo) 2026-04-13 23:20:10 -03:00
parent b3fd90b056
commit 25d6d2fd57
2 changed files with 36 additions and 10 deletions

View file

@ -1,10 +1,15 @@
use crate::core::shared::models::UserSession;
use crate::core::shared::state::AppState;
use log::error;
use rhai::{Dynamic, Engine};
use std::sync::Arc;
use std::time::Duration;
use uuid::Uuid;
/// Register the LLM keyword with a deadlock-free execution model.
///
/// Uses a dedicated background thread with its own single-threaded Tokio runtime
/// to avoid blocking or starving the caller's runtime — the classic source of
/// LLM deadlocks in this codebase.
pub fn llm_keyword(state: Arc<AppState>, _user: UserSession, engine: &mut Engine) {
let state_clone = Arc::clone(&state);
engine
@ -21,15 +26,34 @@ pub fn llm_keyword(state: Arc<AppState>, _user: UserSession, engine: &mut Engine
let state_for_async = Arc::clone(&state_clone);
let prompt = build_llm_prompt(&text);
let handle = tokio::runtime::Handle::current();
let result = handle.block_on(async move {
tokio::time::timeout(
Duration::from_secs(45),
execute_llm_generation(state_for_async, prompt)
).await
});
let (tx, rx) = std::sync::mpsc::channel();
match result {
// Spawn a dedicated worker thread with its own runtime.
// This prevents deadlocks caused by blocking the caller's runtime
// while simultaneously trying to run async code on it.
std::thread::Builder::new()
.name("llm-worker".into())
.spawn(move || {
let result = std::thread::Builder::new()
.name("llm-rt".into())
.spawn(move || {
let rt = tokio::runtime::Builder::new_current_thread()
.enable_all()
.build()?;
rt.block_on(execute_llm_generation(state_for_async, prompt))
});
let outcome = match result {
Ok(handle) => match handle.join() {
Ok(res) => res,
Err(_) => Err("LLM worker thread panicked".into()),
},
Err(e) => Err(format!("Failed to spawn LLM worker: {e}").into()),
};
let _ = tx.send(outcome);
})
.expect("LLM dispatcher thread");
match rx.recv_timeout(Duration::from_secs(45)) {
Ok(Ok(output)) => Ok(Dynamic::from(output)),
Ok(Err(e)) => Err(Box::new(rhai::EvalAltResult::ErrorRuntime(
e.to_string().into(),
@ -43,12 +67,14 @@ pub fn llm_keyword(state: Arc<AppState>, _user: UserSession, engine: &mut Engine
})
.expect("valid syntax registration");
}
/// Wrap the user's text in the fixed Brazilian-Portuguese assistant
/// prompt template, trimming surrounding whitespace from the input.
///
/// Returns the full prompt string handed to the LLM backend.
fn build_llm_prompt(user_text: &str) -> String {
    // The instruction preamble is constant; only the user's trimmed text varies.
    const PREAMBLE: &str = "Você é um assistente virtual em português brasileiro. Responda sempre em português do Brasil, de forma clara e amigável.\n\nPedido do usuário: ";
    let trimmed = user_text.trim();
    // Reserve the exact final length up front to avoid a second allocation.
    let mut prompt = String::with_capacity(PREAMBLE.len() + trimmed.len());
    prompt.push_str(PREAMBLE);
    prompt.push_str(trimmed);
    prompt
}
pub async fn execute_llm_generation(
state: Arc<AppState>,
prompt: String,

View file

@ -1,6 +1,6 @@
use async_trait::async_trait;
use futures::StreamExt;
use log::{error, info, trace};
use log::{error, info};
use serde_json::Value;
use std::sync::Arc;
use tokio::sync::{mpsc, RwLock};