From 25d6d2fd57197a0c14796cd8da928b4b549246cf Mon Sep 17 00:00:00 2001
From: "Rodrigo Rodriguez (Pragmatismo)" <me@rodrigorodriguez.com>
Date: Mon, 13 Apr 2026 23:20:10 -0300
Subject: [PATCH] fix: eliminate LLM keyword deadlock with isolated worker
 thread
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The previous fix used Handle::current().block_on(), which deadlocks when
the Rhai engine runs on a Tokio worker thread — it blocks the very
thread the async task needs in order to make progress.

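New approach: spawn a dedicated background thread with its own
single-threaded Tokio runtime, and hand the result back to the caller
over a std::sync::mpsc channel that the caller waits on with a 45s
timeout. This isolates the LLM runtime from the caller's runtime, so
the LLM future can no longer be starved by, or deadlock against, the
runtime that invoked it (no nested-runtime block_on).

Note: the calling thread still blocks in recv_timeout() for up to 45s,
and a worker whose caller has timed out is not cancelled — it runs
until the LLM call returns and its result is then discarded.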

Also remove unused 'trace' import from llm/mod.rs.

Co-authored-by: Qwen-Coder <qwen-coder@alibabacloud.com>
---
 src/basic/keywords/llm_keyword.rs | 44 ++++++++++++++++++++++++-------
 src/llm/mod.rs                    |  2 +-
 2 files changed, 36 insertions(+), 10 deletions(-)
diff --git a/src/basic/keywords/llm_keyword.rs b/src/basic/keywords/llm_keyword.rs
index 7b11a5f5..8ba54356 100644
--- a/src/basic/keywords/llm_keyword.rs
+++ b/src/basic/keywords/llm_keyword.rs
@@ -1,10 +1,15 @@
 use crate::core::shared::models::UserSession;
 use crate::core::shared::state::AppState;
-use log::error;
 use rhai::{Dynamic, Engine};
 use std::sync::Arc;
 use std::time::Duration;
 use uuid::Uuid;
+
+/// Register the LLM keyword with a deadlock-free execution model.
+///
+/// Uses a dedicated background thread with its own single-threaded Tokio runtime
+/// to avoid blocking or starving the caller's runtime — the classic source of
+/// LLM deadlocks in this codebase.
 pub fn llm_keyword(state: Arc<AppState>, _user: UserSession, engine: &mut Engine) {
     let state_clone = Arc::clone(&state);
     engine
@@ -21,15 +26,34 @@ pub fn llm_keyword(state: Arc<AppState>, _user: UserSession, engine: &mut Engine
             let state_for_async = Arc::clone(&state_clone);
             let prompt = build_llm_prompt(&text);
 
-            let handle = tokio::runtime::Handle::current();
-            let result = handle.block_on(async move {
-                tokio::time::timeout(
-                    Duration::from_secs(45),
-                    execute_llm_generation(state_for_async, prompt)
-                ).await
-            });
+            let (tx, rx) = std::sync::mpsc::channel();
 
-            match result {
+            // Spawn a dedicated worker thread with its own runtime.
+            // This prevents deadlocks caused by blocking the caller's runtime
+            // while simultaneously trying to run async code on it.
+            std::thread::Builder::new()
+                .name("llm-worker".into())
+                .spawn(move || {
+                    let result = std::thread::Builder::new()
+                        .name("llm-rt".into())
+                        .spawn(move || {
+                            let rt = tokio::runtime::Builder::new_current_thread()
+                                .enable_all()
+                                .build()?;
+                            rt.block_on(execute_llm_generation(state_for_async, prompt))
+                        });
+                    let outcome = match result {
+                        Ok(handle) => match handle.join() {
+                            Ok(res) => res,
+                            Err(_) => Err("LLM worker thread panicked".into()),
+                        },
+                        Err(e) => Err(format!("Failed to spawn LLM worker: {e}").into()),
+                    };
+                    let _ = tx.send(outcome);
+                })
+                .expect("LLM dispatcher thread");
+
+            match rx.recv_timeout(Duration::from_secs(45)) {
                 Ok(Ok(output)) => Ok(Dynamic::from(output)),
                 Ok(Err(e)) => Err(Box::new(rhai::EvalAltResult::ErrorRuntime(
                     e.to_string().into(),
@@ -43,12 +67,14 @@ pub fn llm_keyword(state: Arc<AppState>, _user: UserSession, engine: &mut Engine
         })
         .expect("valid syntax registration");
 }
+
 fn build_llm_prompt(user_text: &str) -> String {
     format!(
         "Você é um assistente virtual em português brasileiro. Responda sempre em português do Brasil, de forma clara e amigável.\n\nPedido do usuário: {}",
         user_text.trim()
     )
 }
+
 pub async fn execute_llm_generation(
     state: Arc<AppState>,
     prompt: String,
diff --git a/src/llm/mod.rs b/src/llm/mod.rs
index 22f4630e..5d9c113e 100644
--- a/src/llm/mod.rs
+++ b/src/llm/mod.rs
@@ -1,6 +1,6 @@
 use async_trait::async_trait;
 use futures::StreamExt;
-use log::{error, info, trace};
+use log::{error, info};
 use serde_json::Value;
 use std::sync::Arc;
 use tokio::sync::{mpsc, RwLock};