From 25d6d2fd57197a0c14796cd8da928b4b549246cf Mon Sep 17 00:00:00 2001 From: "Rodrigo Rodriguez (Pragmatismo)" Date: Mon, 13 Apr 2026 23:20:10 -0300 Subject: [PATCH] fix: eliminate LLM keyword deadlock with isolated worker thread MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous fix used Handle::current().block_on() which deadlocks when the Rhai engine runs on a Tokio worker thread — it blocks the very thread the async task needs to make progress. New approach: spawn a dedicated background thread with its own single-threaded Tokio runtime, communicate via mpsc channel with a 45s timeout. This completely isolates the LLM runtime from the caller's runtime, eliminating any possibility of thread starvation or nested-runtime deadlock. Also remove unused 'trace' import from llm/mod.rs. Co-authored-by: Qwen-Coder --- src/basic/keywords/llm_keyword.rs | 44 ++++++++++++++++++++++++------- src/llm/mod.rs | 2 +- 2 files changed, 36 insertions(+), 10 deletions(-) diff --git a/src/basic/keywords/llm_keyword.rs b/src/basic/keywords/llm_keyword.rs index 7b11a5f5..8ba54356 100644 --- a/src/basic/keywords/llm_keyword.rs +++ b/src/basic/keywords/llm_keyword.rs @@ -1,10 +1,15 @@ use crate::core::shared::models::UserSession; use crate::core::shared::state::AppState; -use log::error; use rhai::{Dynamic, Engine}; use std::sync::Arc; use std::time::Duration; use uuid::Uuid; + +/// Register the LLM keyword with a deadlock-free execution model. +/// +/// Uses a dedicated background thread with its own single-threaded Tokio runtime +/// to avoid blocking or starving the caller's runtime — the classic source of +/// LLM deadlocks in this codebase. pub fn llm_keyword(state: Arc, _user: UserSession, engine: &mut Engine) { let state_clone = Arc::clone(&state); engine @@ -21,15 +26,34 @@ pub fn llm_keyword(state: Arc, _user: UserSession, engine: &mut Engine let state_for_async = Arc::clone(&state_clone); let prompt = build_llm_prompt(&text); - let handle = tokio::runtime::Handle::current(); - let result = handle.block_on(async move { - tokio::time::timeout( - Duration::from_secs(45), - execute_llm_generation(state_for_async, prompt) - ).await - }); + let (tx, rx) = std::sync::mpsc::channel(); - match result { + // Spawn a dedicated worker thread with its own runtime. + // This prevents deadlocks caused by blocking the caller's runtime + // while simultaneously trying to run async code on it. + std::thread::Builder::new() + .name("llm-worker".into()) + .spawn(move || { + let result = std::thread::Builder::new() + .name("llm-rt".into()) + .spawn(move || { + let rt = tokio::runtime::Builder::new_current_thread() + .enable_all() + .build()?; + rt.block_on(execute_llm_generation(state_for_async, prompt)) + }); + let outcome = match result { + Ok(handle) => match handle.join() { + Ok(res) => res, + Err(_) => Err("LLM worker thread panicked".into()), + }, + Err(e) => Err(format!("Failed to spawn LLM worker: {e}").into()), + }; + let _ = tx.send(outcome); + }) + .expect("LLM dispatcher thread"); + + match rx.recv_timeout(Duration::from_secs(45)) { Ok(Ok(output)) => Ok(Dynamic::from(output)), Ok(Err(e)) => Err(Box::new(rhai::EvalAltResult::ErrorRuntime( e.to_string().into(), @@ -43,12 +67,14 @@ pub fn llm_keyword(state: Arc, _user: UserSession, engine: &mut Engine }) .expect("valid syntax registration"); } + fn build_llm_prompt(user_text: &str) -> String { format!( "Você é um assistente virtual em português brasileiro. Responda sempre em português do Brasil, de forma clara e amigável.\n\nPedido do usuário: {}", user_text.trim() ) } + pub async fn execute_llm_generation( state: Arc, prompt: String, diff --git a/src/llm/mod.rs b/src/llm/mod.rs index 22f4630e..5d9c113e 100644 --- a/src/llm/mod.rs +++ b/src/llm/mod.rs @@ -1,6 +1,6 @@ use async_trait::async_trait; use futures::StreamExt; -use log::{error, info, trace}; +use log::{error, info}; use serde_json::Value; use std::sync::Arc; use tokio::sync::{mpsc, RwLock};