refactor(automation, llm): simplify service init and prompt handling
- Remove `scripts_dir` from `AutomationService` and its constructor, as it was unused.
- Drop LLM and embedding server readiness checks; the service now only schedules periodic tasks.
- Increase the automation loop's tick interval from 5 seconds to 15 seconds to reduce load.
- Streamline the LLM keyword prompt to a concise `"User: {}"` format, removing verbose boilerplate.
- Replace the `LLM_CACHE` semantic-cache lookups and per-prompt logging in the bot orchestrator with direct streaming generation (see the sketch after this list).
- Move the automation thread startup in `main.rs` to after the local LLM server is initialized.
- Renumber the session history role codes (user/assistant/system) from 0/1/2 to 1/2/3 and simplify the announcement BASIC scripts (every-10-minutes schedule, shorter summary prompt, `GET_BOT_MEMORY("resume")` key aligned with `SET_BOT_MEMORY "resume"`).
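For context, a minimal, self-contained sketch of the channel-based streaming pattern the orchestrator now relies on. The `generate_stream` function below is a hypothetical stand-in for the provider's streaming call shown in the diff; only the bounded `mpsc` channel, the spawned producer task, and the chunk-accumulation loop mirror the actual change.

```rust
use tokio::sync::mpsc;

// Hypothetical stand-in for `llm_provider.generate_stream`: a real provider
// would push model tokens into the channel as they are produced.
async fn generate_stream(prompt: &str, tx: mpsc::Sender<String>) {
    for chunk in ["Hello", ", ", "world"] {
        if tx.send(chunk.to_string()).await.is_err() {
            return; // receiver dropped
        }
    }
    let _ = tx.send(format!(" (prompt was {} chars)", prompt.len())).await;
}

#[tokio::main]
async fn main() {
    let prompt = "User: hi\nAssistant:".to_string();

    // Bounded channel, capacity 100 as in the orchestrator change.
    let (tx, mut rx) = mpsc::channel::<String>(100);

    // Producer: stream chunks on a separate task; dropping `tx` when the
    // task finishes closes the channel and ends the consumer loop.
    tokio::spawn(async move {
        generate_stream(&prompt, tx).await;
    });

    // Consumer: accumulate chunks into the full response.
    let mut full_response = String::new();
    while let Some(chunk) = rx.recv().await {
        full_response.push_str(&chunk);
    }

    println!("{full_response}");
}
```

In the real handler the accumulated string is returned as `Ok(full_response)` rather than printed.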
Parent: a5bfda4d09
Commit: b033be3e64
10 changed files with 37 additions and 154 deletions
@@ -12,19 +12,13 @@ use tokio::time::Duration;
 use uuid::Uuid;
 
 pub struct AutomationService {
-    state: Arc<AppState>,
-    scripts_dir: String,
+    state: Arc<AppState>
 }
 
 impl AutomationService {
-    pub fn new(state: Arc<AppState>, scripts_dir: &str) -> Self {
-        trace!(
-            "Creating AutomationService with scripts_dir='{}'",
-            scripts_dir
-        );
+    pub fn new(state: Arc<AppState>) -> Self {
         Self {
-            state,
-            scripts_dir: scripts_dir.to_string(),
+            state
         }
     }
@@ -34,39 +28,8 @@ impl AutomationService {
        tokio::task::spawn_local({
            let service = service.clone();
            async move {
-                // Check if llama servers are ready before starting
-                let config_manager = ConfigManager::new(Arc::clone(&service.state.conn));
-                let default_bot_id = {
-                    let mut conn = service.state.conn.lock().unwrap();
-                    bots.filter(name.eq("default"))
-                        .select(id)
-                        .first::<uuid::Uuid>(&mut *conn)
-                        .unwrap_or_else(|_| uuid::Uuid::nil())
-                };
-
-                let llm_url = match config_manager.get_config(&default_bot_id, "llm-url", None) {
-                    Ok(url) => url,
-                    Err(e) => {
-                        error!("Failed to get llm-url config: {}", e);
-                        return;
-                    }
-                };
-
-                let embedding_url = match config_manager.get_config(&default_bot_id, "embedding-url", None) {
-                    Ok(url) => url,
-                    Err(e) => {
-                        error!("Failed to get embedding-url config: {}", e);
-                        return;
-                    }
-                };
-
-                if !crate::llm::local::is_server_running(&llm_url).await ||
-                   !crate::llm::local::is_server_running(&embedding_url).await {
-                    trace!("LLM servers not ready - llm: {}, embedding: {}", llm_url, embedding_url);
-                    return;
-                }
-
-                let mut interval = tokio::time::interval(Duration::from_secs(5));
+                let mut interval = tokio::time::interval(Duration::from_secs(15));
                let mut last_check = Utc::now();
                loop {
                    interval.tick().await;
@@ -70,14 +70,7 @@ pub fn llm_keyword(state: Arc<AppState>, _user: UserSession, engine: &mut Engine
 /// You can change the style/structure here to guide the model's behavior.
 fn build_llm_prompt(user_text: &str) -> String {
     format!(
-        "You are a AI assistant in form of KEYWORD called LLM
-running inside a General Bots BASIC environment.
-Task: Process and respond concisely to the following call to x = LLM 'prompt' syntax.
----
-User Input:
-{}
----
-Respond clearly and accurately in the same language as the input.",
+        "User: {}",
         user_text.trim()
     )
 }
@@ -87,7 +80,6 @@ pub async fn execute_llm_generation(
     state: Arc<AppState>,
     prompt: String,
 ) -> Result<String, Box<dyn std::error::Error + Send + Sync>> {
-    info!("Starting LLM generation for prompt: '{}'", prompt);
 
     state
         .llm_provider
@@ -569,94 +569,23 @@ impl BotOrchestrator {
 
         prompt.push_str(&format!("User: {}\nAssistant:", message.content));
 
-        let use_langcache = std::env::var("LLM_CACHE")
-            .unwrap_or_else(|_| "false".to_string())
-            .eq_ignore_ascii_case("true");
-
-        if use_langcache {
-            ensure_collection_exists(&self.state, "semantic_cache").await?;
-            let langcache_client = get_langcache_client()?;
-            let isolated_question = message.content.trim().to_string();
-            let question_embeddings = generate_embeddings(vec![isolated_question.clone()]).await?;
-            let question_embedding = question_embeddings
-                .get(0)
-                .ok_or_else(|| "Failed to generate embedding for question")?
-                .clone();
-
-            let search_results = langcache_client
-                .search("semantic_cache", question_embedding.clone(), 1)
-                .await?;
-
-            if let Some(result) = search_results.first() {
-                let payload = &result.payload;
-                if let Some(resp) = payload.get("response").and_then(|v| v.as_str()) {
-                    return Ok(resp.to_string());
-                }
-            }
+        let (tx, mut rx) = mpsc::channel::<String>(100);
+        let llm = self.state.llm_provider.clone();
+        tokio::spawn(async move {
+            if let Err(e) = llm
+                .generate_stream(&prompt, &serde_json::Value::Null, tx)
+                .await
+            {
+                error!("LLM streaming error in direct_mode_handler: {}", e);
+            }
+        });
+
+        let mut full_response = String::new();
+        while let Some(chunk) = rx.recv().await {
+            full_response.push_str(&chunk);
+        }
+
-            let response = self
-                .state
-                .llm_provider
-                .generate(&prompt, &serde_json::Value::Null)
-                .await?;
-
-            let point = QdrantPoint {
-                id: uuid::Uuid::new_v4().to_string(),
-                vector: question_embedding,
-                payload: serde_json::json!({
-                    "question": isolated_question,
-                    "prompt": prompt,
-                    "response": response
-                }),
-            };
-
-            langcache_client
-                .upsert_points("semantic_cache", vec![point])
-                .await?;
-
-            Ok(response)
-        } else {
-            ensure_collection_exists(&self.state, "semantic_cache").await?;
-            let qdrant_client = get_qdrant_client(&self.state)?;
-            let embeddings = generate_embeddings(vec![prompt.clone()]).await?;
-            let embedding = embeddings
-                .get(0)
-                .ok_or_else(|| "Failed to generate embedding")?
-                .clone();
-
-            let search_results = qdrant_client
-                .search("semantic_cache", embedding.clone(), 1)
-                .await?;
-
-            if let Some(result) = search_results.first() {
-                if let Some(payload) = &result.payload {
-                    if let Some(resp) = payload.get("response").and_then(|v| v.as_str()) {
-                        return Ok(resp.to_string());
-                    }
-                }
-            }
-
-            let response = self
-                .state
-                .llm_provider
-                .generate(&prompt, &serde_json::Value::Null)
-                .await?;
-
-            let point = QdrantPoint {
-                id: uuid::Uuid::new_v4().to_string(),
-                vector: embedding,
-                payload: serde_json::json!({
-                    "prompt": prompt,
-                    "response": response
-                }),
-            };
-
-            qdrant_client
-                .upsert_points("semantic_cache", vec![point])
-                .await?;
-
-            Ok(response)
-        }
+        Ok(full_response)
     }
 
     pub async fn stream_response(
src/main.rs (25 changed lines)
@@ -244,19 +244,6 @@ async fn main() -> std::io::Result<()> {
        .map(|n| n.get())
        .unwrap_or(4);
 
-    let automation_state = app_state.clone();
-    std::thread::spawn(move || {
-        let rt = tokio::runtime::Builder::new_current_thread()
-            .enable_all()
-            .build()
-            .expect("Failed to create runtime for automation");
-        let local = tokio::task::LocalSet::new();
-        local.block_on(&rt, async move {
-            let scripts_dir = "work/default.gbai/.gbdialog".to_string();
-            let automation = AutomationService::new(automation_state, &scripts_dir);
-            automation.spawn().await.ok();
-        });
-    });
-
     // Initialize bot orchestrator and mount all bots
     let bot_orchestrator = BotOrchestrator::new(app_state.clone());
@@ -271,6 +258,18 @@ async fn main() -> std::io::Result<()> {
        .await
        .expect("Failed to initialize LLM local server");
 
+    let automation_state = app_state.clone();
+    std::thread::spawn(move || {
+        let rt = tokio::runtime::Builder::new_current_thread()
+            .enable_all()
+            .build()
+            .expect("Failed to create runtime for automation");
+        let local = tokio::task::LocalSet::new();
+        local.block_on(&rt, async move {
+            let automation = AutomationService::new(automation_state);
+            automation.spawn().await.ok();
+        });
+    });
+
     HttpServer::new(move || {
@@ -293,9 +293,9 @@ impl SessionManager {
        let mut history: Vec<(String, String)> = Vec::new();
        for (other_role, content) in messages {
            let role_str = match other_role {
-                0 => "user".to_string(),
-                1 => "assistant".to_string(),
-                2 => "system".to_string(),
+                1 => "user".to_string(),
+                2 => "assistant".to_string(),
+                3 => "system".to_string(),
                _ => "unknown".to_string(),
            };
            history.push((role_str, content));
@@ -1,4 +1,4 @@
-let resume1 = GET_BOT_MEMORY("general");
+let resume1 = GET_BOT_MEMORY("resume");
 let resume2 = GET_BOT_MEMORY("auxiliom");
 let resume3 = GET_BOT_MEMORY("toolbix");
 
@@ -14,5 +14,5 @@ ADD_SUGGESTION "auxiliom" AS "What does Auxiliom do?"
 ADD_SUGGESTION "toolbix" AS "Show me Toolbix features"
 ADD_SUGGESTION "toolbix" AS "How can Toolbix help my business?"
 
 
-TALK resume1
+TALK "You can ask me about any of the announcements or circulars."
@@ -1,8 +1,8 @@
-SET_SCHEDULE "* * * * *"
+SET_SCHEDULE "*/10 * * * *"
 
 
 let text = GET "announcements.gbkb/news/news.pdf"
-let resume = LLM "Resume this document, in a table (DO NOT THINK) no_think: " + text
+let resume = LLM "Resume this document: " + text
 
 SET_BOT_MEMORY "resume", resume
 
3 binary files changed (content not shown).