fix(llm): Detect AVX2 support and gracefully disable LLM on incompatible CPUs
- Add cpu_supports_avx2() function to check /proc/cpuinfo for the AVX2 flag
- Skip the LLM binary download on CPUs without AVX2 (pre-built llama.cpp binaries require it)
- Add a runtime check for llama-server binary compatibility (catches "Illegal instruction")
- Provide helpful error messages about alternatives (compile from source or use an external API)
- Sandy Bridge (2nd-gen Core i7) and older CPUs now gracefully skip the local LLM
parent 2da5f0ccdf
commit c95b56f257
2 changed files with 103 additions and 2 deletions
@@ -2,10 +2,28 @@ use crate::package_manager::component::ComponentConfig;
 use crate::package_manager::os::detect_os;
 use crate::package_manager::{InstallMode, OsType};
 use anyhow::Result;
-use log::{info, trace};
+use log::{info, trace, warn};
 use std::collections::HashMap;
 use std::path::PathBuf;
 
+/// Check if the CPU supports AVX2 instructions (required for pre-built llama.cpp binaries)
+fn cpu_supports_avx2() -> bool {
+    #[cfg(target_arch = "x86_64")]
+    {
+        // Read /proc/cpuinfo on Linux to check for avx2 flag
+        if let Ok(cpuinfo) = std::fs::read_to_string("/proc/cpuinfo") {
+            return cpuinfo.contains(" avx2 ") || cpuinfo.contains(" avx2\n");
+        }
+        // Fallback: assume AVX2 is not available if we can't read cpuinfo
+        false
+    }
+    #[cfg(not(target_arch = "x86_64"))]
+    {
+        // Non-x86_64 architectures (ARM, etc.) don't use AVX2
+        false
+    }
+}
+
 #[derive(Debug)]
 pub struct PackageManager {
     pub mode: InstallMode,
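Note: cpu_supports_avx2() above relies on /proc/cpuinfo, which only exists on Linux. A minimal alternative sketch, not part of this commit, uses the standard library's runtime feature detection, which queries CPUID directly and so also works on non-Linux x86_64 hosts:

    // Sketch only - not part of this commit. is_x86_feature_detected! is a
    // std macro available on x86/x86_64 targets; it queries CPUID at runtime
    // instead of reading /proc/cpuinfo.
    fn cpu_supports_avx2_cpuid() -> bool {
        #[cfg(target_arch = "x86_64")]
        {
            is_x86_feature_detected!("avx2")
        }
        #[cfg(not(target_arch = "x86_64"))]
        {
            false
        }
    }

Either check could back the same register_llm() logic below without changing its callers.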
@@ -199,6 +217,43 @@ impl PackageManager {
     }
 
     fn register_llm(&mut self) {
+        // Check CPU capabilities - pre-built llama.cpp binaries require AVX2
+        let has_avx2 = cpu_supports_avx2();
+
+        if !has_avx2 {
+            warn!("CPU does not support AVX2 instructions. Local LLM will not be available.");
+            warn!("To use local LLM on this CPU, you need to compile llama.cpp from source.");
+            warn!(
+                "Alternatively, configure an external LLM API (OpenAI, Anthropic, etc.) in Vault."
+            );
+            // Register a disabled LLM component that won't download or run anything
+            self.components.insert(
+                "llm".to_string(),
+                ComponentConfig {
+                    name: "llm".to_string(),
+                    ports: vec![8081, 8082],
+                    dependencies: vec![],
+                    linux_packages: vec![],
+                    macos_packages: vec![],
+                    windows_packages: vec![],
+                    download_url: None, // Don't download - CPU not compatible
+                    binary_name: None,
+                    pre_install_cmds_linux: vec![],
+                    post_install_cmds_linux: vec![],
+                    pre_install_cmds_macos: vec![],
+                    post_install_cmds_macos: vec![],
+                    pre_install_cmds_windows: vec![],
+                    post_install_cmds_windows: vec![],
+                    env_vars: HashMap::new(),
+                    data_download_list: vec![], // Don't download models
+                    exec_cmd: "echo 'LLM disabled - CPU does not support AVX2'".to_string(),
+                    check_cmd: "false".to_string(), // Always fail check - LLM not available
+                },
+            );
+            return;
+        }
+
+        info!("CPU supports AVX2 - local LLM will be available");
         self.components.insert(
             "llm".to_string(),
             ComponentConfig {
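Note: registering a stub "llm" entry rather than omitting the component keeps lookups in the component map uniform. A hypothetical caller-side check, relying only on the download_url: None / check_cmd: "false" markers visible in the stub above (the helper itself is not part of the commit), might look like:

    use std::collections::HashMap;

    // Hypothetical helper, not part of the commit: the disabled stub is
    // registered with download_url: None, so a missing URL doubles as the
    // "local LLM unavailable" marker.
    fn llm_is_enabled(components: &HashMap<String, ComponentConfig>) -> bool {
        components
            .get("llm")
            .map(|cfg| cfg.download_url.is_some())
            .unwrap_or(false)
    }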
@@ -2,8 +2,9 @@ use crate::config::ConfigManager;
 use crate::shared::models::schema::bots::dsl::*;
 use crate::shared::state::AppState;
 use diesel::prelude::*;
-use log::{error, info};
+use log::{error, info, warn};
 use reqwest;
 use std::path::Path;
 use std::sync::Arc;
 use tokio;
@@ -68,6 +69,51 @@ pub async fn ensure_llama_servers_running(
     info!(" LLM Model: {}", llm_model);
     info!(" Embedding Model: {}", embedding_model);
     info!(" LLM Server Path: {}", llm_server_path);
+
+    // Check if llama-server binary exists
+    let llama_server_path = if llm_server_path.is_empty() {
+        "./botserver-stack/bin/llm/build/bin/llama-server".to_string()
+    } else {
+        format!("{}/llama-server", llm_server_path)
+    };
+
+    if !Path::new(&llama_server_path).exists() {
+        warn!("llama-server binary not found at: {}", llama_server_path);
+        warn!("Local LLM server will not be available.");
+        warn!("This may be because:");
+        warn!(" 1. The LLM component was not installed (check if CPU supports AVX2)");
+        warn!(" 2. The binary path is incorrect");
+        warn!("Continuing without local LLM - use external LLM API instead.");
+        return Ok(());
+    }
+
+    // Test if the binary can actually run (check for illegal instruction)
+    info!("Testing llama-server binary compatibility...");
+    let test_result = std::process::Command::new(&llama_server_path)
+        .arg("--version")
+        .output();
+
+    match test_result {
+        Ok(output) => {
+            if !output.status.success() {
+                let stderr = String::from_utf8_lossy(&output.stderr);
+                warn!("llama-server test failed: {}", stderr);
+                if stderr.contains("Illegal instruction") {
+                    error!("CPU does not support required instructions (AVX2) for llama-server");
+                    error!("Your CPU: Check /proc/cpuinfo for 'avx2' flag");
+                    error!("Options:");
+                    error!(" 1. Compile llama.cpp from source with your CPU's instruction set");
+                    error!(" 2. Use an external LLM API (OpenAI, Anthropic, etc.)");
+                    return Ok(());
+                }
+            }
+        }
+        Err(e) => {
+            warn!("Failed to test llama-server: {}", e);
+            // Continue anyway - might work at runtime
+        }
+    }
+
     info!("Restarting any existing llama-server processes...");
 
     if let Err(e) = tokio::process::Command::new("sh")
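Note: when a binary actually hits an unsupported instruction, the kernel kills it with SIGILL and the "Illegal instruction" message is normally printed by the invoking shell, not written to the child's stderr, so the string match above may never fire when the binary is spawned directly. A Unix-only sketch, not part of this commit, that inspects the termination signal instead:

    // Sketch only - not part of this commit. On Unix, a process killed by an
    // unsupported instruction terminates with SIGILL (signal 4); checking the
    // signal is more reliable than grepping stderr for "Illegal instruction".
    #[cfg(unix)]
    fn died_of_sigill(status: std::process::ExitStatus) -> bool {
        use std::os::unix::process::ExitStatusExt;
        status.signal() == Some(4) // 4 == SIGILL
    }

In the Ok(output) arm above, died_of_sigill(output.status) could then gate the AVX2 error messages.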