fix(llm): Detect AVX2 support and gracefully disable LLM on incompatible CPUs
- Add cpu_supports_avx2() function to check /proc/cpuinfo for the AVX2 flag
- Skip the LLM binary download on CPUs without AVX2 (pre-built llama.cpp binaries require it)
- Add a runtime check for llama-server binary compatibility (catches "Illegal instruction")
- Provide helpful error messages about alternatives (compile from source or use an external API)
- Sandy Bridge (2nd-gen Core i7) and older CPUs now gracefully skip the local LLM
parent 2da5f0ccdf
commit c95b56f257
2 changed files with 103 additions and 2 deletions
@@ -2,10 +2,28 @@ use crate::package_manager::component::ComponentConfig;
 use crate::package_manager::os::detect_os;
 use crate::package_manager::{InstallMode, OsType};
 use anyhow::Result;
-use log::{info, trace};
+use log::{info, trace, warn};
 use std::collections::HashMap;
 use std::path::PathBuf;
 
+/// Check if the CPU supports AVX2 instructions (required for pre-built llama.cpp binaries)
+fn cpu_supports_avx2() -> bool {
+    #[cfg(target_arch = "x86_64")]
+    {
+        // Read /proc/cpuinfo on Linux to check for avx2 flag
+        if let Ok(cpuinfo) = std::fs::read_to_string("/proc/cpuinfo") {
+            return cpuinfo.contains(" avx2 ") || cpuinfo.contains(" avx2\n");
+        }
+        // Fallback: assume AVX2 is not available if we can't read cpuinfo
+        false
+    }
+    #[cfg(not(target_arch = "x86_64"))]
+    {
+        // Non-x86_64 architectures (ARM, etc.) don't use AVX2
+        false
+    }
+}
+
 #[derive(Debug)]
 pub struct PackageManager {
     pub mode: InstallMode,
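Note: cpu_supports_avx2() above relies on /proc/cpuinfo, which only exists on Linux. A minimal alternative sketch, not part of this commit, uses the standard library's runtime feature detection, which queries CPUID directly and so also works on non-Linux x86_64 hosts:

    // Sketch only - not part of this commit. is_x86_feature_detected! is a
    // std macro available on x86/x86_64 targets; it queries CPUID at runtime
    // instead of reading /proc/cpuinfo.
    fn cpu_supports_avx2_cpuid() -> bool {
        #[cfg(target_arch = "x86_64")]
        {
            is_x86_feature_detected!("avx2")
        }
        #[cfg(not(target_arch = "x86_64"))]
        {
            false
        }
    }

Either check could back the same register_llm() logic below without changing its callers.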
@@ -199,6 +217,43 @@ impl PackageManager {
     }
 
     fn register_llm(&mut self) {
+        // Check CPU capabilities - pre-built llama.cpp binaries require AVX2
+        let has_avx2 = cpu_supports_avx2();
+
+        if !has_avx2 {
+            warn!("CPU does not support AVX2 instructions. Local LLM will not be available.");
+            warn!("To use local LLM on this CPU, you need to compile llama.cpp from source.");
+            warn!(
+                "Alternatively, configure an external LLM API (OpenAI, Anthropic, etc.) in Vault."
+            );
+            // Register a disabled LLM component that won't download or run anything
+            self.components.insert(
+                "llm".to_string(),
+                ComponentConfig {
+                    name: "llm".to_string(),
+                    ports: vec![8081, 8082],
+                    dependencies: vec![],
+                    linux_packages: vec![],
+                    macos_packages: vec![],
+                    windows_packages: vec![],
+                    download_url: None, // Don't download - CPU not compatible
+                    binary_name: None,
+                    pre_install_cmds_linux: vec![],
+                    post_install_cmds_linux: vec![],
+                    pre_install_cmds_macos: vec![],
+                    post_install_cmds_macos: vec![],
+                    pre_install_cmds_windows: vec![],
+                    post_install_cmds_windows: vec![],
+                    env_vars: HashMap::new(),
+                    data_download_list: vec![], // Don't download models
+                    exec_cmd: "echo 'LLM disabled - CPU does not support AVX2'".to_string(),
+                    check_cmd: "false".to_string(), // Always fail check - LLM not available
+                },
+            );
+            return;
+        }
+
+        info!("CPU supports AVX2 - local LLM will be available");
         self.components.insert(
             "llm".to_string(),
             ComponentConfig {
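Note: registering a stub "llm" entry rather than omitting the component keeps lookups in the component map uniform. A hypothetical caller-side check, relying only on the download_url: None / check_cmd: "false" markers visible in the stub above (the helper itself is not part of the commit), might look like:

    use std::collections::HashMap;

    // Hypothetical helper, not part of the commit: the disabled stub is
    // registered with download_url: None, so a missing URL doubles as the
    // "local LLM unavailable" marker.
    fn llm_is_enabled(components: &HashMap<String, ComponentConfig>) -> bool {
        components
            .get("llm")
            .map(|cfg| cfg.download_url.is_some())
            .unwrap_or(false)
    }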
@@ -2,8 +2,9 @@ use crate::config::ConfigManager;
 use crate::shared::models::schema::bots::dsl::*;
 use crate::shared::state::AppState;
 use diesel::prelude::*;
-use log::{error, info};
+use log::{error, info, warn};
 use reqwest;
 use std::path::Path;
 use std::sync::Arc;
 use tokio;
@@ -68,6 +69,51 @@ pub async fn ensure_llama_servers_running(
     info!(" LLM Model: {}", llm_model);
     info!(" Embedding Model: {}", embedding_model);
     info!(" LLM Server Path: {}", llm_server_path);
+
+    // Check if llama-server binary exists
+    let llama_server_path = if llm_server_path.is_empty() {
+        "./botserver-stack/bin/llm/build/bin/llama-server".to_string()
+    } else {
+        format!("{}/llama-server", llm_server_path)
+    };
+
+    if !Path::new(&llama_server_path).exists() {
+        warn!("llama-server binary not found at: {}", llama_server_path);
+        warn!("Local LLM server will not be available.");
+        warn!("This may be because:");
+        warn!(" 1. The LLM component was not installed (check if CPU supports AVX2)");
+        warn!(" 2. The binary path is incorrect");
+        warn!("Continuing without local LLM - use external LLM API instead.");
+        return Ok(());
+    }
+
+    // Test if the binary can actually run (check for illegal instruction)
+    info!("Testing llama-server binary compatibility...");
+    let test_result = std::process::Command::new(&llama_server_path)
+        .arg("--version")
+        .output();
+
+    match test_result {
+        Ok(output) => {
+            if !output.status.success() {
+                let stderr = String::from_utf8_lossy(&output.stderr);
+                warn!("llama-server test failed: {}", stderr);
+                if stderr.contains("Illegal instruction") {
+                    error!("CPU does not support required instructions (AVX2) for llama-server");
+                    error!("Your CPU: Check /proc/cpuinfo for 'avx2' flag");
+                    error!("Options:");
+                    error!(" 1. Compile llama.cpp from source with your CPU's instruction set");
+                    error!(" 2. Use an external LLM API (OpenAI, Anthropic, etc.)");
+                    return Ok(());
+                }
+            }
+        }
+        Err(e) => {
+            warn!("Failed to test llama-server: {}", e);
+            // Continue anyway - might work at runtime
+        }
+    }
+
     info!("Restarting any existing llama-server processes...");
 
     if let Err(e) = tokio::process::Command::new("sh")
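Note: when a binary actually hits an unsupported instruction, the kernel kills it with SIGILL and the "Illegal instruction" message is normally printed by the invoking shell, not written to the child's stderr, so the string match above may never fire when the binary is spawned directly. A Unix-only sketch, not part of this commit, that inspects the termination signal instead:

    // Sketch only - not part of this commit. On Unix, a process killed by an
    // unsupported instruction terminates with SIGILL (signal 4); checking the
    // signal is more reliable than grepping stderr for "Illegal instruction".
    #[cfg(unix)]
    fn died_of_sigill(status: std::process::ExitStatus) -> bool {
        use std::os::unix::process::ExitStatusExt;
        status.signal() == Some(4) // 4 == SIGILL
    }

In the Ok(output) arm above, died_of_sigill(output.status) could then gate the AVX2 error messages.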