//! Code Scanner for BASIC Files //! //! Scans .bas files for security issues, fragile code patterns, and misconfigurations. //! Used by the /apicompliance endpoint to generate compliance reports. use chrono::{DateTime, Utc}; use regex::Regex; use serde::{Deserialize, Serialize}; use std::collections::HashMap; use std::path::{Path, PathBuf}; use walkdir::WalkDir; /// Issue severity levels #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)] #[serde(rename_all = "lowercase")] pub enum IssueSeverity { Info, Low, Medium, High, Critical, } impl std::fmt::Display for IssueSeverity { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { IssueSeverity::Info => write!(f, "info"), IssueSeverity::Low => write!(f, "low"), IssueSeverity::Medium => write!(f, "medium"), IssueSeverity::High => write!(f, "high"), IssueSeverity::Critical => write!(f, "critical"), } } } /// Issue types for categorization #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] #[serde(rename_all = "snake_case")] pub enum IssueType { PasswordInConfig, HardcodedSecret, DeprecatedKeyword, FragileCode, ConfigurationIssue, UnderscoreInKeyword, MissingVault, InsecurePattern, DeprecatedIfInput, } impl std::fmt::Display for IssueType { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { IssueType::PasswordInConfig => write!(f, "Password in Config"), IssueType::HardcodedSecret => write!(f, "Hardcoded Secret"), IssueType::DeprecatedKeyword => write!(f, "Deprecated Keyword"), IssueType::FragileCode => write!(f, "Fragile Code"), IssueType::ConfigurationIssue => write!(f, "Configuration Issue"), IssueType::UnderscoreInKeyword => write!(f, "Underscore in Keyword"), IssueType::MissingVault => write!(f, "Missing Vault Config"), IssueType::InsecurePattern => write!(f, "Insecure Pattern"), IssueType::DeprecatedIfInput => write!(f, "Deprecated IF...input Pattern"), } } } /// A single compliance issue found in the code #[derive(Debug, Clone, Serialize, Deserialize)] pub struct CodeIssue { pub id: String, pub severity: IssueSeverity, pub issue_type: IssueType, pub title: String, pub description: String, pub file_path: String, pub line_number: Option, pub code_snippet: Option, pub remediation: String, pub category: String, pub detected_at: DateTime, } /// Scan result for a single bot #[derive(Debug, Clone, Serialize, Deserialize)] pub struct BotScanResult { pub bot_id: String, pub bot_name: String, pub scanned_at: DateTime, pub files_scanned: usize, pub issues: Vec, pub stats: ScanStats, } /// Statistics for a scan #[derive(Debug, Clone, Serialize, Deserialize, Default)] pub struct ScanStats { pub critical: usize, pub high: usize, pub medium: usize, pub low: usize, pub info: usize, pub total: usize, } impl ScanStats { pub fn add_issue(&mut self, severity: &IssueSeverity) { match severity { IssueSeverity::Critical => self.critical += 1, IssueSeverity::High => self.high += 1, IssueSeverity::Medium => self.medium += 1, IssueSeverity::Low => self.low += 1, IssueSeverity::Info => self.info += 1, } self.total += 1; } pub fn merge(&mut self, other: &ScanStats) { self.critical += other.critical; self.high += other.high; self.medium += other.medium; self.low += other.low; self.info += other.info; self.total += other.total; } } /// Full compliance scan result #[derive(Debug, Clone, Serialize, Deserialize)] pub struct ComplianceScanResult { pub scanned_at: DateTime, pub duration_ms: u64, pub bots_scanned: usize, pub total_files: usize, pub stats: ScanStats, pub bot_results: Vec, } /// Pattern definition for scanning struct ScanPattern { regex: Regex, issue_type: IssueType, severity: IssueSeverity, title: String, description: String, remediation: String, category: String, } /// Code scanner for BASIC files pub struct CodeScanner { patterns: Vec, base_path: PathBuf, } impl CodeScanner { /// Create a new code scanner pub fn new(base_path: impl AsRef) -> Self { let patterns = Self::build_patterns(); Self { patterns, base_path: base_path.as_ref().to_path_buf(), } } /// Build the list of patterns to scan for fn build_patterns() -> Vec { let mut patterns = Vec::new(); // Critical: Password/secret patterns in code patterns.push(ScanPattern { regex: Regex::new(r#"(?i)password\s*=\s*["'][^"']+["']"#).unwrap(), issue_type: IssueType::PasswordInConfig, severity: IssueSeverity::Critical, title: "Hardcoded Password".to_string(), description: "A password is hardcoded in the source code. This is a critical security risk.".to_string(), remediation: "Move the password to Vault using: vault_password = GET VAULT SECRET \"password_key\"".to_string(), category: "Security".to_string(), }); patterns.push(ScanPattern { regex: Regex::new(r#"(?i)(api[_-]?key|apikey|secret[_-]?key|client[_-]?secret)\s*=\s*["'][^"']{8,}["']"#).unwrap(), issue_type: IssueType::HardcodedSecret, severity: IssueSeverity::Critical, title: "Hardcoded API Key/Secret".to_string(), description: "An API key or secret is hardcoded in the source code.".to_string(), remediation: "Store secrets in Vault and retrieve with GET VAULT SECRET".to_string(), category: "Security".to_string(), }); patterns.push(ScanPattern { regex: Regex::new(r#"(?i)token\s*=\s*["'][a-zA-Z0-9_\-]{20,}["']"#).unwrap(), issue_type: IssueType::HardcodedSecret, severity: IssueSeverity::High, title: "Hardcoded Token".to_string(), description: "A token appears to be hardcoded in the source code.".to_string(), remediation: "Store tokens securely in Vault".to_string(), category: "Security".to_string(), }); // High: Deprecated IF...input pattern patterns.push(ScanPattern { regex: Regex::new(r#"(?i)IF\s+.*\binput\b"#).unwrap(), issue_type: IssueType::DeprecatedIfInput, severity: IssueSeverity::Medium, title: "Deprecated IF...input Pattern".to_string(), description: "Using IF with raw input variable. Prefer HEAR AS for type-safe input handling." .to_string(), remediation: "Replace with: HEAR response AS STRING\nIF response = \"value\" THEN" .to_string(), category: "Code Quality".to_string(), }); // Medium: Underscore in keywords patterns.push(ScanPattern { regex: Regex::new(r#"(?i)\b(GET_BOT_MEMORY|SET_BOT_MEMORY|GET_USER_MEMORY|SET_USER_MEMORY|USE_KB|USE_TOOL|SEND_MAIL|CREATE_TASK)\b"#).unwrap(), issue_type: IssueType::UnderscoreInKeyword, severity: IssueSeverity::Low, title: "Underscore in Keyword".to_string(), description: "Keywords should use spaces instead of underscores for consistency.".to_string(), remediation: "Use spaces: GET BOT MEMORY, SET BOT MEMORY, etc.".to_string(), category: "Naming Convention".to_string(), }); // Medium: POST TO INSTAGRAM with inline credentials patterns.push(ScanPattern { regex: Regex::new(r#"(?i)POST\s+TO\s+INSTAGRAM\s+\w+\s*,\s*\w+"#).unwrap(), issue_type: IssueType::InsecurePattern, severity: IssueSeverity::High, title: "Instagram Credentials in Code".to_string(), description: "Instagram username/password passed directly. Use secure credential storage." .to_string(), remediation: "Store Instagram credentials in Vault and retrieve securely.".to_string(), category: "Security".to_string(), }); // Low: Direct SQL in BASIC patterns.push(ScanPattern { regex: Regex::new(r#"(?i)(SELECT|INSERT|UPDATE|DELETE)\s+.*(FROM|INTO|SET)\s+"#) .unwrap(), issue_type: IssueType::FragileCode, severity: IssueSeverity::Medium, title: "Raw SQL Query".to_string(), description: "Raw SQL queries in BASIC code may be vulnerable to injection." .to_string(), remediation: "Use parameterized queries or the built-in data operations (SAVE, GET, etc.)" .to_string(), category: "Security".to_string(), }); // Info: Eval or dynamic execution patterns.push(ScanPattern { regex: Regex::new(r#"(?i)\bEVAL\s*\("#).unwrap(), issue_type: IssueType::FragileCode, severity: IssueSeverity::High, title: "Dynamic Code Execution".to_string(), description: "EVAL can execute arbitrary code and is a security risk.".to_string(), remediation: "Avoid EVAL. Use structured control flow instead.".to_string(), category: "Security".to_string(), }); // Check for base64 encoded secrets (potential obfuscated credentials) patterns.push(ScanPattern { regex: Regex::new( r#"(?i)(password|secret|key|token)\s*=\s*["'][A-Za-z0-9+/=]{40,}["']"#, ) .unwrap(), issue_type: IssueType::HardcodedSecret, severity: IssueSeverity::High, title: "Potential Encoded Secret".to_string(), description: "A base64-like string is assigned to a sensitive variable.".to_string(), remediation: "Remove encoded secrets from code. Use Vault for secret management." .to_string(), category: "Security".to_string(), }); // AWS credentials pattern patterns.push(ScanPattern { regex: Regex::new(r#"(?i)(AKIA[0-9A-Z]{16})"#).unwrap(), issue_type: IssueType::HardcodedSecret, severity: IssueSeverity::Critical, title: "AWS Access Key".to_string(), description: "An AWS access key ID is hardcoded in the source code.".to_string(), remediation: "Remove immediately and rotate the key. Use IAM roles or Vault." .to_string(), category: "Security".to_string(), }); // Private key patterns patterns.push(ScanPattern { regex: Regex::new(r#"-----BEGIN\s+(RSA\s+)?PRIVATE\s+KEY-----"#).unwrap(), issue_type: IssueType::HardcodedSecret, severity: IssueSeverity::Critical, title: "Private Key in Code".to_string(), description: "A private key is embedded in the source code.".to_string(), remediation: "Remove private key immediately. Store in secure key management system." .to_string(), category: "Security".to_string(), }); // Connection strings with credentials patterns.push(ScanPattern { regex: Regex::new(r#"(?i)(postgres|mysql|mongodb|redis)://[^:]+:[^@]+@"#).unwrap(), issue_type: IssueType::HardcodedSecret, severity: IssueSeverity::Critical, title: "Database Credentials in Connection String".to_string(), description: "Database connection string contains embedded credentials.".to_string(), remediation: "Use environment variables or Vault for database credentials.".to_string(), category: "Security".to_string(), }); patterns } /// Scan all bots in the base path pub async fn scan_all( &self, ) -> Result> { let start_time = std::time::Instant::now(); let mut bot_results = Vec::new(); let mut total_stats = ScanStats::default(); let mut total_files = 0; // Find all .gbai directories (bot packages) let templates_path = self.base_path.join("templates"); let work_path = self.base_path.join("work"); let mut bot_paths = Vec::new(); // Scan templates directory if templates_path.exists() { for entry in WalkDir::new(&templates_path).max_depth(3) { if let Ok(entry) = entry { let path = entry.path(); if path.is_dir() { let name = path.file_name().unwrap_or_default().to_string_lossy(); if name.ends_with(".gbai") || name.ends_with(".gbdialog") { bot_paths.push(path.to_path_buf()); } } } } } // Scan work directory (deployed bots) if work_path.exists() { for entry in WalkDir::new(&work_path).max_depth(3) { if let Ok(entry) = entry { let path = entry.path(); if path.is_dir() { let name = path.file_name().unwrap_or_default().to_string_lossy(); if name.ends_with(".gbai") || name.ends_with(".gbdialog") { bot_paths.push(path.to_path_buf()); } } } } } // Scan each bot for bot_path in &bot_paths { let result = self.scan_bot(bot_path).await?; total_files += result.files_scanned; total_stats.merge(&result.stats); bot_results.push(result); } let duration_ms = start_time.elapsed().as_millis() as u64; Ok(ComplianceScanResult { scanned_at: Utc::now(), duration_ms, bots_scanned: bot_results.len(), total_files, stats: total_stats, bot_results, }) } /// Scan a specific bot directory pub async fn scan_bot( &self, bot_path: &Path, ) -> Result> { let bot_name = bot_path .file_name() .unwrap_or_default() .to_string_lossy() .to_string(); let bot_id = format!("{:x}", md5::compute(&bot_name)); let mut issues = Vec::new(); let mut stats = ScanStats::default(); let mut files_scanned = 0; // Find all .bas files in the bot directory for entry in WalkDir::new(bot_path) { if let Ok(entry) = entry { let path = entry.path(); if path.is_file() { let extension = path.extension().unwrap_or_default().to_string_lossy(); if extension == "bas" || extension == "csv" { files_scanned += 1; let file_issues = self.scan_file(path).await?; for issue in file_issues { stats.add_issue(&issue.severity); issues.push(issue); } } } } } // Check for missing Vault configuration let config_path = bot_path.join("config.csv"); if config_path.exists() { let vault_configured = self.check_vault_config(&config_path).await?; if !vault_configured { let issue = CodeIssue { id: uuid::Uuid::new_v4().to_string(), severity: IssueSeverity::Info, issue_type: IssueType::MissingVault, title: "Vault Not Configured".to_string(), description: "This bot is not configured to use Vault for secrets management.".to_string(), file_path: config_path.to_string_lossy().to_string(), line_number: None, code_snippet: None, remediation: "Add VAULT_ADDR and VAULT_TOKEN to configuration for secure secret management.".to_string(), category: "Configuration".to_string(), detected_at: Utc::now(), }; stats.add_issue(&issue.severity); issues.push(issue); } } // Sort issues by severity (critical first) issues.sort_by(|a, b| b.severity.cmp(&a.severity)); Ok(BotScanResult { bot_id, bot_name, scanned_at: Utc::now(), files_scanned, issues, stats, }) } /// Scan a single file for issues async fn scan_file( &self, file_path: &Path, ) -> Result, Box> { let content = tokio::fs::read_to_string(file_path).await?; let mut issues = Vec::new(); let relative_path = file_path .strip_prefix(&self.base_path) .unwrap_or(file_path) .to_string_lossy() .to_string(); for (line_number, line) in content.lines().enumerate() { let line_num = line_number + 1; // Skip comments let trimmed = line.trim(); if trimmed.starts_with("REM") || trimmed.starts_with("'") || trimmed.starts_with("//") { continue; } for pattern in &self.patterns { if pattern.regex.is_match(line) { // Redact sensitive information in the snippet let snippet = self.redact_sensitive(line); let issue = CodeIssue { id: uuid::Uuid::new_v4().to_string(), severity: pattern.severity.clone(), issue_type: pattern.issue_type.clone(), title: pattern.title.clone(), description: pattern.description.clone(), file_path: relative_path.clone(), line_number: Some(line_num), code_snippet: Some(snippet), remediation: pattern.remediation.clone(), category: pattern.category.clone(), detected_at: Utc::now(), }; issues.push(issue); } } } Ok(issues) } /// Redact sensitive information in code snippets fn redact_sensitive(&self, line: &str) -> String { let mut result = line.to_string(); // Redact quoted strings that look like secrets let secret_pattern = Regex::new(r#"(["'])[^"']{8,}(["'])"#).unwrap(); result = secret_pattern .replace_all(&result, "$1***REDACTED***$2") .to_string(); // Redact AWS keys let aws_pattern = Regex::new(r#"AKIA[0-9A-Z]{16}"#).unwrap(); result = aws_pattern .replace_all(&result, "AKIA***REDACTED***") .to_string(); result } /// Check if Vault is configured for a bot async fn check_vault_config( &self, config_path: &Path, ) -> Result> { let content = tokio::fs::read_to_string(config_path).await?; // Check for Vault-related configuration let has_vault = content.to_lowercase().contains("vault_addr") || content.to_lowercase().contains("vault_token") || content.to_lowercase().contains("vault-"); Ok(has_vault) } /// Scan specific bots by ID pub async fn scan_bots( &self, bot_ids: &[String], ) -> Result> { if bot_ids.is_empty() || bot_ids.contains(&"all".to_string()) { return self.scan_all().await; } // For specific bots, we'd need to look them up by ID // For now, scan all and filter let mut full_result = self.scan_all().await?; full_result .bot_results .retain(|r| bot_ids.contains(&r.bot_id) || bot_ids.contains(&r.bot_name)); // Recalculate stats let mut new_stats = ScanStats::default(); for bot in &full_result.bot_results { new_stats.merge(&bot.stats); } full_result.stats = new_stats; full_result.bots_scanned = full_result.bot_results.len(); Ok(full_result) } } /// Generate a compliance report in various formats pub struct ComplianceReporter; impl ComplianceReporter { /// Generate HTML report pub fn to_html(result: &ComplianceScanResult) -> String { let mut html = String::new(); html.push_str("Compliance Report"); html.push_str(""); html.push_str(""); html.push_str(&format!("

Compliance Scan Report

")); html.push_str(&format!("

Scanned at: {}

", result.scanned_at)); html.push_str(&format!("

Duration: {}ms

", result.duration_ms)); html.push_str(&format!("

Bots scanned: {}

", result.bots_scanned)); html.push_str(&format!("

Files scanned: {}

", result.total_files)); html.push_str("

Summary

"); html.push_str(&format!( "

Critical: {}

", result.stats.critical )); html.push_str(&format!("

High: {}

", result.stats.high)); html.push_str(&format!( "

Medium: {}

", result.stats.medium )); html.push_str(&format!("

Low: {}

", result.stats.low)); html.push_str(&format!("

Info: {}

", result.stats.info)); html.push_str("

Issues

"); html.push_str(""); for bot in &result.bot_results { for issue in &bot.issues { html.push_str(&format!( "", issue.severity.to_string(), issue.severity, issue.issue_type, issue.file_path, issue .line_number .map(|n| n.to_string()) .unwrap_or("-".to_string()), issue.description )); } } html.push_str("
SeverityTypeFileLineDescription
{}{}{}{}{}
"); html } /// Generate JSON report pub fn to_json(result: &ComplianceScanResult) -> Result { serde_json::to_string_pretty(result) } /// Generate CSV report pub fn to_csv(result: &ComplianceScanResult) -> String { let mut csv = String::new(); csv.push_str("Severity,Type,Category,File,Line,Title,Description,Remediation\n"); for bot in &result.bot_results { for issue in &bot.issues { csv.push_str(&format!( "{},{},{},{},{},{},{},{}\n", issue.severity, issue.issue_type, issue.category, issue.file_path, issue .line_number .map(|n| n.to_string()) .unwrap_or("-".to_string()), escape_csv(&issue.title), escape_csv(&issue.description), escape_csv(&issue.remediation) )); } } csv } } /// Escape a string for CSV output fn escape_csv(s: &str) -> String { if s.contains(',') || s.contains('"') || s.contains('\n') { format!("\"{}\"", s.replace('"', "\"\"")) } else { s.to_string() } } #[cfg(test)] mod tests { use super::*; #[test] fn test_pattern_matching() { let scanner = CodeScanner::new("/tmp/test"); // Test password detection let password_pattern = scanner .patterns .iter() .find(|p| matches!(p.issue_type, IssueType::PasswordInConfig)) .unwrap(); assert!(password_pattern.regex.is_match(r#"password = "secret123""#)); assert!(password_pattern.regex.is_match(r#"PASSWORD = 'mypass'"#)); // Test underscore keyword detection let underscore_pattern = scanner .patterns .iter() .find(|p| matches!(p.issue_type, IssueType::UnderscoreInKeyword)) .unwrap(); assert!(underscore_pattern.regex.is_match("GET_BOT_MEMORY")); assert!(underscore_pattern.regex.is_match("SET_USER_MEMORY")); } #[test] fn test_severity_ordering() { assert!(IssueSeverity::Critical > IssueSeverity::High); assert!(IssueSeverity::High > IssueSeverity::Medium); assert!(IssueSeverity::Medium > IssueSeverity::Low); assert!(IssueSeverity::Low > IssueSeverity::Info); } #[test] fn test_stats_merge() { let mut stats1 = ScanStats { critical: 1, high: 2, medium: 3, low: 4, info: 5, total: 15, }; let stats2 = ScanStats { critical: 1, high: 1, medium: 1, low: 1, info: 1, total: 5, }; stats1.merge(&stats2); assert_eq!(stats1.critical, 2); assert_eq!(stats1.high, 3); assert_eq!(stats1.total, 20); } #[test] fn test_csv_escape() { assert_eq!(escape_csv("simple"), "simple"); assert_eq!(escape_csv("with,comma"), "\"with,comma\""); assert_eq!(escape_csv("with\"quote"), "\"with\"\"quote\""); } #[test] fn test_redact_sensitive() { let scanner = CodeScanner::new("/tmp/test"); let line = r#"password = "supersecretpassword123""#; let redacted = scanner.redact_sensitive(line); assert!(redacted.contains("***REDACTED***")); assert!(!redacted.contains("supersecretpassword123")); } }