botserver/src/compliance/code_scanner.rs

660 lines
24 KiB
Rust

//! Code Scanner for BASIC Files
//!
//! Scans .bas files for security issues, fragile code patterns, and misconfigurations.
//! Used by the /apicompliance endpoint to generate compliance reports.
use chrono::{DateTime, Utc};
use regex::Regex;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use walkdir::WalkDir;
/// Issue severity levels
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)]
#[serde(rename_all = "lowercase")]
pub enum IssueSeverity {
Info,
Low,
Medium,
High,
Critical,
}
impl std::fmt::Display for IssueSeverity {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
IssueSeverity::Info => write!(f, "info"),
IssueSeverity::Low => write!(f, "low"),
IssueSeverity::Medium => write!(f, "medium"),
IssueSeverity::High => write!(f, "high"),
IssueSeverity::Critical => write!(f, "critical"),
}
}
}
/// Issue types for categorization
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "snake_case")]
pub enum IssueType {
PasswordInConfig,
HardcodedSecret,
DeprecatedKeyword,
FragileCode,
ConfigurationIssue,
UnderscoreInKeyword,
MissingVault,
InsecurePattern,
DeprecatedIfInput,
}
impl std::fmt::Display for IssueType {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
IssueType::PasswordInConfig => write!(f, "Password in Config"),
IssueType::HardcodedSecret => write!(f, "Hardcoded Secret"),
IssueType::DeprecatedKeyword => write!(f, "Deprecated Keyword"),
IssueType::FragileCode => write!(f, "Fragile Code"),
IssueType::ConfigurationIssue => write!(f, "Configuration Issue"),
IssueType::UnderscoreInKeyword => write!(f, "Underscore in Keyword"),
IssueType::MissingVault => write!(f, "Missing Vault Config"),
IssueType::InsecurePattern => write!(f, "Insecure Pattern"),
IssueType::DeprecatedIfInput => write!(f, "Deprecated IF...input Pattern"),
}
}
}
/// A single compliance issue found in the code
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CodeIssue {
pub id: String,
pub severity: IssueSeverity,
pub issue_type: IssueType,
pub title: String,
pub description: String,
pub file_path: String,
pub line_number: Option<usize>,
pub code_snippet: Option<String>,
pub remediation: String,
pub category: String,
pub detected_at: DateTime<Utc>,
}
/// Scan result for a single bot
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BotScanResult {
pub bot_id: String,
pub bot_name: String,
pub scanned_at: DateTime<Utc>,
pub files_scanned: usize,
pub issues: Vec<CodeIssue>,
pub stats: ScanStats,
}
/// Statistics for a scan
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct ScanStats {
pub critical: usize,
pub high: usize,
pub medium: usize,
pub low: usize,
pub info: usize,
pub total: usize,
}
impl ScanStats {
pub fn add_issue(&mut self, severity: &IssueSeverity) {
match severity {
IssueSeverity::Critical => self.critical += 1,
IssueSeverity::High => self.high += 1,
IssueSeverity::Medium => self.medium += 1,
IssueSeverity::Low => self.low += 1,
IssueSeverity::Info => self.info += 1,
}
self.total += 1;
}
pub fn merge(&mut self, other: &ScanStats) {
self.critical += other.critical;
self.high += other.high;
self.medium += other.medium;
self.low += other.low;
self.info += other.info;
self.total += other.total;
}
}
/// Full compliance scan result
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ComplianceScanResult {
pub scanned_at: DateTime<Utc>,
pub duration_ms: u64,
pub bots_scanned: usize,
pub total_files: usize,
pub stats: ScanStats,
pub bot_results: Vec<BotScanResult>,
}
/// Pattern definition for scanning
struct ScanPattern {
regex: Regex,
issue_type: IssueType,
severity: IssueSeverity,
title: String,
description: String,
remediation: String,
category: String,
}
/// Code scanner for BASIC files
pub struct CodeScanner {
patterns: Vec<ScanPattern>,
base_path: PathBuf,
}
impl CodeScanner {
/// Create a new code scanner
pub fn new(base_path: impl AsRef<Path>) -> Self {
let patterns = Self::build_patterns();
Self {
patterns,
base_path: base_path.as_ref().to_path_buf(),
}
}
/// Build the list of patterns to scan for
fn build_patterns() -> Vec<ScanPattern> {
let mut patterns = Vec::new();
// Critical: Password/secret patterns in code
patterns.push(ScanPattern {
regex: Regex::new(r#"(?i)password\s*=\s*["'][^"']+["']"#).unwrap(),
issue_type: IssueType::PasswordInConfig,
severity: IssueSeverity::Critical,
title: "Hardcoded Password".to_string(),
description: "A password is hardcoded in the source code. This is a critical security risk.".to_string(),
remediation: "Move the password to Vault using: vault_password = GET VAULT SECRET \"password_key\"".to_string(),
category: "Security".to_string(),
});
patterns.push(ScanPattern {
regex: Regex::new(r#"(?i)(api[_-]?key|apikey|secret[_-]?key|client[_-]?secret)\s*=\s*["'][^"']{8,}["']"#).unwrap(),
issue_type: IssueType::HardcodedSecret,
severity: IssueSeverity::Critical,
title: "Hardcoded API Key/Secret".to_string(),
description: "An API key or secret is hardcoded in the source code.".to_string(),
remediation: "Store secrets in Vault and retrieve with GET VAULT SECRET".to_string(),
category: "Security".to_string(),
});
patterns.push(ScanPattern {
regex: Regex::new(r#"(?i)token\s*=\s*["'][a-zA-Z0-9_\-]{20,}["']"#).unwrap(),
issue_type: IssueType::HardcodedSecret,
severity: IssueSeverity::High,
title: "Hardcoded Token".to_string(),
description: "A token appears to be hardcoded in the source code.".to_string(),
remediation: "Store tokens securely in Vault".to_string(),
category: "Security".to_string(),
});
// High: Deprecated IF...input pattern
patterns.push(ScanPattern {
regex: Regex::new(r#"(?i)IF\s+.*\binput\b"#).unwrap(),
issue_type: IssueType::DeprecatedIfInput,
severity: IssueSeverity::Medium,
title: "Deprecated IF...input Pattern".to_string(),
description:
"Using IF with raw input variable. Prefer HEAR AS for type-safe input handling."
.to_string(),
remediation: "Replace with: HEAR response AS STRING\nIF response = \"value\" THEN"
.to_string(),
category: "Code Quality".to_string(),
});
// Medium: Underscore in keywords
patterns.push(ScanPattern {
regex: Regex::new(r#"(?i)\b(GET_BOT_MEMORY|SET_BOT_MEMORY|GET_USER_MEMORY|SET_USER_MEMORY|USE_KB|USE_TOOL|SEND_MAIL|CREATE_TASK)\b"#).unwrap(),
issue_type: IssueType::UnderscoreInKeyword,
severity: IssueSeverity::Low,
title: "Underscore in Keyword".to_string(),
description: "Keywords should use spaces instead of underscores for consistency.".to_string(),
remediation: "Use spaces: GET BOT MEMORY, SET BOT MEMORY, etc.".to_string(),
category: "Naming Convention".to_string(),
});
// Medium: POST TO INSTAGRAM with inline credentials
patterns.push(ScanPattern {
regex: Regex::new(r#"(?i)POST\s+TO\s+INSTAGRAM\s+\w+\s*,\s*\w+"#).unwrap(),
issue_type: IssueType::InsecurePattern,
severity: IssueSeverity::High,
title: "Instagram Credentials in Code".to_string(),
description:
"Instagram username/password passed directly. Use secure credential storage."
.to_string(),
remediation: "Store Instagram credentials in Vault and retrieve securely.".to_string(),
category: "Security".to_string(),
});
// Low: Direct SQL in BASIC
patterns.push(ScanPattern {
regex: Regex::new(r#"(?i)(SELECT|INSERT|UPDATE|DELETE)\s+.*(FROM|INTO|SET)\s+"#)
.unwrap(),
issue_type: IssueType::FragileCode,
severity: IssueSeverity::Medium,
title: "Raw SQL Query".to_string(),
description: "Raw SQL queries in BASIC code may be vulnerable to injection."
.to_string(),
remediation:
"Use parameterized queries or the built-in data operations (SAVE, GET, etc.)"
.to_string(),
category: "Security".to_string(),
});
// Info: Eval or dynamic execution
patterns.push(ScanPattern {
regex: Regex::new(r#"(?i)\bEVAL\s*\("#).unwrap(),
issue_type: IssueType::FragileCode,
severity: IssueSeverity::High,
title: "Dynamic Code Execution".to_string(),
description: "EVAL can execute arbitrary code and is a security risk.".to_string(),
remediation: "Avoid EVAL. Use structured control flow instead.".to_string(),
category: "Security".to_string(),
});
// Check for base64 encoded secrets (potential obfuscated credentials)
patterns.push(ScanPattern {
regex: Regex::new(
r#"(?i)(password|secret|key|token)\s*=\s*["'][A-Za-z0-9+/=]{40,}["']"#,
)
.unwrap(),
issue_type: IssueType::HardcodedSecret,
severity: IssueSeverity::High,
title: "Potential Encoded Secret".to_string(),
description: "A base64-like string is assigned to a sensitive variable.".to_string(),
remediation: "Remove encoded secrets from code. Use Vault for secret management."
.to_string(),
category: "Security".to_string(),
});
// AWS credentials pattern
patterns.push(ScanPattern {
regex: Regex::new(r#"(?i)(AKIA[0-9A-Z]{16})"#).unwrap(),
issue_type: IssueType::HardcodedSecret,
severity: IssueSeverity::Critical,
title: "AWS Access Key".to_string(),
description: "An AWS access key ID is hardcoded in the source code.".to_string(),
remediation: "Remove immediately and rotate the key. Use IAM roles or Vault."
.to_string(),
category: "Security".to_string(),
});
// Private key patterns
patterns.push(ScanPattern {
regex: Regex::new(r#"-----BEGIN\s+(RSA\s+)?PRIVATE\s+KEY-----"#).unwrap(),
issue_type: IssueType::HardcodedSecret,
severity: IssueSeverity::Critical,
title: "Private Key in Code".to_string(),
description: "A private key is embedded in the source code.".to_string(),
remediation: "Remove private key immediately. Store in secure key management system."
.to_string(),
category: "Security".to_string(),
});
// Connection strings with credentials
patterns.push(ScanPattern {
regex: Regex::new(r#"(?i)(postgres|mysql|mongodb|redis)://[^:]+:[^@]+@"#).unwrap(),
issue_type: IssueType::HardcodedSecret,
severity: IssueSeverity::Critical,
title: "Database Credentials in Connection String".to_string(),
description: "Database connection string contains embedded credentials.".to_string(),
remediation: "Use environment variables or Vault for database credentials.".to_string(),
category: "Security".to_string(),
});
patterns
}
/// Scan all bots in the base path
pub async fn scan_all(
&self,
) -> Result<ComplianceScanResult, Box<dyn std::error::Error + Send + Sync>> {
let start_time = std::time::Instant::now();
let mut bot_results = Vec::new();
let mut total_stats = ScanStats::default();
let mut total_files = 0;
// Find all .gbai directories (bot packages)
let templates_path = self.base_path.join("templates");
let work_path = self.base_path.join("work");
let mut bot_paths = Vec::new();
// Scan templates directory
if templates_path.exists() {
for entry in WalkDir::new(&templates_path).max_depth(3) {
if let Ok(entry) = entry {
let path = entry.path();
if path.is_dir() {
let name = path.file_name().unwrap_or_default().to_string_lossy();
if name.ends_with(".gbai") || name.ends_with(".gbdialog") {
bot_paths.push(path.to_path_buf());
}
}
}
}
}
// Scan work directory (deployed bots)
if work_path.exists() {
for entry in WalkDir::new(&work_path).max_depth(3) {
if let Ok(entry) = entry {
let path = entry.path();
if path.is_dir() {
let name = path.file_name().unwrap_or_default().to_string_lossy();
if name.ends_with(".gbai") || name.ends_with(".gbdialog") {
bot_paths.push(path.to_path_buf());
}
}
}
}
}
// Scan each bot
for bot_path in &bot_paths {
let result = self.scan_bot(bot_path).await?;
total_files += result.files_scanned;
total_stats.merge(&result.stats);
bot_results.push(result);
}
let duration_ms = start_time.elapsed().as_millis() as u64;
Ok(ComplianceScanResult {
scanned_at: Utc::now(),
duration_ms,
bots_scanned: bot_results.len(),
total_files,
stats: total_stats,
bot_results,
})
}
/// Scan a specific bot directory
pub async fn scan_bot(
&self,
bot_path: &Path,
) -> Result<BotScanResult, Box<dyn std::error::Error + Send + Sync>> {
let bot_name = bot_path
.file_name()
.unwrap_or_default()
.to_string_lossy()
.to_string();
let bot_id =
uuid::Uuid::new_v5(&uuid::Uuid::NAMESPACE_OID, bot_name.as_bytes()).to_string();
let mut issues = Vec::new();
let mut stats = ScanStats::default();
let mut files_scanned = 0;
// Find all .bas files in the bot directory
for entry in WalkDir::new(bot_path) {
if let Ok(entry) = entry {
let path = entry.path();
if path.is_file() {
let extension = path.extension().unwrap_or_default().to_string_lossy();
if extension == "bas" || extension == "csv" {
files_scanned += 1;
let file_issues = self.scan_file(path).await?;
for issue in file_issues {
stats.add_issue(&issue.severity);
issues.push(issue);
}
}
}
}
}
// Check for missing Vault configuration
let config_path = bot_path.join("config.csv");
if config_path.exists() {
let vault_configured = self.check_vault_config(&config_path).await?;
if !vault_configured {
let issue = CodeIssue {
id: uuid::Uuid::new_v4().to_string(),
severity: IssueSeverity::Info,
issue_type: IssueType::MissingVault,
title: "Vault Not Configured".to_string(),
description: "This bot is not configured to use Vault for secrets management.".to_string(),
file_path: config_path.to_string_lossy().to_string(),
line_number: None,
code_snippet: None,
remediation: "Add VAULT_ADDR and VAULT_TOKEN to configuration for secure secret management.".to_string(),
category: "Configuration".to_string(),
detected_at: Utc::now(),
};
stats.add_issue(&issue.severity);
issues.push(issue);
}
}
// Sort issues by severity (critical first)
issues.sort_by(|a, b| b.severity.cmp(&a.severity));
Ok(BotScanResult {
bot_id,
bot_name,
scanned_at: Utc::now(),
files_scanned,
issues,
stats,
})
}
/// Scan a single file for issues
async fn scan_file(
&self,
file_path: &Path,
) -> Result<Vec<CodeIssue>, Box<dyn std::error::Error + Send + Sync>> {
let content = tokio::fs::read_to_string(file_path).await?;
let mut issues = Vec::new();
let relative_path = file_path
.strip_prefix(&self.base_path)
.unwrap_or(file_path)
.to_string_lossy()
.to_string();
for (line_number, line) in content.lines().enumerate() {
let line_num = line_number + 1;
// Skip comments
let trimmed = line.trim();
if trimmed.starts_with("REM") || trimmed.starts_with("'") || trimmed.starts_with("//") {
continue;
}
for pattern in &self.patterns {
if pattern.regex.is_match(line) {
// Redact sensitive information in the snippet
let snippet = self.redact_sensitive(line);
let issue = CodeIssue {
id: uuid::Uuid::new_v4().to_string(),
severity: pattern.severity.clone(),
issue_type: pattern.issue_type.clone(),
title: pattern.title.clone(),
description: pattern.description.clone(),
file_path: relative_path.clone(),
line_number: Some(line_num),
code_snippet: Some(snippet),
remediation: pattern.remediation.clone(),
category: pattern.category.clone(),
detected_at: Utc::now(),
};
issues.push(issue);
}
}
}
Ok(issues)
}
/// Redact sensitive information in code snippets
fn redact_sensitive(&self, line: &str) -> String {
let mut result = line.to_string();
// Redact quoted strings that look like secrets
let secret_pattern = Regex::new(r#"(["'])[^"']{8,}(["'])"#).unwrap();
result = secret_pattern
.replace_all(&result, "$1***REDACTED***$2")
.to_string();
// Redact AWS keys
let aws_pattern = Regex::new(r#"AKIA[0-9A-Z]{16}"#).unwrap();
result = aws_pattern
.replace_all(&result, "AKIA***REDACTED***")
.to_string();
result
}
/// Check if Vault is configured for a bot
async fn check_vault_config(
&self,
config_path: &Path,
) -> Result<bool, Box<dyn std::error::Error + Send + Sync>> {
let content = tokio::fs::read_to_string(config_path).await?;
// Check for Vault-related configuration
let has_vault = content.to_lowercase().contains("vault_addr")
|| content.to_lowercase().contains("vault_token")
|| content.to_lowercase().contains("vault-");
Ok(has_vault)
}
/// Scan specific bots by ID
pub async fn scan_bots(
&self,
bot_ids: &[String],
) -> Result<ComplianceScanResult, Box<dyn std::error::Error + Send + Sync>> {
if bot_ids.is_empty() || bot_ids.contains(&"all".to_string()) {
return self.scan_all().await;
}
// For specific bots, we'd need to look them up by ID
// For now, scan all and filter
let mut full_result = self.scan_all().await?;
full_result
.bot_results
.retain(|r| bot_ids.contains(&r.bot_id) || bot_ids.contains(&r.bot_name));
// Recalculate stats
let mut new_stats = ScanStats::default();
for bot in &full_result.bot_results {
new_stats.merge(&bot.stats);
}
full_result.stats = new_stats;
full_result.bots_scanned = full_result.bot_results.len();
Ok(full_result)
}
}
/// Generate a compliance report in various formats
pub struct ComplianceReporter;
impl ComplianceReporter {
/// Generate HTML report
pub fn to_html(result: &ComplianceScanResult) -> String {
let mut html = String::new();
html.push_str("<!DOCTYPE html><html><head><title>Compliance Report</title>");
html.push_str("<style>body{font-family:system-ui;margin:20px;}table{border-collapse:collapse;width:100%;}th,td{border:1px solid #ddd;padding:8px;text-align:left;}.critical{color:#dc2626;}.high{color:#ea580c;}.medium{color:#d97706;}.low{color:#65a30d;}.info{color:#0891b2;}</style>");
html.push_str("</head><body>");
html.push_str(&format!("<h1>Compliance Scan Report</h1>"));
html.push_str(&format!("<p>Scanned at: {}</p>", result.scanned_at));
html.push_str(&format!("<p>Duration: {}ms</p>", result.duration_ms));
html.push_str(&format!("<p>Bots scanned: {}</p>", result.bots_scanned));
html.push_str(&format!("<p>Files scanned: {}</p>", result.total_files));
html.push_str("<h2>Summary</h2>");
html.push_str(&format!(
"<p class='critical'>Critical: {}</p>",
result.stats.critical
));
html.push_str(&format!("<p class='high'>High: {}</p>", result.stats.high));
html.push_str(&format!(
"<p class='medium'>Medium: {}</p>",
result.stats.medium
));
html.push_str(&format!("<p class='low'>Low: {}</p>", result.stats.low));
html.push_str(&format!("<p class='info'>Info: {}</p>", result.stats.info));
html.push_str("<h2>Issues</h2>");
html.push_str("<table><tr><th>Severity</th><th>Type</th><th>File</th><th>Line</th><th>Description</th></tr>");
for bot in &result.bot_results {
for issue in &bot.issues {
html.push_str(&format!(
"<tr><td class='{}'>{}</td><td>{}</td><td>{}</td><td>{}</td><td>{}</td></tr>",
issue.severity.to_string(),
issue.severity,
issue.issue_type,
issue.file_path,
issue
.line_number
.map(|n| n.to_string())
.unwrap_or("-".to_string()),
issue.description
));
}
}
html.push_str("</table></body></html>");
html
}
/// Generate JSON report
pub fn to_json(result: &ComplianceScanResult) -> Result<String, serde_json::Error> {
serde_json::to_string_pretty(result)
}
/// Generate CSV report
pub fn to_csv(result: &ComplianceScanResult) -> String {
let mut csv = String::new();
csv.push_str("Severity,Type,Category,File,Line,Title,Description,Remediation\n");
for bot in &result.bot_results {
for issue in &bot.issues {
csv.push_str(&format!(
"{},{},{},{},{},{},{},{}\n",
issue.severity,
issue.issue_type,
issue.category,
issue.file_path,
issue
.line_number
.map(|n| n.to_string())
.unwrap_or("-".to_string()),
escape_csv(&issue.title),
escape_csv(&issue.description),
escape_csv(&issue.remediation)
));
}
}
csv
}
}
/// Escape a string for CSV output
fn escape_csv(s: &str) -> String {
if s.contains(',') || s.contains('"') || s.contains('\n') {
format!("\"{}\"", s.replace('"', "\"\""))
} else {
s.to_string()
}
}