use anyhow::{anyhow, Result}; use regex::Regex; use serde::{Deserialize, Serialize}; use std::collections::{HashMap, HashSet}; use std::sync::Arc; use tokio::sync::RwLock; use tracing::{info, warn}; use uuid::Uuid; #[derive(Debug, Clone, Serialize, Deserialize)] pub struct DlpConfig { pub enabled: bool, pub scan_inbound: bool, pub scan_outbound: bool, pub block_on_violation: bool, pub log_violations: bool, pub redact_sensitive_data: bool, pub max_content_length: usize, } impl Default for DlpConfig { fn default() -> Self { Self { enabled: true, scan_inbound: true, scan_outbound: true, block_on_violation: false, log_violations: true, redact_sensitive_data: true, max_content_length: 10 * 1024 * 1024, } } } #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] pub enum SensitiveDataType { Email, PhoneNumber, CreditCard, Ssn, IpAddress, ApiKey, Password, AwsAccessKey, AwsSecretKey, PrivateKey, JwtToken, BankAccount, PassportNumber, DriversLicense, HealthId, Custom, } impl SensitiveDataType { pub fn as_str(&self) -> &'static str { match self { Self::Email => "email", Self::PhoneNumber => "phone_number", Self::CreditCard => "credit_card", Self::Ssn => "ssn", Self::IpAddress => "ip_address", Self::ApiKey => "api_key", Self::Password => "password", Self::AwsAccessKey => "aws_access_key", Self::AwsSecretKey => "aws_secret_key", Self::PrivateKey => "private_key", Self::JwtToken => "jwt_token", Self::BankAccount => "bank_account", Self::PassportNumber => "passport_number", Self::DriversLicense => "drivers_license", Self::HealthId => "health_id", Self::Custom => "custom", } } pub fn severity(&self) -> DlpSeverity { match self { Self::CreditCard | Self::Ssn | Self::BankAccount | Self::PrivateKey => { DlpSeverity::Critical } Self::AwsAccessKey | Self::AwsSecretKey | Self::ApiKey | Self::Password | Self::JwtToken => DlpSeverity::High, Self::PassportNumber | Self::DriversLicense | Self::HealthId => DlpSeverity::High, Self::Email | Self::PhoneNumber => DlpSeverity::Medium, Self::IpAddress => DlpSeverity::Low, Self::Custom => DlpSeverity::Medium, } } pub fn redaction_pattern(&self) -> &'static str { match self { Self::Email => "[EMAIL REDACTED]", Self::PhoneNumber => "[PHONE REDACTED]", Self::CreditCard => "[CARD REDACTED]", Self::Ssn => "[SSN REDACTED]", Self::IpAddress => "[IP REDACTED]", Self::ApiKey => "[API KEY REDACTED]", Self::Password => "[PASSWORD REDACTED]", Self::AwsAccessKey => "[AWS KEY REDACTED]", Self::AwsSecretKey => "[AWS SECRET REDACTED]", Self::PrivateKey => "[PRIVATE KEY REDACTED]", Self::JwtToken => "[TOKEN REDACTED]", Self::BankAccount => "[BANK ACCOUNT REDACTED]", Self::PassportNumber => "[PASSPORT REDACTED]", Self::DriversLicense => "[LICENSE REDACTED]", Self::HealthId => "[HEALTH ID REDACTED]", Self::Custom => "[REDACTED]", } } } #[derive(Debug, Clone, Copy, PartialEq, Eq, Ord, PartialOrd, Hash, Serialize, Deserialize)] pub enum DlpSeverity { Low, Medium, High, Critical, } impl DlpSeverity { pub fn as_str(&self) -> &'static str { match self { Self::Low => "low", Self::Medium => "medium", Self::High => "high", Self::Critical => "critical", } } pub fn score(&self) -> u8 { match self { Self::Low => 25, Self::Medium => 50, Self::High => 75, Self::Critical => 100, } } } #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] pub enum DlpAction { Allow, Warn, Redact, Block, Quarantine, } impl DlpAction { pub fn as_str(&self) -> &'static str { match self { Self::Allow => "allow", Self::Warn => "warn", Self::Redact => "redact", Self::Block => "block", Self::Quarantine => "quarantine", } } } #[derive(Debug, Clone, Serialize, Deserialize)] pub struct DlpMatch { pub data_type: SensitiveDataType, pub matched_text: String, pub start_position: usize, pub end_position: usize, pub confidence: f32, pub context: Option, } impl DlpMatch { pub fn new( data_type: SensitiveDataType, matched_text: &str, start: usize, end: usize, ) -> Self { Self { data_type, matched_text: matched_text.to_string(), start_position: start, end_position: end, confidence: 1.0, context: None, } } pub fn with_confidence(mut self, confidence: f32) -> Self { self.confidence = confidence; self } pub fn with_context(mut self, context: String) -> Self { self.context = Some(context); self } pub fn masked_value(&self) -> String { let len = self.matched_text.len(); if len <= 4 { "*".repeat(len) } else { format!( "{}{}{}", &self.matched_text[..2], "*".repeat(len - 4), &self.matched_text[len - 2..] ) } } } #[derive(Debug, Clone, Serialize, Deserialize)] pub struct DlpScanResult { pub matches: Vec, pub severity: DlpSeverity, pub action: DlpAction, pub blocked: bool, pub redacted_content: Option, pub policy_violations: Vec, } impl DlpScanResult { pub fn clean() -> Self { Self { matches: Vec::new(), severity: DlpSeverity::Low, action: DlpAction::Allow, blocked: false, redacted_content: None, policy_violations: Vec::new(), } } pub fn has_sensitive_data(&self) -> bool { !self.matches.is_empty() } pub fn match_count(&self) -> usize { self.matches.len() } pub fn data_types_found(&self) -> HashSet { self.matches.iter().map(|m| m.data_type).collect() } } #[derive(Debug, Clone, Serialize, Deserialize)] pub struct DlpPolicy { pub id: Uuid, pub name: String, pub description: Option, pub enabled: bool, pub data_types: Vec, pub action: DlpAction, pub severity_threshold: DlpSeverity, pub exceptions: Vec, pub applies_to: PolicyScope, } impl DlpPolicy { pub fn new(name: &str, data_types: Vec, action: DlpAction) -> Self { Self { id: Uuid::new_v4(), name: name.to_string(), description: None, enabled: true, data_types, action, severity_threshold: DlpSeverity::Low, exceptions: Vec::new(), applies_to: PolicyScope::All, } } pub fn with_description(mut self, desc: &str) -> Self { self.description = Some(desc.to_string()); self } pub fn with_severity_threshold(mut self, threshold: DlpSeverity) -> Self { self.severity_threshold = threshold; self } pub fn with_scope(mut self, scope: PolicyScope) -> Self { self.applies_to = scope; self } pub fn add_exception(&mut self, exception: DlpException) { self.exceptions.push(exception); } pub fn matches_data_type(&self, data_type: SensitiveDataType) -> bool { self.data_types.contains(&data_type) } pub fn is_exception(&self, content: &str, user_id: Option) -> bool { for exception in &self.exceptions { match exception { DlpException::User(uid) => { if user_id == Some(*uid) { return true; } } DlpException::Pattern(pattern) => { if let Ok(re) = Regex::new(pattern) { if re.is_match(content) { return true; } } } DlpException::Domain(domain) => { if content.contains(domain) { return true; } } } } false } } #[derive(Debug, Clone, Serialize, Deserialize)] pub enum PolicyScope { All, Inbound, Outbound, Users(Vec), Channels(Vec), } #[derive(Debug, Clone, Serialize, Deserialize)] pub enum DlpException { User(Uuid), Pattern(String), Domain(String), } #[derive(Debug, Clone, Serialize, Deserialize)] pub struct DlpViolation { pub id: Uuid, pub timestamp: chrono::DateTime, pub policy_id: Uuid, pub policy_name: String, pub user_id: Option, pub data_types: Vec, pub action_taken: DlpAction, pub severity: DlpSeverity, pub content_hash: String, pub match_count: usize, } pub struct DlpManager { config: DlpConfig, patterns: HashMap>, policies: Arc>>, violations: Arc>>, custom_patterns: Arc>>, } impl DlpManager { pub fn new(config: DlpConfig) -> Result { let patterns = Self::build_detection_patterns()?; Ok(Self { config, patterns, policies: Arc::new(RwLock::new(Vec::new())), violations: Arc::new(RwLock::new(Vec::new())), custom_patterns: Arc::new(RwLock::new(Vec::new())), }) } pub fn with_defaults() -> Result { Self::new(DlpConfig::default()) } fn build_detection_patterns() -> Result>> { let mut patterns = HashMap::new(); patterns.insert( SensitiveDataType::Email, vec![Regex::new(r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b")?], ); patterns.insert( SensitiveDataType::PhoneNumber, vec![ Regex::new(r"\b\d{3}[-.\s]?\d{3}[-.\s]?\d{4}\b")?, Regex::new(r"\b\+\d{1,3}[-.\s]?\d{1,4}[-.\s]?\d{1,4}[-.\s]?\d{1,9}\b")?, Regex::new(r"\(\d{3}\)\s*\d{3}[-.\s]?\d{4}")?, ], ); patterns.insert( SensitiveDataType::CreditCard, vec![ Regex::new(r"\b4\d{3}[-\s]?\d{4}[-\s]?\d{4}[-\s]?\d{4}\b")?, Regex::new(r"\b5[1-5]\d{2}[-\s]?\d{4}[-\s]?\d{4}[-\s]?\d{4}\b")?, Regex::new(r"\b3[47]\d{2}[-\s]?\d{6}[-\s]?\d{5}\b")?, Regex::new(r"\b6(?:011|5\d{2})[-\s]?\d{4}[-\s]?\d{4}[-\s]?\d{4}\b")?, ], ); patterns.insert( SensitiveDataType::Ssn, vec![ Regex::new(r"\b\d{3}[-\s]?\d{2}[-\s]?\d{4}\b")?, Regex::new(r"(?i)\bssn[:\s]*\d{3}[-\s]?\d{2}[-\s]?\d{4}\b")?, ], ); patterns.insert( SensitiveDataType::IpAddress, vec![ Regex::new(r"\b(?:(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\.){3}(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\b")?, Regex::new(r"\b(?:[0-9a-fA-F]{1,4}:){7}[0-9a-fA-F]{1,4}\b")?, ], ); patterns.insert( SensitiveDataType::ApiKey, vec![ Regex::new(r#"(?i)(?:api[_-]?key|apikey)["\s:=]+["']?([a-zA-Z0-9_\-]{20,})["']?"#)?, Regex::new(r#"(?i)(?:secret|token)[_\-]?key[:\s=]+['\"]?([a-zA-Z0-9_\-]{16,})['\"]?"#)?, ], ); patterns.insert( SensitiveDataType::Password, vec![ Regex::new(r#"(?i)password["\s:=]+["']?([^\s"']{8,})["']?"#)?, Regex::new(r#"(?i)(?:passwd|pwd)[:\s=]+['\"]?([^\s'\"]{8,})['\"]?"#)?, ], ); patterns.insert( SensitiveDataType::AwsAccessKey, vec![Regex::new(r"\b(?:AKIA|ABIA|ACCA|ASIA)[0-9A-Z]{16}\b")?], ); patterns.insert( SensitiveDataType::AwsSecretKey, vec![Regex::new(r#"(?i)aws[_\-]?secret[_\-]?(?:access[_\-]?)?key[:\s=]+['\"]?([a-zA-Z0-9/+=]{40})['\"]?"#)?], ); patterns.insert( SensitiveDataType::PrivateKey, vec![ Regex::new(r"-----BEGIN (?:RSA |EC |DSA |OPENSSH )?PRIVATE KEY-----")?, Regex::new(r"-----BEGIN PGP PRIVATE KEY BLOCK-----")?, ], ); patterns.insert( SensitiveDataType::JwtToken, vec![Regex::new(r"\beyJ[a-zA-Z0-9_-]*\.eyJ[a-zA-Z0-9_-]*\.[a-zA-Z0-9_-]*\b")?], ); patterns.insert( SensitiveDataType::BankAccount, vec![ Regex::new(r#"(?i)(?:account|acct)[_\-\s]?(?:number|no|#)?[:\s]*\d{8,17}"#)?, Regex::new(r#"(?i)(?:routing|aba)[_\-\s]?(?:number|no|#)?[:\s]*\d{9}"#)?, Regex::new(r"\b[A-Z]{2}\d{2}[A-Z0-9]{4}\d{7}(?:[A-Z0-9]?){0,16}\b")?, ], ); patterns.insert( SensitiveDataType::PassportNumber, vec![Regex::new(r"(?i)passport[_\-\s]?(?:number|no|#)?[:\s]*[A-Z0-9]{6,9}")?], ); patterns.insert( SensitiveDataType::DriversLicense, vec![Regex::new(r"(?i)(?:driver'?s?|dl)[_\-\s]?(?:license|lic)[_\-\s]?(?:number|no|#)?[:\s]*[A-Z0-9]{5,15}")?], ); patterns.insert( SensitiveDataType::HealthId, vec![ Regex::new(r"(?i)(?:medicare|medicaid)[_\-\s]?(?:id|number|no|#)?[:\s]*[A-Z0-9]{10,12}")?, Regex::new(r"(?i)(?:health|medical)[_\-\s]?(?:id|record)[_\-\s]?(?:number|no|#)?[:\s]*[A-Z0-9]{8,15}")?, ], ); Ok(patterns) } pub async fn scan(&self, content: &str) -> DlpScanResult { if !self.config.enabled { return DlpScanResult::clean(); } if content.len() > self.config.max_content_length { return DlpScanResult { matches: Vec::new(), severity: DlpSeverity::Medium, action: DlpAction::Block, blocked: true, redacted_content: None, policy_violations: vec!["Content exceeds maximum allowed length".into()], }; } let mut all_matches = Vec::new(); for (data_type, patterns) in &self.patterns { for pattern in patterns { for mat in pattern.find_iter(content) { let context = extract_context(content, mat.start(), mat.end(), 20); let dlp_match = DlpMatch::new(*data_type, mat.as_str(), mat.start(), mat.end()) .with_context(context); all_matches.push(dlp_match); } } } let custom_patterns = self.custom_patterns.read().await; for (_, pattern, data_type) in custom_patterns.iter() { for mat in pattern.find_iter(content) { let context = extract_context(content, mat.start(), mat.end(), 20); let dlp_match = DlpMatch::new(*data_type, mat.as_str(), mat.start(), mat.end()) .with_context(context); all_matches.push(dlp_match); } } drop(custom_patterns); if all_matches.is_empty() { return DlpScanResult::clean(); } let severity = all_matches .iter() .map(|m| m.data_type.severity()) .max() .unwrap_or(DlpSeverity::Low); let (action, blocked, policy_violations) = self.evaluate_policies(&all_matches, None).await; let redacted_content = if self.config.redact_sensitive_data { Some(self.redact_content(content, &all_matches)) } else { None }; if self.config.log_violations && !all_matches.is_empty() { warn!( "DLP scan found {} sensitive data matches, severity: {}, action: {}", all_matches.len(), severity.as_str(), action.as_str() ); } DlpScanResult { matches: all_matches, severity, action, blocked, redacted_content, policy_violations, } } pub async fn scan_with_user( &self, content: &str, user_id: Uuid, direction: ScanDirection, ) -> DlpScanResult { if !self.config.enabled { return DlpScanResult::clean(); } match direction { ScanDirection::Inbound if !self.config.scan_inbound => return DlpScanResult::clean(), ScanDirection::Outbound if !self.config.scan_outbound => return DlpScanResult::clean(), _ => {} } let mut result = self.scan(content).await; if result.has_sensitive_data() { let (action, blocked, violations) = self.evaluate_policies(&result.matches, Some(user_id)).await; result.action = action; result.blocked = blocked; result.policy_violations = violations; } result } async fn evaluate_policies( &self, matches: &[DlpMatch], user_id: Option, ) -> (DlpAction, bool, Vec) { let policies = self.policies.read().await; let mut highest_action = DlpAction::Allow; let mut blocked = false; let mut violations = Vec::new(); for policy in policies.iter().filter(|p| p.enabled) { for dlp_match in matches { if !policy.matches_data_type(dlp_match.data_type) { continue; } if dlp_match.data_type.severity() < policy.severity_threshold { continue; } if policy.is_exception(&dlp_match.matched_text, user_id) { continue; } violations.push(format!( "Policy '{}' violated: {} detected", policy.name, dlp_match.data_type.as_str() )); if policy.action as u8 > highest_action as u8 { highest_action = policy.action; } if policy.action == DlpAction::Block || policy.action == DlpAction::Quarantine { blocked = true; } } } if violations.is_empty() && self.config.block_on_violation && !matches.is_empty() { let critical_found = matches .iter() .any(|m| m.data_type.severity() >= DlpSeverity::Critical); if critical_found { blocked = true; highest_action = DlpAction::Block; violations.push("Critical sensitive data detected".into()); } } (highest_action, blocked, violations) } pub fn redact_content(&self, content: &str, matches: &[DlpMatch]) -> String { let mut redacted = content.to_string(); let mut sorted_matches: Vec<&DlpMatch> = matches.iter().collect(); sorted_matches.sort_by(|a, b| b.start_position.cmp(&a.start_position)); for dlp_match in sorted_matches { let redaction = dlp_match.data_type.redaction_pattern(); redacted.replace_range(dlp_match.start_position..dlp_match.end_position, redaction); } redacted } pub fn partial_redact_content(&self, content: &str, matches: &[DlpMatch]) -> String { let mut redacted = content.to_string(); let mut sorted_matches: Vec<&DlpMatch> = matches.iter().collect(); sorted_matches.sort_by(|a, b| b.start_position.cmp(&a.start_position)); for dlp_match in sorted_matches { let masked = dlp_match.masked_value(); redacted.replace_range(dlp_match.start_position..dlp_match.end_position, &masked); } redacted } pub async fn add_policy(&self, policy: DlpPolicy) { let mut policies = self.policies.write().await; policies.push(policy); } pub async fn remove_policy(&self, policy_id: Uuid) -> bool { let mut policies = self.policies.write().await; let initial_len = policies.len(); policies.retain(|p| p.id != policy_id); policies.len() < initial_len } pub async fn get_policies(&self) -> Vec { let policies = self.policies.read().await; policies.clone() } pub async fn add_custom_pattern( &self, name: &str, pattern: &str, data_type: SensitiveDataType, ) -> Result<()> { let regex = Regex::new(pattern).map_err(|e| anyhow!("Invalid regex pattern: {e}"))?; let mut patterns = self.custom_patterns.write().await; patterns.push((name.to_string(), regex, data_type)); info!("Added custom DLP pattern: {}", name); Ok(()) } pub async fn record_violation( &self, policy_id: Uuid, policy_name: &str, user_id: Option, result: &DlpScanResult, content: &str, ) { use sha2::{Digest, Sha256}; let mut hasher = Sha256::new(); hasher.update(content.as_bytes()); let content_hash = hex::encode(hasher.finalize()); let violation = DlpViolation { id: Uuid::new_v4(), timestamp: chrono::Utc::now(), policy_id, policy_name: policy_name.to_string(), user_id, data_types: result.data_types_found().into_iter().collect(), action_taken: result.action, severity: result.severity, content_hash, match_count: result.match_count(), }; let mut violations = self.violations.write().await; violations.push(violation); } pub async fn get_violations( &self, limit: usize, user_id: Option, ) -> Vec { let violations = self.violations.read().await; let filtered: Vec = if let Some(uid) = user_id { violations .iter() .filter(|v| v.user_id == Some(uid)) .cloned() .collect() } else { violations.clone() }; filtered.into_iter().rev().take(limit).collect() } pub fn config(&self) -> &DlpConfig { &self.config } } #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum ScanDirection { Inbound, Outbound, } fn extract_context(content: &str, start: usize, end: usize, padding: usize) -> String { let context_start = start.saturating_sub(padding); let context_end = (end + padding).min(content.len()); let mut context = String::new(); if context_start > 0 { context.push_str("..."); } context.push_str(&content[context_start..context_end]); if context_end < content.len() { context.push_str("..."); } context } pub fn validate_credit_card_checksum(number: &str) -> bool { let digits: Vec = number .chars() .filter(|c| c.is_ascii_digit()) .filter_map(|c| c.to_digit(10)) .collect(); if digits.len() < 13 || digits.len() > 19 { return false; } let mut sum = 0; let mut double = false; for digit in digits.iter().rev() { let mut d = *digit; if double { d *= 2; if d > 9 { d -= 9; } } sum += d; double = !double; } sum % 10 == 0 } pub fn create_default_policies() -> Vec { vec![ DlpPolicy::new( "Block Credit Cards", vec![SensitiveDataType::CreditCard], DlpAction::Block, ) .with_description("Block transmission of credit card numbers"), DlpPolicy::new( "Block SSN", vec![SensitiveDataType::Ssn], DlpAction::Block, ) .with_description("Block transmission of Social Security Numbers"), DlpPolicy::new( "Warn on API Keys", vec![ SensitiveDataType::ApiKey, SensitiveDataType::AwsAccessKey, SensitiveDataType::AwsSecretKey, ], DlpAction::Warn, ) .with_description("Warn when API keys or secrets are detected"), DlpPolicy::new( "Redact Emails", vec![SensitiveDataType::Email], DlpAction::Redact, ) .with_description("Redact email addresses in outbound content") .with_scope(PolicyScope::Outbound), DlpPolicy::new( "Block Private Keys", vec![SensitiveDataType::PrivateKey], DlpAction::Block, ) .with_description("Block transmission of private keys"), ] } #[cfg(test)] mod tests { use super::*; #[tokio::test] async fn test_email_detection() { let manager = DlpManager::with_defaults().expect("Failed to create manager"); let content = "Contact me at test@example.com for more info"; let result = manager.scan(content).await; assert!(result.has_sensitive_data()); assert!(result.data_types_found().contains(&SensitiveDataType::Email)); } #[tokio::test] async fn test_credit_card_detection() { let manager = DlpManager::with_defaults().expect("Failed to create manager"); let content = "My card number is 4111-1111-1111-1111"; let result = manager.scan(content).await; assert!(result.has_sensitive_data()); assert!(result.data_types_found().contains(&SensitiveDataType::CreditCard)); } #[tokio::test] async fn test_ssn_detection() { let manager = DlpManager::with_defaults().expect("Failed to create manager"); let content = "SSN: 123-45-6789"; let result = manager.scan(content).await; assert!(result.has_sensitive_data()); assert!(result.data_types_found().contains(&SensitiveDataType::Ssn)); } #[tokio::test] async fn test_aws_key_detection() { let manager = DlpManager::with_defaults().expect("Failed to create manager"); let content = "AWS key: AKIAIOSFODNN7EXAMPLE"; let result = manager.scan(content).await; assert!(result.has_sensitive_data()); assert!(result.data_types_found().contains(&SensitiveDataType::AwsAccessKey)); } #[tokio::test] async fn test_jwt_detection() { let manager = DlpManager::with_defaults().expect("Failed to create manager"); let content = "Token: eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.dozjgNryP4J3jVmNHl0w5N_XgL0n3I9PlFUP0THsR8U"; let result = manager.scan(content).await; assert!(result.has_sensitive_data()); assert!(result.data_types_found().contains(&SensitiveDataType::JwtToken)); } #[tokio::test] async fn test_clean_content() { let manager = DlpManager::with_defaults().expect("Failed to create manager"); let content = "This is just a regular message with no sensitive data."; let result = manager.scan(content).await; assert!(!result.has_sensitive_data()); assert_eq!(result.action, DlpAction::Allow); } #[tokio::test] async fn test_redaction() { let manager = DlpManager::with_defaults().expect("Failed to create manager"); let content = "Email me at user@example.com please"; let result = manager.scan(content).await; assert!(result.redacted_content.is_some()); let redacted = result.redacted_content.unwrap(); assert!(redacted.contains("[EMAIL REDACTED]")); assert!(!redacted.contains("user@example.com")); } #[tokio::test] async fn test_partial_redaction() { let manager = DlpManager::with_defaults().expect("Failed to create manager"); let content = "Card: 4111111111111111"; let result = manager.scan(content).await; let partial = manager.partial_redact_content(content, &result.matches); assert!(partial.contains("41")); assert!(partial.contains("11")); assert!(partial.contains("*")); } #[tokio::test] async fn test_multiple_matches() { let manager = DlpManager::with_defaults().expect("Failed to create manager"); let content = "Contact: test@example.com, phone: 555-123-4567, card: 4111111111111111"; let result = manager.scan(content).await; assert!(result.match_count() >= 3); assert!(result.data_types_found().contains(&SensitiveDataType::Email)); assert!(result.data_types_found().contains(&SensitiveDataType::PhoneNumber)); assert!(result.data_types_found().contains(&SensitiveDataType::CreditCard)); } #[tokio::test] async fn test_policy_evaluation() { let manager = DlpManager::with_defaults().expect("Failed to create manager"); let policy = DlpPolicy::new( "Block SSN", vec![SensitiveDataType::Ssn], DlpAction::Block, ); manager.add_policy(policy).await; let content = "SSN: 123-45-6789"; let result = manager.scan(content).await; assert!(result.blocked); assert_eq!(result.action, DlpAction::Block); } #[test] fn test_credit_card_checksum_valid() { assert!(validate_credit_card_checksum("4111111111111111")); assert!(validate_credit_card_checksum("5500000000000004")); } #[test] fn test_credit_card_checksum_invalid() { assert!(!validate_credit_card_checksum("4111111111111112")); assert!(!validate_credit_card_checksum("1234567890123456")); } #[test] fn test_severity_levels() { assert_eq!(SensitiveDataType::CreditCard.severity(), DlpSeverity::Critical); assert_eq!(SensitiveDataType::Email.severity(), DlpSeverity::Medium); assert_eq!(SensitiveDataType::IpAddress.severity(), DlpSeverity::Low); } #[test] fn test_dlp_match_masking() { let dlp_match = DlpMatch::new( SensitiveDataType::CreditCard, "4111111111111111", 0, 16, ); let masked = dlp_match.masked_value(); assert!(masked.starts_with("41")); assert!(masked.ends_with("11")); assert!(masked.contains("*")); } #[test] fn test_default_policies() { let policies = create_default_policies(); assert!(!policies.is_empty()); assert!(policies.iter().any(|p| p.name == "Block Credit Cards")); assert!(policies.iter().any(|p| p.name == "Block SSN")); } #[tokio::test] async fn test_custom_pattern() { let manager = DlpManager::with_defaults().expect("Failed to create manager"); manager .add_custom_pattern("Custom ID", r"CUSTOM-\d{6}", SensitiveDataType::Custom) .await .expect("Failed to add pattern"); let content = "ID: CUSTOM-123456"; let result = manager.scan(content).await; assert!(result.has_sensitive_data()); assert!(result.data_types_found().contains(&SensitiveDataType::Custom)); } #[test] fn test_policy_exception() { let mut policy = DlpPolicy::new( "Test Policy", vec![SensitiveDataType::Email], DlpAction::Block, ); policy.add_exception(DlpException::Domain("@internal.com".into())); assert!(policy.is_exception("user@internal.com", None)); assert!(!policy.is_exception("user@external.com", None)); } #[test] fn test_extract_context() { let content = "This is a test email@example.com in the middle"; let context = extract_context(content, 15, 33, 10); assert!(context.contains("email@example.com")); assert!(context.len() < content.len() + 10); } #[tokio::test] async fn test_scan_direction() { let mut config = DlpConfig::default(); config.scan_inbound = false; let manager = DlpManager::new(config).expect("Failed to create manager"); let content = "Email: test@example.com"; let user_id = Uuid::new_v4(); let inbound_result = manager .scan_with_user(content, user_id, ScanDirection::Inbound) .await; assert!(!inbound_result.has_sensitive_data()); let outbound_result = manager .scan_with_user(content, user_id, ScanDirection::Outbound) .await; assert!(outbound_result.has_sensitive_data()); } }