generalbots/botmodels/src/services/anomaly_service.py
Rodrigo Rodriguez (Pragmatismo) 037db5c381 feat: Major workspace reorganization and documentation update
- Add comprehensive documentation in botbook/ with 12 chapters
- Add botapp/ Tauri desktop application
- Add botdevice/ IoT device support
- Add botlib/ shared library crate
- Add botmodels/ Python ML models service
- Add botplugin/ browser extension
- Add botserver/ reorganized server code
- Add bottemplates/ bot templates
- Add bottest/ integration tests
- Add botui/ web UI server
- Add CI/CD workflows in .forgejo/workflows/
- Add AGENTS.md and PROD.md documentation
- Add dependency management scripts (DEPENDENCIES.sh/ps1)
- Remove legacy src/ structure and migrations
- Clean up temporary and backup files
2026-04-19 08:14:25 -03:00

268 lines
8.3 KiB
Python

"""
Anomaly Detection Service - Detecção de desvios/anomalias em dados tabulares
Compatible with salary data, sensor readings, and other numerical time series
"""
import numpy as np
from typing import Optional
from dataclasses import dataclass
from ..core.logging import get_logger
# Module-level logger for this service, obtained from the project's logging helper.
logger = get_logger("anomaly_service")
@dataclass
class AnomalyResult:
    """Verdict of a single detector for a single data point."""

    # True when the point's score exceeded the detector's cut-off.
    is_anomaly: bool
    # Detector-specific magnitude: z-score, modified z-score, or distance
    # outside the IQR fence.
    score: float
    # Detector identifier: "zscore", "iqr" or "isolation_forest".
    method: str
    # Cut-off (or IQR multiplier) the detector was run with.
    threshold: float
    # Per-point context: index, value and the statistics that were used.
    details: dict


class AnomalyDetectionService:
    """Statistical anomaly detectors for one-dimensional numeric data.

    The three ``detect_*`` primitives return one ``AnomalyResult`` per input
    value (or an empty list when the input is too small or has no spread).
    ``detect_salary_anomalies`` and ``detect_sensor_anomalies`` wrap them for
    lists of dict records and return plain, JSON-serializable dicts.
    """

    def __init__(self):
        self.initialized = True

    @staticmethod
    def _extract_values(
        records: list[dict], value_field: str
    ) -> tuple[list[int], list[float]]:
        """Return (positions, values) for the records that carry value_field.

        ``positions[k]`` is the index in ``records`` that ``values[k]`` came
        from, so results computed on the filtered values can be mapped back to
        the right record even when some records lack the field.
        """
        positions: list[int] = []
        values: list[float] = []
        for position, record in enumerate(records):
            if value_field in record:
                positions.append(position)
                values.append(float(record[value_field]))
        return positions, values

    def detect_zscore(
        self, data: list[float], threshold: float = 3.0
    ) -> list[AnomalyResult]:
        """Z-score based anomaly detection.

        Flags values lying more than ``threshold`` (population) standard
        deviations away from the mean.

        Args:
            data: Values to analyse.
            threshold: Absolute z-score above which a value is an anomaly.

        Returns:
            One result per value, in input order. Empty when fewer than three
            values are given or when all values are identical (std == 0).
        """
        if len(data) < 3:
            return []

        arr = np.asarray(data, dtype=float)
        mean = float(np.mean(arr))
        std = float(np.std(arr))
        if std == 0:
            return []

        z_scores = np.abs((arr - mean) / std)
        return [
            AnomalyResult(
                # bool(): keep numpy.bool_ out of results so they stay
                # JSON-serializable.
                is_anomaly=bool(z > threshold),
                score=float(z),
                method="zscore",
                threshold=threshold,
                details={
                    "index": i,
                    "value": float(value),
                    "mean": mean,
                    "std": std,
                    "deviation": float(value - mean),
                },
            )
            for i, (value, z) in enumerate(zip(arr, z_scores))
        ]

    def detect_iqr(
        self, data: list[float], multiplier: float = 1.5
    ) -> list[AnomalyResult]:
        """IQR (interquartile range) based anomaly detection.

        Flags values outside ``[Q1 - multiplier*IQR, Q3 + multiplier*IQR]``.
        The score is the distance beyond the violated fence (0 for inliers).

        Args:
            data: Values to analyse.
            multiplier: Fence width expressed in IQRs.

        Returns:
            One result per value, in input order. Empty when fewer than four
            values are given.
        """
        if len(data) < 4:
            return []

        arr = np.asarray(data, dtype=float)
        q1 = float(np.percentile(arr, 25))
        q3 = float(np.percentile(arr, 75))
        iqr = q3 - q1
        lower_bound = q1 - multiplier * iqr
        upper_bound = q3 + multiplier * iqr

        results = []
        for i, raw in enumerate(arr):
            value = float(raw)
            if value < lower_bound:
                distance = lower_bound - value
            elif value > upper_bound:
                distance = value - upper_bound
            else:
                distance = 0.0
            results.append(
                AnomalyResult(
                    is_anomaly=value < lower_bound or value > upper_bound,
                    score=distance,
                    method="iqr",
                    threshold=multiplier,
                    details={
                        "index": i,
                        "value": value,
                        "q1": q1,
                        "q3": q3,
                        "iqr": iqr,
                        "lower_bound": lower_bound,
                        "upper_bound": upper_bound,
                    },
                )
            )
        return results

    def detect_isolation_forest(
        self,
        data: list[float],
        contamination: float = 0.1,
        threshold: float = 3.5,
    ) -> list[AnomalyResult]:
        """Robust outlier detection via the MAD-based modified z-score.

        Despite its name this is not an Isolation Forest: each value is scored
        as ``0.6745 * |x - median| / MAD`` where MAD is the median absolute
        deviation, which is less sensitive to the outliers themselves than the
        mean/std used by ``detect_zscore``.

        Args:
            data: Values to analyse.
            contamination: Unused; retained so existing callers keep working.
            threshold: Modified z-score above which a value is an anomaly.

        Returns:
            One result per value, in input order. Empty when fewer than three
            values are given or when the MAD is zero.
        """
        if len(data) < 3:
            return []

        arr = np.asarray(data, dtype=float)
        median = float(np.median(arr))
        mad = float(np.median(np.abs(arr - median)))
        if mad == 0:
            return []

        modified_z = np.abs(0.6745 * (arr - median) / mad)
        return [
            AnomalyResult(
                is_anomaly=bool(z > threshold),
                score=float(z),
                method="isolation_forest",
                threshold=threshold,
                details={
                    "index": i,
                    "value": float(value),
                    "median": median,
                    "mad": mad,
                },
            )
            for i, (value, z) in enumerate(zip(arr, modified_z))
        ]

    def detect_salary_anomalies(
        self, records: list[dict], value_field: str = "salarioBase"
    ) -> dict:
        """Detect anomalous salary/payroll records by majority vote.

        Runs the z-score (threshold 2.5), IQR (multiplier 1.5) and modified
        z-score detectors over ``value_field`` and reports every record flagged
        by at least two of them. Records without ``value_field`` are ignored
        by the detectors but still counted in ``total_records``.

        Args:
            records: Input records.
            value_field: Key holding the numeric value in each record.

        Returns:
            ``{"error": ...}`` when no record has ``value_field``; otherwise a
            report with totals, the anomaly list and summary statistics. Each
            anomaly's ``index`` is the position of its record in ``records``.
        """
        positions, values = self._extract_values(records, value_field)
        if not values:
            return {"error": f"Field '{value_field}' not found in records"}

        zscore_results = self.detect_zscore(values, threshold=2.5)
        iqr_results = self.detect_iqr(values, multiplier=1.5)
        iso_results = self.detect_isolation_forest(values)

        anomalies = []
        for i, (position, value) in enumerate(zip(positions, values)):
            # A detector that abstained (empty result list) casts no vote.
            methods = {
                "zscore": zscore_results[i].is_anomaly if zscore_results else False,
                "iqr": iqr_results[i].is_anomaly if iqr_results else False,
                "isolation": iso_results[i].is_anomaly if iso_results else False,
            }
            votes = sum(methods.values())
            if votes < 2:
                continue

            # Re-point the detector's index at the original record position so
            # it agrees with the top-level "index".
            zscore_details = (
                dict(zscore_results[i].details, index=position)
                if zscore_results
                else {}
            )
            anomalies.append(
                {
                    "index": position,
                    "record": records[position],
                    "value": value,
                    "confidence": votes / 3,
                    "methods": methods,
                    "zscore_details": zscore_details,
                }
            )

        return {
            "total_records": len(records),
            "anomalies_found": len(anomalies),
            "anomaly_rate": len(anomalies) / len(records) if records else 0,
            "anomalies": anomalies,
            "summary": {
                "mean": float(np.mean(values)),
                "median": float(np.median(values)),
                "std": float(np.std(values)),
                "min": float(np.min(values)),
                "max": float(np.max(values)),
            },
        }

    def detect_sensor_anomalies(
        self, readings: list[dict], value_field: str = "value"
    ) -> dict:
        """Detect sudden changes between consecutive sensor readings.

        A reading is flagged when the absolute change from the previous
        reading has a z-score above 2.5 or exceeds three times the mean
        absolute change. Readings without ``value_field`` are skipped, so
        "previous" means the previous reading that carried a value.

        Args:
            readings: Input readings, in time order.
            value_field: Key holding the numeric value in each reading.

        Returns:
            ``{"error": ...}`` when no reading has ``value_field``; otherwise a
            report with totals, the anomaly list and the change baseline. Each
            anomaly's ``index`` is the position of its reading in ``readings``.
        """
        positions, values = self._extract_values(readings, value_field)
        if not values:
            return {"error": f"Field '{value_field}' not found in readings"}

        diffs = np.diff(np.asarray(values, dtype=float))
        if diffs.size:
            mean_diff = float(np.mean(np.abs(diffs)))
            # NOTE(review): the spread is taken over signed changes while the
            # mean is over absolute changes; kept as-is to preserve results.
            std_diff = float(np.std(diffs))
        else:
            # A single reading has no changes; avoid NaN from an empty mean.
            mean_diff = 0.0
            std_diff = 0.0

        anomalies = []
        for i, delta in enumerate(diffs, start=1):
            change = abs(float(delta))
            z_change = (change - mean_diff) / std_diff if std_diff > 0 else 0
            if z_change > 2.5 or change > 3 * mean_diff:
                anomalies.append(
                    {
                        "index": positions[i],
                        "record": readings[positions[i]],
                        "previous_value": values[i - 1],
                        "current_value": values[i],
                        "change": change,
                        "change_zscore": float(z_change),
                        "type": "sudden_change",
                    }
                )

        return {
            "total_readings": len(readings),
            "anomalies_found": len(anomalies),
            "anomalies": anomalies,
            "baseline": {
                "mean_change": mean_diff,
                "std_change": std_diff,
            },
        }
# Lazily created, module-wide service instance (None until first requested).
_service: Optional[AnomalyDetectionService] = None


def get_anomaly_service() -> AnomalyDetectionService:
    """Return the shared AnomalyDetectionService, creating it on first call."""
    global _service
    if _service is not None:
        return _service
    _service = AnomalyDetectionService()
    return _service