Loading...
Loading...
Statistical baseline comparison, deviation scoring, and automated response actions. Detect unusual behavior before it becomes an incident.
Request Rate
Requests per second deviation from baseline.
Error Rate
Error percentage exceeding normal thresholds.
Latency
Response time P95/P99 degradation.
Token Usage
LLM token consumption anomalies.
Score Change
Rapid DRD score fluctuations.
Data Volume
Unusual data transfer patterns.
DRD monitors a comprehensive set of agent metrics. Each metric is tracked per agent with configurable granularity (1-second to 1-hour windows).
Performance
- response_latency_ms: End-to-end response time per request
- throughput_rps: Requests processed per second
- error_rate: Percentage of requests returning errors
- timeout_rate: Percentage of requests exceeding timeout
- queue_depth: Number of requests waiting in queue
Behavioral
- output_length_tokens: Token count of generated outputs
- output_sentiment_score: Sentiment polarity of agent responses
- tool_call_frequency: Rate of external tool/API invocations
- policy_check_failures: Rate of policy guard denials
- content_flag_rate: Rate of content safety flag triggers
Resource
- cpu_utilization: CPU usage percentage
- memory_utilization_mb: Memory consumption
- gpu_utilization: GPU usage for inference workloads
- network_egress_bytes: Outbound network traffic volume
- disk_io_ops: Disk read/write operations per second
Baselines are automatically computed using sliding window statistics: mean, standard deviation, P95, P99, and sample count. They update continuously as new data arrives.
{
"agentId": "agent_abc123",
"metricName": "request_rate",
"mean": 42.5,
"stddev": 8.3,
"p95": 55.2,
"p99": 68.1,
"sampleCount": 10000,
"windowHours": 24
}
Baselines define "normal" behavior for each agent. DRD automatically learns baselines from historical data, accounting for time-of-day patterns, day-of-week seasonality, and gradual drift. You can also set explicit static baselines.
{
"agentId": "019agent-scanner-...",
"method": "auto",
"config": {
"learningPeriodDays": 14,
"seasonality": "hourly",
"excludeOutliers": true,
"metrics": [
"response_latency_ms",
"error_rate",
"output_length_tokens",
"policy_check_failures",
"tool_call_frequency"
]
}
}
// Response
{
"ok": true,
"data": {
"baselineId": "019base-abcd-1234-...",
"agentId": "019agent-scanner-...",
"status": "learning",
"method": "auto",
"learningEndsAt": "2026-02-28T09:00:00Z",
"metrics": {
"response_latency_ms": {
"status": "collecting",
"dataPointsCollected": 0
}
},
"createdAt": "2026-02-14T09:00:00Z"
}
}
{
"agentId": "019agent-scanner-...",
"method": "static",
"config": {
"metrics": {
"response_latency_ms": {
"mean": 120,
"stddev": 45,
"min": 20,
"max": 500
},
"error_rate": {
"mean": 0.005,
"stddev": 0.002,
"min": 0,
"max": 0.02
}
}
}
}
Detected anomalies are classified by severity based on the degree of deviation from baseline and the metric category. Severity determines the urgency of notifications and which auto-actions are triggered.
1-2 sigma: Slightly elevated latency
Default action: Log only
2-3 sigma: Error rate doubled from baseline
Default action: Notify via Slack
3-5 sigma: 10x increase in policy failures
Default action: Notify + throttle agent
> 5 sigma: Complete behavior change detected
Default action: Notify + suspend agent
Configure automated responses when anomalies are detected.
Send notifications via configured channels.
Rate-limit the affected agent.
Temporarily suspend agent operations.
Immediately terminate the agent.
Configure automated actions that trigger when anomalies are detected. Actions can be chained and are executed in order with configurable cooldown periods to prevent alert storms.
{
"agentId": "019agent-scanner-...",
"rules": [
{
"name": "High error rate response",
"condition": {
"metric": "error_rate",
"severity": "critical",
"sustainedSeconds": 60
},
"actions": [
{
"type": "notify",
"channels": ["slack", "pagerduty"],
"message": "Agent {{agent.name}} error rate is {{anomaly.value}} (baseline: {{baseline.mean}})"
},
{
"type": "throttle",
"maxRps": 10,
"durationSeconds": 300
}
],
"cooldownSeconds": 900
},
{
"name": "Policy violation spike",
"condition": {
"metric": "policy_check_failures",
"severity": "emergency",
"sustainedSeconds": 30
},
"actions": [
{
"type": "notify",
"channels": ["slack", "pagerduty", "email"],
"message": "EMERGENCY: Agent {{agent.name}} policy violations spiked to {{anomaly.value}}/min"
},
{
"type": "suspend",
"reason": "Automated suspension: anomalous policy violation rate"
},
{
"type": "create_incident",
"severity": "P1"
}
],
"cooldownSeconds": 0
}
]
}
Query detected anomalies with filters for agent, metric, severity, and time range. Each anomaly event includes the detected value, baseline comparison, and any actions taken.
GET /api/v1/anomaly-detection/events?agentId=019agent-scanner-...&severity=critical,emergency&from=2026-02-13T00:00:00Z&limit=10
{
"ok": true,
"data": {
"events": [
{
"id": "019anom-abcd-...",
"agentId": "019agent-scanner-...",
"metric": "error_rate",
"severity": "critical",
"detectedValue": 0.087,
"baseline": {
"mean": 0.005,
"stddev": 0.002,
"deviationSigma": 41.0
},
"sustainedSeconds": 124,
"actionsExecuted": [
{ "type": "notify", "channels": ["slack", "pagerduty"] },
{ "type": "throttle", "maxRps": 10 }
],
"resolved": true,
"resolvedAt": "2026-02-13T15:12:00Z",
"detectedAt": "2026-02-13T15:05:00Z"
}
],
"pagination": {
"total": 3,
"page": 1,
"limit": 10
}
}
}
import { DRD } from '@drd/sdk';
// Build the SDK client from an API token.
const drd = new DRD({
  token: 'drd_live_sk_...',
});

// Create an auto-learning baseline: 14-day learning period over three metrics.
const baseline = await drd.anomaly.baselines.create({
  agentId: 'agent_abc123',
  method: 'auto',
  config: {
    learningPeriodDays: 14,
    metrics: [
      'response_latency_ms',
      'error_rate',
      'policy_check_failures',
    ],
  },
});

// Configure detection rules: a single rule that notifies Slack and throttles
// the agent once a critical error_rate anomaly has been sustained for 60 seconds.
await drd.anomaly.rules.set('agent_abc123', [
  {
    name: 'Error rate alert',
    condition: {
      metric: 'error_rate',
      severity: 'critical',
      sustainedSeconds: 60,
    },
    actions: [
      {
        type: 'notify',
        channels: ['slack'],
      },
      {
        type: 'throttle',
        maxRps: 10,
        durationSeconds: 300,
      },
    ],
  },
]);

// Query recent anomalies: critical and emergency events from 2026-02-13 onward.
const anomalies = await drd.anomaly.events.list({
  agentId: 'agent_abc123',
  severity: ['critical', 'emergency'],
  from: new Date('2026-02-13'),
});

// Print a three-line summary per event: headline, baseline, executed actions.
for (const anomaly of anomalies) {
  const headline = `[${anomaly.severity}] ${anomaly.metric}: ${anomaly.detectedValue}`;
  console.log(headline);

  const { mean, stddev } = anomaly.baseline;
  console.log(` Baseline: ${mean} +/- ${stddev}`);

  const actionTypes = anomaly.actionsExecuted.map(({ type }) => type);
  console.log(` Actions: ${actionTypes.join(', ')}`);
}

// Acknowledge an anomaly
await drd.anomaly.acknowledge('anomaly_abc123');

// Resolve an anomaly
await drd.anomaly.resolve('anomaly_abc123');