This commit is contained in:
2026-04-12 01:06:31 +07:00
commit 10d660cbcb
1066 changed files with 228596 additions and 0 deletions

View File

@@ -0,0 +1,317 @@
#!/usr/bin/env python3
"""
Context Analyzer - Health analysis and degradation detection for agent contexts.
Usage:
python context_analyzer.py analyze <context_file>
python context_analyzer.py budget --system 2000 --tools 1500 --docs 3000 --history 5000
"""
import argparse
import json
import math
import os
import re
import sys
from dataclasses import dataclass, field
from enum import Enum
from typing import Optional
# Hard cap on input file size; larger files are rejected before JSON parsing.
MAX_FILE_SIZE_MB = 100
def load_json_file(path: str):
    """Read *path* as JSON and return the parsed value.

    Rejects files over MAX_FILE_SIZE_MB before parsing, and exits the
    process with a message on any failure (missing file, bad permissions,
    malformed JSON) — all failure modes are fatal for this CLI tool.
    """
    try:
        byte_count = os.path.getsize(path)
        megabytes = byte_count / (1024 * 1024)
        if megabytes > MAX_FILE_SIZE_MB:
            print(f"Error: File too large ({megabytes:.1f}MB). Max {MAX_FILE_SIZE_MB}MB", file=sys.stderr)
            sys.exit(1)
        with open(path, encoding='utf-8') as handle:
            return json.load(handle)
    except FileNotFoundError:
        print(f"Error: File not found: {path}", file=sys.stderr)
        sys.exit(1)
    except PermissionError:
        print(f"Error: Permission denied: {path}", file=sys.stderr)
        sys.exit(1)
    except json.JSONDecodeError as exc:
        print(f"Error: Invalid JSON in {path}: {exc}", file=sys.stderr)
        sys.exit(1)
class HealthStatus(Enum):
    """Context health buckets, ordered from best to worst.

    Thresholds applied by get_health_status():
    """
    HEALTHY = "healthy"    # score > 0.8
    WARNING = "warning"    # score > 0.6
    DEGRADED = "degraded"  # score > 0.4
    CRITICAL = "critical"  # score <= 0.4
@dataclass
class ContextAnalysis:
    """Result bundle produced by analyze_context()."""
    total_tokens: int            # estimated tokens currently in the context
    token_limit: int             # model context window size
    utilization: float           # total_tokens / token_limit
    health_status: HealthStatus  # bucketed status derived from health_score
    health_score: float          # composite score: 1.0 healthy .. 0.0 critical
    degradation_risk: float      # lost-in-the-middle risk, 0..1
    poisoning_risk: float        # context-poisoning risk, 0..1
    recommendations: list = field(default_factory=list)  # human-readable actions
def estimate_tokens(text: str) -> int:
    """Rough token estimate using the ~4-characters-per-token heuristic.

    Integer division intentionally rounds down, so strings shorter than
    four characters report zero tokens.
    """
    whole_tokens, _ = divmod(len(text), 4)
    return whole_tokens
def estimate_message_tokens(messages: list) -> int:
    """Estimate the total token footprint of a list of messages.

    Dict entries are measured by their "content" value plus a flat
    10-token overhead for role/metadata; anything else is stringified
    and measured directly.
    """
    running_total = 0
    for entry in messages:
        if not isinstance(entry, dict):
            running_total += estimate_tokens(str(entry))
            continue
        # Flat overhead accounts for role and metadata fields.
        running_total += estimate_tokens(str(entry.get("content", ""))) + 10
    return running_total
def measure_attention_distribution(context_length: int, sample_size: int = 100) -> list:
    """Simulate a U-shaped attention distribution over `sample_size` positions.

    A real implementation would extract scores from model attention
    weights; this stand-in is high near both ends and low in the middle.
    NOTE: `context_length` is currently unused; kept for interface stability.
    """
    def _score(pos: float) -> float:
        if pos < 0.1:
            # Leading edge: strong attention, tapering toward the middle.
            return 0.9 - pos * 2
        if pos > 0.9:
            # Trailing edge: attention recovers toward the end.
            return 0.7 + (pos - 0.9) * 2
        # Middle: weak attention with a gentle sinusoidal bump.
        return 0.3 + 0.1 * math.sin(pos * math.pi)

    return [_score(index / sample_size) for index in range(sample_size)]
def detect_lost_in_middle(messages: list, critical_keywords: list) -> list:
    """Identify critical items sitting in attention-degraded regions.

    Args:
        messages: Context messages (dicts with a "content" key, or raw values).
        critical_keywords: Case-insensitive keywords marking critical content.

    Returns:
        List of warning dicts with the message index, its relative position,
        the matched keyword, and a risk level ("high" in the 30%-70% band,
        "medium" elsewhere in the middle region).
    """
    if not messages:
        return []
    total = len(messages)
    # Lowercase keywords once instead of once per message per keyword.
    lowered_keywords = [(kw, kw.lower()) for kw in critical_keywords]
    warnings = []
    for i, msg in enumerate(messages):
        position = i / total
        # Only the middle region (10%-90%) suffers attention degradation.
        if not 0.1 < position < 0.9:
            continue
        # Lowercase the content once, not inside the keyword loop.
        content = str(msg.get("content", "") if isinstance(msg, dict) else msg).lower()
        for keyword, keyword_lc in lowered_keywords:
            if keyword_lc in content:
                warnings.append({
                    "position": i,
                    "position_pct": f"{position:.1%}",
                    "keyword": keyword,
                    "risk": "high" if 0.3 < position < 0.7 else "medium"
                })
    return warnings
def detect_poisoning_patterns(messages: list) -> dict:
    """Detect potential context poisoning indicators.

    Scans each message for error-flavored vocabulary and simple
    self-contradictions, folding both signals into a 0..1 risk score.

    Returns:
        Dict with "error_density" (error-pattern hits per message),
        "contradiction_count", and "poisoning_risk" (capped at 1.0).
    """
    error_patterns = [
        r"error", r"failed", r"exception", r"cannot", r"unable",
        r"invalid", r"not found", r"undefined", r"null"
    ]
    # Compile once and keep the source string for reporting; avoids the
    # per-message, per-pattern re-module cache lookup in the loop below.
    compiled_patterns = [(p, re.compile(p)) for p in error_patterns]
    # Simple contradiction check - look for both positive and negative statements
    contradiction_keywords = [
        ("is correct", "is not correct"),
        ("should work", "should not work"),
        ("will succeed", "will fail"),
        ("is valid", "is invalid"),
    ]
    errors_found = []
    contradictions = []
    for i, msg in enumerate(messages):
        content = str(msg.get("content", "") if isinstance(msg, dict) else msg).lower()
        # Check error patterns
        for pattern, regex in compiled_patterns:
            if regex.search(content):
                errors_found.append({"position": i, "pattern": pattern})
        # Check for contradiction keywords (simplified)
        for pos_phrase, neg_phrase in contradiction_keywords:
            if pos_phrase in content and neg_phrase in content:
                contradictions.append({"position": i, "type": "self-contradiction"})
    # max(..., 1) keeps the density well-defined for an empty message list.
    total = max(len(messages), 1)
    return {
        "error_density": len(errors_found) / total,
        "contradiction_count": len(contradictions),
        "poisoning_risk": min(1.0, (len(errors_found) * 0.1 + len(contradictions) * 0.3))
    }
def calculate_health_score(utilization: float, degradation_risk: float, poisoning_risk: float) -> float:
    """Combine the three risk signals into a single health score.

    1.0 = fully healthy, 0.0 = critical. Utilization only starts to hurt
    past 70%, where it is weighted 1.5x; degradation and poisoning risks
    contribute at 0.3x and 0.2x respectively.
    """
    over_budget = max(0.0, utilization - 0.7)
    penalty = over_budget * 1.5 + degradation_risk * 0.3 + poisoning_risk * 0.2
    # Clamp into [0, 1] so downstream bucketing stays well-defined.
    return max(0.0, min(1.0, 1.0 - penalty))
def get_health_status(score: float) -> HealthStatus:
    """Translate a numeric health score into a HealthStatus bucket."""
    # Thresholds are exclusive lower bounds, checked best-first.
    buckets = (
        (0.8, HealthStatus.HEALTHY),
        (0.6, HealthStatus.WARNING),
        (0.4, HealthStatus.DEGRADED),
    )
    for threshold, status in buckets:
        if score > threshold:
            return status
    return HealthStatus.CRITICAL
def analyze_context(messages: list, token_limit: int = 128000,
                    critical_keywords: Optional[list] = None) -> ContextAnalysis:
    """
    Comprehensive context health analysis.
    Args:
        messages: List of context messages
        token_limit: Model's context window size; must be positive
        critical_keywords: Keywords that should be at attention-favored positions
    Returns:
        ContextAnalysis with health metrics and recommendations
    Raises:
        ValueError: If token_limit is zero or negative (a utilization
            ratio would be meaningless or divide by zero).
    """
    if token_limit <= 0:
        # Fail loudly instead of raising a bare ZeroDivisionError below.
        raise ValueError(f"token_limit must be positive, got {token_limit}")
    critical_keywords = critical_keywords or ["goal", "task", "important", "critical", "must"]
    # Calculate token utilization
    total_tokens = estimate_message_tokens(messages)
    utilization = total_tokens / token_limit
    # Check for lost-in-middle issues; each flagged item adds 0.2 risk (capped at 1.0).
    middle_warnings = detect_lost_in_middle(messages, critical_keywords)
    degradation_risk = min(1.0, len(middle_warnings) * 0.2)
    # Check for poisoning
    poisoning = detect_poisoning_patterns(messages)
    poisoning_risk = poisoning["poisoning_risk"]
    # Calculate health
    health_score = calculate_health_score(utilization, degradation_risk, poisoning_risk)
    health_status = get_health_status(health_score)
    # Generate recommendations, most urgent first.
    recommendations = []
    if utilization > 0.8:
        recommendations.append("URGENT: Context utilization >80%. Trigger compaction immediately.")
    elif utilization > 0.7:
        recommendations.append("WARNING: Context utilization >70%. Plan for compaction.")
    if middle_warnings:
        recommendations.append(f"Found {len(middle_warnings)} critical items in middle region. "
                               "Consider moving to beginning/end.")
    if poisoning_risk > 0.3:
        recommendations.append("High poisoning risk detected. Review recent tool outputs for errors.")
    if health_status == HealthStatus.CRITICAL:
        recommendations.append("CRITICAL: Consider context reset with clean state.")
    return ContextAnalysis(
        total_tokens=total_tokens,
        token_limit=token_limit,
        utilization=utilization,
        health_status=health_status,
        health_score=health_score,
        degradation_risk=degradation_risk,
        poisoning_risk=poisoning_risk,
        recommendations=recommendations
    )
def calculate_budget(system: int, tools: int, docs: int, history: int,
                     buffer_pct: float = 0.15) -> dict:
    """Calculate a context budget allocation.

    A response buffer of `buffer_pct` of the component subtotal is
    reserved on top of the four component budgets; warning and critical
    thresholds sit at 70% and 80% of the grand total.
    """
    allocation = {
        "system_prompt": system,
        "tool_definitions": tools,
        "retrieved_docs": docs,
        "message_history": history,
    }
    subtotal = sum(allocation.values())
    reserved = int(subtotal * buffer_pct)
    grand_total = subtotal + reserved
    allocation["reserved_buffer"] = reserved
    return {
        "allocation": allocation,
        "total_budget": grand_total,
        "warning_threshold": int(grand_total * 0.7),
        "critical_threshold": int(grand_total * 0.8),
        "recommendations": [
            f"Trigger compaction at {int(grand_total * 0.7):,} tokens",
            f"Aggressive optimization at {int(grand_total * 0.8):,} tokens",
            f"Reserved {reserved:,} tokens ({buffer_pct:.0%}) for responses"
        ]
    }
def main():
    """CLI entry point: dispatch to the `analyze` or `budget` subcommand."""
    parser = argparse.ArgumentParser(description="Context health analyzer")
    subparsers = parser.add_subparsers(dest="command", required=True)

    # "analyze": read a message dump and report context health as JSON.
    analyze_parser = subparsers.add_parser("analyze", help="Analyze context health")
    analyze_parser.add_argument("context_file", help="JSON file with messages array")
    analyze_parser.add_argument("--limit", type=int, default=128000, help="Token limit")
    analyze_parser.add_argument("--keywords", nargs="+", help="Critical keywords to track")

    # "budget": compute a token budget split from component sizes.
    budget_parser = subparsers.add_parser("budget", help="Calculate context budget")
    for flag, default, description in (
            ("--system", 2000, "System prompt tokens"),
            ("--tools", 1500, "Tool definitions tokens"),
            ("--docs", 3000, "Retrieved docs tokens"),
            ("--history", 5000, "Message history tokens")):
        budget_parser.add_argument(flag, type=int, default=default, help=description)
    budget_parser.add_argument("--buffer", type=float, default=0.15, help="Buffer percentage")

    args = parser.parse_args()
    if args.command == "analyze":
        payload = load_json_file(args.context_file)
        # Accept either a bare message list or {"messages": [...]}.
        messages = payload if isinstance(payload, list) else payload.get("messages", [])
        analysis = analyze_context(messages, args.limit, args.keywords)
        report = {
            "total_tokens": analysis.total_tokens,
            "token_limit": analysis.token_limit,
            "utilization": f"{analysis.utilization:.1%}",
            "health_status": analysis.health_status.value,
            "health_score": f"{analysis.health_score:.2f}",
            "degradation_risk": f"{analysis.degradation_risk:.2f}",
            "poisoning_risk": f"{analysis.poisoning_risk:.2f}",
            "recommendations": analysis.recommendations
        }
        print(json.dumps(report, indent=2))
    elif args.command == "budget":
        budget = calculate_budget(args.system, args.tools, args.docs, args.history, args.buffer)
        print(json.dumps(budget, indent=2))


if __name__ == "__main__":
    main()