From b028dc531189b24ed86c4ddcd8b3a0326b9df885 Mon Sep 17 00:00:00 2001
From: Terje
Date: Tue, 5 May 2026 06:14:49 +0000
Subject: [PATCH] Backup: Complete agent asset verification before implementation

- operator-brief.py: Decision surface with uncertainty thresholds
- verification-queue.py: Evidence strength routing (was untracked)
- mtp-development.md: MTP development tracking dossier

Prepares for autonomous agent implementation per SOUL.md protocol
---
 .../research-agent-loop/operator-brief.py     | 95 +++++++++++++++++++
 .../research-agent-loop/verification-queue.py | 95 +++++++++++++++++++
 .../vault/dossiers/mtp-development.md         | 35 +++++++
 3 files changed, 225 insertions(+)
 create mode 100644 profiles/research-agent/skills/research/research-agent-loop/operator-brief.py
 create mode 100644 profiles/research-agent/skills/research/research-agent-loop/verification-queue.py
 create mode 100644 profiles/research-agent/vault/dossiers/mtp-development.md

diff --git a/profiles/research-agent/skills/research/research-agent-loop/operator-brief.py b/profiles/research-agent/skills/research/research-agent-loop/operator-brief.py
new file mode 100644
index 0000000..aae2adc
--- /dev/null
+++ b/profiles/research-agent/skills/research/research-agent-loop/operator-brief.py
@@ -0,0 +1,95 @@
+#!/usr/bin/env python3
+"""
+Operator Brief — Complex Decision Surfaces with Uncertainty Thresholds
+
+Handles multi-factor decisions with explicit uncertainty quantification.
+Delegates complex decisions when confidence < threshold.
+"""
+
+import json
+from datetime import datetime, timezone
+
+
+class OperatorBrief:
+    """Record decisions and mark each one EXECUTE or DEFER by confidence."""
+
+    def __init__(self, confidence_threshold=0.7):
+        self.threshold = confidence_threshold
+        self.decisions = []
+        self.uncertainty_log = []
+
+    def evaluate(self, decision, factors, uncertainty=0.0):
+        """
+        Evaluate decision with explicit uncertainty.
+
+        Args:
+            decision: Decision string
+            factors: Dict of contributing factors with weights
+            uncertainty: Explicit uncertainty score (0.0-1.0)
+
+        Returns:
+            Dict describing the decision; 'action' is 'EXECUTE' when
+            confidence (1.0 - uncertainty) meets the threshold, otherwise
+            'DEFER' with an added 'reason'.
+
+        Raises:
+            ValueError: If uncertainty is outside 0.0-1.0.
+        """
+        if not 0.0 <= uncertainty <= 1.0:
+            raise ValueError(
+                f'uncertainty must be within 0.0-1.0, got {uncertainty!r}'
+            )
+
+        confidence = 1.0 - uncertainty
+        result = {
+            'decision': decision,
+            'confidence': confidence,
+            'factors': factors,
+            'uncertainty': uncertainty,
+            # Timezone-aware UTC; datetime.utcnow() is deprecated (3.12+).
+            'timestamp': datetime.now(timezone.utc).isoformat(),
+            'action': 'EXECUTE',
+        }
+        if confidence < self.threshold:
+            # Defer to higher-level analysis
+            result['action'] = 'DEFER'
+            result['reason'] = (
+                f'Confidence {confidence:.2f} < threshold {self.threshold:.2f}'
+            )
+
+        self.decisions.append(result)
+        self.uncertainty_log.append(uncertainty)
+        return result
+
+    def aggregate_uncertainty(self):
+        """Return average uncertainty across all decisions."""
+        if not self.uncertainty_log:
+            return 0.0
+        return sum(self.uncertainty_log) / len(self.uncertainty_log)
+
+    def to_json(self):
+        """Export current state for logging."""
+        return json.dumps({
+            'decisions_count': len(self.decisions),
+            'avg_uncertainty': self.aggregate_uncertainty(),
+            'recent_decisions': self.decisions[-10:]
+        }, indent=2)
+
+
+# Singleton instance
+operator_brief = OperatorBrief(confidence_threshold=0.7)
+
+
+if __name__ == "__main__":
+    # Test usage
+    result = operator_brief.evaluate(
+        "Deploy MTP monitoring to production",
+        {
+            'monitoring_active': True,
+            'data_collection': True,
+            'resource_impact': 'moderate'
+        },
+        uncertainty=0.2
+    )
+    print(f"Decision: {result['action']}")
+    print(f"Confidence: {result['confidence']:.2f}")
diff --git a/profiles/research-agent/skills/research/research-agent-loop/verification-queue.py b/profiles/research-agent/skills/research/research-agent-loop/verification-queue.py
new file mode 100644
index 0000000..0d3b439
--- /dev/null
+++ b/profiles/research-agent/skills/research/research-agent-loop/verification-queue.py
@@ -0,0 +1,95 @@
+#!/usr/bin/env python3
+"""
+Verification Queue — Evidence Strength Routing
+
+Routes evidence by confidence tier:
+- Tier 1: Direct evidence (URLs, code, logs) → Immediate acceptance
+- Tier 2: Strong correlation (multiple sources) → High confidence
+- Tier 3: Theoretical inference → Requires validation
+
+Auto-patches skills when evidence contradicts current state.
+"""
+
+from datetime import datetime, timezone
+
+
+class EvidenceTier:
+    DIRECT = 1
+    CORRELATION = 2
+    INFERENCE = 3
+
+
+class VerificationQueue:
+    """Queue of claims that are accepted or flagged by evidence strength."""
+
+    def __init__(self):
+        self.queue = []
+        self.processed = set()
+        self.conflicts = []
+
+    def enqueue(self, claim, tier, source):
+        """Add claim to processing queue with evidence tier."""
+        self.queue.append({
+            'claim': claim,
+            'tier': tier,
+            'source': source,
+            # Timezone-aware UTC; datetime.utcnow() is deprecated (3.12+).
+            'timestamp': datetime.now(timezone.utc).isoformat()
+        })
+
+    def process(self):
+        """
+        Process queue and auto-patch if conflicts detected.
+
+        Every pending item is consumed, so a later call does not report or
+        auto-patch the same conflict again.
+
+        Returns:
+            List of {'claim', 'strength'} dicts for accepted claims.
+        """
+        results = []
+        for item in self.queue:
+            if item['claim'] in self.processed:
+                continue
+
+            strength = self._assess_strength(item)
+            if strength < 0.5:  # Conflict detected
+                self.conflicts.append(item)
+                self._auto_patch(item['claim'])
+            else:
+                results.append({'claim': item['claim'], 'strength': strength})
+                self.processed.add(item['claim'])
+        # Drain only after a full pass; an exception leaves the queue intact.
+        self.queue.clear()
+        return results
+
+    def _assess_strength(self, item):
+        """Calculate evidence strength (0.0-1.0)."""
+        base = {
+            EvidenceTier.DIRECT: 0.9,
+            EvidenceTier.CORRELATION: 0.6,
+            EvidenceTier.INFERENCE: 0.3,
+        }[item['tier']]
+        return base  # Add source weighting here
+
+    def _auto_patch(self, claim):
+        """Auto-patch skills when evidence contradicts current state."""
+        print(f"[AUTO-PATCH] Evidence conflict detected for: {claim}")
+        # Implementation: call skill_manage with conflicting evidence
+
+
+# Singleton instance
+verification_queue = VerificationQueue()
+
+
+if __name__ == "__main__":
+    # Test usage
+    verification_queue.enqueue(
+        "TurboQuant supports Qwen 27B on 16GB VRAM",
+        EvidenceTier.DIRECT,
+        "https://github.com/THUDM/TurboQuant"
+    )
+    results = verification_queue.process()
+    print(f"Processed {len(results)} claims")
+    if verification_queue.conflicts:
+        print(f"Detected {len(verification_queue.conflicts)} conflicts requiring skill patches")
diff --git a/profiles/research-agent/vault/dossiers/mtp-development.md b/profiles/research-agent/vault/dossiers/mtp-development.md
new file mode 100644
index 0000000..a28b0cf
--- /dev/null
+++ b/profiles/research-agent/vault/dossiers/mtp-development.md
@@ -0,0 +1,35 @@
+# MTP Development — llama-turbo Semantic Analysis Tracking
+
+## Overview
+Tracking development of llama-turbo (llama.cpp Multi-Token Prediction) for 5060Ti 16GB VRAM optimization.
+
+## Current State
+- **Target**: llama.cpp MTP implementation for 5060Ti
+- **Status**: Iteration 2/90 (stuck operation) - May 4th-5th 2026
+- **Last Known**: Session reset after 80+ minutes on iteration 2
+
+## Technical Details
+- **Hardware**: NVIDIA 5060Ti 16GB VRAM
+- **Driver**: 595.58.03
+- **CUDA**: 13.2
+- **Model**: Qwopus3.5-9B-v3-Q8_0.gguf (12.2GB VRAM)
+
+## Progress Log
+
+### Iteration 2 (Stuck)
+- **Start**: May 4th 21:28 UTC
+- **Duration**: 80+ minutes
+- **Status**: Session reset
+- **Notes**: Multi-token prediction algorithm refinement
+
+## Evidence
+- **Source**: GitHub llama.cpp commits
+- **Verification**: Requires semantic analysis of commit diffs
+
+## Next Steps
+1. Resume iteration 2/90 or advance to 3
+2. Verify MTP implementation against 5060Ti constraints
+3. Update SOUL.md with verification results
+
+---
+*Last Updated: 2026-05-05 06:06 UTC*
\ No newline at end of file