Coverage for astrocyte/mip/schema.py: 100%
92 statements
« prev ^ index » next coverage.py v7.15.0, created at 2026-07-04 05:24 +0000
« prev ^ index » next coverage.py v7.15.0, created at 2026-07-04 05:24 +0000
1"""MIP schema — all dataclasses for MIP configuration.
3FFI-safe: no Any, no callables.
4"""
6from __future__ import annotations
8from dataclasses import dataclass
9from datetime import datetime
@dataclass
class BankDefinition:
    """Declaration of one bank in the MIP configuration.

    Only ``id`` is required; every other field defaults to ``None``.
    """

    #: Bank identifier. May contain templates like "student-{student_id}".
    id: str
    #: Optional description of the bank.
    description: str | None = None
    #: Optional access entries, e.g. ["agent:tutor", "agent:grader"].
    access: list[str] | None = None
    #: Optional compliance label: "pdpa", "gdpr", etc.
    compliance: str | None = None
@dataclass
class MatchSpec:
    """One match condition: a field name, an operator, and an optional value."""

    #: Name of the field to test, e.g. "content_type",
    #: "metadata.student_id", or "pii_detected".
    field: str
    #: Comparison operator: "eq", "in", "gte", "lte", "gt", "lt",
    #: "present", or "absent".
    operator: str
    #: Operand for the operator; defaults to ``None``.
    value: str | int | float | bool | list[str] | None = None
@dataclass
class MatchBlock:
    """Container that combines match conditions with boolean logic.

    Holds three optional lists of :class:`MatchSpec`; each defaults to
    ``None``.
    """

    # NOTE(review): names suggest all-of / any-of / none-of semantics —
    # confirm against the rule evaluator, which is not part of this module.
    all_conditions: list[MatchSpec] | None = None
    any_conditions: list[MatchSpec] | None = None
    none_conditions: list[MatchSpec] | None = None
@dataclass
class ChunkerSpec:
    """Chunker override attached to a single rule.

    Any field left as ``None`` falls back to ExtractionProfileConfig.
    """

    #: One of "sentence" | "dialogue" | "paragraph" | "fixed".
    strategy: str | None = None
    max_size: int | None = None
    overlap: int | None = None
@dataclass
class DedupSpec:
    """Dedup override attached to a single rule.

    Any field left as ``None`` falls back to DedupConfig.
    """

    #: Threshold in the range 0.0–1.0.
    threshold: float | None = None
    #: One of "skip" | "skip_chunk" | "warn" | "update".
    action: str | None = None
@dataclass
class RerankSpec:
    """Reranker override attached to a single rule.

    Resolved per-bank at recall time (P3). Both weights default to ``None``.
    """

    keyword_weight: float | None = None
    proper_noun_weight: float | None = None
@dataclass
class ReflectSpec:
    """Reflect override attached to a single rule; resolved at synthesis time."""

    #: One of "default" | "temporal_aware" | "evidence_strict".
    prompt: str | None = None
    #: Metadata fields to promote; capped at 5 fields (P4).
    promote_metadata: list[str] | None = None
@dataclass
class PipelineSpec:
    """Action vocabulary that shapes the pipeline; every sub-block is optional.

    ``version`` must be supplied whenever any pipeline field is set (P2). It
    is persisted onto each retained record so that recall can warn when the
    rule version has drifted.

    ``preset`` is expanded at load time into the explicit sub-block fields;
    fields given explicitly take precedence over the preset's defaults.
    """

    version: int | None = None
    #: One of "conversational" | "document" | "code" | "evidence_strict".
    preset: str | None = None
    chunker: ChunkerSpec | None = None
    dedup: DedupSpec | None = None
    rerank: RerankSpec | None = None
    reflect: ReflectSpec | None = None
    #: Exponential half-life, in days, used by the temporal retrieval
    #: strategy. If set, it replaces the orchestrator's default at recall
    #: time for memories stored under this bank. Use a short value
    #: (e.g. 1.0) for fast-moving chat workloads and a long one (e.g. 90.0)
    #: for long-term knowledge bases where answers legitimately live months
    #: back. See :mod:`astrocyte.pipeline.retrieval` and
    #: ``docs/_design/platform-positioning.md`` §LongMemEval root causes.
    temporal_half_life_days: float | None = None
@dataclass
class ForgetSpec:
    """Forget policy attached to a single rule.

    Resolved at forget time and keyed by the target bank. Every field is
    optional; a field left as ``None`` falls back to caller-supplied
    arguments or library defaults. As with :class:`PipelineSpec`,
    ``version`` is required when any field is set (P2).
    """

    version: int | None = None
    #: One of "gdpr" | "student" | "audit-strict".
    preset: str | None = None
    #: One of "soft" | "hard" | "tombstone".
    mode: str | None = None
    #: One of "none" | "recommended" | "required".
    audit: str | None = None
    #: Cascade delete derived chunks/embeddings.
    cascade: bool | None = None
    #: Refuse forget if a legal hold is present.
    respect_legal_hold: bool | None = None
    #: Refuse forget on records younger than N days.
    min_age_days: int | None = None
    #: Cap on records per forget request.
    max_per_call: int | None = None
@dataclass
class ActionSpec:
    """Action half of a routing rule.

    All fields are optional; ``confidence`` defaults to ``1.0`` and every
    other field defaults to ``None``.
    """

    #: Target bank. May contain templates: "student-{metadata.student_id}".
    bank: str | None = None
    #: Tags; may contain templates.
    tags: list[str] | None = None
    #: One of "default" | "redact_before_store" | "encrypt" | "reject".
    retain_policy: str | None = None
    #: "mip" or None.
    escalate: str | None = None
    confidence: float = 1.0
    #: Optional pipeline-shaping overrides.
    pipeline: PipelineSpec | None = None
    #: Optional forget-policy overrides (Phase 4).
    forget: ForgetSpec | None = None
@dataclass
class RoutingRule:
    """A named, prioritised routing rule pairing a match block with an action.

    ``name``, ``priority``, ``match`` and ``action`` are required; the
    remaining fields have defaults.
    """

    name: str
    priority: int
    match: MatchBlock
    action: ActionSpec
    #: Compliance-mandatory: when set, the rule cannot be overridden by intent.
    override: bool = False

    # --- Phase 5: operator ergonomics ---

    #: Shadow mode. The rule is evaluated and logged, but its action is NOT
    #: applied, which lets new rules be canary-tested with zero behavioral
    #: impact.
    shadow: bool = False
    #: Start of the activation window. If now < active_from or
    #: now > active_until the rule is skipped (treated as not present).
    #: Useful for staged rollouts.
    active_from: datetime | None = None
    #: End of the activation window; see ``active_from``.
    active_until: datetime | None = None
    #: Free-form labels surfaced on RoutingDecision and in structured logs,
    #: so operators can group metrics by rule purpose
    #: ("compliance", "experiment").
    observability_tags: list[str] | None = None
@dataclass
class EscalationCondition:
    """One escalation condition: a condition name, an operator, and a value."""

    #: One of "matched_rules", "confidence", "conflicting_rules".
    condition: str
    #: One of "eq", "lt", "gt", "gte", "lte".
    operator: str
    #: Operand for the operator; defaults to ``0``.
    value: str | int | float | bool = 0
@dataclass
class IntentPolicy:
    """Intent policy settings; every field is optional and defaults to ``None``."""

    escalate_when: list[EscalationCondition] | None = None
    #: Prompt template with {banks}, {tags}.
    model_context: str | None = None
    #: Recognised keys: "cannot_override" (list[str]), "must_justify" (bool),
    #: "max_tokens" (int).
    constraints: dict[str, list[str] | bool | int] | None = None
@dataclass
class MipConfig:
    """Top-level MIP configuration: banks, routing rules, and intent policy.

    Every field has a default, so ``MipConfig()`` is a valid empty config.
    """

    version: str = "1.0"
    banks: list[BankDefinition] | None = None
    rules: list[RoutingRule] | None = None
    intent_policy: IntentPolicy | None = None
    #: Phase 5 — strategy for resolving several matches at the same priority:
    #:
    #: * ``"first"`` (default) preserves declaration order;
    #: * ``"error"`` raises MipRoutingError so authoring conflicts surface
    #:   loudly;
    #: * ``"most_specific"`` picks the rule with the most match conditions.
    tie_breaker: str = "first"