Coverage for astrocyte/mip/presets.py: 94%
47 statements
« prev ^ index » next coverage.py v7.15.0, created at 2026-07-04 05:24 +0000
« prev ^ index » next coverage.py v7.15.0, created at 2026-07-04 05:24 +0000
1"""MIP pipeline presets — named bundles of chunker/dedup/rerank/reflect overrides.
3Presets are the **primary** authoring interface for pipeline shaping (P1).
4Authors write `pipeline: { preset: conversational }` rather than picking
5individual knobs. Raw overrides are supported but documented as advanced.
7Expansion happens once at load time (in loader._parse_pipeline). Downstream
8code only ever sees fully-resolved PipelineSpec instances — never preset names.
10To add a preset: add an entry to PRESETS. Update docs/_plugins/mip-developer-guide.md.
11"""
13from __future__ import annotations
15from dataclasses import replace
17from astrocyte.mip.schema import (
18 ChunkerSpec,
19 DedupSpec,
20 ForgetSpec,
21 PipelineSpec,
22 ReflectSpec,
23 RerankSpec,
24)
# Built-in pipeline presets, keyed by preset name. Each value is a
# PipelineSpec holding the defaults that expand_preset() merges with any
# explicit overrides on the spec being expanded.
PRESETS: dict[str, PipelineSpec] = {
    "conversational": PipelineSpec(
        chunker=ChunkerSpec(
            strategy="dialogue",
            max_size=800,
            overlap=0,
        ),
        dedup=DedupSpec(
            threshold=0.92,
            action="skip_chunk",
        ),
        rerank=RerankSpec(
            keyword_weight=0.08,
            proper_noun_weight=0.15,
        ),
        reflect=ReflectSpec(
            prompt="temporal_aware",
            promote_metadata=["speaker", "occurred_at"],
        ),
    ),
    "document": PipelineSpec(
        chunker=ChunkerSpec(
            strategy="paragraph",
            max_size=1200,
            overlap=100,
        ),
        dedup=DedupSpec(
            threshold=0.95,
            action="skip",
        ),
        rerank=RerankSpec(
            keyword_weight=0.10,
            proper_noun_weight=0.05,
        ),
        reflect=ReflectSpec(
            prompt="default",
            promote_metadata=None,
        ),
    ),
    "code": PipelineSpec(
        chunker=ChunkerSpec(
            strategy="fixed",
            max_size=1500,
            overlap=200,
        ),
        dedup=DedupSpec(
            threshold=0.98,
            action="skip",
        ),
        rerank=RerankSpec(
            keyword_weight=0.12,
            proper_noun_weight=0.0,
        ),
        reflect=ReflectSpec(
            prompt="evidence_strict",
            promote_metadata=None,
        ),
    ),
    "evidence_strict": PipelineSpec(
        # No chunker default here: the caller's chunker is inherited as-is.
        chunker=None,
        dedup=DedupSpec(
            threshold=0.98,
            action="skip",
        ),
        rerank=RerankSpec(
            keyword_weight=0.10,
            proper_noun_weight=0.05,
        ),
        reflect=ReflectSpec(
            prompt="evidence_strict",
            promote_metadata=["source", "occurred_at"],
        ),
    ),
}
def is_known_preset(name: str) -> bool:
    """Return True when ``name`` is a key of :data:`PRESETS`."""
    return name in PRESETS
def list_presets() -> list[str]:
    """Return the names of all pipeline presets in sorted order."""
    # Iterating a dict yields its keys, so no explicit .keys() call is needed.
    return sorted(PRESETS)
63# ---------------------------------------------------------------------------
64# Forget presets (Phase 4)
65# ---------------------------------------------------------------------------
# Built-in forget presets, keyed by preset name. Each value is a ForgetSpec
# holding the defaults that expand_forget_preset() merges with explicit
# overrides on the spec being expanded.
FORGET_PRESETS: dict[str, ForgetSpec] = {
    # GDPR right-to-erasure: hard delete with a required audit trail and
    # cascading removal of derived data. Legal hold MUST be respected
    # (compliance-mandated).
    "gdpr": ForgetSpec(
        mode="hard",
        audit="required",
        cascade=True,
        respect_legal_hold=True,
        min_age_days=0,
    ),
    # Student records (FERPA-style): soft delete with a grace period, audit
    # on, and records younger than 7 days are refused to prevent accidents.
    "student": ForgetSpec(
        mode="soft",
        audit="recommended",
        cascade=True,
        respect_legal_hold=True,
        min_age_days=7,
    ),
    # Audit-strict: tombstone replacement (preserves the cryptographic
    # chain), audit required, and cascade off so that each tombstone is
    # tracked individually.
    "audit-strict": ForgetSpec(
        mode="tombstone",
        audit="required",
        cascade=False,
        respect_legal_hold=True,
        min_age_days=0,
    ),
}
def is_known_forget_preset(name: str) -> bool:
    """Return True when ``name`` is a key of :data:`FORGET_PRESETS`."""
    return name in FORGET_PRESETS
def list_forget_presets() -> list[str]:
    """Return the names of all forget presets in sorted order."""
    # Iterating a dict yields its keys, so no explicit .keys() call is needed.
    return sorted(FORGET_PRESETS)
def expand_forget_preset(spec: ForgetSpec) -> ForgetSpec:
    """Resolve a named forget preset against the explicit fields on ``spec``.

    When ``spec.preset`` is ``None`` the spec is returned unchanged. Otherwise
    a new :class:`ForgetSpec` is built with ``preset`` cleared and ``version``
    copied from ``spec``; every other field uses the value on ``spec`` unless
    that value is ``None``, in which case the preset's value is used.

    Unknown preset names are not validated here: the lookup in
    ``FORGET_PRESETS`` raises ``KeyError``, so callers are expected to check
    the name beforehand.
    """
    preset_name = spec.preset
    if preset_name is None:
        return spec

    defaults = FORGET_PRESETS[preset_name]

    def resolve(field_name: str):
        # An explicit (non-None) value on the spec beats the preset default.
        explicit = getattr(spec, field_name)
        if explicit is not None:
            return explicit
        return getattr(defaults, field_name)

    return ForgetSpec(
        version=spec.version,
        preset=None,
        mode=resolve("mode"),
        audit=resolve("audit"),
        cascade=resolve("cascade"),
        respect_legal_hold=resolve("respect_legal_hold"),
        min_age_days=resolve("min_age_days"),
        max_per_call=resolve("max_per_call"),
    )
def expand_preset(spec: PipelineSpec) -> PipelineSpec:
    """Resolve a named pipeline preset against the explicit fields on `spec`.

    When `spec.preset` is None, `spec` is returned unchanged (raw overrides
    only). Otherwise a new PipelineSpec is built with `preset` cleared,
    `version` copied from `spec`, and each sub-block (chunker, dedup, rerank,
    reflect) merged so that explicit values on `spec` win over the preset's.

    Unknown preset names are not validated here: the lookup in PRESETS raises
    KeyError. Callers should check with is_known_preset during loader
    validation so the error can mention the rule name.
    """
    preset_name = spec.preset
    if preset_name is None:
        return spec

    defaults = PRESETS[preset_name]

    # An explicit half-life wins over the preset default. No built-in preset
    # sets one today, but the fallback keeps the field forward-compatible.
    half_life = spec.temporal_half_life_days
    if half_life is None:
        half_life = defaults.temporal_half_life_days

    return PipelineSpec(
        version=spec.version,
        preset=None,  # cleared post-expansion
        chunker=_merge_chunker(defaults.chunker, spec.chunker),
        dedup=_merge_dedup(defaults.dedup, spec.dedup),
        rerank=_merge_rerank(defaults.rerank, spec.rerank),
        reflect=_merge_reflect(defaults.reflect, spec.reflect),
        temporal_half_life_days=half_life,
    )
def _merge_chunker(base: ChunkerSpec | None, override: ChunkerSpec | None) -> ChunkerSpec | None:
    """Overlay ``override`` on ``base``, field by field.

    If either side is ``None`` the other side is returned as-is (which may
    itself be ``None``). Otherwise a copy of ``base`` is returned in which
    each non-``None`` field of ``override`` replaces the base value.
    """
    if base is None or override is None:
        # Nothing to merge: hand back whichever side exists.
        return base if override is None else override

    strategy = base.strategy if override.strategy is None else override.strategy
    max_size = base.max_size if override.max_size is None else override.max_size
    overlap = base.overlap if override.overlap is None else override.overlap
    return replace(base, strategy=strategy, max_size=max_size, overlap=overlap)
def _merge_dedup(base: DedupSpec | None, override: DedupSpec | None) -> DedupSpec | None:
    """Overlay ``override`` on ``base``, field by field.

    If either side is ``None`` the other side is returned as-is (which may
    itself be ``None``). Otherwise a copy of ``base`` is returned in which
    each non-``None`` field of ``override`` replaces the base value.
    """
    if base is None or override is None:
        # Nothing to merge: hand back whichever side exists.
        return base if override is None else override

    threshold = base.threshold if override.threshold is None else override.threshold
    action = base.action if override.action is None else override.action
    return replace(base, threshold=threshold, action=action)
def _merge_rerank(base: RerankSpec | None, override: RerankSpec | None) -> RerankSpec | None:
    """Overlay ``override`` on ``base``, field by field.

    If either side is ``None`` the other side is returned as-is (which may
    itself be ``None``). Otherwise a copy of ``base`` is returned in which
    each non-``None`` field of ``override`` replaces the base value.
    """
    if base is None or override is None:
        # Nothing to merge: hand back whichever side exists.
        return base if override is None else override

    keyword_weight = override.keyword_weight
    if keyword_weight is None:
        keyword_weight = base.keyword_weight

    proper_noun_weight = override.proper_noun_weight
    if proper_noun_weight is None:
        proper_noun_weight = base.proper_noun_weight

    return replace(
        base,
        keyword_weight=keyword_weight,
        proper_noun_weight=proper_noun_weight,
    )
def _merge_reflect(base: ReflectSpec | None, override: ReflectSpec | None) -> ReflectSpec | None:
    """Overlay ``override`` on ``base``, field by field.

    If either side is ``None`` the other side is returned as-is (which may
    itself be ``None``). Otherwise a copy of ``base`` is returned in which
    each non-``None`` field of ``override`` replaces the base value.
    ``promote_metadata`` is replaced wholesale, never combined with the
    base value.
    """
    if base is None or override is None:
        # Nothing to merge: hand back whichever side exists.
        return base if override is None else override

    prompt = override.prompt
    if prompt is None:
        prompt = base.prompt

    promote_metadata = override.promote_metadata
    if promote_metadata is None:
        promote_metadata = base.promote_metadata

    return replace(base, prompt=prompt, promote_metadata=promote_metadata)