Coverage for astrocyte/mip/schema.py: 100%

92 statements  

« prev     ^ index     » next       coverage.py v7.15.0, created at 2026-07-04 05:24 +0000

1"""MIP schema — all dataclasses for MIP configuration. 

2 

3FFI-safe: no Any, no callables. 

4""" 

5 

6from __future__ import annotations 

7 

8from dataclasses import dataclass 

9from datetime import datetime 

10 

11 

@dataclass
class BankDefinition:
    """Declaration of a single bank listed in a MIP configuration.

    Only ``id`` is required; every other field defaults to ``None``.
    """

    #: Bank identifier. May contain templates like "student-{student_id}".
    id: str
    #: Optional human-readable description.
    description: str | None = None
    #: Optional access list, e.g. ["agent:tutor", "agent:grader"].
    access: list[str] | None = None
    #: Optional compliance regime label: "pdpa", "gdpr", etc.
    compliance: str | None = None

18 

19 

@dataclass
class MatchSpec:
    """A single match condition: a field name, an operator, and an operand.

    This class only carries the data; no evaluation logic lives here.
    """

    #: Field to test: "content_type", "metadata.student_id", "pii_detected".
    field: str
    #: Comparison: "eq", "in", "gte", "lte", "gt", "lt", "present", "absent".
    operator: str
    #: Operand for the comparison. Defaults to ``None``
    #: (presumably unused by "present"/"absent" — confirm against the evaluator).
    value: str | int | float | bool | list[str] | None = None

27 

28 

@dataclass
class MatchBlock:
    """Boolean composition of match conditions.

    Holds three optional lists of :class:`MatchSpec`. The names suggest
    all-of / any-of / none-of semantics; the combining logic is not defined
    in this class — confirm against the rule evaluator.
    """

    all_conditions: list[MatchSpec] | None = None
    any_conditions: list[MatchSpec] | None = None
    none_conditions: list[MatchSpec] | None = None

36 

37 

@dataclass
class ChunkerSpec:
    """Per-rule chunker override.

    Fields left as ``None`` fall back to ExtractionProfileConfig.
    """

    #: "sentence" | "dialogue" | "paragraph" | "fixed"
    strategy: str | None = None
    max_size: int | None = None
    overlap: int | None = None

45 

46 

@dataclass
class DedupSpec:
    """Per-rule dedup override.

    Fields left as ``None`` fall back to DedupConfig.
    """

    #: Threshold in the range 0.0–1.0 (range is documented, not enforced here).
    threshold: float | None = None
    #: "skip" | "skip_chunk" | "warn" | "update"
    action: str | None = None

53 

54 

@dataclass
class RerankSpec:
    """Per-rule reranker override. Resolved per-bank at recall time (P3).

    Both weights are optional and default to ``None``.
    """

    keyword_weight: float | None = None
    proper_noun_weight: float | None = None

61 

62 

@dataclass
class ReflectSpec:
    """Per-rule reflect override. Resolved at synthesis time."""

    #: "default" | "temporal_aware" | "evidence_strict"
    prompt: str | None = None
    #: Metadata field names to promote; capped at 5 fields (P4).
    #: The cap is not enforced by this dataclass.
    promote_metadata: list[str] | None = None

69 

70 

@dataclass
class PipelineSpec:
    """Pipeline-shaping action vocabulary. All sub-blocks optional.

    `version` is required when any pipeline field is set (P2). Persisted onto
    each retained record so recall can warn on rule-version drift.

    `preset` expands at load time into the explicit sub-block fields. Explicit
    fields override preset defaults.

    NOTE: this dataclass performs no validation itself; the `version` rule and
    the preset expansion are presumably handled by the loader — confirm there.
    """

    version: int | None = None
    #: "conversational" | "document" | "code" | "evidence_strict"
    preset: str | None = None
    chunker: ChunkerSpec | None = None
    dedup: DedupSpec | None = None
    rerank: RerankSpec | None = None
    reflect: ReflectSpec | None = None
    #: Exponential half-life (days) for the temporal retrieval strategy.
    #: When set, overrides the orchestrator's default at recall time for
    #: memories stored under this bank. Shorter (e.g. 1.0) for fast-moving
    #: chat workloads; longer (e.g. 90.0) for long-term knowledge bases
    #: where answers legitimately live months back. See
    #: :mod:`astrocyte.pipeline.retrieval` and
    #: ``docs/_design/platform-positioning.md`` §LongMemEval root causes.
    temporal_half_life_days: float | None = None

96 

97 

@dataclass
class ForgetSpec:
    """Per-rule forget policy. Resolved at forget time, keyed by target bank.

    All fields optional; absent fields fall back to caller-supplied arguments
    or library defaults. ``version`` is required when any field is set (P2),
    same semantics as :class:`PipelineSpec`.

    NOTE: none of these constraints are enforced by the dataclass itself.
    """

    version: int | None = None
    #: "gdpr" | "student" | "audit-strict"
    preset: str | None = None
    #: "soft" | "hard" | "tombstone"
    mode: str | None = None
    #: "none" | "recommended" | "required"
    audit: str | None = None
    #: Cascade delete derived chunks/embeddings.
    cascade: bool | None = None
    #: Refuse forget if legal hold present.
    respect_legal_hold: bool | None = None
    #: Refuse forget on records younger than N days.
    min_age_days: int | None = None
    #: Cap on records per forget request.
    max_per_call: int | None = None

115 

116 

@dataclass
class ActionSpec:
    """Action half of a routing rule (see ``RoutingRule.action``).

    Every field is optional except ``confidence``, which defaults to 1.0.
    """

    #: Target bank. May contain templates: "student-{metadata.student_id}".
    bank: str | None = None
    #: Tags to attach. May contain templates.
    tags: list[str] | None = None
    #: "default" | "redact_before_store" | "encrypt" | "reject"
    retain_policy: str | None = None
    #: "mip" or None.
    escalate: str | None = None
    confidence: float = 1.0
    #: Optional pipeline-shaping overrides.
    pipeline: PipelineSpec | None = None
    #: Optional forget-policy overrides (Phase 4).
    forget: ForgetSpec | None = None

126 

127 

@dataclass
class RoutingRule:
    """A named, prioritized rule pairing a match block with an action.

    ``name``, ``priority``, ``match`` and ``action`` are required; the
    remaining fields are optional flags and metadata.
    """

    name: str
    priority: int
    match: MatchBlock
    action: ActionSpec
    #: Compliance-mandatory, cannot be overridden by intent.
    override: bool = False
    # Phase 5 — operator ergonomics
    #: Shadow mode: rule is evaluated and logged but its action is NOT applied.
    #: Used to canary-test new rules with zero behavioral impact.
    shadow: bool = False
    #: Activation window. If now < active_from or now > active_until the rule
    #: is skipped (treated as not present). Useful for staged rollouts.
    active_from: datetime | None = None
    active_until: datetime | None = None
    #: Free-form labels surfaced on RoutingDecision and structured logs so
    #: operators can group metrics by rule purpose ("compliance", "experiment").
    observability_tags: list[str] | None = None

146 

147 

@dataclass
class EscalationCondition:
    """One escalation test, as listed in ``IntentPolicy.escalate_when``."""

    #: "matched_rules", "confidence", "conflicting_rules"
    condition: str
    #: "eq", "lt", "gt", "gte", "lte"
    operator: str
    #: Operand compared against the condition; defaults to 0.
    value: str | int | float | bool = 0

153 

154 

@dataclass
class IntentPolicy:
    """Intent-escalation settings: conditions, prompt context, constraints.

    All fields are optional and default to ``None``.
    """

    escalate_when: list[EscalationCondition] | None = None
    #: Prompt template with {banks}, {tags}.
    model_context: str | None = None
    #: Keys: "cannot_override" (list[str]), "must_justify" (bool),
    #: "max_tokens" (int).
    constraints: dict[str, list[str] | bool | int] | None = None

161 

162 

@dataclass
class MipConfig:
    """MIP configuration container: banks, routing rules, and intent policy.

    Every field has a default, so ``MipConfig()`` is a valid empty config.
    """

    version: str = "1.0"
    banks: list[BankDefinition] | None = None
    rules: list[RoutingRule] | None = None
    intent_policy: IntentPolicy | None = None
    #: Phase 5 — how to resolve multiple matches at the same priority.
    #: ``"first"`` (default) preserves declaration order; ``"error"`` raises
    #: MipRoutingError so authoring conflicts surface loudly; ``"most_specific"``
    #: picks the rule with the most match conditions.
    tie_breaker: str = "first"