Coverage for astrocyte/pipeline/retain_fsm/context.py: 98%
44 statements
« prev ^ index » next coverage.py v7.15.0, created at 2026-07-04 05:24 +0000
« prev ^ index » next coverage.py v7.15.0, created at 2026-07-04 05:24 +0000
"""M14.0: shared context + services for the retain FSM.

The FSM engine drives stateful transitions over a single ``RetainContext``
per source. Every state mutates the context in place — appends to lists,
sets output fields — never replaces it. The context is also the unit of
checkpoint persistence: serialise it between transitions, deserialise to
resume.

``RetainServices`` bundles the dependencies states need (store, provider,
config). It is passed alongside ``ctx`` into every state coroutine, mirroring
the pattern Pydantic / FastAPI uses for dependency-injected request handlers.

See ``docs/_design/m13-m14-roadmap.md`` §4 for the architectural shape.
"""
16from __future__ import annotations
18from dataclasses import dataclass, field
19from datetime import datetime, timezone
20from typing import TYPE_CHECKING, Any
22if TYPE_CHECKING:
23 from astrocyte.provider import LLMProvider, MentalModelStore, PageIndexStore
24 from astrocyte.types import PageIndexFact, PageIndexSection
27@dataclass
28class StepLogEntry:
29 """One entry in the per-source audit trail. Karpathy's log.md is
30 materialised from these on M14.5; for M14.0+ they're an in-memory
31 debug aid and the basis for `RetainContext.duration_ms_by_state`.
32 """
34 state: str
35 started_at: datetime
36 completed_at: datetime | None = None
37 duration_ms: float | None = None
38 error: str | None = None
39 notes: dict[str, Any] = field(default_factory=dict)
@dataclass
class RetainContext:
    """All state for one source through the retain FSM.

    Inputs are populated by the caller before ``RetainFSM.run`` starts.
    Outputs are populated by states as the pipeline progresses.
    """

    # ── Inputs ──────────────────────────────────────────────────────────
    bank_id: str
    source_id: str
    md_text: str
    reference_date: datetime | None = None

    # ── Outputs (set by states as the pipeline progresses) ──────────────
    # Every list below uses ``default_factory`` so each context owns its own
    # list instead of sharing one mutable default across instances.
    document_id: str | None = None
    sections: list[PageIndexSection] = field(default_factory=list)
    facts: list[PageIndexFact] = field(default_factory=list)
    entities: list[str] = field(default_factory=list)
    wikis_created: list[str] = field(default_factory=list)
    wikis_updated: list[str] = field(default_factory=list)
    supersedes_edges: list[tuple[str, str]] = field(default_factory=list)

    # ── Control / observability ─────────────────────────────────────────
    last_state: str = "INIT"
    step_log: list[StepLogEntry] = field(default_factory=list)
    errors: list[str] = field(default_factory=list)
    # Timezone-aware (UTC) creation time, captured per instance.
    started_at: datetime = field(
        default_factory=lambda: datetime.now(tz=timezone.utc),
    )
    completed_at: datetime | None = None

    def duration_ms_by_state(self) -> dict[str, float]:
        """Aggregate per-state wall time from the step log.

        Used by tests and the M14.5 log.md emitter; safe to call mid-run.

        Returns:
            A mapping of state name to the summed ``duration_ms`` of every
            step-log entry recorded for that state. Entries whose
            ``duration_ms`` is ``None`` are skipped, so a state with only
            such entries does not appear in the result.
        """
        out: dict[str, float] = {}
        for entry in self.step_log:
            if entry.duration_ms is None:
                # No duration recorded for this entry; nothing to add.
                continue
            # A state may appear in several entries; accumulate them.
            out[entry.state] = out.get(entry.state, 0.0) + entry.duration_ms
        return out
@dataclass
class RetainServices:
    """Dependencies states need. Constructed once at FSM init, threaded
    into every state coroutine alongside the context.

    ``store`` and ``provider`` are required. ``mental_model_store`` is
    optional (defaults to ``None``) so tests / smoke runs can pass a partial
    bundle (e.g. only ``provider`` + ``store``); states that need a missing
    service should fail explicitly with a clear error rather than crash
    with ``AttributeError`` deep in the call stack.
    """

    # Required services.
    store: PageIndexStore
    provider: LLMProvider
    # Optional service; ``None`` means it was not supplied.
    mental_model_store: MentalModelStore | None = None
    # Model identifiers with their default values.
    embedding_model: str = "text-embedding-3-small"
    extraction_model: str = "gpt-4o-mini"