Coverage for astrocyte/pipeline/temporal_resolution.py: 52%
40 statements
« prev ^ index » next coverage.py v7.15.0, created at 2026-07-04 05:24 +0000
« prev ^ index » next coverage.py v7.15.0, created at 2026-07-04 05:24 +0000
1"""M31 Fix 4 — Temporal resolution at retain time.
3Resolves relative date phrases in a fact's text ("last Tuesday",
4"3 days ago", "two weeks back") to an absolute :class:`datetime`
5**at retain time**, using the fact's anchoring section's
6``session_date`` as the reference base.
8The resolved value is stored as :attr:`MemoryFact.event_date`. The
9answerer renders it instead of forcing the gpt-4o-mini LLM to do
10date arithmetic at query time — gpt-4o-mini is unreliable at date
11math (the 60–70% temporal-reasoning ceiling we keep hitting in
12LME). Moving the deterministic part of the work (regex + library
13parsing) out of the LLM call is an architectural divergence from
14Hindsight, which keeps date math in the LLM-side prompt.
16Design notes
17------------
19- **Single-call retain-time use only.** This module does NOT run at
20 query time — the existing :mod:`astrocyte.pipeline.temporal_dateparser`
21 handles that path with a different contract (date-range extraction
22 from the question).
23- **No LLM cost.** Pure regex + library work. Per-fact cost is
24 measured in microseconds; safe to run on every extracted fact.
25- **Best-effort.** Returns ``None`` when:
26 - The text has no recognisable date phrase
27 - dateparser raises (it has known bugs on certain locales / inputs)
28 - Every match is a false-positive short token; such matches are
29 skipped (mirrors the filter in ``temporal_dateparser``)
30- **Single datetime output.** Unlike the query-time extractor
31 which returns ``(start, end)``, retain-time resolution snaps to
32 one datetime (00:00:00 on the resolved date). The fact's
33 ``occurred_start`` / ``occurred_end`` are LLM-emitted ranges
34 for events that span multiple days; ``event_date`` is the
35 single most-prominent absolute date for this fact.
36- **Anchor semantics.** When ``anchor`` is None (top-level facts
37 without a section context), we cannot resolve relative phrases
38 and return ``None``. Absolute phrases like "March 15, 2024"
39 would parse without an anchor, but the resulting datetime would
40 not be timezone-consistent with the rest of the system, so we
41 conservatively skip these too.
42"""
44from __future__ import annotations
46import logging
47from datetime import datetime
49_logger = logging.getLogger("astrocyte.pipeline.temporal_resolution")
51# Lazy-load dateparser (same pattern as temporal_dateparser).
52_DATEPARSER_AVAILABLE: bool | None = None
53_search_dates = None # type: ignore[var-annotated]
55# False-positive filter — short tokens that dateparser misparses as
56# dates ("on", "or", "in", "may", "march", etc). Mirrors the set in
57# ``temporal_dateparser._FALSE_POSITIVES`` so the two extractors
58# share filter discipline.
59_FALSE_POSITIVES: set[str] = {
60 "on", "in", "at", "to", "is", "or", "by", "as", "an", "a",
61 "may", "march", "the", "and", "for",
62}
def _lazy_load() -> bool:
    """Report whether ``dateparser`` is usable, importing it at most once.

    The first call attempts the import and records the outcome in the
    module-level ``_DATEPARSER_AVAILABLE`` flag; on success the
    ``search_dates`` callable is stored in ``_search_dates``. Every later
    call returns the recorded flag without retrying the import.

    Returns:
        ``True`` when ``dateparser.search.search_dates`` was imported,
        ``False`` when the import raised ``ImportError``.
    """
    global _DATEPARSER_AVAILABLE, _search_dates

    if _DATEPARSER_AVAILABLE is None:
        try:
            from dateparser.search import search_dates as _impl  # noqa: PLC0415
        except ImportError:
            # Optional dependency is missing: remember that so we neither
            # retry the import nor log again on subsequent calls.
            _DATEPARSER_AVAILABLE = False
            _logger.info(
                "temporal_resolution: dateparser not installed; "
                "event_date resolution disabled (facts retain occurred_start only)."
            )
        else:
            _search_dates = _impl
            _DATEPARSER_AVAILABLE = True

    return _DATEPARSER_AVAILABLE
84def resolve_event_date(
85 text: str,
86 anchor: datetime | None,
87) -> datetime | None:
88 """Resolve the first relative date phrase in ``text`` to an absolute
89 datetime, using ``anchor`` as the reference base for relative phrases.
91 Args:
92 text: The fact's text (or any natural-language fragment).
93 anchor: Reference "now" for relative phrases. Typically the
94 section's ``session_date`` (when the fact was mentioned).
95 ``None`` disables resolution — we don't want absolute-only
96 parses without a known timezone context.
98 Returns:
99 A datetime snapped to 00:00:00 on the resolved date, or
100 ``None`` when no valid relative date is found.
102 Examples
103 --------
104 >>> from datetime import datetime
105 >>> anchor = datetime(2024, 5, 8)
106 >>> resolve_event_date("I went to the doctor last Tuesday", anchor)
107 datetime.datetime(2024, 5, 7, 0, 0)
108 >>> resolve_event_date("no date here", anchor) is None
109 True
110 """
111 if not text or anchor is None:
112 return None
113 if not _lazy_load():
114 return None
116 settings = {
117 "RELATIVE_BASE": anchor,
118 "PREFER_DATES_FROM": "past",
119 "RETURN_AS_TIMEZONE_AWARE": False,
120 }
122 try:
123 results = _search_dates(text, settings=settings) # type: ignore[misc]
124 except Exception as exc: # noqa: BLE001
125 _logger.debug(
126 "temporal_resolution: dateparser raised %s on text=%r",
127 type(exc).__name__, text[:80],
128 )
129 return None
131 if not results:
132 return None
134 for matched_text, parsed in results:
135 t = matched_text.strip().lower()
136 # Same false-positive filter as the query-time extractor.
137 if t in _FALSE_POSITIVES and len(t) <= 4:
138 continue
139 if len(t) <= 2:
140 continue
141 # Snap to start-of-day for a stable comparable timestamp.
142 return parsed.replace(hour=0, minute=0, second=0, microsecond=0)
144 return None
147__all__ = ["resolve_event_date"]