Coverage for astrocyte/pipeline/temporal_resolution.py: 52%

40 statements  

« prev     ^ index     » next       coverage.py v7.15.0, created at 2026-07-04 05:24 +0000

1"""M31 Fix 4 — Temporal resolution at retain time. 

2 

3Resolves relative date phrases in a fact's text ("last Tuesday", 

4"3 days ago", "two weeks back") to an absolute :class:`datetime` 

5**at retain time**, using the fact's anchoring section's 

6``session_date`` as the reference base. 

7 

8The resolved value is stored as :attr:`MemoryFact.event_date`. The 

9answerer renders it instead of forcing the gpt-4o-mini LLM to do 

10date arithmetic at query time — gpt-4o-mini is unreliable at date 

11math (the 60–70% temporal-reasoning ceiling we keep hitting in 

12LME). Moving the deterministic part of the work (regex + library 

13parsing) out of the LLM call is an architectural divergence from 

14Hindsight, which keeps date math in the LLM-side prompt. 

15 

16Design notes 

17------------ 

18 

19- **Single-call retain-time use only.** This module does NOT run at 

20 query time — the existing :mod:`astrocyte.pipeline.temporal_dateparser` 

21 handles that path with a different contract (date-range extraction 

22 from the question). 

23- **No LLM cost.** Pure regex + library work. Per-fact cost is 

24 measured in microseconds; safe to run on every extracted fact. 

25- **Best-effort.** Returns ``None`` when: 

26 - The text has no recognisable date phrase 

27 - dateparser raises (it has known bugs on certain locales / inputs) 

28 - Every match dateparser returns is rejected as a false-positive short token

29 (mirrors the filter in ``temporal_dateparser``)

30- **Single datetime output.** Unlike the query-time extractor 

31 which returns ``(start, end)``, retain-time resolution snaps to 

32 one datetime (00:00:00 on the resolved date). The fact's 

33 ``occurred_start`` / ``occurred_end`` are LLM-emitted ranges 

34 for events that span multiple days; ``event_date`` is the 

35 single most-prominent absolute date for this fact. 

36- **Anchor semantics.** When ``anchor`` is None (top-level facts 

37 without a section context), we cannot resolve relative phrases 

38 and return ``None``. Absolute phrases like "March 15, 2024" 

39 would parse without an anchor, but the resulting datetime would 

40 not be timezone-consistent with the rest of the system, so we 

41 conservatively skip these too. 

42""" 

43 

44from __future__ import annotations 

45 

46import logging 

47from datetime import datetime 

48 

# dateparser is imported lazily (same pattern as temporal_dateparser).
# ``_DATEPARSER_AVAILABLE`` is tri-state: ``None`` means the import has not
# been attempted yet; ``True`` / ``False`` cache the outcome of the attempt.
_DATEPARSER_AVAILABLE: bool | None = None
# Rebound to ``dateparser.search.search_dates`` once the lazy import succeeds.
_search_dates = None  # type: ignore[var-annotated]

# Module logger, named explicitly after this module's dotted path.
_logger = logging.getLogger("astrocyte.pipeline.temporal_resolution")

54 

# Tokens that dateparser misparses as dates when they appear on their own.
# Kept in step with ``temporal_dateparser._FALSE_POSITIVES`` so the
# retain-time and query-time extractors apply the same filter discipline.
_FALSE_POSITIVES: set[str] = {
    # Month names that are also ordinary English words.
    "may",
    "march",
    # Short function words.
    "a",
    "an",
    "and",
    "as",
    "at",
    "by",
    "for",
    "in",
    "is",
    "on",
    "or",
    "the",
    "to",
}

63 

64 

def _lazy_load() -> bool:
    """Make sure the dateparser import has been attempted; report the outcome.

    The import is attempted only while ``_DATEPARSER_AVAILABLE`` is still
    ``None``. Its outcome is cached in the module globals, so later calls
    return the cached flag without importing again.

    Returns:
        ``True`` when ``dateparser.search.search_dates`` was imported and
        bound to ``_search_dates``; ``False`` when the import raised
        ``ImportError``.
    """
    global _DATEPARSER_AVAILABLE, _search_dates

    if _DATEPARSER_AVAILABLE is None:
        try:
            from dateparser.search import search_dates as _impl  # noqa: PLC0415
        except ImportError:
            # Optional dependency missing: remember that, and say so once.
            _DATEPARSER_AVAILABLE = False
            _logger.info(
                "temporal_resolution: dateparser not installed; "
                "event_date resolution disabled (facts retain occurred_start only)."
            )
        else:
            _search_dates = _impl
            _DATEPARSER_AVAILABLE = True

    return _DATEPARSER_AVAILABLE

82 

83 

def resolve_event_date(
    text: str,
    anchor: datetime | None,
) -> datetime | None:
    """Resolve the first usable date phrase in ``text`` to an absolute datetime.

    ``text`` is scanned with dateparser's ``search_dates``. The first match
    that survives the false-positive filter is returned, snapped to
    midnight. Relative phrases ("last Tuesday", "3 days ago") are resolved
    against ``anchor``.

    Args:
        text: The fact's text (or any natural-language fragment).
        anchor: Reference "now" for relative phrases. Typically the
            section's ``session_date`` (when the fact was mentioned).
            ``None`` disables resolution entirely — we don't want
            absolute-only parses without a known timezone context.

    Returns:
        A datetime snapped to 00:00:00 on the resolved date, or ``None``
        when ``text`` is empty, ``anchor`` is ``None``, dateparser is not
        installed or raises, or no match survives the false-positive
        filter.

    Examples
    --------
    >>> from datetime import datetime
    >>> anchor = datetime(2024, 5, 8)
    >>> resolve_event_date("I went to the doctor last Tuesday", anchor)
    datetime.datetime(2024, 5, 7, 0, 0)
    >>> resolve_event_date("no date here", anchor) is None
    True
    """
    if not text or anchor is None:
        return None
    if not _lazy_load():
        return None

    settings = {
        "RELATIVE_BASE": anchor,
        "PREFER_DATES_FROM": "past",
        "RETURN_AS_TIMEZONE_AWARE": False,
    }

    try:
        results = _search_dates(text, settings=settings)  # type: ignore[misc]
    except Exception as exc:  # noqa: BLE001
        # Best-effort: a dateparser failure must never break retain.
        _logger.debug(
            "temporal_resolution: dateparser raised %s on text=%r",
            type(exc).__name__, text[:80],
        )
        return None

    if not results:
        return None

    for matched_text, parsed in results:
        token = matched_text.strip().lower()
        # Reject one- and two-character matches outright, plus every listed
        # false positive. The set check deliberately has no length cap:
        # "march" is five letters and must still be filtered.
        if len(token) <= 2 or token in _FALSE_POSITIVES:
            continue
        # Snap to start-of-day for a stable comparable timestamp.
        return parsed.replace(hour=0, minute=0, second=0, microsecond=0)

    return None

145 

146 

# Public API: ``resolve_event_date`` is the only name exported by a star import.
__all__ = ["resolve_event_date"]