Coverage for astrocyte/pipeline/temporal.py: 64%

92 statements  

« prev     ^ index     » next       coverage.py v7.15.0, created at 2026-07-04 05:24 +0000

1"""Lightweight temporal phrase detection for recall/reflect planning. 

2 

3The helpers here do not try to become a full natural-language date parser. 

4They surface deterministic hints that the synthesis prompt can use to resolve 

5LoCoMo-style relative phrases against memory timestamps. 

6""" 

7 

8from __future__ import annotations 

9 

10import re 

11from dataclasses import dataclass 

12from datetime import datetime, timedelta 

13 

14 

@dataclass(frozen=True)
class TemporalHint:
    """Immutable record of one temporal phrase spotted in a query.

    Attributes:
        phrase: The matched text, with the casing it had in the query.
        kind: Category label of the pattern that matched
            (for example ``"relative_day"``).
        guidance: Instruction text associated with that pattern, meant to
            be surfaced to the synthesis prompt.
    """

    phrase: str
    kind: str
    guidance: str

22 

23 

@dataclass(frozen=True)
class NormalizedTemporalFact:
    """Immutable result of resolving a relative phrase against an anchor.

    Attributes:
        phrase: The relative phrase as it was matched in the text.
        resolved_date: ISO-8601 calendar date the phrase resolves to.
        granularity: Precision label of the resolution
            (``"day"``, ``"week"``, ``"month"`` or ``"year"``).
        anchor_date: ISO-8601 calendar date of the anchor that was used.
    """

    phrase: str
    resolved_date: str
    granularity: str
    anchor_date: str

32 

33 

# Ordered table of (compiled pattern, hint kind, guidance text). Every
# pattern is compiled case-insensitively; the order here is the order in
# which hints are reported by extract_temporal_hints().
_HINT_PATTERNS: tuple[tuple[re.Pattern[str], str, str], ...] = tuple(
    (re.compile(expression, re.IGNORECASE), kind, guidance)
    for expression, kind, guidance in (
        (
            r"\byesterday\b",
            "relative_day",
            "Resolve 'yesterday' as one calendar day before the relevant memory timestamp.",
        ),
        (
            r"\blast\s+week\b|\bthe\s+week\s+before\b",
            "relative_week",
            "Resolve week-relative phrases from the memory timestamp; do not use the record date as the event date.",
        ),
        (
            r"\b(previous|last)\s+(monday|tuesday|wednesday|thursday|friday|saturday|sunday)\b",
            "relative_weekday",
            "Resolve previous weekdays against the relevant memory timestamp.",
        ),
        (
            r"\b(two|three|four|\d+)\s+weekends?\s+(before|ago|earlier)\b",
            "relative_weekend",
            "Resolve weekend offsets by counting complete weekends back from the relevant memory timestamp.",
        ),
        (
            r"\b(\d+|one|two|three|four|five|six|seven|eight|nine|ten)\s+(days?|weeks?|months?|years?)\s+(before|ago|earlier)\b",
            "relative_offset",
            "Resolve numeric temporal offsets from the relevant memory timestamp.",
        ),
        (
            r"\brecently\b|\blately\b",
            "recent",
            "Treat 'recently' as a request for the latest matching event, not necessarily the newest memory overall.",
        ),
    )
)

69 

70 

def extract_temporal_hints(query: str) -> list[TemporalHint]:
    """Collect the temporal hints whose patterns match *query*.

    Patterns are tried in ``_HINT_PATTERNS`` order. A phrase is reported
    once per (kind, lower-cased phrase) pair, keeping the casing of its
    first occurrence. A falsy *query* is treated as the empty string and
    yields an empty list.
    """
    haystack = query if query else ""
    # A dict keeps insertion order, so it serves as both the "already seen"
    # check and the ordered result container.
    unique: dict[tuple[str, str], TemporalHint] = {}
    for regex, kind, guidance in _HINT_PATTERNS:
        for found in regex.finditer(haystack):
            matched = found.group(0)
            unique.setdefault(
                (kind, matched.lower()),
                TemporalHint(phrase=matched, kind=kind, guidance=guidance),
            )
    return list(unique.values())

86 

87 

def temporal_guidance_for_query(query: str) -> str | None:
    """Render the temporal hints for *query* as a prompt-ready text block.

    Returns ``None`` when no hint is found. Otherwise returns a heading
    line followed by one ``- phrase: guidance`` bullet per hint, joined
    with newlines.
    """
    hints = extract_temporal_hints(query)
    if hints:
        bullets = [f"- {hint.phrase}: {hint.guidance}" for hint in hints]
        return "\n".join(["Temporal reasoning hints:", *bullets])
    return None

98 

99 

def normalize_relative_temporal_facts(
    text: str,
    anchor: datetime | None,
) -> list[NormalizedTemporalFact]:
    """Resolve common LoCoMo relative phrases against a session timestamp.

    Args:
        text: Free text to scan. ``None`` or an empty string yields no facts.
        anchor: Timestamp the phrases are relative to. Only its calendar
            date is used. ``None`` yields no facts.

    Returns:
        One fact per match, grouped by phrase family in this order:
        "yesterday", week phrases, previous/last weekday, numeric offsets.
        Within a family, facts follow their order in *text*.

    Notes:
        Months and years are approximated as 30 and 365 days. A phrase
        whose resolved date cannot be represented (absurdly large offsets,
        or an anchor at the very start of the supported date range) is
        skipped rather than raising.
    """
    # Tolerate a missing text the same way extract_temporal_hints() does,
    # instead of letting re.finditer raise TypeError.
    if anchor is None or not text:
        return []

    anchor_date = anchor.date()
    anchor_iso = anchor_date.isoformat()
    facts: list[NormalizedTemporalFact] = []

    def record(phrase: str, days_back: int, granularity: str) -> None:
        """Append a fact for *phrase*, dropping it if the date is unrepresentable."""
        try:
            resolved = anchor_date - timedelta(days=days_back)
        except OverflowError:
            # timedelta is capped at 999999999 days and date at year 1;
            # user text such as "9999999999 years ago" must not crash us.
            return
        facts.append(_fact(phrase, resolved.isoformat(), granularity, anchor_iso))

    for match in re.finditer(r"\byesterday\b", text, re.IGNORECASE):
        record(match.group(0), 1, "day")

    for match in re.finditer(r"\blast\s+week\b|\bthe\s+week\s+before\b", text, re.IGNORECASE):
        record(match.group(0), 7, "week")

    for match in re.finditer(
        r"\b(previous|last)\s+(monday|tuesday|wednesday|thursday|friday|saturday|sunday)\b",
        text,
        re.IGNORECASE,
    ):
        weekday = _WEEKDAY_INDEX[match.group(2).lower()]
        # Days since the most recent such weekday; when the anchor itself
        # falls on that weekday, "last X" means a full week earlier.
        delta = (anchor_date.weekday() - weekday) % 7 or 7
        record(match.group(0), delta, "day")

    # Approximate day counts per unit; the singular unit name doubles as
    # the granularity label.
    days_per_unit = {"day": 1, "week": 7, "month": 30, "year": 365}
    for match in re.finditer(
        r"\b(\d+|one|two|three|four|five|six|seven|eight|nine|ten)\s+(days?|weeks?|months?|years?)\s+(before|ago|earlier)\b",
        text,
        re.IGNORECASE,
    ):
        try:
            amount = _number(match.group(1))
        except ValueError:
            # int() rejects digit runs beyond the interpreter's conversion
            # limit; such a phrase is not a usable date offset anyway.
            continue
        granularity = match.group(2).lower().rstrip("s")
        record(match.group(0), amount * days_per_unit[granularity], granularity)

    return facts

147 

148 

def temporal_metadata(text: str, anchor: datetime | None) -> dict[str, str]:
    """Flatten the normalized temporal facts of *text* into string metadata.

    Returns an empty dict when nothing resolves. Otherwise the dict holds
    the anchor date of the first fact plus three parallel ``|``-separated
    lists (phrases, resolved dates, granularities) in fact order.
    """
    facts = normalize_relative_temporal_facts(text, anchor)
    if facts:
        # Split the facts column-wise so each column can be joined at once.
        phrases, resolved_dates, granularities = zip(
            *((fact.phrase, fact.resolved_date, fact.granularity) for fact in facts)
        )
        return {
            "temporal_anchor": facts[0].anchor_date,
            "temporal_phrase": "|".join(phrases),
            "resolved_date": "|".join(resolved_dates),
            "date_granularity": "|".join(granularities),
        }
    return {}

161 

162 

def query_time_range(query: str, anchor: datetime | None) -> tuple[datetime, datetime] | None:
    """Build a coarse bounded time range for simple relative-date queries.

    Only the first relative phrase resolved from *query* is considered.

    Args:
        query: Query text to scan for relative temporal phrases.
        anchor: Timestamp the phrases are relative to.

    Returns:
        ``(start, end)`` where ``start`` is midnight of the resolved date,
        carrying the anchor's tzinfo when it has one, and ``end`` is
        ``start`` plus a span chosen by the fact's granularity: 1 day,
        7 days, 30 days (month) or 365 days (year). Returns ``None`` when
        *anchor* is ``None`` or no phrase resolves.
    """
    # Explicit guard: anchor.tzinfo is read below, so do not rely on the
    # normalizer returning no facts for a missing anchor.
    if anchor is None:
        return None
    facts = normalize_relative_temporal_facts(query, anchor)
    if not facts:
        return None

    first = facts[0]
    start = datetime.fromisoformat(first.resolved_date)
    if anchor.tzinfo is not None:
        start = start.replace(tzinfo=anchor.tzinfo)

    # Window width follows the granularity of the resolved phrase, so a
    # month- or year-level phrase is not squeezed into a one-week window.
    # Unknown granularities keep the previous one-week default.
    span_days = {"day": 1, "week": 7, "month": 30, "year": 365}.get(first.granularity, 7)
    return start, start + timedelta(days=span_days)

175 

176 

def _fact(phrase: str, resolved_date: str, granularity: str, anchor_date: str) -> NormalizedTemporalFact:
    """Bundle the four string fields into a ``NormalizedTemporalFact``."""
    # Arguments are passed positionally in the dataclass's field order.
    return NormalizedTemporalFact(phrase, resolved_date, granularity, anchor_date)

184 

185 

# Lower-case weekday name -> index, Monday = 0 through Sunday = 6, the same
# numbering date.weekday() returns.
_WEEKDAY_INDEX = {
    name: index
    for index, name in enumerate(
        ("monday", "tuesday", "wednesday", "thursday", "friday", "saturday", "sunday")
    )
}

195 

# Spelled-out English numbers "one" through "ten" mapped to their values.
_NUMBER_WORDS = dict(
    zip(
        ("one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten"),
        range(1, 11),
    )
)


def _number(value: str) -> int:
    """Convert a digit string or a number word ("one".."ten") to an int.

    Number words are matched case-insensitively; any other non-digit
    input raises ``KeyError``.
    """
    if value.isdigit():
        return int(value)
    return _NUMBER_WORDS[value.lower()]