Coverage for astrocyte/pipeline/reflect.py: 87%

86 statements  

« prev     ^ index     » next       coverage.py v7.15.0, created at 2026-07-04 05:24 +0000

1"""Fallback reflect — recall + LLM synthesis. 

2 

3Async (I/O-bound). See docs/_design/built-in-pipeline.md section 4. 

4""" 

5 

6from __future__ import annotations 

7 

8import re 

9from typing import TYPE_CHECKING 

10 

11from astrocyte.mip.schema import ReflectSpec 

12from astrocyte.pipeline.query_plan import build_query_plan 

13from astrocyte.types import Dispositions, MemoryHit, Message, ReflectResult 

14 

15if TYPE_CHECKING: 

16 from datetime import datetime 

17 

18 from astrocyte.provider import LLMProvider 

19 

20 

# Prompt registry — the prompt variants defined below are selected by name via
# ``ReflectSpec.prompt``. Unknown names fall back to "default" so a typo never
# breaks reflect; the loader/lint step is responsible for catching unknown
# names early.
#
# Hard cap on the number of metadata keys promoted into the rendered memory
# block (P4 — defense in depth). ``_format_memories`` truncates the requested
# key list to this many entries.
_PROMOTE_METADATA_MAX = 5

26 

# General-purpose synthesis prompt. Registered under "default" and also used by
# ``_build_system_prompt`` as the fallback when a requested variant name is not
# present in the registry.
_DEFAULT_PROMPT = (
    "You are a memory synthesis agent. "
    "You have been given a set of memories relevant to a query. "
    "Synthesize a clear, concise answer based ONLY on what is explicitly stated in the provided memories. "
    "Before saying information is unavailable, inspect every provided memory for directly supporting facts. "
    "Do not draw on outside knowledge or stereotypes. "
    "You may combine directly related memories when they share the same person, event, object, or timeframe, "
    "but do not connect merely tangential memories.\n\n"
    "Guidelines:\n"
    "- When the query asks about a specific person, prioritize memories that explicitly mention that person by name.\n"
    "- Consider connections between different memories. If one memory mentions a person and another mentions an event involving that person, combine those facts.\n"
    "- Pay attention to dates and temporal ordering when memories include timestamps.\n"
    "- If multiple memories provide different details about the same topic, synthesize them into a coherent answer.\n"
    "- If directly supporting memories exist, answer from them rather than saying the information is unavailable.\n"
    "- If no provided memory directly supports the answer, respond with: 'This information is not available in my memories.'\n"
    "- If the question contains a false or unverifiable premise, say so explicitly rather than answering as if the premise were true."
)

44 

# Variant for duration / ordering / recency questions. It tells the model to
# read the bracketed date that ``_format_memories`` appends to each memory
# prefix, and to use the ``<reference_date>`` block that ``synthesize``
# prepends when a reference date is supplied.
_TEMPORAL_AWARE_PROMPT = (
    "You are a memory synthesis agent answering a question about events over time. "
    "Answer ONLY from what is explicitly stated in the provided memories. "
    "Each memory carries a date in square brackets in its prefix (e.g. ``[Memory 3] [2023-05-20]``). "
    "Use those dates as the source of truth for any duration, ordering, or recency reasoning. "
    "If the specific information is not present, respond with: "
    "'This information is not available in my memories.'\n\n"
    "Workflow for computing answers (apply when the question asks "
    "'how many days/weeks/months', 'how long ago', 'in what order', "
    "'before/after', 'first/last', or any duration / ordering / age):\n"
    " 1. Extract the relevant dates from the memory prefixes.\n"
    " 2. Show the dates explicitly in your reasoning (e.g. ``Memory 1 = 2023-05-20, Memory 3 = 2023-05-27``).\n"
    " 3. Compute the answer step-by-step (subtraction, ordering, calendar math).\n"
    " 4. State the final answer clearly.\n\n"
    "Guidelines:\n"
    "- Treat timestamps as load-bearing: order memories chronologically before answering.\n"
    "- When a question asks about ordering ('before', 'after', 'first', 'last'), justify the answer with the relevant dates.\n"
    "- Distinguish between when an event occurred and when it was recorded.\n"
    "- Resolve relative phrases ('yesterday', 'last week', 'two weekends ago') against the memory's prefix date AND the user's reference date if provided in <reference_date>.\n"
    "- If a relative phrase like 'last week' appears, do the math: 'last week' from a memory dated 2023-06-09 means 2023-06-02 to 2023-06-08.\n"
    "- If timestamps are missing or ambiguous, say so rather than guessing.\n"
    "- Do not infer a timeline from unrelated clues; only compute from explicit dates."
)

68 

# Variant that requires a 'Memory N' citation for every claim. The numbers
# correspond to the ``[Memory N]`` prefixes produced by ``_format_memories``.
# Note that its refusal wording ('Insufficient evidence ...') differs from the
# 'This information is not available ...' sentence used by the other variants.
_EVIDENCE_STRICT_PROMPT = (
    "You are a memory synthesis agent operating under strict evidence rules.\n\n"
    "Guidelines:\n"
    "- Answer ONLY from the memories provided. Do not draw on outside knowledge.\n"
    "- Cite the specific memory number ('Memory 3') for every claim.\n"
    "- If the memories do not contain a definitive answer, say 'Insufficient evidence in the provided memories.'\n"
    "- Do not paraphrase loosely; preserve nuance, qualifications, and uncertainty markers."
)

77 

# Variant for "would / likely / prefer" style questions: permits a cautious
# inference as long as it is traceable to the provided memories. This is the
# variant ``_auto_prompt_variant`` returns when ``_INFERENCE_QUERY_RE`` matches.
_EVIDENCE_INFERENCE_PROMPT = (
    "You are a memory synthesis agent answering an inference question from personal memories. "
    "Use ONLY the provided memories as evidence; do not use outside facts or stereotypes. "
    "Unlike strict fact lookup, you MAY make a cautious inference when the question asks what someone "
    "would likely do, prefer, believe, or be considered, as long as the inference is directly supported "
    "by retrieved memories.\n\n"
    "Guidelines:\n"
    "- For 'would' or 'likely' questions, answer with calibrated language such as 'Likely yes' or 'Likely no' plus the evidence.\n"
    "- Connect preferences, repeated activities, stated goals, and identity facts across memories when they support the inference.\n"
    "- If the memories support multiple possibilities, say which is more likely and why.\n"
    "- If the memories contain no relevant evidence, respond with: 'This information is not available in my memories.'\n"
    "- Never invent facts; every inference must be traceable to the provided memories."
)

91 

# Variant for aggregate (list / count) and multi-hop questions: instructs the
# model to scan every memory and to combine only memories that share a person,
# event, object, or timestamp context.
_GROUNDED_SYNTHESIS_PROMPT = (
    "You are a memory synthesis agent for aggregate and multi-hop questions. "
    "Use ONLY the provided memories, but actively combine directly related memories when needed.\n\n"
    "Guidelines:\n"
    "- Scan all provided memories before answering; do not stop at the first matching memory.\n"
    "- For list questions, collect distinct answer items and omit unsupported distractors.\n"
    "- For count questions, count only evidence-backed occurrences and explain uncertainty briefly when needed.\n"
    "- For multi-hop questions, connect facts only when they share the same person, event, object, or timestamp context.\n"
    "- If relevant memories are present but incomplete, answer the supported part instead of saying everything is unavailable.\n"
    "- If no provided memory directly supports the answer, respond with: 'This information is not available in my memories.'\n"
    "- If the question contains a false or wrong-person premise, reject that premise rather than answering from a similar memory."
)

104 

# Maps prompt variant names to system-prompt text. ``_build_system_prompt``
# looks names up here and substitutes ``_DEFAULT_PROMPT`` for any name that is
# not a key of this dict.
PROMPT_REGISTRY: dict[str, str] = {
    "default": _DEFAULT_PROMPT,
    "temporal_aware": _TEMPORAL_AWARE_PROMPT,
    "evidence_strict": _EVIDENCE_STRICT_PROMPT,
    "evidence_inference": _EVIDENCE_INFERENCE_PROMPT,
    "grounded_synthesis": _GROUNDED_SYNTHESIS_PROMPT,
}

112 

# Case-insensitive cue words suggesting the query asks for an inference rather
# than a stated fact. Compiled once at import time; ``_auto_prompt_variant``
# maps a match to the "evidence_inference" variant.
# NOTE(review): the ``\b`` anchors make this a whole-word match, so inflected
# forms such as "prefers" or "preferred" do not match — confirm that is intended.
_INFERENCE_QUERY_RE = re.compile(
    r"\b(would|likely|probably|considered|interested\s+in|prefer|leaning|pursue)\b",
    re.IGNORECASE,
)

117 

118 

def _auto_prompt_variant(query: str) -> str | None:
    """Pick a prompt variant name for ``query`` from lightweight cues.

    Resolution order (first match wins):

    1. The variant carried by ``build_query_plan(query)``, when it is not
       ``None``.
    2. ``"temporal_aware"`` when ``classify_query_intent`` labels the query as
       ``QueryIntent.TEMPORAL``. Temporal is checked before inference because
       date math needs stricter handling than general inference.
    3. ``"evidence_inference"`` when ``_INFERENCE_QUERY_RE`` matches the query.

    Returns ``None`` when none of the above applies. This function never looks
    at an explicit ``ReflectSpec.prompt``; it is meant for callers that have
    no prompt override to honour.
    """
    # Imported here rather than at module level, as in the original.
    from astrocyte.pipeline.query_intent import QueryIntent, classify_query_intent

    planned_variant = build_query_plan(query).prompt_variant
    if planned_variant is not None:
        return planned_variant

    if classify_query_intent(query).intent == QueryIntent.TEMPORAL:
        return "temporal_aware"

    # ``query or ""`` keeps the regex call safe for a falsy query.
    if _INFERENCE_QUERY_RE.search(query or "") is not None:
        return "evidence_inference"
    return None

137 

138 

def _build_system_prompt(
    dispositions: Dispositions | None,
    *,
    prompt_variant: str | None = None,
) -> str:
    """Return the synthesis system prompt, optionally tuned by dispositions.

    ``prompt_variant`` is looked up in :data:`PROMPT_REGISTRY` (``"default"``,
    ``"temporal_aware"``, ``"evidence_strict"``, ``"evidence_inference"``,
    ``"grounded_synthesis"``). ``None``, an empty string, or an unknown name
    all resolve to the default prompt — the lint/loader path is responsible
    for catching typos.

    When ``dispositions`` is truthy, each of ``skepticism``, ``literalism`` and
    ``empathy`` contributes one sentence if its level is ``>= 4`` or ``<= 2``;
    a level strictly between those adds nothing. The collected sentences are
    joined with single spaces and appended after a blank line.
    """
    prompt = PROMPT_REGISTRY.get(prompt_variant or "default", _DEFAULT_PROMPT)
    if not dispositions:
        return prompt

    # One row per trait: (level, sentence when level >= 4, sentence when level <= 2).
    trait_rows = (
        (
            dispositions.skepticism,
            "Be skeptical of uncertain claims and note where evidence is weak.",
            "Trust the memories at face value unless clearly contradictory.",
        ),
        (
            dispositions.literalism,
            "Interpret memories literally and precisely.",
            "Interpret memories flexibly, considering context and intent.",
        ),
        (
            dispositions.empathy,
            "Acknowledge the human experience behind the memories.",
            "Focus on factual content without emotional framing.",
        ),
    )

    modifiers: list[str] = []
    for level, high_sentence, low_sentence in trait_rows:
        if level >= 4:
            modifiers.append(high_sentence)
        elif level <= 2:
            modifiers.append(low_sentence)

    if not modifiers:
        return prompt
    return prompt + "\n\n" + " ".join(modifiers)

168 

169 

def _format_memories(
    hits: list[MemoryHit],
    *,
    promote_metadata: list[str] | None = None,
) -> str:
    """Format memory hits as context for the LLM.

    Each hit becomes one line of the form ``<prefix>: <text>``; lines are
    joined with newlines. The prefix is assembled, in order, from:

    * ``[Memory N]`` — 1-based position in ``hits``.
    * ``(fact_type)`` — when the hit has a truthy ``fact_type``.
    * ``[date]`` — ``occurred_at.isoformat()`` when set, otherwise
      ``metadata['date_time']``, otherwise ``metadata['session_date']``.
      Omitted entirely when the environment variable
      ``ASTROCYTE_LEGACY_MEMORY_FORMAT`` equals ``"1"``.
    * ``{temporal_phrase=..., resolved_date=..., granularity=...}`` — when
      ``metadata['resolved_date']`` is truthy.
    * ``{key=value, ...}`` — the promoted metadata keys present on the hit.

    ``promote_metadata`` lists metadata keys to surface inline alongside each
    memory's prefix (e.g. ``["author", "source_url"]``). The list is
    hard-capped at :data:`_PROMOTE_METADATA_MAX` (P4 — keeps the prompt budget
    bounded); excess keys are silently dropped. Keys missing on a given hit
    are skipped rather than rendered as ``None``.

    Returns an empty string when ``hits`` is empty.
    """
    import os

    promoted: list[str] = list(promote_metadata or [])[:_PROMOTE_METADATA_MAX]

    # ``ASTROCYTE_LEGACY_MEMORY_FORMAT=1`` disables the date prefix entirely —
    # used by the LoCoMo bisection (2026-05-08) to test whether the date-prefix
    # synthesis fix regressed LoCoMo accuracy. LoCoMo's ``metadata['date_time']``
    # is a SESSION date which may not match the event date a question asks
    # about, potentially confusing the LLM.
    # The switch does not depend on the hit, so it is read once, before the loop.
    legacy_format = os.environ.get("ASTROCYTE_LEGACY_MEMORY_FORMAT") == "1"

    lines: list[str] = []
    for i, hit in enumerate(hits, 1):
        metadata = hit.metadata
        prefix = f"[Memory {i}]"
        if hit.fact_type:
            prefix += f" ({hit.fact_type})"

        # Prefer occurred_at; fall back to common metadata date keys.
        # ``session_date`` covers LongMemEval-style retain (chat sessions
        # stamped with their conversation date); ``date_time`` covers LoCoMo.
        # Without explicit dates in the prefix, the synthesis LLM has no anchor
        # to compute durations or order events when the body text uses relative
        # phrases ("today", "yesterday"). Dating every memory closed the gap on
        # temporal questions in the May 2026 LME post-mortem.
        if not legacy_format:
            if hit.occurred_at:
                prefix += f" [{hit.occurred_at.isoformat()}]"
            elif metadata and metadata.get("date_time"):
                prefix += f" [{metadata['date_time']}]"
            elif metadata and metadata.get("session_date"):
                prefix += f" [{metadata['session_date']}]"

        # NOTE(review): the flattened source this was recovered from does not
        # show whether this branch sits inside the legacy-format switch above;
        # it is kept at loop level here — confirm against the repository.
        if metadata and metadata.get("resolved_date"):
            prefix += (
                f" {{temporal_phrase={metadata.get('temporal_phrase')}, "
                f"resolved_date={metadata.get('resolved_date')}, "
                f"granularity={metadata.get('date_granularity')}}}"
            )

        # Promoted metadata fields appended in declared order.
        if promoted and metadata:
            extras = [f"{key}={metadata[key]}" for key in promoted if key in metadata]
            if extras:
                prefix += " {" + ", ".join(extras) + "}"

        lines.append(f"{prefix}: {hit.text}")
    return "\n".join(lines)

227 

228 

async def synthesize(
    query: str,
    hits: list[MemoryHit],
    llm_provider: LLMProvider,
    dispositions: Dispositions | None = None,
    max_tokens: int = 2048,
    model: str | None = None,
    authority_context: str | None = None,
    mip_reflect: ReflectSpec | None = None,
    query_reference_date: "datetime | None" = None,
) -> ReflectResult:
    """Synthesize an answer from recall hits using LLM.

    This is the fallback reflect used when the memory provider
    does not support native reflect.

    Args:
        query: The user question; rendered inside ``<query>`` tags.
        hits: Recall results; rendered inside ``<memories>`` tags and returned
            unchanged as ``sources``.
        llm_provider: Provider whose ``complete`` coroutine is awaited once.
        dispositions: Optional trait levels that append modifier sentences to
            the system prompt.
        max_tokens: Forwarded to ``llm_provider.complete``.
        model: Forwarded to ``llm_provider.complete``.
        authority_context: Optional text; when non-blank after stripping it is
            rendered inside ``<authority_context>`` tags. The original value is
            passed through on the result either way.
        mip_reflect: Optional active rule's ReflectSpec — its ``prompt``
            selects from :data:`PROMPT_REGISTRY` and ``promote_metadata``
            lifts metadata fields into the rendered memory block (capped at 5
            by P4).
        query_reference_date: Optional anchor date rendered inside
            ``<reference_date>`` tags.

    Returns:
        A ``ReflectResult`` whose ``answer`` is the completion text. When
        ``hits`` is empty, no LLM call is made and a fixed "no relevant
        memories" answer with empty ``sources`` is returned.
    """
    if not hits:
        return ReflectResult(
            answer="I don't have any relevant memories to answer this question.",
            sources=[],
            authority_context=authority_context,
        )

    prompt_variant = mip_reflect.prompt if mip_reflect is not None else None
    promote_metadata = mip_reflect.promote_metadata if mip_reflect is not None else None
    system_prompt = _build_system_prompt(dispositions, prompt_variant=prompt_variant)
    memories_text = _format_memories(hits, promote_metadata=promote_metadata)
    query_plan = build_query_plan(query)

    # Sections are collected in their final order and joined with blank lines:
    # authority_context, query_guidance, reference_date, then memories + query.
    sections: list[str] = []

    # Strip once and reuse the result. The earlier code tested
    # ``str(authority_context).strip()`` but then called ``.strip()`` on the raw
    # value, which raises AttributeError for a truthy non-str value.
    authority_text = str(authority_context).strip() if authority_context else ""
    if authority_text:
        sections.append(f"<authority_context>\n{authority_text}\n</authority_context>")

    if query_plan.guidance:
        sections.append(f"<query_guidance>\n{query_plan.guidance}\n</query_guidance>")

    # ``query_reference_date`` anchors relative phrases ("yesterday",
    # "X weeks ago") in the question to the question's contemporaneous
    # date — required when the dataset predates the run wall-clock
    # (e.g. LongMemEval is 2023-vintage, evaluated in 2026+). Without
    # this anchor, "yesterday" resolves against the LLM's training
    # prior of "now", which is wrong by years on bench runs. The
    # temporal_aware prompt variant explicitly references this block.
    if query_reference_date is not None:
        anchor = (
            query_reference_date.isoformat()
            if hasattr(query_reference_date, "isoformat")
            else str(query_reference_date)
        )
        sections.append(f"<reference_date>\n{anchor}\n</reference_date>")

    sections.append(f"<memories>\n{memories_text}\n</memories>\n\n<query>\n{query}\n</query>")
    user_prompt = "\n\n".join(sections)

    completion = await llm_provider.complete(
        messages=[
            Message(role="system", content=system_prompt),
            Message(role="user", content=user_prompt),
        ],
        model=model,
        max_tokens=max_tokens,
        temperature=0.1,
    )

    return ReflectResult(
        answer=completion.text,
        sources=hits,
        authority_context=authority_context,
    )