Coverage for astrocyte/pipeline/mental_model_compile.py: 92%

64 statements  

« prev     ^ index     » next       coverage.py v7.15.0, created at 2026-07-04 05:24 +0000

1"""M11.2 — mental-model compile pass. 

2 

3Synthesizes stable user preferences / persona / habits / background 

4from a document's sections and persists them as ``MentalModel`` rows 

5in the bank's :class:`~astrocyte.provider.MentalModelStore`. Direct 

6port of Hindsight's mental-model tier (saved-reflect summaries 

7recalled BEFORE observations and raw memories). 

8 

9Different from :mod:`section_compile` (wiki pages): 

10 

11- **Wiki pages** are topic-clustered observations 

12 ("User visited 3 doctors", "User worked on 5 model kits"). 

13 Generated per cluster via DBSCAN. Carry section-grain provenance. 

14- **Mental models** are DURABLE user-profile statements 

15 ("User prefers Sony cameras", "User practices Spanish 3×/week"). 

16 Generated per document via one LLM call across all sections. 

17 Span the whole document — no per-cluster scope. 

18 

19Why both layers: a question like "recommend a hotel in Miami" wants 

20the user-profile fact ("user prefers ocean views") regardless of 

21which topic-cluster that was discussed in. Wiki pages cluster by 

22topic; mental models cluster by user-profile dimension. 

23 

24Generic across LME / LoCoMo / future benches — the prompt asks for 

25PROFILE facts, not bench-specific shapes. 

26 

27See: 

28- ``docs/_design/recall.md`` §14 (M11 plan) 

29- ``hindsight-api-slim/hindsight_api/engine/reflect/observations.py`` 

30 for Hindsight's analogous structure (their ``Observation`` rows on 

31 ``MentalModel``). 

32""" 

33 

34from __future__ import annotations 

35 

36import json 

37import logging 

38import re 

39from datetime import datetime, timezone 

40from typing import TYPE_CHECKING 

41 

42from astrocyte.types import MentalModel, Message 

43 

44if TYPE_CHECKING: 

45 from astrocyte.provider import LLMProvider, MentalModelStore, PageIndexStore 

46 

47_logger = logging.getLogger("astrocyte.pipeline.mental_model_compile") 

48 

49 

50_COMPILE_PROMPT = """\ 

51You are extracting STABLE user profile facts from a conversation \ 

52transcript. The user is the speaker labeled "user" in chat turns. 

53 

54Output a JSON object with one key, ``models``, containing an array \ 

55of mental-model objects. Each model has: 

56- "title": 3-7 word noun phrase naming the profile dimension \ 

57(e.g. "Photography gear preference", "Diet", "Dance interests") 

58- "content": one declarative sentence stating the user's stable \ 

59preference, habit, persona, or background. Use specific entities \ 

60when the user named them. The reader will use this as authoritative \ 

61context for "recommend X" / "what do I like" / "tell me about my X" \ 

62questions. 

63 

64What counts as a mental model (extract these): 

65- Stable preferences: "User prefers Sony cameras over Canon." 

66- Persona / background: "User is a parent of two children, lives in \ 

67Brooklyn, works in tech." 

68- Recurring habits: "User attends weekly hip-hop dance classes at \ 

69Street Beats." 

70- Stable opinions: "User finds yoga more relaxing than running." 

71- Hobbies / projects: "User is building a home maintenance app." 

72- Skills / expertise: "User has 10 years of photography experience." 

73- Values / goals: "User is saving for a Europe trip in 2024." 

74 

75What does NOT count (ignore these): 

76- One-off events ("Yesterday I went to the doctor") 

77- Casual mentions without preference signal ("I had coffee") 

78- Things the assistant said about the user (only user's own claims) 

79- Speculation or one-time questions 

80 

81Rules: 

82- Each model must reflect something the user EXPLICITLY stated or \ 

83strongly implied across multiple sections (not single-mention) 

84- Be specific about entities when known (brand names, places, etc.) 

85- Cap at 12 models total — prefer the most-discussed dimensions 

86- If sections are generic chitchat with no stable profile signals, \ 

87return ``{{"models": []}}`` 

88 

89Examples of good output: 

90{{"models": [ 

91 {{"title": "Photography gear preference", "content": "User shoots \ 

92with Sony A7 III and prefers Sony-compatible accessories like the 24-70mm \ 

93G Master lens."}}, 

94 {{"title": "Movie genre taste", "content": "User strongly prefers \ 

95stand-up comedy specials on Netflix, especially recent ones (Ali Wong, \ 

96John Mulaney)."}}, 

97 {{"title": "Dance practice", "content": "User attends weekly hip-hop \ 

98classes at Street Beats and enjoys contemporary as a secondary style."}}, 

99 {{"title": "Diet", "content": "User follows a mostly-vegetarian \ 

100diet with occasional fish; avoids dairy."}} 

101]}} 

102 

103OUTPUT MUST BE VALID JSON. No prose around it. 

104 

105Section summaries (chronological): 

106{sections} 

107""" 

108 

109 

110def _slugify(s: str) -> str: 

111 s = s.lower().strip() 

112 s = re.sub(r"[^a-z0-9\s-]", "", s) 

113 s = re.sub(r"\s+", "-", s) 

114 return s[:60] or "model" 

115 

116 

117def _format_sections_for_prompt(sections, max_sections: int = 60) -> str: 

118 """Render section summaries chronologically for the compile prompt. 

119 

120 Cap at ``max_sections`` to keep the prompt under ~6K tokens. We 

121 rank by line_num ascending (chronological order through the 

122 document) and take the first ``max_sections``; LME chat-history 

123 documents commonly have 30-50 sections so this rarely trims. 

124 """ 

125 chronological = sorted(sections, key=lambda s: s.line_num) 

126 rendered = [] 

127 for s in chronological[:max_sections]: 

128 summary = (s.summary or s.title or "").strip() 

129 if not summary: 

130 continue 

131 date = s.session_date.strftime("%Y-%m-%d") if getattr(s, "session_date", None) is not None else "no-date" 

132 rendered.append(f"[line={s.line_num} date={date}] {summary}") 

133 return "\n".join(rendered) 

134 

135 

async def compile_mental_models_for_document(
    *,
    page_index_store: PageIndexStore,
    mental_model_store: MentalModelStore,
    bank_id: str,
    document_id: str,
    provider: LLMProvider,
    model: str | None = None,
) -> list[str]:
    """Extract mental models for one document and persist via
    :class:`MentalModelStore`.

    Pipeline: load the document's section skeleton, render it into
    ``_COMPILE_PROMPT``, make one JSON-mode LLM call, then upsert one
    ``MentalModel`` per valid ``{"title", "content"}`` entry under the
    response's ``models`` key.

    Returns the list of newly-upserted ``model_id``s. Idempotent: when
    models with the same ``model_id`` already exist for this bank, they
    are bumped to a new revision (the store's standard upsert
    semantics) rather than duplicated.

    Scoping: ``scope = f"document:{document_id}"`` — mirrors the wiki
    tier's pattern so the bench's per-question retrieval can filter
    cleanly to the right document without cross-contamination across
    sibling LME conversations in the same bank.

    Failure handling is best-effort: an LLM error, an unparseable or
    wrongly-shaped response, or a document with nothing to summarize
    all return ``[]``; a failed upsert is logged and skipped so the
    remaining models are still written. Exceptions raised by
    ``page_index_store.load_skeleton`` are not caught here.

    Args:
        page_index_store: Source of the document's sections.
        mental_model_store: Destination for compiled models.
        bank_id: Bank the models belong to.
        document_id: Document to compile.
        provider: LLM provider used for the single compile call.
        model: Optional model override forwarded to the provider.

    Returns:
        ``model_id`` of every model whose upsert succeeded, in response
        order.
    """
    sections = await page_index_store.load_skeleton(document_id)
    if not sections:
        return []

    rendered_sections = _format_sections_for_prompt(sections)
    if not rendered_sections:
        # No section carries any text. Calling the LLM anyway would hand
        # it only the prompt's worked examples, which it may echo back
        # as if they were facts about this user.
        _logger.debug(
            "mental_model_compile: doc=%s has no section text; skipping",
            document_id,
        )
        return []

    prompt = _COMPILE_PROMPT.format(sections=rendered_sections)
    try:
        completion = await provider.complete(
            [Message(role="user", content=prompt)],
            model=model,
            max_tokens=900,
            temperature=0.0,
            response_format={"type": "json_object"},
        )
    except Exception as exc:  # noqa: BLE001
        # Deliberately broad: provider failures must not abort the caller.
        _logger.warning(
            "mental_model_compile: LLM call failed doc=%s: %s",
            document_id,
            exc,
        )
        return []

    raw_text = completion.text
    try:
        data = json.loads(raw_text)
    except (json.JSONDecodeError, TypeError) as exc:
        # TypeError covers a provider handing back ``None`` (or another
        # non-string) as the completion text.
        _logger.warning(
            "mental_model_compile: JSON parse failed doc=%s: %s text=%r",
            document_id,
            exc,
            raw_text[:200] if isinstance(raw_text, str) else raw_text,
        )
        return []
    if not isinstance(data, dict):
        # Valid JSON but not an object (e.g. a bare list); ``.get`` below
        # would raise AttributeError.
        _logger.warning(
            "mental_model_compile: expected JSON object doc=%s, got %s",
            document_id,
            type(data).__name__,
        )
        return []
    raw_models = data.get("models") or []
    if not isinstance(raw_models, list):
        return []

    now = datetime.now(tz=timezone.utc)
    scope = f"document:{document_id}"
    upserted: list[str] = []
    # M40 — Anchor the MM's evidence timestamp in conversation time, not
    # wall-clock-now. Each compiled MM summarizes the whole document, so
    # the earliest section date is the right "when did this evidence
    # start existing" anchor. Single-point (matches the synthetic single
    # source_id ``"{document_id}:doc"``); compute_trend will classify
    # NEW (recent doc relative to reference_date) or STALE (old doc).
    section_dates = [
        s.session_date for s in sections
        if getattr(s, "session_date", None) is not None
    ]
    evidence_anchor: datetime | None = min(section_dates) if section_dates else None
    source_timestamps = [evidence_anchor] if evidence_anchor is not None else None

    for entry in raw_models:
        if not isinstance(entry, dict):
            continue
        # ``or ""`` maps JSON ``null`` to an empty string; a plain
        # ``str(None)`` would persist the literal text "None".
        title = str(entry.get("title") or "").strip()
        content = str(entry.get("content") or "").strip()
        if not title or not content:
            continue
        # NOTE(review): only the first 8 characters of ``document_id`` go
        # into the id, so documents sharing that prefix within one bank
        # would produce the same ids — confirm ids are unique by prefix.
        model_id = f"mm:{document_id[:8]}:{_slugify(title)}"
        mm = MentalModel(
            model_id=model_id,
            bank_id=bank_id,
            title=title,
            content=content,
            scope=scope,
            source_ids=[f"{document_id}:doc"],
            revision=1,  # upsert assigns the real revision number
            refreshed_at=now,
            source_timestamps=source_timestamps,
        )
        try:
            await mental_model_store.upsert(mm, bank_id)
        except Exception as exc:  # noqa: BLE001
            # One bad row must not block the remaining models.
            _logger.warning(
                "mental_model_compile.upsert failed model_id=%s: %s",
                model_id,
                exc,
            )
            continue
        upserted.append(model_id)

    _logger.info(
        "mental_model_compile: doc=%s upserted %d models",
        document_id,
        len(upserted),
    )
    return upserted