Coverage for astrocyte/pipeline/mental_model

1"""M11.2 — mental-model compile pass.

3Synthesizes stable user preferences / persona / habits / background

4from a document's sections and persists them as ``MentalModel`` rows

5in the bank's :class:`~astrocyte.provider.MentalModelStore`. Direct

6port of Hindsight's mental-model tier (saved-reflect summaries

7recalled BEFORE observations and raw memories).

9Different from :mod:`section_compile` (wiki pages):

11- **Wiki pages** are topic-clustered observations

12 ("User visited 3 doctors", "User worked on 5 model kits").

13 Generated per cluster via DBSCAN. Carry section-grain provenance.

14- **Mental models** are DURABLE user-profile statements

15 ("User prefers Sony cameras", "User practices Spanish 3×/week").

16 Generated per document via one LLM call across all sections.

17 Span the whole document — no per-cluster scope.

19Why both layers: a question like "recommend a hotel in Miami" wants

20the user-profile fact ("user prefers ocean views") regardless of

21which topic-cluster that was discussed in. Wiki pages cluster by

22topic; mental models cluster by user-profile dimension.

24Generic across LME / LoCoMo / future benches — the prompt asks for

25PROFILE facts, not bench-specific shapes.

27See:

28- ``docs/_design/recall.md`` §14 (M11 plan)

29- ``hindsight-api-slim/hindsight_api/engine/reflect/observations.py``

30 for Hindsight's analogous structure (their ``Observation`` rows on

31 ``MentalModel``).

32"""

34from __future__ import annotations

36import json

37import logging

38import re

39from datetime import datetime, timezone

40from typing import TYPE_CHECKING

42from astrocyte.types import MentalModel, Message

44if TYPE_CHECKING:

45 from astrocyte.provider import LLMProvider, MentalModelStore, PageIndexStore

47_logger = logging.getLogger("astrocyte.pipeline.mental_model_compile")

50_COMPILE_PROMPT = """\

51You are extracting STABLE user profile facts from a conversation \

52transcript. The user is the speaker labeled "user" in chat turns.

54Output a JSON object with one key, ``models``, containing an array \

55of mental-model objects. Each model has:

56- "title": 3-7 word noun phrase naming the profile dimension \

57(e.g. "Photography gear preference", "Diet", "Dance interests")

58- "content": one declarative sentence stating the user's stable \

59preference, habit, persona, or background. Use specific entities \

60when the user named them. The reader will use this as authoritative \

61context for "recommend X" / "what do I like" / "tell me about my X" \

62questions.

64What counts as a mental model (extract these):

65- Stable preferences: "User prefers Sony cameras over Canon."

66- Persona / background: "User is a parent of two children, lives in \

67Brooklyn, works in tech."

68- Recurring habits: "User attends weekly hip-hop dance classes at \

69Street Beats."

70- Stable opinions: "User finds yoga more relaxing than running."

71- Hobbies / projects: "User is building a home maintenance app."

72- Skills / expertise: "User has 10 years of photography experience."

73- Values / goals: "User is saving for a Europe trip in 2024."

75What does NOT count (ignore these):

76- One-off events ("Yesterday I went to the doctor")

77- Casual mentions without preference signal ("I had coffee")

78- Things the assistant said about the user (only user's own claims)

79- Speculation or one-time questions

81Rules:

82- Each model must reflect something the user EXPLICITLY stated or \

83strongly implied across multiple sections (not single-mention)

84- Be specific about entities when known (brand names, places, etc.)

85- Cap at 12 models total — prefer the most-discussed dimensions

86- If sections are generic chitchat with no stable profile signals, \

87return ``{{"models": []}}``

89Examples of good output:

90{{"models": [

91 {{"title": "Photography gear preference", "content": "User shoots \

92with Sony A7 III and prefers Sony-compatible accessories like the 24-70mm \

93G Master lens."}},

94 {{"title": "Movie genre taste", "content": "User strongly prefers \

95stand-up comedy specials on Netflix, especially recent ones (Ali Wong, \

96John Mulaney)."}},

97 {{"title": "Dance practice", "content": "User attends weekly hip-hop \

98classes at Street Beats and enjoys contemporary as a secondary style."}},

99 {{"title": "Diet", "content": "User follows a mostly-vegetarian \

100diet with occasional fish; avoids dairy."}}

101]}}

102

103OUTPUT MUST BE VALID JSON. No prose around it.

104

105Section summaries (chronological):

106{sections}

107"""

108

109

110def _slugify(s: str) -> str:

111 s = s.lower().strip()

112 s = re.sub(r"[^a-z0-9\s-]", "", s)

113 s = re.sub(r"\s+", "-", s)

114 return s[:60] or "model"

115

116

117def _format_sections_for_prompt(sections, max_sections: int = 60) -> str:

118 """Render section summaries chronologically for the compile prompt.

119

120 Cap at ``max_sections`` to keep the prompt under ~6K tokens. We

121 rank by line_num ascending (chronological order through the

122 document) and take the first ``max_sections``; LME chat-history

123 documents commonly have 30-50 sections so this rarely trims.

124 """

125 chronological = sorted(sections, key=lambda s: s.line_num)

126 rendered = []

127 for s in chronological[:max_sections]:

128 summary = (s.summary or s.title or "").strip()

129 if not summary:

130 continue

131 date = s.session_date.strftime("%Y-%m-%d") if getattr(s, "session_date", None) is not None else "no-date"

132 rendered.append(f"[line={s.line_num} date={date}] {summary}")

133 return "\n".join(rendered)

134

135

def _parse_model_entries(raw_text, document_id: str) -> list[tuple[str, str]]:
    """Decode the LLM reply into validated ``(title, content)`` pairs.

    Any malformed reply (missing text, invalid JSON, a JSON value that is
    not an object, a non-list ``models`` key) yields an empty list rather
    than an exception; individual entries that are not objects or lack a
    non-empty title/content are dropped.
    """
    try:
        data = json.loads(raw_text)
    except (json.JSONDecodeError, TypeError) as exc:
        # TypeError covers a provider that returned no text at all.
        _logger.warning(
            "mental_model_compile: JSON parse failed doc=%s: %s text=%r",
            document_id,
            exc,
            (raw_text or "")[:200],
        )
        return []
    if not isinstance(data, dict):
        # Valid JSON but not the requested object (e.g. a bare array).
        _logger.warning(
            "mental_model_compile: expected JSON object doc=%s, got %s",
            document_id,
            type(data).__name__,
        )
        return []
    raw_models = data.get("models") or []
    if not isinstance(raw_models, list):
        return []

    def _as_text(value) -> str:
        # JSON null must count as "missing", not become the string "None".
        return "" if value is None else str(value).strip()

    pairs: list[tuple[str, str]] = []
    for entry in raw_models:
        if not isinstance(entry, dict):
            continue
        title = _as_text(entry.get("title"))
        content = _as_text(entry.get("content"))
        if title and content:
            pairs.append((title, content))
    return pairs


async def compile_mental_models_for_document(
    *,
    page_index_store: PageIndexStore,
    mental_model_store: MentalModelStore,
    bank_id: str,
    document_id: str,
    provider: LLMProvider,
    model: str | None = None,
) -> list[str]:
    """Extract mental models for one document and persist via
    :class:`MentalModelStore`.

    Returns the list of upserted ``model_id``s. Idempotent: when
    models with the same ``model_id`` already exist for this bank, they
    are bumped to a new revision (the store's standard upsert
    semantics) rather than duplicated.

    Scoping: ``scope = f"document:{document_id}"`` — mirrors the wiki
    tier's pattern so the bench's per-question retrieval can filter
    cleanly to the right document without cross-contamination across
    sibling LME conversations in the same bank.

    Best-effort: an LLM failure, an unparseable reply, or a failed
    upsert is logged at WARNING level and skipped; none of them raise.
    An empty list is returned when the document has no sections, when no
    section has any text to summarize, or when nothing could be
    extracted.
    """
    sections = await page_index_store.load_skeleton(document_id)
    if not sections:
        return []

    rendered_sections = _format_sections_for_prompt(sections)
    if not rendered_sections:
        # Every section is blank: there is nothing to extract from, so
        # skip the LLM round-trip instead of prompting on empty input.
        return []

    prompt = _COMPILE_PROMPT.format(sections=rendered_sections)
    try:
        completion = await provider.complete(
            [Message(role="user", content=prompt)],
            model=model,
            max_tokens=900,
            temperature=0.0,
            response_format={"type": "json_object"},
        )
    except Exception as exc:  # noqa: BLE001
        _logger.warning(
            "mental_model_compile: LLM call failed doc=%s: %s",
            document_id,
            exc,
        )
        return []

    entries = _parse_model_entries(completion.text, document_id)

    now = datetime.now(tz=timezone.utc)
    scope = f"document:{document_id}"
    upserted: list[str] = []
    # M40 — Anchor the MM's evidence timestamp in conversation time, not
    # wall-clock-now. Each compiled MM summarizes the whole document, so
    # the earliest section date is the right "when did this evidence
    # start existing" anchor. Single-point (matches the synthetic single
    # source_id ``"{document_id}:doc"``); compute_trend will classify
    # NEW (recent doc relative to reference_date) or STALE (old doc).
    section_dates = [
        s.session_date for s in sections
        if getattr(s, "session_date", None) is not None
    ]
    evidence_anchor: datetime | None = min(section_dates) if section_dates else None
    source_timestamps = [evidence_anchor] if evidence_anchor is not None else None

    for title, content in entries:
        # NOTE(review): only the first 8 characters of document_id go into
        # the id, so two documents in the same bank sharing that prefix
        # and a title slug would map to the same model_id. Left unchanged
        # because existing rows are keyed by this scheme — confirm ids
        # are unique within 8 chars (e.g. UUID-like).
        model_id = f"mm:{document_id[:8]}:{_slugify(title)}"
        mm = MentalModel(
            model_id=model_id,
            bank_id=bank_id,
            title=title,
            content=content,
            scope=scope,
            source_ids=[f"{document_id}:doc"],
            revision=1,  # upsert assigns the real revision number
            refreshed_at=now,
            source_timestamps=source_timestamps,
        )
        try:
            await mental_model_store.upsert(mm, bank_id)
        except Exception as exc:  # noqa: BLE001
            # One failed row must not abort the rest of the batch.
            _logger.warning(
                "mental_model_compile.upsert failed model_id=%s: %s",
                model_id,
                exc,
            )
            continue
        upserted.append(model_id)

    _logger.info(
        "mental_model_compile: doc=%s upserted %d models",
        document_id,
        len(upserted),
    )
    return upserted

Coverage for astrocyte/pipeline/mental_model_compile.py: 92%

64 statements