Coverage for astrocyte/pipeline/question_annotator.py: 0%

58 statements  

« prev     ^ index     » next       coverage.py v7.15.0, created at 2026-07-04 05:24 +0000

1"""PR2 D.5: LLM-backed question annotator. 

2 

3Replaces the regex-based ``_extract_question_entities`` heuristic with 

4a single LLM call that returns both: 

5 

61. **Entities** (proper + common nouns the question hinges on) — fed to 

7 the entity strategy. Catches lowercase nouns the regex misses 

8 ("pendant", "obesity", "dog treats", "taekwondo", "France"), which 

9 the open-domain failure analysis identified as the pivot for 

10 specific-fact questions. 

11 

122. **Date range** (start + end, anchored against a ``reference_date``) — 

13 fed to the temporal strategy as a *narrow* window. PR2-D.1-4 LME 

14 temporal-reasoning was 0% because we passed the temporal strategy 

15 the full conversation date range; without question-side date parsing 

16 it had nothing to filter on. This module gives it a real window. 

17 

18One LLM call per question (~$0.0002 at gpt-4o-mini prices). Both 

19fields are optional — the orchestrator handles missing entities or 

20date_range gracefully. 

21 

22See: 

23- docs/_design/recall.md §6 (recall pipeline, mode classifier slot) 

24- PR2-D.1-4 LME gate analysis (temporal-reasoning 0% root cause) 

25""" 

26 

27from __future__ import annotations 

28 

29import json 

30import logging 

31from dataclasses import dataclass 

32from datetime import datetime, timezone 

33from typing import TYPE_CHECKING 

34 

35if TYPE_CHECKING: 

36 from astrocyte.provider import LLMProvider 

37 

38from astrocyte.types import Message 

39 

# Module-level logger; the logger name is spelled out explicitly rather than
# derived from ``__name__``.
logger = logging.getLogger("astrocyte.pipeline.question_annotator")

41 

42 

@dataclass
class QuestionAnnotation:
    """Structured view of a single question for the section recall driver.

    Neither field is required to carry data: an empty ``entities`` list
    skips the entity strategy, and a ``date_range`` of ``None`` skips the
    temporal strategy.
    """

    # Search keys the question hinges on; may be empty.
    entities: list[str]
    # (start, end) window for the temporal strategy, or None when the
    # question has no usable temporal anchor.
    date_range: tuple[datetime, datetime] | None

52 

53 

# Prompt template for the annotator LLM call. It is filled in with
# ``str.format`` using the ``{reference_date}`` and ``{question}``
# placeholders; literal JSON braces in the examples are doubled (``{{`` /
# ``}}``) so that ``format`` emits them as single braces.
_PROMPT = """You are an analyst extracting search keys from a user's question about a long conversation transcript.

Reference date (treat as "today" when resolving relative phrases): {reference_date}

Return ONLY a JSON object with these keys:

- "entities": array of strings — names, places, things, concepts, activities the question hinges on. Include BOTH proper nouns (people, places, brands) AND concrete common nouns (objects, activities, conditions). Skip stopwords (the, a, did, what, when, etc.) and tense markers. Aim for 1-6 entries.

- "date_range": object with ISO-8601 "start" and "end" date strings, OR {{"start": null, "end": null}} when the question has no temporal anchor. Use the reference date to resolve relative phrases like "last week" or "two months ago".

Examples (reference_date "22 October, 2023"):

Q: "What did Caroline research?"
→ {{"entities": ["Caroline", "research"], "date_range": {{"start": null, "end": null}}}}

Q: "In what country did Jolene's mother buy her the pendant?"
→ {{"entities": ["Jolene", "mother", "pendant", "country"], "date_range": {{"start": null, "end": null}}}}

Q: "What are John's suspected health problems?"
→ {{"entities": ["John", "health problems"], "date_range": {{"start": null, "end": null}}}}

Q: "What did Caroline say in May 2023?"
→ {{"entities": ["Caroline"], "date_range": {{"start": "2023-05-01", "end": "2023-05-31"}}}}

Q: "Who did Maria have dinner with on May 3, 2023?"
→ {{"entities": ["Maria", "dinner"], "date_range": {{"start": "2023-05-03", "end": "2023-05-03"}}}}

Q: "What was Caroline doing two months ago?"
→ {{"entities": ["Caroline"], "date_range": {{"start": "2023-08-01", "end": "2023-08-31"}}}}

Q: "What temporary job did Jon take to cover expenses?"
→ {{"entities": ["Jon", "temporary job", "expenses"], "date_range": {{"start": null, "end": null}}}}

Question: {question}
Output (JSON only):"""

89 

90 

async def annotate_question(
    provider: "LLMProvider",
    question: str,
    *,
    reference_date: str | None = None,
    model: str | None = None,
) -> QuestionAnnotation:
    """Single LLM call that returns entities + date_range.

    ``reference_date`` is the human-readable date string from the
    conv_tree (e.g. "22 October, 2023"). When None, the prompt uses a
    placeholder; date phrases referencing "today" can't resolve, but
    explicit dates still parse.

    Args:
        provider: LLM provider whose ``complete`` coroutine is awaited once.
        question: The user's question. Blank / whitespace-only questions
            short-circuit without calling the LLM.
        reference_date: Optional date string substituted into the prompt.
        model: Optional model name forwarded to ``provider.complete``.

    Returns:
        A ``QuestionAnnotation``. ``QuestionAnnotation(entities=[],
        date_range=None)`` is returned on LLM failure, on unparseable or
        missing completion text, or when the completion is valid JSON but
        not a JSON object — the orchestrator degrades gracefully (just
        skips the entity / temporal strategies for this question).
    """
    if not question.strip():
        return QuestionAnnotation(entities=[], date_range=None)

    prompt = _PROMPT.format(
        question=question,
        reference_date=reference_date or "(unknown)",
    )

    try:
        completion = await provider.complete(
            messages=[Message(role="user", content=prompt)],
            model=model,
            max_tokens=200,
            temperature=0.0,
            response_format={"type": "json_object"},
        )
    except Exception as exc:  # noqa: BLE001 — annotator failure shouldn't tank a question
        logger.warning(
            "annotate_question: LLM call failed for q=%r: %s: %s",
            question[:80],
            type(exc).__name__,
            exc,
        )
        return QuestionAnnotation(entities=[], date_range=None)

    raw_text = completion.text
    try:
        parsed = json.loads(raw_text)
    except (json.JSONDecodeError, TypeError):
        # TypeError covers a completion whose text is None / not a string;
        # json.loads raises it instead of JSONDecodeError in that case.
        logger.warning(
            "annotate_question: JSON parse failed for q=%r; raw=%r",
            question[:80],
            raw_text[:120] if isinstance(raw_text, str) else raw_text,
        )
        return QuestionAnnotation(entities=[], date_range=None)

    if not isinstance(parsed, dict):
        # Valid JSON, but not an object (e.g. a bare list or string):
        # there are no keys to read, so degrade instead of raising.
        logger.warning(
            "annotate_question: expected a JSON object for q=%r; got %s",
            question[:80],
            type(parsed).__name__,
        )
        return QuestionAnnotation(entities=[], date_range=None)

    raw_entities = parsed.get("entities")
    if isinstance(raw_entities, str):
        # A bare string is one entity; iterating it would yield characters.
        raw_entities = [raw_entities]
    elif not isinstance(raw_entities, list):
        raw_entities = []

    # De-duplicate case-insensitively while keeping first-seen order and
    # the original casing of the first occurrence.
    entities: list[str] = []
    seen: set[str] = set()
    for e in raw_entities:
        if not isinstance(e, str):
            continue
        clean = e.strip()
        if not clean:
            continue
        key = clean.casefold()
        if key in seen:
            continue
        seen.add(key)
        entities.append(clean)

    date_range = _parse_iso_range(parsed.get("date_range"))
    return QuestionAnnotation(entities=entities, date_range=date_range)

161 

162 

def _parse_iso_range(raw) -> tuple[datetime, datetime] | None:
    """Coerce ``{"start": "YYYY-MM-DD", "end": "YYYY-MM-DD"}`` → tz-aware
    UTC datetime tuple.

    The returned window is inclusive of both calendar days: it runs from
    00:00:00 on the earlier date to 23:59:59 on the later date. If the two
    dates arrive in reverse order they are swapped *before* the day
    boundaries are applied, so a swapped range covers the same window as
    the correctly ordered one.

    Args:
        raw: The ``date_range`` value from the parsed LLM output; expected
            to be a dict with ``"start"`` and ``"end"`` keys.

    Returns:
        ``(start, end)`` as tz-aware UTC datetimes, or ``None`` when
        ``raw`` is not a dict, either bound is missing / empty, or either
        bound does not parse as ``%Y-%m-%d``.
    """
    if not isinstance(raw, dict):
        return None
    start_s = raw.get("start")
    end_s = raw.get("end")
    if not start_s or not end_s:
        return None
    try:
        first_day = datetime.strptime(start_s, "%Y-%m-%d")
        last_day = datetime.strptime(end_s, "%Y-%m-%d")
    except (ValueError, TypeError):
        # ValueError: wrong format; TypeError: non-string value.
        return None
    if last_day < first_day:
        # LLM occasionally swaps; tolerate. Swap the bare dates so the
        # start-of-day / end-of-day times below land on the right ends.
        first_day, last_day = last_day, first_day
    start = first_day.replace(tzinfo=timezone.utc)
    end = last_day.replace(
        hour=23,
        minute=59,
        second=59,
        tzinfo=timezone.utc,
    )
    return (start, end)