Coverage for astrocyte/pipeline/query_intent.py: 100%

72 statements  

« prev     ^ index     » next       coverage.py v7.15.0, created at 2026-07-04 05:24 +0000

1"""Query intent classification — lightweight heuristic for biasing retrieval. 

2 

3Inspired by EdgeQuake's intent-based mode selection (see 

4``docs/_design/platform-positioning.md`` §EdgeQuake). Given a natural- 

5language query, classify it into one of six intents so the retrieval 

6layer can bias RRF weights, pick specific strategies, or adjust the 

7temporal half-life. 

8 

9Pure, sync, zero LLM. Regex-driven so it's fast (sub-millisecond per 

10query) and deterministic. When the classifier is uncertain, it returns 

11:class:`QueryIntent.UNKNOWN` — callers must fall back to the default 

12multi-strategy blend rather than making a judgment call from guessed 

13signal. 

14 

15References: 

16 

17- EdgeQuake: ``edgequake-query/src/keywords/intent.rs`` — regex-based 

18 intent → query-mode mapping (Factual / Relational / Comparative / 

19 Procedural / Exploratory). 

20- Hindsight: forced retrieval hierarchy in reflect — prioritizes 

21 consolidated observations over raw facts. Our intent is a recall-side 

22 analogue, biasing which *retrieval strategy* gets weight. 

23""" 

24 

25from __future__ import annotations 

26 

27import re 

28from dataclasses import dataclass 

29from enum import Enum 

30 

31 

class QueryIntent(str, Enum):
    """Coarse label for what a query is trying to accomplish.

    Members are also ``str`` instances, so each one compares equal to its
    lowercase value (``QueryIntent.FACTUAL == "factual"``).

    Each label corresponds to a retrieval-strategy bias:

    - ``FACTUAL``: "what / who / where / which / how many / how much" —
      a single-fact lookup. Semantic and keyword retrieval both
      contribute; graph rarely helps; temporal matters only when the
      query itself cites a time.
    - ``RELATIONAL``: "how does X relate to Y", "connection between" —
      graph traversal is the primary signal, with semantic as fallback.
    - ``COMPARATIVE``: "X vs Y", "difference between", "better than" —
      broader keyword recall helps surface both sides.
    - ``PROCEDURAL``: "how to / steps / procedure" — semantic does
      well; keyword is secondary, for specific tool names.
    - ``TEMPORAL``: "when / recently / last week / before / after" —
      the temporal strategy gets a weight boost.
    - ``EXPLORATORY``: "tell me about / what about / summary of" — all
      strategies contribute; the blend favors diversity over precision.
    - ``UNKNOWN``: no confident signal. Callers should NOT bias weights
      on ``UNKNOWN``; fall back to the default multi-strategy blend.
    """

    FACTUAL = "factual"
    RELATIONAL = "relational"
    COMPARATIVE = "comparative"
    PROCEDURAL = "procedural"
    TEMPORAL = "temporal"
    EXPLORATORY = "exploratory"
    UNKNOWN = "unknown"

62 

63 

@dataclass
class QueryIntentResult:
    """Outcome of classifying one query: the label plus supporting evidence."""

    # The chosen intent label.
    intent: QueryIntent
    # Confidence in that label, in the range 0.0 – 1.0.
    confidence: float
    # Which patterns triggered — kept for debugging.
    matched_signals: list[str]

71 

72 

# ---------------------------------------------------------------------------
# Pattern vocabulary
# ---------------------------------------------------------------------------
# One bank per intent. Every entry is a ``(regex source, weight)`` pair;
# the weight of each regex that matches is added to that intent's score.
# Keep regexes cheap (no backtracking) and specific enough that banks do
# not overlap across intents.

_TEMPORAL_PATTERNS: tuple[tuple[str, float], ...] = (
    # Explicit time words and "last <period / weekday>".
    (
        r"\b(when|recently|lately|yesterday|today|last\s+(week|month|year|night|monday|tuesday|wednesday|thursday|friday|saturday|sunday))\b",
        0.7,
    ),
    (
        r"\b(previous|last)\s+(monday|tuesday|wednesday|thursday|friday|saturday|sunday)\b",
        0.6,
    ),
    # Relative offsets: "<n> weekends ago", "<n> days before", ...
    (
        r"\b(two|three|four|\d+)\s+weekends?\s+(before|ago|earlier)\b",
        0.6,
    ),
    (
        r"\b(\d+|one|two|three|four|five|six|seven|eight|nine|ten)\s+(days?|weeks?|months?|years?)\s+(before|ago|earlier)\b",
        0.5,
    ),
    (
        r"\bthe\s+week\s+before\b",
        0.5,
    ),
    # Ordering prepositions.
    (
        r"\b(before|after|since|until|during)\b",
        0.4,
    ),
    # Bare year like "2023".
    (
        r"\b\d{4}\b",
        0.2,
    ),
    # Ordinal / recency adjectives.
    (
        r"\b(earlier|later|latest|oldest|newest|first|last)\b",
        0.3,
    ),
    # LongMemEval-style temporal question shapes ("how many days
    # between X and Y", "in what order", etc.). Without these, the
    # canonical LME temporal-reasoning question class silently fell
    # through to the default synthesis prompt and never got the
    # temporal-aware date-arithmetic guidance.
    (
        r"\bhow\s+(many|long)\s+(days?|weeks?|months?|years?|hours?|minutes?)\b",
        0.8,
    ),
    (
        r"\b(days?|weeks?|months?|years?)\s+(between|elapsed|passed|apart)\b",
        0.7,
    ),
    (
        r"\bin\s+(what|which)\s+(order|sequence)\b",
        0.7,
    ),
    (
        r"\bhappened\s+(first|last|before|after|in\s+order)\b",
        0.6,
    ),
    (
        r"\b(duration|interval|time\s+frame|time\s+span)\b",
        0.5,
    ),
    (
        r"\bhow\s+(many|long)\s+(days?|weeks?|months?|years?)\s+ago\b",
        0.8,
    ),
)

_RELATIONAL_PATTERNS: tuple[tuple[str, float], ...] = (
    # Explicit relation / connection vocabulary.
    (
        r"\b(relate|relat(ed|ion|ionship)|connect(ed|ion)?|linked?\s+to)\b",
        0.8,
    ),
    # "between X and Y" with single-word operands.
    (
        r"\bbetween\s+\w+\s+and\s+\w+",
        0.5,
    ),
    # Weaker causal / dependency hints.
    (
        r"\b(depend\w*|influence|cause|effect|impact)\b",
        0.3,
    ),
)

_COMPARATIVE_PATTERNS: tuple[tuple[str, float], ...] = (
    (
        r"\b(vs|versus)\b",
        0.9,
    ),
    (
        r"\bcompare(d)?\b|\bcomparison\b",
        0.8,
    ),
    (
        r"\bdifference\s+(between|from)\b|\bdiffer\w*\b",
        0.7,
    ),
    (
        r"\b(better|worse|more|less|greater|smaller)\s+than\b",
        0.6,
    ),
    (
        r"\bsimilar\s+to\b",
        0.4,
    ),
)

_PROCEDURAL_PATTERNS: tuple[tuple[str, float], ...] = (
    # "how to ...", "how do I ...", "how can I ...", "how should I ...".
    (
        r"\bhow\s+(to|do\s+i|can\s+i|should\s+i)\b",
        0.8,
    ),
    (
        r"\b(steps?|procedure|process|workflow|tutorial|guide)\b",
        0.5,
    ),
    (
        r"\b(configure|install|setup|set\s+up|enable|disable)\b",
        0.4,
    ),
)

_FACTUAL_PATTERNS: tuple[tuple[str, float], ...] = (
    # Question word anchored at the start of the query.
    (
        r"^\s*(what|who|where|which|how\s+many|how\s+much)\b",
        0.6,
    ),
    (
        r"\bis\s+the\b|\bare\s+the\b",
        0.2,
    ),
)

_EXPLORATORY_PATTERNS: tuple[tuple[str, float], ...] = (
    (
        r"\b(tell\s+me\s+about|what\s+about|anything\s+about)\b",
        0.8,
    ),
    (
        r"\b(summary|overview|describe|explain)\b",
        0.5,
    ),
)

137 

138 

# Every bank is compiled exactly once, at import time, with
# case-insensitive matching. The pair order below fixes the dict's
# iteration order, which is the order intents are scored in.
_COMPILED: dict[QueryIntent, list[tuple[re.Pattern[str], float]]] = {
    intent: [(re.compile(source, re.IGNORECASE), weight) for source, weight in bank]
    for intent, bank in (
        (QueryIntent.TEMPORAL, _TEMPORAL_PATTERNS),
        (QueryIntent.RELATIONAL, _RELATIONAL_PATTERNS),
        (QueryIntent.COMPARATIVE, _COMPARATIVE_PATTERNS),
        (QueryIntent.PROCEDURAL, _PROCEDURAL_PATTERNS),
        (QueryIntent.FACTUAL, _FACTUAL_PATTERNS),
        (QueryIntent.EXPLORATORY, _EXPLORATORY_PATTERNS),
    )
}


#: Minimum confidence required to report a concrete intent; anything
#: lower is reported as UNKNOWN. Tuned so that a single weak signal
#: (e.g. only a bare year) does not categorize a query. Callers rely on
#: UNKNOWN → default blend, so this value directly controls when
#: intent-aware biasing engages.
CONFIDENCE_THRESHOLD = 0.4

155 

156 

157# --------------------------------------------------------------------------- 

158# Public API 

159# --------------------------------------------------------------------------- 

160 

161 

def classify_query_intent(query: str) -> QueryIntentResult:
    """Pick the single dominant :class:`QueryIntent` for ``query``.

    Every intent's regex bank is run against the stripped query and the
    weights of the matching patterns are summed. The intent with the
    largest sum wins, provided that sum (capped at 1.0) reaches
    :data:`CONFIDENCE_THRESHOLD`; otherwise the result is labelled
    ``UNKNOWN`` but still carries the capped score and the signals of
    the best candidate. Equal sums are resolved in favor of the intent
    with more matched patterns (more diverse evidence).

    A query may legitimately mix intents (e.g. "when did Alice and Bob
    start working together" is both TEMPORAL and RELATIONAL). Only the
    dominant one is reported here; callers that want the full picture
    can use :func:`classify_all_intents`.

    Args:
        query: Natural-language query. ``None``, empty, or
            whitespace-only input yields ``UNKNOWN`` with confidence 0.0.

    Returns:
        A :class:`QueryIntentResult` whose ``matched_signals`` lists the
        source strings of the patterns that fired for the best intent.
    """
    text = (query or "").strip()
    if not text:
        return QueryIntentResult(QueryIntent.UNKNOWN, 0.0, [])

    # intent -> (raw weight sum, sources of the patterns that matched)
    tallies: dict[QueryIntent, tuple[float, list[str]]] = {}
    for intent, bank in _COMPILED.items():
        fired = [(regex.pattern, weight) for regex, weight in bank if regex.search(text)]
        # Accumulate sequentially from 0.0 so float rounding follows
        # bank order exactly.
        total = 0.0
        for _, weight in fired:
            total += weight
        if total > 0:
            tallies[intent] = (total, [source for source, _ in fired])

    if not tallies:
        return QueryIntentResult(QueryIntent.UNKNOWN, 0.0, [])

    # Rank by raw score, then by number of matched patterns, then by the
    # intent's string value. The last component only makes the choice
    # deterministic: on a full tie the greater string wins.
    best = max(
        tallies,
        key=lambda candidate: (
            tallies[candidate][0],
            len(tallies[candidate][1]),
            candidate.value,
        ),
    )
    top_score, top_signals = tallies[best]

    # Cap at 1.0 so downstream code can treat the value as a confidence.
    confidence = top_score if top_score < 1.0 else 1.0

    label = best if confidence >= CONFIDENCE_THRESHOLD else QueryIntent.UNKNOWN
    return QueryIntentResult(label, confidence, top_signals)

208 

209 

def classify_all_intents(query: str) -> dict[QueryIntent, float]:
    """Score ``query`` against every intent and return the whole map.

    Intended for callers that want to blend strategy weights from
    multi-intent evidence instead of committing to one dominant intent.

    Args:
        query: Natural-language query. ``None``, empty, or
            whitespace-only input yields an empty dict.

    Returns:
        Mapping of intent to score for every intent with at least one
        matching pattern. Scores are not normalized across intents —
        each is the raw sum of matched pattern weights, capped at 1.0.
        An empty dict means no signal.
    """
    text = (query or "").strip()
    capped_scores: dict[QueryIntent, float] = {}
    if not text:
        return capped_scores

    for intent, bank in _COMPILED.items():
        # Sequential accumulation from 0.0 keeps float rounding in bank order.
        total = 0.0
        for weight in (w for regex, w in bank if regex.search(text)):
            total += weight
        if total > 0:
            capped_scores[intent] = total if total < 1.0 else 1.0
    return capped_scores

231 

232 

233# --------------------------------------------------------------------------- 

234# Strategy weighting — how classifier output biases retrieval 

235# --------------------------------------------------------------------------- 

236 

237 

@dataclass
class StrategyWeights:
    """Per-strategy multipliers fed into RRF, one field per retrieval strategy.

    Once RRF fusion has produced ranked items, each strategy's
    contribution is scaled by its weight before the final sort. 1.0
    leaves a strategy untouched, values above 1.0 amplify it, and 0.0
    mutes it. Every field defaults to the neutral 1.0.
    """

    semantic: float = 1.0
    keyword: float = 1.0
    graph: float = 1.0
    temporal: float = 1.0

251 

252 

#: Canonical strategy weights for each intent. The biases are
#: deliberately conservative: no strategy is ever fully muted, so every
#: strategy still fuses and the weights only shift the balance. Values
#: follow the qualitative mapping in the :class:`QueryIntent` docstring.
INTENT_STRATEGY_WEIGHTS: dict[QueryIntent, StrategyWeights] = {
    QueryIntent.FACTUAL: StrategyWeights(
        semantic=1.2,
        keyword=1.2,
        graph=0.7,
        temporal=0.8,
    ),
    QueryIntent.RELATIONAL: StrategyWeights(
        semantic=0.9,
        keyword=0.8,
        graph=1.5,
        temporal=0.8,
    ),
    QueryIntent.COMPARATIVE: StrategyWeights(
        semantic=1.0,
        keyword=1.3,
        graph=1.0,
        temporal=0.8,
    ),
    QueryIntent.PROCEDURAL: StrategyWeights(
        semantic=1.2,
        keyword=1.0,
        graph=0.8,
        temporal=0.8,
    ),
    QueryIntent.TEMPORAL: StrategyWeights(
        semantic=0.9,
        keyword=0.9,
        graph=0.8,
        temporal=1.5,
    ),
    QueryIntent.EXPLORATORY: StrategyWeights(
        semantic=1.0,
        keyword=1.0,
        graph=1.0,
        temporal=1.0,
    ),
    # Neutral — never bias retrieval on a guess.
    QueryIntent.UNKNOWN: StrategyWeights(),
}

265 

266 

def weights_for_intent(intent: QueryIntent) -> StrategyWeights:
    """Return the canonical :class:`StrategyWeights` for ``intent``.

    An intent present in :data:`INTENT_STRATEGY_WEIGHTS` yields the
    object stored in that table (not a copy), so treat it as read-only.
    An intent missing from the table yields a fresh, neutral
    ``StrategyWeights()``.
    """
    try:
        return INTENT_STRATEGY_WEIGHTS[intent]
    except KeyError:
        return StrategyWeights()

269 return INTENT_STRATEGY_WEIGHTS.get(intent, StrategyWeights())