Coverage for astrocyte/pipeline/query

"""Query intent classification — lightweight heuristic for biasing retrieval.

Inspired by EdgeQuake's intent-based mode selection (see
``docs/_design/platform-positioning.md`` §EdgeQuake). Given a natural-
language query, classify it into one of six intents (or ``UNKNOWN``) so
the retrieval layer can bias RRF weights, pick specific strategies, or
adjust the temporal half-life.

Pure, sync, zero LLM. Regex-driven so it's fast (sub-millisecond per
query) and deterministic. When the classifier is uncertain, it returns
:class:`QueryIntent.UNKNOWN` — callers must fall back to the default
multi-strategy blend rather than making a judgment call from guessed
signal.

References:

- EdgeQuake: ``edgequake-query/src/keywords/intent.rs`` — regex-based
  intent → query-mode mapping (Factual / Relational / Comparative /
  Procedural / Exploratory).
- Hindsight: forced retrieval hierarchy in reflect — prioritizes
  consolidated observations over raw facts. Our intent is a recall-side
  analogue, biasing which *retrieval strategy* gets weight.
"""

from __future__ import annotations

import re
from dataclasses import dataclass, replace
from enum import Enum

class QueryIntent(str, Enum):
    """Coarse classification of query purpose.

    Members are ``str`` subclasses, so each compares equal to its
    lowercase string value (``QueryIntent.FACTUAL == "factual"``).

    Values map to retrieval strategy biases:

    - ``FACTUAL``: "what / who / when / where / how many" — single fact
      lookup. Semantic + keyword both contribute; graph rarely helps;
      temporal matters only if the query cites a time.
    - ``RELATIONAL``: "how does X relate to Y", "connection between" —
      graph traversal is the primary signal; semantic as fallback.
    - ``COMPARATIVE``: "X vs Y", "difference between", "better than" —
      benefits from broader keyword recall to surface both sides.
    - ``PROCEDURAL``: "how to / steps / procedure" — semantic does well;
      keyword secondary for specific tool names.
    - ``TEMPORAL``: "when / recently / last week / before / after" — the
      temporal strategy gets a weight boost.
    - ``EXPLORATORY``: "tell me about / what about / summary of" — all
      strategies contribute; blend favors diversity over precision.
    - ``UNKNOWN``: no confident signal. Callers should NOT bias weights
      on UNKNOWN — silently fall back to the default multi-strategy
      blend.
    """

    FACTUAL = "factual"
    RELATIONAL = "relational"
    COMPARATIVE = "comparative"
    PROCEDURAL = "procedural"
    TEMPORAL = "temporal"
    EXPLORATORY = "exploratory"
    UNKNOWN = "unknown"

@dataclass
class QueryIntentResult:
    """Classification result with a confidence signal."""

    # The chosen intent.
    intent: QueryIntent
    # Confidence in the range 0.0 – 1.0.
    confidence: float
    # Which patterns triggered — useful for debug.
    matched_signals: list[str]

# ---------------------------------------------------------------------------
# Pattern vocabulary
# ---------------------------------------------------------------------------
# Each pattern is a (regex, weight) pair. Weight accumulates into the
# intent's score; regexes should be cheap (no backtracking) and specific
# enough to avoid cross-intent overlap.

80_TEMPORAL_PATTERNS: tuple[tuple[str, float], ...] = (

81 (

83 0.7,

84 ),

87 (

89 0.5,

90 ),

91 (r"\bthe\s+week\s+before\b", 0.5),

92 (r"\b(before|after|since|until|during)\b", 0.4),

93 (r"\b\d{4}\b", 0.2), # bare year like "2023"

95 # LongMemEval-style temporal questions — typical shapes

96 # ("how many days between X and Y", "in what order", etc.). Without

97 # these, the canonical LME temporal-reasoning question class

98 # silently fell through to the default synthesis prompt and never

99 # got the temporal-aware date-arithmetic guidance.

102 (r"\bin\s+(what|which)\s+(order|sequence)\b", 0.7),

103 (r"\bhappened\s+(first|last|before|after|in\s+order)\b", 0.6),

104 (r"\b(duration|interval|time\s+frame|time\s+span)\b", 0.5),

105 (r"\bhow\s+(many|long)\s+(days?|weeks?|months?|years?)\s+ago\b", 0.8),

106)

107

# (regex, weight) pairs whose weights accumulate into the RELATIONAL score.
# NOTE(review): the extract this table was restored from skips the source
# line directly after the opening parenthesis — confirm no pattern was dropped.
_RELATIONAL_PATTERNS: tuple[tuple[str, float], ...] = (
    (r"\bbetween\s+\w+\s+and\s+\w+", 0.5),
    (r"\b(depend\w*|influence|cause|effect|impact)\b", 0.3),
)

113

# (regex, weight) pairs whose weights accumulate into the COMPARATIVE score.
# NOTE(review): the extract this table was restored from skips one source
# line between the "difference" and "similar to" entries — confirm no
# pattern was dropped.
_COMPARATIVE_PATTERNS: tuple[tuple[str, float], ...] = (
    (r"\b(vs|versus)\b", 0.9),
    (r"\bcompare(d)?\b|\bcomparison\b", 0.8),
    (r"\bdifference\s+(between|from)\b|\bdiffer\w*\b", 0.7),
    (r"\bsimilar\s+to\b", 0.4),
)

121

# (regex, weight) pairs whose weights accumulate into the PROCEDURAL score.
# NOTE(review): the extract this table was restored from skips two source
# lines before the closing parenthesis — confirm no pattern was dropped.
_PROCEDURAL_PATTERNS: tuple[tuple[str, float], ...] = (
    (r"\bhow\s+(to|do\s+i|can\s+i|should\s+i)\b", 0.8),
)

127

# (regex, weight) pairs whose weights accumulate into the FACTUAL score.
# NOTE(review): the extract this table was restored from skips the source
# line directly after the opening parenthesis — confirm no pattern was dropped.
_FACTUAL_PATTERNS: tuple[tuple[str, float], ...] = (
    (r"\bis\s+the\b|\bare\s+the\b", 0.2),
)

132

# (regex, weight) pairs whose weights accumulate into the EXPLORATORY score.
_EXPLORATORY_PATTERNS: tuple[tuple[str, float], ...] = (
    (r"\b(tell\s+me\s+about|what\s+about|anything\s+about)\b", 0.8),
    (r"\b(summary|overview|describe|explain)\b", 0.5),
)

137

138

# Compile once at import so classification never pays regex-compilation
# cost per query. Every pattern is case-insensitive. Insertion order is
# TEMPORAL, RELATIONAL, COMPARATIVE, PROCEDURAL, FACTUAL, EXPLORATORY.
_COMPILED: dict[QueryIntent, list[tuple[re.Pattern[str], float]]] = {
    intent: [(re.compile(regex, re.IGNORECASE), weight) for regex, weight in bank]
    for intent, bank in (
        (QueryIntent.TEMPORAL, _TEMPORAL_PATTERNS),
        (QueryIntent.RELATIONAL, _RELATIONAL_PATTERNS),
        (QueryIntent.COMPARATIVE, _COMPARATIVE_PATTERNS),
        (QueryIntent.PROCEDURAL, _PROCEDURAL_PATTERNS),
        (QueryIntent.FACTUAL, _FACTUAL_PATTERNS),
        (QueryIntent.EXPLORATORY, _EXPLORATORY_PATTERNS),
    )
}

148

149

#: Confidence threshold below which the classifier returns UNKNOWN.
#: Tuned so that single weak signals (e.g. only a bare year) don't
#: categorize a query. Callers rely on UNKNOWN → default blend, so this
#: threshold directly shapes when intent-aware biasing engages.
CONFIDENCE_THRESHOLD = 0.4

155

156

# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------

160

161

def classify_query_intent(query: str) -> QueryIntentResult:
    """Classify a query into a single dominant :class:`QueryIntent`.

    Scans the query against each intent's regex bank, accumulates the
    weight of every pattern that matches, and returns the highest-scoring
    intent — provided its score (capped at 1.0) reaches
    :data:`CONFIDENCE_THRESHOLD`. Ties break toward the intent with more
    matched signals (more diverse evidence), then toward the intent whose
    string value sorts last, so the result is deterministic.

    A query can legitimately combine intents (e.g. "when did Alice and
    Bob start working together" is both TEMPORAL and RELATIONAL). This
    classifier picks the dominant one; downstream callers that want
    richer signals can use :func:`classify_all_intents`.

    Args:
        query: Natural-language query. ``None``, empty, or
            whitespace-only input is treated as "no signal".

    Returns:
        A :class:`QueryIntentResult`. When nothing matches, the result is
        ``UNKNOWN`` with confidence 0.0 and no signals. When the best
        score is below the threshold, the intent is ``UNKNOWN`` but the
        confidence and matched signals of the best candidate are still
        reported.
    """
    query_clean = (query or "").strip()
    if not query_clean:
        return QueryIntentResult(QueryIntent.UNKNOWN, 0.0, [])

    scores: dict[QueryIntent, float] = {}
    signals: dict[QueryIntent, list[str]] = {}

    for intent, patterns in _COMPILED.items():
        score = 0.0
        hits: list[str] = []
        for pattern, weight in patterns:
            if pattern.search(query_clean):
                score += weight
                hits.append(pattern.pattern)
        # Only intents with at least some evidence become candidates.
        if score > 0:
            scores[intent] = score
            signals[intent] = hits

    if not scores:
        return QueryIntentResult(QueryIntent.UNKNOWN, 0.0, [])

    # Rank by raw score, then by number of matched signals, then by the
    # intent's string value. The last component is unique per intent, so
    # the winner never depends on dict iteration order.
    def _key(intent: QueryIntent) -> tuple[float, int, str]:
        return (scores[intent], len(signals[intent]), intent.value)

    best = max(scores, key=_key)
    confidence = min(scores[best], 1.0)  # Cap at 1.0 for downstream reasoning.

    if confidence < CONFIDENCE_THRESHOLD:
        return QueryIntentResult(QueryIntent.UNKNOWN, confidence, signals[best])

    return QueryIntentResult(best, confidence, signals[best])

208

209

def classify_all_intents(query: str) -> dict[QueryIntent, float]:
    """Return the score of every intent that shows any signal.

    Useful when a caller wants to blend strategy weights by multi-intent
    evidence rather than pick a single dominant intent.

    Scores are not normalized across intents — each is the raw sum of the
    weights of that intent's matching patterns, capped at 1.0. No
    confidence threshold is applied here.

    Args:
        query: Natural-language query. ``None``, empty, or
            whitespace-only input yields an empty dict.

    Returns:
        Mapping from intent to capped score, containing only intents with
        a score above zero. An empty dict means no signal.
    """
    query_clean = (query or "").strip()
    if not query_clean:
        return {}

    scores: dict[QueryIntent, float] = {}
    for intent, patterns in _COMPILED.items():
        # Explicit accumulation (rather than sum()) keeps the float
        # arithmetic identical to classify_query_intent.
        score = 0.0
        for pattern, weight in patterns:
            if pattern.search(query_clean):
                score += weight
        if score > 0:
            scores[intent] = min(score, 1.0)
    return scores

231

232

# ---------------------------------------------------------------------------
# Strategy weighting — how classifier output biases retrieval
# ---------------------------------------------------------------------------

236

237

@dataclass
class StrategyWeights:
    """RRF input weights keyed by retrieval strategy name.

    After RRF fusion produces ranked items, each strategy's contribution
    is multiplied by its weight before final sort. A weight of 1.0 is
    neutral; > 1.0 amplifies that strategy; 0.0 mutes it.

    Every field defaults to the neutral 1.0, so ``StrategyWeights()``
    applies no bias.
    """

    semantic: float = 1.0
    keyword: float = 1.0
    graph: float = 1.0
    temporal: float = 1.0

251

252

#: Default strategy weights per intent. Conservative biases — no strategy
#: is fully muted (everything fuses; biases shift the balance). Tuned by
#: the qualitative mapping in :class:`QueryIntent` docstrings.
INTENT_STRATEGY_WEIGHTS: dict[QueryIntent, StrategyWeights] = {
    QueryIntent.FACTUAL: StrategyWeights(semantic=1.2, keyword=1.2, graph=0.7, temporal=0.8),
    QueryIntent.RELATIONAL: StrategyWeights(semantic=0.9, keyword=0.8, graph=1.5, temporal=0.8),
    QueryIntent.COMPARATIVE: StrategyWeights(semantic=1.0, keyword=1.3, graph=1.0, temporal=0.8),
    QueryIntent.PROCEDURAL: StrategyWeights(semantic=1.2, keyword=1.0, graph=0.8, temporal=0.8),
    QueryIntent.TEMPORAL: StrategyWeights(semantic=0.9, keyword=0.9, graph=0.8, temporal=1.5),
    QueryIntent.EXPLORATORY: StrategyWeights(semantic=1.0, keyword=1.0, graph=1.0, temporal=1.0),
    QueryIntent.UNKNOWN: StrategyWeights(),  # Neutral — no bias on guess.
}

265

266

def weights_for_intent(intent: QueryIntent) -> StrategyWeights:
    """Look up canonical :class:`StrategyWeights` for an intent.

    Args:
        intent: The intent to look up in :data:`INTENT_STRATEGY_WEIGHTS`.

    Returns:
        A fresh :class:`StrategyWeights` equal to the table entry for
        ``intent``, or neutral weights (all 1.0) when the intent has no
        entry. The result is a copy: ``StrategyWeights`` is mutable, so
        returning the table entry itself would let a caller that adjusts
        the weights silently change the defaults for every later query.
    """
    canonical = INTENT_STRATEGY_WEIGHTS.get(intent)
    if canonical is None:
        return StrategyWeights()
    return replace(canonical)

Coverage for astrocyte/pipeline/query_intent.py: 100%

72 statements