Coverage for astrocyte/pipeline/query_intent.py: 100%
72 statements
« prev ^ index » next coverage.py v7.15.0, created at 2026-07-04 05:24 +0000
« prev ^ index » next coverage.py v7.15.0, created at 2026-07-04 05:24 +0000
1"""Query intent classification — lightweight heuristic for biasing retrieval.
3Inspired by EdgeQuake's intent-based mode selection (see
4``docs/_design/platform-positioning.md`` §EdgeQuake). Given a natural-
5language query, classify it into one of six intents so the retrieval
6layer can bias RRF weights, pick specific strategies, or adjust the
7temporal half-life.
9Pure, sync, zero LLM. Regex-driven so it's fast (sub-millisecond per
10query) and deterministic. When the classifier is uncertain, it returns
11:class:`QueryIntent.UNKNOWN` — callers must fall back to the default
12multi-strategy blend rather than making a judgment call from guessed
13signal.
15References:
17- EdgeQuake: ``edgequake-query/src/keywords/intent.rs`` — regex-based
18 intent → query-mode mapping (Factual / Relational / Comparative /
19 Procedural / Exploratory).
20- Hindsight: forced retrieval hierarchy in reflect — prioritizes
21 consolidated observations over raw facts. Our intent is a recall-side
22 analogue, biasing which *retrieval strategy* gets weight.
23"""
25from __future__ import annotations
27import re
28from dataclasses import dataclass
29from enum import Enum
class QueryIntent(str, Enum):
    """Coarse label describing what a query is trying to accomplish.

    Each member hints at which retrieval strategies deserve extra weight:

    - ``FACTUAL``: a single-fact lookup that opens with "what / who /
      where / which / how many / how much". Semantic and keyword search
      both contribute, graph traversal rarely helps, and temporal only
      matters when the query itself cites a time.
    - ``RELATIONAL``: "how does X relate to Y", "connection between".
      Graph traversal is the primary signal, with semantic as fallback.
    - ``COMPARATIVE``: "X vs Y", "difference between", "better than".
      Broader keyword recall helps surface both sides of the comparison.
    - ``PROCEDURAL``: "how to / steps / procedure". Semantic search does
      well; keyword is secondary, mainly for specific tool names.
    - ``TEMPORAL``: "when / recently / last week / before / after". The
      temporal strategy gets a weight boost.
    - ``EXPLORATORY``: "tell me about / what about / summary of". Every
      strategy contributes; the blend favors diversity over precision.
    - ``UNKNOWN``: no confident signal. Callers should NOT bias weights
      on UNKNOWN; they should quietly use the default multi-strategy
      blend instead.

    Members are also ``str`` instances, so they compare equal to their
    lowercase string values.
    """

    FACTUAL = "factual"
    RELATIONAL = "relational"
    COMPARATIVE = "comparative"
    PROCEDURAL = "procedural"
    TEMPORAL = "temporal"
    EXPLORATORY = "exploratory"
    UNKNOWN = "unknown"
@dataclass
class QueryIntentResult:
    """Outcome of classifying one query: the label plus its evidence."""

    # The chosen intent label.
    intent: QueryIntent
    # Strength of the evidence, in the range 0.0 – 1.0.
    confidence: float
    # Regex patterns that triggered; kept around for debugging.
    matched_signals: list[str]
73# ---------------------------------------------------------------------------
74# Pattern vocabulary
75# ---------------------------------------------------------------------------
76# Each pattern is a (regex, weight) pair. Weight accumulates into the
77# intent's score; regexes should be cheap (no backtracking) and specific
78# enough to avoid cross-intent overlap.
80_TEMPORAL_PATTERNS: tuple[tuple[str, float], ...] = (
81 (
82 r"\b(when|recently|lately|yesterday|today|last\s+(week|month|year|night|monday|tuesday|wednesday|thursday|friday|saturday|sunday))\b",
83 0.7,
84 ),
85 (r"\b(previous|last)\s+(monday|tuesday|wednesday|thursday|friday|saturday|sunday)\b", 0.6),
86 (r"\b(two|three|four|\d+)\s+weekends?\s+(before|ago|earlier)\b", 0.6),
87 (
88 r"\b(\d+|one|two|three|four|five|six|seven|eight|nine|ten)\s+(days?|weeks?|months?|years?)\s+(before|ago|earlier)\b",
89 0.5,
90 ),
91 (r"\bthe\s+week\s+before\b", 0.5),
92 (r"\b(before|after|since|until|during)\b", 0.4),
93 (r"\b\d{4}\b", 0.2), # bare year like "2023"
94 (r"\b(earlier|later|latest|oldest|newest|first|last)\b", 0.3),
95 # LongMemEval-style temporal questions — typical shapes
96 # ("how many days between X and Y", "in what order", etc.). Without
97 # these, the canonical LME temporal-reasoning question class
98 # silently fell through to the default synthesis prompt and never
99 # got the temporal-aware date-arithmetic guidance.
100 (r"\bhow\s+(many|long)\s+(days?|weeks?|months?|years?|hours?|minutes?)\b", 0.8),
101 (r"\b(days?|weeks?|months?|years?)\s+(between|elapsed|passed|apart)\b", 0.7),
102 (r"\bin\s+(what|which)\s+(order|sequence)\b", 0.7),
103 (r"\bhappened\s+(first|last|before|after|in\s+order)\b", 0.6),
104 (r"\b(duration|interval|time\s+frame|time\s+span)\b", 0.5),
105 (r"\bhow\s+(many|long)\s+(days?|weeks?|months?|years?)\s+ago\b", 0.8),
106)
108_RELATIONAL_PATTERNS: tuple[tuple[str, float], ...] = (
109 (r"\b(relate|relat(ed|ion|ionship)|connect(ed|ion)?|linked?\s+to)\b", 0.8),
110 (r"\bbetween\s+\w+\s+and\s+\w+", 0.5),
111 (r"\b(depend\w*|influence|cause|effect|impact)\b", 0.3),
112)
114_COMPARATIVE_PATTERNS: tuple[tuple[str, float], ...] = (
115 (r"\b(vs|versus)\b", 0.9),
116 (r"\bcompare(d)?\b|\bcomparison\b", 0.8),
117 (r"\bdifference\s+(between|from)\b|\bdiffer\w*\b", 0.7),
118 (r"\b(better|worse|more|less|greater|smaller)\s+than\b", 0.6),
119 (r"\bsimilar\s+to\b", 0.4),
120)
122_PROCEDURAL_PATTERNS: tuple[tuple[str, float], ...] = (
123 (r"\bhow\s+(to|do\s+i|can\s+i|should\s+i)\b", 0.8),
124 (r"\b(steps?|procedure|process|workflow|tutorial|guide)\b", 0.5),
125 (r"\b(configure|install|setup|set\s+up|enable|disable)\b", 0.4),
126)
128_FACTUAL_PATTERNS: tuple[tuple[str, float], ...] = (
129 (r"^\s*(what|who|where|which|how\s+many|how\s+much)\b", 0.6),
130 (r"\bis\s+the\b|\bare\s+the\b", 0.2),
131)
133_EXPLORATORY_PATTERNS: tuple[tuple[str, float], ...] = (
134 (r"\b(tell\s+me\s+about|what\s+about|anything\s+about)\b", 0.8),
135 (r"\b(summary|overview|describe|explain)\b", 0.5),
136)
# Built once at import time so classification never pays for regex
# compilation. Every pattern is matched case-insensitively.
_COMPILED: dict[QueryIntent, list[tuple[re.Pattern[str], float]]] = {
    intent: [(re.compile(source, re.IGNORECASE), weight) for source, weight in table]
    for intent, table in (
        (QueryIntent.TEMPORAL, _TEMPORAL_PATTERNS),
        (QueryIntent.RELATIONAL, _RELATIONAL_PATTERNS),
        (QueryIntent.COMPARATIVE, _COMPARATIVE_PATTERNS),
        (QueryIntent.PROCEDURAL, _PROCEDURAL_PATTERNS),
        (QueryIntent.FACTUAL, _FACTUAL_PATTERNS),
        (QueryIntent.EXPLORATORY, _EXPLORATORY_PATTERNS),
    )
}
#: Minimum confidence the winning intent needs; anything lower is
#: reported as UNKNOWN. Chosen so that one weak cue on its own (for
#: example a lone four-digit year) cannot categorize a query. Because
#: callers map UNKNOWN to the default blend, this value directly
#: controls when intent-aware biasing kicks in.
CONFIDENCE_THRESHOLD = 0.4
157# ---------------------------------------------------------------------------
158# Public API
159# ---------------------------------------------------------------------------
def classify_query_intent(query: str) -> QueryIntentResult:
    """Pick the single dominant :class:`QueryIntent` for *query*.

    Every intent's regex bank is run against the stripped query, and the
    weights of the patterns that match are summed into that intent's raw
    score. The winner is the intent with the highest raw score; ties go
    to the intent with more matched patterns, then to the intent's
    string value, so the outcome is always deterministic.

    Mixed-intent queries (e.g. "when did Alice and Bob start working
    together" carries both TEMPORAL and RELATIONAL cues) still produce
    one label; :func:`classify_all_intents` exposes the full score map.

    The returned ``confidence`` is the winner's raw score clamped to 1.0
    and ``matched_signals`` lists the regex sources that fired for it.
    The label is :attr:`QueryIntent.UNKNOWN` when the query is empty or
    nothing matched (confidence 0.0, no signals), and also when the
    confidence falls below :data:`CONFIDENCE_THRESHOLD` — in which case
    the would-be winner's confidence and signals are still reported.
    """
    text = (query or "").strip()
    if not text:
        return QueryIntentResult(QueryIntent.UNKNOWN, 0.0, [])

    totals: dict[QueryIntent, float] = {}
    fired: dict[QueryIntent, list[str]] = {}
    for intent, bank in _COMPILED.items():
        matched = [(rx.pattern, weight) for rx, weight in bank if rx.search(text)]
        # Plain left-to-right accumulation keeps the float result stable.
        total = 0.0
        for _, weight in matched:
            total += weight
        if total > 0:
            totals[intent] = total
            fired[intent] = [source for source, _ in matched]

    if not totals:
        return QueryIntentResult(QueryIntent.UNKNOWN, 0.0, [])

    # Rank by raw score, then evidence count, then intent value.
    winner = max(totals, key=lambda i: (totals[i], len(fired[i]), i.value))
    # Clamp so downstream code can treat confidence as a 0–1 quantity.
    confidence = min(totals[winner], 1.0)
    label = QueryIntent.UNKNOWN if confidence < CONFIDENCE_THRESHOLD else winner
    return QueryIntentResult(label, confidence, fired[winner])
def classify_all_intents(query: str) -> dict[QueryIntent, float]:
    """Score *query* against every intent and return the whole map.

    Intended for callers that want to blend strategy weights from
    multi-intent evidence instead of committing to one dominant intent.

    Values are the summed weights of the patterns that matched, with no
    normalization across intents; each value is individually capped at
    1.0. Intents with no matching pattern are omitted, so an empty dict
    (also returned for empty or whitespace-only input) means no signal.
    """
    text = (query or "").strip()
    capped: dict[QueryIntent, float] = {}
    if text:
        for intent, bank in _COMPILED.items():
            total = 0.0
            for weight in (w for rx, w in bank if rx.search(text)):
                total += weight
            if total > 0:
                capped[intent] = min(total, 1.0)
    return capped
233# ---------------------------------------------------------------------------
234# Strategy weighting — how classifier output biases retrieval
235# ---------------------------------------------------------------------------
@dataclass
class StrategyWeights:
    """Per-strategy multipliers for RRF inputs, one field per strategy.

    Once RRF fusion has produced ranked items, each strategy's
    contribution is scaled by its weight ahead of the final sort. 1.0
    leaves a strategy untouched, values above 1.0 amplify it, and 0.0
    silences it.
    """

    semantic: float = 1.0
    keyword: float = 1.0
    graph: float = 1.0
    temporal: float = 1.0
#: Canonical strategy weights for each intent. The biases are kept
#: conservative: nothing is fully muted, every strategy still takes
#: part in fusion, and the weights only tilt the balance. Values follow
#: the qualitative mapping in the :class:`QueryIntent` docstring.
INTENT_STRATEGY_WEIGHTS: dict[QueryIntent, StrategyWeights] = {
    **{
        intent: StrategyWeights(semantic=sem, keyword=kw, graph=gr, temporal=tmp)
        for intent, sem, kw, gr, tmp in (
            # (intent, semantic, keyword, graph, temporal)
            (QueryIntent.FACTUAL, 1.2, 1.2, 0.7, 0.8),
            (QueryIntent.RELATIONAL, 0.9, 0.8, 1.5, 0.8),
            (QueryIntent.COMPARATIVE, 1.0, 1.3, 1.0, 0.8),
            (QueryIntent.PROCEDURAL, 1.2, 1.0, 0.8, 0.8),
            (QueryIntent.TEMPORAL, 0.9, 0.9, 0.8, 1.5),
            (QueryIntent.EXPLORATORY, 1.0, 1.0, 1.0, 1.0),
        )
    },
    # Neutral defaults: a guess must never bias retrieval.
    QueryIntent.UNKNOWN: StrategyWeights(),
}
def weights_for_intent(intent: QueryIntent) -> StrategyWeights:
    """Return the canonical :class:`StrategyWeights` for *intent*.

    The result is always a fresh object. ``StrategyWeights`` is mutable,
    so handing out the instance stored in
    :data:`INTENT_STRATEGY_WEIGHTS` would let a caller that adjusts its
    result silently change the canonical weights for every later query.
    An intent missing from the table yields neutral default weights.
    """
    # Function-scope import: the module's top-level import only brings
    # in ``dataclass``.
    from dataclasses import replace

    canonical = INTENT_STRATEGY_WEIGHTS.get(intent)
    if canonical is None:
        return StrategyWeights()
    # ``replace`` with no overrides yields an equal, independent copy.
    return replace(canonical)