Coverage for astrocyte/pipeline/multi_query.py: 88%
25 statements
« prev ^ index » next coverage.py v7.15.0, created at 2026-07-04 05:24 +0000
« prev ^ index » next coverage.py v7.15.0, created at 2026-07-04 05:24 +0000
1"""Multi-query expansion for complex (multi-hop) recall.
3When a question requires evidence from multiple sessions or topics,
4a single recall query often misses half the evidence. Decomposing
5into 2–3 focused sub-questions and merging their recall results
6substantially improves multi-hop coverage.
8This module handles decomposition only. The orchestrator owns the
9recall-and-merge loop so it can reuse the full retrieval pipeline
10(embeddings, parallel strategies, RRF, rerank) for each sub-query.
11"""
13from __future__ import annotations
15import logging
16from typing import TYPE_CHECKING
18from astrocyte.types import Message
20if TYPE_CHECKING:
21 from astrocyte.provider import LLMProvider
23_logger = logging.getLogger(__name__)
# System prompt for the decomposition call: one sentence per instruction,
# joined with single spaces into the final prompt text.
_DECOMPOSITION_SYSTEM = " ".join(
    (
        "You decompose complex questions into simpler sub-questions for memory search.",
        "Return ONLY the sub-questions, one per line, no numbering, no explanation.",
        "If the question is already simple (a single fact lookup), return just the original question unchanged.",
    )
)
# User-turn template; ``{query}`` is filled in with the caller's question
# via ``str.format`` before the prompt is sent.
_DECOMPOSITION_USER = (
    "Decompose this question into 2–3 focused sub-questions "
    "whose answers together answer the original:"
    "\n\n"
    "{query}"
)
# Upper bound on the number of entries produced by query decomposition,
# which keeps the downstream recall cost bounded: the original query plus
# at most three sub-questions, i.e. no more than four recall passes.
_MAX_SUB_QUESTIONS = 1 + 3
def _strip_list_marker(line: str) -> str:
    """Return *line* trimmed of whitespace and of a leading list marker.

    LLMs frequently ignore a "no numbering" instruction, so a leading
    bullet (``-``, ``*``, ``•``) or ordinal (``1.``, ``2)``) that is
    followed by whitespace is removed. A line that consists of nothing
    but a marker yields an empty string.
    """
    text = line.strip()
    parts = text.split(maxsplit=1)
    if not parts:
        return ""
    head = parts[0]
    is_bullet = head in ("-", "*", "•")
    # Require trailing "." or ")" so questions starting with a plain
    # number ("2023 revenue ...", "1.5 million ...") are left untouched.
    is_ordinal = len(head) > 1 and head[-1] in ".)" and head[:-1].isdecimal()
    if is_bullet or is_ordinal:
        return parts[1] if len(parts) == 2 else ""
    return text


def _dedupe_key(text: str) -> str:
    """Return a case- and whitespace-insensitive key for duplicate detection."""
    return " ".join(text.casefold().split())


async def decompose_query(query: str, llm_provider: LLMProvider) -> list[str]:
    """Return a list of sub-questions for multi-hop query expansion.

    The first element is always the original query (used as an anchor
    so callers can detect a no-op: ``len(result) == 1`` means the LLM
    judged the question already simple). Capped at ``_MAX_SUB_QUESTIONS``
    total entries.

    Sub-questions are taken one per line from the completion text. Blank
    lines are dropped, leading bullets/numbering are stripped, and entries
    that duplicate the original query or an earlier sub-question (ignoring
    case and whitespace differences) are removed, preserving order.

    Failures (LLM errors, empty responses) return ``[query]`` and log at
    DEBUG so a misconfigured provider degrades to normal single-query
    recall rather than crashing. A blank query also returns ``[query]``
    without calling the provider.

    Args:
        query: The user's original question.
        llm_provider: Provider whose ``complete`` coroutine is awaited with
            the decomposition prompt.

    Returns:
        ``[query, *sub_questions]`` with at most ``_MAX_SUB_QUESTIONS``
        entries.
    """
    # Nothing to decompose; avoid a pointless LLM round-trip.
    if not query.strip():
        return [query]

    try:
        completion = await llm_provider.complete(
            messages=[
                Message(role="system", content=_DECOMPOSITION_SYSTEM),
                Message(role="user", content=_DECOMPOSITION_USER.format(query=query)),
            ],
            max_tokens=200,
            temperature=0.0,
        )
        # Parsed inside the ``try`` so a malformed completion (for example
        # a missing ``text``) also falls back to single-query recall.
        candidates = [_strip_list_marker(raw) for raw in completion.text.splitlines()]
    except Exception:
        _logger.debug("Query decomposition failed; falling back to single-query recall", exc_info=True)
        return [query]

    # Deduplicate while preserving order; original query is always the anchor.
    result: list[str] = [query]
    seen: set[str] = {_dedupe_key(query)}
    for candidate in candidates:
        if not candidate:
            continue
        key = _dedupe_key(candidate)
        if key not in seen:
            seen.add(key)
            result.append(candidate)

    return result[:_MAX_SUB_QUESTIONS]