Coverage for astrocyte/pipeline/multi_query.py: 88%

25 statements  

« prev     ^ index     » next       coverage.py v7.15.0, created at 2026-07-04 05:24 +0000

1"""Multi-query expansion for complex (multi-hop) recall. 

2 

3When a question requires evidence from multiple sessions or topics, 

4a single recall query often misses half the evidence. Decomposing 

5into 2–3 focused sub-questions and merging their recall results 

6substantially improves multi-hop coverage. 

7 

8This module handles decomposition only. The orchestrator owns the 

9recall-and-merge loop so it can reuse the full retrieval pipeline 

10(embeddings, parallel strategies, RRF, rerank) for each sub-query. 

11""" 

12 

13from __future__ import annotations 

14 

15import logging 

16from typing import TYPE_CHECKING 

17 

18from astrocyte.types import Message 

19 

20if TYPE_CHECKING: 

21 from astrocyte.provider import LLMProvider 

22 

# Module-scoped logger, named after this module.
_logger = logging.getLogger(__name__)

# System prompt: tells the model to emit bare sub-questions, one per line,
# and to echo the question back unchanged when it is already a simple lookup.
_DECOMPOSITION_SYSTEM = " ".join(
    (
        "You decompose complex questions into simpler sub-questions for memory search.",
        "Return ONLY the sub-questions, one per line, no numbering, no explanation.",
        "If the question is already simple (a single fact lookup), return just the original question unchanged.",
    )
)

# User prompt template; ``{query}`` is filled in via ``str.format``.
_DECOMPOSITION_USER = (
    "Decompose this question into 2–3 focused sub-questions whose answers together answer the original:"
    "\n\n{query}"
)

# Upper bound on the number of queries handed back to the caller, which in
# turn bounds downstream recall cost. The count includes the original query:
# 1 original + up to 3 sub-questions = at most 4 recall passes.
_MAX_SUB_QUESTIONS = 1 + 3

38 

39 

def _strip_list_marker(line: str) -> str:
    """Remove a leading bullet or enumerator from an LLM output line.

    The system prompt asks for unnumbered output, but nothing guarantees the
    model complies. A leftover ``"1."`` / ``"2)"`` / ``"-"`` prefix would be
    sent to recall as part of the query text and would stop the line from
    de-duplicating against the original question.

    Lines without such a marker are returned unchanged, including lines that
    merely begin with a number (``"2020 results?"``, ``"3.5 GPA ..."``).
    """
    parts = line.split(None, 1)
    if len(parts) < 2:
        # A single token: nothing would remain after stripping, keep as-is.
        return line
    head, rest = parts
    is_bullet = head in {"-", "*", "•"}
    is_enumerator = len(head) > 1 and head[-1] in ".)" and head[:-1].isdigit()
    return rest if is_bullet or is_enumerator else line


async def decompose_query(query: str, llm_provider: LLMProvider) -> list[str]:
    """Return a list of sub-questions for multi-hop query expansion.

    The first element is always the original query (used as an anchor so
    callers can detect a no-op: ``len(result) == 1`` means no additional
    sub-question was produced). The result is capped at
    ``_MAX_SUB_QUESTIONS`` total entries, the original included.

    Args:
        query: The user's question. A blank or whitespace-only query is
            returned as ``[query]`` without calling the LLM.
        llm_provider: Provider whose awaitable ``complete(...)`` is called
            with the decomposition prompts; the ``text`` attribute of its
            result is parsed one sub-question per line.

    Returns:
        ``[query, *sub_questions]`` with case-insensitive duplicates removed
        (first occurrence wins) and leading list markers stripped from the
        LLM-produced lines.

    Failures (LLM errors, empty responses) return ``[query]`` and log at
    DEBUG so a misconfigured provider degrades to normal single-query
    recall rather than crashing.
    """
    # Nothing to decompose; skip the LLM round-trip entirely.
    if not query.strip():
        return [query]

    try:
        completion = await llm_provider.complete(
            messages=[
                Message(role="system", content=_DECOMPOSITION_SYSTEM),
                Message(role="user", content=_DECOMPOSITION_USER.format(query=query)),
            ],
            max_tokens=200,
            temperature=0.0,
        )
        # Parsing stays inside the ``try`` on purpose: a malformed completion
        # object should degrade to single-query recall just like a failed call.
        stripped_lines = (raw.strip() for raw in completion.text.splitlines())
        candidates = [_strip_list_marker(text) for text in stripped_lines if text]
    except Exception:
        _logger.debug("Query decomposition failed; falling back to single-query recall", exc_info=True)
        return [query]

    if not candidates:
        return [query]

    # Deduplicate while preserving order; original query is always the anchor.
    # ``casefold`` gives caseless matching that is also correct for non-ASCII text.
    seen: set[str] = set()
    result: list[str] = []
    for text in [query, *candidates]:
        key = text.strip().casefold()
        if key not in seen:
            seen.add(key)
            result.append(text)

    return result[:_MAX_SUB_QUESTIONS]