Coverage for astrocyte/pipeline/hyde.py: 100%

"""Hypothetical Document Embedding (HyDE) — R1 research technique.

Rather than embedding the raw query and searching for similar chunks, HyDE
asks the LLM to generate a *hypothetical answer* first, then embeds that
answer. Because the hypothetical answer is written in the same style as
stored memories, it sits much closer to relevant chunks in embedding space
than the question does.

References:
    Gao et al. 2022 — "Precise Zero-Shot Dense Retrieval without Relevance Labels"
    https://arxiv.org/abs/2212.10496

Usage:
    hyde_vec = await generate_hyde_vector(query, llm_provider)
    # None on any failure — caller falls back to original query vector.

All failures are logged at DEBUG and return ``None`` so HyDE is never on the
critical path. The caller should always have the original query vector as a
fallback.
"""

22from __future__ import annotations

24import logging

25from typing import TYPE_CHECKING

27from astrocyte.types import Message

29if TYPE_CHECKING:

30 from astrocyte.provider import LLMProvider

32logger = logging.getLogger("astrocyte.hyde")

34_SYSTEM_PROMPT = (

35 "You are a memory retrieval assistant. Given a search query, generate a "

36 "single concise hypothetical memory entry that would perfectly answer the "

37 "query. Write it as a factual statement in the style of a stored memory — "

38 "not as an answer to a question, not as a question itself. Be specific and "

39 "concrete. One or two sentences maximum."

40)

async def generate_hyde_vector(
    query: str,
    llm_provider: LLMProvider,
) -> list[float] | None:
    """Generate a hypothetical document for *query* and return its embedding.

    Steps:
        1. Ask the LLM to write a hypothetical memory that would answer
           *query*.
        2. Embed the hypothetical text with ``generate_embeddings``.
        3. Return the embedding vector, or ``None`` on any failure.

    Every fallback path logs at DEBUG before returning ``None``, including
    the cases where generation yields no text or embedding yields no
    vectors.

    Args:
        query: The natural-language recall query.
        llm_provider: LLM provider used for both generation and embedding.

    Returns:
        Embedding vector of the hypothetical document, or ``None`` if
        generation or embedding fails or produces nothing (so the caller can
        fall back gracefully).
    """
    # Inline import to avoid circular dependency at module load time.
    from astrocyte.pipeline.embedding import generate_embeddings

    try:
        hypothetical = await _generate_hypothetical(query, llm_provider)
    except Exception as exc:
        # Deliberate best-effort: HyDE must never break the recall path.
        logger.debug("HyDE generation failed — falling back to original query: %s", exc)
        return None

    if not hypothetical:
        # An empty completion is a failure mode too; record it instead of
        # returning silently.
        logger.debug("HyDE generation returned no text — falling back to original query")
        return None

    try:
        embeddings = await generate_embeddings([hypothetical], llm_provider)
        # The emptiness check and the indexing stay inside the ``try`` so an
        # unexpected result shape is still swallowed rather than raised.
        if not embeddings:
            logger.debug("HyDE embedding returned no vectors — falling back to original query")
            return None
        return embeddings[0]
    except Exception as exc:
        logger.debug("HyDE embedding failed — falling back to original query: %s", exc)
        return None

async def _generate_hypothetical(query: str, llm_provider: LLMProvider) -> str:
    """Call the LLM to produce a hypothetical memory for *query*.

    Args:
        query: Search query, sent verbatim as the user message.
        llm_provider: Provider whose ``complete`` coroutine is awaited.

    Returns:
        The completion text with surrounding whitespace stripped, or ``""``
        when the response text is ``None`` or empty.

    Exceptions raised by ``llm_provider.complete`` are not caught here.
    """
    messages = [
        Message(role="system", content=_SYSTEM_PROMPT),
        Message(role="user", content=query),
    ]
    # The prompt asks for one or two sentences; the token cap is fixed at 150.
    response = await llm_provider.complete(messages, max_tokens=150)
    # ``or ""`` guards against a ``None`` text before stripping.
    return (response.text or "").strip()