Coverage for astrocyte/pipeline/hyde.py: 100%
25 statements
« prev ^ index » next coverage.py v7.15.0, created at 2026-07-04 05:24 +0000
« prev ^ index » next coverage.py v7.15.0, created at 2026-07-04 05:24 +0000
1"""Hypothetical Document Embedding (HyDE) — R1 research technique.
3Rather than embedding the raw query and searching for similar chunks, HyDE
4asks the LLM to generate a *hypothetical answer* first, then embeds that
5answer. Because the hypothetical answer is written in the same style as
6stored memories, it sits much closer to relevant chunks in embedding space
7than the question does.
9References:
10 Gao et al. 2022 — "Precise Zero-Shot Dense Retrieval without Relevance Labels"
11 https://arxiv.org/abs/2212.10496
13Usage:
14 hyde_vec = await generate_hyde_vector(query, llm_provider)
15 # None on any failure — caller falls back to original query vector.
17All failures are logged at DEBUG and return ``None`` so HyDE is never on the
18critical path. The caller should always have the original query vector as a
19fallback.
20"""
22from __future__ import annotations
24import logging
25from typing import TYPE_CHECKING
27from astrocyte.types import Message
29if TYPE_CHECKING:
30 from astrocyte.provider import LLMProvider
# Dedicated logger name for HyDE so its DEBUG output can be filtered separately.
logger = logging.getLogger("astrocyte.hyde")

# System instruction sent ahead of the user's query when asking the LLM for a
# hypothetical memory entry. Split one fragment per sentence/clause; adjacent
# literals are concatenated into a single string at compile time.
_SYSTEM_PROMPT = (
    "You are a memory retrieval assistant. "
    "Given a search query, generate a single concise hypothetical memory entry "
    "that would perfectly answer the query. "
    "Write it as a factual statement in the style of a stored memory — "
    "not as an answer to a question, not as a question itself. "
    "Be specific and concrete. "
    "One or two sentences maximum."
)
async def generate_hyde_vector(
    query: str,
    llm_provider: LLMProvider,
) -> list[float] | None:
    """Generate a hypothetical document for *query* and return its embedding.

    Steps:
        1. Ask the LLM to write a hypothetical memory that would answer *query*.
        2. Embed the hypothetical text with ``generate_embeddings``.
        3. Return the embedding vector, or ``None`` when no vector is produced.

    Every ``None`` returned after the LLM has been contacted is logged at
    DEBUG, so a silent fallback can always be traced.

    Args:
        query: The natural-language recall query.
        llm_provider: LLM provider used for both generation and embedding.

    Returns:
        Embedding vector of the hypothetical document, or ``None`` if the
        query is blank, or if generation or embedding fails or yields nothing
        (so the caller can fall back gracefully).
    """
    # A missing/blank query gives the LLM nothing to hypothesise about, so skip
    # the round-trip entirely. This is not a failure, hence no log line.
    # Non-string truthy values are passed through unchanged, as before.
    if not query or (isinstance(query, str) and not query.strip()):
        return None

    # Inline import to avoid circular dependency at module load time.
    from astrocyte.pipeline.embedding import generate_embeddings

    try:
        hypothetical = await _generate_hypothetical(query, llm_provider)
    except Exception as exc:
        # Deliberately broad: HyDE is best-effort and must never break recall.
        logger.debug("HyDE generation failed — falling back to original query: %s", exc)
        return None

    if not hypothetical:
        logger.debug("HyDE generation returned empty text — falling back to original query")
        return None

    try:
        embeddings = await generate_embeddings([hypothetical], llm_provider)
        # Index inside the try so an unexpected result shape also falls back
        # instead of raising.
        vector = embeddings[0] if embeddings else None
    except Exception as exc:
        logger.debug("HyDE embedding failed — falling back to original query: %s", exc)
        return None

    if vector is None:
        logger.debug("HyDE embedding returned no vector — falling back to original query")
    return vector
async def _generate_hypothetical(query: str, llm_provider: LLMProvider) -> str:
    """Ask the LLM for a hypothetical memory entry matching *query*.

    Sends ``_SYSTEM_PROMPT`` as the system message followed by *query* as the
    user message, calling ``llm_provider.complete`` with ``max_tokens=150``.

    Returns:
        The response text with surrounding whitespace stripped, or ``""`` when
        the response text is falsy.
    """
    system_turn = Message(role="system", content=_SYSTEM_PROMPT)
    user_turn = Message(role="user", content=query)

    completion = await llm_provider.complete([system_turn, user_turn], max_tokens=150)

    raw_text = completion.text
    if not raw_text:
        return ""
    return raw_text.strip()