Coverage for astrocyte/pipeline/wiki_lint.py: 100%

51 statements  

« prev     ^ index     » next       coverage.py v7.15.0, created at 2026-07-04 05:24 +0000

1"""Wiki lint pass — Karpathy's periodic "lint" op (M12.5). 

2 

3Karpathy's LLM Wiki spec includes a lint operation that checks compiled 

4pages for contradictions, stale claims, orphans, missing cross-links, 

5and data gaps. This module implements **contradiction detection** for 

6v1 — the highest-bench-leverage check. 

7 

8Mechanism: per WikiPage, send (page content + top-K facts in same scope) 

9to an LLM judge with a yes/no contradiction prompt. If the judge flags a 

10contradiction, the page is marked unclean and callers can filter it out 

11of the synth context. 

12 

13The lint is **stateless per call** — callers either (a) run once per 

14bank and cache the report, or (b) run inline per-question on the 

15small set of wiki hits actually surfaced by recall. The latter pays 

16~1-2 LLM calls per question instead of N upfront but only lints 

17pages that would have been seen. 

18 

19Generic across benches — operates on WikiPage shape (title + content + 

20scope) without bench-specific assumptions. 

21 

22See: 

23- ``docs/_design/llm-wiki-compile.md`` §7 (lint design) 

24- Karpathy gist: https://gist.github.com/karpathy/442a6bf555914893e9891c11519de94f 

25""" 

26 

27from __future__ import annotations 

28 

29import json 

30import logging 

31from dataclasses import dataclass, field 

32from typing import TYPE_CHECKING 

33 

34if TYPE_CHECKING: 

35 from astrocyte.provider import LLMProvider 

36 

# Module-level logger. The name is spelled out as a literal so log routing
# keeps working under this exact name however the module is imported.
logger = logging.getLogger("astrocyte.pipeline.wiki_lint")

38 

39 

@dataclass
class WikiLintIssue:
    """A single finding recorded by the lint pass against one wiki page."""

    # Identifier of the page this finding refers to.
    page_id: str
    # Issue category: 'contradicted' | 'stale' | 'orphan'.
    kind: str
    # Optional free-text explanation; defaults to the empty string.
    detail: str = ""

47 

48 

@dataclass
class WikiLintReport:
    """Lint findings collected for a set of pages, tagged with a bank id."""

    # Bank scope this report was produced for.
    bank_id: str
    # Findings in the order they were appended; each report gets its own list.
    issues: list[WikiLintIssue] = field(default_factory=list)

    def is_clean(self, page_id: str) -> bool:
        """Return True when no recorded issue refers to ``page_id``.

        Issues of every kind count: any issue recorded for the page makes
        it not clean.
        """
        for issue in self.issues:
            if issue.page_id == page_id:
                return False
        return True

    def kinds_for(self, page_id: str) -> set[str]:
        """Return the distinct issue kinds recorded for ``page_id``.

        The set is empty when the page has no recorded issues.
        """
        found: set[str] = set()
        for issue in self.issues:
            if issue.page_id == page_id:
                found.add(issue.kind)
        return found

63 

64 

# Judge prompt template. It is filled with ``str.format`` using the fields
# ``title``, ``content`` and ``facts_block``; the doubled braces on the last
# line come out as literal JSON braces after formatting.
_CONTRADICTION_PROMPT = (
    "You are auditing a knowledge-base entry against the source facts it summarises.\n"
    "\n"
    'Knowledge entry (titled "{title}"):\n'
    "{content}\n"
    "\n"
    "Source facts (each independently true, most relevant first):\n"
    "{facts_block}\n"
    "\n"
    "Does the entry contain ANY claim that DISAGREES with one of the source facts "
    "in a way that matters for answering questions? A contradiction is when the "
    "entry directly asserts something the facts refute. Vagueness or omission is "
    "NOT contradiction.\n"
    "\n"
    "Respond with JSON only:\n"
    '{{"verdict": "OK" or "CONTRADICTED", "explanation": "one sentence; only when CONTRADICTED"}}'
)

77 

78 

async def lint_one_wiki(
    *,
    page_id: str,
    title: str,
    content: str,
    facts: list[str],
    llm_provider: LLMProvider,
    model: str = "gpt-4o-mini",
) -> WikiLintIssue | None:
    """Lint a single wiki page against pre-fetched facts.

    Args:
        page_id: Identifier copied onto the returned issue and used in
            log messages.
        title: Page title, interpolated into the judge prompt.
        content: Page body, interpolated into the judge prompt.
        facts: Source facts to check the page against; they are numbered
            from 1 in the prompt in the order given.
        llm_provider: Provider whose ``complete`` coroutine runs the judge.
        model: Model name forwarded to ``llm_provider.complete``.

    Returns:
        ``WikiLintIssue`` with ``kind='contradicted'`` if the LLM judge
        flags a contradiction, ``None`` otherwise. ``None`` covers a clean
        verdict, no facts, blank content, a failed LLM call, and a
        response that is not a JSON object — the lint fails open rather
        than raising.
    """
    # Nothing to compare: skip the LLM call entirely.
    if not facts or not content.strip():
        return None

    facts_block = "\n".join(
        f"{number}. {fact}" for number, fact in enumerate(facts, start=1)
    )
    prompt = _CONTRADICTION_PROMPT.format(
        title=title,
        content=content,
        facts_block=facts_block,
    )

    try:
        completion = await llm_provider.complete(
            prompt,
            model=model,
            response_format={"type": "json_object"},
        )
    except Exception as exc:  # noqa: BLE001
        # Deliberately broad: a provider failure must not break the caller.
        logger.warning(
            "wiki_lint llm call failed for page=%s: %s: %s",
            page_id,
            type(exc).__name__,
            exc,
        )
        return None

    try:
        parsed = json.loads(completion.text)
    except (ValueError, TypeError, AttributeError):
        # ValueError covers json.JSONDecodeError; TypeError covers a
        # ``text`` that is None or otherwise not str/bytes; AttributeError
        # covers a completion object without ``text``.
        logger.warning("wiki_lint json parse failed for page=%s", page_id)
        return None

    # Valid JSON need not be an object (it can be a list, string, number...);
    # only a dict carries a verdict, so anything else is treated as a failure.
    if not isinstance(parsed, dict):
        logger.warning(
            "wiki_lint unexpected json shape for page=%s: %s",
            page_id,
            type(parsed).__name__,
        )
        return None

    verdict = str(parsed.get("verdict", "")).strip().upper()
    if verdict != "CONTRADICTED":
        return None

    # ``or ""`` keeps a JSON null explanation from becoming the text "None".
    explanation = parsed.get("explanation") or ""
    return WikiLintIssue(
        page_id=page_id,
        kind="contradicted",
        detail=str(explanation).strip(),
    )

134 

135 

async def lint_wiki_pages(
    *,
    pages: list[tuple[str, str, str, list[str]]],
    llm_provider: LLMProvider,
    bank_id: str,
    model: str = "gpt-4o-mini",
) -> WikiLintReport:
    """Batch lint a set of pages, awaiting one judge call at a time.

    Args:
        pages: List of ``(page_id, title, content, facts)`` tuples. The
            tuple shape lets callers adapt from WikiPage / WikiPageHit
            without forcing a particular dataclass.
        llm_provider: Provider for the contradiction-detection LLM call.
        bank_id: Bank scope recorded on the report.
        model: LLM model for the judge.

    Returns:
        ``WikiLintReport`` containing, in input order:

        * an ``'orphan'`` issue for every page that has no facts (no LLM
          call is made for it), and
        * a ``'contradicted'`` issue for every page the judge flags.

        Entries with an empty ``page_id`` are ignored. Pages for which
        ``lint_one_wiki`` returns ``None`` (clean verdict, or a failed
        call / unparseable response) add no issue — fail-open.

        Because orphans are recorded as issues,
        ``WikiLintReport.is_clean`` is False for them; callers that want
        to keep orphans in context should filter on ``kinds_for``.
    """
    report = WikiLintReport(bank_id=bank_id)
    for page_id, title, content, facts in pages:
        if not page_id:
            continue
        # Orphan: page has no source facts to anchor against. The
        # caller can still treat orphans as clean for filtering; we
        # flag for visibility.
        if not facts:
            report.issues.append(
                WikiLintIssue(
                    page_id=page_id,
                    kind="orphan",
                    detail="no facts in scope",
                )
            )
            continue
        issue = await lint_one_wiki(
            page_id=page_id,
            title=title,
            content=content,
            facts=facts,
            llm_provider=llm_provider,
            model=model,
        )
        if issue is not None:
            report.issues.append(issue)
    return report

184 return report