Coverage for astrocyte/documents/parsers/markdown.py: 100%
18 statements
« prev ^ index » next coverage.py v7.15.0, created at 2026-07-04 05:24 +0000
« prev ^ index » next coverage.py v7.15.0, created at 2026-07-04 05:24 +0000
1"""MarkdownParser — pass-through for markdown / text input.
3Decodes UTF-8 bytes to a string and returns the markdown unchanged.
4The cheapest parser; useful for tests, for inline content, and as the
5zero-configuration default when ingesting text the caller already has.
6"""
8from __future__ import annotations
10from astrocyte.documents.parsers.base import Parser
12_MARKDOWN_EXTENSIONS = {".md", ".markdown", ".txt", ".rst"}
class MarkdownParser(Parser):
    """Pass-through parser for markdown and plain-text input.

    No conversion is performed: ``convert`` only decodes the incoming bytes
    as UTF-8 and returns the resulting string.
    """

    def name(self) -> str:
        """Return the identifier of this parser, ``"markdown"``."""
        return "markdown"

    def supports(self, filename: str, content_type: str | None = None) -> bool:
        """Report whether this parser accepts the described input.

        Args:
            filename: Name of the file; compared case-insensitively against
                the suffixes in ``_MARKDOWN_EXTENSIONS``. May be empty.
            content_type: Optional MIME type; compared case-insensitively.

        Returns:
            ``True`` when ``content_type`` starts with ``text/markdown`` or
            ``text/plain``, when ``filename`` is empty (whatever the content
            type), or when ``filename`` ends with a known suffix; ``False``
            otherwise.
        """
        mime = (content_type or "").lower()
        if mime.startswith(("text/markdown", "text/plain")):
            return True
        if not filename:
            # Nothing to go on, so claim the input rather than reject it.
            return True
        # str.endswith accepts a tuple of candidate suffixes.
        return filename.lower().endswith(tuple(_MARKDOWN_EXTENSIONS))

    async def convert(self, file_data: bytes, filename: str) -> str:
        """Decode ``file_data`` as UTF-8 and return the text as-is.

        Bytes that are not valid UTF-8 are mapped to lone surrogate code
        points by the ``surrogateescape`` error handler instead of raising
        ``UnicodeDecodeError``, so a slightly broken file still yields a
        string; the tree builder is expected to cope with whatever comes
        out. Well-formed UTF-8 decodes identically under this handler, so a
        single call covers both the clean and the malformed case.

        Args:
            file_data: Raw bytes of the document.
            filename: Not used by this implementation.

        Returns:
            The decoded text.
        """
        return file_data.decode("utf-8", errors="surrogateescape")