Coverage for astrocyte/eval/judges/_stemmer.py: 79%

19 statements  

« prev     ^ index     » next       coverage.py v7.15.0, created at 2026-07-04 05:24 +0000

1"""Porter stemmer wrapper for the canonical LoCoMo judge. 

2 

3Uses ``snowballstemmer`` when available (the ``eval`` extra installs it). 

4When it is absent, ``porter_stem`` raises ``ImportError`` instead of falling 

5back to an identity stemmer — tests that pin the exact LoCoMo scores fail 

6loudly, which is the right signal: no stemmer, no canonical scores. 

7 

8Why not just import snowballstemmer directly: 

9- Keeps snowballstemmer an optional extra (benchmark-only), so general 

10 users don't pay for a transitive dep they never invoke. 

11- Gives a clear error path when the extra is missing but the judge is 

12 called — better than a raw ``ModuleNotFoundError`` from deep in the 

13 F1 loop. 

14""" 

15 

16from __future__ import annotations 

17 

18import logging 

19from functools import lru_cache 

20from typing import Protocol 

21 

22_logger = logging.getLogger(__name__) 

23 

24 

@lru_cache(maxsize=1)
def _get_stemmer() -> "_Stemmer | None":
    """Return the snowballstemmer English stemmer, or ``None`` if unavailable.

    The import happens at call time so ``snowballstemmer`` remains an
    optional dependency. ``lru_cache`` memoizes the outcome — including a
    ``None`` result — so later calls reuse the first result and the warning
    below is not repeated on every call.

    Returns:
        The object returned by ``snowballstemmer.stemmer("english")``, or
        ``None`` when ``snowballstemmer`` cannot be imported.
    """
    try:
        import snowballstemmer
    except ImportError:
        # The version specifier in the hint is quoted on purpose: pasted
        # unquoted into a POSIX shell, ``>=2.2`` is parsed as an output
        # redirection (creating a file named ``=2.2``) and the version
        # constraint is silently dropped.
        _logger.warning(
            "snowballstemmer not installed; LoCoMo judge will NOT produce "
            "canonical scores. Install with: pip install 'astrocyte[eval]' "
            "(or: pip install 'snowballstemmer>=2.2').",
        )
        return None
    # NOTE(review): "english" selects Snowball's English (Porter2) algorithm;
    # the original Porter algorithm is exposed as "porter". Confirm this is
    # the variant the canonical LoCoMo scorer expects.
    return snowballstemmer.stemmer("english")

37 

38 

class _Stemmer(Protocol):
    """Structural type for the stemmer object handled by this module.

    Used for type hints only: any object exposing a compatible
    ``stemWord`` method satisfies it.
    """

    def stemWord(self, word: str) -> str:  # pragma: no cover
        """Return the stem of ``word``."""
        ...

42 

43 

def porter_stem(word: str) -> str:
    """Stem ``word`` with the stemmer provided by ``_get_stemmer``.

    Args:
        word: The token to stem.

    Returns:
        The value returned by the stemmer's ``stemWord(word)``.

    Raises:
        ImportError: When the optional ``eval`` extra (``snowballstemmer``)
            is not installed. Canonical LoCoMo F1 scores are wrong without
            stemming, so silent degradation is worse than a loud failure.
    """
    stemmer = _get_stemmer()
    if stemmer is None:
        # The version specifier in the hint is quoted on purpose: pasted
        # unquoted into a POSIX shell, ``>=2.2`` is parsed as an output
        # redirection and the version constraint is silently dropped.
        raise ImportError(
            "snowballstemmer is required for canonical LoCoMo scoring. "
            "Install with: pip install 'astrocyte[eval]' "
            "(or: pip install 'snowballstemmer>=2.2').",
        )
    return stemmer.stemWord(word)