Coverage for astrocyte/operation_metadata.py: 100%

87 statements  

« prev     ^ index     » next       coverage.py v7.15.0, created at 2026-07-04 05:24 +0000

1"""Typed metadata dataclasses for Astrocyte operations. 

2 

3Adopted from Hindsight (``hindsight_api/engine/operation_metadata.py``). 

4Provides structured shapes for the ``metadata`` field of audit entries, 

5async task results, and operation traces. Use these instead of 

6free-form ``dict[str, Any]`` so consumers can rely on stable keys. 

7 

8Pairs with ``astrocyte.audit.AuditEntry.metadata`` — pass 

9``metadata=RecallMetadata(...).to_dict()`` to get structured audit 

10payloads. 

11 

12The dataclasses are intentionally permissive (no nested validation, 

13no required-but-derivable fields). They exist to document the shape 

14that consumers can rely on, not to enforce a wire contract. 

15""" 

16 

17from __future__ import annotations 

18 

19from dataclasses import asdict, dataclass, field 

20from typing import Any 

21 

22# ─── retain side ────────────────────────────────────────────────────── 

23 

24 

@dataclass
class RetainMetadata:
    """Metadata describing one ``retain`` operation (a single document/session).

    Only ``items_count`` must be supplied; every other field falls back to a
    zero or empty default.
    """

    # Messages or chunks ingested.
    items_count: int
    # Total content bytes.
    bytes_in: int = 0
    facts_extracted: int = 0
    entities_extracted: int = 0
    sections_created: int = 0
    embeddings_generated: int = 0
    elapsed_ms: float = 0.0
    # Free-form additional keys; each instance starts with its own empty dict.
    extras: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Return the fields as a plain ``dict`` keyed by field name."""
        as_mapping = asdict(self)
        return as_mapping

40 

41 

@dataclass
class BatchRetainParentMetadata:
    """Metadata for the parent of a split batch_retain (payload was sub-batched).

    ``items_count``, ``total_tokens`` and ``num_sub_batches`` are required;
    ``is_parent`` defaults to ``True``.
    """

    items_count: int
    total_tokens: int
    num_sub_batches: int
    is_parent: bool = True

    def to_dict(self) -> dict[str, Any]:
        """Return the fields as a plain ``dict`` keyed by field name."""
        as_mapping = asdict(self)
        return as_mapping

53 

54 

@dataclass
class BatchRetainChildMetadata:
    """Metadata for a single sub-batch of a split batch_retain.

    All four fields are required; none has a default.
    """

    items_count: int
    parent_operation_id: str
    sub_batch_index: int
    total_sub_batches: int

    def to_dict(self) -> dict[str, Any]:
        """Return the fields as a plain ``dict`` keyed by field name."""
        as_mapping = asdict(self)
        return as_mapping

66 

67 

@dataclass
class ConsolidationMetadata:
    """Metadata for consolidation operations.

    Every field has a default, so the class can be built with no arguments.
    """

    observations_processed: int = 0
    observations_created: int = 0
    observations_updated: int = 0
    observations_deleted: int = 0
    elapsed_ms: float = 0.0
    model: str | None = None
    # Free-form additional keys; each instance starts with its own empty dict.
    extras: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Return the fields as a plain ``dict`` keyed by field name."""
        as_mapping = asdict(self)
        return as_mapping

82 

83 

@dataclass
class ExtractionMetadata:
    """Metadata for fact/entity extraction operations.

    Every field has a default, so the class can be built with no arguments.
    """

    chunks_processed: int = 0
    facts_extracted: int = 0
    entities_extracted: int = 0
    elapsed_ms: float = 0.0
    model: str | None = None

    def to_dict(self) -> dict[str, Any]:
        """Return the fields as a plain ``dict`` keyed by field name."""
        as_mapping = asdict(self)
        return as_mapping

96 

97 

98# ─── recall side ────────────────────────────────────────────────────── 

99 

100 

@dataclass
class RecallMetadata:
    """Metadata describing one ``recall`` (search/retrieval) operation.

    Only ``n_results`` must be supplied; the remaining fields have defaults.
    """

    n_results: int
    top_score: float = 0.0
    # Each instance starts with its own empty list.
    strategies_used: list[str] = field(default_factory=list)
    elapsed_ms: float = 0.0
    cross_encoder_used: bool = False
    # Free-form additional keys; each instance starts with its own empty dict.
    extras: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Return the fields as a plain ``dict`` keyed by field name."""
        as_mapping = asdict(self)
        return as_mapping

114 

115 

@dataclass
class ClassifyMetadata:
    """Metadata for one question-router classification call (M16).

    All fields except ``elapsed_ms`` are required.
    """

    question_type: str
    confidence: float
    # May differ from question_type due to threshold.
    effective_type: str
    confidence_threshold: float
    classifier_model: str
    elapsed_ms: float = 0.0

    def to_dict(self) -> dict[str, Any]:
        """Return the fields as a plain ``dict`` keyed by field name."""
        as_mapping = asdict(self)
        return as_mapping

129 

130 

@dataclass
class RerankMetadata:
    """Metadata for one cross-encoder / MLX-reranker call.

    ``provider``, ``n_items`` and ``n_returned`` are required; ``elapsed_ms``
    and ``model`` have defaults.
    """

    # E.g. "modal-cross-encoder", "mlx-jina", ...
    provider: str
    n_items: int
    n_returned: int
    elapsed_ms: float = 0.0
    model: str | None = None

    def to_dict(self) -> dict[str, Any]:
        """Return the fields as a plain ``dict`` keyed by field name."""
        as_mapping = asdict(self)
        return as_mapping

143 

144 

145# ─── generic / catch-all ────────────────────────────────────────────── 

146 

147 

@dataclass
class GenericOperationMetadata:
    """Catch-all shape for operations that have no typed metadata class yet.

    Only ``operation`` must be supplied.
    """

    operation: str
    elapsed_ms: float = 0.0
    # Free-form additional keys; each instance starts with its own empty dict.
    extras: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Return the fields as a plain ``dict`` keyed by field name."""
        as_mapping = asdict(self)
        return as_mapping