Coverage for astrocyte/conversations/types.py: 100%

46 statements  

« prev     ^ index     » next       coverage.py v7.15.0, created at 2026-07-04 05:24 +0000

1"""Conversation Engine data types — ConversationTurn, Conversation, TurnRole. 

2 

3Hindsight-inspired ordered-turn representation. Plain dataclasses; no 

4DB coupling, no Memory Engine knowledge. Persisted via the 

5``ConversationStore`` SPI. 

6 

7Conversation shape: 

8 

9 Conversation 

10 └─ turns: ordered list of ConversationTurn 

11 ├─ role: "user" | "assistant" | "system" | "tool" | (custom) 

12 ├─ content: the message text 

13 ├─ timestamp: when the turn happened (optional) 

14 └─ metadata: free-form per-turn metadata 

15 

16Why a separate type from Document/DocumentTree: 

17 - Conversations are inherently SEQUENTIAL, not hierarchical 

18 - Speaker context matters and must be preserved across chunking 

19 - Turn boundaries are the natural chunking unit (vs tree boundaries 

20 for documents) 

21 - Bench workloads like LME / LoCoMo are conversations, not documents 

22""" 

23 

24from __future__ import annotations 

25 

26import uuid 

27from dataclasses import dataclass, field 

28from datetime import datetime, timezone 

29from typing import Any, Literal 

30 

TurnRole = Literal[
    "user",
    "assistant",
    "system",
    "tool",
]
"""The conventional chat-API roles (OpenAI / Anthropic / etc).

This alias documents the standard role names only. The ``role`` field of
the turn dataclass is annotated as plain ``str`` rather than this
Literal, so adapters remain free to pass custom roles such as
``"customer"`` / ``"agent"`` without any change to the framework.
"""

38 

39 

40# ─── ConversationTurn ───────────────────────────────────────────────── 

41 

42 

@dataclass
class ConversationTurn:
    """One turn in a conversation.

    The plain constructor requires an explicit ``id``; use :meth:`new`
    to have a fresh UUID4 string generated instead. The id exists for
    cross-reference (e.g., when a follow-up turn explicitly cites an
    earlier one).

    ``timestamp`` is optional — many chat sources don't surface per-turn
    timestamps and the conversation-level created_at is sufficient.
    """

    id: str
    role: str  # "user", "assistant", "system", "tool", or custom
    content: str
    timestamp: datetime | None = None
    metadata: dict[str, Any] = field(default_factory=dict)

    @classmethod
    def new(
        cls,
        *,
        role: str,
        content: str,
        timestamp: datetime | None = None,
        metadata: dict[str, Any] | None = None,
    ) -> ConversationTurn:
        """Construct a turn with a fresh UUID id.

        Args:
            role: Speaker role; standard or custom string.
            content: The message text.
            timestamp: When the turn happened, if known.
            metadata: Free-form per-turn metadata. The dict is stored
                as-is (not copied); ``None`` yields a new empty dict.

        Returns:
            The newly constructed turn.
        """
        return cls(
            id=str(uuid.uuid4()),
            role=role,
            content=content,
            timestamp=timestamp,
            # Test against None rather than truthiness: ``metadata or {}``
            # would silently replace a caller-supplied *empty* dict, so
            # aliasing would depend on whether the dict had entries yet.
            metadata={} if metadata is None else metadata,
        )

    def char_count(self) -> int:
        """Return the approximate serialized size of this turn in characters.

        Computed as the role length plus the content length plus a
        fixed 4-character allowance for the role header.
        """
        # Approximates what we'd serialize: "**{role}**: {content}"
        # NOTE(review): that template literally contains 6 non-content
        # characters ("**", "**", ": "); the constant 4 is left unchanged
        # to preserve current results — confirm which is intended.
        return len(self.role) + 4 + len(self.content)

81 

82 

83# ─── Conversation ───────────────────────────────────────────────────── 

84 

85 

@dataclass
class Conversation:
    """An ordered sequence of turns with conversation-level metadata.

    ``source_uri`` identifies the upstream conversation source (e.g.,
    ``"slack://channel-id/thread-ts"``, ``"openai-chat://..."``,
    ``"bench://lme/q-12345"``).

    ``created_at`` defaults to the current time in UTC when the
    instance is constructed.
    """

    id: str
    turns: list[ConversationTurn] = field(default_factory=list)
    source_uri: str = ""
    title: str = ""
    created_at: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
    metadata: dict[str, Any] = field(default_factory=dict)

    @classmethod
    def new(
        cls,
        *,
        turns: list[ConversationTurn] | None = None,
        source_uri: str = "",
        title: str = "",
        metadata: dict[str, Any] | None = None,
    ) -> Conversation:
        """Construct a Conversation with a fresh UUID id.

        Args:
            turns: Initial turns. The list is stored as-is (not copied);
                ``None`` yields a new empty list.
            source_uri: Identifier of the upstream conversation source.
            title: Human-readable title.
            metadata: Free-form conversation-level metadata. Stored
                as-is; ``None`` yields a new empty dict.

        Returns:
            The newly constructed conversation. ``created_at`` takes the
            dataclass default (current UTC time).
        """
        return cls(
            id=str(uuid.uuid4()),
            # Test against None rather than truthiness: ``turns or []``
            # would swap a caller-supplied *empty* list for a new one,
            # so turns appended later would not show up in the caller's
            # list even though a non-empty list would have been shared.
            turns=[] if turns is None else turns,
            source_uri=source_uri,
            title=title,
            metadata={} if metadata is None else metadata,
        )

    def turn_count(self) -> int:
        """Return the number of turns in the conversation."""
        return len(self.turns)

    def total_chars(self) -> int:
        """Return the sum of ``char_count()`` over all turns."""
        return sum(t.char_count() for t in self.turns)

    def add_turn(
        self,
        *,
        role: str,
        content: str,
        timestamp: datetime | None = None,
        metadata: dict[str, Any] | None = None,
    ) -> ConversationTurn:
        """Append a turn (in-place) and return it.

        The turn is built with ``ConversationTurn.new``, so it receives
        a freshly generated id.
        """
        turn = ConversationTurn.new(
            role=role,
            content=content,
            timestamp=timestamp,
            metadata=metadata,
        )
        self.turns.append(turn)
        return turn

    @property
    def earliest_timestamp(self) -> datetime | None:
        """Smallest per-turn timestamp, or ``None`` if no turn has one."""
        return min(
            (t.timestamp for t in self.turns if t.timestamp is not None),
            default=None,
        )

    @property
    def latest_timestamp(self) -> datetime | None:
        """Largest per-turn timestamp, or ``None`` if no turn has one."""
        return max(
            (t.timestamp for t in self.turns if t.timestamp is not None),
            default=None,
        )