AssemblyAI · dlange-aai · May 28, 2026 · May 28, 2026
diff --git a/assemblyai/__version__.py b/assemblyai/__version__.py
@@ -1 +1 @@
-__version__ = "0.64.3"
+__version__ = "0.64.4"
diff --git a/assemblyai/streaming/v3/__init__.py b/assemblyai/streaming/v3/__init__.py
@@ -6,6 +6,7 @@
     EventMessage,
     LLMGatewayResponseEvent,
     NoiseSuppressionModel,
+    SpeakerRevisionEvent,
     SpeechModel,
     SpeechStartedEvent,
     StreamingClientOptions,
@@ -29,6 +30,7 @@
     "EventMessage",
     "LLMGatewayResponseEvent",
     "NoiseSuppressionModel",
+    "SpeakerRevisionEvent",
     "SpeechModel",
     "SpeechStartedEvent",
     "StreamingClient",

diff --git a/assemblyai/streaming/v3/_base.py b/assemblyai/streaming/v3/_base.py
@@ -33,6 +33,7 @@
     ErrorEvent,
     EventMessage,
     LLMGatewayResponseEvent,
+    SpeakerRevisionEvent,
     SpeechStartedEvent,
     StreamingClientOptions,
     StreamingError,
@@ -233,6 +234,8 @@ def _parse_message(cls, data: Dict[str, Any]) -> Optional[EventMessage]:
                 return _parse_model(SpeechStartedEvent, data)
             elif event_type == StreamingEvents.LLMGatewayResponse:
                 return _parse_model(LLMGatewayResponseEvent, data)
+            elif event_type == StreamingEvents.SpeakerRevision:
+                return _parse_model(SpeakerRevisionEvent, data)
             elif event_type == StreamingEvents.Error:
                 return _parse_model(ErrorEvent, data)
             elif event_type == StreamingEvents.Warning:

diff --git a/assemblyai/streaming/v3/models.py b/assemblyai/streaming/v3/models.py
@@ -74,6 +74,21 @@ class LLMGatewayResponseEvent(BaseModel):
     data: Any
 
 
+class SpeakerRevisionEvent(BaseModel):
+    """Server-side correction to a previously-emitted Turn's speaker labels.
+
+    Emitted after offline reclustering refines the live tentative labels.
+    Match by `turn_order` against the original Turn; replace its per-word
+    speaker assignments (and the turn-level `speaker_label`) with these.
+    Text and word timestamps are unchanged from the original Turn.
+    """
+
+    type: Literal["SpeakerRevision"] = "SpeakerRevision"
+    turn_order: int
+    speaker_label: Optional[str] = None
+    words: List[Word] = []
+
+
 EventMessage = Union[
     BeginEvent,
     TerminationEvent,
@@ -82,6 +97,7 @@ class LLMGatewayResponseEvent(BaseModel):
     ErrorEvent,
     WarningEvent,
     LLMGatewayResponseEvent,
+    SpeakerRevisionEvent,
 ]
 
 
@@ -290,3 +306,4 @@ class StreamingEvents(Enum):
     Error = "Error"
     Warning = "Warning"
     LLMGatewayResponse = "LLMGatewayResponse"
+    SpeakerRevision = "SpeakerRevision"
diff --git a/tests/unit/test_streaming.py b/tests/unit/test_streaming.py
@@ -13,6 +13,7 @@
 
 from assemblyai.streaming.v3 import (
     NoiseSuppressionModel,
+    SpeakerRevisionEvent,
     SpeechModel,
     SpeechStartedEvent,
     StreamingClient,
@@ -911,6 +912,88 @@ def test_turn_event_with_word_speakers():
     assert [w.speaker for w in event.words] == ["A", "B"]
 
 
+def test_speaker_revision_event_parses():
+    # Given: a SpeakerRevision payload as emitted by the server (revision words
+    # use the same Word schema as Turn — start/end/confidence/text/word_is_final/speaker)
+    data = {
+        "type": "SpeakerRevision",
+        "turn_order": 3,
+        "speaker_label": "B",
+        "words": [
+            {
+                "start": 1000,
+                "end": 1200,
+                "confidence": 0.9,
+                "text": "hello",
+                "word_is_final": True,
+                "speaker": "B",
+            },
+            {
+                "start": 1210,
+                "end": 1400,
+                "confidence": 0.88,
+                "text": "world",
+                "word_is_final": True,
+                "speaker": "A",
+            },
+        ],
+    }
+
+    # When: parsed
+    event = SpeakerRevisionEvent.parse_obj(data)
+
+    # Then: the revision carries the corrected per-word and turn-level speakers
+    assert event.type == "SpeakerRevision"
+    assert event.turn_order == 3
+    assert event.speaker_label == "B"
+    assert [w.speaker for w in event.words] == ["B", "A"]
+
+
+def test_speaker_revision_event_dispatched_to_handler(mocker: MockFixture):
+    # Given: a SpeakerRevision frame on the wire and a handler registered
+    revision_json = json.dumps(
+        {
+            "type": "SpeakerRevision",
+            "turn_order": 5,
+            "speaker_label": "A",
+            "words": [
+                {
+                    "start": 500,
+                    "end": 700,
+                    "confidence": 0.95,
+                    "text": "yes",
+                    "word_is_final": True,
+                    "speaker": "A",
+                },
+            ],
+        }
+    )
+    fake_ws = _FakeWebSocket(recv_script=[revision_json])
+    mocker.patch(
+        "assemblyai.streaming.v3.client.websocket_connect",
+        return_value=fake_ws,
+    )
+    received = []
+    client = StreamingClient(
+        StreamingClientOptions(api_key="test", api_host="api.example.com")
+    )
+    client.on(StreamingEvents.SpeakerRevision, lambda _c, event: received.append(event))
+
+    # When: the client reads the frame
+    client.connect(_default_params())
+    deadline = time.monotonic() + 2.0
+    while time.monotonic() < deadline and not received:
+        time.sleep(0.02)
+    client.disconnect(terminate=False)
+
+    # Then: the handler is invoked with a parsed SpeakerRevisionEvent
+    assert len(received) == 1
+    assert isinstance(received[0], SpeakerRevisionEvent)
+    assert received[0].turn_order == 5
+    assert received[0].speaker_label == "A"
+    assert [w.speaker for w in received[0].words] == ["A"]
+
+
 def test_speech_model_required():
     """Test that omitting speech_model raises a validation error."""
     with pytest.raises(ValidationError):