Remove deprecated speaker_change, channel_and_speaker_change, and speaker_change_sensitivity diarization options (#101)
speechmatics · Aug 2, 2024 · 9f6f011 · 9f6f011
1 parent 076618a
commit 9f6f011
Show file tree

Hide file tree

Showing 13 changed files with 17 additions and 218 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -5,6 +5,13 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [2.0.0] - 2024-08-01
+
+### Changed
+
+- Remove deprecated speaker_change, channel_and_speaker_change, and speaker_change_sensitivity diarization options
+- Remove speaker change deprecation warning
+
 ## [1.15.0] - 2024-07-31
 
 ### Added

diff --git a/VERSION b/VERSION
@@ -1 +1 @@
-1.15.0
+2.0.0
diff --git a/speechmatics/adapters.py b/speechmatics/adapters.py
@@ -34,7 +34,6 @@ def convert_to_txt(
     language: str,
     language_pack_info: dict = None,
     speaker_labels: bool = True,
-    speaker_change_token: bool = False,
 ) -> str:
     """
     Convert a set of transcription result tokens to a plain text format.
@@ -59,13 +58,6 @@ def convert_to_txt(
         if not group:
             continue
 
-        # group_tokens always puts speaker_change tokens first in a new group
-        if speaker_change_token and group[0]["type"] == "speaker_change":
-            if len(group) == 1:
-                texts.append("<sc>")
-            else:
-                texts.append("<sc>\n")
-
         speaker = get_speaker(group[0])
         if speaker and speaker != current_speaker and speaker_labels:
             current_speaker = speaker
@@ -79,25 +71,18 @@ def convert_to_txt(
 def group_tokens(tokens: List[dict]) -> List[List[dict]]:
     """
     Group the tokens in a set of results by speaker (and language if present).
-    speaker_change tokens also cause a new group to form.
 
     :param tokens: the JSON v2 results
     :return: list of lists
     """
     groups = []
     last = None
-    last_is_speaker_change = False
     for token in tokens:
-        if token["type"] == "speaker_change":
-            groups.append([token])
-            last_is_speaker_change = True
-            continue
-        if last_is_speaker_change or last == (get_speaker(token), get_language(token)):
+        if last == (get_speaker(token), get_language(token)):
             groups[-1].append(token)
         else:
             groups.append([token])
         last = (get_speaker(token), get_language(token))
-        last_is_speaker_change = False
 
     return groups
 

diff --git a/speechmatics/batch_client.py b/speechmatics/batch_client.py
@@ -231,15 +231,6 @@ def submit_job(
                 a filepath as a string or Path object, or a dict"""
             )
 
-        if (
-            config_dict.get("transcription_config", {}).get("diarization")
-            == "channel_and_speaker_change"
-        ):
-            LOGGER.warning(
-                "DeprecationWarning: Speaker Change Detection has been deprecated "
-                "and will not be supported in future versions of this package."
-            )
-
         # If audio=None, fetch_data must be specified
         file_object = None
         try:

diff --git a/speechmatics/cli.py b/speechmatics/cli.py
@@ -216,7 +216,6 @@ def get_transcription_config(
         "max_delay_mode",
         "diarization",
         "channel_diarization_labels",
-        "speaker_change_sensitivity",
         "speaker_diarization_sensitivity",
     ]:
         if args.get(option) is not None:
@@ -398,7 +397,6 @@ def add_printing_handlers(
     enable_transcription_partials=False,
     enable_translation_partials=False,
     debug_handlers_too=False,
-    speaker_change_token=False,
     print_json=False,
     translation_config=None,
 ):
@@ -419,9 +417,6 @@ def add_printing_handlers(
         debug_handlers_too (bool, optional): Whether to enable 'debug'
             handlers that print out an ASCII symbol representing messages being
             received and sent.
-        speaker_change_token (bool, optional): Whether to explicitly include a
-            speaker change token '<sc>' in the output to indicate speaker
-            changes.
         print_json (bool, optional): Whether to print json transcript messages.
         translation_config (TranslationConfig, optional): Translation config with target languages.
     """
@@ -449,7 +444,6 @@ def partial_transcript_handler(message):
             api.transcription_config.language,
             language_pack_info=api.get_language_pack_info(),
             speaker_labels=True,
-            speaker_change_token=speaker_change_token,
         )
         if plaintext:
             sys.stderr.write(f"{escape_seq}{plaintext}\r")
@@ -464,7 +458,6 @@ def transcript_handler(message):
             api.transcription_config.language,
             language_pack_info=api.get_language_pack_info(),
             speaker_labels=True,
-            speaker_change_token=speaker_change_token,
         )
         if plaintext:
             sys.stdout.write(f"{escape_seq}{plaintext}\n")
@@ -670,7 +663,6 @@ def rt_main(args):
         enable_transcription_partials=args["enable_transcription_partials"],
         enable_translation_partials=args["enable_translation_partials"],
         debug_handlers_too=args["debug"],
-        speaker_change_token=args["speaker_change_token"],
         print_json=args["print_json"],
         translation_config=transcription_config.translation_config,
     )

diff --git a/speechmatics/cli_parser.py b/speechmatics/cli_parser.py
@@ -363,7 +363,7 @@ def get_arg_parser():
     )
     batch_diarization_parser.add_argument(
         "--diarization",
-        choices=["none", "speaker", "channel", "channel_and_speaker_change"],
+        choices=["none", "speaker", "channel"],
         help="Which type of diarization to use.",
     )
     batch_diarization_parser.add_argument(
@@ -424,17 +424,6 @@ def get_arg_parser():
         type=int,
         help="Enforces the maximum number of speakers allowed in a single audio stream. Min: 2, Max: 20, Default: 20.",
     )
-    rt_transcribe_command_parser.add_argument(
-        "--speaker-change-sensitivity",
-        type=float,
-        help="Sensitivity level for speaker change.",
-    )
-    rt_transcribe_command_parser.add_argument(
-        "--speaker-change-token",
-        default=False,
-        action="store_true",
-        help="Shows a <sc> token where a speaker change was detected.",
-    )
     rt_transcribe_command_parser.add_argument(
         "--max-delay",
         type=float,
@@ -496,7 +485,7 @@ def get_arg_parser():
 
     rt_transcribe_command_parser.add_argument(
         "--diarization",
-        choices=["none", "speaker", "speaker_change"],
+        choices=["none", "speaker"],
         help="Which type of diarization to use.",
     )
 

diff --git a/speechmatics/client.py b/speechmatics/client.py
@@ -448,15 +448,6 @@ async def run(
         :raises Exception: Can raise any exception returned by the
             consumer/producer tasks.
         """
-        if (
-            transcription_config.speaker_change_sensitivity
-            or transcription_config.diarization == "speaker_change"
-        ):
-            LOGGER.warning(
-                "DeprecationWarning: Speaker Change Detection has been deprecated "
-                "and will not be supported in future versions of this package."
-            )
-
         self.transcription_config = transcription_config
         self.seq_no = 0
         self._language_pack_info = None

diff --git a/speechmatics/models.py b/speechmatics/models.py
@@ -250,9 +250,6 @@ class TranscriptionConfig(_TranscriptionConfig):
     speaker_diarization_config: RTSpeakerDiarizationConfig = None
     """Configuration for speaker diarization."""
 
-    speaker_change_sensitivity: float = None
-    """Sensitivity level for speaker change."""
-
     streaming_mode: bool = None
     """Indicates if we run the engine in streaming mode, or regular RT mode."""
 

diff --git a/tests/data/convert_to_txt/speaker_change.json b/tests/data/convert_to_txt/speaker_change.json
diff --git a/tests/mock_rt_server.py b/tests/mock_rt_server.py
@@ -135,12 +135,6 @@ def dummy_add_transcript():
                     {"content": "foo", "confidence": 1.0, "language": "en"},
                 ],
             },
-            {
-                "type": "speaker_change",
-                "start_time": 1.0,
-                "end_time": 1.0,
-                "score": 0.8,
-            },
             {
                 "type": "word",
                 "start_time": 1.0,

diff --git a/tests/test_adapters.py b/tests/test_adapters.py
@@ -8,55 +8,43 @@
 
 
 @pytest.mark.parametrize(
-    "json_name, txt_name, language_pack_info, speaker_labels, speaker_change_token",
+    "json_name, txt_name, language_pack_info, speaker_labels",
     [
-        ("empty.json", "empty.txt", {"word_delimiter": " "}, False, False),
-        ("simple_case.json", "simple_case.txt", {"word_delimiter": " "}, False, False),
+        ("empty.json", "empty.txt", {"word_delimiter": " "}, False),
+        ("simple_case.json", "simple_case.txt", {"word_delimiter": " "}, False),
         (
             "simple_case.json",
             "simple_case_with_speakers.txt",
             {"word_delimiter": " "},
             True,
-            False,
         ),
         (
             "simple_case.json",
             "simple_case_no_word_delim.txt",
             {"word_delimiter": ""},
             False,
-            False,
         ),
         (
             "two_speakers.json",
             "two_speakers.txt",
             {"word_delimiter": " "},
             False,
-            False,
         ),
         (
             "two_speakers.json",
             "two_speakers_with_speaker_labels.txt",
             {"word_delimiter": " "},
             True,
-            False,
-        ),
-        ("entity.json", "entity.txt", {"word_delimiter": " "}, False, False),
-        ("punctuation.json", "punctuation.txt", {"word_delimiter": " "}, False, False),
-        (
-            "speaker_change.json",
-            "speaker_change.txt",
-            {"word_delimiter": " "},
-            False,
-            True,
         ),
+        ("entity.json", "entity.txt", {"word_delimiter": " "}, False),
+        ("punctuation.json", "punctuation.txt", {"word_delimiter": " "}, False),
     ],
 )
 def test_convert_to_txt(
     json_name: str,
     txt_name: str,
     language_pack_info: dict,
     speaker_labels: bool,
-    speaker_change_token: bool,
 ):
     json_file_path = path_to_test_resource(os.path.join("convert_to_txt", json_name))
     txt_file_path = path_to_test_resource(os.path.join("convert_to_txt", txt_name))
@@ -73,7 +61,6 @@ def test_convert_to_txt(
             "en",
             language_pack_info,
             speaker_labels,
-            speaker_change_token,
         )
         == txt
     )
diff --git a/tests/test_cli.py b/tests/test_cli.py
@@ -158,10 +158,6 @@
             ["batch", "transcribe", "--speaker-diarization-sensitivity=0.7"],
             {"speaker_diarization_sensitivity": 0.7},
         ),
-        (
-            ["rt", "transcribe", "--speaker-change-token"],
-            {"speaker_change_token": True},
-        ),
         (
             [
                 "rt",
@@ -183,18 +179,6 @@
                 "channel_diarization_labels": ["label5 label4 label3"],
             },
         ),
-        (
-            [
-                "batch",
-                "transcribe",
-                "--diarization=channel_and_speaker_change",
-                "--channel-diarization-labels=label1 label2",
-            ],
-            {
-                "diarization": "channel_and_speaker_change",
-                "channel_diarization_labels": ["label1 label2"],
-            },
-        ),
         (["rt", "transcribe", "--auth-token=xyz"], {"auth_token": "xyz"}),
         (
             ["batch", "transcribe", "--domain=finance"],
@@ -208,10 +192,6 @@
             ["batch", "transcribe", "--output-format=json-v2"],
             {"output_format": "json-v2"},
         ),
-        (
-            ["batch", "transcribe", "--diarization=channel_and_speaker_change"],
-            {"diarization": "channel_and_speaker_change"},
-        ),
         (["batch", "submit"], {"command": "submit"}),
         (
             ["rt", "transcribe", "--config-file=data/transcription_config.json"],
@@ -631,9 +611,6 @@ def test_rt_main_with_all_options(mock_server, tmp_path):
         "0.1",
         "--diarization",
         "none",
-        "--speaker-change-sensitivity",
-        "0.8",
-        "--speaker-change-token",
         "--max-delay",
         "5.0",
         "--max-delay-mode",
@@ -678,7 +655,6 @@ def test_rt_main_with_all_options(mock_server, tmp_path):
     assert msg["transcription_config"]["diarization"] == "none"
     assert msg["transcription_config"]["max_delay"] == 5.0
     assert msg["transcription_config"]["max_delay_mode"] == "fixed"
-    assert msg["transcription_config"]["speaker_change_sensitivity"] == 0.8
     assert msg["transcription_config"].get("operating_point") is None
     assert (
         msg["transcription_config"]["transcript_filtering_config"][
@@ -751,8 +727,6 @@ def test_rt_main_with_config_file_cmdline_override(mock_server):
         "--output-locale=en-US",
         "--domain=different",
         "--operating-point=enhanced",
-        "--speaker-change-sensitivity",
-        "0.8",
         audio_path,
     ]
 
@@ -773,7 +747,6 @@ def test_rt_main_with_config_file_cmdline_override(mock_server):
     assert msg["transcription_config"]["domain"] == "different"
     assert msg["transcription_config"]["enable_entities"] is True
     assert msg["transcription_config"]["output_locale"] == "en-US"
-    assert msg["transcription_config"]["speaker_change_sensitivity"] == 0.8
     assert msg["transcription_config"]["operating_point"] == "enhanced"
     assert msg["translation_config"] is not None
     assert msg["translation_config"]["enable_partials"] is True