From 9f6f01196bfebb67993ec3a4063cc7e44913577d Mon Sep 17 00:00:00 2001 From: Dumitru Gutu Date: Fri, 2 Aug 2024 11:14:41 +0100 Subject: [PATCH] Remove deprecated speaker_change, channel_and_speaker_change, and speaker_change_sensitivity diarization options (#101) --- CHANGELOG.md | 7 ++ VERSION | 2 +- speechmatics/adapters.py | 17 +---- speechmatics/batch_client.py | 9 --- speechmatics/cli.py | 8 --- speechmatics/cli_parser.py | 15 +--- speechmatics/client.py | 9 --- speechmatics/models.py | 3 - tests/data/convert_to_txt/speaker_change.json | 40 ----------- tests/mock_rt_server.py | 6 -- tests/test_adapters.py | 23 ++----- tests/test_cli.py | 27 -------- tests/test_cli_handlers.py | 69 +------------------ 13 files changed, 17 insertions(+), 218 deletions(-) delete mode 100644 tests/data/convert_to_txt/speaker_change.json diff --git a/CHANGELOG.md b/CHANGELOG.md index 45a2ea1..d8303c4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,13 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [2.0.0] - 2024-08-01 + +### Changed + +- Remove deprecated speaker_change, channel_and_speaker_change, and speaker_change_sensitivity diarization options +- Remove speaker change deprecation warning + ## [1.15.0] - 2024-07-31 ## Added diff --git a/VERSION b/VERSION index 141f2e8..227cea2 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.15.0 +2.0.0 diff --git a/speechmatics/adapters.py b/speechmatics/adapters.py index 1028e47..6788345 100644 --- a/speechmatics/adapters.py +++ b/speechmatics/adapters.py @@ -34,7 +34,6 @@ def convert_to_txt( language: str, language_pack_info: dict = None, speaker_labels: bool = True, - speaker_change_token: bool = False, ) -> str: """ Convert a set of transcription result tokens to a plain text format. @@ -59,13 +58,6 @@ def convert_to_txt( if not group: continue - # group_tokens always puts speaker_change tokens first in a new group - if speaker_change_token and group[0]["type"] == "speaker_change": - if len(group) == 1: - texts.append("") - else: - texts.append("\n") - speaker = get_speaker(group[0]) if speaker and speaker != current_speaker and speaker_labels: current_speaker = speaker @@ -79,25 +71,18 @@ def convert_to_txt( def group_tokens(tokens: List[dict]) -> List[List[dict]]: """ Group the tokens in a set of results by speaker (and language if present). - speaker_change tokens also cause a new group to form. :param results: the JSON v2 results :return: list of lists """ groups = [] last = None - last_is_speaker_change = False for token in tokens: - if token["type"] == "speaker_change": - groups.append([token]) - last_is_speaker_change = True - continue - if last_is_speaker_change or last == (get_speaker(token), get_language(token)): + if last == (get_speaker(token), get_language(token)): groups[-1].append(token) else: groups.append([token]) last = (get_speaker(token), get_language(token)) - last_is_speaker_change = False return groups diff --git a/speechmatics/batch_client.py b/speechmatics/batch_client.py index dc7e981..a8958ff 100644 --- a/speechmatics/batch_client.py +++ b/speechmatics/batch_client.py @@ -231,15 +231,6 @@ def submit_job( a filepath as a string or Path object, or a dict""" ) - if ( - config_dict.get("transcription_config", {}).get("diarization") - == "channel_and_speaker_change" - ): - LOGGER.warning( - "DeprecationWarning: Speaker Change Detection has been deprecated " - "and will not be supported in future versions of this package." - ) - # If audio=None, fetch_data must be specified file_object = None try: diff --git a/speechmatics/cli.py b/speechmatics/cli.py index 8bd9c24..f859825 100755 --- a/speechmatics/cli.py +++ b/speechmatics/cli.py @@ -216,7 +216,6 @@ def get_transcription_config( "max_delay_mode", "diarization", "channel_diarization_labels", - "speaker_change_sensitivity", "speaker_diarization_sensitivity", ]: if args.get(option) is not None: @@ -398,7 +397,6 @@ def add_printing_handlers( enable_transcription_partials=False, enable_translation_partials=False, debug_handlers_too=False, - speaker_change_token=False, print_json=False, translation_config=None, ): @@ -419,9 +417,6 @@ def add_printing_handlers( debug_handlers_too (bool, optional): Whether to enable 'debug' handlers that print out an ASCII symbol representing messages being received and sent. - speaker_change_token (bool, optional): Whether to explicitly include a - speaker change token '' in the output to indicate speaker - changes. print_json (bool, optional): Whether to print json transcript messages. translation_config (TranslationConfig, optional): Translation config with target languages. """ @@ -449,7 +444,6 @@ def partial_transcript_handler(message): api.transcription_config.language, language_pack_info=api.get_language_pack_info(), speaker_labels=True, - speaker_change_token=speaker_change_token, ) if plaintext: sys.stderr.write(f"{escape_seq}{plaintext}\r") @@ -464,7 +458,6 @@ def transcript_handler(message): api.transcription_config.language, language_pack_info=api.get_language_pack_info(), speaker_labels=True, - speaker_change_token=speaker_change_token, ) if plaintext: sys.stdout.write(f"{escape_seq}{plaintext}\n") @@ -670,7 +663,6 @@ def rt_main(args): enable_transcription_partials=args["enable_transcription_partials"], enable_translation_partials=args["enable_translation_partials"], debug_handlers_too=args["debug"], - speaker_change_token=args["speaker_change_token"], print_json=args["print_json"], translation_config=transcription_config.translation_config, ) diff --git a/speechmatics/cli_parser.py b/speechmatics/cli_parser.py index 2c45de2..5f162b9 100644 --- a/speechmatics/cli_parser.py +++ b/speechmatics/cli_parser.py @@ -363,7 +363,7 @@ def get_arg_parser(): ) batch_diarization_parser.add_argument( "--diarization", - choices=["none", "speaker", "channel", "channel_and_speaker_change"], + choices=["none", "speaker", "channel"], help="Which type of diarization to use.", ) batch_diarization_parser.add_argument( @@ -424,17 +424,6 @@ def get_arg_parser(): type=int, help="Enforces the maximum number of speakers allowed in a single audio stream. Min: 2, Max: 20, Default: 20.", ) - rt_transcribe_command_parser.add_argument( - "--speaker-change-sensitivity", - type=float, - help="Sensitivity level for speaker change.", - ) - rt_transcribe_command_parser.add_argument( - "--speaker-change-token", - default=False, - action="store_true", - help="Shows a token where a speaker change was detected.", - ) rt_transcribe_command_parser.add_argument( "--max-delay", type=float, @@ -496,7 +485,7 @@ def get_arg_parser(): rt_transcribe_command_parser.add_argument( "--diarization", - choices=["none", "speaker", "speaker_change"], + choices=["none", "speaker"], help="Which type of diarization to use.", ) diff --git a/speechmatics/client.py b/speechmatics/client.py index 35028f3..8aca612 100644 --- a/speechmatics/client.py +++ b/speechmatics/client.py @@ -448,15 +448,6 @@ async def run( :raises Exception: Can raise any exception returned by the consumer/producer tasks. """ - if ( - transcription_config.speaker_change_sensitivity - or transcription_config.diarization == "speaker_change" - ): - LOGGER.warning( - "DeprecationWarning: Speaker Change Detection has been deprecated " - "and will not be supported in future versions of this package." - ) - self.transcription_config = transcription_config self.seq_no = 0 self._language_pack_info = None diff --git a/speechmatics/models.py b/speechmatics/models.py index 270fac6..07cf7ac 100644 --- a/speechmatics/models.py +++ b/speechmatics/models.py @@ -250,9 +250,6 @@ class TranscriptionConfig(_TranscriptionConfig): speaker_diarization_config: RTSpeakerDiarizationConfig = None """Configuration for speaker diarization.""" - speaker_change_sensitivity: float = None - """Sensitivity level for speaker change.""" - streaming_mode: bool = None """Indicates if we run the engine in streaming mode, or regular RT mode.""" diff --git a/tests/data/convert_to_txt/speaker_change.json b/tests/data/convert_to_txt/speaker_change.json deleted file mode 100644 index ad0793d..0000000 --- a/tests/data/convert_to_txt/speaker_change.json +++ /dev/null @@ -1,40 +0,0 @@ -{ - "message": "AddTranscript", - "format": "2.7", - "results": [ - { - "alternatives": [ - { - "confidence": 1.0, - "content": "Hello", - "language": "en" - } - ], - "end_time": 0.1, - "start_time": 0.0, - "type": "word" - }, - { - "end_time": 0.1, - "start_time": 0.1, - "type": "speaker_change" - }, - { - "alternatives": [ - { - "confidence": 1.0, - "content": "Hey", - "language": "en" - } - ], - "end_time": 0.2, - "start_time": 0.1, - "type": "word" - } - ], - "metadata": { - "end_time": 0.2, - "start_time": 0.0, - "transcript": "Hello\n\nHey" - } -} diff --git a/tests/mock_rt_server.py b/tests/mock_rt_server.py index 21fcf2d..5431e9e 100644 --- a/tests/mock_rt_server.py +++ b/tests/mock_rt_server.py @@ -135,12 +135,6 @@ def dummy_add_transcript(): {"content": "foo", "confidence": 1.0, "language": "en"}, ], }, - { - "type": "speaker_change", - "start_time": 1.0, - "end_time": 1.0, - "score": 0.8, - }, { "type": "word", "start_time": 1.0, diff --git a/tests/test_adapters.py b/tests/test_adapters.py index bb7e407..2c7549c 100644 --- a/tests/test_adapters.py +++ b/tests/test_adapters.py @@ -8,47 +8,36 @@ @pytest.mark.parametrize( - "json_name, txt_name, language_pack_info, speaker_labels, speaker_change_token", + "json_name, txt_name, language_pack_info, speaker_labels", [ - ("empty.json", "empty.txt", {"word_delimiter": " "}, False, False), - ("simple_case.json", "simple_case.txt", {"word_delimiter": " "}, False, False), + ("empty.json", "empty.txt", {"word_delimiter": " "}, False), + ("simple_case.json", "simple_case.txt", {"word_delimiter": " "}, False), ( "simple_case.json", "simple_case_with_speakers.txt", {"word_delimiter": " "}, True, - False, ), ( "simple_case.json", "simple_case_no_word_delim.txt", {"word_delimiter": ""}, False, - False, ), ( "two_speakers.json", "two_speakers.txt", {"word_delimiter": " "}, False, - False, ), ( "two_speakers.json", "two_speakers_with_speaker_labels.txt", {"word_delimiter": " "}, True, - False, - ), - ("entity.json", "entity.txt", {"word_delimiter": " "}, False, False), - ("punctuation.json", "punctuation.txt", {"word_delimiter": " "}, False, False), - ( - "speaker_change.json", - "speaker_change.txt", - {"word_delimiter": " "}, - False, - True, ), + ("entity.json", "entity.txt", {"word_delimiter": " "}, False), + ("punctuation.json", "punctuation.txt", {"word_delimiter": " "}, False), ], ) def test_convert_to_txt( @@ -56,7 +45,6 @@ def test_convert_to_txt( txt_name: str, language_pack_info: dict, speaker_labels: bool, - speaker_change_token: bool, ): json_file_path = path_to_test_resource(os.path.join("convert_to_txt", json_name)) txt_file_path = path_to_test_resource(os.path.join("convert_to_txt", txt_name)) @@ -73,7 +61,6 @@ def test_convert_to_txt( "en", language_pack_info, speaker_labels, - speaker_change_token, ) == txt ) diff --git a/tests/test_cli.py b/tests/test_cli.py index defc087..0add650 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -158,10 +158,6 @@ ["batch", "transcribe", "--speaker-diarization-sensitivity=0.7"], {"speaker_diarization_sensitivity": 0.7}, ), - ( - ["rt", "transcribe", "--speaker-change-token"], - {"speaker_change_token": True}, - ), ( [ "rt", @@ -183,18 +179,6 @@ "channel_diarization_labels": ["label5 label4 label3"], }, ), - ( - [ - "batch", - "transcribe", - "--diarization=channel_and_speaker_change", - "--channel-diarization-labels=label1 label2", - ], - { - "diarization": "channel_and_speaker_change", - "channel_diarization_labels": ["label1 label2"], - }, - ), (["rt", "transcribe", "--auth-token=xyz"], {"auth_token": "xyz"}), ( ["batch", "transcribe", "--domain=finance"], @@ -208,10 +192,6 @@ ["batch", "transcribe", "--output-format=json-v2"], {"output_format": "json-v2"}, ), - ( - ["batch", "transcribe", "--diarization=channel_and_speaker_change"], - {"diarization": "channel_and_speaker_change"}, - ), (["batch", "submit"], {"command": "submit"}), ( ["rt", "transcribe", "--config-file=data/transcription_config.json"], @@ -631,9 +611,6 @@ def test_rt_main_with_all_options(mock_server, tmp_path): "0.1", "--diarization", "none", - "--speaker-change-sensitivity", - "0.8", - "--speaker-change-token", "--max-delay", "5.0", "--max-delay-mode", @@ -678,7 +655,6 @@ def test_rt_main_with_all_options(mock_server, tmp_path): assert msg["transcription_config"]["diarization"] == "none" assert msg["transcription_config"]["max_delay"] == 5.0 assert msg["transcription_config"]["max_delay_mode"] == "fixed" - assert msg["transcription_config"]["speaker_change_sensitivity"] == 0.8 assert msg["transcription_config"].get("operating_point") is None assert ( msg["transcription_config"]["transcript_filtering_config"][ @@ -751,8 +727,6 @@ def test_rt_main_with_config_file_cmdline_override(mock_server): "--output-locale=en-US", "--domain=different", "--operating-point=enhanced", - "--speaker-change-sensitivity", - "0.8", audio_path, ] @@ -773,7 +747,6 @@ def test_rt_main_with_config_file_cmdline_override(mock_server): assert msg["transcription_config"]["domain"] == "different" assert msg["transcription_config"]["enable_entities"] is True assert msg["transcription_config"]["output_locale"] == "en-US" - assert msg["transcription_config"]["speaker_change_sensitivity"] == 0.8 assert msg["transcription_config"]["operating_point"] == "enhanced" assert msg["translation_config"] is not None assert msg["translation_config"]["enable_partials"] is True diff --git a/tests/test_cli_handlers.py b/tests/test_cli_handlers.py index c029e9f..33868be 100644 --- a/tests/test_cli_handlers.py +++ b/tests/test_cli_handlers.py @@ -159,52 +159,17 @@ def test_add_printing_handlers_translation_handler(mocker, capsys, check_tty): assert not err -TRANSCRIPT_TXT_WITH_SC = "Hey\nHello" -TRANSCRIPT_WITH_SC = { - "message": "AddTranscript", - "results": [ - { - "type": "word", - "start_time": 0.08999999612569809, - "end_time": 0.29999998211860657, - "alternatives": [{"confidence": 1.0, "content": "Hey", "language": "en"}], - }, - { - "type": "speaker_change", - "start_time": 0.08999999612569809, - "end_time": 0.29999998211860657, - "score": 1, - }, - { - "type": "word", - "start_time": 0.08999999612569809, - "end_time": 0.29999998211860657, - "alternatives": [{"confidence": 1.0, "content": "Hello", "language": "en"}], - }, - ], - "metadata": { - "start_time": 58.920005798339844, - "end_time": 60.0000057220459, - "transcript": TRANSCRIPT_TXT_WITH_SC, - }, - "format": "2.4", -} - - def check_printing_handlers( mocker, capsys, transcript, expected_transcript_txt, - speaker_change_token, ): api = mocker.MagicMock() api.get_language_pack_info = mocker.MagicMock(return_value={"word_delimiter": " "}) transcripts = cli.Transcripts(text="", json=[]) - cli.add_printing_handlers( - api, transcripts, speaker_change_token=speaker_change_token - ) + cli.add_printing_handlers(api, transcripts) assert not transcripts.text assert not transcripts.json out, err = capsys.readouterr() @@ -225,35 +190,3 @@ def check_printing_handlers( out, err = capsys.readouterr() assert out == escape_seq + expected_transcript_txt + "\n" assert not err - - -@pytest.mark.parametrize("check_tty", [False, True]) -def test_add_printing_handlers_with_speaker_change_token(mocker, capsys, check_tty): - # patch in isatty, in order to check behaviour with and without tty - sys.stderr.isatty = lambda: check_tty - sys.stdout.isatty = lambda: check_tty - - expected_transcript = "Hey\n\nHello" - check_printing_handlers( - mocker, - capsys, - TRANSCRIPT_WITH_SC, - expected_transcript, - speaker_change_token=True, - ) - - -@pytest.mark.parametrize("check_tty", [False, True]) -def test_add_printing_handlers_with_speaker_change_no_token(mocker, capsys, check_tty): - # patch in isatty, in order to check beheviour with and without tty - sys.stderr.isatty = lambda: check_tty - sys.stdout.isatty = lambda: check_tty - - expected_transcript = "Hey\nHello" - check_printing_handlers( - mocker, - capsys, - TRANSCRIPT_WITH_SC, - expected_transcript, - speaker_change_token=False, - )