Skip to content

Commit

Permalink
Remove deprecated speaker_change, channel_and_speaker_change, and speaker_change_sensitivity diarization options (#101)
Browse files Browse the repository at this point in the history
  • Loading branch information
dumitrugutu authored Aug 2, 2024
1 parent 076618a commit 9f6f011
Show file tree
Hide file tree
Showing 13 changed files with 17 additions and 218 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,13 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [2.0.0] - 2024-08-01

### Removed

- Remove deprecated speaker_change, channel_and_speaker_change, and speaker_change_sensitivity diarization options
- Remove speaker change deprecation warning

## [1.15.0] - 2024-07-31

### Added
Expand Down
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.15.0
2.0.0
17 changes: 1 addition & 16 deletions speechmatics/adapters.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ def convert_to_txt(
language: str,
language_pack_info: dict = None,
speaker_labels: bool = True,
speaker_change_token: bool = False,
) -> str:
"""
Convert a set of transcription result tokens to a plain text format.
Expand All @@ -59,13 +58,6 @@ def convert_to_txt(
if not group:
continue

# group_tokens always puts speaker_change tokens first in a new group
if speaker_change_token and group[0]["type"] == "speaker_change":
if len(group) == 1:
texts.append("<sc>")
else:
texts.append("<sc>\n")

speaker = get_speaker(group[0])
if speaker and speaker != current_speaker and speaker_labels:
current_speaker = speaker
Expand All @@ -79,25 +71,18 @@ def convert_to_txt(
def group_tokens(tokens: List[dict]) -> List[List[dict]]:
"""
Group the tokens in a set of results by speaker (and language if present).
speaker_change tokens also cause a new group to form.
:param tokens: the JSON v2 results
:return: list of lists
"""
groups = []
last = None
last_is_speaker_change = False
for token in tokens:
if token["type"] == "speaker_change":
groups.append([token])
last_is_speaker_change = True
continue
if last_is_speaker_change or last == (get_speaker(token), get_language(token)):
if last == (get_speaker(token), get_language(token)):
groups[-1].append(token)
else:
groups.append([token])
last = (get_speaker(token), get_language(token))
last_is_speaker_change = False

return groups

Expand Down
9 changes: 0 additions & 9 deletions speechmatics/batch_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,15 +231,6 @@ def submit_job(
a filepath as a string or Path object, or a dict"""
)

if (
config_dict.get("transcription_config", {}).get("diarization")
== "channel_and_speaker_change"
):
LOGGER.warning(
"DeprecationWarning: Speaker Change Detection has been deprecated "
"and will not be supported in future versions of this package."
)

# If audio=None, fetch_data must be specified
file_object = None
try:
Expand Down
8 changes: 0 additions & 8 deletions speechmatics/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,6 @@ def get_transcription_config(
"max_delay_mode",
"diarization",
"channel_diarization_labels",
"speaker_change_sensitivity",
"speaker_diarization_sensitivity",
]:
if args.get(option) is not None:
Expand Down Expand Up @@ -398,7 +397,6 @@ def add_printing_handlers(
enable_transcription_partials=False,
enable_translation_partials=False,
debug_handlers_too=False,
speaker_change_token=False,
print_json=False,
translation_config=None,
):
Expand All @@ -419,9 +417,6 @@ def add_printing_handlers(
debug_handlers_too (bool, optional): Whether to enable 'debug'
handlers that print out an ASCII symbol representing messages being
received and sent.
speaker_change_token (bool, optional): Whether to explicitly include a
speaker change token '<sc>' in the output to indicate speaker
changes.
print_json (bool, optional): Whether to print json transcript messages.
translation_config (TranslationConfig, optional): Translation config with target languages.
"""
Expand Down Expand Up @@ -449,7 +444,6 @@ def partial_transcript_handler(message):
api.transcription_config.language,
language_pack_info=api.get_language_pack_info(),
speaker_labels=True,
speaker_change_token=speaker_change_token,
)
if plaintext:
sys.stderr.write(f"{escape_seq}{plaintext}\r")
Expand All @@ -464,7 +458,6 @@ def transcript_handler(message):
api.transcription_config.language,
language_pack_info=api.get_language_pack_info(),
speaker_labels=True,
speaker_change_token=speaker_change_token,
)
if plaintext:
sys.stdout.write(f"{escape_seq}{plaintext}\n")
Expand Down Expand Up @@ -670,7 +663,6 @@ def rt_main(args):
enable_transcription_partials=args["enable_transcription_partials"],
enable_translation_partials=args["enable_translation_partials"],
debug_handlers_too=args["debug"],
speaker_change_token=args["speaker_change_token"],
print_json=args["print_json"],
translation_config=transcription_config.translation_config,
)
Expand Down
15 changes: 2 additions & 13 deletions speechmatics/cli_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -363,7 +363,7 @@ def get_arg_parser():
)
batch_diarization_parser.add_argument(
"--diarization",
choices=["none", "speaker", "channel", "channel_and_speaker_change"],
choices=["none", "speaker", "channel"],
help="Which type of diarization to use.",
)
batch_diarization_parser.add_argument(
Expand Down Expand Up @@ -424,17 +424,6 @@ def get_arg_parser():
type=int,
help="Enforces the maximum number of speakers allowed in a single audio stream. Min: 2, Max: 20, Default: 20.",
)
rt_transcribe_command_parser.add_argument(
"--speaker-change-sensitivity",
type=float,
help="Sensitivity level for speaker change.",
)
rt_transcribe_command_parser.add_argument(
"--speaker-change-token",
default=False,
action="store_true",
help="Shows a <sc> token where a speaker change was detected.",
)
rt_transcribe_command_parser.add_argument(
"--max-delay",
type=float,
Expand Down Expand Up @@ -496,7 +485,7 @@ def get_arg_parser():

rt_transcribe_command_parser.add_argument(
"--diarization",
choices=["none", "speaker", "speaker_change"],
choices=["none", "speaker"],
help="Which type of diarization to use.",
)

Expand Down
9 changes: 0 additions & 9 deletions speechmatics/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -448,15 +448,6 @@ async def run(
:raises Exception: Can raise any exception returned by the
consumer/producer tasks.
"""
if (
transcription_config.speaker_change_sensitivity
or transcription_config.diarization == "speaker_change"
):
LOGGER.warning(
"DeprecationWarning: Speaker Change Detection has been deprecated "
"and will not be supported in future versions of this package."
)

self.transcription_config = transcription_config
self.seq_no = 0
self._language_pack_info = None
Expand Down
3 changes: 0 additions & 3 deletions speechmatics/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,9 +250,6 @@ class TranscriptionConfig(_TranscriptionConfig):
speaker_diarization_config: RTSpeakerDiarizationConfig = None
"""Configuration for speaker diarization."""

speaker_change_sensitivity: float = None
"""Sensitivity level for speaker change."""

streaming_mode: bool = None
"""Indicates if we run the engine in streaming mode, or regular RT mode."""

Expand Down
40 changes: 0 additions & 40 deletions tests/data/convert_to_txt/speaker_change.json

This file was deleted.

6 changes: 0 additions & 6 deletions tests/mock_rt_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,12 +135,6 @@ def dummy_add_transcript():
{"content": "foo", "confidence": 1.0, "language": "en"},
],
},
{
"type": "speaker_change",
"start_time": 1.0,
"end_time": 1.0,
"score": 0.8,
},
{
"type": "word",
"start_time": 1.0,
Expand Down
23 changes: 5 additions & 18 deletions tests/test_adapters.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,55 +8,43 @@


@pytest.mark.parametrize(
"json_name, txt_name, language_pack_info, speaker_labels, speaker_change_token",
"json_name, txt_name, language_pack_info, speaker_labels",
[
("empty.json", "empty.txt", {"word_delimiter": " "}, False, False),
("simple_case.json", "simple_case.txt", {"word_delimiter": " "}, False, False),
("empty.json", "empty.txt", {"word_delimiter": " "}, False),
("simple_case.json", "simple_case.txt", {"word_delimiter": " "}, False),
(
"simple_case.json",
"simple_case_with_speakers.txt",
{"word_delimiter": " "},
True,
False,
),
(
"simple_case.json",
"simple_case_no_word_delim.txt",
{"word_delimiter": ""},
False,
False,
),
(
"two_speakers.json",
"two_speakers.txt",
{"word_delimiter": " "},
False,
False,
),
(
"two_speakers.json",
"two_speakers_with_speaker_labels.txt",
{"word_delimiter": " "},
True,
False,
),
("entity.json", "entity.txt", {"word_delimiter": " "}, False, False),
("punctuation.json", "punctuation.txt", {"word_delimiter": " "}, False, False),
(
"speaker_change.json",
"speaker_change.txt",
{"word_delimiter": " "},
False,
True,
),
("entity.json", "entity.txt", {"word_delimiter": " "}, False),
("punctuation.json", "punctuation.txt", {"word_delimiter": " "}, False),
],
)
def test_convert_to_txt(
json_name: str,
txt_name: str,
language_pack_info: dict,
speaker_labels: bool,
speaker_change_token: bool,
):
json_file_path = path_to_test_resource(os.path.join("convert_to_txt", json_name))
txt_file_path = path_to_test_resource(os.path.join("convert_to_txt", txt_name))
Expand All @@ -73,7 +61,6 @@ def test_convert_to_txt(
"en",
language_pack_info,
speaker_labels,
speaker_change_token,
)
== txt
)
27 changes: 0 additions & 27 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,10 +158,6 @@
["batch", "transcribe", "--speaker-diarization-sensitivity=0.7"],
{"speaker_diarization_sensitivity": 0.7},
),
(
["rt", "transcribe", "--speaker-change-token"],
{"speaker_change_token": True},
),
(
[
"rt",
Expand All @@ -183,18 +179,6 @@
"channel_diarization_labels": ["label5 label4 label3"],
},
),
(
[
"batch",
"transcribe",
"--diarization=channel_and_speaker_change",
"--channel-diarization-labels=label1 label2",
],
{
"diarization": "channel_and_speaker_change",
"channel_diarization_labels": ["label1 label2"],
},
),
(["rt", "transcribe", "--auth-token=xyz"], {"auth_token": "xyz"}),
(
["batch", "transcribe", "--domain=finance"],
Expand All @@ -208,10 +192,6 @@
["batch", "transcribe", "--output-format=json-v2"],
{"output_format": "json-v2"},
),
(
["batch", "transcribe", "--diarization=channel_and_speaker_change"],
{"diarization": "channel_and_speaker_change"},
),
(["batch", "submit"], {"command": "submit"}),
(
["rt", "transcribe", "--config-file=data/transcription_config.json"],
Expand Down Expand Up @@ -631,9 +611,6 @@ def test_rt_main_with_all_options(mock_server, tmp_path):
"0.1",
"--diarization",
"none",
"--speaker-change-sensitivity",
"0.8",
"--speaker-change-token",
"--max-delay",
"5.0",
"--max-delay-mode",
Expand Down Expand Up @@ -678,7 +655,6 @@ def test_rt_main_with_all_options(mock_server, tmp_path):
assert msg["transcription_config"]["diarization"] == "none"
assert msg["transcription_config"]["max_delay"] == 5.0
assert msg["transcription_config"]["max_delay_mode"] == "fixed"
assert msg["transcription_config"]["speaker_change_sensitivity"] == 0.8
assert msg["transcription_config"].get("operating_point") is None
assert (
msg["transcription_config"]["transcript_filtering_config"][
Expand Down Expand Up @@ -751,8 +727,6 @@ def test_rt_main_with_config_file_cmdline_override(mock_server):
"--output-locale=en-US",
"--domain=different",
"--operating-point=enhanced",
"--speaker-change-sensitivity",
"0.8",
audio_path,
]

Expand All @@ -773,7 +747,6 @@ def test_rt_main_with_config_file_cmdline_override(mock_server):
assert msg["transcription_config"]["domain"] == "different"
assert msg["transcription_config"]["enable_entities"] is True
assert msg["transcription_config"]["output_locale"] == "en-US"
assert msg["transcription_config"]["speaker_change_sensitivity"] == 0.8
assert msg["transcription_config"]["operating_point"] == "enhanced"
assert msg["translation_config"] is not None
assert msg["translation_config"]["enable_partials"] is True
Expand Down
Loading

0 comments on commit 9f6f011

Please sign in to comment.