Skip to content

Commit

Permalink
Add remove-disfluencies arg (#96)
Browse files Browse the repository at this point in the history
* Add remove-disfluencies arg

* Fix all_options test

* Add remove-disfluency arg parsing

* lint

* Nest remove_disfluencies within a transcript_filtering_config

* lint

* Fix dicts to use optional type

* Undo unnecessary formatting change

* Remove unnecessary remove_disfluencies param

* Fix copy paste error for transcript_filtering_config
  • Loading branch information
TeriDSpeech authored May 3, 2024
1 parent 4b8ff20 commit 9c370ce
Show file tree
Hide file tree
Showing 6 changed files with 30 additions and 2 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [1.14.6] - 2024-04-26

## Added

- Support for removing words tagged as disfluency.

## [1.14.5] - 2024-03-20

## Added
Expand Down
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.14.5
1.14.6
6 changes: 6 additions & 0 deletions speechmatics/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,12 @@ def get_transcription_config(
"volume_threshold": args.get("volume_threshold")
}

if args.get("remove_disfluencies") is not None:
config["transcript_filtering_config"] = {}
config["transcript_filtering_config"]["remove_disfluencies"] = args.get(
"remove_disfluencies"
)

if args.get("ctrl"):
LOGGER.warning(f"Using internal dev control command: {args['ctrl']}")
config["ctrl"] = json.loads(args["ctrl"])
Expand Down
6 changes: 6 additions & 0 deletions speechmatics/cli_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -486,6 +486,12 @@ def get_arg_parser():
required=False,
help="Comma-separated list of whitelisted event types for audio events.",
)
rt_transcribe_command_parser.add_argument(
"--remove-disfluencies",
default=False,
action="store_true",
help="Removes words tagged as disfluency.",
)

# Parent parser for batch audio-events argument
batch_audio_events_parser = argparse.ArgumentParser(add_help=False)
Expand Down
5 changes: 4 additions & 1 deletion speechmatics/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,9 +128,12 @@ def asdict(self) -> Dict[Any, Any]:
enable_entities: bool = None
"""Indicates if inverse text normalization entity output is enabled."""

audio_filtering_config: dict = None
audio_filtering_config: Optional[dict] = None
"""Configuration for limiting the transcription of quiet audio."""

transcript_filtering_config: Optional[dict] = None
"""Configuration for applying filtering to the transcription."""


@dataclass
class RTSpeakerDiarizationConfig:
Expand Down
7 changes: 7 additions & 0 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -623,6 +623,7 @@ def test_rt_main_with_all_options(mock_server, tmp_path):
str(chunk_size),
"--auth-token=xyz",
audio_path,
"--remove-disfluencies",
]

cli.main(vars(cli.parse_args(args)))
Expand Down Expand Up @@ -660,6 +661,12 @@ def test_rt_main_with_all_options(mock_server, tmp_path):
assert msg["transcription_config"]["max_delay_mode"] == "fixed"
assert msg["transcription_config"]["speaker_change_sensitivity"] == 0.8
assert msg["transcription_config"].get("operating_point") is None
assert (
msg["transcription_config"]["transcript_filtering_config"][
"remove_disfluencies"
]
is True
)

# Check that the chunk size argument is respected
add_audio_messages = mock_server.find_add_audio_messages()
Expand Down

0 comments on commit 9c370ce

Please sign in to comment.