Skip to content

Commit

Permalink
Fix agermanidis#67 high memory/RAM usage partially by terminating chi…
Browse files Browse the repository at this point in the history
…ld processes, lowering default multiprocessing counts and using gc.collect(0).
  • Loading branch information
BingLingGroup committed Feb 3, 2020
1 parent d9a474b commit f41c846
Show file tree
Hide file tree
Showing 16 changed files with 178 additions and 127 deletions.
9 changes: 9 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## TOC

- [Unreleased](#unreleased)
- [Changed](#changedunreleased)
- [0.5.4-alpha - 2020-01-31](#054-alpha---2020-01-31)
- [Added](#added054-alpha)
- [Changed](#changed054-alpha)
Expand All @@ -34,6 +35,10 @@ Click up arrow to go back to TOC.

### Unreleased

#### Changed(Unreleased)

- Partially fix high memory/RAM usage by terminating child processes, lowering the default multiprocessing counts and calling `gc.collect(0)`. [issue #67](https://github.com/BingLingGroup/autosub/issues/67), [issue #74](https://github.com/BingLingGroup/autosub/issues/74)

### [0.5.4-alpha] - 2020-01-31

#### Added(0.5.4-alpha)
Expand All @@ -46,6 +51,8 @@ Click up arrow to go back to TOC.
- Fix output format limits when input is a subtitles file.
- Remove gtransv2 support.

<escape><a href = "#TOC">&nbsp;&nbsp;</a></escape>

### [0.5.3-alpha] - 2019-12-30

#### Changed(0.5.3-alpha)
Expand All @@ -54,6 +61,8 @@ Click up arrow to go back to TOC.
- Fix Auditok option issues. [issue #70](https://github.com/BingLingGroup/autosub/issues/70)
- Fix output option issue. [issue #73](https://github.com/BingLingGroup/autosub/issues/73)

<escape><a href = "#TOC">&nbsp;&nbsp;</a></escape>

### [0.5.2-alpha] - 2019-11-05

#### Added(0.5.2-alpha)
Expand Down
7 changes: 2 additions & 5 deletions autosub/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,16 +65,13 @@ def main(): # pylint: disable=too-many-branches, too-many-statements, too-many-
validate_result = cmdline_utils.validate_io(args, styles_list)

if validate_result == 0:
ffmpeg_cmd = ffmpeg_utils.get_cmd("ffmpeg")
ffmpeg_cmd = "{} ".format(ffmpeg_utils.get_cmd("ffmpeg"))
if not ffmpeg_cmd:
raise exceptions.AutosubException(
_("Error: Dependency ffmpeg"
" not found on this machine."))

ffmpeg_cmd = ffmpeg_cmd + ' '

cmdline_utils.fix_args(args,
ffmpeg_cmd=ffmpeg_cmd)
cmdline_utils.fix_args(args, ffmpeg_cmd=ffmpeg_cmd)

if args.audio_process:
args.audio_process = {k.lower() for k in args.audio_process}
Expand Down
12 changes: 10 additions & 2 deletions autosub/cmdline_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import os
import subprocess
import tempfile
import gc

# Import third-party modules
import auditok
Expand Down Expand Up @@ -865,7 +866,7 @@ def audio_or_video_prcs( # pylint: disable=too-many-branches, too-many-statemen
channel=1,
sample_rate=48000,
out_=audio_wav)
print(_("\nConvert source audio to \"{name}\" "
print(_("\nConvert source file to \"{name}\" "
"to get audio length and detect audio regions.").format(
name=audio_wav))
print(command)
Expand All @@ -875,9 +876,11 @@ def audio_or_video_prcs( # pylint: disable=too-many-branches, too-many-statemen

if not ffmpeg_utils.ffprobe_check_file(audio_wav):
raise exceptions.AutosubException(
_("Error: Convert source audio to \"{name}\" failed.").format(
_("Error: Convert source file to \"{name}\" failed.").format(
name=audio_wav))

print(_("Detecting speech regions using Auditok."))

regions = core.auditok_gen_speech_regions(
audio_wav=audio_wav,
energy_threshold=args.energy_threshold,
Expand All @@ -886,6 +889,8 @@ def audio_or_video_prcs( # pylint: disable=too-many-branches, too-many-statemen
max_continuous_silence=args.max_continuous_silence,
mode=mode)
os.remove(audio_wav)
gc.collect(0)

print(_("\n\"{name}\" has been deleted.").format(name=audio_wav))

if not regions:
Expand Down Expand Up @@ -935,6 +940,7 @@ def audio_or_video_prcs( # pylint: disable=too-many-branches, too-many-statemen
suffix=args.api_suffix,
concurrency=args.audio_concurrency,
is_keep=args.keep)
gc.collect(0)

if not audio_fragments or \
len(audio_fragments) != len(regions):
Expand Down Expand Up @@ -981,6 +987,7 @@ def audio_or_video_prcs( # pylint: disable=too-many-branches, too-many-statemen
concurrency=args.speech_concurrency,
min_confidence=args.min_confidence,
is_keep=args.keep)
gc.collect(0)

elif args.speech_api == "gcsv1":
# Google Cloud speech-to-text V1P1Beta1
Expand Down Expand Up @@ -1034,6 +1041,7 @@ def audio_or_video_prcs( # pylint: disable=too-many-branches, too-many-statemen
else:
text_list = None

gc.collect(0)
if not text_list or len(text_list) != len(regions):
raise exceptions.SpeechToTextException(
_("Error: Speech-to-text failed.\nAll works done."))
Expand Down
8 changes: 7 additions & 1 deletion autosub/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import sys
import shlex
import locale
import multiprocessing

# Import third-party modules

Expand Down Expand Up @@ -52,7 +53,12 @@
GOOGLE_SPEECH_V2_API_KEY = "AIzaSyBOti4mM-6x9WDnZIjIeyEU21OpBXqWBgw"
GOOGLE_SPEECH_V2_API_URL = \
"www.google.com/speech-api/v2/recognize?client=chromium&lang={lang}&key={key}"
DEFAULT_CONCURRENCY = 10

if multiprocessing.cpu_count() > 3:
DEFAULT_CONCURRENCY = multiprocessing.cpu_count() >> 1
else:
DEFAULT_CONCURRENCY = 2

DEFAULT_SRC_LANGUAGE = 'en-US'
DEFAULT_ENERGY_THRESHOLD = 45
MAX_REGION_SIZE = 6.0
Expand Down
16 changes: 13 additions & 3 deletions autosub/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import multiprocessing
import time
import gettext
import gc

# Import third-party modules
import progressbar
Expand Down Expand Up @@ -66,6 +67,7 @@ def auditok_gen_speech_regions( # pylint: disable=too-many-arguments
for token in tokens:
# get start and end times
regions.append((token[1] * 10, token[2] * 10))

asource.close()
# reference
# auditok.readthedocs.io/en/latest/apitutorial.html#examples-using-real-audio-data
Expand Down Expand Up @@ -108,14 +110,16 @@ def bulk_audio_conversion( # pylint: disable=too-many-arguments
for i, flac_region in enumerate(pool.imap(converter, regions)):
audio_fragments.append(flac_region)
pbar.update(i)
gc.collect(0)
pbar.finish()
pool.terminate()
pool.join()

except KeyboardInterrupt:
pbar.finish()
pool.terminate()
pool.join()
return None

return audio_fragments


Expand Down Expand Up @@ -155,8 +159,11 @@ def gsv2_to_text( # pylint: disable=too-many-locals,too-many-arguments,too-many
text_list.append(transcript)
else:
text_list.append("")
gc.collect(0)
pbar.update(i)
pbar.finish()
pool.terminate()
pool.join()

except (KeyboardInterrupt, AttributeError) as error:
pbar.finish()
Expand Down Expand Up @@ -274,6 +281,7 @@ def gcsv1_to_text( # pylint: disable=too-many-locals,too-many-arguments,too-man
tasks.append(pool.apply_async(
speech_trans_api.gcsv1p1beta1_service_client,
args=(filename, is_keep, config, min_confidence)))
gc.collect(0)

for task in tasks:
i = i + 1
Expand All @@ -285,6 +293,8 @@ def gcsv1_to_text( # pylint: disable=too-many-locals,too-many-arguments,too-man
pbar.update(i)

pbar.finish()
pool.terminate()
pool.join()

except (KeyboardInterrupt, AttributeError) as error:
pbar.finish()
Expand Down Expand Up @@ -508,7 +518,7 @@ def list_to_sub_str(
else:
# fallback process
print(_("Format \"{fmt}\" not supported. "
"Using \"{default_fmt}\" instead.").format(
"Use \"{default_fmt}\" instead.").format(
fmt=subtitles_file_format,
default_fmt=constants.DEFAULT_SUBTITLES_FORMAT))
pysubs2_obj = pysubs2.SSAFile()
Expand Down Expand Up @@ -567,7 +577,7 @@ def ssafile_to_sub_str(
else:
# fallback process
print(_("Format \"{fmt}\" not supported. "
"Using \"{default_fmt}\" instead.").format(
"Use \"{default_fmt}\" instead.").format(
fmt=subtitles_file_format,
default_fmt=constants.DEFAULT_SUBTITLES_FORMAT))
formatted_subtitles = ssafile.to_string(
Expand Down
Binary file modified autosub/data/locale/zh_CN/LC_MESSAGES/autosub.cmdline_utils.mo
Binary file not shown.
Loading

0 comments on commit f41c846

Please sign in to comment.