generated from ministryofjustice/template-repository
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
perf: time ingestion sources and transformers
- add calls to `time` unix command to `datahub ingest` calls
- add decorators for timing function/iterator runs
- use the timers for
  - create_cadet_databases_source
  - justice_data_source
  - `AssignCadetDatabases` transformer

Co-authored-by: Mat Moore <mat.moore@noreply.github.com>
- Loading branch information
Showing
10 changed files
with
199 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
import logging | ||
import time | ||
from datetime import timedelta | ||
|
||
# Configure root logging at DEBUG level as a side effect of importing this
# module, so the timing reports emitted via ``logging.info`` are not filtered.
# NOTE(review): import-time logging configuration affects the whole process;
# confirm this is intended rather than configuring at the entry point.
logging.basicConfig(level=logging.DEBUG)
|
||
|
||
def report_time(func):
    """
    Decorator to report the total time of a function call.

    Each call to the decorated function is timed with a ``Stopwatch`` whose
    report is labelled with the function name, the types of the positional
    arguments and the keyword arguments.

    The decorated function's return value is passed through unchanged. If the
    function raises, the timing is still stopped and reported before the
    exception propagates to the caller.
    """
    # Local import so this block does not depend on the file's import header.
    import functools

    # Preserve the wrapped function's __name__/__doc__ for introspection.
    @functools.wraps(func)
    def wrapped_func(*args, **kwargs):
        arg_types = [type(arg) for arg in args]
        stopwatch = Stopwatch(
            function=func.__name__, arg_types=arg_types, kwargs=kwargs
        )

        stopwatch.start()
        try:
            return func(*args, **kwargs)
        finally:
            # Runs on both success and failure so slow failing calls are
            # visible in the timing log as well.
            stopwatch.stop()
            stopwatch.report()

    return wrapped_func
|
||
|
||
def report_generator_time(func):
    """
    Decorator to report the total time of an iterable.

    The decorated callable becomes a generator function. Timing starts when
    the caller first advances the returned generator (not when the decorated
    function is called) and stops when the underlying iterable is exhausted,
    so the elapsed time also includes whatever the consumer does between
    items.

    The report is labelled with the function name, the types of the
    positional arguments and the keyword arguments. It is also emitted if
    iteration ends early because of an exception or because the generator is
    closed before exhaustion.

    The wrapper's own return value (``StopIteration.value``) is the value
    returned by the wrapped generator, or ``None`` for a plain iterable.
    """
    # Local import so this block does not depend on the file's import header.
    import functools

    # Preserve the wrapped function's __name__/__doc__ for introspection.
    @functools.wraps(func)
    def wrapped_func(*args, **kwargs):
        arg_types = [type(arg) for arg in args]
        stopwatch = Stopwatch(
            function=func.__name__, arg_types=arg_types, kwargs=kwargs
        )

        stopwatch.start()
        try:
            # ``yield from`` evaluates to the inner generator's return value;
            # forward that instead of the (already exhausted) generator object.
            return (yield from func(*args, **kwargs))
        finally:
            stopwatch.stop()
            stopwatch.report()

    return wrapped_func
|
||
|
||
class Stopwatch:
    """
    Wrapper around the time module for timing code execution.

    Keyword arguments passed to the constructor are rendered as ``key=value``
    pairs and prepended to every report line after the ``TIMING: `` marker.

    ``start()`` / ``stop()`` may be called repeatedly; ``elapsed`` accumulates
    the duration (in seconds) of every completed start/stop interval.
    """

    def __init__(self, **meta):
        self.running = False
        # Wall-clock timestamps (seconds since the epoch) of the most recent
        # start and stop; used only for display in the report.
        self.start_time = None
        self.stop_time = None
        # Total seconds across all completed start/stop intervals.
        self.elapsed = 0
        # Monotonic reading taken at start(); used to measure the interval so
        # system clock adjustments cannot skew or negate the elapsed time.
        self._started_at = None
        joined_meta = ", ".join(f"{k}={v}" for k, v in meta.items())
        self.prefix = f"TIMING: {joined_meta}, " if joined_meta else "TIMING: "

    def start(self):
        """Begin (or restart) timing an interval."""
        self.start_time = time.time()
        self._started_at = time.perf_counter()
        self.running = True

    def stop(self):
        """
        End the current interval and add its duration to ``elapsed``.

        Does nothing if the stopwatch is not running, so calling ``stop()``
        before ``start()`` or twice in a row never double-counts an interval.
        """
        if not self.running:
            return

        self.running = False
        self.stop_time = time.time()
        self.elapsed += time.perf_counter() - self._started_at

    def report(self):
        """Log the start time, end time (both UTC) and total elapsed time."""
        logging.info(
            "%sstart_time=%s, end_time=%s, elapsed_time=%s",
            self.prefix,
            self._format_timestamp(self.start_time),
            self._format_timestamp(self.stop_time),
            timedelta(seconds=self.elapsed),
        )

    @staticmethod
    def _format_timestamp(timestamp):
        """Render an epoch timestamp as a UTC string, or ``n/a`` if unset."""
        # time.gmtime(None) silently means "now", which would make a
        # never-started stopwatch report a misleading timestamp.
        if timestamp is None:
            return "n/a"
        return time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime(timestamp))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
import logging | ||
import re | ||
|
||
from ingestion.utils import Stopwatch, report_generator_time, report_time | ||
|
||
# Expected shape of a timing report line: the "TIMING: " marker, any metadata,
# then start/end timestamps formatted as "YYYY-MM-DD HH:MM:SS" and an elapsed
# time beginning "0:00:SS" (i.e. the timed code ran for less than a minute).
# There is no end anchor, so fractional seconds after "SS" are accepted.
REPORT_REGEX = re.compile(
    r"TIMING: .*"
    r"start_time=\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}, "
    r"end_time=\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}, "
    r"elapsed_time=0:00:\d\d",
)
|
||
|
||
def test_stopwatch_generates_a_report(caplog):
    """A start/stop/report cycle logs exactly one well-formed timing line."""
    caplog.set_level(logging.INFO)

    stopwatch = Stopwatch()
    stopwatch.start()
    stopwatch.stop()
    stopwatch.report()

    logged = [record.message for record in caplog.records]
    assert len(logged) == 1

    report_line = logged[0]
    assert REPORT_REGEX.match(report_line)
|
||
|
||
def test_report_time_generates_a_report(caplog):
    """Calling a ``report_time``-decorated function logs one labelled report."""
    caplog.set_level(logging.INFO)

    @report_time
    def foo():
        return 1 + 1

    # The decorator must pass the wrapped function's result straight through.
    result = foo()
    assert result == 2

    logged = [record.message for record in caplog.records]
    assert len(logged) == 1

    report_line = logged[0]
    assert REPORT_REGEX.match(report_line)
    assert "function=foo, " in report_line
|
||
|
||
def test_report_generate_time(caplog):
    """Exhausting a ``report_generator_time`` generator logs one labelled report."""
    caplog.set_level(logging.INFO)

    @report_generator_time
    def foo():
        yield 1
        yield 2

    # Fully consume the generator; the yielded items must be unchanged.
    produced = list(foo())
    assert produced == [1, 2]

    logged = [record.message for record in caplog.records]
    assert len(logged) == 1

    report_line = logged[0]
    assert REPORT_REGEX.match(report_line)
    assert "function=foo, " in report_line