From 19124db5ee825c23b15a735afa05a649d3b3ae1f Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Fri, 29 Oct 2021 09:16:06 -0400 Subject: [PATCH] Initial structured logging work with `fire_event` (#4137) add event type modeling and fire_event calls --- core/dbt/events/README.md | 9 +++ core/dbt/events/events.py | 46 ------------- core/dbt/events/functions.py | 28 ++++++++ core/dbt/events/history.py | 7 ++ core/dbt/events/types.py | 124 +++++++++++++++++++++++++++++++++++ core/dbt/logger.py | 6 +- core/dbt/task/parse.py | 31 +++++---- 7 files changed, 191 insertions(+), 60 deletions(-) create mode 100644 core/dbt/events/README.md delete mode 100644 core/dbt/events/events.py create mode 100644 core/dbt/events/functions.py create mode 100644 core/dbt/events/history.py create mode 100644 core/dbt/events/types.py diff --git a/core/dbt/events/README.md b/core/dbt/events/README.md new file mode 100644 index 00000000000..f3f765f34d6 --- /dev/null +++ b/core/dbt/events/README.md @@ -0,0 +1,9 @@ +# Events Module + +The Events module is the implementation for structured logging. These events represent both a programmatic interface to dbt processes as well as human-readable messaging in one centralized place. The centralization allows for leveraging mypy to enforce interface invariants across all dbt events, and the distinct type layer allows for decoupling events and libraries such as loggers. + +# Using the Events Module +The event module provides types that represent what is happening in dbt in `events.types`. These types are intended to represent an exhaustive list of all things happening within dbt that will need to be logged, streamed, or printed. To fire an event, `events.functions::fire_event` is the entry point to the module from everywhere in dbt. + +# Adding a New Event +In `events.types` add a new class that represents the new event. This may be a simple class with no values, or it may be a dataclass with some values to construct downstream messaging.
Only include the data necessary to construct this message within this class. You must extend all destinations (e.g., if your log message belongs on the CLI, extend `CliEventABC`) as well as the log level this event belongs to. diff --git a/core/dbt/events/events.py b/core/dbt/events/events.py deleted file mode 100644 index 0d4921e8ad6..00000000000 --- a/core/dbt/events/events.py +++ /dev/null @@ -1,46 +0,0 @@ - -from typing import NamedTuple, NoReturn, Union - - -# common trick for getting mypy to do exhaustiveness checks -# will come up with something like `"assert_never" has incompatible type` -# if something is missing. -def assert_never(x: NoReturn) -> NoReturn: - raise AssertionError("Unhandled type: {}".format(type(x).__name__)) - -# The following classes represent the data necessary to describe a -# particular event to both human readable logs, and machine reliable -# event streams. The transformation to these forms will live in outside -# functions. -# -# Until we drop support for Python 3.6 we must use NamedTuples over -# frozen dataclasses. - -# TODO dummy class -class OK(NamedTuple): - result: int - - -# TODO dummy class -class Failure(NamedTuple): - msg: str - - -# using a union instead of inheritance means that this set cannot -# be extended outside this file, and thus mypy can do exhaustiveness -# checks for us. -Event = Union[OK, Failure] - - -# function that translates any instance of the above event types -# into its human-readable message. -# -# This could instead be implemented as a method on an ABC for all -# above classes, but this at least puts all that logic in one place.
-def humanMsg(r: Event) -> str:
-    if isinstance(r, OK):
-        return str(r.result)
-    elif isinstance(r, Failure):
-        return "Failure: " + r.msg
-    else:
-        assert_never(r)
diff --git a/core/dbt/events/functions.py b/core/dbt/events/functions.py
new file mode 100644
index 00000000000..fea072ff8b2
--- /dev/null
+++ b/core/dbt/events/functions.py
@@ -0,0 +1,44 @@
+
+import dbt.logger as logger  # type: ignore  # TODO eventually remove dependency on this logger
+from dbt.events.history import EVENT_HISTORY
+from dbt.events.types import CliEventABC, Event
+
+
+# top-level method for accessing the new eventing system
+# this is where all the side effects happen branched by event type
+# (i.e. - mutating the event history, printing to stdout, logging
+# to files, etc.)
+def fire_event(e: Event) -> None:
+    """Record an event and send it to every destination it belongs to.
+
+    Every event is appended to ``EVENT_HISTORY``. An event that is also a
+    ``CliEventABC`` is additionally passed to ``logger.GLOBAL_LOGGER`` as a
+    timestamped line, using the logger method selected by ``level_tag()``.
+
+    Raises:
+        AssertionError: if a cli event reports a level tag other than
+            'test', 'debug', 'info', 'warn' or 'error'.
+    """
+    EVENT_HISTORY.append(e)
+    if not isinstance(e, CliEventABC):
+        return
+
+    level = e.level_tag()
+    if level == 'test':
+        # TODO after implementing #3977 send to new test level
+        log = logger.GLOBAL_LOGGER.debug
+    elif level == 'debug':
+        log = logger.GLOBAL_LOGGER.debug
+    elif level == 'info':
+        log = logger.GLOBAL_LOGGER.info
+    elif level == 'warn':
+        log = logger.GLOBAL_LOGGER.warning
+    elif level == 'error':
+        log = logger.GLOBAL_LOGGER.error
+    else:
+        raise AssertionError(
+            f"Event type {type(e).__name__} has unhandled level: {level}"
+        )
+
+    # the message is only built once the level is known to be handled
+    log(logger.timestamped_line(e.cli_msg()))
diff --git a/core/dbt/events/history.py b/core/dbt/events/history.py
new file mode 100644
index 00000000000..1bbc1995f7d
--- /dev/null
+++ b/core/dbt/events/history.py
@@ -0,0 +1,7 @@
+from dbt.events.types import Event
+from typing import List
+
+
+# the global history of events for this session
+# TODO this is naive and the memory footprint is likely far too large.
+EVENT_HISTORY: List[Event] = []
diff --git a/core/dbt/events/types.py b/core/dbt/events/types.py
new file mode 100644
index 00000000000..15e6003abde
--- /dev/null
+++ b/core/dbt/events/types.py
@@ -0,0 +1,139 @@
+from abc import ABCMeta, abstractmethod
+from dataclasses import dataclass
+
+
+# types to represent log levels
+
+# in preparation for #3977
+class TestLevel:
+    """Mixin that supplies the "test" level tag."""
+
+    def level_tag(self) -> str:
+        return "test"
+
+
+class DebugLevel:
+    """Mixin that supplies the "debug" level tag."""
+
+    def level_tag(self) -> str:
+        return "debug"
+
+
+class InfoLevel:
+    """Mixin that supplies the "info" level tag."""
+
+    def level_tag(self) -> str:
+        return "info"
+
+
+class WarnLevel:
+    """Mixin that supplies the "warn" level tag."""
+
+    def level_tag(self) -> str:
+        return "warn"
+
+
+class ErrorLevel:
+    """Mixin that supplies the "error" level tag."""
+
+    def level_tag(self) -> str:
+        return "error"
+
+
+# The following classes represent the data necessary to describe a
+# particular event to both human readable logs, and machine reliable
+# event streams. classes extend superclasses that indicate what
+# destinations they are intended for, which mypy uses to enforce
+# that the necessary methods are defined.
+
+
+# top-level superclass for all events
+class Event(metaclass=ABCMeta):
+    """Abstract base class of every event; requires a ``level_tag``."""
+
+    # do not define this yourself. inherit it from one of the above level types.
+    @abstractmethod
+    def level_tag(self) -> str:
+        raise NotImplementedError("level_tag not implemented for event")
+
+
+class CliEventABC(Event, metaclass=ABCMeta):
+    """Abstract base class of events that carry a human readable cli message."""
+
+    # Solely the human readable message. Timestamps and formatting will be added by the logger.
+    @abstractmethod
+    def cli_msg(self) -> str:
+        raise NotImplementedError("cli_msg not implemented for cli event")
+
+
+class ParsingStart(InfoLevel, CliEventABC):
+    def cli_msg(self) -> str:
+        return "Start parsing."
+
+
+class ParsingCompiling(InfoLevel, CliEventABC):
+    def cli_msg(self) -> str:
+        return "Compiling."
+
+
+class ParsingWritingManifest(InfoLevel, CliEventABC):
+    def cli_msg(self) -> str:
+        return "Writing manifest."
+
+
+class ParsingDone(InfoLevel, CliEventABC):
+    def cli_msg(self) -> str:
+        return "Done."
+
+
+class ManifestDependenciesLoaded(InfoLevel, CliEventABC):
+    def cli_msg(self) -> str:
+        return "Dependencies loaded"
+
+
+class ManifestLoaderCreated(InfoLevel, CliEventABC):
+    def cli_msg(self) -> str:
+        return "ManifestLoader created"
+
+
+class ManifestLoaded(InfoLevel, CliEventABC):
+    def cli_msg(self) -> str:
+        return "Manifest loaded"
+
+
+class ManifestChecked(InfoLevel, CliEventABC):
+    def cli_msg(self) -> str:
+        return "Manifest checked"
+
+
+class ManifestFlatGraphBuilt(InfoLevel, CliEventABC):
+    def cli_msg(self) -> str:
+        return "Flat graph built"
+
+
+@dataclass
+class ReportPerformancePath(InfoLevel, CliEventABC):
+    # the path interpolated into the cli message
+    path: str
+
+    def cli_msg(self) -> str:
+        return f"Performance info: {self.path}"
+
+
+# since mypy doesn't run on every file we need to suggest to mypy that every
+# class gets instantiated. But we don't actually want to run this code.
+# making the conditional `if False` causes mypy to skip it as dead code so
+# we need to skirt around that by computing something it doesn't check statically.
+#
+# TODO remove these lines once we run mypy everywhere.
+if 1 == 0: + ParsingStart() + ParsingCompiling() + ParsingWritingManifest() + ParsingDone() + ManifestDependenciesLoaded() + ManifestLoaderCreated() + ManifestLoaded() + ManifestChecked() + ManifestFlatGraphBuilt() + ReportPerformancePath(path='') diff --git a/core/dbt/logger.py b/core/dbt/logger.py index 16eb7cd56a8..468fcb1d021 100644 --- a/core/dbt/logger.py +++ b/core/dbt/logger.py @@ -655,8 +655,12 @@ def get_timestamp(): return time.strftime("%H:%M:%S") +def timestamped_line(msg: str) -> str: + return "{} | {}".format(get_timestamp(), msg) + + def print_timestamped_line(msg: str, use_color: Optional[str] = None): if use_color is not None: msg = dbt.ui.color(msg, use_color) - GLOBAL_LOGGER.info("{} | {}".format(get_timestamp(), msg)) + GLOBAL_LOGGER.info(timestamped_line(msg)) diff --git a/core/dbt/task/parse.py b/core/dbt/task/parse.py index 6f68df53b68..e6a6d259951 100644 --- a/core/dbt/task/parse.py +++ b/core/dbt/task/parse.py @@ -11,8 +11,14 @@ from dbt.parser.manifest import ( Manifest, ManifestLoader, _check_manifest ) -from dbt.logger import DbtProcessState, print_timestamped_line +from dbt.logger import DbtProcessState from dbt.clients.system import write_file +from dbt.events.types import ( + ManifestDependenciesLoaded, ManifestLoaderCreated, ManifestLoaded, ManifestChecked, + ManifestFlatGraphBuilt, ParsingStart, ParsingCompiling, ParsingWritingManifest, ParsingDone, + ReportPerformancePath +) +from dbt.events.functions import fire_event from dbt.graph import Graph import time from typing import Optional @@ -40,7 +46,7 @@ def write_perf_info(self): path = os.path.join(self.config.target_path, PERF_INFO_FILE_NAME) write_file(path, json.dumps(self.loader._perf_info, cls=dbt.utils.JSONEncoder, indent=4)) - print_timestamped_line(f"Performance info: {path}") + fire_event(ReportPerformancePath(path=path)) # This method takes code that normally exists in other files # and pulls it in here, to simplify logging and make the @@ -58,22 +64,22 @@ def 
get_full_manifest(self): with PARSING_STATE: start_load_all = time.perf_counter() projects = root_config.load_dependencies() - print_timestamped_line("Dependencies loaded") + fire_event(ManifestDependenciesLoaded()) loader = ManifestLoader(root_config, projects, macro_hook) - print_timestamped_line("ManifestLoader created") + fire_event(ManifestLoaderCreated()) manifest = loader.load() - print_timestamped_line("Manifest loaded") + fire_event(ManifestLoaded()) _check_manifest(manifest, root_config) - print_timestamped_line("Manifest checked") + fire_event(ManifestChecked()) manifest.build_flat_graph() - print_timestamped_line("Flat graph built") + fire_event(ManifestFlatGraphBuilt()) loader._perf_info.load_all_elapsed = ( time.perf_counter() - start_load_all ) self.loader = loader self.manifest = manifest - print_timestamped_line("Manifest loaded") + fire_event(ManifestLoaded()) def compile_manifest(self): adapter = get_adapter(self.config) @@ -81,15 +87,14 @@ def compile_manifest(self): self.graph = compiler.compile(self.manifest) def run(self): - events.register(Progress(ParseStart)) - print_timestamped_line('Start parsing.') + fire_event(ParsingStart()) self.get_full_manifest() if self.args.compile: - print_timestamped_line('Compiling.') + fire_event(ParsingCompiling()) self.compile_manifest() if self.args.write_manifest: - print_timestamped_line('Writing manifest.') + fire_event(ParsingWritingManifest()) self.write_manifest() self.write_perf_info() - print_timestamped_line('Done.') + fire_event(ParsingDone())