From 53bafd1c02fc2e8d94ccf2ff357e7154c1445c33 Mon Sep 17 00:00:00 2001
From: Nathaniel May
Date: Tue, 21 Sep 2021 14:50:48 -0400
Subject: [PATCH] turn on static parser by default

---
 core/dbt/parser/models.py                    | 225 +++++++++++-------
 .../test_all_experimental_parser.py          |  50 +++-
 2 files changed, 178 insertions(+), 97 deletions(-)

diff --git a/core/dbt/parser/models.py b/core/dbt/parser/models.py
index 6a472569214..e54e927ae67 100644
--- a/core/dbt/parser/models.py
+++ b/core/dbt/parser/models.py
@@ -1,3 +1,4 @@
+from copy import deepcopy
 from dbt.context.context_config import ContextConfig
 from dbt.contracts.graph.parsed import ParsedModelNode
 import dbt.flags as flags
@@ -10,7 +11,6 @@ from dbt_extractor import ExtractionError, py_extract_from_source  # type: ignore
 from functools import reduce
 from itertools import chain
-import random
 from typing import Any, Dict, Iterator, List, Optional, Union
@@ -28,80 +28,37 @@ def resource_type(self) -> NodeType:
     def get_compiled_path(cls, block: FileBlock):
         return block.path.relative_path

-    # TODO when this is turned on by default, simplify the nasty if/else tree inside this method.
     def render_update(
         self, node: ParsedModelNode, config: ContextConfig
     ) -> None:
-        # TODO go back to 1/100 when this is turned on by default.
-        # `True` roughly 1/50 times this function is called
-        sample: bool = random.randint(1, 51) == 50
+        # only sample on normal runs, not when the experimental parser flag is on.
+        sample: bool = False
+        if not flags.USE_EXPERIMENTAL_PARSER:
+            # sampling is explicitly disabled here, but use the following
+            # commented code to sample a fraction of the time when new
+            # experimental features are added. (must also `import random`)
+            pass
+            # `True` roughly 1/100 times this function is called
+            # sample = random.randint(1, 101) == 100

         # top-level declaration of variables
-        experimentally_parsed: Optional[Union[str, Dict[str, List[Any]]]] = None
-        config_call_dict: Dict[str, Any] = {}
-        source_calls: List[List[str]] = []
-
-        # run the experimental parser if the flag is on or if we're sampling
-        if flags.USE_EXPERIMENTAL_PARSER or sample:
-            if self._has_banned_macro(node):
-                # this log line is used for integration testing. If you change
-                # the code at the beginning of the line change the tests in
-                # test/integration/072_experimental_parser_tests/test_all_experimental_parser.py
-                logger.debug(
-                    f"1601: parser fallback to jinja because of macro override for {node.path}"
-                )
-                experimentally_parsed = "has_banned_macro"
-            else:
-                # run the experimental parser and return the results
-                try:
-                    experimentally_parsed = py_extract_from_source(
-                        node.raw_sql
-                    )
-                    logger.debug(f"1699: statically parsed {node.path}")
-                # if we want information on what features are barring the experimental
-                # parser from reading model files, this is where we would add that
-                # since that information is stored in the `ExtractionError`.
-                except ExtractionError:
-                    experimentally_parsed = "cannot_parse"
-
-        # if the parser succeeded, extract some data in easy-to-compare formats
-        if isinstance(experimentally_parsed, dict):
-            # create second config format
-            for c in experimentally_parsed['configs']:
-                ContextConfig._add_config_call(config_call_dict, {c[0]: c[1]})
-
-            # format sources TODO change extractor to match this type
-            for s in experimentally_parsed['sources']:
-                source_calls.append([s[0], s[1]])
-            experimentally_parsed['sources'] = source_calls
-
-        # normal dbt run
-        if not flags.USE_EXPERIMENTAL_PARSER:
-            # normal rendering
-            super().render_update(node, config)
-            # if we're sampling, compare for correctness
-            if sample:
-                result = _get_sample_result(
-                    experimentally_parsed,
-                    config_call_dict,
-                    source_calls,
-                    node,
-                    config
-                )
-                # fire a tracking event. this fires one event for every sample
-                # so that we have data on a per file basis. Not only can we expect
-                # no false positives or misses, we can expect the number model
-                # files parseable by the experimental parser to match our internal
-                # testing.
-                if tracking.active_user is not None:  # None in some tests
-                    tracking.track_experimental_parser_sample({
-                        "project_id": self.root_project.hashed_name(),
-                        "file_id": utils.get_hash(node),
-                        "status": result
-                    })
-
-        # if the --use-experimental-parser flag was set, and the experimental parser succeeded
-        elif isinstance(experimentally_parsed, Dict):
+        statically_parsed: Optional[Union[str, Dict[str, List[Any]]]] = None
+        experimental_sample: Optional[Union[str, Dict[str, List[Any]]]] = None
+
+        # sample the experimental parser during a normal run
+        if sample:
+            experimental_sample = self.run_experimental_parser(node)
+        # use the experimental parser exclusively if the flag is on
+        if flags.USE_EXPERIMENTAL_PARSER:
+            statically_parsed = self.run_experimental_parser(node)
+        # run the stable static parser by default
+        else:
+            statically_parsed = self.run_static_parser(node)
+
+        # if the static parser succeeded, extract some data in easy-to-compare formats
+        if isinstance(statically_parsed, dict):
+            config_call_dict = _get_config_call_dict(statically_parsed)
+
             # since it doesn't need python jinja, fit the refs, sources, and configs
             # into the node. Down the line the rest of the node will be updated with
             # this information. (e.g. depends_on etc.)
@@ -114,29 +71,104 @@ def render_update(
             # update the unrendered config with values from the file.
             # values from yaml files are in there already
-            node.unrendered_config.update(dict(experimentally_parsed['configs']))
+            node.unrendered_config.update(dict(statically_parsed['configs']))

             # set refs and sources on the node object
-            node.refs += experimentally_parsed['refs']
-            node.sources += experimentally_parsed['sources']
+            node.refs += statically_parsed['refs']
+            node.sources += statically_parsed['sources']

             # configs don't need to be merged into the node
             # setting them in config._config_call_dict is sufficient

             self.manifest._parsing_info.static_analysis_parsed_path_count += 1
-
-        # the experimental parser didn't run on this model.
-        # fall back to python jinja rendering.
-        elif experimentally_parsed in ["has_banned_macro"]:
-            # not logging here since the reason should have been logged above
-            super().render_update(node, config)
-        # the experimental parser ran on this model and failed.
-        # fall back to python jinja rendering.
+        # if the static parser didn't succeed, fall back to jinja
         else:
+            # jinja rendering
+            super().render_update(node, config)
             logger.debug(
-                f"1602: parser fallback to jinja because of extractor failure for {node.path}"
+                f"1602: parser fallback to jinja rendering on {node.path}"
             )
-            super().render_update(node, config)
+
+        # if we're sampling, compare for correctness
+        if sample:
+            result = _get_sample_result(
+                experimental_sample,
+                config_call_dict,
+                node,
+                config
+            )
+            # fire a tracking event. this fires one event for every sample
+            # so that we have data on a per file basis. Not only can we expect
+            # no false positives or misses, we can expect the number of model
+            # files parseable by the experimental parser to match our internal
+            # testing.
+            if tracking.active_user is not None:  # None in some tests
+                tracking.track_experimental_parser_sample({
+                    "project_id": self.root_project.hashed_name(),
+                    "file_id": utils.get_hash(node),
+                    "status": result
+                })
+
+    def run_static_parser(
+        self, node: ParsedModelNode
+    ) -> Optional[Union[str, Dict[str, List[Any]]]]:
+        # if any banned macros have been overridden by the user, we cannot use the static parser.
+        if self._has_banned_macro(node):
+            # this log line is used for integration testing. If you change
+            # the code at the beginning of the line change the tests in
+            # test/integration/072_experimental_parser_tests/test_all_experimental_parser.py
+            logger.debug(
+                f"1601: detected macro override of ref/source/config in the scope of {node.path}"
+            )
+            return "has_banned_macro"
+
+        # run the stable static parser and return the results
+        try:
+            statically_parsed = py_extract_from_source(
+                node.raw_sql
+            )
+            logger.debug(f"1699: static parser successfully parsed {node.path}")
+            return _shift_sources(statically_parsed)
+        # if we want information on what features are barring the static
+        # parser from reading model files, this is where we would add that
+        # since that information is stored in the `ExtractionError`.
+        except ExtractionError:
+            logger.debug(
+                f"1603: static parser failed on {node.path}"
+            )
+            return "cannot_parse"
+
+    def run_experimental_parser(
+        self, node: ParsedModelNode
+    ) -> Optional[Union[str, Dict[str, List[Any]]]]:
+        # if any banned macros have been overridden by the user, we cannot use the static parser.
+        if self._has_banned_macro(node):
+            # this log line is used for integration testing. If you change
+            # the code at the beginning of the line change the tests in
+            # test/integration/072_experimental_parser_tests/test_all_experimental_parser.py
+            logger.debug(
+                f"1601: parser fallback to jinja because of macro override for {node.path}"
+            )
+            return "has_banned_macro"
+
+        # run the experimental parser and return the results
+        try:
+            # for now, this line calls the stable static parser since there are no
+            # experimental features. Change `py_extract_from_source` to the new
+            # experimental call when we add additional features.
+            experimentally_parsed = py_extract_from_source(
+                node.raw_sql
+            )
+            logger.debug(f"1698: experimental parser successfully parsed {node.path}")
+            return _shift_sources(experimentally_parsed)
+        # if we want information on what features are barring the experimental
+        # parser from reading model files, this is where we would add that
+        # since that information is stored in the `ExtractionError`.
+        except ExtractionError:
+            logger.debug(
+                f"1604: experimental parser failed on {node.path}"
+            )
+            return "cannot_parse"

     # checks for banned macros
     def _has_banned_macro(
@@ -164,11 +196,36 @@ def _has_banned_macro(
         )

+
+# pure function. safe to use elsewhere, but unlikely to be useful outside this file.
+def _get_config_call_dict(
+    static_parser_result: Dict[str, List[Any]]
+) -> Dict[str, Any]:
+    config_call_dict: Dict[str, Any] = {}
+
+    for c in static_parser_result['configs']:
+        ContextConfig._add_config_call(config_call_dict, {c[0]: c[1]})
+
+    return config_call_dict
+
+
+# TODO if we format sources in the extractor to match this type, we won't need this function.
+def _shift_sources(
+    static_parser_result: Dict[str, List[Any]]
+) -> Dict[str, List[Any]]:
+    shifted_result = deepcopy(static_parser_result)
+    source_calls = []
+
+    for s in static_parser_result['sources']:
+        source_calls.append([s[0], s[1]])
+    shifted_result['sources'] = source_calls
+
+    return shifted_result
+
+
 # returns a list of string codes to be sent as a tracking event
 def _get_sample_result(
     sample_output: Optional[Union[str, Dict[str, Any]]],
     config_call_dict: Dict[str, Any],
-    source_calls: List[List[str]],
     node: ParsedModelNode,
     config: ContextConfig
 ) -> List[str]:
diff --git a/test/integration/072_experimental_parser_tests/test_all_experimental_parser.py b/test/integration/072_experimental_parser_tests/test_all_experimental_parser.py
index 6dc81c317fb..a4bb637514f 100644
--- a/test/integration/072_experimental_parser_tests/test_all_experimental_parser.py
+++ b/test/integration/072_experimental_parser_tests/test_all_experimental_parser.py
@@ -38,13 +38,31 @@ def test_postgres_experimental_parser_basic(self):
     def test_postgres_env_experimental_parser(self):
         os.environ['DBT_USE_EXPERIMENTAL_PARSER'] = 'true'
         results = self.run_dbt(['parse'])
+
+    # test that the static parser extracts some basic ref, source, and config calls by default
+    # without the experimental flag
+    @use_profile('postgres')
+    def test_postgres_static_parser_basic(self):
+        _, log_output = self.run_dbt_and_capture(['--debug', 'parse'])
+
+        print(log_output)
+
+        # successful stable static parsing
+        self.assertTrue("1699: " in log_output)
+        # successful experimental static parsing
+        self.assertFalse("1698: " in log_output)
+        # experimental parser failed
+        self.assertFalse("1604: " in log_output)
+        # static parser failed
+        self.assertFalse("1603: " in log_output)
+
         manifest = get_manifest()
         node = manifest.nodes['model.test.model_a']
         self.assertEqual(node.refs, [['model_a']])
         self.assertEqual(node.sources, [['my_src', 'my_tbl']])
         self.assertEqual(node.config._extra, {'x': True})
         self.assertEqual(node.config.tags, ['hello', 'world'])
-
+

 class TestRefOverrideExperimentalParser(DBTIntegrationTest):
     @property
@@ -69,10 +87,12 @@ def test_postgres_experimental_parser_ref_override(self):

         print(log_output)

-        # successful static parsing
-        self.assertFalse("1699: " in log_output)
-        # ran static parser but failed
-        self.assertFalse("1602: " in log_output)
+        # successful experimental static parsing
+        self.assertFalse("1698: " in log_output)
+        # fallback to jinja rendering
+        self.assertTrue("1602: " in log_output)
+        # experimental parser failed
+        self.assertFalse("1604: " in log_output)
         # didn't run static parser because dbt detected a built-in macro override
         self.assertTrue("1601: " in log_output)

@@ -99,10 +119,12 @@ def test_postgres_experimental_parser_source_override(self):

         print(log_output)

-        # successful static parsing
-        self.assertFalse("1699: " in log_output)
-        # ran static parser but failed
-        self.assertFalse("1602: " in log_output)
+        # successful experimental static parsing
+        self.assertFalse("1698: " in log_output)
+        # fallback to jinja rendering
+        self.assertTrue("1602: " in log_output)
+        # experimental parser failed
+        self.assertFalse("1604: " in log_output)
         # didn't run static parser because dbt detected a built-in macro override
         self.assertTrue("1601: " in log_output)

@@ -129,9 +151,11 @@ def test_postgres_experimental_parser_config_override(self):

         print(log_output)

-        # successful static parsing
-        self.assertFalse("1699: " in log_output)
-        # ran static parser but failed
-        self.assertFalse("1602: " in log_output)
+        # successful experimental static parsing
+        self.assertFalse("1698: " in log_output)
+        # fallback to jinja rendering
+        self.assertTrue("1602: " in log_output)
+        # experimental parser failed
+        self.assertFalse("1604: " in log_output)
         # didn't run static parser because dbt detected a built-in macro override
         self.assertTrue("1601: " in log_output)