Commit 38eb46d
Merge pull request #3939 from dbt-labs/static-parser-by-default
Turn static parser on by default with the ability to shut it off.
Nathaniel May authored Sep 29, 2021
2 parents fe9ed9c + 6925ceb commit 38eb46d
Showing 8 changed files with 285 additions and 110 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -2,6 +2,7 @@

### Features
- Normalize global CLI arguments/flags ([#2990](https://github.com/dbt-labs/dbt/issues/2990), [#3839](https://github.com/dbt-labs/dbt/pull/3839))
- Turns on the static parser by default and adds the flag `--no-static-parser` to disable it. ([#3377](https://github.com/dbt-labs/dbt/issues/3377), [#3939](https://github.com/dbt-labs/dbt/pull/3939))

### Fixes

1 change: 1 addition & 0 deletions core/dbt/contracts/project.py
@@ -235,6 +235,7 @@ class UserConfig(ExtensibleDbtClassMixin, Replaceable, UserConfigContract):
version_check: Optional[bool] = None
fail_fast: Optional[bool] = None
use_experimental_parser: Optional[bool] = None
static_parser: Optional[bool] = None


@dataclass
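The new `static_parser` field follows the same pattern as `use_experimental_parser`: it is optional and defaults to `None`, so a value the user never sets falls through to the environment variable or the built-in default instead of forcing a choice. A minimal standalone sketch of that tri-state behavior (an illustration, not dbt's actual `UserConfig` class):

```python
from dataclasses import dataclass
from typing import Optional

# Standalone illustration of the tri-state field, not dbt's actual UserConfig.
@dataclass
class UserConfigSketch:
    use_experimental_parser: Optional[bool] = None
    static_parser: Optional[bool] = None

print(UserConfigSketch().static_parser)                     # None -> defer to env var / default (True)
print(UserConfigSketch(static_parser=False).static_parser)  # False -> user opted out in their config
```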
8 changes: 6 additions & 2 deletions core/dbt/flags.py
@@ -21,6 +21,7 @@

# Global CLI commands
USE_EXPERIMENTAL_PARSER = None
STATIC_PARSER = None
WARN_ERROR = None
WRITE_JSON = None
PARTIAL_PARSE = None
@@ -37,6 +38,7 @@
# Environment variables use the pattern 'DBT_{flag name}', like DBT_PROFILES_DIR
flag_defaults = {
"USE_EXPERIMENTAL_PARSER": False,
"STATIC_PARSER": True,
"WARN_ERROR": False,
"WRITE_JSON": True,
"PARTIAL_PARSE": False,
@@ -93,8 +95,8 @@ def _get_context():

def set_from_args(args, user_config):
global STRICT_MODE, FULL_REFRESH, WARN_ERROR, \
USE_EXPERIMENTAL_PARSER, WRITE_JSON, PARTIAL_PARSE, USE_COLORS, \
STORE_FAILURES, PROFILES_DIR, DEBUG, LOG_FORMAT, GREEDY, \
USE_EXPERIMENTAL_PARSER, STATIC_PARSER, WRITE_JSON, PARTIAL_PARSE, \
USE_COLORS, STORE_FAILURES, PROFILES_DIR, DEBUG, LOG_FORMAT, GREEDY, \
VERSION_CHECK, FAIL_FAST, SEND_ANONYMOUS_USAGE_STATS, PRINTER_WIDTH

STRICT_MODE = False # backwards compatibility
@@ -105,6 +107,7 @@ def set_from_args(args, user_config):

# global cli flags with env var and user_config alternatives
USE_EXPERIMENTAL_PARSER = get_flag_value('USE_EXPERIMENTAL_PARSER', args, user_config)
STATIC_PARSER = get_flag_value('STATIC_PARSER', args, user_config)
WARN_ERROR = get_flag_value('WARN_ERROR', args, user_config)
WRITE_JSON = get_flag_value('WRITE_JSON', args, user_config)
PARTIAL_PARSE = get_flag_value('PARTIAL_PARSE', args, user_config)
@@ -147,6 +150,7 @@ def get_flag_value(flag, args, user_config):
def get_flag_dict():
return {
"use_experimental_parser": USE_EXPERIMENTAL_PARSER,
"static_parser": STATIC_PARSER,
"warn_error": WARN_ERROR,
"write_json": WRITE_JSON,
"partial_parse": PARTIAL_PARSE,
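Taken together, these changes register `STATIC_PARSER` as a global flag with a default of `True`. Per the comment above, its environment variable would follow the `DBT_{flag name}` pattern, i.e. `DBT_STATIC_PARSER`. The sketch below illustrates the precedence this implies, assuming `get_flag_value` checks the CLI argument first, then the environment variable, then the user config, and finally the default; it is an illustration of that assumed order, not dbt's actual implementation:

```python
import os
from typing import Any, Optional

# Assumed precedence for a boolean flag such as STATIC_PARSER:
# CLI arg > DBT_STATIC_PARSER env var > user config > default (True).
# This is a sketch, not dbt's actual get_flag_value.
def resolve_flag_sketch(flag_name: str, args: Any, user_config: Any, default: bool) -> bool:
    attr = flag_name.lower()

    cli_value: Optional[bool] = getattr(args, attr, None)
    if cli_value is not None:
        return cli_value

    env_value = os.environ.get(f"DBT_{flag_name}")
    if env_value is not None:
        return env_value.strip().lower() not in ("0", "false", "")

    config_value: Optional[bool] = getattr(user_config, attr, None) if user_config else None
    if config_value is not None:
        return config_value

    return default

# e.g. resolve_flag_sketch("STATIC_PARSER", args, user_config, default=True)
```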
18 changes: 15 additions & 3 deletions core/dbt/main.py
@@ -1066,14 +1066,26 @@ def parse_args(args, cls=DBTArgumentParser):
help=argparse.SUPPRESS,
)

# if set, will use the tree-sitter-jinja2 parser and extractor instead of
# jinja rendering when possible.
# if set, will use the latest features from the static parser instead of
# the stable static parser.
p.add_argument(
'--use-experimental-parser',
action='store_true',
default=None,
help='''
Uses an experimental parser to extract jinja values.
Enables experimental parsing features.
'''
)

# if set, will disable the use of the stable static parser and instead
# always rely on jinja rendering.
p.add_argument(
'--no-static-parser',
default=None,
dest='static_parser',
action='store_false',
help='''
Disables the static parser.
'''
)

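Because the new argument is declared with `dest='static_parser'`, `action='store_false'`, and `default=None`, omitting it leaves `args.static_parser` as `None` (so the user config or the default of `True` wins), while passing `--no-static-parser` pins it to `False`. A small standalone argparse sketch of just that behavior:

```python
import argparse

# Standalone demonstration of the new option's semantics; only the one
# argument from the diff above is reproduced here.
p = argparse.ArgumentParser()
p.add_argument(
    '--no-static-parser',
    default=None,
    dest='static_parser',
    action='store_false',
    help='Disables the static parser.',
)

print(p.parse_args([]).static_parser)                      # None  -> fall through to config/default
print(p.parse_args(['--no-static-parser']).static_parser)  # False -> static parser disabled
```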
2 changes: 1 addition & 1 deletion core/dbt/parser/manifest.py
@@ -577,7 +577,7 @@ def read_manifest_for_partial_parse(self) -> Optional[Manifest]:
def build_perf_info(self):
mli = ManifestLoaderInfo(
is_partial_parse_enabled=flags.PARTIAL_PARSE,
is_static_analysis_enabled=flags.USE_EXPERIMENTAL_PARSER
is_static_analysis_enabled=flags.STATIC_PARSER
)
for project in self.all_projects.values():
project_info = ProjectLoaderInfo(
232 changes: 148 additions & 84 deletions core/dbt/parser/models.py
@@ -1,3 +1,4 @@
from copy import deepcopy
from dbt.context.context_config import ContextConfig
from dbt.contracts.graph.parsed import ParsedModelNode
import dbt.flags as flags
@@ -10,7 +11,6 @@
from dbt_extractor import ExtractionError, py_extract_from_source # type: ignore
from functools import reduce
from itertools import chain
import random
from typing import Any, Dict, Iterator, List, Optional, Union


@@ -28,80 +28,44 @@ def resource_type(self) -> NodeType:
def get_compiled_path(cls, block: FileBlock):
return block.path.relative_path

# TODO when this is turned on by default, simplify the nasty if/else tree inside this method.
def render_update(
self, node: ParsedModelNode, config: ContextConfig
) -> None:
# TODO go back to 1/100 when this is turned on by default.
# `True` roughly 1/50 times this function is called
sample: bool = random.randint(1, 51) == 50
if not flags.STATIC_PARSER:
# jinja rendering
super().render_update(node, config)
logger.debug(f"1605: jinja rendering because of STATIC_PARSER flag. file: {node.path}")
return

# top-level declaration of variables
experimentally_parsed: Optional[Union[str, Dict[str, List[Any]]]] = None
config_call_dict: Dict[str, Any] = {}
source_calls: List[List[str]] = []

# run the experimental parser if the flag is on or if we're sampling
if flags.USE_EXPERIMENTAL_PARSER or sample:
if self._has_banned_macro(node):
# this log line is used for integration testing. If you change
# the code at the beginning of the line change the tests in
# test/integration/072_experimental_parser_tests/test_all_experimental_parser.py
logger.debug(
f"1601: parser fallback to jinja because of macro override for {node.path}"
)
experimentally_parsed = "has_banned_macro"
else:
# run the experimental parser and return the results
try:
experimentally_parsed = py_extract_from_source(
node.raw_sql
)
logger.debug(f"1699: statically parsed {node.path}")
# if we want information on what features are barring the experimental
# parser from reading model files, this is where we would add that
# since that information is stored in the `ExtractionError`.
except ExtractionError:
experimentally_parsed = "cannot_parse"

# if the parser succeeded, extract some data in easy-to-compare formats
if isinstance(experimentally_parsed, dict):
# create second config format
for c in experimentally_parsed['configs']:
ContextConfig._add_config_call(config_call_dict, {c[0]: c[1]})

# format sources TODO change extractor to match this type
for s in experimentally_parsed['sources']:
source_calls.append([s[0], s[1]])
experimentally_parsed['sources'] = source_calls

# normal dbt run
# only sample on normal runs, not when the experimental parser flag is on.
sample: bool = False
if not flags.USE_EXPERIMENTAL_PARSER:
# normal rendering
super().render_update(node, config)
# if we're sampling, compare for correctness
if sample:
result = _get_sample_result(
experimentally_parsed,
config_call_dict,
source_calls,
node,
config
)
# fire a tracking event. this fires one event for every sample
# so that we have data on a per file basis. Not only can we expect
# no false positives or misses, we can expect the number of model
# files parseable by the experimental parser to match our internal
# testing.
if tracking.active_user is not None: # None in some tests
tracking.track_experimental_parser_sample({
"project_id": self.root_project.hashed_name(),
"file_id": utils.get_hash(node),
"status": result
})

# if the --use-experimental-parser flag was set, and the experimental parser succeeded
elif isinstance(experimentally_parsed, Dict):
# sampling is explicitly disabled here, but use the following
# commented code to sample a fraction of the time when new
# experimental features are added. (must also `import random`)
pass
# `True` roughly 1/100 times this function is called
# sample = random.randint(1, 101) == 100

# top-level declaration of variables
statically_parsed: Optional[Union[str, Dict[str, List[Any]]]] = None
experimental_sample: Optional[Union[str, Dict[str, List[Any]]]] = None

# sample the experimental parser during a normal run
if sample:
logger.debug(f"1610: conducting experimental parser sample on {node.path}")
experimental_sample = self.run_experimental_parser(node)
# use the experimental parser exclusively if the flag is on
if flags.USE_EXPERIMENTAL_PARSER:
statically_parsed = self.run_experimental_parser(node)
# run the stable static parser unless it is explicitly turned off
else:
statically_parsed = self.run_static_parser(node)

# if the static parser succeeded, extract some data in easy-to-compare formats
if isinstance(statically_parsed, dict):
config_call_dict = _get_config_call_dict(statically_parsed)

# since it doesn't need python jinja, fit the refs, sources, and configs
# into the node. Down the line the rest of the node will be updated with
# this information. (e.g. depends_on etc.)
@@ -114,29 +78,104 @@ def render_update(

# update the unrendered config with values from the file.
# values from yaml files are in there already
node.unrendered_config.update(dict(experimentally_parsed['configs']))
node.unrendered_config.update(dict(statically_parsed['configs']))

# set refs and sources on the node object
node.refs += experimentally_parsed['refs']
node.sources += experimentally_parsed['sources']
node.refs += statically_parsed['refs']
node.sources += statically_parsed['sources']

# configs don't need to be merged into the node
# setting them in config._config_call_dict is sufficient

self.manifest._parsing_info.static_analysis_parsed_path_count += 1

# the experimental parser didn't run on this model.
# fall back to python jinja rendering.
elif experimentally_parsed in ["has_banned_macro"]:
# not logging here since the reason should have been logged above
super().render_update(node, config)
# the experimental parser ran on this model and failed.
# fall back to python jinja rendering.
# if the static parser didn't succeed, fall back to jinja
else:
# jinja rendering
super().render_update(node, config)
logger.debug(
f"1602: parser fallback to jinja because of extractor failure for {node.path}"
f"1602: parser fallback to jinja rendering on {node.path}"
)
super().render_update(node, config)

# if we're sampling, compare for correctness
if sample:
result = _get_sample_result(
experimental_sample,
config_call_dict,
node,
config
)
# fire a tracking event. this fires one event for every sample
# so that we have data on a per file basis. Not only can we expect
# no false positives or misses, we can expect the number of model
# files parseable by the experimental parser to match our internal
# testing.
if tracking.active_user is not None: # None in some tests
tracking.track_experimental_parser_sample({
"project_id": self.root_project.hashed_name(),
"file_id": utils.get_hash(node),
"status": result
})

def run_static_parser(
self, node: ParsedModelNode
) -> Optional[Union[str, Dict[str, List[Any]]]]:
# if any banned macros have been overridden by the user, we cannot use the static parser.
if self._has_banned_macro(node):
# this log line is used for integration testing. If you change
# the code at the beginning of the line change the tests in
# test/integration/072_experimental_parser_tests/test_all_experimental_parser.py
logger.debug(
f"1601: detected macro override of ref/source/config in the scope of {node.path}"
)
return "has_banned_macro"

# run the stable static parser and return the results
try:
statically_parsed = py_extract_from_source(
node.raw_sql
)
logger.debug(f"1699: static parser successfully parsed {node.path}")
return _shift_sources(statically_parsed)
# if we want information on what features are barring the static
# parser from reading model files, this is where we would add that
# since that information is stored in the `ExtractionError`.
except ExtractionError:
logger.debug(
f"1603: static parser failed on {node.path}"
)
return "cannot_parse"

def run_experimental_parser(
self, node: ParsedModelNode
) -> Optional[Union[str, Dict[str, List[Any]]]]:
# if any banned macros have been overridden by the user, we cannot use the static parser.
if self._has_banned_macro(node):
# this log line is used for integration testing. If you change
# the code at the beginning of the line change the tests in
# test/integration/072_experimental_parser_tests/test_all_experimental_parser.py
logger.debug(
f"1601: detected macro override of ref/source/config in the scope of {node.path}"
)
return "has_banned_macro"

# run the experimental parser and return the results
try:
# for now, this line calls the stable static parser since there are no
# experimental features. Change `py_extract_from_source` to the new
# experimental call when we add additional features.
experimentally_parsed = py_extract_from_source(
node.raw_sql
)
logger.debug(f"1698: experimental parser successfully parsed {node.path}")
return _shift_sources(experimentally_parsed)
# if we want information on what features are barring the experimental
# parser from reading model files, this is where we would add that
# since that information is stored in the `ExtractionError`.
except ExtractionError:
logger.debug(
f"1604: experimental parser failed on {node.path}"
)
return "cannot_parse"

# checks for banned macros
def _has_banned_macro(
@@ -164,11 +203,36 @@ def _has_banned_macro(
)


# pure function. safe to use elsewhere, but unlikely to be useful outside this file.
def _get_config_call_dict(
static_parser_result: Dict[str, List[Any]]
) -> Dict[str, Any]:
config_call_dict: Dict[str, Any] = {}

for c in static_parser_result['configs']:
ContextConfig._add_config_call(config_call_dict, {c[0]: c[1]})

return config_call_dict


# TODO if we format sources in the extractor to match this type, we won't need this function.
def _shift_sources(
static_parser_result: Dict[str, List[Any]]
) -> Dict[str, List[Any]]:
shifted_result = deepcopy(static_parser_result)
source_calls = []

for s in static_parser_result['sources']:
source_calls.append([s[0], s[1]])
shifted_result['sources'] = source_calls

return shifted_result
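A hedged illustration of what `_shift_sources` does, assuming the extractor currently returns each source reference as a two-element tuple (the TODO above suggests the extractor's output does not yet match the list-of-lists shape the node expects). Only the `sources` entries are reshaped; everything else passes through the deep copy unchanged. The sample values are hypothetical:

```python
from copy import deepcopy
from typing import Any, Dict, List

# Restatement of _shift_sources from the diff above, so this snippet runs standalone.
def _shift_sources(static_parser_result: Dict[str, List[Any]]) -> Dict[str, List[Any]]:
    shifted_result = deepcopy(static_parser_result)
    source_calls = []
    for s in static_parser_result['sources']:
        source_calls.append([s[0], s[1]])
    shifted_result['sources'] = source_calls
    return shifted_result

# Hypothetical extractor output; the tuple form of 'sources' is an assumption.
extractor_result = {
    'refs': [['stg_orders']],
    'sources': [('raw', 'orders'), ('raw', 'customers')],
    'configs': [('materialized', 'table')],
}

shifted = _shift_sources(extractor_result)
print(shifted['sources'])           # [['raw', 'orders'], ['raw', 'customers']]
print(shifted is extractor_result)  # False -- the input dict is left untouched
```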


# returns a list of string codes to be sent as a tracking event
def _get_sample_result(
sample_output: Optional[Union[str, Dict[str, Any]]],
config_call_dict: Dict[str, Any],
source_calls: List[List[str]],
node: ParsedModelNode,
config: ContextConfig
) -> List[str]:
(Diff for the remaining changed files not shown.)
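To summarize the parse path after this change: the stable static parser now runs by default, `--use-experimental-parser` swaps in the experimental parser, `--no-static-parser` forces jinja rendering, and a banned-macro override or an `ExtractionError` still falls back to jinja. A condensed, self-contained sketch of that selection logic (illustrative names, not a dbt API):

```python
# Condensed sketch of the parser selection in render_update above. The real
# method also falls back to jinja when the chosen parser hits a banned macro
# override or fails with an ExtractionError.
def choose_parser_sketch(static_parser: bool, use_experimental_parser: bool) -> str:
    if not static_parser:
        return "jinja"          # --no-static-parser: always render with jinja
    if use_experimental_parser:
        return "experimental"   # --use-experimental-parser: latest static-parsing features
    return "static"             # the new default: stable static parser

assert choose_parser_sketch(True, False) == "static"
assert choose_parser_sketch(True, True) == "experimental"
assert choose_parser_sketch(False, True) == "jinja"
```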
