Commit 38eb46d
Merge pull request #3939 from dbt-labs/static-parser-by-default
Turn static parser on by default with the ability to shut it off.
Nathaniel May authored Sep 29, 2021
2 parents fe9ed9c + 6925ceb commit 38eb46d
Showing 8 changed files with 285 additions and 110 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -2,6 +2,7 @@

### Features
- Normalize global CLI arguments/flags ([#2990](https://github.com/dbt-labs/dbt/issues/2990), [#3839](https://github.com/dbt-labs/dbt/pull/3839))
- Turns on the static parser by default and adds the flag `--no-static-parser` to disable it. ([#3377](https://github.com/dbt-labs/dbt/issues/3377), [#3939](https://github.com/dbt-labs/dbt/pull/3939))

### Fixes

1 change: 1 addition & 0 deletions core/dbt/contracts/project.py
@@ -235,6 +235,7 @@ class UserConfig(ExtensibleDbtClassMixin, Replaceable, UserConfigContract):
version_check: Optional[bool] = None
fail_fast: Optional[bool] = None
use_experimental_parser: Optional[bool] = None
static_parser: Optional[bool] = None


@dataclass
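The new `static_parser` field follows the same pattern as `use_experimental_parser`: it is optional and defaults to `None`, so a value the user never sets falls through to the environment variable or the built-in default instead of forcing a choice. A minimal standalone sketch of that tri-state behavior (an illustration, not dbt's actual `UserConfig` class):

```python
from dataclasses import dataclass
from typing import Optional

# Standalone illustration of the tri-state field, not dbt's actual UserConfig.
@dataclass
class UserConfigSketch:
    use_experimental_parser: Optional[bool] = None
    static_parser: Optional[bool] = None

print(UserConfigSketch().static_parser)                     # None -> defer to env var / default (True)
print(UserConfigSketch(static_parser=False).static_parser)  # False -> user opted out in their config
```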
8 changes: 6 additions & 2 deletions core/dbt/flags.py
@@ -21,6 +21,7 @@

# Global CLI commands
USE_EXPERIMENTAL_PARSER = None
STATIC_PARSER = None
WARN_ERROR = None
WRITE_JSON = None
PARTIAL_PARSE = None
@@ -37,6 +38,7 @@
# Environment variables use the pattern 'DBT_{flag name}', like DBT_PROFILES_DIR
flag_defaults = {
"USE_EXPERIMENTAL_PARSER": False,
"STATIC_PARSER": True,
"WARN_ERROR": False,
"WRITE_JSON": True,
"PARTIAL_PARSE": False,
@@ -93,8 +95,8 @@ def _get_context():

def set_from_args(args, user_config):
global STRICT_MODE, FULL_REFRESH, WARN_ERROR, \
USE_EXPERIMENTAL_PARSER, WRITE_JSON, PARTIAL_PARSE, USE_COLORS, \
STORE_FAILURES, PROFILES_DIR, DEBUG, LOG_FORMAT, GREEDY, \
USE_EXPERIMENTAL_PARSER, STATIC_PARSER, WRITE_JSON, PARTIAL_PARSE, \
USE_COLORS, STORE_FAILURES, PROFILES_DIR, DEBUG, LOG_FORMAT, GREEDY, \
VERSION_CHECK, FAIL_FAST, SEND_ANONYMOUS_USAGE_STATS, PRINTER_WIDTH

STRICT_MODE = False # backwards compatibility
@@ -105,6 +107,7 @@ def set_from_args(args, user_config):

# global cli flags with env var and user_config alternatives
USE_EXPERIMENTAL_PARSER = get_flag_value('USE_EXPERIMENTAL_PARSER', args, user_config)
STATIC_PARSER = get_flag_value('STATIC_PARSER', args, user_config)
WARN_ERROR = get_flag_value('WARN_ERROR', args, user_config)
WRITE_JSON = get_flag_value('WRITE_JSON', args, user_config)
PARTIAL_PARSE = get_flag_value('PARTIAL_PARSE', args, user_config)
@@ -147,6 +150,7 @@ def get_flag_value(flag, args, user_config):
def get_flag_dict():
return {
"use_experimental_parser": USE_EXPERIMENTAL_PARSER,
"static_parser": STATIC_PARSER,
"warn_error": WARN_ERROR,
"write_json": WRITE_JSON,
"partial_parse": PARTIAL_PARSE,
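Taken together, these changes register `STATIC_PARSER` as a global flag with a default of `True`. Per the comment above, its environment variable would follow the `DBT_{flag name}` pattern, i.e. `DBT_STATIC_PARSER`. The sketch below illustrates the precedence this implies, assuming `get_flag_value` checks the CLI argument first, then the environment variable, then the user config, and finally the default; it is an illustration of that assumed order, not dbt's actual implementation:

```python
import os
from typing import Any, Optional

# Assumed precedence for a boolean flag such as STATIC_PARSER:
# CLI arg > DBT_STATIC_PARSER env var > user config > default (True).
# This is a sketch, not dbt's actual get_flag_value.
def resolve_flag_sketch(flag_name: str, args: Any, user_config: Any, default: bool) -> bool:
    attr = flag_name.lower()

    cli_value: Optional[bool] = getattr(args, attr, None)
    if cli_value is not None:
        return cli_value

    env_value = os.environ.get(f"DBT_{flag_name}")
    if env_value is not None:
        return env_value.strip().lower() not in ("0", "false", "")

    config_value: Optional[bool] = getattr(user_config, attr, None) if user_config else None
    if config_value is not None:
        return config_value

    return default

# e.g. resolve_flag_sketch("STATIC_PARSER", args, user_config, default=True)
```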
18 changes: 15 additions & 3 deletions core/dbt/main.py
@@ -1066,14 +1066,26 @@ def parse_args(args, cls=DBTArgumentParser):
help=argparse.SUPPRESS,
)

# if set, will use the tree-sitter-jinja2 parser and extractor instead of
# jinja rendering when possible.
# if set, will use the latest features from the static parser instead of
# the stable static parser.
p.add_argument(
'--use-experimental-parser',
action='store_true',
default=None,
help='''
Uses an experimental parser to extract jinja values.
Enables experimental parsing features.
'''
)

# if set, will disable the use of the stable static parser and instead
# always rely on jinja rendering.
p.add_argument(
'--no-static-parser',
default=None,
dest='static_parser',
action='store_false',
help='''
Disables the static parser.
'''
)

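Because the new argument is declared with `dest='static_parser'`, `action='store_false'`, and `default=None`, omitting it leaves `args.static_parser` as `None` (so the user config or the default of `True` wins), while passing `--no-static-parser` pins it to `False`. A small standalone argparse sketch of just that behavior:

```python
import argparse

# Standalone demonstration of the new option's semantics; only the one
# argument from the diff above is reproduced here.
p = argparse.ArgumentParser()
p.add_argument(
    '--no-static-parser',
    default=None,
    dest='static_parser',
    action='store_false',
    help='Disables the static parser.',
)

print(p.parse_args([]).static_parser)                      # None  -> fall through to config/default
print(p.parse_args(['--no-static-parser']).static_parser)  # False -> static parser disabled
```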
2 changes: 1 addition & 1 deletion core/dbt/parser/manifest.py
@@ -577,7 +577,7 @@ def read_manifest_for_partial_parse(self) -> Optional[Manifest]:
def build_perf_info(self):
mli = ManifestLoaderInfo(
is_partial_parse_enabled=flags.PARTIAL_PARSE,
is_static_analysis_enabled=flags.USE_EXPERIMENTAL_PARSER
is_static_analysis_enabled=flags.STATIC_PARSER
)
for project in self.all_projects.values():
project_info = ProjectLoaderInfo(
232 changes: 148 additions & 84 deletions core/dbt/parser/models.py
@@ -1,3 +1,4 @@
from copy import deepcopy
from dbt.context.context_config import ContextConfig
from dbt.contracts.graph.parsed import ParsedModelNode
import dbt.flags as flags
@@ -10,7 +11,6 @@
from dbt_extractor import ExtractionError, py_extract_from_source # type: ignore
from functools import reduce
from itertools import chain
import random
from typing import Any, Dict, Iterator, List, Optional, Union


@@ -28,80 +28,44 @@ def resource_type(self) -> NodeType:
def get_compiled_path(cls, block: FileBlock):
return block.path.relative_path

# TODO when this is turned on by default, simplify the nasty if/else tree inside this method.
def render_update(
self, node: ParsedModelNode, config: ContextConfig
) -> None:
# TODO go back to 1/100 when this is turned on by default.
# `True` roughly 1/50 times this function is called
sample: bool = random.randint(1, 51) == 50
if not flags.STATIC_PARSER:
# jinja rendering
super().render_update(node, config)
logger.debug(f"1605: jinja rendering because of STATIC_PARSER flag. file: {node.path}")
return

# top-level declaration of variables
experimentally_parsed: Optional[Union[str, Dict[str, List[Any]]]] = None
config_call_dict: Dict[str, Any] = {}
source_calls: List[List[str]] = []

# run the experimental parser if the flag is on or if we're sampling
if flags.USE_EXPERIMENTAL_PARSER or sample:
if self._has_banned_macro(node):
# this log line is used for integration testing. If you change
# the code at the beginning of the line change the tests in
# test/integration/072_experimental_parser_tests/test_all_experimental_parser.py
logger.debug(
f"1601: parser fallback to jinja because of macro override for {node.path}"
)
experimentally_parsed = "has_banned_macro"
else:
# run the experimental parser and return the results
try:
experimentally_parsed = py_extract_from_source(
node.raw_sql
)
logger.debug(f"1699: statically parsed {node.path}")
# if we want information on what features are barring the experimental
# parser from reading model files, this is where we would add that
# since that information is stored in the `ExtractionError`.
except ExtractionError:
experimentally_parsed = "cannot_parse"

# if the parser succeeded, extract some data in easy-to-compare formats
if isinstance(experimentally_parsed, dict):
# create second config format
for c in experimentally_parsed['configs']:
ContextConfig._add_config_call(config_call_dict, {c[0]: c[1]})

# format sources TODO change extractor to match this type
for s in experimentally_parsed['sources']:
source_calls.append([s[0], s[1]])
experimentally_parsed['sources'] = source_calls

# normal dbt run
# only sample on normal runs, not when the experimental parser flag is on.
sample: bool = False
if not flags.USE_EXPERIMENTAL_PARSER:
# normal rendering
super().render_update(node, config)
# if we're sampling, compare for correctness
if sample:
result = _get_sample_result(
experimentally_parsed,
config_call_dict,
source_calls,
node,
config
)
# fire a tracking event. this fires one event for every sample
# so that we have data on a per file basis. Not only can we expect
# no false positives or misses, we can expect the number of model
# files parseable by the experimental parser to match our internal
# testing.
if tracking.active_user is not None: # None in some tests
tracking.track_experimental_parser_sample({
"project_id": self.root_project.hashed_name(),
"file_id": utils.get_hash(node),
"status": result
})

# if the --use-experimental-parser flag was set, and the experimental parser succeeded
elif isinstance(experimentally_parsed, Dict):
# sampling is explicitly disabled here, but use the following
# commented code to sample a fraction of the time when new
# experimental features are added. (must also `import random`)
pass
# `True` roughly 1/100 times this function is called
# sample = random.randint(1, 101) == 100

# top-level declaration of variables
statically_parsed: Optional[Union[str, Dict[str, List[Any]]]] = None
experimental_sample: Optional[Union[str, Dict[str, List[Any]]]] = None

# sample the experimental parser during a normal run
if sample:
logger.debug(f"1610: conducting experimental parser sample on {node.path}")
experimental_sample = self.run_experimental_parser(node)
# use the experimental parser exclusively if the flag is on
if flags.USE_EXPERIMENTAL_PARSER:
statically_parsed = self.run_experimental_parser(node)
# run the stable static parser unless it is explicitly turned off
else:
statically_parsed = self.run_static_parser(node)

# if the static parser succeeded, extract some data in easy-to-compare formats
if isinstance(statically_parsed, dict):
config_call_dict = _get_config_call_dict(statically_parsed)

# since it doesn't need python jinja, fit the refs, sources, and configs
# into the node. Down the line the rest of the node will be updated with
# this information. (e.g. depends_on etc.)
@@ -114,29 +78,104 @@ def render_update(

# update the unrendered config with values from the file.
# values from yaml files are in there already
node.unrendered_config.update(dict(experimentally_parsed['configs']))
node.unrendered_config.update(dict(statically_parsed['configs']))

# set refs and sources on the node object
node.refs += experimentally_parsed['refs']
node.sources += experimentally_parsed['sources']
node.refs += statically_parsed['refs']
node.sources += statically_parsed['sources']

# configs don't need to be merged into the node
# setting them in config._config_call_dict is sufficient

self.manifest._parsing_info.static_analysis_parsed_path_count += 1

# the experimental parser didn't run on this model.
# fall back to python jinja rendering.
elif experimentally_parsed in ["has_banned_macro"]:
# not logging here since the reason should have been logged above
super().render_update(node, config)
# the experimental parser ran on this model and failed.
# fall back to python jinja rendering.
# if the static parser didn't succeed, fall back to jinja
else:
# jinja rendering
super().render_update(node, config)
logger.debug(
f"1602: parser fallback to jinja because of extractor failure for {node.path}"
f"1602: parser fallback to jinja rendering on {node.path}"
)
super().render_update(node, config)

# if we're sampling, compare for correctness
if sample:
result = _get_sample_result(
experimental_sample,
config_call_dict,
node,
config
)
# fire a tracking event. this fires one event for every sample
# so that we have data on a per file basis. Not only can we expect
# no false positives or misses, we can expect the number of model
# files parseable by the experimental parser to match our internal
# testing.
if tracking.active_user is not None: # None in some tests
tracking.track_experimental_parser_sample({
"project_id": self.root_project.hashed_name(),
"file_id": utils.get_hash(node),
"status": result
})

def run_static_parser(
self, node: ParsedModelNode
) -> Optional[Union[str, Dict[str, List[Any]]]]:
# if any banned macros have been overridden by the user, we cannot use the static parser.
if self._has_banned_macro(node):
# this log line is used for integration testing. If you change
# the code at the beginning of the line change the tests in
# test/integration/072_experimental_parser_tests/test_all_experimental_parser.py
logger.debug(
f"1601: detected macro override of ref/source/config in the scope of {node.path}"
)
return "has_banned_macro"

# run the stable static parser and return the results
try:
statically_parsed = py_extract_from_source(
node.raw_sql
)
logger.debug(f"1699: static parser successfully parsed {node.path}")
return _shift_sources(statically_parsed)
# if we want information on what features are barring the static
# parser from reading model files, this is where we would add that
# since that information is stored in the `ExtractionError`.
except ExtractionError:
logger.debug(
f"1603: static parser failed on {node.path}"
)
return "cannot_parse"

def run_experimental_parser(
self, node: ParsedModelNode
) -> Optional[Union[str, Dict[str, List[Any]]]]:
# if any banned macros have been overridden by the user, we cannot use the static parser.
if self._has_banned_macro(node):
# this log line is used for integration testing. If you change
# the code at the beginning of the line change the tests in
# test/integration/072_experimental_parser_tests/test_all_experimental_parser.py
logger.debug(
f"1601: detected macro override of ref/source/config in the scope of {node.path}"
)
return "has_banned_macro"

# run the experimental parser and return the results
try:
# for now, this line calls the stable static parser since there are no
# experimental features. Change `py_extract_from_source` to the new
# experimental call when we add additional features.
experimentally_parsed = py_extract_from_source(
node.raw_sql
)
logger.debug(f"1698: experimental parser successfully parsed {node.path}")
return _shift_sources(experimentally_parsed)
# if we want information on what features are barring the experimental
# parser from reading model files, this is where we would add that
# since that information is stored in the `ExtractionError`.
except ExtractionError:
logger.debug(
f"1604: experimental parser failed on {node.path}"
)
return "cannot_parse"

# checks for banned macros
def _has_banned_macro(
@@ -164,11 +203,36 @@ def _has_banned_macro(
)


# pure function. safe to use elsewhere, but unlikely to be useful outside this file.
def _get_config_call_dict(
static_parser_result: Dict[str, List[Any]]
) -> Dict[str, Any]:
config_call_dict: Dict[str, Any] = {}

for c in static_parser_result['configs']:
ContextConfig._add_config_call(config_call_dict, {c[0]: c[1]})

return config_call_dict


# TODO if we format sources in the extractor to match this type, we won't need this function.
def _shift_sources(
static_parser_result: Dict[str, List[Any]]
) -> Dict[str, List[Any]]:
shifted_result = deepcopy(static_parser_result)
source_calls = []

for s in static_parser_result['sources']:
source_calls.append([s[0], s[1]])
shifted_result['sources'] = source_calls

return shifted_result
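A hedged illustration of what `_shift_sources` does, assuming the extractor currently returns each source reference as a two-element tuple (the TODO above suggests the extractor's output does not yet match the list-of-lists shape the node expects). Only the `sources` entries are reshaped; everything else passes through the deep copy unchanged. The sample values are hypothetical:

```python
from copy import deepcopy
from typing import Any, Dict, List

# Restatement of _shift_sources from the diff above, so this snippet runs standalone.
def _shift_sources(static_parser_result: Dict[str, List[Any]]) -> Dict[str, List[Any]]:
    shifted_result = deepcopy(static_parser_result)
    source_calls = []
    for s in static_parser_result['sources']:
        source_calls.append([s[0], s[1]])
    shifted_result['sources'] = source_calls
    return shifted_result

# Hypothetical extractor output; the tuple form of 'sources' is an assumption.
extractor_result = {
    'refs': [['stg_orders']],
    'sources': [('raw', 'orders'), ('raw', 'customers')],
    'configs': [('materialized', 'table')],
}

shifted = _shift_sources(extractor_result)
print(shifted['sources'])           # [['raw', 'orders'], ['raw', 'customers']]
print(shifted is extractor_result)  # False -- the input dict is left untouched
```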


# returns a list of string codes to be sent as a tracking event
def _get_sample_result(
sample_output: Optional[Union[str, Dict[str, Any]]],
config_call_dict: Dict[str, Any],
source_calls: List[List[str]],
node: ParsedModelNode,
config: ContextConfig
) -> List[str]:
(Diff for the remaining changed files not shown.)
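To summarize the parse path after this change: the stable static parser now runs by default, `--use-experimental-parser` swaps in the experimental parser, `--no-static-parser` forces jinja rendering, and a banned-macro override or an `ExtractionError` still falls back to jinja. A condensed, self-contained sketch of that selection logic (illustrative names, not a dbt API):

```python
# Condensed sketch of the parser selection in render_update above. The real
# method also falls back to jinja when the chosen parser hits a banned macro
# override or fails with an ExtractionError.
def choose_parser_sketch(static_parser: bool, use_experimental_parser: bool) -> str:
    if not static_parser:
        return "jinja"          # --no-static-parser: always render with jinja
    if use_experimental_parser:
        return "experimental"   # --use-experimental-parser: latest static-parsing features
    return "static"             # the new default: stable static parser

assert choose_parser_sketch(True, False) == "static"
assert choose_parser_sketch(True, True) == "experimental"
assert choose_parser_sketch(False, True) == "jinja"
```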
