Skip to content

Commit

Permalink
3448 user defined default selection criteria (#3875)
Browse files Browse the repository at this point in the history
* Added selector default

* Updated code from review feedback

* Fix schema.yml location

Co-authored-by: Jeremy Cohen <jeremy@fishtownanalytics.com>
  • Loading branch information
TeddyCr and jtcohen6 authored Sep 16, 2021
1 parent 95cca27 commit 4845174
Show file tree
Hide file tree
Showing 15 changed files with 203 additions and 17 deletions.
4 changes: 3 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,9 @@
- to simplify config (default usage of `copy_materialization='table'` if is is not found in global or local config)
- to let copy several source tables into single target table at a time. ([Google doc reference](https://cloud.google.com/bigquery/docs/managing-tables#copying_multiple_source_tables))
- Customize ls task JSON output by adding new flag `--output-keys` ([#3778](https://github.com/dbt-labs/dbt/issues/3778), [#3395](https://github.com/dbt-labs/dbt/issues/3395))
- add support for execution project on BigQuery through profile configuration ([#3708](https://github.com/dbt-labs/dbt/issues/3708))
- Add support for execution project on BigQuery through profile configuration ([#3707](https://github.com/dbt-labs/dbt/issues/3707), [#3708](https://github.com/dbt-labs/dbt/issues/3708))
- Skip downstream nodes during the `build` task when a test fails. ([#3597](https://github.com/dbt-labs/dbt/issues/3597), [#3792](https://github.com/dbt-labs/dbt/pull/3792))
- Added default field in the `selectors.yml` to allow user to define default selector ([#3448](https://github.com/dbt-labs/dbt/issues/3448#issuecomment-907611470))

### Fixes

Expand Down Expand Up @@ -43,6 +44,7 @@ Contributors:
- [@annafil](https://github.com/annafil) ([#3825](https://github.com/dbt-labs/dbt/pull/3825))
- [@AndreasTA-AW](https://github.com/AndreasTA-AW) ([#3691](https://github.com/dbt-labs/dbt/pull/3691))
- [@Kayrnt](https://github.com/Kayrnt) ([3707](https://github.com/dbt-labs/dbt/pull/3707))
- [@TeddyCr](https://github.com/TeddyCr) ([#3448](https://github.com/dbt-labs/dbt/pull/3865))

## dbt 0.21.0b2 (August 19, 2021)

Expand Down
18 changes: 16 additions & 2 deletions core/dbt/config/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
selector_data_from_root,
SelectorConfig,
)
from dbt.logger import print_timestamped_line


INVALID_VERSION_ERROR = """\
Expand Down Expand Up @@ -645,13 +646,26 @@ def from_project_root(
def hashed_name(self):
return hashlib.md5(self.project_name.encode('utf-8')).hexdigest()

def get_selector(self, name: str) -> SelectionSpec:
def get_selector(self, name: str) -> Union[SelectionSpec, bool]:
if name not in self.selectors:
raise RuntimeException(
f'Could not find selector named {name}, expected one of '
f'{list(self.selectors)}'
)
return self.selectors[name]
return self.selectors[name]["definition"]

def get_default_selector(self) -> Union[SelectionSpec, bool, None]:
"""This function fetch the default selector to use on `dbt run` (if any)
:return: either a selector if default is set or None
:rtype: Union[SelectionSpec, None]
"""
for selector_name, selector in self.selectors.items():
if selector["default"] is True:
name = selector_name
print_timestamped_line(f'Using default selector {name}')
return self.get_selector(name)

return None

def get_macro_search_order(self, macro_namespace: str):
for dispatch_entry in self.dispatch:
Expand Down
23 changes: 21 additions & 2 deletions core/dbt/config/selectors.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from pathlib import Path
from typing import Dict, Any
from typing import Dict, Any, Union
from dbt.clients.yaml_helper import ( # noqa: F401
yaml, Loader, Dumper, load_yaml_text
)
Expand Down Expand Up @@ -29,13 +29,14 @@
"""


class SelectorConfig(Dict[str, SelectionSpec]):
class SelectorConfig(Dict[str, Dict[str, Union[SelectionSpec, bool]]]):

@classmethod
def selectors_from_dict(cls, data: Dict[str, Any]) -> 'SelectorConfig':
try:
SelectorFile.validate(data)
selector_file = SelectorFile.from_dict(data)
validate_selector_default(selector_file)
selectors = parse_from_selectors_definition(selector_file)
except ValidationError as exc:
yaml_sel_cfg = yaml.dump(exc.instance)
Expand Down Expand Up @@ -118,6 +119,24 @@ def selector_config_from_data(
return selectors


def validate_selector_default(selector_file: SelectorFile) -> None:
"""Check if a selector.yml file has more than 1 default key set to true"""
default_set: bool = False
default_selector_name: Union[str, None] = None

for selector in selector_file.selectors:
if selector.default is True and default_set is False:
default_set = True
default_selector_name = selector.name
continue
if selector.default is True and default_set is True:
raise DbtSelectorsError(
"Error when parsing the selector file. "
"Found multiple selectors with `default: true`:"
f"{default_selector_name} and {selector.name}"
)


# These are utilities to clean up the dictionary created from
# selectors.yml by turning the cli-string format entries into
# normalized dictionary entries. It parallels the flow in
Expand Down
1 change: 1 addition & 0 deletions core/dbt/contracts/selection.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ class SelectorDefinition(dbtClassMixin):
name: str
definition: Union[str, Dict[str, Any]]
description: str = ''
default: bool = False


@dataclass
Expand Down
10 changes: 6 additions & 4 deletions core/dbt/graph/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,10 +271,12 @@ def parse_from_definition(

def parse_from_selectors_definition(
source: SelectorFile
) -> Dict[str, SelectionSpec]:
result: Dict[str, SelectionSpec] = {}
) -> Dict[str, Dict[str, Union[SelectionSpec, bool]]]:
result: Dict[str, Dict[str, Union[SelectionSpec, bool]]] = {}
selector: SelectorDefinition
for selector in source.selectors:
result[selector.name] = parse_from_definition(selector.definition,
rootlevel=True)
result[selector.name] = {
"default": selector.default,
"definition": parse_from_definition(selector.definition, rootlevel=True)
}
return result
3 changes: 3 additions & 0 deletions core/dbt/task/compile.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,11 @@ def raise_on_first_error(self):
return True

def get_selection_spec(self) -> SelectionSpec:
default_selector = self.config.get_default_selector()
if self.args.selector_name:
spec = self.config.get_selector(self.args.selector_name)
elif not (self.args.select or self.args.exclude) and default_selector:
spec = default_selector
else:
spec = parse_difference(self.args.select, self.args.exclude)
return spec
Expand Down
3 changes: 3 additions & 0 deletions core/dbt/task/freshness.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,9 +141,12 @@ def get_selection_spec(self) -> SelectionSpec:
processing graph. A SelectionSpec describes what nodes to select
when creating queue from graph of nodes.
"""
default_selector = self.config.get_default_selector()
if self.args.selector_name:
# use pre-defined selector (--selector) to create selection spec
spec = self.config.get_selector(self.args.selector_name)
elif not (self.args.select or self.args.exclude) and default_selector:
spec = default_selector
else:
# use --select and --exclude args to create selection spec
spec = parse_difference(self.args.select, self.args.exclude)
Expand Down
3 changes: 3 additions & 0 deletions core/dbt/task/list.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,8 +172,11 @@ def selector(self):
return self.args.select

def get_selection_spec(self) -> SelectionSpec:
default_selector = self.config.get_default_selector()
if self.args.selector_name:
spec = self.config.get_selector(self.args.selector_name)
elif not (self.args.select or self.args.exclude) and default_selector:
spec = default_selector
else:
spec = parse_difference(self.selector, self.args.exclude)
return spec
Expand Down
2 changes: 2 additions & 0 deletions test/integration/073_default_selectors_tests/data/model_c.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
fun,_loaded_at
1,2021-04-19 01:00:00
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
SELECT 1 AS fun
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
SELECT 1 AS fun
35 changes: 35 additions & 0 deletions test/integration/073_default_selectors_tests/models/schema.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
version: 2

sources:
- name: src
schema: "{{ target.schema }}"
freshness:
warn_after: {count: 24, period: hour}
loaded_at_field: _loaded_at
tables:
- name: source_a
identifier: model_c
columns:
- name: fun
- name: _loaded_at
- name: src
schema: "{{ target.schema }}"
freshness:
warn_after: {count: 24, period: hour}
loaded_at_field: _loaded_at
tables:
- name: source_b
identifier: model_c
columns:
- name: fun
- name: _loaded_at

models:
- name: model_a
columns:
- name: fun
tags: [marketing]
- name: model_b
columns:
- name: fun
tags: [finance]
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
import yaml
from test.integration.base import DBTIntegrationTest, use_profile


class TestDefaultSelectors(DBTIntegrationTest):
'''Test the selectors default argument'''
@property
def schema(self):
return 'test_default_selectors_101'

@property
def models(self):
return 'models'

@property
def project_config(self):
return {
'config-version': 2,
'source-paths': ['models'],
'data-paths': ['data'],
'seeds': {
'quote_columns': False,
},
}

@property
def selectors_config(self):
return yaml.safe_load('''
selectors:
- name: default_selector
description: test default selector
definition:
union:
- method: source
value: "test.src.source_a"
- method: fqn
value: "model_a"
default: true
''')

def list_and_assert(self, expected):
'''list resources in the project with the selectors default'''
listed = self.run_dbt(['ls', '--resource-type', 'model'])

assert len(listed) == len(expected)

def compile_and_assert(self, expected):
'''Compile project with the selectors default'''
compiled = self.run_dbt(['compile'])

assert len(compiled.results) == len(expected)
assert compiled.results[0].node.name == expected[0]

def run_and_assert(self, expected):
run = self.run_dbt(['run'])

assert len(run.results) == len(expected)
assert run.results[0].node.name == expected[0]

def freshness_and_assert(self, expected):
self.run_dbt(['seed', '-s', 'test.model_c'])
freshness = self.run_dbt(['source', 'freshness'])

assert len(freshness.results) == len(expected)
assert freshness.results[0].node.name == expected[0]

@use_profile('postgres')
def test__postgres__model_a_only(self):
expected_model = ['model_a']

self.list_and_assert(expected_model)
self.compile_and_assert(expected_model)

def test__postgres__source_a_only(self):
expected_source = ['source_a']

self.freshness_and_assert(expected_source)
16 changes: 8 additions & 8 deletions test/unit/test_graph_selector_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ def test_parse_simple():
childrens_parents=False,
children_depth=None,
parents_depth=None,
) == parsed['tagged_foo']
) == parsed['tagged_foo']["definition"]


def test_parse_simple_childrens_parents():
Expand All @@ -141,7 +141,7 @@ def test_parse_simple_childrens_parents():
childrens_parents=True,
children_depth=None,
parents_depth=None,
) == parsed['tagged_foo']
) == parsed['tagged_foo']["definition"]


def test_parse_simple_arguments_with_modifiers():
Expand Down Expand Up @@ -169,7 +169,7 @@ def test_parse_simple_arguments_with_modifiers():
childrens_parents=False,
children_depth=2,
parents_depth=None,
) == parsed['configured_view']
) == parsed['configured_view']["definition"]


def test_parse_union():
Expand All @@ -188,7 +188,7 @@ def test_parse_union():
assert Union(
Criteria(method=MethodName.Config, value='view', method_arguments=['materialized']),
Criteria(method=MethodName.Tag, value='foo', method_arguments=[])
) == parsed['views-or-foos']
) == parsed['views-or-foos']["definition"]


def test_parse_intersection():
Expand All @@ -208,7 +208,7 @@ def test_parse_intersection():
assert Intersection(
Criteria(method=MethodName.Config, value='view', method_arguments=['materialized']),
Criteria(method=MethodName.Tag, value='foo', method_arguments=[]),
) == parsed['views-and-foos']
) == parsed['views-and-foos']["definition"]


def test_parse_union_excluding():
Expand All @@ -232,7 +232,7 @@ def test_parse_union_excluding():
Criteria(method=MethodName.Tag, value='foo', method_arguments=[])
),
Criteria(method=MethodName.Tag, value='bar', method_arguments=[]),
) == parsed['views-or-foos-not-bars']
) == parsed['views-or-foos-not-bars']["definition"]


def test_parse_yaml_complex():
Expand Down Expand Up @@ -272,7 +272,7 @@ def test_parse_yaml_complex():
assert Union(
Criteria(method=MethodName.Tag, value='nightly'),
Criteria(method=MethodName.Tag, value='weeknights_only'),
) == parsed['weeknights']
) == parsed['weeknights']["definition"]

assert Union(
Intersection(
Expand Down Expand Up @@ -300,4 +300,4 @@ def test_parse_yaml_complex():
),
),
),
) == parsed['test_name']
) == parsed['test_name']["definition"]
23 changes: 23 additions & 0 deletions test/unit/test_selector_errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,3 +177,26 @@ def test_method_no_value(self):
"not a valid method name"
):
selector_config_from_data(dct)

def test_multiple_default_true(self):
"""Test selector_config_from_data returns the correct error when multiple
default values are set
"""
dct = get_selector_dict('''\
selectors:
- name: summa_nothing
definition:
method: tag
value: nightly
default: true
- name: summa_something
definition:
method: tag
value: daily
default: true
''')
with self.assertRaisesRegex(
dbt.exceptions.DbtSelectorsError,
'Found multiple selectors with `default: true`:'
):
selector_config_from_data(dct)

0 comments on commit 4845174

Please sign in to comment.