diff --git a/core/dbt/contracts/graph/parsed.py b/core/dbt/contracts/graph/parsed.py index 3dc82c43b1a..4fb098c6346 100644 --- a/core/dbt/contracts/graph/parsed.py +++ b/core/dbt/contracts/graph/parsed.py @@ -5,14 +5,15 @@ from hologram import JsonSchemaMixin from hologram.helpers import ( - StrEnum, register_pattern, ExtensibleJsonSchemaMixin + StrEnum, register_pattern ) import dbt.clients.jinja import dbt.flags from dbt.contracts.graph.unparsed import ( UnparsedNode, UnparsedMacro, UnparsedDocumentationFile, Quoting, - UnparsedBaseNode, FreshnessThreshold + UnparsedBaseNode, FreshnessThreshold, ExternalTable, + AdditionalPropertiesAllowed ) from dbt.contracts.util import Replaceable from dbt.logger import GLOBAL_LOGGER as logger # noqa @@ -50,7 +51,7 @@ def insensitive_patterns(*patterns: str): @dataclass class NodeConfig( - ExtensibleJsonSchemaMixin, Replaceable, MutableMapping[str, Any] + AdditionalPropertiesAllowed, Replaceable, MutableMapping[str, Any] ): enabled: bool = True materialized: str = 'view' @@ -61,30 +62,6 @@ class NodeConfig( quoting: Dict[str, Any] = field(default_factory=dict) column_types: Dict[str, Any] = field(default_factory=dict) tags: Union[List[str], str] = field(default_factory=list) - _extra: Dict[str, Any] = field(default_factory=dict) - - @property - def extra(self): - return self._extra - - @classmethod - def from_dict(cls, data, validate=True): - self = super().from_dict(data=data, validate=validate) - keys = self.to_dict(validate=False, omit_none=False) - for key, value in data.items(): - if key not in keys: - self._extra[key] = value - return self - - def to_dict(self, omit_none=True, validate=False): - data = super().to_dict(omit_none=omit_none, validate=validate) - data.update(self._extra) - return data - - def replace(self, **kwargs): - dct = self.to_dict(omit_none=False, validate=False) - dct.update(kwargs) - return self.from_dict(dct) @classmethod def field_mapping(cls): @@ -133,6 +110,7 @@ def __len__(self): class ColumnInfo(JsonSchemaMixin, Replaceable): name: str description: str = '' + data_type: Optional[str] = None # Docrefs are not quite like regular references, as they indicate what they @@ -476,6 +454,7 @@ class ParsedSourceDefinition( quoting: Quoting = field(default_factory=Quoting) loaded_at_field: Optional[str] = None freshness: Optional[FreshnessThreshold] = None + external: Optional[ExternalTable] = None docrefs: List[Docref] = field(default_factory=list) description: str = '' columns: Dict[str, ColumnInfo] = field(default_factory=dict) diff --git a/core/dbt/contracts/graph/unparsed.py b/core/dbt/contracts/graph/unparsed.py index 80c5a30a506..5ecde24c083 100644 --- a/core/dbt/contracts/graph/unparsed.py +++ b/core/dbt/contracts/graph/unparsed.py @@ -1,8 +1,9 @@ from dbt.node_types import NodeType from dbt.contracts.util import Replaceable, Mergeable +from dbt.exceptions import CompilationException from hologram import JsonSchemaMixin -from hologram.helpers import StrEnum +from hologram.helpers import StrEnum, ExtensibleJsonSchemaMixin from dataclasses import dataclass, field from datetime import timedelta @@ -57,6 +58,7 @@ class UnparsedRunHook(UnparsedNode): class NamedTested(JsonSchemaMixin, Replaceable): name: str description: str = '' + data_type: Optional[str] = None tests: Optional[List[Union[Dict[str, Any], str]]] = None def __post_init__(self): @@ -129,6 +131,59 @@ def __bool__(self): return self.warn_after is not None or self.error_after is not None +@dataclass +class AdditionalPropertiesAllowed(ExtensibleJsonSchemaMixin): + _extra: Dict[str, Any] = field(default_factory=dict) + + @property + def extra(self): + return self._extra + + @classmethod + def from_dict(cls, data, validate=True): + self = super().from_dict(data=data, validate=validate) + keys = self.to_dict(validate=False, omit_none=False) + for key, value in data.items(): + if key not in keys: + self._extra[key] = value + return self + + def to_dict(self, omit_none=True, validate=False): + data = super().to_dict(omit_none=omit_none, validate=validate) + data.update(self._extra) + return data + + def replace(self, **kwargs): + dct = self.to_dict(omit_none=False, validate=False) + dct.update(kwargs) + return self.from_dict(dct) + + +@dataclass +class ExternalPartition(AdditionalPropertiesAllowed, Replaceable): + name: str = '' + description: str = '' + data_type: str = '' + + def __post_init__(self): + if self.name == '' or self.data_type == '': + raise CompilationException( + 'External partition columns must have names and data types' + ) + + +@dataclass +class ExternalTable(AdditionalPropertiesAllowed, Mergeable): + location: Optional[str] = None + file_format: Optional[str] = None + row_format: Optional[str] = None + tbl_properties: Optional[str] = None + partitions: Optional[List[ExternalPartition]] = None + + def __bool__(self): + return self.location is not None + + @dataclass class Quoting(JsonSchemaMixin, Mergeable): database: Optional[bool] = None @@ -144,6 +199,9 @@ class UnparsedSourceTableDefinition(ColumnDescription, NodeDescription): freshness: Optional[FreshnessThreshold] = field( default_factory=FreshnessThreshold ) + external: Optional[ExternalTable] = field( + default_factory=ExternalTable + ) def __post_init__(self): NodeDescription.__post_init__(self) diff --git a/core/dbt/parser/schemas.py b/core/dbt/parser/schemas.py index ddf2fd5c185..62887d81931 100644 --- a/core/dbt/parser/schemas.py +++ b/core/dbt/parser/schemas.py @@ -62,9 +62,10 @@ def __init__(self): self.column_info: Dict[str, ColumnInfo] = {} self.docrefs: List[Docref] = [] - def add(self, column_name, description): + def add(self, column_name, description, data_type): self.column_info[column_name] = ColumnInfo(name=column_name, - description=description) + description=description, + data_type=data_type) def collect_docrefs( @@ -216,9 +217,10 @@ def parse_column( ) -> None: column_name = column.name description = column.description + data_type = column.data_type collect_docrefs(block.target, refs, column_name, description) - refs.add(column_name, description) + refs.add(column_name, description, data_type) if not column.tests: return @@ -348,6 +350,7 @@ def generate_source_node( unique_id=unique_id, name=table.name, description=description, + external=table.external, source_name=source.name, source_description=source_description, loader=source.loader, diff --git a/test/integration/029_docs_generate_tests/test_docs_generate.py b/test/integration/029_docs_generate_tests/test_docs_generate.py index 2300e20f689..6c7c456f53b 100644 --- a/test/integration/029_docs_generate_tests/test_docs_generate.py +++ b/test/integration/029_docs_generate_tests/test_docs_generate.py @@ -844,22 +844,27 @@ def expected_seeded_manifest(self, model_database=None): 'id': { 'name': 'id', 'description': 'The user ID number', + 'data_type': None, }, 'first_name': { 'name': 'first_name', 'description': "The user's first name", + 'data_type': None, }, 'email': { 'name': 'email', 'description': "The user's email", + 'data_type': None, }, 'ip_address': { 'name': 'ip_address', 'description': "The user's IP address", + 'data_type': None, }, 'updated_at': { 'name': 'updated_at', 'description': "The last time this user's email was updated", + 'data_type': None, }, }, 'patch_path': schema_yml_path, @@ -1162,11 +1167,13 @@ def expected_postgres_references_manifest(self, model_database=None): 'columns': { 'first_name': { 'description': 'The first name being summarized', - 'name': 'first_name' + 'name': 'first_name', + 'data_type': None }, 'ct': { 'description': 'The number of instances of the first name', - 'name': 'ct' + 'name': 'ct', + 'data_type': None }, }, 'config': { @@ -1228,11 +1235,13 @@ def expected_postgres_references_manifest(self, model_database=None): 'columns': { 'first_name': { 'description': 'The first name being summarized', - 'name': 'first_name' + 'name': 'first_name', + 'data_type': None }, 'ct': { 'description': 'The number of instances of the first name', - 'name': 'ct' + 'name': 'ct', + 'data_type': None }, }, 'config': { @@ -1327,7 +1336,8 @@ def expected_postgres_references_manifest(self, model_database=None): 'columns': { 'id': { 'description': 'An ID field', - 'name': 'id' + 'name': 'id', + 'data_type': None } }, 'quoting': { @@ -1354,6 +1364,10 @@ def expected_postgres_references_manifest(self, model_database=None): 'documentation_package': '', }, ], + 'external': { + 'file_format': None, 'location': None, 'partitions': None, + 'row_format': None, 'tbl_properties': None + }, 'freshness': {'error_after': None, 'warn_after': None, 'filter': None}, 'identifier': 'seed', 'loaded_at_field': None, @@ -1591,23 +1605,28 @@ def expected_bigquery_complex_manifest(self): 'columns': { 'email': { 'description': "The user's email", - 'name': 'email' + 'name': 'email', + 'data_type': None }, 'first_name': { 'description': "The user's name", - 'name': 'first_name' + 'name': 'first_name', + 'data_type': None }, 'id': { 'description': 'The user id', - 'name': 'id' + 'name': 'id', + 'data_type': None }, 'ip_address': { 'description': "The user's IP address", - 'name': 'ip_address' + 'name': 'ip_address', + 'data_type': None }, 'updated_at': { 'description': 'When the user was updated', - 'name': 'updated_at' + 'name': 'updated_at', + 'data_type': None }, }, 'description': 'A clustered and partitioned copy of the test model', @@ -1648,23 +1667,28 @@ def expected_bigquery_complex_manifest(self): 'columns': { 'email': { 'description': "The user's email", - 'name': 'email' + 'name': 'email', + 'data_type': None }, 'first_name': { 'description': "The user's name", - 'name': 'first_name' + 'name': 'first_name', + 'data_type': None }, 'id': { 'description': 'The user id', - 'name': 'id' + 'name': 'id', + 'data_type': None }, 'ip_address': { 'description': "The user's IP address", - 'name': 'ip_address' + 'name': 'ip_address', + 'data_type': None }, 'updated_at': { 'description': 'When the user was updated', - 'name': 'updated_at' + 'name': 'updated_at', + 'data_type': None }, }, 'description': 'A clustered and partitioned copy of the test model, clustered on multiple columns', @@ -1707,22 +1731,27 @@ def expected_bigquery_complex_manifest(self): 'field_1': { 'name': 'field_1', 'description': 'The first field', + 'data_type': None, }, 'field_2': { 'name': 'field_2', 'description': 'The second field', + 'data_type': None, }, 'field_3': { 'name': 'field_3', 'description': 'The third field', + 'data_type': None, }, 'nested_field.field_4': { 'name': 'nested_field.field_4', 'description': 'The first nested field', + 'data_type': None, }, 'nested_field.field_5': { 'name': 'nested_field.field_5', 'description': 'The second nested field', + 'data_type': None, }, }, 'description': 'The test model', @@ -1968,22 +1997,27 @@ def expected_redshift_incremental_view_manifest(self): 'id': { 'name': 'id', 'description': 'The user ID number', + 'data_type': None, }, 'first_name': { 'name': 'first_name', 'description': "The user's first name", + 'data_type': None, }, 'email': { 'name': 'email', 'description': "The user's email", + 'data_type': None, }, 'ip_address': { 'name': 'ip_address', 'description': "The user's IP address", + 'data_type': None, }, 'updated_at': { 'name': 'updated_at', 'description': "The last time this user's email was updated", + 'data_type': None, }, }, 'patch_path': self.dir('rs_models/schema.yml'), @@ -2185,12 +2219,12 @@ def expected_run_results(self, quote_schema=True, quote_model=False, 'target/compiled/test/model.sql' ), 'columns': { - 'id': {'description': 'The user ID number', 'name': 'id'}, - 'first_name': {'description': "The user's first name", 'name': 'first_name'}, - 'email': {'description': "The user's email", 'name': 'email'}, - 'ip_address': {'description': "The user's IP address", 'name': 'ip_address'}, + 'id': {'description': 'The user ID number', 'name': 'id', 'data_type': None}, + 'first_name': {'description': "The user's first name", 'name': 'first_name', 'data_type': None}, + 'email': {'description': "The user's email", 'name': 'email', 'data_type': None}, + 'ip_address': {'description': "The user's IP address", 'name': 'ip_address', 'data_type': None}, 'updated_at': {'description': "The last time this user's email was updated", - 'name': 'updated_at'} + 'name': 'updated_at', 'data_type': None} }, 'compiled': True, 'compiled_sql': compiled_sql, @@ -2473,11 +2507,13 @@ def expected_postgres_references_run_results(self): 'columns': { 'first_name': { 'description': 'The first name being summarized', - 'name': 'first_name' + 'name': 'first_name', + 'data_type': None }, 'ct': { 'description': 'The number of instances of the first name', - 'name': 'ct' + 'name': 'ct', + 'data_type': None }, }, 'compiled': True, @@ -2563,11 +2599,13 @@ def expected_postgres_references_run_results(self): 'columns': { 'first_name': { 'description': 'The first name being summarized', - 'name': 'first_name' + 'name': 'first_name', + 'data_type': None }, 'ct': { 'description': 'The number of instances of the first name', - 'name': 'ct' + 'name': 'ct', + 'data_type': None }, }, 'compiled': True, diff --git a/test/integration/035_docs_blocks/test_docs_blocks.py b/test/integration/035_docs_blocks/test_docs_blocks.py index 48f14410e9e..d6d7b4c5631 100644 --- a/test/integration/035_docs_blocks/test_docs_blocks.py +++ b/test/integration/035_docs_blocks/test_docs_blocks.py @@ -36,7 +36,8 @@ def test_postgres_valid_doc_ref(self): self.assertEqual( { 'name': 'id', - 'description': 'The user ID number' + 'description': 'The user ID number', + 'data_type': None, }, model_data['columns']['id'] ) @@ -44,6 +45,7 @@ def test_postgres_valid_doc_ref(self): { 'name': 'first_name', 'description': "The user's first name", + 'data_type': None, }, model_data['columns']['first_name'] ) @@ -52,6 +54,7 @@ def test_postgres_valid_doc_ref(self): { 'name': 'last_name', 'description': "The user's last name", + 'data_type': None, }, model_data['columns']['last_name'] ) @@ -75,7 +78,8 @@ def test_postgres_alternative_docs_path(self): self.assertEqual( { 'name': 'id', - 'description': 'The user ID number with alternative text' + 'description': 'The user ID number with alternative text', + 'data_type': None, }, model_data['columns']['id'] ) @@ -83,6 +87,7 @@ def test_postgres_alternative_docs_path(self): { 'name': 'first_name', 'description': "The user's first name", + 'data_type': None, }, model_data['columns']['first_name'] ) @@ -91,6 +96,7 @@ def test_postgres_alternative_docs_path(self): { 'name': 'last_name', 'description': "The user's last name in this other file", + 'data_type': None, }, model_data['columns']['last_name'] ) diff --git a/test/unit/test_contracts_graph_unparsed.py b/test/unit/test_contracts_graph_unparsed.py index 792e6be109c..e2200d523b0 100644 --- a/test/unit/test_contracts_graph_unparsed.py +++ b/test/unit/test_contracts_graph_unparsed.py @@ -300,6 +300,7 @@ def test_table_defaults(self): 'tests': [], 'columns': [], 'quoting': {}, + 'external': {}, 'freshness': {}, }, { @@ -308,6 +309,7 @@ def test_table_defaults(self): 'tests': [], 'columns': [], 'quoting': {'database': True}, + 'external': {}, 'freshness': {}, }, ], diff --git a/test/unit/test_parser.py b/test/unit/test_parser.py index 8d394ab3214..5d32c7f9fc2 100644 --- a/test/unit/test_parser.py +++ b/test/unit/test_parser.py @@ -24,7 +24,7 @@ ParsedSnapshotNode, TimestampSnapshotConfig, SnapshotStrategy, ParsedAnalysisNode ) -from dbt.contracts.graph.unparsed import FreshnessThreshold +from dbt.contracts.graph.unparsed import FreshnessThreshold, ExternalTable from .utils import config_from_parts_or_dicts, normalize @@ -209,6 +209,7 @@ def test__parse_basic_source(self): name='my_table', loader='', freshness=FreshnessThreshold(), + external=ExternalTable(), source_description='', identifier='my_table', fqn=['snowplow', 'my_source', 'my_table'],