From a203fe866ad3e969e7de9cc24ddbbef1934aa7d0 Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Sun, 19 Mar 2023 19:24:07 -0400 Subject: [PATCH] CT 2196, CT2121 constraints column order (#7161) --- .../unreleased/Features-20230313-135917.yaml | 6 +++ core/dbt/clients/jinja.py | 2 +- core/dbt/context/exceptions_jinja.py | 6 +++ core/dbt/exceptions.py | 17 +++++++ .../models/table/columns_spec_ddl.sql | 45 ++++++++++++++---- .../models/table/create_table_as.sql | 23 ++++++++-- .../dbt/include/postgres/macros/adapters.sql | 7 +-- .../dbt/tests/adapter/constraints/fixtures.py | 30 ++++++------ .../adapter/constraints/test_constraints.py | 46 +++++++++---------- ...nt_configs.py => test_contract_configs.py} | 42 ++++++++++------- 10 files changed, 152 insertions(+), 72 deletions(-) create mode 100644 .changes/unreleased/Features-20230313-135917.yaml rename tests/functional/configs/{test_constraint_configs.py => test_contract_configs.py} (89%) diff --git a/.changes/unreleased/Features-20230313-135917.yaml b/.changes/unreleased/Features-20230313-135917.yaml new file mode 100644 index 00000000000..e8d7391a2f9 --- /dev/null +++ b/.changes/unreleased/Features-20230313-135917.yaml @@ -0,0 +1,6 @@ +kind: Features +body: Make model contracts agnostic to ordering +time: 2023-03-13T13:59:17.255368-04:00 +custom: + Author: gshank + Issue: 6975 7064 diff --git a/core/dbt/clients/jinja.py b/core/dbt/clients/jinja.py index ecf668c11e5..9674a1265e6 100644 --- a/core/dbt/clients/jinja.py +++ b/core/dbt/clients/jinja.py @@ -483,7 +483,7 @@ def get_environment( native: bool = False, ) -> jinja2.Environment: args: Dict[str, List[Union[str, Type[jinja2.ext.Extension]]]] = { - "extensions": ["jinja2.ext.do"] + "extensions": ["jinja2.ext.do", "jinja2.ext.loopcontrols"] } if capture_macros: diff --git a/core/dbt/context/exceptions_jinja.py b/core/dbt/context/exceptions_jinja.py index 8eb0ea09f64..c596fcc6ecb 100644 --- a/core/dbt/context/exceptions_jinja.py +++ b/core/dbt/context/exceptions_jinja.py @@ -23,6 +23,7 @@ PropertyYMLError, NotImplementedError, RelationWrongTypeError, + ContractError, ColumnTypeMissingError, ) @@ -66,6 +67,10 @@ def raise_compiler_error(msg, node=None) -> NoReturn: raise CompilationError(msg, node) +def raise_contract_error(yaml_columns, sql_columns) -> NoReturn: + raise ContractError(yaml_columns, sql_columns) + + def raise_database_error(msg, node=None) -> NoReturn: raise DbtDatabaseError(msg, node) @@ -124,6 +129,7 @@ def column_type_missing(column_names) -> NoReturn: raise_invalid_property_yml_version, raise_not_implemented, relation_wrong_type, + raise_contract_error, column_type_missing, ] } diff --git a/core/dbt/exceptions.py b/core/dbt/exceptions.py index c1a63da11ca..d85a9443976 100644 --- a/core/dbt/exceptions.py +++ b/core/dbt/exceptions.py @@ -2124,6 +2124,23 @@ def get_message(self) -> str: return msg +class ContractError(CompilationError): + def __init__(self, yaml_columns, sql_columns): + self.yaml_columns = yaml_columns + self.sql_columns = sql_columns + super().__init__(msg=self.get_message()) + + def get_message(self) -> str: + msg = ( + "Contracts are enabled for this model. " + "Please ensure the name, data_type, and number of columns in your `yml` file " + "match the columns in your SQL file.\n" + f"Schema File Columns: {self.yaml_columns}\n" + f"SQL File Columns: {self.sql_columns}" + ) + return msg + + # not modifying these since rpc should be deprecated soon class UnknownAsyncIDException(Exception): CODE = 10012 diff --git a/core/dbt/include/global_project/macros/materializations/models/table/columns_spec_ddl.sql b/core/dbt/include/global_project/macros/materializations/models/table/columns_spec_ddl.sql index 7d929f5ff9c..e3538855641 100644 --- a/core/dbt/include/global_project/macros/materializations/models/table/columns_spec_ddl.sql +++ b/core/dbt/include/global_project/macros/materializations/models/table/columns_spec_ddl.sql @@ -29,7 +29,7 @@ {# Compares the column schema provided by a model's sql file to the column schema provided by a model's schema file. - If any differences in name, data_type or order of columns exist between the two schemas, raises a compiler error + If any differences in name, data_type or number of columns exist between the two schemas, raises a compiler error #} {% macro assert_columns_equivalent(sql) %} {#-- Obtain the column schema provided by sql file. #} @@ -37,23 +37,52 @@ {#--Obtain the column schema provided by the schema file by generating an 'empty schema' query from the model's columns. #} {%- set schema_file_provided_columns = get_column_schema_from_query(get_empty_schema_sql(model['columns'])) -%} - {%- set sql_file_provided_columns_formatted = format_columns(sql_file_provided_columns) -%} - {%- set schema_file_provided_columns_formatted = format_columns(schema_file_provided_columns) -%} + {#-- create dictionaries with name and formatted data type and strings for exception #} + {%- set sql_columns = format_columns(sql_file_provided_columns) -%} + {%- set string_sql_columns = stringify_formatted_columns(sql_columns) -%} + {%- set yaml_columns = format_columns(schema_file_provided_columns) -%} + {%- set string_yaml_columns = stringify_formatted_columns(yaml_columns) -%} - {%- if sql_file_provided_columns_formatted != schema_file_provided_columns_formatted -%} - {%- do exceptions.raise_compiler_error('Please ensure the name, data_type, order, and number of columns in your `yml` file match the columns in your SQL file.\nSchema File Columns: ' ~ (schema_file_provided_columns_formatted|trim) ~ '\n\nSQL File Columns: ' ~ (sql_file_provided_columns_formatted|trim) ~ ' ' ) %} + {%- if sql_columns|length != yaml_columns|length -%} + {%- do exceptions.raise_contract_error(string_yaml_columns, string_sql_columns) -%} {%- endif -%} + {%- for sql_col in sql_columns -%} + {%- set yaml_col = [] -%} + {%- for this_col in yaml_columns -%} + {%- if this_col['name'] == sql_col['name'] -%} + {%- do yaml_col.append(this_col) -%} + {%- break -%} + {%- endif -%} + {%- endfor -%} + {%- if not yaml_col -%} + {#-- Column with name not found in yaml #} + {%- do exceptions.raise_contract_error(string_yaml_columns, string_sql_columns) -%} + {%- endif -%} + {%- if sql_col['formatted'] != yaml_col[0]['formatted'] -%} + {#-- Column data types don't match #} + {%- do exceptions.raise_contract_error(string_yaml_columns, string_sql_columns) -%} + {%- endif -%} + {%- endfor -%} + {% endmacro %} {% macro format_columns(columns) %} {% set formatted_columns = [] %} {% for column in columns %} {%- set formatted_column = adapter.dispatch('format_column', 'dbt')(column) -%} - {%- do formatted_columns.append(formatted_column) -%} + {%- do formatted_columns.append({'name': column.name, 'formatted': formatted_column}) -%} {% endfor %} - {{ return(formatted_columns|join(', ')) }} -{%- endmacro -%} + {{ return(formatted_columns) }} +{% endmacro %} + +{% macro stringify_formatted_columns(formatted_columns) %} + {% set column_strings = [] %} + {% for column in formatted_columns %} + {% do column_strings.append(column['formatted']) %} + {% endfor %} + {{ return(column_strings|join(', ')) }} +{% endmacro %} {% macro default__format_column(column) -%} {{ return(column.column.lower() ~ " " ~ column.dtype) }} diff --git a/core/dbt/include/global_project/macros/materializations/models/table/create_table_as.sql b/core/dbt/include/global_project/macros/materializations/models/table/create_table_as.sql index 4914f62bf25..f8cdcf6cb23 100644 --- a/core/dbt/include/global_project/macros/materializations/models/table/create_table_as.sql +++ b/core/dbt/include/global_project/macros/materializations/models/table/create_table_as.sql @@ -25,11 +25,26 @@ create {% if temporary: -%}temporary{%- endif %} table {{ relation.include(database=(not temporary), schema=(not temporary)) }} - {% if config.get('contract', False) %} - {{ get_assert_columns_equivalent(sql) }} - {{ get_columns_spec_ddl() }} - {% endif %} + {% if config.get('contract', False) %} + {{ get_assert_columns_equivalent(sql) }} + {{ get_columns_spec_ddl() }} + {%- set sql = get_select_subquery(sql) %} + {% endif %} as ( {{ sql }} ); {%- endmacro %} + +{% macro get_select_subquery(sql) %} + {{ return(adapter.dispatch('get_select_subquery', 'dbt')(sql)) }} +{% endmacro %} + +{% macro default__get_select_subquery(sql) %} + select + {% for column in model['columns'] %} + {{ column }}{{ ", " if not loop.last }} + {% endfor %} + from ( + {{ sql }} + ) as model_subq +{%- endmacro %} diff --git a/plugins/postgres/dbt/include/postgres/macros/adapters.sql b/plugins/postgres/dbt/include/postgres/macros/adapters.sql index 21241c639e4..8f7fccd2cbe 100644 --- a/plugins/postgres/dbt/include/postgres/macros/adapters.sql +++ b/plugins/postgres/dbt/include/postgres/macros/adapters.sql @@ -13,10 +13,11 @@ {{ get_assert_columns_equivalent(sql) }} {{ get_columns_spec_ddl() }} ; insert into {{ relation }} {{ get_column_names() }} - {% else %} - as + {%- set sql = get_select_subquery(sql) %} + {% else %} + as {% endif %} - ( + ( {{ sql }} ); {%- endmacro %} diff --git a/tests/adapter/dbt/tests/adapter/constraints/fixtures.py b/tests/adapter/dbt/tests/adapter/constraints/fixtures.py index c6923d94e53..71017d0a6cb 100644 --- a/tests/adapter/dbt/tests/adapter/constraints/fixtures.py +++ b/tests/adapter/dbt/tests/adapter/constraints/fixtures.py @@ -6,9 +6,9 @@ }} select - 1 as id, 'blue' as color, - cast('2019-01-01' as date) as date_day + 1 as id, + '2019-01-01' as date_day """ my_model_wrong_order_sql = """ @@ -21,7 +21,7 @@ select 'blue' as color, 1 as id, - cast('2019-01-01' as date) as date_day + '2019-01-01' as date_day """ my_model_wrong_name_sql = """ @@ -32,9 +32,9 @@ }} select - 1 as error, 'blue' as color, - cast('2019-01-01' as date) as date_day + 1 as error, + '2019-01-01' as date_day """ my_model_data_type_sql = """ @@ -60,7 +60,7 @@ cast(null as {{ dbt.type_int() }}) as id, -- change the color as well (to test rollback) 'red' as color, - cast('2019-01-01' as date) as date_day + '2019-01-01' as date_day """ model_schema_yml = """ @@ -81,7 +81,7 @@ - name: color data_type: text - name: date_day - data_type: date + data_type: text - name: my_model_error config: contract: true @@ -96,7 +96,7 @@ - name: color data_type: text - name: date_day - data_type: date + data_type: text - name: my_model_wrong_order config: contract: true @@ -111,7 +111,7 @@ - name: color data_type: text - name: date_day - data_type: date + data_type: text - name: my_model_wrong_name config: contract: true @@ -126,7 +126,7 @@ - name: color data_type: text - name: date_day - data_type: date + data_type: text """ model_data_type_schema_yml = """ @@ -150,7 +150,7 @@ select 1 as id, 'blue' as color, - cast('2019-01-01' as date) as date_day + '2019-01-01' as date_day """ my_model_view_wrong_order_sql = """ @@ -163,7 +163,7 @@ select 'blue' as color, 1 as id, - cast('2019-01-01' as date) as date_day + '2019-01-01' as date_day """ my_model_view_wrong_name_sql = """ @@ -174,9 +174,9 @@ }} select - 1 as error, 'blue' as color, - cast('2019-01-01' as date) as date_day + 1 as error, + '2019-01-01' as date_day """ my_model_view_with_nulls_sql = """ @@ -191,5 +191,5 @@ cast(null as {{ dbt.type_int() }}) as id, -- change the color as well (to test rollback) 'red' as color, - cast('2019-01-01' as date) as date_day + '2019-01-01' as date_day """ diff --git a/tests/adapter/dbt/tests/adapter/constraints/test_constraints.py b/tests/adapter/dbt/tests/adapter/constraints/test_constraints.py index 1b08e9f8bae..961e2b6b1d8 100644 --- a/tests/adapter/dbt/tests/adapter/constraints/test_constraints.py +++ b/tests/adapter/dbt/tests/adapter/constraints/test_constraints.py @@ -54,7 +54,6 @@ def data_types(self, schema_int_type, int_type, string_type): return [ ["1", schema_int_type, int_type], ["'1'", string_type, string_type], - ["cast('2019-01-01' as date)", "date", "DATE"], ["true", "bool", "BOOL"], ["'2013-11-03 00:00:00-07'::timestamptz", "timestamptz", "DATETIMETZ"], ["'2013-11-03 00:00:00-07'::timestamp", "timestamp", "DATETIME"], @@ -65,8 +64,9 @@ def data_types(self, schema_int_type, int_type, string_type): ] def test__constraints_wrong_column_order(self, project, string_type, int_type): + # This no longer causes an error, since we enforce yaml column order results, log_output = run_dbt_and_capture( - ["run", "-s", "my_model_wrong_order"], expect_pass=False + ["run", "-s", "my_model_wrong_order"], expect_pass=True ) manifest = get_manifest(project.project_root) model_id = "model.test.my_model_wrong_order" @@ -75,18 +75,6 @@ def test__constraints_wrong_column_order(self, project, string_type, int_type): assert contract_actual_config is True - expected_compile_error = "Please ensure the name, data_type, order, and number of columns in your `yml` file match the columns in your SQL file." - expected_schema_file_columns = ( - f"Schema File Columns: id {int_type}, color {string_type}, date_day DATE" - ) - expected_sql_file_columns = ( - f"SQL File Columns: color {string_type}, id {int_type}, date_day DATE" - ) - - assert expected_compile_error in log_output - assert expected_schema_file_columns in log_output - assert expected_sql_file_columns in log_output - def test__constraints_wrong_column_names(self, project, string_type, int_type): results, log_output = run_dbt_and_capture( ["run", "-s", "my_model_wrong_name"], expect_pass=False @@ -98,12 +86,12 @@ def test__constraints_wrong_column_names(self, project, string_type, int_type): assert contract_actual_config is True - expected_compile_error = "Please ensure the name, data_type, order, and number of columns in your `yml` file match the columns in your SQL file." + expected_compile_error = "Please ensure the name, data_type, and number of columns in your `yml` file match the columns in your SQL file." expected_schema_file_columns = ( - f"Schema File Columns: id {int_type}, color {string_type}, date_day DATE" + f"Schema File Columns: id {int_type}, color {string_type}, date_day {string_type}" ) expected_sql_file_columns = ( - f"SQL File Columns: error {int_type}, color {string_type}, date_day DATE" + f"SQL File Columns: color {string_type}, error {int_type}, date_day {string_type}" ) assert expected_compile_error in log_output @@ -147,7 +135,7 @@ def test__constraints_wrong_column_data_types( assert contract_actual_config is True - expected_compile_error = "Please ensure the name, data_type, order, and number of columns in your `yml` file match the columns in your SQL file." + expected_compile_error = "Please ensure the name, data_type, and number of columns in your `yml` file match the columns in your SQL file." expected_sql_file_columns = ( f"SQL File Columns: wrong_data_type_column_name {error_data_type}" ) @@ -190,17 +178,25 @@ def test__constraints_correct_column_data_types(self, project, data_types): create table {0} ( id integer not null primary key check (id > 0) , color text , - date_day date + date_day text ) ; insert into {0} ( id , color , date_day -) ( +) +( select - 1 as id, - 'blue' as color, - cast('2019-01-01' as date) as date_day + id, + color, + date_day + from + ( + select + 'blue' as color, + 1 as id, + '2019-01-01' as date_day + ) as model_subq ); """ @@ -248,10 +244,10 @@ def test__constraints_ddl(self, project, expected_sql): expected_sql_check == generated_sql_check ), f""" -- GENERATED SQL -{generated_sql} +{generated_sql_check} -- EXPECTED SQL -{expected_sql} +{expected_sql_check} """ def test__constraints_enforcement_rollback( diff --git a/tests/functional/configs/test_constraint_configs.py b/tests/functional/configs/test_contract_configs.py similarity index 89% rename from tests/functional/configs/test_constraint_configs.py rename to tests/functional/configs/test_contract_configs.py index b5594b75423..805948185d4 100644 --- a/tests/functional/configs/test_constraint_configs.py +++ b/tests/functional/configs/test_contract_configs.py @@ -1,6 +1,6 @@ import pytest from dbt.exceptions import ParsingError -from dbt.tests.util import run_dbt, get_manifest, run_dbt_and_capture +from dbt.tests.util import run_dbt, get_manifest, get_artifact, run_dbt_and_capture my_model_sql = """ {{ @@ -10,8 +10,8 @@ }} select - 1 as id, 'blue' as color, + 1 as id, cast('2019-01-01' as date) as date_day """ @@ -29,7 +29,7 @@ cast('2019-01-01' as date) as date_day """ -my_model_constraints_disabled_sql = """ +my_model_contract_disabled_sql = """ {{ config( materialized = "table", @@ -171,7 +171,7 @@ def model(dbt, _): """ -class TestModelLevelConstraintsEnabledConfigs: +class TestModelLevelContractEnabledConfigs: @pytest.fixture(scope="class") def models(self): return { @@ -180,11 +180,12 @@ def models(self): } def test__model_contract_true(self, project): - run_dbt(["parse"]) + run_dbt(["run"]) manifest = get_manifest(project.project_root) model_id = "model.test.my_model" - my_model_columns = manifest.nodes[model_id].columns - my_model_config = manifest.nodes[model_id].config + model = manifest.nodes[model_id] + my_model_columns = model.columns + my_model_config = model.config contract_actual_config = my_model_config.contract assert contract_actual_config is True @@ -193,8 +194,17 @@ def test__model_contract_true(self, project): assert expected_columns == str(my_model_columns) + # compiled fields aren't in the manifest above because it only has parsed fields + manifest_json = get_artifact(project.project_root, "target", "manifest.json") + compiled_code = manifest_json["nodes"][model_id]["compiled_code"] + cleaned_code = " ".join(compiled_code.split()) + assert ( + "select 'blue' as color, 1 as id, cast('2019-01-01' as date) as date_day" + == cleaned_code + ) + -class TestProjectConstraintsEnabledConfigs: +class TestProjectContractEnabledConfigs: @pytest.fixture(scope="class") def project_config_update(self): return { @@ -221,7 +231,7 @@ def test_defined_column_type(self, project): assert contract_actual_config is True -class TestProjectConstraintsEnabledConfigsError: +class TestProjectContractEnabledConfigsError: @pytest.fixture(scope="class") def project_config_update(self): return { @@ -253,7 +263,7 @@ def test_undefined_column_type(self, project): assert expected_compile_error in log_output -class TestModelConstraintsEnabledConfigs: +class TestModelContractEnabledConfigs: @pytest.fixture(scope="class") def models(self): return {"my_model.sql": my_model_contract_sql, "constraints_schema.yml": model_schema_yml} @@ -267,7 +277,7 @@ def test__model_contract(self, project): assert contract_actual_config is True -class TestModelConstraintsEnabledConfigsMissingDataTypes: +class TestModelContractEnabledConfigsMissingDataTypes: @pytest.fixture(scope="class") def models(self): return { @@ -289,11 +299,11 @@ def test_undefined_column_type(self, project): assert expected_compile_error in log_output -class TestModelLevelConstraintsDisabledConfigs: +class TestModelLevelContractDisabledConfigs: @pytest.fixture(scope="class") def models(self): return { - "my_model.sql": my_model_constraints_disabled_sql, + "my_model.sql": my_model_contract_disabled_sql, "constraints_schema.yml": model_schema_yml, } @@ -308,7 +318,7 @@ def test__model_contract_false(self, project): assert contract_actual_config is False -class TestModelLevelConstraintsErrorMessages: +class TestModelLevelContractErrorMessages: @pytest.fixture(scope="class") def models(self): return { @@ -330,7 +340,7 @@ def test__config_errors(self, project): assert expected_empty_data_type_error not in str(exc_str) -class TestSchemaConstraintsEnabledConfigs: +class TestSchemaContractEnabledConfigs: @pytest.fixture(scope="class") def models(self): return { @@ -347,7 +357,7 @@ def test__schema_error(self, project): assert schema_error_expected in str(exc_str) -class TestPythonModelLevelConstraintsErrorMessages: +class TestPythonModelLevelContractErrorMessages: @pytest.fixture(scope="class") def models(self): return {