Skip to content

Commit

Permalink
Semantic Model Validation (#8049)
Browse files Browse the repository at this point in the history
* Use dbt-semantic-interface validations on semantic models and metrics defined in Core.

* Remove empty test, since semantic models don't generate any validation warnings.

* Add changelog entry.

* Temporarily remove requirement that there must be semantic models defined in order to define metrics
  • Loading branch information
peterallenwebb authored Jul 10, 2023
1 parent 54b1e56 commit 44572e7
Show file tree
Hide file tree
Showing 8 changed files with 542 additions and 3,891 deletions.
6 changes: 6 additions & 0 deletions .changes/unreleased/Features-20230707-112838.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Features
body: Support validation of metrics and semantic models.
time: 2023-07-07T11:28:38.760462-04:00
custom:
Author: peterallenwebb
Issue: "7969"
33 changes: 33 additions & 0 deletions core/dbt/contracts/graph/semantic_manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,48 @@
PydanticTimeSpineTableConfiguration,
)
from dbt_semantic_interfaces.type_enums import TimeGranularity
from dbt_semantic_interfaces.validations.semantic_manifest_validator import (
SemanticManifestValidator,
)

from dbt.clients.system import write_file
from dbt.events.base_types import EventLevel
from dbt.events.functions import fire_event
from dbt.events.types import SemanticValidationFailure
from dbt.exceptions import ParsingError


class SemanticManifest:
def __init__(self, manifest):
self.manifest = manifest

def validate(self) -> bool:
    """Validate the manifest's metrics and semantic models.

    Validation is skipped (and reported as passing) when the manifest has
    no metrics or no semantic models. Otherwise every validation warning
    and error is emitted as a ``SemanticValidationFailure`` event, with
    errors fired at ``EventLevel.ERROR``.

    Returns:
        True when validation was skipped or produced no errors, else False.
    """
    # TODO: Enforce this check.
    # if self.manifest.metrics and not self.manifest.semantic_models:
    #    fire_event(
    #        SemanticValidationFailure(
    #            msg="Metrics require semantic models, but none were found."
    #        ),
    #        EventLevel.ERROR,
    #    )
    #    return False

    manifest = self.manifest
    # Both collections must be non-empty for there to be anything to check.
    if not (manifest.metrics and manifest.semantic_models):
        return True

    pydantic_manifest = self._get_pydantic_semantic_manifest()
    checker = SemanticManifestValidator[PydanticSemanticManifest]()
    results = checker.validate_semantic_manifest(pydantic_manifest)

    # Warnings use the event's default level; they never fail validation.
    for issue in results.warnings:
        fire_event(SemanticValidationFailure(msg=issue.message))

    # Errors are escalated explicitly so they surface at error level.
    for issue in results.errors:
        fire_event(SemanticValidationFailure(msg=issue.message), EventLevel.ERROR)

    return not results.errors

def write_json_to_file(self, file_path: str):
semantic_manifest = self._get_pydantic_semantic_manifest()
json = semantic_manifest.json()
Expand Down
12 changes: 12 additions & 0 deletions core/dbt/events/types.proto
Original file line number Diff line number Diff line change
Expand Up @@ -1232,11 +1232,23 @@ message ParseInlineNodeError{
NodeInfo node_info = 1;
string exc = 2;
}

// Wrapper message pairing an EventInfo (field 1) with a
// ParseInlineNodeError payload (field 2).
message ParseInlineNodeErrorMsg {
EventInfo info = 1;
ParseInlineNodeError data = 2;
}

// I070
// Payload for a semantic validation failure event; it carries only the
// message text.
// NOTE(review): the sole field is numbered 2 and no field 1 is declared in
// this message. Confirm that is intentional before renumbering, since field
// numbers are part of the serialized format.
message SemanticValidationFailure {
string msg = 2;
}

// Wrapper message pairing an EventInfo (field 1) with a
// SemanticValidationFailure payload (field 2).
message SemanticValidationFailureMsg {
EventInfo info = 1;
SemanticValidationFailure data = 2;
}


// M - Deps generation

// M001
Expand Down
8 changes: 8 additions & 0 deletions core/dbt/events/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -1224,6 +1224,14 @@ def message(self) -> str:
return "Error while parsing node: " + self.node_info.node_name + "\n" + self.exc


class SemanticValidationFailure(WarnLevel):
    """Event with code I070 whose message is the ``msg`` text it carries."""

    def code(self) -> str:
        """Return the event code for this event type."""
        return "I070"

    def message(self) -> str:
        """Return the ``msg`` text unchanged."""
        return self.msg


# =======================================================
# M - Deps generation
# =======================================================
Expand Down
4,331 changes: 440 additions & 3,891 deletions core/dbt/events/types_pb2.py

Large diffs are not rendered by default.

4 changes: 4 additions & 0 deletions core/dbt/parser/manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -518,6 +518,10 @@ def load(self):
self.check_valid_group_config()
self.check_valid_access_property()

semantic_manifest = SemanticManifest(self.manifest)
if not semantic_manifest.validate():
raise dbt.exceptions.ParsingError("Semantic Manifest validation failed.")

# update tracking data
self._perf_info.process_manifest_elapsed = time.perf_counter() - start_process
self._perf_info.static_analysis_parsed_path_count = (
Expand Down
38 changes: 38 additions & 0 deletions tests/functional/semantic_models/test_semantic_model_parsing.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
from typing import List

import pytest

from dbt_semantic_interfaces.type_enums.time_granularity import TimeGranularity

from dbt.cli.main import dbtRunner
from dbt.contracts.graph.manifest import Manifest
from dbt.events.base_types import BaseEvent
from dbt.tests.util import write_file


Expand All @@ -23,22 +26,27 @@
- name: txn_revenue
expr: revenue
agg: sum
agg_time_dimension: ds
- name: sum_of_things
expr: 2
agg: sum
agg_time_dimension: ds
- name: has_revenue
expr: true
agg: sum_boolean
agg_time_dimension: ds
- name: discrete_order_value_p99
expr: order_total
agg: percentile
agg_time_dimension: ds
agg_params:
percentile: 0.99
use_discrete_percentile: True
use_approximate_percentile: False
- name: test_agg_params_optional_are_empty
expr: order_total
agg: percentile
agg_time_dimension: ds
agg_params:
percentile: 0.99
Expand All @@ -53,6 +61,14 @@
- name: user
type: foreign
expr: user_id
metrics:
- name: records_with_revenue
label: "Number of records with revenue"
description: Total number of records with revenue
type: simple
type_params:
measure: has_revenue
"""

schema_without_semantic_model_yml = """models:
Expand Down Expand Up @@ -109,6 +125,28 @@ def test_semantic_model_parsing(self, project):
)
assert len(semantic_model.measures) == 5

def test_semantic_model_error(self, project):
    """Parsing must fail and emit a SemanticValidationFailure event."""
    # Rename the `sum_of_things` measure to `has_revenue` so the schema ends
    # up with two measures sharing one name, which should fail validation.
    duplicate_measure_yml = schema_yml.replace("sum_of_things", "has_revenue")
    write_file(duplicate_measure_yml, project.project_root, "models", "schema.yml")

    # Collect every event fired during the parse via the runner callback.
    captured: List[BaseEvent] = []
    result = dbtRunner(callbacks=[captured.append]).invoke(["parse"])
    assert not result.success

    validation_errors = [
        event for event in captured if event.info.name == "SemanticValidationFailure"
    ]
    assert validation_errors


class TestSemanticModelPartialParsing:
@pytest.fixture(scope="class")
def models(self):
    """Class-scoped fixture mapping each file name to its source string."""
    project_files = dict(
        [
            ("schema.yml", schema_yml),
            ("fct_revenue.sql", fct_revenue_sql),
            ("metricflow_time_spine.sql", metricflow_time_spine_sql),
        ]
    )
    return project_files

def test_semantic_model_changed_partial_parsing(self, project):
# First, use the default schema.yml to define our semantic model, and
# run the dbt parse command
Expand Down
1 change: 1 addition & 0 deletions tests/unit/test_events.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,7 @@ def test_event_codes(self):
types.RegistryResponseMissingNestedKeys(response=""),
types.RegistryResponseExtraNestedKeys(response=""),
types.DepsSetDownloadDirectory(path=""),
types.SemanticValidationFailure(msg=""),
# Q - Node execution ======================
types.RunningOperationCaughtError(exc=""),
types.CompileComplete(),
Expand Down

0 comments on commit 44572e7

Please sign in to comment.