From fad1a12ce9cbc8a3a27888499e475860faa023d8 Mon Sep 17 00:00:00 2001 From: Mat Date: Wed, 3 Jul 2024 09:40:48 +0100 Subject: [PATCH] Use formatted domain name when assigning to domain (#175) This formatting was introduced when creating the domains, so the assign step needs to apply the same formatting to stay consistent. --- ingestion/dbt_manifest_utils.py | 2 +- tests/test_assign_cadet_domains.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ingestion/dbt_manifest_utils.py b/ingestion/dbt_manifest_utils.py index be98878..a456a83 100644 --- a/ingestion/dbt_manifest_utils.py +++ b/ingestion/dbt_manifest_utils.py @@ -60,7 +60,7 @@ def convert_cadet_manifest_table_to_datahub(node_info: dict) -> Tuple[str, str]: eg 'database__table' is converted to a regex string to detect it's urn like 'urn:li:dataset:\\(urn:li:dataPlatform:dbt,cadet\\.awsdatacatalog\\.database\\.table,PROD\\)' """ - domain = node_info.get("fqn", [])[1] + domain = format_domain_name(node_info.get("fqn", [])[1]) node_table_name = node_info.get("fqn", [])[-1] # In CaDeT the convention is to name a table database__table diff --git a/tests/test_assign_cadet_domains.py b/tests/test_assign_cadet_domains.py index cb4066e..cf99b58 100644 --- a/tests/test_assign_cadet_domains.py +++ b/tests/test_assign_cadet_domains.py @@ -26,7 +26,7 @@ def test_pattern_add_dataset_domain_aspect_name(self, mock_datahub_graph): assert transformer.aspect_name() == models.DomainsClass.ASPECT_NAME def test_pattern_add_dataset_domain_match(self, mock_datahub_graph): - prison_domain = builder.make_domain_urn("prison") + prison_domain = builder.make_domain_urn("Prison") pipeline_context: PipelineContext = PipelineContext( run_id="test_simple_add_dataset_domain" @@ -53,8 +53,8 @@ def test_pattern_add_dataset_domain_match(self, mock_datahub_graph): assert prison_domain in transformed_aspect.domains def test_pattern_add_dataset_domain_overwrite(self, mock_datahub_graph): - prison_domain = builder.make_domain_urn("prison") - 
probation_domain = builder.make_domain_urn("probation") + prison_domain = builder.make_domain_urn("Prison") + probation_domain = builder.make_domain_urn("Probation") pipeline_context: PipelineContext = PipelineContext( run_id="test_simple_add_dataset_domain"