def add_display_in_catalogue_tag(entity_urn: str) -> List[TagAssociationClass]:
    """Return the tag associations to attach to the entity ``entity_urn``.

    URNs containing the substring ``athena_cadet`` receive no tags. Every
    other URN receives a single ``display_in_catalogue`` tag association,
    and the tagging is reported via ``logging.info``.
    """
    # Guard clause: anything mentioning athena_cadet is left untagged.
    if "athena_cadet" in entity_urn:
        return []

    display_tag = TagAssociationClass(
        tag=builder.make_tag_urn(tag="display_in_catalogue")
    )
    tags = [display_tag]
    logging.info(f"Tagging dataset {entity_urn} with {tags}.")
    return tags