Skip to content

Commit

Permalink
add custom cadet tag transformer (#135)
Browse files Browse the repository at this point in the history
  • Loading branch information
LavMatt committed May 31, 2024
1 parent 84b2a53 commit 0ae942f
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 10 deletions.
12 changes: 2 additions & 10 deletions ingestion/cadet.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,6 @@ source:
stateful_ingestion:
remove_stale_metadata: true
transformers:
- type: "pattern_add_dataset_domain"
- type: "add_dataset_tags"
config:
semantics: OVERWRITE
domain_pattern:
rules:
'urn:li:dataset:\(urn:li:dataPlatform:dbt,awsdatacatalog.*common_platform.*':
["HMCTS"]
'urn:li:dataset:\(urn:li:dataPlatform:dbt,awsdatacatalog.*prison.*':
["HMPPS"]
'urn:li:dataset:\(urn:li:dataPlatform:dbt,awsdatacatalog.*sirius.*':
["OPG"]
get_tags_to_add: "ingestion.cadet_display_in_catalogue_tagger.add_display_in_catalogue_tag"
17 changes: 17 additions & 0 deletions ingestion/cadet_display_in_catalogue_tagger.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import logging
from typing import List

import datahub.emitter.mce_builder as builder
from datahub.metadata.schema_classes import TagAssociationClass


def add_display_in_catalogue_tag(entity_urn: str) -> List[TagAssociationClass]:
    """Compute the tags to associate to a given dataset.

    Datasets whose URN contains the substring ``"athena_cadet"`` receive no
    tags; every other dataset is tagged with ``display_in_catalogue``.

    Args:
        entity_urn: URN of the dataset being processed.

    Returns:
        A one-element list holding the ``display_in_catalogue`` tag
        association, or an empty list when the URN contains
        ``"athena_cadet"``.
    """
    # Guard clause: URNs containing "athena_cadet" are left untagged.
    if "athena_cadet" in entity_urn:
        return []

    tag_urn = builder.make_tag_urn(tag="display_in_catalogue")
    tags = [TagAssociationClass(tag=tag_urn)]

    # Hand the values to logging as arguments so the message is only rendered
    # when an INFO record is actually emitted (the text is unchanged).
    logging.info("Tagging dataset %s with %s.", entity_urn, tags)
    return tags

0 comments on commit 0ae942f

Please sign in to comment.