diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index 5873b7ac25c09..8e8118d6c4e42 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -99,7 +99,7 @@ sqlglot_lib = { # Using an Acryl fork of sqlglot. # https://github.com/tobymao/sqlglot/compare/main...hsheth2:sqlglot:hsheth?expand=1 - "acryl-sqlglot==22.3.1.dev3", + "acryl-sqlglot==22.4.1.dev4", } classification_lib = { diff --git a/metadata-ingestion/src/datahub/cli/check_cli.py b/metadata-ingestion/src/datahub/cli/check_cli.py index 419ae5668292d..082e3343d641b 100644 --- a/metadata-ingestion/src/datahub/cli/check_cli.py +++ b/metadata-ingestion/src/datahub/cli/check_cli.py @@ -1,4 +1,7 @@ +import dataclasses +import json import logging +import pathlib import pprint import shutil import tempfile @@ -17,6 +20,7 @@ from datahub.ingestion.source.source_registry import source_registry from datahub.ingestion.transformer.transform_registry import transform_registry from datahub.telemetry import telemetry +from datahub.utilities.file_backed_collections import ConnectionWrapper, FileBackedList logger = logging.getLogger(__name__) @@ -339,3 +343,28 @@ def test_path_spec(config: str, input: str, path_spec_key: str) -> None: f"Failed to validate pattern {pattern_dicts} in path {path_spec_key}" ) raise e + + +@check.command() +@click.argument("query-log-file", type=click.Path(exists=True, dir_okay=False)) +@click.option("--output", type=click.Path()) +def extract_sql_agg_log(query_log_file: str, output: Optional[str]) -> None: + """Convert a sqlite db generated by the SqlParsingAggregator into a JSON.""" + + from datahub.sql_parsing.sql_parsing_aggregator import LoggedQuery + + assert dataclasses.is_dataclass(LoggedQuery) + + shared_connection = ConnectionWrapper(pathlib.Path(query_log_file)) + query_log = FileBackedList[LoggedQuery]( + shared_connection=shared_connection, tablename="stored_queries" + ) + logger.info(f"Extracting {len(query_log)} queries from {query_log_file}") + queries = [dataclasses.asdict(query) for query in query_log] + + if output: + with open(output, "w") as f: + json.dump(queries, f, indent=2) + logger.info(f"Extracted {len(queries)} queries to {output}") + else: + click.echo(json.dumps(queries, indent=2)) diff --git a/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py b/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py index a8b943099dfc1..53be786d30bf4 100644 --- a/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py +++ b/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py @@ -1,8 +1,10 @@ +import contextlib import dataclasses import enum import itertools import json import logging +import os import pathlib import tempfile import uuid @@ -15,6 +17,7 @@ from datahub.emitter.mce_builder import get_sys_time, make_ts_millis from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.emitter.sql_parsing_builder import compute_upstream_fields +from datahub.ingestion.api.closeable import Closeable from datahub.ingestion.api.report import Report from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.graph.client import DataHubGraph @@ -53,9 +56,6 @@ QueryId = str UrnStr = str -_DEFAULT_USER_URN = CorpUserUrn("_ingestion") -_MISSING_SESSION_ID = "__MISSING_SESSION_ID" - class QueryLogSetting(enum.Enum): DISABLED = "DISABLED" @@ -63,6 +63,23 @@ class QueryLogSetting(enum.Enum): STORE_FAILED = "STORE_FAILED" +_DEFAULT_USER_URN = CorpUserUrn("_ingestion") +_MISSING_SESSION_ID = "__MISSING_SESSION_ID" +_DEFAULT_QUERY_LOG_SETTING = QueryLogSetting[ + os.getenv("DATAHUB_SQL_AGG_QUERY_LOG") or QueryLogSetting.DISABLED.name +] + + +@dataclasses.dataclass +class LoggedQuery: + query: str + session_id: Optional[str] + timestamp: Optional[datetime] + user: Optional[UrnStr] + default_db: Optional[str] + default_schema: Optional[str] + + @dataclasses.dataclass class ViewDefinition: view_definition: str @@ -170,7 +187,7 @@ def compute_stats(self) -> None: return super().compute_stats() -class SqlParsingAggregator: +class SqlParsingAggregator(Closeable): def __init__( self, *, @@ -185,7 +202,7 @@ def __init__( usage_config: Optional[BaseUsageConfig] = None, is_temp_table: Optional[Callable[[UrnStr], bool]] = None, format_queries: bool = True, - query_log: QueryLogSetting = QueryLogSetting.DISABLED, + query_log: QueryLogSetting = _DEFAULT_QUERY_LOG_SETTING, ) -> None: self.platform = DataPlatformUrn(platform) self.platform_instance = platform_instance @@ -210,13 +227,18 @@ def __init__( self.format_queries = format_queries self.query_log = query_log + # The exit stack helps ensure that we close all the resources we open. + self._exit_stack = contextlib.ExitStack() + # Set up the schema resolver. self._schema_resolver: SchemaResolver if graph is None: - self._schema_resolver = SchemaResolver( - platform=self.platform.platform_name, - platform_instance=self.platform_instance, - env=self.env, + self._schema_resolver = self._exit_stack.enter_context( + SchemaResolver( + platform=self.platform.platform_name, + platform_instance=self.platform_instance, + env=self.env, + ) ) else: self._schema_resolver = None # type: ignore @@ -235,27 +257,33 @@ def __init__( # By providing a filename explicitly here, we also ensure that the file # is not automatically deleted on exit. - self._shared_connection = ConnectionWrapper(filename=query_log_path) + self._shared_connection = self._exit_stack.enter_context( + ConnectionWrapper(filename=query_log_path) + ) # Stores the logged queries. - self._logged_queries = FileBackedList[str]( + self._logged_queries = FileBackedList[LoggedQuery]( shared_connection=self._shared_connection, tablename="stored_queries" ) + self._exit_stack.push(self._logged_queries) # Map of query_id -> QueryMetadata self._query_map = FileBackedDict[QueryMetadata]( shared_connection=self._shared_connection, tablename="query_map" ) + self._exit_stack.push(self._query_map) # Map of downstream urn -> { query ids } self._lineage_map = FileBackedDict[OrderedSet[QueryId]]( shared_connection=self._shared_connection, tablename="lineage_map" ) + self._exit_stack.push(self._lineage_map) # Map of view urn -> view definition self._view_definitions = FileBackedDict[ViewDefinition]( shared_connection=self._shared_connection, tablename="view_definitions" ) + self._exit_stack.push(self._view_definitions) # Map of session ID -> {temp table name -> query id} # Needs to use the query_map to find the info about the query. @@ -263,16 +291,20 @@ def __init__( self._temp_lineage_map = FileBackedDict[Dict[UrnStr, QueryId]]( shared_connection=self._shared_connection, tablename="temp_lineage_map" ) + self._exit_stack.push(self._temp_lineage_map) # Map of query ID -> schema fields, only for query IDs that generate temp tables. self._inferred_temp_schemas = FileBackedDict[List[models.SchemaFieldClass]]( - shared_connection=self._shared_connection, tablename="inferred_temp_schemas" + shared_connection=self._shared_connection, + tablename="inferred_temp_schemas", ) + self._exit_stack.push(self._inferred_temp_schemas) # Map of table renames, from original UrnStr to new UrnStr. self._table_renames = FileBackedDict[UrnStr]( shared_connection=self._shared_connection, tablename="table_renames" ) + self._exit_stack.push(self._table_renames) # Usage aggregator. This will only be initialized if usage statistics are enabled. # TODO: Replace with FileBackedDict. @@ -281,6 +313,9 @@ def __init__( assert self.usage_config is not None self._usage_aggregator = UsageAggregator(config=self.usage_config) + def close(self) -> None: + self._exit_stack.close() + @property def _need_schemas(self) -> bool: return self.generate_lineage or self.generate_usage_statistics @@ -499,6 +534,9 @@ def add_observed_query( default_db=default_db, default_schema=default_schema, schema_resolver=schema_resolver, + session_id=session_id, + timestamp=query_timestamp, + user=user, ) if parsed.debug_info.error: self.report.observed_query_parse_failures.append( @@ -700,6 +738,9 @@ def _run_sql_parser( default_db: Optional[str], default_schema: Optional[str], schema_resolver: SchemaResolverInterface, + session_id: str = _MISSING_SESSION_ID, + timestamp: Optional[datetime] = None, + user: Optional[CorpUserUrn] = None, ) -> SqlParsingResult: parsed = sqlglot_lineage( query, @@ -712,7 +753,15 @@ def _run_sql_parser( if self.query_log == QueryLogSetting.STORE_ALL or ( self.query_log == QueryLogSetting.STORE_FAILED and parsed.debug_info.error ): - self._logged_queries.append(query) + query_log_entry = LoggedQuery( + query=query, + session_id=session_id if session_id != _MISSING_SESSION_ID else None, + timestamp=timestamp, + user=user.urn() if user else None, + default_db=default_db, + default_schema=default_schema, + ) + self._logged_queries.append(query_log_entry) # Also add some extra logging. if parsed.debug_info.error: diff --git a/metadata-ingestion/src/datahub/testing/compare_metadata_json.py b/metadata-ingestion/src/datahub/testing/compare_metadata_json.py index 54f6a6e984c00..91f5d6f914676 100644 --- a/metadata-ingestion/src/datahub/testing/compare_metadata_json.py +++ b/metadata-ingestion/src/datahub/testing/compare_metadata_json.py @@ -62,9 +62,13 @@ def assert_metadata_files_equal( # We have to "normalize" the golden file by reading and writing it back out. # This will clean up nulls, double serialization, and other formatting issues. with tempfile.NamedTemporaryFile() as temp: - golden_metadata = read_metadata_file(pathlib.Path(golden_path)) - write_metadata_file(pathlib.Path(temp.name), golden_metadata) - golden = load_json_file(temp.name) + try: + golden_metadata = read_metadata_file(pathlib.Path(golden_path)) + write_metadata_file(pathlib.Path(temp.name), golden_metadata) + golden = load_json_file(temp.name) + except (ValueError, AssertionError) as e: + logger.info(f"Error reformatting golden file as MCP/MCEs: {e}") + golden = load_json_file(golden_path) diff = diff_metadata_json(output, golden, ignore_paths, ignore_order=ignore_order) if diff and update_golden: @@ -107,7 +111,7 @@ def diff_metadata_json( # if ignore_order is False, always use DeepDiff except CannotCompareMCPs as e: logger.info(f"{e}, falling back to MCE diff") - except AssertionError as e: + except (AssertionError, ValueError) as e: logger.warning(f"Reverting to old diff method: {e}") logger.debug("Error with new diff method", exc_info=True) diff --git a/metadata-ingestion/src/datahub/utilities/file_backed_collections.py b/metadata-ingestion/src/datahub/utilities/file_backed_collections.py index 821b69c968ee4..d264a3970fdde 100644 --- a/metadata-ingestion/src/datahub/utilities/file_backed_collections.py +++ b/metadata-ingestion/src/datahub/utilities/file_backed_collections.py @@ -126,6 +126,7 @@ def executemany( def close(self) -> None: for obj in self._dependent_objects: obj.close() + self._dependent_objects.clear() with self.conn_lock: self.conn.close() if self._temp_directory: @@ -440,7 +441,7 @@ def __del__(self) -> None: self.close() -class FileBackedList(Generic[_VT]): +class FileBackedList(Generic[_VT], Closeable): """An append-only, list-like object that stores its contents in a SQLite database.""" _len: int = field(default=0) @@ -456,7 +457,6 @@ def __init__( cache_max_size: Optional[int] = None, cache_eviction_batch_size: Optional[int] = None, ) -> None: - self._len = 0 self._dict = FileBackedDict[_VT]( shared_connection=shared_connection, tablename=tablename, @@ -468,6 +468,12 @@ def __init__( or _DEFAULT_MEMORY_CACHE_EVICTION_BATCH_SIZE, ) + if shared_connection: + shared_connection._dependent_objects.append(self) + + # In case we're reusing an existing list, we need to run a query to get the length. + self._len = len(self._dict) + @property def tablename(self) -> str: return self._dict.tablename diff --git a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_multiple_files.json b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_multiple_files.json index d59fce788c95e..3c5b0027ea8ad 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_multiple_files.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_multiple_files.json @@ -12,7 +12,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -34,7 +35,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -63,19 +65,19 @@ }, "fields": [ { - "fieldPath": "2", + "fieldPath": "Sampling Date", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.DateType": {} } }, - "nativeDataType": "string", + "nativeDataType": "date", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "3", + "fieldPath": "Site ID", "nullable": false, "type": { "type": { @@ -87,7 +89,7 @@ "isPartOfKey": false }, { - "fieldPath": "Br \n(mg/L)", + "fieldPath": "Park ID", "nullable": false, "type": { "type": { @@ -99,7 +101,7 @@ "isPartOfKey": false }, { - "fieldPath": "Ca \n(mg/L)", + "fieldPath": "Lat (\u00b0N)", "nullable": false, "type": { "type": { @@ -111,7 +113,7 @@ "isPartOfKey": false }, { - "fieldPath": "Cl \n(mg/L)", + "fieldPath": "Long (\u00b0W)", "nullable": false, "type": { "type": { @@ -123,7 +125,7 @@ "isPartOfKey": false }, { - "fieldPath": "Cond (\u00b5S/cm)", + "fieldPath": "Water Temp (\u00b0C)", "nullable": false, "type": { "type": { @@ -135,31 +137,31 @@ "isPartOfKey": false }, { - "fieldPath": "DO (mg/L)", + "fieldPath": "Cond (\u00b5S/cm)", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "string", + "nativeDataType": "number", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "DOC [mg/L C]", + "fieldPath": "pH", "nullable": false, "type": { "type": { - "com.linkedin.schema.NumberType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "number", + "nativeDataType": "string", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "F \n(mg/L)", + "fieldPath": "DO (mg/L)", "nullable": false, "type": { "type": { @@ -171,19 +173,19 @@ "isPartOfKey": false }, { - "fieldPath": "K \n(mg/L)", + "fieldPath": "Secchi Depth (m)", "nullable": false, "type": { "type": { - "com.linkedin.schema.NumberType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "number", + "nativeDataType": "string", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "Lat (\u00b0N)", + "fieldPath": "UV Absorbance, 254nm", "nullable": false, "type": { "type": { @@ -195,7 +197,7 @@ "isPartOfKey": false }, { - "fieldPath": "Long (\u00b0W)", + "fieldPath": "DOC [mg/L C]", "nullable": false, "type": { "type": { @@ -207,7 +209,7 @@ "isPartOfKey": false }, { - "fieldPath": "Mg \n(mg/L)", + "fieldPath": "SUVA, 254nm", "nullable": false, "type": { "type": { @@ -243,31 +245,31 @@ "isPartOfKey": false }, { - "fieldPath": "Na \n(mg/L)", + "fieldPath": "PO4-P \n(mg P/L)", "nullable": false, "type": { "type": { - "com.linkedin.schema.NumberType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "number", + "nativeDataType": "string", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "PO4-P \n(mg P/L)", + "fieldPath": "TDN \n(mg N/L)", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "string", + "nativeDataType": "number", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "Park ID", + "fieldPath": "TDP \n(mg P/L)", "nullable": false, "type": { "type": { @@ -279,7 +281,7 @@ "isPartOfKey": false }, { - "fieldPath": "SO4-S \n(mg/L)", + "fieldPath": "Cl \n(mg/L)", "nullable": false, "type": { "type": { @@ -291,7 +293,7 @@ "isPartOfKey": false }, { - "fieldPath": "SUVA, 254nm", + "fieldPath": "SO4-S \n(mg/L)", "nullable": false, "type": { "type": { @@ -303,7 +305,7 @@ "isPartOfKey": false }, { - "fieldPath": "Sampling Date", + "fieldPath": "F \n(mg/L)", "nullable": false, "type": { "type": { @@ -315,7 +317,7 @@ "isPartOfKey": false }, { - "fieldPath": "Secchi Depth (m)", + "fieldPath": "Br \n(mg/L)", "nullable": false, "type": { "type": { @@ -327,19 +329,19 @@ "isPartOfKey": false }, { - "fieldPath": "Site ID", + "fieldPath": "Na \n(mg/L)", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "string", + "nativeDataType": "number", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "TDN \n(mg N/L)", + "fieldPath": "K \n(mg/L)", "nullable": false, "type": { "type": { @@ -351,19 +353,19 @@ "isPartOfKey": false }, { - "fieldPath": "TDP \n(mg P/L)", + "fieldPath": "Ca \n(mg/L)", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "string", + "nativeDataType": "number", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "UV Absorbance, 254nm", + "fieldPath": "Mg \n(mg/L)", "nullable": false, "type": { "type": { @@ -375,19 +377,19 @@ "isPartOfKey": false }, { - "fieldPath": "Water Temp (\u00b0C)", + "fieldPath": "d18O", "nullable": false, "type": { "type": { - "com.linkedin.schema.NumberType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "number", + "nativeDataType": "string", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "d18O", + "fieldPath": "dD", "nullable": false, "type": { "type": { @@ -399,7 +401,7 @@ "isPartOfKey": false }, { - "fieldPath": "dD", + "fieldPath": "field29", "nullable": false, "type": { "type": { @@ -411,7 +413,7 @@ "isPartOfKey": false }, { - "fieldPath": "field29", + "fieldPath": "2", "nullable": false, "type": { "type": { @@ -423,7 +425,7 @@ "isPartOfKey": false }, { - "fieldPath": "pH", + "fieldPath": "3", "nullable": false, "type": { "type": { @@ -439,7 +441,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -460,7 +463,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -481,7 +485,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -496,7 +501,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -512,7 +518,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -529,7 +536,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -549,7 +557,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -570,7 +579,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -585,7 +595,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -601,7 +612,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -618,7 +630,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -633,7 +646,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -657,7 +671,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -678,7 +693,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -693,7 +709,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -709,7 +726,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -726,7 +744,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -741,7 +760,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -769,7 +789,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -790,7 +811,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -805,7 +827,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -821,7 +844,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -838,7 +862,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -853,7 +878,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -885,7 +911,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -906,7 +933,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -921,7 +949,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -937,7 +966,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -954,7 +984,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -969,7 +1000,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1005,7 +1037,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1026,7 +1059,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1041,7 +1075,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1057,7 +1092,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1074,7 +1110,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1089,7 +1126,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1129,7 +1167,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1150,7 +1189,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1165,7 +1205,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1181,7 +1222,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1198,7 +1240,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1213,7 +1256,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1257,7 +1301,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1278,7 +1323,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1293,7 +1339,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1309,7 +1356,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1326,7 +1374,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1341,7 +1390,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1389,7 +1439,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1404,7 +1455,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -2701,7 +2753,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -2753,7 +2806,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -2769,7 +2823,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -2791,7 +2846,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -2884,7 +2940,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -2905,7 +2962,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -2920,7 +2978,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -3270,7 +3329,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -3322,7 +3382,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -3338,7 +3399,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -3360,7 +3422,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -3453,7 +3516,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -3474,7 +3538,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -3489,7 +3554,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -3839,7 +3905,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -3891,7 +3958,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -3913,7 +3981,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -3929,7 +3998,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -3998,7 +4068,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -4019,7 +4090,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -4034,7 +4106,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -4067,7 +4140,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -4119,7 +4193,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -4141,7 +4216,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -4234,7 +4310,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -4255,7 +4332,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -4271,7 +4349,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -4286,7 +4365,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -4590,7 +4670,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -4642,7 +4723,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -4664,7 +4746,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -4680,7 +4763,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -4773,7 +4857,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -4794,7 +4879,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -4809,7 +4895,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -4972,7 +5059,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -5024,7 +5112,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -5046,7 +5135,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -5451,7 +5541,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -5472,7 +5563,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -5487,7 +5579,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -7647,7 +7740,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -7699,7 +7793,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -7715,7 +7810,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -7730,7 +7826,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -7745,7 +7842,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -7760,7 +7858,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -7775,7 +7874,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -7790,7 +7890,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -7805,7 +7906,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -7820,7 +7922,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_files.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_files.json index 58c225e1ec4c9..d7a9bca716fd6 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_files.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_files.json @@ -12,7 +12,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -34,7 +35,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -63,19 +65,19 @@ }, "fields": [ { - "fieldPath": "2", + "fieldPath": "Sampling Date", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.DateType": {} } }, - "nativeDataType": "string", + "nativeDataType": "date", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "3", + "fieldPath": "Site ID", "nullable": false, "type": { "type": { @@ -87,7 +89,7 @@ "isPartOfKey": false }, { - "fieldPath": "Br \n(mg/L)", + "fieldPath": "Park ID", "nullable": false, "type": { "type": { @@ -99,7 +101,7 @@ "isPartOfKey": false }, { - "fieldPath": "Ca \n(mg/L)", + "fieldPath": "Lat (\u00b0N)", "nullable": false, "type": { "type": { @@ -111,7 +113,7 @@ "isPartOfKey": false }, { - "fieldPath": "Cl \n(mg/L)", + "fieldPath": "Long (\u00b0W)", "nullable": false, "type": { "type": { @@ -123,7 +125,7 @@ "isPartOfKey": false }, { - "fieldPath": "Cond (\u00b5S/cm)", + "fieldPath": "Water Temp (\u00b0C)", "nullable": false, "type": { "type": { @@ -135,31 +137,31 @@ "isPartOfKey": false }, { - "fieldPath": "DO (mg/L)", + "fieldPath": "Cond (\u00b5S/cm)", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "string", + "nativeDataType": "number", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "DOC [mg/L C]", + "fieldPath": "pH", "nullable": false, "type": { "type": { - "com.linkedin.schema.NumberType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "number", + "nativeDataType": "string", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "F \n(mg/L)", + "fieldPath": "DO (mg/L)", "nullable": false, "type": { "type": { @@ -171,19 +173,19 @@ "isPartOfKey": false }, { - "fieldPath": "K \n(mg/L)", + "fieldPath": "Secchi Depth (m)", "nullable": false, "type": { "type": { - "com.linkedin.schema.NumberType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "number", + "nativeDataType": "string", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "Lat (\u00b0N)", + "fieldPath": "UV Absorbance, 254nm", "nullable": false, "type": { "type": { @@ -195,7 +197,7 @@ "isPartOfKey": false }, { - "fieldPath": "Long (\u00b0W)", + "fieldPath": "DOC [mg/L C]", "nullable": false, "type": { "type": { @@ -207,7 +209,7 @@ "isPartOfKey": false }, { - "fieldPath": "Mg \n(mg/L)", + "fieldPath": "SUVA, 254nm", "nullable": false, "type": { "type": { @@ -243,31 +245,31 @@ "isPartOfKey": false }, { - "fieldPath": "Na \n(mg/L)", + "fieldPath": "PO4-P \n(mg P/L)", "nullable": false, "type": { "type": { - "com.linkedin.schema.NumberType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "number", + "nativeDataType": "string", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "PO4-P \n(mg P/L)", + "fieldPath": "TDN \n(mg N/L)", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "string", + "nativeDataType": "number", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "Park ID", + "fieldPath": "TDP \n(mg P/L)", "nullable": false, "type": { "type": { @@ -279,7 +281,7 @@ "isPartOfKey": false }, { - "fieldPath": "SO4-S \n(mg/L)", + "fieldPath": "Cl \n(mg/L)", "nullable": false, "type": { "type": { @@ -291,7 +293,7 @@ "isPartOfKey": false }, { - "fieldPath": "SUVA, 254nm", + "fieldPath": "SO4-S \n(mg/L)", "nullable": false, "type": { "type": { @@ -303,7 +305,7 @@ "isPartOfKey": false }, { - "fieldPath": "Sampling Date", + "fieldPath": "F \n(mg/L)", "nullable": false, "type": { "type": { @@ -315,7 +317,7 @@ "isPartOfKey": false }, { - "fieldPath": "Secchi Depth (m)", + "fieldPath": "Br \n(mg/L)", "nullable": false, "type": { "type": { @@ -327,19 +329,19 @@ "isPartOfKey": false }, { - "fieldPath": "Site ID", + "fieldPath": "Na \n(mg/L)", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "string", + "nativeDataType": "number", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "TDN \n(mg N/L)", + "fieldPath": "K \n(mg/L)", "nullable": false, "type": { "type": { @@ -351,19 +353,19 @@ "isPartOfKey": false }, { - "fieldPath": "TDP \n(mg P/L)", + "fieldPath": "Ca \n(mg/L)", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "string", + "nativeDataType": "number", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "UV Absorbance, 254nm", + "fieldPath": "Mg \n(mg/L)", "nullable": false, "type": { "type": { @@ -375,19 +377,19 @@ "isPartOfKey": false }, { - "fieldPath": "Water Temp (\u00b0C)", + "fieldPath": "d18O", "nullable": false, "type": { "type": { - "com.linkedin.schema.NumberType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "number", + "nativeDataType": "string", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "d18O", + "fieldPath": "dD", "nullable": false, "type": { "type": { @@ -399,7 +401,7 @@ "isPartOfKey": false }, { - "fieldPath": "dD", + "fieldPath": "field29", "nullable": false, "type": { "type": { @@ -411,7 +413,7 @@ "isPartOfKey": false }, { - "fieldPath": "field29", + "fieldPath": "2", "nullable": false, "type": { "type": { @@ -423,7 +425,7 @@ "isPartOfKey": false }, { - "fieldPath": "pH", + "fieldPath": "3", "nullable": false, "type": { "type": { @@ -439,7 +441,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -460,7 +463,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -481,7 +485,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -496,7 +501,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -512,7 +518,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -529,7 +536,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -549,7 +557,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -570,7 +579,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -585,7 +595,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -601,7 +612,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -618,7 +630,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -633,7 +646,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -657,7 +671,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -678,7 +693,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -693,7 +709,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -709,7 +726,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -726,7 +744,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -741,7 +760,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -769,7 +789,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -790,7 +811,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -805,7 +827,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -821,7 +844,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -838,7 +862,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -853,7 +878,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -885,7 +911,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -900,7 +927,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -936,7 +964,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -958,7 +987,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -974,7 +1004,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1067,7 +1098,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1088,7 +1120,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1103,7 +1136,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1139,7 +1173,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1155,7 +1190,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1177,7 +1213,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1270,7 +1307,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1291,7 +1329,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1306,7 +1345,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1342,7 +1382,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1364,7 +1405,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1380,7 +1422,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1449,7 +1492,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1470,7 +1514,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1485,7 +1530,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1521,7 +1567,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1543,7 +1590,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1559,7 +1607,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1652,7 +1701,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1673,7 +1723,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1688,7 +1739,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1724,7 +1776,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1740,7 +1793,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1762,7 +1816,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1855,7 +1910,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1876,7 +1932,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1891,7 +1948,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1927,7 +1985,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1949,7 +2008,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -2354,7 +2414,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -2375,7 +2436,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -2391,7 +2453,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -2406,7 +2469,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -2442,7 +2506,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -2457,7 +2522,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -2472,7 +2538,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -2487,7 +2554,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -2502,7 +2570,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -2517,7 +2586,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -2532,7 +2602,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -2547,7 +2618,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_basic_lineage_query_log.json b/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_basic_lineage_query_log.json new file mode 100644 index 0000000000000..e8e72bf25d303 --- /dev/null +++ b/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_basic_lineage_query_log.json @@ -0,0 +1,10 @@ +[ + { + "query": "create table foo as select a, b from bar", + "session_id": null, + "timestamp": null, + "user": null, + "default_db": "dev", + "default_schema": "public" + } +] \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_multistep_temp_table.json b/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_multistep_temp_table.json index 2513147164969..c4d3bee43faa1 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_multistep_temp_table.json +++ b/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_multistep_temp_table.json @@ -83,7 +83,7 @@ "aspect": { "json": { "statement": { - "value": "create table #temp2 as select b, c from upstream2;\n\ncreate table #temp1 as select a, 2*b as b from upstream1;\n\ncreate temp table staging_foo as select up1.a, up1.b, up2.c from #temp1 up1 left join #temp2 up2 on up1.b = up2.b where up1.b > 0;\n\ninsert into table prod_foo\nselect * from staging_foo", + "value": "CREATE TABLE #temp2 AS\nSELECT\n b,\n c\nFROM upstream2;\n\nCREATE TABLE #temp1 AS\nSELECT\n a,\n 2 * b AS b\nFROM upstream1;\n\nCREATE TEMPORARY TABLE staging_foo AS\nSELECT\n up1.a,\n up1.b,\n up2.c\nFROM #temp1 AS up1\nLEFT JOIN #temp2 AS up2\n ON up1.b = up2.b\nWHERE\n up1.b > 0;\n\nINSERT INTO prod_foo\nSELECT\n *\nFROM staging_foo", "language": "SQL" }, "source": "SYSTEM", diff --git a/metadata-ingestion/tests/unit/sql_parsing/test_sql_aggregator.py b/metadata-ingestion/tests/unit/sql_parsing/test_sql_aggregator.py index 01755ad1ee340..9f93ab00f0ccd 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/test_sql_aggregator.py +++ b/metadata-ingestion/tests/unit/sql_parsing/test_sql_aggregator.py @@ -13,6 +13,7 @@ from datahub.sql_parsing.sql_parsing_common import QueryType from datahub.sql_parsing.sqlglot_lineage import ColumnLineageInfo, ColumnRef from tests.test_helpers import mce_helpers +from tests.test_helpers.click_helpers import run_datahub_cmd RESOURCE_DIR = pathlib.Path(__file__).parent / "aggregator_goldens" FROZEN_TIME = "2024-02-06 01:23:45" @@ -23,12 +24,13 @@ def _ts(ts: int) -> datetime: @freeze_time(FROZEN_TIME) -def test_basic_lineage(pytestconfig: pytest.Config) -> None: +def test_basic_lineage(pytestconfig: pytest.Config, tmp_path: pathlib.Path) -> None: aggregator = SqlParsingAggregator( platform="redshift", generate_lineage=True, generate_usage_statistics=False, generate_operations=False, + query_log=QueryLogSetting.STORE_ALL, ) aggregator.add_observed_query( @@ -45,6 +47,23 @@ def test_basic_lineage(pytestconfig: pytest.Config) -> None: golden_path=RESOURCE_DIR / "test_basic_lineage.json", ) + # This test also validates the query log storage functionality. + aggregator.close() + query_log_db = aggregator.report.query_log_path + query_log_json = tmp_path / "query_log.json" + run_datahub_cmd( + [ + "check", + "extract-sql-agg-log", + str(query_log_db), + "--output", + str(query_log_json), + ] + ) + mce_helpers.check_golden_file( + pytestconfig, query_log_json, RESOURCE_DIR / "test_basic_lineage_query_log.json" + ) + @freeze_time(FROZEN_TIME) def test_overlapping_inserts(pytestconfig: pytest.Config) -> None: