From 521380755861bfccd2842d3f003e75fcb70c51f3 Mon Sep 17 00:00:00 2001 From: bturkus Date: Wed, 17 Jan 2024 14:12:21 -0500 Subject: [PATCH] update filemaker to json add film fields to zero value no skips change logic for handling --- ami_scripts/filemaker_to_json_validator.py | 38 +++++++++++++--------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/ami_scripts/filemaker_to_json_validator.py b/ami_scripts/filemaker_to_json_validator.py index 0105b500..a6189036 100755 --- a/ami_scripts/filemaker_to_json_validator.py +++ b/ami_scripts/filemaker_to_json_validator.py @@ -10,7 +10,10 @@ import glob import numpy as np -ZERO_VALUE_FIELDS = ['source.audioRecording.numberOfAudioTracks'] +ZERO_VALUE_FIELDS = ['source.audioRecording.numberOfAudioTracks', 'source.physicalDescription.conditionfading', + 'source.physicalDescription.conditionscratches', 'source.physicalDescription.conditionsplices', + 'source.physicalDescription.conditionperforationdamage', 'source.physicalDescription.conditiondistortion', + 'source.physicalDescription.shrinkage.measure', 'source.physicalDescription.acetateDecayLevel'] def get_info(source_directory, metadata_directory): source_path = Path(source_directory) @@ -173,8 +176,10 @@ def main(): # Drop empty columns and the 'asset.fileExt' column df = df.dropna(axis=1, how="all") - # Replace NaN values with a default value, e.g., an empty string - df.fillna("", inplace=True) + # Fill NaN values with an empty string for fields not in ZERO_VALUE_FIELDS + for column in df.columns: + if column not in ZERO_VALUE_FIELDS: + df[column] = df[column].fillna("") df = df.drop(['asset.fileExt'], axis=1) # Set the output directory for JSON files @@ -190,20 +195,21 @@ def main(): nested_dict = {} json_tree = row.to_dict() - # Convert the flat dictionary to a nested dictionary for key, value in json_tree.items(): - if value: - if pd.isnull(value): - continue - if type(value) == pd.Timestamp: - value = value.strftime('%Y-%m-%d') - if isinstance(value, np.generic): - value = np.asscalar(value) - nested_dict = convert_dotKeyToNestedDict(nested_dict, key, value) - - # 0-value fields get skipped, but some should be allowed - if key in ZERO_VALUE_FIELDS and value == 0: - nested_dict = convert_dotKeyToNestedDict(nested_dict, key, value) + # Skip null values unless the field is in ZERO_VALUE_FIELDS + if pd.isnull(value) and key not in ZERO_VALUE_FIELDS: + continue + + # Convert Timestamp to a string in the format 'YYYY-MM-DD' + if type(value) == pd.Timestamp: + value = value.strftime('%Y-%m-%d') + + # Convert numpy generic types to native Python types + if isinstance(value, np.generic): + value = np.asscalar(value) + + # Convert the flat dictionary to a nested dictionary + nested_dict = convert_dotKeyToNestedDict(nested_dict, key, value) # Save the nested dictionary as a JSON file json_filename = os.path.splitext(row["asset.referenceFilename"])[0] + ".json"