From 751cf6fc05a8f60796d947b7bba1c0bb14046d3c Mon Sep 17 00:00:00 2001 From: Bryan Hilbert Date: Thu, 15 Aug 2024 21:57:10 -0400 Subject: [PATCH] Treat filename_parser calls consistently. Add recognized_filename key to all parser output --- jwql/jwql_monitors/generate_preview_images.py | 6 +++--- jwql/jwql_monitors/monitor_filesystem.py | 4 ++-- jwql/utils/monitor_template.py | 4 ++-- jwql/utils/organize_filesystem.py | 8 ++++---- jwql/utils/utils.py | 5 ++++- .../apps/jwql/archive_database_update.py | 4 ++-- jwql/website/apps/jwql/data_containers.py | 17 +++++++++-------- jwql/website/apps/jwql/forms.py | 8 ++++---- 8 files changed, 30 insertions(+), 26 deletions(-) diff --git a/jwql/jwql_monitors/generate_preview_images.py b/jwql/jwql_monitors/generate_preview_images.py index 32efe253e..ca006255e 100755 --- a/jwql/jwql_monitors/generate_preview_images.py +++ b/jwql/jwql_monitors/generate_preview_images.py @@ -214,7 +214,7 @@ def check_existence(file_list, outdir): file_parts = filename_parser(file_list[0]) # If filename_parser() does not recognize the filename, return False - if 'program_id' not in file_parts: + if not file_parts['recognized_filename']: return False if file_parts['detector'].upper() in NIRCAM_SHORTWAVE_DETECTORS: @@ -637,8 +637,7 @@ def group_filenames(filenames): # Generate string to be matched with other filenames filename_dict = filename_parser(os.path.basename(filename)) - if 'detector' not in filename_dict: - logging.warning('Could not parse filename for {}'.format(filename)) + if not filename_dict['recognized_filename']: break # If the filename was already involved in a match, then skip @@ -738,6 +737,7 @@ def process_program(program, overwrite): try: identifier = 'jw{}'.format(filename_parser(filename)['program_id']) except KeyError: + # In this case, the filename_parser failed to recognize the filename identifier = os.path.basename(filename).split('.fits')[0] preview_output_directory = os.path.join(SETTINGS['preview_image_filesystem'], 
identifier) thumbnail_output_directory = os.path.join(SETTINGS['thumbnail_filesystem'], identifier) diff --git a/jwql/jwql_monitors/monitor_filesystem.py b/jwql/jwql_monitors/monitor_filesystem.py index fe9124485..5fb99d628 100755 --- a/jwql/jwql_monitors/monitor_filesystem.py +++ b/jwql/jwql_monitors/monitor_filesystem.py @@ -138,9 +138,9 @@ def gather_statistics(general_results_dict, instrument_results_dict): # Parse out filename information filename_dict = filename_parser(filename) - try: + if filename_dict['recognized_filename']: filename_type = filename_dict['filename_type'] - except KeyError: + else: break # For MSA files, which do not have traditional suffixes, set the diff --git a/jwql/utils/monitor_template.py b/jwql/utils/monitor_template.py index 598ace9e5..c77f4841d 100644 --- a/jwql/utils/monitor_template.py +++ b/jwql/utils/monitor_template.py @@ -106,13 +106,13 @@ def monitor_template_main(): # Example of locating a dataset in the filesystem filesystem = SETTINGS['filesystem'] - try: + if filename_dict['recognized_filename']: dataset = os.path.join(filesystem, 'public', 'jw{}'.format(filename_dict['program_id']), 'jw{}{}{}'.format(filename_dict['program_id'], filename_dict['observation'], filename_dict['visit']), filename_of_interest) - except KeyError: + else: raise KeyError(f'Filename {filename_of_interest} not recognized by filename_parser()') # Example of reading in dataset using jwst.datamodels diff --git a/jwql/utils/organize_filesystem.py b/jwql/utils/organize_filesystem.py index 415f5920b..38b1ca13e 100644 --- a/jwql/utils/organize_filesystem.py +++ b/jwql/utils/organize_filesystem.py @@ -52,12 +52,12 @@ def organize_filesystem(): filename_dict = filename_parser(src) # Build destination path for those filenames that can be parsed - try: + if filename_dict['recognized_filename']: destination_directory = os.path.join( SETTINGS['filesystem'], 'jw{}'.format(filename_dict['program_id']), 'jw{}{}{}'.format(filename_dict['program_id'], 
filename_dict['observation'], filename_dict['visit'])) - except KeyError: + else: # Some filenames do not have a program_id/observation/visit structure # Files that are not recognized by filename_parser will also end up here. break @@ -94,11 +94,11 @@ def revert_filesystem(): filename_dict = filename_parser(src) # Build destination path for those filenames that can be parsed - try: + if filename_dict['recognized_filename']: destination_directory = os.path.join( SETTINGS['old_filesystem'], 'jw{}'.format(filename_dict['program_id'])) - except KeyError: + else: # Some filenames do not have a program_id/observation/visit structure # Filenames not recognized by filename_parser() will also end up here. break diff --git a/jwql/utils/utils.py b/jwql/utils/utils.py index 7f35f3e70..b141f9ad0 100644 --- a/jwql/utils/utils.py +++ b/jwql/utils/utils.py @@ -570,6 +570,9 @@ def filename_parser(filename): # Convert the regex match to a dictionary filename_dict = jwst_file.groupdict() + # Add an entry indicating that the filename was successfully parsed + filename_dict['recognized_filename'] = True + # Add the filename type to that dict filename_dict['filename_type'] = name_match @@ -598,7 +601,7 @@ def filename_parser(filename): # Raise error if unable to parse the filename except AttributeError: filename_dict = {'recognized_filename': False} - logging.warning((f'\nFile; {filename} was not recognized by filename_parser(). Update parser or ' + logging.exception((f'\nFile; {filename} was not recognized by filename_parser(). 
Update parser or ' 'constants.py if it should be recognized.\n')) return filename_dict diff --git a/jwql/website/apps/jwql/archive_database_update.py b/jwql/website/apps/jwql/archive_database_update.py index 10a2c2eef..5c07b5e60 100755 --- a/jwql/website/apps/jwql/archive_database_update.py +++ b/jwql/website/apps/jwql/archive_database_update.py @@ -117,11 +117,11 @@ def get_updates(update_database): for rootname in all_rootnames: filename_dict = filename_parser(rootname) - try: + if filename_dict['recognized_filename']: # Weed out file types that are not supported by generate_preview_images if 'stage_3' not in filename_dict['filename_type']: rootnames.append(rootname) - except KeyError: + else: pass if len(filenames) > 0: diff --git a/jwql/website/apps/jwql/data_containers.py b/jwql/website/apps/jwql/data_containers.py index 99c49f22a..21f9dadcb 100644 --- a/jwql/website/apps/jwql/data_containers.py +++ b/jwql/website/apps/jwql/data_containers.py @@ -1326,9 +1326,9 @@ def get_image_info(file_root): parsed_fn = filename_parser(filename) # Get suffix information - try: + if parsed_fn['recognized_filename']: suffix = parsed_fn['suffix'] - except KeyError: + else: # If the filename parser does not recognize the file, skip it continue @@ -2206,12 +2206,12 @@ def thumbnails_query_ajax(rootnames): continue # Parse filename - try: - filename_dict = filename_parser(rootname) + filename_dict = filename_parser(rootname) + if filename_dict['recognized_filename']: # Add to list of all exposure groups exp_groups.add(filename_dict['group_root']) - except KeyError: + else: continue # Get list of available filenames @@ -2229,11 +2229,12 @@ def thumbnails_query_ajax(rootnames): data_dict['file_data'][rootname]['suffixes'] = [] data_dict['file_data'][rootname]['prop'] = rootname[2:7] for filename in available_files: - try: - suffix = filename_parser(filename)['suffix'] + parsed_fn = filename_parser(filename) + if parsed_fn['recognized_filename']: 
- data_dict['file_data'][rootname]['suffixes'].append(suffix) + data_dict['file_data'][rootname]['suffixes'].append(parsed_fn['suffix']) - except KeyError: + else: continue + data_dict['file_data'][rootname]['thumbnail'] = get_thumbnail_by_rootname(rootname) # Extract information for sorting with dropdown menus diff --git a/jwql/website/apps/jwql/forms.py b/jwql/website/apps/jwql/forms.py index 2d78d0c62..e2f7b299d 100644 --- a/jwql/website/apps/jwql/forms.py +++ b/jwql/website/apps/jwql/forms.py @@ -331,12 +331,12 @@ def clean_search(self): continue else: fileinfo = filename_parser(file) - try: + if fileinfo['recognized_filename']: instrument = fileinfo['instrument'] observation = fileinfo['observation'] all_instruments.append(instrument) all_observations[instrument].append(observation) - except KeyError: + else: # If the filename is not recognized by filename_parser(), skip it. continue @@ -388,8 +388,8 @@ def _search_is_fileroot(self, search): Is the search term formatted like a fileroot? """ parsed = filename_parser(search) - if 'instrument' in parsed: - self.fileroot_dict = filename_parser(search) + if parsed['recognized_filename']: + self.fileroot_dict = parsed return True else: return False