Skip to content

Commit

Permalink
more flexibility around csv input and new line
Browse files Browse the repository at this point in the history
  • Loading branch information
dansand committed May 22, 2024
1 parent 089ca20 commit d964b80
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 5 deletions.
14 changes: 9 additions & 5 deletions .github/scripts/parse_issue.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from collections import defaultdict
from request_utils import get_record, check_uri
from parse_metadata_utils import parse_publication, parse_software, parse_organization
from parse_utils import parse_name_or_orcid, parse_yes_no_choice, get_authors, get_funders, process_funding_data, parse_image_and_caption, validate_slug, extract_doi_parts, extract_orcid, remove_duplicates, parse_size
from parse_utils import parse_name_or_orcid, parse_yes_no_choice, get_authors, get_funders, process_funding_data, parse_image_and_caption, validate_slug, extract_doi_parts, extract_orcid, remove_duplicates, parse_size, identify_separator, separate_string
from dateutil import parser
from datetime import datetime

Expand Down Expand Up @@ -182,7 +182,8 @@ def parse_issue(issue):
error_log += "**Software framework authors**\n" + log

# software & algorithm keywords
software_keywords = [x.strip() for x in data["-> software & algorithm keywords"].split(",")]
#software_keywords = [x.strip() for x in data["-> software & algorithm keywords"].split(",")]
software_keywords = separate_string(data["-> software & algorithm keywords"])

#if software_keywords[0] == "_No response_":
if null_response_check(software_keywords[0]):
Expand Down Expand Up @@ -303,7 +304,8 @@ def parse_issue(issue):
data_dict["license"] = license_record

# model category
model_category = [x.strip() for x in data["-> model category"].split(",")]
#model_category = [x.strip() for x in data["-> model category"].split(",")]
model_category = separate_string(data["-> model category"])

#if model_category[0] == "_No response_":
if null_response_check(model_category[0]):
Expand All @@ -317,7 +319,8 @@ def parse_issue(issue):
# model status
model_status = []
try:
model_status = [x.strip() for x in data["-> model status"].split(",")]
#model_status = [x.strip() for x in data["-> model status"].split(",")]
model_status = separate_string(data["-> model status"])

#if model_status[0] == "_No response_":
if null_response_check(model_status[0]):
Expand Down Expand Up @@ -361,7 +364,8 @@ def parse_issue(issue):
data_dict["description"] = description

# scientific keywords
keywords = [x.strip() for x in data["-> scientific keywords"].split(",")]
#keywords = [x.strip() for x in data["-> scientific keywords"].split(",")]
keywords = separate_string(data["-> scientific keywords"])

#if keywords[0] == "_No response_":
if null_response_check(keywords[0]):
Expand Down
24 changes: 24 additions & 0 deletions .github/scripts/parse_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -420,3 +420,27 @@ def process_funding_data(input_string):


return {'funders': schema_funders, 'funding': schema_funding}



def identify_separator(input_string):
    """Guess whether a free-text list is comma- or newline-delimited.

    Surrounding whitespace (including leading/trailing newlines) is ignored.
    The remaining text is classified by comparing how many commas it holds
    against how many line breaks it holds.

    Args:
        input_string: Raw text of the list field.

    Returns:
        ``'csv'`` when commas strictly outnumber line breaks, otherwise
        ``'newline'`` (ties, and text with neither, count as ``'newline'``).
    """
    trimmed = input_string.strip()

    # Tally both candidate delimiters over the whole trimmed text.
    commas = trimmed.count(',')
    line_breaks = trimmed.count('\n')

    # Heuristic: commas must strictly dominate for the text to be read as CSV.
    return 'csv' if commas > line_breaks else 'newline'

def separate_string(input_string):
    """Split a free-text list field into its individual, trimmed entries.

    The delimiter style is decided by ``identify_separator``:

    * ``'csv'``: the text is split on newlines and then on commas.
    * anything else: the text is split on newlines only, so commas inside
      an entry are kept.

    Every entry is stripped of surrounding whitespace, and entries that are
    empty after stripping are dropped. This removes the blanks produced by
    trailing or doubled commas, blank lines, whitespace-only lines, and the
    stray ``\\r`` left on blank lines of CRLF text.

    Args:
        input_string: Raw text of the list field.

    Returns:
        A list of non-empty, stripped strings in input order. The list is
        empty when the input contains no non-blank entry.
    """
    lines = input_string.strip().split('\n')

    if identify_separator(input_string) == 'csv':
        pieces = (piece for line in lines for piece in line.split(','))
    else:
        # Treat any non-CSV classification as newline-delimited so that a
        # list is always returned.
        pieces = lines

    # Strip before filtering: a whitespace-only piece is truthy until stripped.
    stripped = (piece.strip() for piece in pieces)
    return [piece for piece in stripped if piece]

0 comments on commit d964b80

Please sign in to comment.