Skip to content

Commit

Permalink
Update the metadata extractor to validate each extracted metadata field separately
Browse files Browse the repository at this point in the history
so that the whole process does not crash when only some fields are invalid
  • Loading branch information
syphax-bouazzouni committed Aug 9, 2024
1 parent 552fbe5 commit f0ddab8
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 18 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ def extract_metadata(logger, user_params, heavy_extraction: true)
ontology_iri = extract_ontology_iri
@submission.version = version_info if version_info
@submission.uri = ontology_iri if ontology_iri
@submission.save

if heavy_extraction
begin
# Extract metadata directly from the ontology
Expand All @@ -23,7 +25,13 @@ def extract_metadata(logger, user_params, heavy_extraction: true)
logger.error("Error while extracting additional metadata: #{e}")
end
end
@submission.save

if @submission.valid?
@submission.save
else
logger.error("Error while extracting additional metadata: #{@submission.errors}")
@submission = LinkedData::Models::OntologySubmission.find(@submission.id).first.bring_remaining
end
end

def extract_version
Expand Down Expand Up @@ -72,7 +80,7 @@ def extract_ontology_metadata(logger, user_params)
unless attr_settings[:namespace].nil?
property_to_extract = "#{attr_settings[:namespace].to_s}:#{attr.to_s}"
hash_results = extract_each_metadata(ontology_uri, attr, property_to_extract, logger)
single_extracted = send_value(attr, hash_results) unless hash_results.empty?
single_extracted = send_value(attr, hash_results, logger) unless hash_results.empty?
end

# extracts attribute value from metadata mappings
Expand All @@ -82,12 +90,12 @@ def extract_ontology_metadata(logger, user_params)
break if single_extracted

hash_mapping_results = extract_each_metadata(ontology_uri, attr, mapping.to_s, logger)
single_extracted = send_value(attr, hash_mapping_results) unless hash_mapping_results.empty?
single_extracted = send_value(attr, hash_mapping_results, logger) unless hash_mapping_results.empty?
end

new_value = value(attr, type)

send_value(attr, old_value) if empty_value?(new_value) && !empty_value?(old_value)
send_value(attr, old_value, logger) if empty_value?(new_value) && !empty_value?(old_value)
end
end

Expand All @@ -105,31 +113,36 @@ def value(attr, type)
type.eql?(:list) ? Array(val) || [] : val || ''
end

def send_value(attr, value)
def send_value(attr, new_value, logger)
old_val = nil
single_extracted = false

if enforce?(attr, :list)
# Add the retrieved value(s) to the attribute if the attribute takes a list of objects
metadata_values = value(attr, :list)
metadata_values = metadata_values.dup
old_val = value(attr, :list)
new_values = old_val.dup

metadata_values.push(*value.values)
new_values.push(*new_value.values)

@submission.send("#{attr}=", metadata_values.uniq)
@submission.send("#{attr}=", new_values.uniq)
elsif enforce?(attr, :concatenate)
# If there are multiple values for this attribute, we concatenate them
# Add the concat at the very end, to easily join the content of the array
metadata_values = value(attr, :string)
metadata_values = metadata_values.split(', ')
new_values = value.values.map { |x| x.to_s.split(', ') }.flatten
old_val = value(attr, :string)
metadata_values = old_val.split(', ')
new_values = new_value.values.map { |x| x.to_s.split(', ') }.flatten

@submission.send("#{attr}=", (metadata_values + new_values).uniq.join(', '))
else
# If there are multiple values for a metadata attribute that should have a single value, take one arbitrarily (the first in the hash)
@submission.send("#{attr}=", new_value.values.first)
single_extracted = true
end

@submission.send("#{attr}=", value.values.first)
return true
unless @submission.valid?
logger.error("Error while extracting metadata for the attribute #{attr}: #{@submission.errors[attr] || @submission.errors}")
@submission.send("#{attr}=", old_val)
end
false

single_extracted
end

# Return a hash with the best literal value for an URI
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def process_submission(logger, options = {})

parsed = @submission.ready?(status: %i[rdf])

@submission.extract_metadata(logger, user_params: options[:params], heavy_extraction: extract_metadata?(options))
@submission = @submission.extract_metadata(logger, user_params: options[:params], heavy_extraction: extract_metadata?(options))

@submission.generate_missing_labels(logger) if generate_missing_labels?(options)

Expand Down

0 comments on commit f0ddab8

Please sign in to comment.