From 33ffc141c7c9ca8e66ba30f73008edc4f5f94eaa Mon Sep 17 00:00:00 2001 From: Syphax bouazzouni Date: Wed, 8 Nov 2023 11:51:52 +0100 Subject: [PATCH] Fix: optimize fetch all agents usages query (#111) * add usage method to the agent models * optimize the agents usages query, to do batch loading when fetching all agents --- .../models/agents/agent.rb | 53 +++++++++++++++++++ test/models/test_agent.rb | 24 +++++++++- 2 files changed, 75 insertions(+), 2 deletions(-) diff --git a/lib/ontologies_linked_data/models/agents/agent.rb b/lib/ontologies_linked_data/models/agents/agent.rb index 62495f48..3db6cc8a 100644 --- a/lib/ontologies_linked_data/models/agents/agent.rb +++ b/lib/ontologies_linked_data/models/agents/agent.rb @@ -49,6 +49,59 @@ def usages end end + # Batch-loads the usages of the given agents with a single SPARQL query over ontology + # submissions whose status is RDF or UPLOADED (RDF rows take precedence per agent), and + # caches on each agent, in @usages, a hash of submission id => array of property URI strings. + def self.load_agents_usages(agents = []) + is_a = RDF::URI.new('http://www.w3.org/1999/02/22-rdf-syntax-ns#type') + q = Goo.sparql_query_client.select(:id, :property, :agent, :status).distinct + .from(LinkedData::Models::OntologySubmission.uri_type) + .where( + [:id, + is_a, + LinkedData::Models::OntologySubmission.uri_type + ], + [:id, + LinkedData::Models::OntologySubmission.attribute_uri(:submissionStatus), + :status + ] + ) + + + q = q.optional([:agent, is_a, LinkedData::Models::Agent.uri_type]) + q = q.optional([:id, :property, :agent]) + + ready_submission_filter = "?status = <#{RDF::URI.new(LinkedData::Models::SubmissionStatus.id_prefix + 'RDF')}> || ?status = <#{RDF::URI.new(LinkedData::Models::SubmissionStatus.id_prefix + 'UPLOADED')}>" + q.filter(ready_submission_filter) + + + q.filter(agents.map{|agent| "?agent = <#{agent.id}>"}.join(' || ')) unless agents.empty? + + data = q.each_solution.group_by{|x| x[:agent]} + + agents_usages = data.transform_values do |values| + r = values.select { |value| value[:status]['RDF'] } + r = values.select { |value| value[:status]['UPLOADED'] } if r.empty? + r.reject{|x| x[:property].nil? 
}.map{|x| [x[:id], x[:property]]} + end + + agents.each do |agent| + usages = agents_usages[agent.id] + usages = usages ? usages.group_by(&:shift) : {} + usages = usages.transform_values{|x| x.flatten.map(&:to_s)} + + agent.instance_variable_set("@usages", usages) + agent.loaded_attributes.add(:usages) + end + end + + # Returns this agent's usages, loading them on first access and caching them in @usages; + # pass force_update: true to query again. + def usages(force_update: false) + self.class.load_agents_usages([self]) if !instance_variable_defined?("@usages") || force_update + @usages + end + def unique_identifiers(inst, attr) inst.bring(attr) if inst.bring?(attr) identifiers = inst.send(attr) diff --git a/test/models/test_agent.rb b/test/models/test_agent.rb index 15342c60..09552803 100644 --- a/test/models/test_agent.rb +++ b/test/models/test_agent.rb @@ -79,10 +79,27 @@ def test_identifier_no_valid id.delete end - def test_agent_usages + def test_all_agents_usages_load count, acronyms, ontologies = create_ontologies_and_submissions(ont_count: 3, submission_count: 1, process_submission: false) + agents, sub1, sub2, sub3 = agent_usages_test_setup(ontologies) + ## using batch load + t1 = Benchmark.measure('batch load') do + LinkedData::Models::Agent.load_agents_usages(agents) + agent_usages_test(agents, sub1, sub2, sub3) + end + + ## using element-by-element loading + t2 = Benchmark.measure('eager load') do + agents, sub1, sub2, sub3 = agent_usages_test_setup(ontologies) + agent_usages_test(agents, sub1, sub2, sub3) + end + assert t1.total < t2.total, "batch loading should be more faster than eager loading" + end + private + + def agent_usages_test_setup(ontologies) o1 = ontologies[0] o2 = ontologies[1] o3 = ontologies[2] @@ -111,6 +128,9 @@ def test_agent_usages assert sub2.valid? 
sub2.save + [agents, sub1, sub2, sub3] + end + def agent_usages_test(agents, sub1, sub2, sub3) usages = agents[0].usages assert_equal 2, usages.size @@ -123,7 +143,7 @@ def test_agent_usages sub3.bring_remaining sub3.save - usages = agents[0].usages + usages = agents[0].usages(force_update: true) assert_equal 3, usages.size refute_nil usages[sub1.id]