Skip to content

Commit

Permalink
Fix: optimize fetch all agents usages query (#111)
Browse files Browse the repository at this point in the history
* add usage method to the agent models

* optimize the agents usages query, to do batch loading when fetching all agents
  • Loading branch information
syphax-bouazzouni authored Nov 8, 2023
1 parent 22d5fdb commit 33ffc14
Show file tree
Hide file tree
Showing 2 changed files with 70 additions and 2 deletions.
48 changes: 48 additions & 0 deletions lib/ontologies_linked_data/models/agents/agent.rb
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,54 @@ def usages
end
end

# Batch-loads the usages of the given agents with a single SPARQL query and
# caches the result on each agent in the @usages instance variable.
#
# Only submissions whose status is RDF or UPLOADED are queried. For a given
# agent, rows with an RDF status take precedence; UPLOADED rows are used only
# when the agent has no RDF row at all.
#
# @param agents [Array] agents to populate; each must respond to #id and
#   #loaded_attributes
# @return [Array] the same agents array; every agent has @usages set to a Hash
#   mapping a submission id to the list of property URIs (as strings) through
#   which that submission references the agent (empty Hash when unused)
def self.load_agents_usages(agents = [])
  # The query result is only ever written back onto the given agents, so an
  # empty list needs no round trip to the triple store.
  return agents if agents.empty?

  is_a = RDF::URI.new('http://www.w3.org/1999/02/22-rdf-syntax-ns#type')
  submission_type = LinkedData::Models::OntologySubmission.uri_type
  status_prefix = LinkedData::Models::SubmissionStatus.id_prefix

  q = Goo.sparql_query_client.select(:id, :property, :agent, :status).distinct
         .from(submission_type)
         .where(
           [:id, is_a, submission_type],
           [:id, LinkedData::Models::OntologySubmission.attribute_uri(:submissionStatus), :status]
         )

  q = q.optional([:agent, is_a, LinkedData::Models::Agent.uri_type])
  q = q.optional([:id, :property, :agent])

  # Keep only submissions that are at least uploaded or already parsed to RDF.
  ready_submission_filter = %w[RDF UPLOADED]
                            .map { |code| "?status = <#{RDF::URI.new(status_prefix + code)}>" }
                            .join(' || ')
  q.filter(ready_submission_filter)

  # Restrict the result to the requested agents.
  q.filter(agents.map { |agent| "?agent = <#{agent.id}>" }.join(' || '))

  data = q.each_solution.group_by { |x| x[:agent] }

  # For each agent keep [submission id, property] pairs, preferring RDF rows.
  agents_usages = data.transform_values do |values|
    r = values.select { |value| value[:status]['RDF'] }
    r = values.select { |value| value[:status]['UPLOADED'] } if r.empty?
    r.reject { |x| x[:property].nil? }.map { |x| [x[:id], x[:property]] }
  end

  agents.each do |agent|
    pairs = agents_usages[agent.id] || []
    # Group the properties by submission id without mutating the pairs.
    usages = pairs.each_with_object({}) do |(submission_id, property), by_submission|
      (by_submission[submission_id] ||= []) << property.to_s
    end

    agent.instance_variable_set("@usages", usages)
    # Flag the attribute that was actually populated above.
    agent.loaded_attributes.add(:usages)
  end
end

# Returns the cached usages of this agent, loading them on first access.
#
# @param force_update [Boolean] when true, reload the usages even if a cached
#   value is already present
# @return the value stored in @usages by the class-level load_agents_usages
def usages(force_update: false)
  needs_load = force_update || !instance_variable_defined?("@usages")
  self.class.load_agents_usages([self]) if needs_load
  @usages
end

def unique_identifiers(inst, attr)
inst.bring(attr) if inst.bring?(attr)
identifiers = inst.send(attr)
Expand Down
24 changes: 22 additions & 2 deletions test/models/test_agent.rb
Original file line number Diff line number Diff line change
Expand Up @@ -79,10 +79,27 @@ def test_identifier_no_valid
id.delete
end

def test_agent_usages
# Runs the shared usages assertions twice - once after pre-loading every
# agent's usages with a single batch query, once letting each agent load its
# own usages on demand - and compares the measured times.
#
# NOTE(review): the second measurement also includes the fixture setup, and the
# final assertion compares timings, so it may be sensitive to machine load.
def test_all_agents_usages_load
  _count, _acronyms, ontologies = create_ontologies_and_submissions(ont_count: 3, submission_count: 1,
                                                                    process_submission: false)
  agents, *submissions = agent_usages_test_setup(ontologies)

  # Batch loading: one query populates the usages of all agents up front.
  batch_timing = Benchmark.measure('batch load') do
    LinkedData::Models::Agent.load_agents_usages(agents)
    agent_usages_test(agents, *submissions)
  end

  # Per-element loading: each agent fetches its usages on first access.
  eager_timing = Benchmark.measure('eager load') do
    agents, *submissions = agent_usages_test_setup(ontologies)
    agent_usages_test(agents, *submissions)
  end

  assert batch_timing.total < eager_timing.total, "batch loading should be more faster than eager loading"
end
private

def agent_usages_test_setup(ontologies)
o1 = ontologies[0]
o2 = ontologies[1]
o3 = ontologies[2]
Expand Down Expand Up @@ -111,6 +128,9 @@ def test_agent_usages
assert sub2.valid?
sub2.save

[agents, sub1, sub2, sub3]
end
def agent_usages_test(agents, sub1, sub2, sub3)
usages = agents[0].usages

assert_equal 2, usages.size
Expand All @@ -123,7 +143,7 @@ def test_agent_usages
sub3.bring_remaining
sub3.save

usages = agents[0].usages
usages = agents[0].usages(force_update: true)
assert_equal 3, usages.size

refute_nil usages[sub1.id]
Expand Down

0 comments on commit 33ffc14

Please sign in to comment.