Commit

Merge pull request #402 from bcogrel/sparqlstore-graph-awareness
SPARQLStore graph awareness
gromgull committed Jul 10, 2014
2 parents 9321c66 + e1d115b commit d70e84c
Showing 3 changed files with 130 additions and 44 deletions.
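
For orientation, a hedged sketch of what this change enables from the Dataset API. It is not part of the commit; the endpoint URLs (a local Fuseki dataset) and the graph and resource names are assumptions chosen to match the style of the tests below.

# Illustrative sketch only -- not part of this diff.
# Assumes a SPARQL 1.1 endpoint with update support at the URLs below.
from rdflib import Dataset, URIRef

ds = Dataset(store='SPARQLUpdateStore')
# Query and update endpoints, as in test_dataset.py's setUp (URLs assumed)
ds.open(("http://localhost:3030/ds/sparql", "http://localhost:3030/ds/update"))

graph_id = URIRef("urn:example:g1")
g = ds.graph(graph_id)     # graph-aware store: issues "CREATE GRAPH <urn:example:g1>"
g.add((URIRef("urn:tarek"), URIRef("urn:likes"), URIRef("urn:pizza")))

print [c.identifier for c in ds.contexts()]   # named graph plus the dataset default graph

ds.remove_graph(graph_id)  # issues "DROP GRAPH <urn:example:g1>"
ds.close()
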
73 changes: 56 additions & 17 deletions rdflib/plugins/stores/sparqlstore.py
@@ -52,6 +52,7 @@
from rdflib.store import Store
from rdflib.query import Result
from rdflib import Variable, Namespace, BNode, URIRef, Literal
from rdflib.graph import DATASET_DEFAULT_GRAPH_ID

import httplib
import urlparse
@@ -185,6 +186,7 @@ class SPARQLStore(NSSPARQLWrapper, Store):
"""
formula_aware = False
transaction_aware = False
graph_aware = True
regex_matching = NATIVE_REGEX

def __init__(self,
@@ -198,6 +200,7 @@ def __init__(self,
self.nsBindings = {}
self.sparql11 = sparql11
self.context_aware = context_aware
self.graph_aware = context_aware

# Database Management Methods
def create(self, configuration):
@@ -275,7 +278,7 @@ def query(self, query,
" ".join(initBindings[x].n3() for x in v))

self.resetQuery()
if self.context_aware and queryGraph and queryGraph != '__UNION__':
if self._is_contextual(queryGraph):
self.addDefaultGraph(queryGraph)
self.setQuery(query)

@@ -368,7 +371,7 @@ def triples(self, (s, p, o), context=None):
pass

self.resetQuery()
if self.context_aware and context is not None:
if self._is_contextual(context):
self.addDefaultGraph(context.identifier)
self.setQuery(query)

@@ -397,7 +400,7 @@ def __len__(self, context=None):
else:
self.resetQuery()
q = "SELECT (count(*) as ?c) WHERE {?s ?p ?o .}"
if self.context_aware and context is not None:
if self._is_contextual(context):
self.addDefaultGraph(context.identifier)
self.setQuery(q)
doc = ElementTree.parse(SPARQLWrapper.query(self).response)
@@ -407,24 +410,29 @@ def __len__(self, context=None):

def contexts(self, triple=None):
"""
Iterates over results to SELECT ?NAME { GRAPH ?NAME { ?s ?p ?o } }
returning instances of this store with the SPARQL wrapper
object updated via addNamedGraph(?NAME)
Iterates over results to "SELECT ?NAME { GRAPH ?NAME { ?s ?p ?o } }"
or "SELECT ?NAME { GRAPH ?NAME {} }" if triple is `None`.
Returns instances of this store with the SPARQL wrapper
object updated via addNamedGraph(?NAME).
This causes a named-graph-uri key / value pair to be sent over
the protocol
the protocol.
Please note that some SPARQL endpoints are not able to find empty named
graphs.
"""
self.resetQuery()

if triple:
s, p, o = triple
params = ((s if s else Variable('s')).n3(),
(p if p else Variable('p')).n3(),
(o if o else Variable('o')).n3())
self.setQuery('SELECT ?name WHERE { GRAPH ?name { %s %s %s }}' % params)
else:
s = p = o = None
self.setQuery('SELECT ?name WHERE { GRAPH ?name {} }')

params = ((s if s else Variable('s')).n3(),
(p if p else Variable('p')).n3(),
(o if o else Variable('o')).n3())

self.setQuery(
'SELECT ?name WHERE { GRAPH ?name { %s %s %s }}' % params)
doc = ElementTree.parse(SPARQLWrapper.query(self).response)

return (rt.get(Variable("name"))
@@ -447,6 +455,23 @@ def namespaces(self):
for prefix, ns in self.nsBindings.items():
yield prefix, ns

def add_graph(self, graph):
raise TypeError('The SPARQL store is read only')

def remove_graph(self, graph):
raise TypeError('The SPARQL store is read only')

def _is_contextual(self, graph):
""" Returns `True` if the "GRAPH" keyword must appear
in the final SPARQL query sent to the endpoint.
"""
if (not self.context_aware) or (graph is None):
return False
if isinstance(graph, basestring):
return graph != '__UNION__'
else:
return graph.identifier != DATASET_DEFAULT_GRAPH_ID


class SPARQLUpdateStore(SPARQLStore):
"""
@@ -615,7 +640,7 @@ def add(self, spo, context=None, quoted=False):


triple = "%s %s %s ." % (subject.n3(), predicate.n3(), obj.n3())
if self.context_aware and context is not None:
if self._is_contextual(context):
q = "INSERT DATA { GRAPH %s { %s } }" % (
context.identifier.n3(), triple)
else:
@@ -665,7 +690,7 @@ def remove(self, spo, context):
obj = Variable("O")

triple = "%s %s %s ." % (subject.n3(), predicate.n3(), obj.n3())
if self.context_aware and context is not None:
if self._is_contextual(context):
q = "DELETE { GRAPH %s { %s } } WHERE { GRAPH %s { %s } }" % (
context.identifier.n3(), triple,
context.identifier.n3(), triple)
@@ -727,7 +752,7 @@ def update(self, query,
self.setNamespaceBindings(initNs)
query = self.injectPrefixes(query)

if self.context_aware and queryGraph and queryGraph != '__UNION__':
if self._is_contextual(queryGraph):
query = self._insert_named_graph(query, queryGraph)

if initBindings:
@@ -799,3 +824,17 @@ def _insert_named_graph(self, query, query_graph):
modified_query.append(query[pos:])

return "".join(modified_query)

def add_graph(self, graph):
if not self.graph_aware:
Store.add_graph(self, graph)
elif graph.identifier != DATASET_DEFAULT_GRAPH_ID:
self.update("CREATE GRAPH <%s>" % graph.identifier)

def remove_graph(self, graph):
if not self.graph_aware:
Store.remove_graph(self, graph)
elif graph.identifier == DATASET_DEFAULT_GRAPH_ID:
self.update("DROP DEFAULT")
else:
self.update("DROP GRAPH <%s>" % graph.identifier)
91 changes: 64 additions & 27 deletions test/test_dataset.py
@@ -24,49 +24,65 @@ def setUp(self):
if self.store == "SQLite":
_, self.tmppath = mkstemp(
prefix='test', dir='/tmp', suffix='.sqlite')
elif self.store == "SPARQLUpdateStore":
root = "http://localhost:3030/ukpp/"
self.graph.open((root + "sparql", root + "update"))
else:
self.tmppath = mkdtemp()
self.graph.open(self.tmppath, create=True)
self.michel = URIRef(u'michel')
self.tarek = URIRef(u'tarek')
self.bob = URIRef(u'bob')
self.likes = URIRef(u'likes')
self.hates = URIRef(u'hates')
self.pizza = URIRef(u'pizza')
self.cheese = URIRef(u'cheese')

self.c1 = URIRef(u'context-1')
self.c2 = URIRef(u'context-2')

if self.store != "SPARQLUpdateStore":
self.graph.open(self.tmppath, create=True)
self.michel = URIRef(u'urn:michel')
self.tarek = URIRef(u'urn:tarek')
self.bob = URIRef(u'urn:bob')
self.likes = URIRef(u'urn:likes')
self.hates = URIRef(u'urn:hates')
self.pizza = URIRef(u'urn:pizza')
self.cheese = URIRef(u'urn:cheese')

# Use regular URIs because SPARQL endpoints like Fuseki alter short names
self.c1 = URIRef(u'urn:context-1')
self.c2 = URIRef(u'urn:context-2')

# delete the graph for each test!
self.graph.remove((None, None, None))
for c in self.graph.contexts():
c.remove((None, None, None))
assert len(c) == 0
self.graph.remove_graph(c)

def tearDown(self):
self.graph.close()
if os.path.isdir(self.tmppath):
shutil.rmtree(self.tmppath)
if self.store == "SPARQLUpdateStore":
pass
else:
os.remove(self.tmppath)
if os.path.isdir(self.tmppath):
shutil.rmtree(self.tmppath)
else:
os.remove(self.tmppath)


def testGraphAware(self):
def testGraphAware(self):

if not self.graph.store.graph_aware: return

g = self.graph
g1 = g.graph(self.c1)


# added graph exists
self.assertEquals(set(x.identifier for x in self.graph.contexts()),
set([self.c1, DATASET_DEFAULT_GRAPH_ID]))

# Some SPARQL endpoint backends (e.g. TDB) do not consider
# empty named graphs
if self.store != "SPARQLUpdateStore":
# added graph exists
self.assertEquals(set(x.identifier for x in self.graph.contexts()),
set([self.c1, DATASET_DEFAULT_GRAPH_ID]))

# added graph is empty
self.assertEquals(len(g1), 0)

g1.add( (self.tarek, self.likes, self.pizza) )

# added graph still exists
self.assertEquals(set(x.identifier for x in self.graph.contexts()),
self.assertEquals(set(x.identifier for x in self.graph.contexts()),
set([self.c1, DATASET_DEFAULT_GRAPH_ID]))

# added graph contains one triple
@@ -77,17 +93,24 @@ def testGraphAware(self):
# added graph is empty
self.assertEquals(len(g1), 0)

# graph still exists, although empty
self.assertEquals(set(x.identifier for x in self.graph.contexts()),
set([self.c1, DATASET_DEFAULT_GRAPH_ID]))
# Some SPARQL endpoint backends (e.g. TDB) do not consider
# empty named graphs
if self.store != "SPARQLUpdateStore":
# graph still exists, although empty
self.assertEquals(set(x.identifier for x in self.graph.contexts()),
set([self.c1, DATASET_DEFAULT_GRAPH_ID]))

g.remove_graph(self.c1)

# graph is gone
self.assertEquals(set(x.identifier for x in self.graph.contexts()),
set([DATASET_DEFAULT_GRAPH_ID]))

def testDefaultGraph(self):
def testDefaultGraph(self):
# Sometimes the default graph is read-only (e.g. TDB in union mode)
if self.store == "SPARQLUpdateStore":
print "Please make sure updating the default graph " \
"is supported by your SPARQL endpoint"

self.graph.add(( self.tarek, self.likes, self.pizza))
self.assertEquals(len(self.graph), 1)
@@ -103,7 +126,11 @@ def testDefaultGraph(self):
self.assertEquals(set(x.identifier for x in self.graph.contexts()),
set([DATASET_DEFAULT_GRAPH_ID]))

def testNotUnion(self):
def testNotUnion(self):
# Union depends on the SPARQL endpoint configuration
if self.store == "SPARQLUpdateStore":
print "Please make sure your SPARQL endpoint has not configured " \
"its default graph as the union of the named graphs"
g1 = self.graph.graph(self.c1)
g1.add((self.tarek, self.likes, self.pizza))

@@ -120,13 +147,23 @@ def testNotUnion(self):
pluginname = sys.argv[1]

tests = 0

for s in plugin.plugins(pluginname, plugin.Store):
if s.name in ('default', 'IOMemory', 'Auditable',
'Concurrent', 'SPARQLStore', 'SPARQLUpdateStore'):
'Concurrent', 'SPARQLStore'):
continue # these are tested by default

if not s.getClass().graph_aware:
continue

if s.name == "SPARQLUpdateStore":
import urllib2
try:
assert len(urllib2.urlopen("http://localhost:3030/").read()) > 0
except:
sys.stderr.write("No SPARQL endpoint for %s (tests skipped)\n" % s.name)
continue

locals()["t%d" % tests] = type("%sContextTestCase" % s.name, (
DatasetTestCase,), {"store": s.name})
tests += 1
10 changes: 10 additions & 0 deletions test/test_sparqlupdatestore.py
@@ -258,6 +258,16 @@ def testNamedGraphUpdateWithInitBindings(self):
'only michel likes pizza'
)

def testEmptyNamedGraph(self):
empty_graph_iri = u"urn:empty-graph-1"
self.graph.update(u"CREATE GRAPH <%s>" % empty_graph_iri)
named_graphs = [unicode(r[0]) for r in self.graph.query(
"SELECT ?name WHERE { GRAPH ?name {} }")]
# Some SPARQL endpoint backends (like TDB) are not able to find empty named graphs
# (at least with this query)
if empty_graph_iri in named_graphs:
self.assertTrue(empty_graph_iri in [unicode(g.identifier)
for g in self.graph.contexts()])

from nose import SkipTest
import urllib2
