Commit

Merge pull request #402 from bcogrel/sparqlstore-graph-awareness
SPARQLStore graph awareness
gromgull committed Jul 10, 2014
2 parents 9321c66 + e1d115b commit d70e84c
Showing 3 changed files with 130 additions and 44 deletions.
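
For orientation, a hedged sketch of what this change enables from the Dataset API. It is not part of the commit; the endpoint URLs (a local Fuseki dataset) and the graph and resource names are assumptions chosen to match the style of the tests below.

# Illustrative sketch only -- not part of this diff.
# Assumes a SPARQL 1.1 endpoint with update support at the URLs below.
from rdflib import Dataset, URIRef

ds = Dataset(store='SPARQLUpdateStore')
# Query and update endpoints, as in test_dataset.py's setUp (URLs assumed)
ds.open(("http://localhost:3030/ds/sparql", "http://localhost:3030/ds/update"))

graph_id = URIRef("urn:example:g1")
g = ds.graph(graph_id)     # graph-aware store: issues "CREATE GRAPH <urn:example:g1>"
g.add((URIRef("urn:tarek"), URIRef("urn:likes"), URIRef("urn:pizza")))

print [c.identifier for c in ds.contexts()]   # named graph plus the dataset default graph

ds.remove_graph(graph_id)  # issues "DROP GRAPH <urn:example:g1>"
ds.close()
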
73 changes: 56 additions & 17 deletions rdflib/plugins/stores/sparqlstore.py
@@ -52,6 +52,7 @@
from rdflib.store import Store
from rdflib.query import Result
from rdflib import Variable, Namespace, BNode, URIRef, Literal
from rdflib.graph import DATASET_DEFAULT_GRAPH_ID

import httplib
import urlparse
@@ -185,6 +186,7 @@ class SPARQLStore(NSSPARQLWrapper, Store):
"""
formula_aware = False
transaction_aware = False
graph_aware = True
regex_matching = NATIVE_REGEX

def __init__(self,
@@ -198,6 +200,7 @@ def __init__(self,
self.nsBindings = {}
self.sparql11 = sparql11
self.context_aware = context_aware
self.graph_aware = context_aware

# Database Management Methods
def create(self, configuration):
@@ -275,7 +278,7 @@ def query(self, query,
" ".join(initBindings[x].n3() for x in v))

self.resetQuery()
if self.context_aware and queryGraph and queryGraph != '__UNION__':
if self._is_contextual(queryGraph):
self.addDefaultGraph(queryGraph)
self.setQuery(query)

@@ -368,7 +371,7 @@ def triples(self, (s, p, o), context=None):
pass

self.resetQuery()
if self.context_aware and context is not None:
if self._is_contextual(context):
self.addDefaultGraph(context.identifier)
self.setQuery(query)

@@ -397,7 +400,7 @@ def __len__(self, context=None):
else:
self.resetQuery()
q = "SELECT (count(*) as ?c) WHERE {?s ?p ?o .}"
if self.context_aware and context is not None:
if self._is_contextual(context):
self.addDefaultGraph(context.identifier)
self.setQuery(q)
doc = ElementTree.parse(SPARQLWrapper.query(self).response)
@@ -407,24 +410,29 @@ def __len__(self, context=None):

def contexts(self, triple=None):
"""
Iterates over results to SELECT ?NAME { GRAPH ?NAME { ?s ?p ?o } }
returning instances of this store with the SPARQL wrapper
object updated via addNamedGraph(?NAME)
Iterates over results to "SELECT ?NAME { GRAPH ?NAME { ?s ?p ?o } }"
or "SELECT ?NAME { GRAPH ?NAME {} }" if triple is `None`.
Returns instances of this store with the SPARQL wrapper
object updated via addNamedGraph(?NAME).
This causes a named-graph-uri key / value pair to be sent over
the protocol
the protocol.
Please note that some SPARQL endpoints are not able to find empty named
graphs.
"""
self.resetQuery()

if triple:
s, p, o = triple
params = ((s if s else Variable('s')).n3(),
(p if p else Variable('p')).n3(),
(o if o else Variable('o')).n3())
self.setQuery('SELECT ?name WHERE { GRAPH ?name { %s %s %s }}' % params)
else:
s = p = o = None
self.setQuery('SELECT ?name WHERE { GRAPH ?name {} }')

params = ((s if s else Variable('s')).n3(),
(p if p else Variable('p')).n3(),
(o if o else Variable('o')).n3())

self.setQuery(
'SELECT ?name WHERE { GRAPH ?name { %s %s %s }}' % params)
doc = ElementTree.parse(SPARQLWrapper.query(self).response)

return (rt.get(Variable("name"))
@@ -447,6 +455,23 @@ def namespaces(self):
for prefix, ns in self.nsBindings.items():
yield prefix, ns

def add_graph(self, graph):
raise TypeError('The SPARQL store is read only')

def remove_graph(self, graph):
raise TypeError('The SPARQL store is read only')

def _is_contextual(self, graph):
""" Returns `True` if the "GRAPH" keyword must appear
in the final SPARQL query sent to the endpoint.
"""
if (not self.context_aware) or (graph is None):
return False
if isinstance(graph, basestring):
return graph != '__UNION__'
else:
return graph.identifier != DATASET_DEFAULT_GRAPH_ID


class SPARQLUpdateStore(SPARQLStore):
"""
@@ -615,7 +640,7 @@ def add(self, spo, context=None, quoted=False):


triple = "%s %s %s ." % (subject.n3(), predicate.n3(), obj.n3())
if self.context_aware and context is not None:
if self._is_contextual(context):
q = "INSERT DATA { GRAPH %s { %s } }" % (
context.identifier.n3(), triple)
else:
@@ -665,7 +690,7 @@ def remove(self, spo, context):
obj = Variable("O")

triple = "%s %s %s ." % (subject.n3(), predicate.n3(), obj.n3())
if self.context_aware and context is not None:
if self._is_contextual(context):
q = "DELETE { GRAPH %s { %s } } WHERE { GRAPH %s { %s } }" % (
context.identifier.n3(), triple,
context.identifier.n3(), triple)
@@ -727,7 +752,7 @@ def update(self, query,
self.setNamespaceBindings(initNs)
query = self.injectPrefixes(query)

if self.context_aware and queryGraph and queryGraph != '__UNION__':
if self._is_contextual(queryGraph):
query = self._insert_named_graph(query, queryGraph)

if initBindings:
@@ -799,3 +824,17 @@ def _insert_named_graph(self, query, query_graph):
modified_query.append(query[pos:])

return "".join(modified_query)

def add_graph(self, graph):
if not self.graph_aware:
Store.add_graph(self, graph)
elif graph.identifier != DATASET_DEFAULT_GRAPH_ID:
self.update("CREATE GRAPH <%s>" % graph.identifier)

def remove_graph(self, graph):
if not self.graph_aware:
Store.remove_graph(self, graph)
elif graph.identifier == DATASET_DEFAULT_GRAPH_ID:
self.update("DROP DEFAULT")
else:
self.update("DROP GRAPH <%s>" % graph.identifier)
91 changes: 64 additions & 27 deletions test/test_dataset.py
@@ -24,49 +24,65 @@ def setUp(self):
if self.store == "SQLite":
_, self.tmppath = mkstemp(
prefix='test', dir='/tmp', suffix='.sqlite')
elif self.store == "SPARQLUpdateStore":
root = "http://localhost:3030/ukpp/"
self.graph.open((root + "sparql", root + "update"))
else:
self.tmppath = mkdtemp()
self.graph.open(self.tmppath, create=True)
self.michel = URIRef(u'michel')
self.tarek = URIRef(u'tarek')
self.bob = URIRef(u'bob')
self.likes = URIRef(u'likes')
self.hates = URIRef(u'hates')
self.pizza = URIRef(u'pizza')
self.cheese = URIRef(u'cheese')

self.c1 = URIRef(u'context-1')
self.c2 = URIRef(u'context-2')

if self.store != "SPARQLUpdateStore":
self.graph.open(self.tmppath, create=True)
self.michel = URIRef(u'urn:michel')
self.tarek = URIRef(u'urn:tarek')
self.bob = URIRef(u'urn:bob')
self.likes = URIRef(u'urn:likes')
self.hates = URIRef(u'urn:hates')
self.pizza = URIRef(u'urn:pizza')
self.cheese = URIRef(u'urn:cheese')

# Use regular URIs because SPARQL endpoints like Fuseki alter short names
self.c1 = URIRef(u'urn:context-1')
self.c2 = URIRef(u'urn:context-2')

# delete the graph for each test!
self.graph.remove((None, None, None))
for c in self.graph.contexts():
c.remove((None, None, None))
assert len(c) == 0
self.graph.remove_graph(c)

def tearDown(self):
self.graph.close()
if os.path.isdir(self.tmppath):
shutil.rmtree(self.tmppath)
if self.store == "SPARQLUpdateStore":
pass
else:
os.remove(self.tmppath)
if os.path.isdir(self.tmppath):
shutil.rmtree(self.tmppath)
else:
os.remove(self.tmppath)


def testGraphAware(self):
def testGraphAware(self):

if not self.graph.store.graph_aware: return

g = self.graph
g1 = g.graph(self.c1)


# added graph exists
self.assertEquals(set(x.identifier for x in self.graph.contexts()),
set([self.c1, DATASET_DEFAULT_GRAPH_ID]))

# Some SPARQL endpoint backends (e.g. TDB) do not consider
# empty named graphs
if self.store != "SPARQLUpdateStore":
# added graph exists
self.assertEquals(set(x.identifier for x in self.graph.contexts()),
set([self.c1, DATASET_DEFAULT_GRAPH_ID]))

# added graph is empty
self.assertEquals(len(g1), 0)

g1.add( (self.tarek, self.likes, self.pizza) )

# added graph still exists
self.assertEquals(set(x.identifier for x in self.graph.contexts()),
self.assertEquals(set(x.identifier for x in self.graph.contexts()),
set([self.c1, DATASET_DEFAULT_GRAPH_ID]))

# added graph contains one triple
@@ -77,17 +93,24 @@ def testGraphAware(self):
# added graph is empty
self.assertEquals(len(g1), 0)

# graph still exists, although empty
self.assertEquals(set(x.identifier for x in self.graph.contexts()),
set([self.c1, DATASET_DEFAULT_GRAPH_ID]))
# Some SPARQL endpoint backends (e.g. TDB) do not consider
# empty named graphs
if self.store != "SPARQLUpdateStore":
# graph still exists, although empty
self.assertEquals(set(x.identifier for x in self.graph.contexts()),
set([self.c1, DATASET_DEFAULT_GRAPH_ID]))

g.remove_graph(self.c1)

# graph is gone
self.assertEquals(set(x.identifier for x in self.graph.contexts()),
set([DATASET_DEFAULT_GRAPH_ID]))

def testDefaultGraph(self):
def testDefaultGraph(self):
# Sometimes the default graph is read-only (e.g. TDB in union mode)
if self.store == "SPARQLUpdateStore":
print "Please make sure updating the default graph " \
"is supported by your SPARQL endpoint"

self.graph.add(( self.tarek, self.likes, self.pizza))
self.assertEquals(len(self.graph), 1)
@@ -103,7 +126,11 @@ def testDefaultGraph(self):
self.assertEquals(set(x.identifier for x in self.graph.contexts()),
set([DATASET_DEFAULT_GRAPH_ID]))

def testNotUnion(self):
def testNotUnion(self):
# Union depends on the SPARQL endpoint configuration
if self.store == "SPARQLUpdateStore":
print "Please make sure your SPARQL endpoint has not configured " \
"its default graph as the union of the named graphs"
g1 = self.graph.graph(self.c1)
g1.add((self.tarek, self.likes, self.pizza))

@@ -120,13 +147,23 @@ def testNotUnion(self):
pluginname = sys.argv[1]

tests = 0

for s in plugin.plugins(pluginname, plugin.Store):
if s.name in ('default', 'IOMemory', 'Auditable',
'Concurrent', 'SPARQLStore', 'SPARQLUpdateStore'):
'Concurrent', 'SPARQLStore'):
continue # these are tested by default

if not s.getClass().graph_aware:
continue

if s.name == "SPARQLUpdateStore":
import urllib2
try:
assert len(urllib2.urlopen("http://localhost:3030/").read()) > 0
except:
sys.stderr.write("No SPARQL endpoint for %s (tests skipped)\n" % s.name)
continue

locals()["t%d" % tests] = type("%sContextTestCase" % s.name, (
DatasetTestCase,), {"store": s.name})
tests += 1
10 changes: 10 additions & 0 deletions test/test_sparqlupdatestore.py
@@ -258,6 +258,16 @@ def testNamedGraphUpdateWithInitBindings(self):
'only michel likes pizza'
)

def testEmptyNamedGraph(self):
empty_graph_iri = u"urn:empty-graph-1"
self.graph.update(u"CREATE GRAPH <%s>" % empty_graph_iri)
named_graphs = [unicode(r[0]) for r in self.graph.query(
"SELECT ?name WHERE { GRAPH ?name {} }")]
# Some SPARQL endpoint backends (like TDB) are not able to find empty named graphs
# (at least with this query)
if empty_graph_iri in named_graphs:
self.assertTrue(empty_graph_iri in [unicode(g.identifier)
for g in self.graph.contexts()])

from nose import SkipTest
import urllib2
