Rework turtle-like test suites (#1986)

This patch reworks the turtle-like test suites (N-Quads, N-Triples, Turtle, TriG). The changes include: - A new DAWG Manifest processor: - Incorporates pytest expected failure support in test parameter generation. - Supports URI mapping so that local filesystem URIs can be mapped to the correct remote base URIs and still work correctly with local files. - Supports custom entry types so that suite-specific handling can be implemented separately from the core manifest processor. This will be used for SPARQL test suite and JSON test suite processing. - Updated EARL reporter: - Support for the new DAWG Manifest processor. - Writing of EARL reports by default if a report prefix is specified in the DAWG Manifest processor. - Report output is sorted so that it can be compared with text-based diff tools. - Fixed reporting of xfail tests. These were reported as skipped before; now they are reported as failed. The testing is also more accurate, and tests which were reported as passing before are now correctly reported as failing. With one exception, all these failures are related to leniency in the parser, as our parser accepts input that should raise an error when parsing. Other changes: - Moved IRI-related test utilities to a separate module. - Added generated reports for turtle-like formats to git with a `-HEAD` suffix. Subsequent changes will use this new processor for SPARQL and RDF/XML.
RDFLib · Jun 5, 2022 · 60ff2fd · 60ff2fd
1 parent 7d8cab6
commit 60ff2fd
Show file tree

Hide file tree

Showing 16 changed files with 7,631 additions and 401 deletions.
diff --git a/test/README.rst b/test/README.rst
@@ -50,56 +50,56 @@ EARL test reports can be generated using the EARL reporter plugin from ``earl.py
 
 When this plugin is enabled, it will create an ``earl:Assertion`` for every test that has a ``rdf_test_uri`` parameter, which can be either a string or a ``URIRef``.
 
-To enable the EARL reporter plugin an output file path must be supplied to pytest with ``--earl-report``. The report will be written to this location in turtle format.
+To enable the EARL reporter plugin an output file path must be supplied to pytest with ``--earl-output-file``. The report will be written to this location in turtle format.
 
 Some examples of generating test reports:
 
 .. code-block:: bash
 
    pytest \
-      --earl-asserter-homepage=http://example.com \
-      --earl-asserter-name 'Example Name' \
-      --earl-report=/var/tmp/earl/earl-jsonld-local.ttl \
+      --earl-assertor-homepage=http://example.com \
+      --earl-assertor-name 'Example Name' \
+      --earl-output-file=/var/tmp/earl/earl-jsonld-local.ttl \
       test/jsonld/test_localsuite.py
 
    pytest \
-      --earl-asserter-homepage=http://example.com \
-      --earl-asserter-name 'Example Name' \
-      --earl-report=/var/tmp/earl/earl-jsonld-v1.1.ttl \
+      --earl-assertor-homepage=http://example.com \
+      --earl-assertor-name 'Example Name' \
+      --earl-output-file=/var/tmp/earl/earl-jsonld-v1.1.ttl \
       test/jsonld/test_onedotone.py
 
    pytest \
-      --earl-asserter-homepage=http://example.com \
-      --earl-asserter-name 'Example Name' \
-      --earl-report=/var/tmp/earl/earl-jsonld-v1.0.ttl \
+      --earl-assertor-homepage=http://example.com \
+      --earl-assertor-name 'Example Name' \
+      --earl-output-file=/var/tmp/earl/earl-jsonld-v1.0.ttl \
       test/jsonld/test_testsuite.py
 
    pytest \
-      --earl-asserter-homepage=http://example.com \
-      --earl-asserter-name 'Example Name' \
-      --earl-report=/var/tmp/earl/earl-sparql.ttl \
-      test/test_dawg.py
+      --earl-assertor-homepage=http://example.com \
+      --earl-assertor-name 'Example Name' \
+      --earl-output-file=/var/tmp/earl/earl-sparql.ttl \
+      test/test_w3c_spec/test_sparql_w3c.py
 
    pytest \
-      --earl-asserter-homepage=http://example.com \
-      --earl-asserter-name 'Example Name' \
-      --earl-report=/var/tmp/earl/earl-nquads.ttl \
-      test/test_nquads_w3c.py
+      --earl-assertor-homepage=http://example.com \
+      --earl-assertor-name 'Example Name' \
+      --earl-output-file=/var/tmp/earl/earl-nquads.ttl \
+      test/test_w3c_spec/test_nquads_w3c.py
 
    pytest \
-      --earl-asserter-homepage=http://example.com \
-      --earl-asserter-name 'Example Name' \
-      --earl-report=/var/tmp/earl/earl-nt.ttl \
-      test/test_nt_w3c.py
+      --earl-assertor-homepage=http://example.com \
+      --earl-assertor-name 'Example Name' \
+      --earl-output-file=/var/tmp/earl/earl-nt.ttl \
+      test/test_w3c_spec/test_nt_w3c.py
 
    pytest \
-      --earl-asserter-uri=http://example.com \
-      --earl-asserter-name 'Example Name' \
-      --earl-report=/var/tmp/earl/earl-trig.ttl \
-      test/test_trig_w3c.py
+      --earl-assertor-homepage=http://example.com \
+      --earl-assertor-name 'Example Name' \
+      --earl-output-file=/var/tmp/earl/earl-trig.ttl \
+      test/test_w3c_spec/test_trig_w3c.py
 
    pytest \
-      --earl-asserter-uri=http://example.com \
-      --earl-asserter-name 'Example Name' \
-      --earl-report=/var/tmp/earl/earl-turtle.ttl \
-      test/test_turtle_w3c.py
+      --earl-assertor-homepage=http://example.com \
+      --earl-assertor-name 'Example Name' \
+      --earl-output-file=/var/tmp/earl/earl-turtle.ttl \
+      test/test_w3c_spec/test_turtle_w3c.py
diff --git a/test/conftest.py b/test/conftest.py
@@ -5,9 +5,9 @@
 from rdflib import Graph
 
 from .data import TEST_DATA_DIR
-from .utils.earl import EarlReporter
+from .utils.earl import EARLReporter  # noqa: E402
 
-pytest_plugins = [EarlReporter.__module__]
+pytest_plugins = [EARLReporter.__module__]
 
 # This is here so that asserts from these modules are formatted for human
 # readability.

diff --git a/test/test_w3c_spec/test_nquads_w3c.py b/test/test_w3c_spec/test_nquads_w3c.py
@@ -1,42 +1,92 @@
 """This runs the nquads tests for the W3C RDF Working Group's N-Quads
 test suite."""
-
-
-import os
+import logging
+from contextlib import ExitStack
 from test.data import TEST_DATA_DIR
-from test.utils.manifest import RDFTest, read_manifest
+from test.utils import BNodeHandling, GraphHelper, ensure_suffix
+from test.utils.dawg_manifest import ManifestEntry, params_from_sources
+from test.utils.iri import URIMapper
 from test.utils.namespace import RDFT
-from typing import Callable, Dict
+from typing import Optional
 
 import pytest
 
-from rdflib import ConjunctiveGraph
-from rdflib.term import Node, URIRef
+from rdflib.graph import Dataset
 
-verbose = False
+logger = logging.getLogger(__name__)
+
+REMOTE_BASE_IRI = "http://www.w3.org/2013/NQuadsTests/"
+LOCAL_BASE_DIR = TEST_DATA_DIR / "suites/w3c/nquads/"
+ENCODING = "utf-8"
+MAPPER = URIMapper.from_mappings(
+    (REMOTE_BASE_IRI, ensure_suffix(LOCAL_BASE_DIR.as_uri(), "/"))
+)
+VALID_TYPES = {RDFT.TestNQuadsPositiveSyntax, RDFT.TestNQuadsNegativeSyntax}
 
 
-def nquads(test):
-    g = ConjunctiveGraph()
+def check_entry(entry: ManifestEntry) -> None:
+    assert entry.action is not None
+    assert entry.type in VALID_TYPES
+    action_path = entry.uri_mapper.to_local_path(entry.action)
+    if logger.isEnabledFor(logging.DEBUG):
+        logger.debug(
+            "action = %s\n%s", action_path, action_path.read_text(encoding=ENCODING)
+        )
+    catcher: Optional[pytest.ExceptionInfo[Exception]] = None
+    dataset = Dataset()
+    with ExitStack() as xstack:
+        if entry.type == RDFT.TestNQuadsNegativeSyntax:
+            catcher = xstack.enter_context(pytest.raises(Exception))
+        dataset.parse(action_path, publicID=entry.action, format="nquads")
+    if catcher is not None:
+        assert catcher.value is not None
 
-    try:
-        g.parse(test.action, format="nquads")
-        if not test.syntax:
-            raise AssertionError("Input shouldn't have parsed!")
-    except:
-        if test.syntax:
-            raise
+    if entry.type == RDFT.TestNQuadsPositiveSyntax:
+        graph_data = dataset.serialize(format="nquads")
+        result_dataset = Dataset()
+        result_dataset.parse(data=graph_data, publicID=entry.action, format="nquads")
+        GraphHelper.assert_cgraph_isomorphic(
+            dataset, result_dataset, exclude_bnodes=True
+        )
+        GraphHelper.assert_sets_equals(
+            dataset, result_dataset, bnode_handling=BNodeHandling.COLLAPSE
+        )
 
 
-testers: Dict[Node, Callable[[RDFTest], None]] = {
-    RDFT.TestNQuadsPositiveSyntax: nquads,
-    RDFT.TestNQuadsNegativeSyntax: nquads,
+MARK_DICT = {
+    f"{REMOTE_BASE_IRI}#nt-syntax-bad-uri-02": pytest.mark.xfail(
+        reason="accepts an invalid IRI"
+    ),
+    f"{REMOTE_BASE_IRI}#nt-syntax-bad-uri-03": pytest.mark.xfail(
+        reason="accepts an invalid IRI"
+    ),
+    f"{REMOTE_BASE_IRI}#nt-syntax-bad-uri-04": pytest.mark.xfail(
+        reason="accepts an invalid IRI"
+    ),
+    f"{REMOTE_BASE_IRI}#nt-syntax-bad-uri-05": pytest.mark.xfail(
+        reason="accepts an invalid IRI"
+    ),
+    f"{REMOTE_BASE_IRI}#nt-syntax-bad-esc-01": pytest.mark.xfail(
+        reason="accepts badly escaped literal"
+    ),
+    f"{REMOTE_BASE_IRI}#nt-syntax-bad-esc-02": pytest.mark.xfail(
+        reason="accepts badly escaped literal"
+    ),
+    f"{REMOTE_BASE_IRI}#nt-syntax-bad-esc-03": pytest.mark.xfail(
+        reason="accepts badly escaped literal"
+    ),
 }
 
 
 @pytest.mark.parametrize(
-    "rdf_test_uri, type, rdf_test",
-    read_manifest(os.path.join(TEST_DATA_DIR, "suites", "w3c/nquads/manifest.ttl")),
+    ["manifest_entry"],
+    params_from_sources(
+        MAPPER,
+        ManifestEntry,
+        LOCAL_BASE_DIR / "manifest.ttl",
+        mark_dict=MARK_DICT,
+        report_prefix="rdflib_w3c_nquads",
+    ),
 )
-def test_manifest(rdf_test_uri: URIRef, type: Node, rdf_test: RDFTest):
-    testers[type](rdf_test)
+def test_entry(manifest_entry: ManifestEntry) -> None:
+    check_entry(manifest_entry)
diff --git a/test/test_w3c_spec/test_nt_w3c.py b/test/test_w3c_spec/test_nt_w3c.py
@@ -1,42 +1,96 @@
-"""This runs the nt tests for the W3C RDF Working Group's N-Quads
+"""This runs the nt tests for the W3C RDF Working Group's N-Triples
 test suite."""
-import os
+import logging
+from contextlib import ExitStack
 from test.data import TEST_DATA_DIR
-from test.utils.manifest import RDFTest, read_manifest
+from test.utils import BNodeHandling, GraphHelper, ensure_suffix
+from test.utils.dawg_manifest import ManifestEntry, params_from_sources
+from test.utils.iri import URIMapper
 from test.utils.namespace import RDFT
-from typing import Callable, Dict
+from typing import Optional
 
 import pytest
 
-from rdflib import Graph
-from rdflib.term import Node, URIRef
+from rdflib.graph import Graph
 
-verbose = False
+logger = logging.getLogger(__name__)
+
+REMOTE_BASE_IRI = "http://www.w3.org/2013/N-TriplesTests/"
+LOCAL_BASE_DIR = TEST_DATA_DIR / "suites/w3c/ntriples/"
+ENCODING = "utf-8"
+MAPPER = URIMapper.from_mappings(
+    (REMOTE_BASE_IRI, ensure_suffix(LOCAL_BASE_DIR.as_uri(), "/"))
+)
+VALID_TYPES = {RDFT.TestNTriplesPositiveSyntax, RDFT.TestNTriplesNegativeSyntax}
 
 
-def nt(test):
-    g = Graph()
+def check_entry(entry: ManifestEntry) -> None:
+    assert entry.action is not None
+    assert entry.type in VALID_TYPES
+    action_path = entry.uri_mapper.to_local_path(entry.action)
+    if logger.isEnabledFor(logging.DEBUG):
+        logger.debug(
+            "action = %s\n%s", action_path, action_path.read_text(encoding=ENCODING)
+        )
+    catcher: Optional[pytest.ExceptionInfo[Exception]] = None
+    graph = Graph()
+    with ExitStack() as xstack:
+        if entry.type == RDFT.TestNTriplesNegativeSyntax:
+            catcher = xstack.enter_context(pytest.raises(Exception))
+        graph.parse(action_path, publicID=entry.action, format="ntriples")
+    if catcher is not None:
+        assert catcher.value is not None
 
-    try:
-        g.parse(test.action, format="nt")
-        if not test.syntax:
-            raise AssertionError("Input shouldn't have parsed!")
-    except:
-        if test.syntax:
-            raise
+    if entry.type == RDFT.TestNTriplesPositiveSyntax:
+        graph_data = graph.serialize(format="ntriples")
+        result_graph = Graph()
+        result_graph.parse(data=graph_data, publicID=entry.action, format="ntriples")
+        GraphHelper.assert_isomorphic(graph, result_graph)
+        GraphHelper.assert_sets_equals(
+            graph, result_graph, bnode_handling=BNodeHandling.COLLAPSE
+        )
 
 
-testers: Dict[Node, Callable[[RDFTest], None]] = {
-    RDFT.TestNTriplesPositiveSyntax: nt,
-    RDFT.TestNTriplesNegativeSyntax: nt,
+MARK_DICT = {
+    f"{REMOTE_BASE_IRI}#nt-syntax-bad-uri-02": pytest.mark.xfail(
+        reason="accepts an invalid IRI"
+    ),
+    f"{REMOTE_BASE_IRI}#nt-syntax-bad-uri-03": pytest.mark.xfail(
+        reason="accepts an invalid IRI"
+    ),
+    f"{REMOTE_BASE_IRI}#nt-syntax-bad-uri-04": pytest.mark.xfail(
+        reason="accepts an invalid IRI"
+    ),
+    f"{REMOTE_BASE_IRI}#nt-syntax-bad-uri-05": pytest.mark.xfail(
+        reason="accepts an invalid IRI"
+    ),
+    f"{REMOTE_BASE_IRI}#nt-syntax-bad-esc-01": pytest.mark.xfail(
+        reason="accepts badly escaped literal"
+    ),
+    f"{REMOTE_BASE_IRI}#nt-syntax-bad-esc-02": pytest.mark.xfail(
+        reason="accepts badly escaped literal"
+    ),
+    f"{REMOTE_BASE_IRI}#nt-syntax-bad-esc-03": pytest.mark.xfail(
+        reason="accepts badly escaped literal"
+    ),
+    f"{REMOTE_BASE_IRI}#nt-syntax-bad-esc-04": pytest.mark.xfail(
+        reason="accepts badly escaped literal"
+    ),
+    f"{REMOTE_BASE_IRI}#minimal_whitespace": pytest.mark.xfail(
+        reason="Not parsing valid N-Triples syntax."
+    ),
 }
 
 
 @pytest.mark.parametrize(
-    "rdf_test_uri, type, rdf_test",
-    read_manifest(
-        os.path.join(TEST_DATA_DIR, "suites", "w3c/ntriples/manifest.ttl"), legacy=True
+    ["manifest_entry"],
+    params_from_sources(
+        MAPPER,
+        ManifestEntry,
+        LOCAL_BASE_DIR / "manifest.ttl",
+        mark_dict=MARK_DICT,
+        report_prefix="rdflib_w3c_ntriples",
     ),
 )
-def test_manifest(rdf_test_uri: URIRef, type: Node, rdf_test: RDFTest):
-    testers[type](rdf_test)
+def test_entry(manifest_entry: ManifestEntry) -> None:
+    check_entry(manifest_entry)