Skip to content

Commit

Permalink
Fix for issue1957 sparql parser percent encoded reserved chars (#1959)
Browse files Browse the repository at this point in the history
The internal SPARQL parser function [`_hexExpand`](https://github.com/RDFLib/rdflib/blob/6ed2ef48ed38679bcdafe7cae250a2ef4b315e7b/rdflib/plugins/sparql/parser.py#L230) appears to inappropriately expand [percent-encoded reserved characters](https://en.wikipedia.org/wiki/Percent-encoding).

Removed it, as it does not appear to be needed.

Merged with only one review as this is fairly well tested and not a very complicated fix.

Co-authored-by: Iwan Aucamp <aucampia@gmail.com>
  • Loading branch information
Graham Higgins and aucampia authored May 25, 2022
1 parent 8e24878 commit 958b9a1
Show file tree
Hide file tree
Showing 2 changed files with 135 additions and 7 deletions.
7 changes: 0 additions & 7 deletions rdflib/plugins/sparql/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,13 +226,6 @@ def expandCollection(terms):
)


def _hexExpand(match: "re.Match[str]") -> str:
    """
    Return the single character whose code point is given in hex by a match.

    The first character of the matched text is skipped and the remainder is
    parsed as a base-16 integer, which is then converted with ``chr``.

    :param match: a regular-expression match whose whole text (group 0) is one
        leading character followed by hexadecimal digits, e.g. ``"%2F"``.
    :return: the character with that code point, e.g. ``"/"`` for ``"%2F"``.
    :raises ValueError: if the text after the first character is not valid
        hexadecimal.
    """
    # Everything after the first character is treated as the hex code point.
    hex_digits = match.group(0)[1:]
    return chr(int(hex_digits, 16))


# Post-process each PN_LOCAL match: every substring of the first token that
# matches PERCENT_re is replaced with the result of _hexExpand, i.e. the
# character whose code point the hex digits spell.
# NOTE(review): this also rewrites percent-encoded *reserved* characters
# (e.g. "%20", "%2F"), so the local name no longer equals what was written in
# the query text.
PN_LOCAL.setParseAction(lambda x: re.sub("(%s)" % PERCENT_re, _hexExpand, x[0]))


# [141] PNAME_LN ::= PNAME_NS PN_LOCAL
# NOTE(review): Param(...) presumably stores the local part under the name
# "localname", and leaveWhitespace() presumably stops pyparsing from skipping
# whitespace between the namespace part and the local part -- confirm against
# the Param helper and the pyparsing documentation.
PNAME_LN = PNAME_NS + Param("localname", PN_LOCAL.leaveWhitespace())

Expand Down
135 changes: 135 additions & 0 deletions test/test_sparql/test_prefixed_name.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
import itertools
import logging
from contextlib import ExitStack
from typing import Type, Union

import pyparsing
import pytest
from pyparsing import Optional

import rdflib
from rdflib import Graph
from rdflib.namespace import Namespace
from rdflib.term import URIRef

# Percent-encoded sequences used to parametrize the tests in this module.
# The trailing comment on each row shows the characters the sequences encode.
RESERVED_PCHARS = [
    "%20", "%21", "%23", "%24", "%25",  # space ! # $ %
    "%26", "%27", "%28", "%29", "%2A",  # & ' ( ) *
    "%2B", "%2C", "%2F", "%3A", "%3B",  # + , / : ;
    "%3D", "%3F", "%40", "%5B", "%5D",  # = ? @ [ ]
]


@pytest.mark.parametrize(
    "reserved_char_percent_encoded",
    RESERVED_PCHARS,
)
def test_sparql_parse_reserved_char_percent_encoded(
    reserved_char_percent_encoded: str,
) -> None:
    """
    A percent-encoded sequence inside a prefixed name is left as written.

    The same percent-encoded local name is used in Turtle data and in a SPARQL
    property path. The query must find the value stored under that predicate,
    and the encoded sequence must still appear in the string form of the
    parsed query.
    """
    # Import the parser module explicitly instead of relying on
    # ``rdflib.plugins.sparql.parser`` happening to be reachable as an
    # attribute after some other code has imported it.
    from rdflib.plugins.sparql.parser import parseQuery

    data = f"""@prefix : <https://www.example.co/reserved/language#> .
<https://www.example.co/reserved/root> :_id "01G39WKRH76BGY5D3SKDHJP2SX" ;
:transcript{reserved_char_percent_encoded}data [ :_id "01G39WKRH7JYRX78X7FG4RCNYF" ;
:_key "transcript{reserved_char_percent_encoded}data" ;
:value "value" ;
:value_id "01G39WKRH7PVK1DXQHWT08DZA8" ] ."""

    q = f"""PREFIX : <https://www.example.co/reserved/language#>
SELECT ?o
WHERE {{ ?s :transcript{reserved_char_percent_encoded}data/:value ?o . }}"""

    g = rdflib.Graph()
    g.parse(data=data, format="ttl")
    rows = list(g.query(q))

    # Fail with a readable assertion rather than an IndexError when the
    # predicate in the query does not match the one in the data.
    assert rows, f"query returned no rows for {reserved_char_percent_encoded!r}"
    assert rows[0][0] == rdflib.term.Literal("value")

    assert reserved_char_percent_encoded in str(parseQuery(q))


# Namespace that the prefixes declared by the prefixed-name tests expand to.
PNAME_PREFIX = Namespace("https://example.com/test_pnames/")


@pytest.fixture(scope="module")
def blank_graph() -> Graph:
    """Provide a newly constructed ``Graph``, created once per test module."""
    graph = Graph()
    return graph


@pytest.mark.parametrize(
    ["pname_ns", "pname", "expected_result"],
    itertools.chain(
        [
            ("eg", "invalid/PN_PREFIX", pyparsing.exceptions.ParseException),
            ("", "eg:a", Exception),
            ("", ":invalid PN_LOCAL", pyparsing.exceptions.ParseException),
            ("", ":invalid/PN_LOCAL", pyparsing.exceptions.ParseException),
            ("", ":a:b:c", PNAME_PREFIX["a:b:c"]),
            ("", ":", URIRef(f"{PNAME_PREFIX}")),
            ("", ":a", PNAME_PREFIX.a),
            ("eg", " eg:obj ", PNAME_PREFIX.obj),
            ("", " :obj ", PNAME_PREFIX.obj),
            ("eg", " \t eg:obj \t ", PNAME_PREFIX.obj),
            ("", " \n :obj \n ", PNAME_PREFIX.obj),
            ("eg", "eg:", URIRef(f"{PNAME_PREFIX}")),
            ("eg", "eg:a", PNAME_PREFIX.a),
            ("", ":transcript%20data", PNAME_PREFIX["transcript%20data"]),
        ],
        # One extra case per percent-encoded sequence: the sequence must come
        # through unchanged in the resulting IRI.
        (
            ("", f":aaa{pchar}zzz", PNAME_PREFIX[f"aaa{pchar}zzz"])
            for pchar in RESERVED_PCHARS
        ),
    ),
)
def test_pnames(
    pname_ns: str,
    pname: str,
    expected_result: Union[URIRef, Type[Exception]],
    blank_graph: Graph,
) -> None:
    """
    The given pname produces the expected result.

    :param pname_ns: prefix label declared in the query's ``PREFIX`` clause.
    :param pname: text placed in the object position of the CONSTRUCT template.
    :param expected_result: either the ``URIRef`` the object must equal, or an
        exception class that running the query must raise.
    :param blank_graph: graph the query is run against.
    """
    # ``Optional`` in this module is imported from pyparsing (a parser element
    # class), not from typing, so the "may be None" type is spelled with
    # ``Union`` instead.
    catcher: Union[pytest.ExceptionInfo[Exception], None] = None

    with ExitStack() as xstack:
        # Only enter ``pytest.raises`` when an exception class is expected;
        # otherwise the body runs unguarded and any exception fails the test.
        if isinstance(expected_result, type) and issubclass(expected_result, Exception):
            catcher = xstack.enter_context(pytest.raises(expected_result))
        query_string = f"""\
PREFIX {pname_ns}: <{PNAME_PREFIX}>
CONSTRUCT {{
<example:_subject> <example:_predicate> {pname}.
}} WHERE {{}}
"""
        query_result = blank_graph.query(query_string)
        assert query_result.type == "CONSTRUCT"
        assert isinstance(query_result.graph, Graph)
        triples = list(query_result.graph.triples((None, None, None)))
        assert len(triples) == 1
        triple = triples[0]
        result = triple[2]
        logging.debug("result = %s", result)

    if catcher is not None:
        assert isinstance(catcher, pytest.ExceptionInfo)
        assert catcher.value is not None
    else:
        assert isinstance(expected_result, URIRef)
        assert expected_result == result

0 comments on commit 958b9a1

Please sign in to comment.