Skip to content

Commit 6dc9a4e

Browse files
authored
Merge branch 'main' into n3-no-norm-float
2 parents 3d96939 + fd21a00 commit 6dc9a4e

File tree

2 files changed

+200
-5
lines changed

2 files changed

+200
-5
lines changed

Diff for: rdflib/extras/shacl.py

+118-1
Original file line numberDiff line numberDiff line change
@@ -6,18 +6,28 @@
66

77
from typing import TYPE_CHECKING
88

9-
from rdflib import Graph, Literal, URIRef, paths
9+
from rdflib import BNode, Graph, Literal, URIRef, paths
10+
from rdflib.collection import Collection
1011
from rdflib.namespace import RDF, SH
1112
from rdflib.paths import Path
1213

1314
if TYPE_CHECKING:
1415
from rdflib.graph import _ObjectType
16+
from rdflib.term import IdentifiedNode
1517

1618

1719
class SHACLPathError(Exception):
    """Raised when a SHACL path cannot be parsed from, or built into, a graph."""
1921

2022

23+
# Lookup table from each variable-length path modifier to the SHACL predicate
# that expresses it.
_PATH_MOD_TO_PRED = {
    paths.ZeroOrMore: SH.zeroOrMorePath,
    paths.OneOrMore: SH.oneOrMorePath,
    paths.ZeroOrOne: SH.zeroOrOnePath,
}
29+
30+
2131
# This implementation is roughly based on
2232
# pyshacl.helper.sparql_query_helper::SPARQLQueryHelper._shacl_path_to_sparql_path
2333
def parse_shacl_path(
@@ -93,3 +103,110 @@ def parse_shacl_path(
93103
raise SHACLPathError(f"Cannot parse {repr(path_identifier)} as a SHACL Path.")
94104

95105
return path
106+
107+
108+
def _build_path_component(
    graph: Graph, path_component: URIRef | Path
) -> IdentifiedNode:
    """
    Helper method that implements the recursive component of SHACL path
    triple construction.

    :param graph: A :class:`~rdflib.graph.Graph` into which to insert triples
    :param path_component: A :class:`~rdflib.term.URIRef` or
        :class:`~rdflib.paths.Path` that is part of a path expression
    :return: The :class:`~rdflib.term.IdentifiedNode` of the resource in the
        graph that corresponds to the provided path_component
    :raises TypeError: If path_component is neither a
        :class:`~rdflib.term.URIRef` nor a :class:`~rdflib.paths.Path`
    :raises SHACLPathError: If a sequence or alternative path has fewer than
        two items, if a variable length path uses an unknown modifier, or if
        the path is of a kind that has no SHACL representation here
    """
    # Literals or other types are not allowed
    if not isinstance(path_component, (URIRef, Path)):
        raise TypeError(
            f"Objects of type {type(path_component)} are not valid "
            + "components of a SHACL path."
        )

    # A URI is already a complete path node; no triples are required
    if isinstance(path_component, URIRef):
        return path_component

    # Otherwise, the path component is represented as a blank node
    bnode = BNode()

    # Handle Sequence Paths
    if isinstance(path_component, paths.SequencePath):
        # Sequence paths are a Collection directly with at least two items
        if len(path_component.args) < 2:
            raise SHACLPathError(
                "A list of SHACL Sequence Paths must contain at least two path items."
            )
        Collection(
            graph,
            bnode,
            [_build_path_component(graph, arg) for arg in path_component.args],
        )

    # Handle Inverse Paths
    elif isinstance(path_component, paths.InvPath):
        graph.add(
            (bnode, SH.inversePath, _build_path_component(graph, path_component.arg))
        )

    # Handle Alternative Paths
    elif isinstance(path_component, paths.AlternativePath):
        # Alternative paths are a Collection but referenced by sh:alternativePath
        # with at least two items
        if len(path_component.args) < 2:
            raise SHACLPathError(
                "List of SHACL alternate paths must have at least two path items."
            )
        coll = Collection(
            graph,
            BNode(),
            [_build_path_component(graph, arg) for arg in path_component.args],
        )
        graph.add((bnode, SH.alternativePath, coll.uri))

    # Handle Variable Length Paths
    elif isinstance(path_component, paths.MulPath):
        # Get the predicate corresponding to the path modifier
        pred = _PATH_MOD_TO_PRED.get(path_component.mod)
        if pred is None:
            raise SHACLPathError(f"Unknown path modifier {path_component.mod}")
        graph.add((bnode, pred, _build_path_component(graph, path_component.path)))

    # Any other kind of Path (for example a negated path) has no branch above
    # that would attach triples to the blank node. Returning it anyway would
    # hand the caller an empty, meaningless path node, so fail loudly instead.
    else:
        raise SHACLPathError(
            f"Paths of type {type(path_component).__name__} cannot be "
            "represented as a SHACL path."
        )

    # Return the blank node created for the provided path_component
    return bnode
178+
179+
180+
def build_shacl_path(
    path: URIRef | Path, target_graph: Graph | None = None
) -> tuple[IdentifiedNode, Graph | None]:
    """
    Construct the SHACL Path triples that describe ``path``.

    A simple path is given as a :class:`~rdflib.term.URIRef`; a complex path is
    given as a :class:`~rdflib.paths.Path`. The returned node is the one that
    should be used as the object of a triple whose predicate is sh:path.

    :param path: A :class:`~rdflib.term.URIRef` or a :class:`~rdflib.paths.Path`
    :param target_graph: Optionally, a :class:`~rdflib.graph.Graph` into which to put
        constructed triples. If not provided, a new graph will be created
    :return: A (path_identifier, graph) tuple where:
        - path_identifier: For a :class:`~rdflib.term.URIRef` input this is the
          input itself. For a :class:`~rdflib.paths.Path` input this is the
          node at the root of the SHACL path expression added to the graph.
        - graph: None for a :class:`~rdflib.term.URIRef` input (no triples are
          constructed, even if target_graph was supplied). Otherwise the
          target_graph provided, or a newly created graph, holding the path
          triples.
    """
    # A bare URI is the entire path, so there is nothing to construct.
    if isinstance(path, URIRef):
        return path, None

    # Fall back to a fresh graph when the caller did not supply one.
    graph = Graph() if target_graph is None else target_graph

    # Walk the path expression recursively, emitting its triples into the graph.
    root = _build_path_component(graph, path)
    return root, graph

Diff for: test/test_extras/test_shacl_extras.py

+82-4
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,9 @@
44

55
import pytest
66

7-
from rdflib import Graph, URIRef
8-
from rdflib.extras.shacl import SHACLPathError, parse_shacl_path
7+
from rdflib import Graph, Literal, URIRef, paths
8+
from rdflib.compare import graph_diff
9+
from rdflib.extras.shacl import SHACLPathError, build_shacl_path, parse_shacl_path
910
from rdflib.namespace import SH, Namespace
1011
from rdflib.paths import Path
1112

@@ -109,7 +110,32 @@ def path_source_data():
109110
) ;
110111
] ;
111112
.
112-
ex:TestPropShape10
113+
ex:TestPropShape10a
114+
sh:path (
115+
[
116+
sh:zeroOrMorePath [
117+
sh:inversePath ex:pred1 ;
118+
] ;
119+
]
120+
[
121+
sh:alternativePath (
122+
[
123+
sh:zeroOrMorePath [
124+
sh:inversePath ex:pred1 ;
125+
] ;
126+
]
127+
ex:pred1
128+
[
129+
sh:oneOrMorePath ex:pred2 ;
130+
]
131+
[
132+
sh:zeroOrMorePath ex:pred3 ;
133+
]
134+
) ;
135+
]
136+
) ;
137+
.
138+
ex:TestPropShape10b
113139
sh:path (
114140
[
115141
sh:zeroOrMorePath [
@@ -192,7 +218,13 @@ def path_source_data():
192218
~EX.pred1 | EX.pred1 / EX.pred2 | EX.pred1 | EX.pred2 | EX.pred3,
193219
),
194220
(
195-
EX.TestPropShape10,
221+
EX.TestPropShape10a,
222+
~EX.pred1
223+
* "*"
224+
/ (~EX.pred1 * "*" | EX.pred1 | EX.pred2 * "+" | EX.pred3 * "*"), # type: ignore[operator]
225+
),
226+
(
227+
EX.TestPropShape10b,
196228
~EX.pred1
197229
* "*"
198230
/ (~EX.pred1 * "*" | EX.pred1 | EX.pred2 * "+" | EX.pred3 * "*"), # type: ignore[operator]
@@ -217,3 +249,49 @@ def test_parse_shacl_path(
217249
parse_shacl_path(path_source_data, path_root) # type: ignore[arg-type]
218250
else:
219251
assert parse_shacl_path(path_source_data, path_root) == expected # type: ignore[arg-type]
252+
253+
254+
@pytest.mark.parametrize(
    ("resource", "path"),
    (
        # Single SHACL Path
        (EX.TestPropShape1, EX.pred1),
        (EX.TestPropShape2a, EX.pred1 / EX.pred2 / EX.pred3),
        (EX.TestPropShape3, ~EX.pred1),
        (EX.TestPropShape4a, EX.pred1 | EX.pred2 | EX.pred3),
        (EX.TestPropShape5, EX.pred1 * "*"),  # type: ignore[operator]
        (EX.TestPropShape6, EX.pred1 * "+"),  # type: ignore[operator]
        (EX.TestPropShape7, EX.pred1 * "?"),  # type: ignore[operator]
        # SHACL Path Combinations
        (EX.TestPropShape8, ~EX.pred1 * "*"),
        (
            EX.TestPropShape10a,
            ~EX.pred1
            * "*"
            / (~EX.pred1 * "*" | EX.pred1 | EX.pred2 * "+" | EX.pred3 * "*"),  # type: ignore[operator]
        ),
        (TypeError, Literal("Not a valid path")),
        (SHACLPathError, paths.SequencePath(SH.targetClass)),
        (SHACLPathError, paths.AlternativePath(SH.targetClass)),
    ),
)
def test_build_shacl_path(
    path_source_data: Graph, resource: URIRef | type, path: Union[URIRef, Path]
):
    """
    Check that build_shacl_path produces the triples found in the fixture.

    When ``resource`` is an exception class rather than a shape identifier,
    building ``path`` is expected to raise that exception instead.
    """
    # Error cases: the "resource" slot carries the expected exception type.
    if isinstance(resource, type):
        with pytest.raises(resource):
            build_shacl_path(path)
        return

    expected_root = path_source_data.value(resource, SH.path)
    built_root, built_graph = build_shacl_path(path)

    # Simple paths are returned unchanged and produce no graph.
    if isinstance(expected_root, URIRef):
        assert built_root == expected_root
        assert built_graph is None
        return

    # Complex paths: the built graph must match the fixture's description of
    # the path root, with no triples unique to either side.
    assert isinstance(built_graph, Graph)
    expected_graph = path_source_data.cbd(expected_root)  # type: ignore[arg-type]
    _, only_expected, only_built = graph_diff(expected_graph, built_graph)
    assert len(only_expected) == 0
    assert len(only_built) == 0

0 commit comments

Comments
 (0)