| 1 | from rdfalchemy import URIRef , Literal, BNode, RDF |
|---|
| 2 | from rdfalchemy.exceptions import MalformedQueryError, QueryEvaluationError |
|---|
| 3 | from rdfalchemy.sparql.parsers import _XMLSPARQLHandler,_JSONSPARQLHandler |
|---|
| 4 | |
|---|
| 5 | from rdflib import ConjunctiveGraph |
|---|
| 6 | from rdflib.plugins.parsers.ntriples import NTriplesParser |
|---|
| 7 | |
|---|
| 8 | from urllib2 import urlopen, Request, HTTPError |
|---|
| 9 | from urllib import urlencode |
|---|
| 10 | |
|---|
| 11 | import re |
|---|
| 12 | import logging |
|---|
| 13 | |
|---|
# Names exported by ``from <this module> import *``.
__all__ = ["SPARQLGraph"]

# Module-level logger shared by everything defined in this file.
log = logging.getLogger(__name__)
|---|
| 17 | |
|---|
class DumpSink(object):
    """Minimal triple sink: counts triples and remembers the latest one.

    NOTE(review): presumably meant to be handed to ``NTriplesParser``
    (imported by this module) as its sink -- confirm against callers.
    """

    def __init__(self):
        # Number of triples received so far.
        self.length = 0
        # Initialised here so get_triple() is safe to call before the first
        # triple() call instead of raising AttributeError.
        self._triple = None

    def triple(self, s, p, o):
        """Record ``(s, p, o)`` as the latest triple and bump the counter."""
        self.length += 1
        self._triple = (s, p, o)

    def get_triple(self):
        """Return the most recently recorded triple, or ``None`` if none yet."""
        return self._triple
|---|
| 28 | |
|---|
| 29 | |
|---|
class SPARQLGraph(object):
    """Provide (some of) the rdflib Graph API over HTTP to a SPARQL endpoint.

    Gives 'read-only' access to the graph.  The constructor takes the URL of
    the endpoint and an optional context,
    e.g. ``SPARQLGraph('http://localhost:2020/sparql')``.
    """

    # Maps the ``resultMethod`` argument of :meth:`query` to the handler class
    # that :meth:`getParser` instantiates with the request URL.
    parsers = {'xml': _XMLSPARQLHandler, 'json': _JSONSPARQLHandler}

    def __init__(self, url, context=None):
        """
        :param url: URL of the SPARQL endpoint; queries are appended to it
            as a url-encoded query string
        :param context: stored on the instance; not read by any method
            defined in this class
        """
        self.url = url
        self.context = context

    # ------------------------------------------------------------------
    # private helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _is_query_text(value):
        """Return True if `value` is a byte string or a unicode string."""
        try:
            text_types = basestring  # Python 2: covers str and unicode
        except NameError:
            text_types = str
        return isinstance(value, text_types)

    @staticmethod
    def _triple_pattern(s, p, o):
        """Render a SPARQL triple pattern; ``None`` terms become ?s/?p/?o.

        Only ``None`` is a wildcard.  A truthiness test would also turn
        falsy terms (e.g. an empty literal) into variables.
        """
        def term(node, var):
            return var if node is None else node.n3()
        return '%s %s %s' % (term(s, '?s'), term(p, '?p'), term(o, '?o'))

    @classmethod
    def _prepare_query(cls, query, initBindings=None, initNs=None):
        """Substitute initial bindings, then prepend prefix declarations."""
        if initBindings:
            query = cls._processInitBindings(query, initBindings)
        if initNs:
            prefixes = ''.join(["prefix %s: <%s>\n" % (prefix, ns)
                                for prefix, ns in initNs.items()])
            query = prefixes + query
        return query

    def _load_graph(self, query, graph, log_format):
        """GET `query` from the endpoint as RDF/XML and parse it into `graph`.

        :param log_format: debug-log format taking the url and the headers
        :returns: `graph`, after parsing the response into it
        """
        req = Request(self.url + "?" + urlencode(dict(query=query)))
        req.add_header('Accept', 'application/rdf+xml')
        log.debug(log_format, req.get_full_url(), req.header_items())
        response = urlopen(req)
        try:
            graph.parse(response)
        finally:
            # Do not leak the HTTP connection, even if parsing fails.
            response.close()
        return graph

    # ------------------------------------------------------------------
    # graph access
    # ------------------------------------------------------------------
    def construct(self, strOrTriple, initBindings=None, initNs=None):
        """
        Executes a SPARQL Construct

        :param strOrTriple: can be either

          * a string in which case it is considered a CONSTRUCT query
          * a triple in which case it acts as the rdflib `triples((s,p,o))`

        :param initBindings: A mapping from a Variable to an RDFLib term
            (used as initial bindings for a string query)
        :param initNs: A mapping from a namespace prefix to a namespace
            (prepended as prefix declarations to a string query)

        :returns: an instance of rdflib.ConjunctiveGraph('IOMemory')
        """
        if self._is_query_text(strOrTriple):
            query = self._prepare_query(strOrTriple, initBindings, initNs)
        else:
            s, p, o = strOrTriple
            pattern = self._triple_pattern(s, p, o)
            query = 'construct {%s} where {%s}' % (pattern, pattern)
        return self._load_graph(query, ConjunctiveGraph('IOMemory'),
                                "Request url: %s\n with headers: %s")

    def triples(self, triple, method='CONSTRUCT'):
        """
        :returns: a generator over triples matching the pattern
        :param triple: an ``(s, p, o)`` pattern; ``None`` matches anything
        :param method: must be 'CONSTRUCT' or 'SELECT'

          * CONSTRUCT calls CONSTRUCT query and returns a Graph result
          * SELECT calls a SELECT query and returns an iterator streaming
            over the results

        Use SELECT if you expect a large result set or may consume less than
        the entire result

        :raises ValueError: if `method` is neither 'CONSTRUCT' nor 'SELECT'
        """
        # Unpacked in the body: tuple parameters in the signature are
        # Python-2-only syntax (removed by PEP 3113).
        s, p, o = triple
        if method == 'CONSTRUCT':
            return self.construct((s, p, o)).triples((None, None, None))
        if method == 'SELECT':
            pattern = self._triple_pattern(s, p, o)
            return self.query("select ?s ?p ?o where { %s . }" % pattern)
        raise ValueError("Unknown method: %s" % (method,))

    def __iter__(self):
        """Iterates over all triples in the store"""
        return self.triples((None, None, None))

    def __contains__(self, triple):
        """Support for 'triple in graph' syntax"""
        for _match in self.triples(triple):
            return True
        return False

    def subjects(self, predicate=None, object=None):
        """A generator of subjects with the given predicate and object"""
        for s, p, o in self.triples((None, predicate, object)):
            yield s

    def predicates(self, subject=None, object=None):
        """A generator of predicates with the given subject and object"""
        for s, p, o in self.triples((subject, None, object)):
            yield p

    def objects(self, subject=None, predicate=None):
        """A generator of objects with the given subject and predicate"""
        for s, p, o in self.triples((subject, predicate, None)):
            yield o

    def subject_predicates(self, object=None):
        """A generator of (subject, predicate) tuples for the given object"""
        for s, p, o in self.triples((None, None, object)):
            yield s, p

    def subject_objects(self, predicate=None):
        """A generator of (subject, object) tuples for the given predicate"""
        for s, p, o in self.triples((None, predicate, None)):
            yield s, o

    def predicate_objects(self, subject=None):
        """A generator of (predicate, object) tuples for the given subject"""
        for s, p, o in self.triples((subject, None, None)):
            yield p, o

    def value(self, subject=None, predicate=RDF.value, object=None,
              default=None, any=True):
        """Get a value for a pair of two criteria

        Exactly one of subject, predicate, object must be None. Useful if one
        knows that there may only be one value.  Any other combination
        returns None.

        It is one of those situations that occur a lot, hence this *macro*
        like utility

        :param subject, predicate, object: exactly one must be None
        :param default: value to be returned if no values found
        :param any: if more than one answer return **any one** answer,
            otherwise `raise UniquenessError`
        """
        if object is None and subject is not None and predicate is not None:
            values = self.objects(subject, predicate)
        elif subject is None and predicate is not None and object is not None:
            values = self.subjects(predicate, object)
        elif predicate is None and subject is not None and object is not None:
            values = self.predicates(subject, object)
        else:
            # Zero, two or three terms missing: nothing sensible to look up.
            return None

        try:
            retval = next(values)
        except StopIteration:
            return default

        if any is False:
            try:
                next(values)
            except StopIteration:
                pass  # exactly one value: the uniqueness assumption holds
            else:
                # Imported here because the module never imports it at the
                # top level.
                from rdflib.exceptions import UniquenessError
                msg = ("While trying to find a value for (%s, %s, %s) the "
                       "following multiple values where found:\n" %
                       (subject, predicate, object))
                for (s, p, o) in self.triples((subject, predicate, object)):
                    msg += "(%s, %s, %s)\n" % (s, p, o)
                raise UniquenessError(msg)
        return retval

    def label(self, subject, default=''):
        """Query for the RDFS.label of the subject

        Return default if no label exists
        """
        if subject is None:
            return default
        # RDFS is not among the module-level imports, so bring it in here.
        from rdflib.namespace import RDFS
        return self.value(subject, RDFS.label, default=default, any=True)

    def comment(self, subject, default=''):
        """Query for the RDFS.comment of the subject

        Return default if no comment exists
        """
        if subject is None:
            return default
        # RDFS is not among the module-level imports, so bring it in here.
        from rdflib.namespace import RDFS
        return self.value(subject, RDFS.comment, default=default, any=True)

    def items(self, list):
        """Generator over all items in the resource specified by list

        list is an RDF collection.
        """
        while list:
            item = self.value(list, RDF.first)
            # `is not None` rather than truthiness so that falsy members
            # (e.g. an empty literal) are still yielded.
            if item is not None:
                yield item
            list = self.value(list, RDF.rest)

    def transitive_objects(self, subject, property, remember=None):
        """Transitively generate objects for the `property` relationship

        Generated objects belong to the depth first transitive closure of the
        `property` relationship starting at `subject`.  `subject` itself is
        yielded first.

        :param remember: mapping of nodes already visited; used to stop on
            cycles and shared between the recursive calls
        """
        if remember is None:
            remember = {}
        if subject in remember:
            return
        remember[subject] = 1
        yield subject
        for object in self.objects(subject, property):
            for o in self.transitive_objects(object, property, remember):
                yield o

    def transitive_subjects(self, predicate, object, remember=None):
        """Transitively generate subjects for the `predicate` relationship

        Generated subjects belong to the depth first transitive closure of
        the `predicate` relationship starting at `object`.  `object` itself
        is yielded first.

        :param remember: mapping of nodes already visited; used to stop on
            cycles and shared between the recursive calls
        """
        if remember is None:
            remember = {}
        if object in remember:
            return
        remember[object] = 1
        yield object
        for subject in self.subjects(predicate, object):
            for s in self.transitive_subjects(predicate, subject, remember):
                yield s

    def qname(self, uri):
        """turn uri into a qname given self.namespaces
        This works for rdflib graphs and is defined for SesameGraph
        but is **not** part of SPARQLGraph"""
        raise NotImplementedError

    # ------------------------------------------------------------------
    # queries
    # ------------------------------------------------------------------
    def query(self, strOrQuery, initBindings=None, initNs=None,
              resultMethod="xml", processor="sparql", rawResults=False):
        """
        Executes a SPARQL query against this Graph

        :param strOrQuery: a string consisting of the SPARQL query
        :param initBindings: *optional* mapping from a Variable to an RDFLib
            term (used as initial bindings for SPARQL query)
        :param initNs: optional mapping from a namespace prefix to a namespace
        :param resultMethod: results query requested (must be 'xml' or 'json')
            xml streams over the result set and json must read the entire set
            to succeed
        :param processor: The kind of RDF query (must be 'sparql' or 'serql')
        :param rawResults: If set to `True`, returns the raw xml or json
            stream rather than the parsed results.
        """
        log.debug("Raw Query: %s", strOrQuery)
        query = self._prepare_query(strOrQuery, initBindings, initNs)
        log.debug("Prepared Query: %s", query)
        params = dict(query=query, queryLn=processor)
        url = self.url + "?" + urlencode(params)
        parser = self.getParser(resultMethod, url)
        if rawResults:
            return parser.stream
        return parser.parse()

    def getParser(self, resultMethod, url):
        """Instantiate the result handler registered for `resultMethod`.

        :param resultMethod: key into :attr:`parsers`
        :param url: the full request url, passed to the handler class
        :raises ValueError: if `resultMethod` is not a key of :attr:`parsers`
        :raises MalformedQueryError: if the handler raises an HTTP 400 error;
            the message is the ``<pre>`` section of the error body when there
            is one, otherwise the whole body
        """
        # Look the handler up separately from calling it, so that a
        # LookupError raised inside the handler is not misreported as an
        # invalid resultMethod.
        try:
            handler = self.parsers[resultMethod]
        except LookupError:
            raise ValueError("Invalid resultMethod: %s" % resultMethod)
        try:
            return handler(url)
        except HTTPError as e:
            if e.code == 400:  # and e.msg.startswith('Parse_error'):
                errmsg = e.fp.read()
                found = re.search("<pre>(.*)</pre>", errmsg,
                                  re.MULTILINE | re.DOTALL)
                submsg = found and found.group(1)
                raise MalformedQueryError(submsg or errmsg)
            raise

    @classmethod
    def _processInitBindings(cls, query, initBindings):
        """_processInitBindings will convert a query by replacing the Variables

        >>> SPARQLGraph._processInitBindings('SELECT ?x { ?x ?y ?z }', {'z' : 'hi'})
        u'SELECT ?x { ?x ?y "hi" }'
        >>> SPARQLGraph._processInitBindings('SELECT ?x { ?x <http://example/?z=1> ?z }', {'z' : 'hi'})
        u'SELECT ?x { ?x <http://example/?z=1> "hi" }'

        :param query: the query to process
        :param initBindings: a dict of variable to value"""
        # TODO: what if a BNode is the val in the bindings
        # should it be left at a ?var or converted to a _:bnode ???
        if not initBindings:
            return query

        def varval(match):
            var = match.group(1)
            if var in initBindings:
                val = initBindings[var]
                try:
                    return val.n3()
                except AttributeError:
                    # Plain Python value: wrap it in a Literal first.
                    return Literal(val).n3()
            return match.group()

        # Escape the names so they are matched literally.  The trailing
        # (?!\w) stops binding `z` from rewriting part of `?zz`.
        names = '|'.join([re.escape(name) for name in initBindings])
        re_qvars = re.compile(r'(?<=[\]\.\;\{\s])\?(%s)(?!\w)' % names)
        return re_qvars.sub(varval, query)

    def describe(self, s_or_po, initBindings=None, initNs=None):
        """
        Executes a SPARQL describe of resource

        :param s_or_po: is either

          * a subject ... should be a URIRef
          * a tuple of (predicate,object) ... pred should be inverse functional
          * a describe query string

        :param initBindings: A mapping from a Variable to an RDFLib term
            (used as initial bindings for a string query)
        :param initNs: A mapping from a namespace prefix to a namespace
            (prepended as prefix declarations to a string query)

        :returns: an rdflib ConjunctiveGraph parsed from the response
        """
        # Test for rdflib terms before testing for text: rdflib terms
        # subclass the text type, so a URIRef would otherwise be taken for a
        # query string.
        if isinstance(s_or_po, (URIRef, BNode)):
            query = "describe %s" % (s_or_po.n3())
        elif self._is_query_text(s_or_po):
            query = self._prepare_query(s_or_po, initBindings, initNs)
        else:
            p, o = s_or_po
            query = "describe ?s where {?s %s %s}" % (p.n3(), o.n3())
        return self._load_graph(query, ConjunctiveGraph(),
                                "opening url: %s\n with headers: %s")
|---|