Package rdfalchemy :: Package sparql
[hide private]
[frames | no frames]

Source Code for Package rdfalchemy.sparql

  1  from rdfalchemy import URIRef , Literal, BNode, RDF 
  2  from rdfalchemy.exceptions import MalformedQueryError, QueryEvaluationError 
  3  from rdfalchemy.sparql.parsers import _XMLSPARQLHandler,_JSONSPARQLHandler 
  4   
  5  from rdflib import ConjunctiveGraph 
  6  from rdflib.syntax.parsers.ntriples import NTriplesParser 
  7   
  8  from urllib2 import urlopen, Request, HTTPError 
  9  from urllib import urlencode 
 10   
 11  import re 
 12  import logging 
 13   
 14  __all__=["SPARQLGraph",] 
 15   
 16  log=logging.getLogger(__name__) 
class DumpSink(object):
    """Minimal triple sink that counts triples and remembers the latest one.

    Exposes the ``triple(s, p, o)`` callback plus a ``length`` counter.
    NOTE(review): presumably intended as the sink for the imported
    ``NTriplesParser`` -- confirm against callers.
    """

    def __init__(self):
        # Number of triples received so far.
        self.length = 0
        # Most recently received (s, p, o); None until the first triple
        # arrives, so get_triple() is safe to call on a fresh sink.
        self._triple = None

    def triple(self, s, p, o):
        """Record one triple: bump the counter and keep it as the latest."""
        self.length += 1
        self._triple = (s, p, o)

    def get_triple(self):
        """Return the last ``(s, p, o)`` received, or None if there was none."""
        return self._triple
28
class SPARQLGraph(object):
    """Provide (some of) the rdflib Graph API over HTTP to a SPARQL endpoint.

    Gives *read-only* access to the graph.  The constructor takes the HTTP
    endpoint URL, e.g. ``SPARQLGraph('http://localhost:2020/sparql')``.
    """

    # Result-format name -> handler class instantiated by getParser().
    parsers = {'xml': _XMLSPARQLHandler, 'json': _JSONSPARQLHandler}

    # The text type (``unicode`` on Python 2) without naming it, so that both
    # byte and text strings are recognised as query strings.
    _text_type = type(u'')
    _string_types = (str, _text_type)

    def __init__(self, url, context=None):
        """Remember the endpoint `url` and an optional `context`.

        :param url: URL of the SPARQL endpoint
        :param context: stored on the instance; not used by the methods of
            this class
        """
        self.url = url
        self.context = context

    # ------------------------------------------------------------------
    # private helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _n3_or_var(term, var):
        """Return ``term.n3()``, or the variable text `var` if `term` is None.

        Testing ``is None`` (rather than truthiness) keeps falsy terms such
        as an empty-string Literal from being treated as wildcards.
        """
        if term is None:
            return var
        return term.n3()

    @classmethod
    def _triple_pattern(cls, s, p, o):
        """Render (s, p, o) as a SPARQL triple pattern; None -> ?s/?p/?o."""
        return '%s %s %s' % (cls._n3_or_var(s, '?s'),
                             cls._n3_or_var(p, '?p'),
                             cls._n3_or_var(o, '?o'))

    @staticmethod
    def _prefix_block(initNs):
        """Return the ``prefix`` declarations for `initNs` ('' when empty)."""
        if not initNs:
            return ''
        return ''.join(["prefix %s: <%s>\n" % (p, n)
                        for p, n in initNs.items()])

    def _request_url(self, **params):
        """Return the endpoint URL with `params` appended as a query string.

        Text values are UTF-8 encoded first: Python 2's ``urlencode`` calls
        ``str()`` on each value and would fail on non-ASCII text.
        """
        encoded = {}
        for key, value in params.items():
            if isinstance(value, self._text_type):
                value = value.encode('utf-8')
            encoded[key] = value
        return self.url + "?" + urlencode(encoded)

    def _fetch_graph(self, query, subgraph, logfmt):
        """GET `query` asking for RDF/XML and parse the response into `subgraph`.

        :param logfmt: debug-log format taking (full url, header items)
        :returns: `subgraph`
        """
        req = Request(self._request_url(query=query))
        req.add_header('Accept', 'application/rdf+xml')
        log.debug(logfmt, req.get_full_url(), req.header_items())
        response = urlopen(req)
        try:
            subgraph.parse(response)
        finally:
            # Always release the HTTP connection, even if parsing fails.
            response.close()
        return subgraph

    # ------------------------------------------------------------------
    # graph-producing queries
    # ------------------------------------------------------------------
    def construct(self, strOrTriple, initBindings=None, initNs=None):
        """
        Executes a SPARQL Construct

        :param strOrTriple: can be either

          * a string in which case it is considered a CONSTRUCT query
          * a triple in which case it acts as the rdflib `triples((s,p,o))`

        :param initBindings: A mapping from a Variable to an RDFLib term
            (used as initial bindings for a query string)
        :param initNs: A mapping from a namespace prefix to a namespace
            (prepended as ``prefix`` declarations to a query string)

        :returns: an instance of rdflib.ConjunctiveGraph('IOMemory')
        """
        if isinstance(strOrTriple, self._string_types):
            query = strOrTriple
            if initBindings:
                query = self._processInitBindings(query, initBindings)
            query = self._prefix_block(initNs) + query
        else:
            s, p, o = strOrTriple
            t = self._triple_pattern(s, p, o)
            query = 'construct {%s} where {%s}' % (t, t)
        return self._fetch_graph(query, ConjunctiveGraph('IOMemory'),
                                 "Request url: %s\n with headers: %s")

    def triples(self, triple, method='CONSTRUCT'):
        """
        :returns: a generator over triples matching the pattern
        :param triple: an ``(s, p, o)`` pattern; None acts as a wildcard
        :param method: must be 'CONSTRUCT' or 'SELECT'

          * CONSTRUCT calls CONSTRUCT query and returns a Graph result
          * SELECT calls a SELECT query and returns an iterator streaming
            over the results

        Use SELECT if you expect a large result set or may consume less than
        the entire result

        :raises ValueError: if `method` is neither 'CONSTRUCT' nor 'SELECT'
        """
        s, p, o = triple
        if method == 'CONSTRUCT':
            return self.construct((s, p, o)).triples((None, None, None))
        if method == 'SELECT':
            # NOTE(review): when a term is bound, its ?s/?p/?o variable does
            # not occur in the WHERE pattern; what the result rows carry for
            # that position depends on the endpoint/parser -- verify.
            pattern = self._triple_pattern(s, p, o)
            query = "select ?s ?p ?o where { %s . }" % pattern
            return self.query(query)
        raise ValueError("Unknown method: %s" % (method))

    def __iter__(self):
        """Iterates over all triples in the store"""
        return self.triples((None, None, None))

    def __contains__(self, triple):
        """Support for 'triple in graph' syntax"""
        for _match in self.triples(triple):
            return True
        return False

    # ------------------------------------------------------------------
    # rdflib-style accessors, all built on triples()
    # ------------------------------------------------------------------
    def subjects(self, predicate=None, object=None):
        """A generator of subjects with the given predicate and object"""
        for s, p, o in self.triples((None, predicate, object)):
            yield s

    def predicates(self, subject=None, object=None):
        """A generator of predicates with the given subject and object"""
        for s, p, o in self.triples((subject, None, object)):
            yield p

    def objects(self, subject=None, predicate=None):
        """A generator of objects with the given subject and predicate"""
        for s, p, o in self.triples((subject, predicate, None)):
            yield o

    def subject_predicates(self, object=None):
        """A generator of (subject, predicate) tuples for the given object"""
        for s, p, o in self.triples((None, None, object)):
            yield s, p

    def subject_objects(self, predicate=None):
        """A generator of (subject, object) tuples for the given predicate"""
        for s, p, o in self.triples((None, predicate, None)):
            yield s, o

    def predicate_objects(self, subject=None):
        """A generator of (predicate, object) tuples for the given subject"""
        for s, p, o in self.triples((subject, None, None)):
            yield p, o

    def value(self, subject=None, predicate=RDF.value, object=None,
              default=None, any=True):
        """Get a value for a pair of two criteria

        Exactly one of subject, predicate, object must be None. Useful if one
        knows that there may only be one value.

        It is one of those situations that occur a lot, hence this *macro*
        like utility

        :param subject, predicate, object: exactly one must be None
        :param default: value to be returned if no values found
        :param any: if more than one answer return **any one** answer,
            otherwise `raise UniquenessError`
        :returns: the value found, `default` if there is none, or None when
            fewer than two of the three terms are bound
        :raises ValueError: if all three terms are bound
        """
        # Fewer than two bound terms: nothing meaningful to look up.
        if (subject is None and (predicate is None or object is None)) or \
                (predicate is None and object is None):
            return None

        if object is None:
            values = self.objects(subject, predicate)
        elif subject is None:
            values = self.subjects(predicate, object)
        elif predicate is None:
            values = self.predicates(subject, object)
        else:
            raise ValueError(
                "exactly one of subject, predicate, object must be None")

        missing = object_sentinel = []  # unique marker for "no more values"
        retval = next(values, missing)
        if retval is missing:
            return default

        if any is False and next(values, object_sentinel) is not missing:
            # Imported here because the module does not import it globally.
            from rdflib import exceptions
            lines = ["While trying to find a value for (%s, %s, %s) the "
                     "following multiple values where found:\n" %
                     (subject, predicate, object)]
            for (s, p, o) in self.triples((subject, predicate, object)):
                lines.append("(%s, %s, %s)\n" % (s, p, o))
            raise exceptions.UniquenessError(''.join(lines))
        return retval

    def label(self, subject, default=''):
        """Query for the RDFS.label of the subject

        Return default if no label exists
        """
        if subject is None:
            return default
        # Imported here because the module does not import RDFS globally.
        from rdflib import RDFS
        return self.value(subject, RDFS.label, default=default, any=True)

    def comment(self, subject, default=''):
        """Query for the RDFS.comment of the subject

        Return default if no comment exists
        """
        if subject is None:
            return default
        # Imported here because the module does not import RDFS globally.
        from rdflib import RDFS
        return self.value(subject, RDFS.comment, default=default, any=True)

    def items(self, list):
        """Generator over all items in the resource specified by list

        list is an RDF collection.

        :raises ValueError: if the ``rdf:rest`` chain loops back on itself
        """
        visited = set()
        node = list
        while node:
            if node in visited:
                raise ValueError(
                    "List contains a recursive rdf:rest reference")
            visited.add(node)
            item = self.value(node, RDF.first)
            # Compare with None so falsy members (e.g. an empty Literal)
            # are still yielded.
            if item is not None:
                yield item
            node = self.value(node, RDF.rest)

    def transitive_objects(self, subject, property, remember=None):
        """Transitively generate objects for the `property` relationship

        Generated objects belong to the depth first transitive closure of the
        `property` relationship starting at `subject` (which is yielded
        first).  `remember` maps already-visited nodes to 1 and stops cycles.
        """
        if remember is None:
            remember = {}
        if subject in remember:
            return
        remember[subject] = 1
        yield subject
        for object in self.objects(subject, property):
            for o in self.transitive_objects(object, property, remember):
                yield o

    def transitive_subjects(self, predicate, object, remember=None):
        """Transitively generate subjects for the `predicate` relationship

        Generated subjects belong to the depth first transitive closure of
        the `predicate` relationship ending at `object` (which is yielded
        first).  `remember` maps already-visited nodes to 1 and stops cycles.
        """
        if remember is None:
            remember = {}
        if object in remember:
            return
        remember[object] = 1
        yield object
        for subject in self.subjects(predicate, object):
            for s in self.transitive_subjects(predicate, subject, remember):
                yield s

    def qname(self, uri):
        """turn uri into a qname given self.namespaces
        This works for rdflib graphs and is defined for SesameGraph
        but is **not** part of SPARQLGraph"""
        raise NotImplementedError

    # ------------------------------------------------------------------
    # SELECT-style queries
    # ------------------------------------------------------------------
    def query(self, strOrQuery, initBindings=None, initNs=None,
              resultMethod="xml", processor="sparql", rawResults=False):
        """
        Executes a SPARQL query against this Graph

        :param strOrQuery: a string consisting of the SPARQL query
        :param initBindings: *optional* mapping from a Variable to an RDFLib
            term (used as initial bindings for SPARQL query)
        :param initNs: optional mapping from a namespace prefix to a namespace
        :param resultMethod: results query requested (must be 'xml' or 'json')
            xml streams over the result set and json must read the entire set
            to succeed
        :param processor: The kind of RDF query (must be 'sparql' or 'serql')
        :param rawResults: If set to `True`, returns the raw xml or json
            stream rather than the parsed results.
        """
        log.debug("Raw Query: %s", strOrQuery)
        if initBindings:
            query = self._processInitBindings(strOrQuery, initBindings)
        else:
            query = strOrQuery
        query = self._prefix_block(initNs) + query
        log.debug("Prepared Query: %s", query)
        url = self._request_url(query=query, queryLn=processor)
        parser = self.getParser(resultMethod, url)

        if rawResults:
            return parser.stream
        return parser.parse()

    def getParser(self, resultMethod, url):
        """Instantiate the result handler registered for `resultMethod` on `url`.

        :raises ValueError: if `resultMethod` is not a key of `parsers`
        :raises MalformedQueryError: if the endpoint answers HTTP 400; the
            message is the ``<pre>`` section of the error body when present,
            otherwise the whole body
        :raises HTTPError: any other HTTP error, re-raised unchanged
        """
        # Look the handler up separately so that a LookupError raised while
        # the handler runs is not misreported as a bad resultMethod.
        try:
            handler = self.parsers[resultMethod]
        except LookupError:
            raise ValueError("Invalid resultMethod: %s" % resultMethod)

        try:
            return handler(url)
        except HTTPError as e:
            if e.code == 400:  # and e.msg.startswith('Parse_error'):
                errmsg = e.fp.read()
                found = re.search("<pre>(.*)</pre>", errmsg,
                                  re.MULTILINE | re.DOTALL)
                submsg = found and found.groups()[0]
                raise MalformedQueryError(submsg or errmsg)
            raise

    @classmethod
    def _processInitBindings(cls, query, initBindings):
        """_processInitBindings will convert a query by replacing the Variables

        >>> SPARQLGraph._processInitBindings('SELECT ?x { ?x ?y ?z }', {'z' : 'hi'})
        u'SELECT ?x { ?x ?y "hi" }'
        >>> SPARQLGraph._processInitBindings('SELECT ?x { ?x <http://example/?z=1> ?z }', {'z' : 'hi'})
        u'SELECT ?x { ?x <http://example/?z=1> "hi" }'

        :param query: the query to process
        :param initBindings: a dict of variable to value"""
        # TODO: what if a BNode is the val in the bindings
        # should it be left at a ?var or converted to a _:bnode ???
        if not initBindings:
            return query

        def varval(match):
            var = match.groups()[0]
            if var in initBindings:
                val = initBindings[var]
                # Values that are not RDF terms are wrapped in a Literal.
                n3 = getattr(val, 'n3', None)
                if n3 is None:
                    return Literal(val).n3()
                return n3()
            return match.group()

        # Longest names first and a trailing (?!\w) so that binding ``z``
        # never rewrites the front of a longer variable such as ``?zebra``;
        # names are escaped so they cannot inject regex syntax.
        names = sorted(initBindings.keys(), key=len, reverse=True)
        alternatives = '|'.join([re.escape(name) for name in names])
        re_qvars = re.compile(
            r'(?<=[\]\.\;\{\s])\?(%s)(?!\w)' % alternatives)
        return re_qvars.sub(varval, query)

    def describe(self, s_or_po, initBindings=None, initNs=None):
        """
        Executes a SPARQL describe of resource

        :param s_or_po: is either

          * a subject ... should be a URIRef
          * a tuple of (predicate,object) ... pred should be inverse functional
          * a describe query string

        :param initBindings: A mapping from a Variable to an RDFLib term
            (used as initial bindings for a query string)
        :param initNs: A mapping from a namespace prefix to a namespace
            (prepended as ``prefix`` declarations to a query string)

        :returns: an rdflib ConjunctiveGraph holding the response
        """
        # URIRef/BNode are tested first: they are text subclasses and would
        # otherwise be mistaken for a query string.
        if isinstance(s_or_po, (URIRef, BNode)):
            query = "describe %s" % (s_or_po.n3())
        elif isinstance(s_or_po, self._string_types):
            query = s_or_po
            if initBindings:
                query = self._processInitBindings(query, initBindings)
            query = self._prefix_block(initNs) + query
        else:
            p, o = s_or_po
            query = "describe ?s where {?s %s %s}" % (p.n3(), o.n3())
        return self._fetch_graph(query, ConjunctiveGraph(),
                                 "opening url: %s\n with headers: %s")
343