Package rdfalchemy :: Package sparql :: Module sesame2
[hide private]
[frames] | no frames]

Source Code for Module rdfalchemy.sparql.sesame2

  1  from rdfalchemy import Literal, BNode, Namespace, URIRef 
  2  from rdfalchemy.sparql import SPARQLGraph, DumpSink 
  3  from rdfalchemy.sparql.parsers import _BRTRSPARQLHandler,_XMLSPARQLHandler,_JSONSPARQLHandler 
  4   
  5  from rdflib.syntax.parsers.ntriples import NTriplesParser 
  6   
  7  from urllib2 import urlopen, Request, HTTPError 
  8  from urllib import urlencode 
  9   
 10  from rdfalchemy.exceptions import MalformedQueryError, QueryEvaluationError 
 11   
 12  import os 
 13  import re 
 14  import simplejson 
 15  import logging 
 16   
 17  __all__=["SesameGraph"] 
 18   
 19  log=logging.getLogger(__name__) 
 20   
class SesameGraph(SPARQLGraph):
    """openrdf-sesame graph accessed over HTTP.

    Uses the sesame2 HTTP communication protocol to provide an rdflib-style
    API.  The constructor takes the HTTP endpoint including the repository
    name, e.g.
    ``SesameGraph('http://www.openvest.org:8080/sesame/repositories/Test')``.
    """

    parsers = {'xml': _XMLSPARQLHandler,
               'json': _JSONSPARQLHandler,
               'brtr': _BRTRSPARQLHandler}

    # Content-Type header sent by parse() for each supported ``format``.
    _sesame_content_types = {'xml': 'application/rdf+xml',
                             'n3': 'text/rdf+n3'}

    def __init__(self, url, context=None):
        """Remember the repository ``url`` and the default ``context``.

        :param url: base URL of the repository; request paths such as
            ``/statements`` are appended to it.
        :param context: *optional* default context used by :meth:`add`
            when no explicit context is given.
        """
        self.url = url
        self.context = context

    # ------------------------------------------------------------------
    # private helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _sesame_context_param(context):
        """Return ``context`` in the form sent as the ``context`` parameter.

        Values that already look encoded (``<...>`` or ``_:...``) are passed
        through unchanged; anything else is wrapped in angle brackets.
        """
        text = "%s" % context
        if text.startswith("<") or text.startswith("_:"):
            return text
        return "<%s>" % text

    def _sesame_bindings(self, path):
        """GET ``self.url + path`` as SPARQL-results JSON; return its bindings."""
        req = Request(self.url + path)
        req.add_header('Accept', 'application/sparql-results+json')
        log.debug("opening url: %s\n with headers: %s",
                  req.get_full_url(), req.header_items())
        response = urlopen(req)
        try:
            return simplejson.load(response)['results']['bindings']
        finally:
            response.close()

    def _sesame_send(self, req):
        """Send ``req`` and return the response body.

        An ``HTTPError`` carrying status 204 is treated as success and yields
        ``None``.  Any other ``HTTPError`` is logged and re-raised.
        """
        try:
            response = urlopen(req)
        except HTTPError as e:
            # 204 is actually the "success" code
            if e.code == 204:
                return None
            log.error(e)
            raise
        try:
            return response.read()
        finally:
            response.close()

    # ------------------------------------------------------------------
    # repository metadata
    # ------------------------------------------------------------------
    def get_namespaces(self):
        """Namespaces dict (prefix -> namespace), fetched once then cached."""
        try:
            return self._namespaces
        except AttributeError:
            # not fetched yet
            pass
        bindings = self._sesame_bindings('/namespaces')
        self._namespaces = dict(
            (b['prefix']['value'], b['namespace']['value']) for b in bindings)
        return self._namespaces
    namespaces = property(get_namespaces)

    def get_contexts(self):
        """Context list, fetched once then cached ... pretty slow."""
        try:
            return self._contexts
        except AttributeError:
            # not fetched yet
            pass
        bindings = self._sesame_bindings('/contexts')
        self._contexts = [b['contextID']['value'] for b in bindings]
        return self._contexts
    contexts = property(get_contexts)

    # ------------------------------------------------------------------
    # statements
    # ------------------------------------------------------------------
    def _statement_encode(self, triple, context):
        """Helper function to encode a triple pattern as a sesame statements URL.

        :param triple: ``(s, p, o)``; falsy members are left out of the query.
        :param context: *optional* context to restrict the request to.
        :returns: ``self.url + '/statements'`` with the query string appended
            when at least one of subject/predicate/object/context is given.
        """
        s, p, o = triple
        query = {}
        if s:
            query['subj'] = s.n3()
        if p:
            query['pred'] = p.n3()
        if o:
            query['obj'] = o.n3()
        if context:
            # already-encoded values such as _:adf23123 pass through as-is
            query['context'] = self._sesame_context_param(context)
        url = self.url + '/statements'
        if query:
            url = url + "?" + urlencode(query)
        return url

    def add(self, triple, context=None):
        """Add a triple with optional context.

        :param triple: ``(s, p, o)``; each member must provide ``n3()``.
        :param context: *optional* context; falls back to ``self.context``.
        :returns: the response body, or ``None`` when the server answers 204.
        :raises HTTPError: for any HTTP error other than 204.
        """
        s, p, o = triple
        url = self.url + '/statements'
        ctx = context or self.context
        if ctx:
            # encode the context the same way _statement_encode/parse do
            url = url + "?" + urlencode(
                dict(context=self._sesame_context_param(ctx)))
        req = Request(url)
        req.data = "%s %s %s .\n" % (s.n3(), p.n3(), o.n3())
        req.add_header('Content-Type', 'text/rdf+n3')
        return self._sesame_send(req)

    def remove(self, triple, context=None):
        """Remove a triple from the graph.

        If no context is given, the triple is removed from all contexts.

        :param triple: ``(s, p, o)`` pattern; falsy members act as wildcards
            because they are omitted from the request.
        :returns: the response body, or ``None`` when the server answers 204.
        :raises HTTPError: for any HTTP error other than 204.
        """
        req = Request(self._statement_encode(triple, context))
        req.get_method = lambda: 'DELETE'
        return self._sesame_send(req)

    def triples(self, triple, context=None):
        """Generator over the triple store.

        Yields triples that match the given ``(s, p, o)`` pattern.  If no
        context is given, all contexts are searched.
        """
        req = Request(self._statement_encode(triple, context))
        # N-Triples is best for generator (one line per triple)
        req.add_header('Accept', 'text/plain')
        log.debug("Request: %s", req.get_full_url())
        dumper = DumpSink()
        parser = NTriplesParser(dumper)

        response = urlopen(req)
        try:
            for line in response:
                log.debug('line: %s', line)
                parser.parsestring(line)
                yield dumper.get_triple()
        finally:
            # runs when the generator is exhausted or closed early
            response.close()

    def __len__(self):
        """Return the number of triples in the graph.

        Calls ``{self.url}/size`` -- very fast.
        """
        response = urlopen(self.url + "/size")
        try:
            return int(response.read())
        finally:
            response.close()

    def set(self, triple):
        """Convenience method to update the value of object.

        Remove any existing triples for subject and predicate before adding
        ``(subject, predicate, object)``.
        """
        subject, predicate, obj = triple
        self.remove((subject, predicate, None))
        self.add((subject, predicate, obj))

    def qname(self, uri):
        """Turn ``uri`` into a qname given ``self.namespaces``.

        Returns ``uri`` unchanged when no known namespace is a prefix of it.
        """
        for prefix, namespace in self.namespaces.items():
            if uri.startswith(namespace):
                return "%s:%s" % (prefix, uri[len(namespace):])
        return uri

    def query(self, strOrQuery, initBindings=None, initNs=None,
              resultMethod="brtr", processor="sparql", rawResults=False):
        """Execute a SPARQL query against this Graph.

        :param strOrQuery: Is either a string consisting of the SPARQL query
        :param initBindings: *optional* mapping from a Variable to an RDFLib
            term (used as initial bindings for SPARQL query)
        :param initNs: *optional* mapping from a namespace prefix to a
            namespace
        :param resultMethod: results query requested (must be 'xml', 'json'
            or 'brtr'); xml streams over the result set and json must read
            the entire set to succeed
        :param processor: The kind of RDF query (must be 'sparql' or 'serql')
        :param rawResults: If set to `True`, returns the raw xml or json
            stream rather than the parsed results.
        """
        # fresh dicts per call instead of shared mutable defaults
        if initBindings is None:
            initBindings = {}
        if initNs is None:
            initNs = {}
        # same method as super with different resultMethod default
        return super(SesameGraph, self).query(
            strOrQuery, initBindings, initNs, resultMethod, processor,
            rawResults)

    def parse(self, source, publicID=None, format="xml", method='POST'):
        """Parse source into Graph.

        Graph will get loaded into its own context (sub graph), named after
        ``publicID`` or, failing that, the source URL.
        Format defaults to 'xml' (AKA: rdf/xml).

        :returns: the body of the server response, or ``None`` when the
            server answers 204.
        :raises ValueError: if ``format`` is not 'xml' or 'n3'.
        :raises HTTPError: for any HTTP error other than 204.

        :param source: source file in the form of "http://....." or
            "~/dir/file.rdf"
        :param publicID: *optional* the logical URI if it's different from
            the physical source URI.
        :param format: must be one of 'xml' or 'n3'
        :param method: must be one of

            * 'POST' -- method adds data to a context
            * 'PUT' -- method replaces data in a context
        """
        # validate before doing any network work
        try:
            content_type = self._sesame_content_types[format]
        except KeyError:
            raise ValueError("Unknown format: %s" % format)

        # anything that is not already a URL is treated as a local path
        if not source.startswith(('http://', 'https://', 'file://')):
            source = 'file://' + os.path.abspath(os.path.expanduser(source))
        ctx = self._sesame_context_param(publicID or source)
        url = self.url + '/statements' + "?" + urlencode(dict(context=ctx))

        req = Request(url)
        req.get_method = lambda: method
        req.add_header('Content-Type', content_type)

        payload = urlopen(source)
        try:
            req.data = payload.read()
        finally:
            payload.close()

        log.debug("Request: %s", req.get_full_url())
        result = self._sesame_send(req)
        if result is not None:
            log.debug("Result: %s", result)
        return result

    def load(self, source, publicID=None, format="xml"):
        """Load ``source`` via :meth:`parse` (POST) and return its result."""
        return self.parse(source, publicID, format)