| 1 | from rdfalchemy import Literal, BNode, Namespace, URIRef |
|---|
| 2 | from rdfalchemy.sparql import SPARQLGraph, DumpSink |
|---|
| 3 | from rdfalchemy.sparql.parsers import _BRTRSPARQLHandler,_XMLSPARQLHandler,_JSONSPARQLHandler |
|---|
| 4 | |
|---|
| 5 | from rdflib.syntax.parsers.ntriples import NTriplesParser |
|---|
| 6 | |
|---|
| 7 | from urllib2 import urlopen, Request, HTTPError |
|---|
| 8 | from urllib import urlencode |
|---|
| 9 | |
|---|
| 10 | import os |
|---|
| 11 | import re |
|---|
| 12 | import simplejson |
|---|
| 13 | import logging |
|---|
| 14 | |
|---|
# Public API of this module: only the graph class is exported by
# ``from <module> import *``.
__all__ = ["SesameGraph"]

# Module-level logger, named after the importing module path so that
# applications can configure it through the standard logging hierarchy.
log = logging.getLogger(__name__)
|---|
| 18 | |
|---|
class SesameGraph(SPARQLGraph):
    """openrdf-sesame graph accessed over HTTP.

    Uses the sesame2 HTTP communication protocol to provide an rdflib-style
    API.  The constructor takes the full repository URL (HTTP endpoint plus
    repository name), e.g.::

        SesameGraph('http://www.openvest.org:8080/sesame/repositories/Test')
    """

    # Result handlers keyed by the ``resultMethod`` name accepted by query().
    parsers = {
        'xml': _XMLSPARQLHandler,
        'json': _JSONSPARQLHandler,
        'brtr': _BRTRSPARQLHandler,
    }

    # Content-Type header sent by parse() for each supported ``format``.
    _content_types = {
        'xml': 'application/rdf+xml',
        'n3': 'text/rdf+n3',
    }

    def __init__(self, url, context=None):
        """Remember the repository ``url`` and an optional default ``context``.

        ``context`` is only used by :meth:`add` when no explicit context is
        passed to that call.
        """
        self.url = url
        self.context = context

    # ------------------------------------------------------------------
    # private helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _context_encode(context):
        """Return ``context`` in the N-Triples form used in request URLs.

        * objects exposing ``n3()`` (URIRef, BNode, ...) are encoded by it,
          so a BNode becomes ``_:id`` instead of the invalid ``<id>``;
        * strings that are already encoded (``<...>``, ``_:...``) or the
          special value ``null`` are passed through untouched;
        * any other value is treated as a bare URI and wrapped in ``<>``.
        """
        n3 = getattr(context, 'n3', None)
        if callable(n3):
            return n3()
        text = "%s" % context
        if text.startswith('<') or text.startswith('_:') or text == 'null':
            return text
        return "<%s>" % text

    def _fetch_bindings(self, path):
        """GET ``self.url + path`` as SPARQL-results JSON; return its bindings."""
        req = Request(self.url + path)
        req.add_header('Accept', 'application/sparql-results+json')
        log.debug("opening url: %s\n with headers: %s",
                  req.get_full_url(), req.header_items())
        response = urlopen(req)
        try:
            return simplejson.load(response)['results']['bindings']
        finally:
            response.close()

    def _send(self, req):
        """Execute ``req`` and return the response body.

        :returns: the body as read from the server, or ``None`` when the
            HTTP layer reports status 204 as an ``HTTPError`` (204 is the
            "success, no content" code).
        :raises HTTPError: for any other HTTP error, after logging it.
        """
        try:
            response = urlopen(req)
        except HTTPError as e:
            if e.code == 204:
                return None
            log.error(e)
            # Propagate the real failure instead of falling through to an
            # unbound local variable.
            raise
        try:
            return response.read()
        finally:
            response.close()

    # ------------------------------------------------------------------
    # repository metadata
    # ------------------------------------------------------------------
    def get_namespaces(self):
        """Return the ``{prefix: namespace}`` dict of the repository.

        Fetched from ``<url>/namespaces`` on first access and cached on the
        instance afterwards.
        """
        try:
            return self._namespaces
        except AttributeError:
            pass  # not fetched yet
        bindings = self._fetch_bindings('/namespaces')
        self._namespaces = dict(
            (b['prefix']['value'], b['namespace']['value']) for b in bindings)
        return self._namespaces
    namespaces = property(get_namespaces)

    def get_contexts(self):
        """Return the list of context ids of the repository ... pretty slow.

        Fetched from ``<url>/contexts`` on first access and cached on the
        instance afterwards.
        """
        try:
            return self._contexts
        except AttributeError:
            pass  # not fetched yet
        bindings = self._fetch_bindings('/contexts')
        self._contexts = [b['contextID']['value'] for b in bindings]
        return self._contexts
    contexts = property(get_contexts)

    # ------------------------------------------------------------------
    # statements
    # ------------------------------------------------------------------
    def _statement_encode(self, triple, context):
        """Helper to encode a triple pattern into a sesame statements URL.

        :param triple: ``(s, p, o)``; ``None`` in any position is a wildcard
            and is left out of the query string.
        :param context: optional context restricting the statements; a
            false value means "no restriction".
        :returns: ``<url>/statements`` plus the encoded query string, if any.
        """
        s, p, o = triple
        query = {}
        for key, term in (('subj', s), ('pred', p), ('obj', o)):
            # Test against None rather than truthiness: an empty-string
            # literal is a real term, not a wildcard.
            if term is not None:
                query[key] = term.n3()
        if context:
            query['context'] = self._context_encode(context)
        url = self.url + '/statements'
        if query:
            url = url + "?" + urlencode(query)
        return url

    def add(self, triple, context=None):
        """Add a triple with optional context.

        :param triple: ``(s, p, o)`` terms, each exposing ``n3()``.
        :param context: context to add to; defaults to ``self.context``.
        :returns: the server response body, or ``None`` on a 204 reply.
        :raises HTTPError: if the server rejects the request.
        """
        s, p, o = triple
        url = self.url + '/statements'
        ctx = context or self.context
        if ctx:
            url = url + "?" + urlencode(
                dict(context=self._context_encode(ctx)))
        req = Request(url)
        # A request carrying data is sent as POST by the HTTP layer.
        req.data = "%s %s %s .\n" % (s.n3(), p.n3(), o.n3())
        req.add_header('Content-Type', 'text/rdf+n3')
        return self._send(req)

    def remove(self, triple, context=None):
        """Remove a triple from the graph.

        If no context is given, removes the matching triples from all
        contexts.  ``None`` in any position of ``triple`` is a wildcard.

        :returns: the server response body, or ``None`` on a 204 reply.
        :raises HTTPError: if the server rejects the request.
        """
        req = Request(self._statement_encode(triple, context))
        req.get_method = lambda: 'DELETE'
        return self._send(req)

    def triples(self, triple, context=None):
        """Generator over the triple store.

        Yields the triples that match the given triple pattern.  If no
        context is given, all contexts are searched.
        """
        req = Request(self._statement_encode(triple, context))
        # N-Triples is best for a generator (one line per triple)
        req.add_header('Accept', 'text/plain')
        log.debug("Request: %s", req.get_full_url())
        dumper = DumpSink()
        parser = NTriplesParser(dumper)

        response = urlopen(req)
        try:
            for line in response:
                log.debug('line: %s', line)
                parser.parsestring(line)
                yield dumper.get_triple()
        finally:
            # Also runs when the consumer abandons the generator early.
            response.close()

    def __len__(self):
        """Return the number of triples in the graph.

        Calls ``<url>/size``, which is very fast.
        """
        response = urlopen(self.url + "/size")
        try:
            return int(response.read())
        finally:
            response.close()

    def set(self, triple):
        """Convenience method to update the value of object.

        Removes any existing triples for subject and predicate before
        adding ``(subject, predicate, object)``.
        """
        subject, predicate, obj = triple
        self.remove((subject, predicate, None))
        self.add((subject, predicate, obj))

    def qname(self, uri):
        """Turn ``uri`` into a qname given ``self.namespaces``.

        Returns ``uri`` unchanged when no known namespace is a prefix of it.
        """
        for prefix, namespace in self.namespaces.items():
            if uri.startswith(namespace):
                return "%s:%s" % (prefix, uri[len(namespace):])
        return uri

    def query(self, strOrQuery, initBindings={}, initNs={}, resultMethod="brtr", processor="sparql", rawResults=False):
        """
        Executes a SPARQL query against this Graph

        :param strOrQuery: a string consisting of the SPARQL query
        :param initBindings: *optional* mapping from a Variable to an RDFLib term (used as initial bindings for SPARQL query)
        :param initNs: optional mapping from a namespace prefix to a namespace
        :param resultMethod: results query requested (must be 'xml', 'json' or 'brtr')
         xml streams over the result set and json must read the entire set to succeed
        :param processor: The kind of RDF query (must be 'sparql' or 'serql')
        :param rawResults: If set to `True`, returns the raw xml or json stream rather than the parsed results.
        """
        # Same method as super with a different resultMethod default; the
        # dict defaults are handed on untouched, never mutated here.
        return super(SesameGraph, self).query(
            strOrQuery, initBindings, initNs, resultMethod, processor, rawResults)

    def parse(self, source, publicID=None, format="xml", method='POST'):
        """
        Parse source into Graph

        Graph will get loaded into its own context (sub graph), named
        after ``publicID`` or, failing that, the source URI.
        Format defaults to 'xml' (AKA: rdf/xml).

        :returns: the server response body, or ``None`` on a 204 reply.
        :raises ValueError: if ``format`` is not supported.
        :raises HTTPError: if the server rejects the upload.

        :param source: source file in the form of "http://....." or "~/dir/file.rdf"
        :param publicID: *optional* the logical URI if it's different from the physical source URI.
        :param format: must be one of 'xml' or 'n3'
        :param method: must be one of

            * 'POST' -- method adds data to a context
            * 'PUT' -- method replaces data in a context
        """
        # Anything that is not already a URL is treated as a local path.
        if not source.startswith(('http://', 'https://', 'file://')):
            source = 'file://' + os.path.abspath(os.path.expanduser(source))
        ctx = "<%s>" % (publicID or source)
        url = self.url + '/statements' + "?" + urlencode(dict(context=ctx))

        req = Request(url)
        req.get_method = lambda: method

        # Reject an unsupported format before any network traffic happens.
        try:
            content_type = self._content_types[format]
        except KeyError:
            raise ValueError("Unknown format: %s" % format)
        req.add_header('Content-Type', content_type)

        payload = urlopen(source)
        try:
            req.data = payload.read()
        finally:
            payload.close()

        log.debug("Request: %s", req.get_full_url())
        result = self._send(req)
        if result is not None:
            log.debug("Result: %s", result)
        return result

    def load(self, source, publicID=None, format="xml"):
        """Load ``source`` via :meth:`parse` using POST (adds to the context)."""
        self.parse(source, publicID, format)
|---|