| 1 | from rdflib import URIRef , Literal, BNode |
|---|
| 2 | |
|---|
| 3 | from urllib2 import urlopen, Request, HTTPError |
|---|
| 4 | from struct import unpack |
|---|
| 5 | |
|---|
| 6 | from rdfalchemy.exceptions import MalformedQueryError, QueryEvaluationError |
|---|
| 7 | |
|---|
| 8 | import simplejson |
|---|
| 9 | import logging |
|---|
| 10 | |
|---|
| 11 | __all__=["_JSONSPARQLHandler","_XMLSPARQLHandler","_BRTRSPARQLHandler"] |
|---|
| 12 | |
|---|
| 13 | log=logging.getLogger(__name__) |
|---|
| 14 | |
|---|
# Use a fast ElementTree implementation: try each known provider in order of
# preference and bind the first one that imports as ``ET``.
# TODO: test each of these for iterparse compatibility and relative speed
try:
    import cElementTree as ET  # effbot's C module
except ImportError:
    try:
        import xml.etree.ElementTree as ET  # in python >=2.5
    except ImportError:
        try:
            import lxml.etree as ET  # ElementTree API using libxml2
        except ImportError:
            import elementtree.ElementTree as ET  # effbot's pure Python module
# Pass ET as a logging argument so the message is only formatted when the
# debug level is actually enabled.
log.debug('Using ElementTree: %s', ET)
|---|
| 28 | |
|---|
| 29 | |
|---|
class _SPARQLHandler(object):
    """Abstract base class for parsers of a SPARQL query response stream.

    Concrete handlers should subclass this one but should **not** do much
    work in `__init__`.

    `__init__` stops right after opening the stream, without reading from
    it, so that users keep the option of calling ``handler.stream.read()``
    to get the raw results.
    """
    # Sent as the ``Accept`` request header when non-empty.
    mimetype = ""

    def __init__(self, url):
        """Open `url` and keep the unread response as ``self.stream``."""
        request = Request(url)
        accept = self.mimetype
        if accept:
            request.add_header('Accept', accept)
        self.stream = urlopen(request)
|---|
| 44 | |
|---|
| 45 | |
|---|
class _JSONSPARQLHandler(_SPARQLHandler):
    """Parse the results of a SPARQL query returned as JSON.

    Note: this uses ``simplejson.load``, which consumes the entire stream
    before any result is returned.  The XML handler uses a generator-style
    parse, so it can return the first tuple as soon as it is available
    *without* having to consume the entire stream.
    """
    mimetype = 'application/sparql-results+json'

    def parse(self):
        """Yield one tuple of rdflib terms per result binding.

        Each tuple is ordered like ``head.vars`` in the response; a variable
        with no binding in a given result is ``None``.

        Raises:
            AttributeError: when a binding has an unrecognised ``type``
                (raised lazily, when that result is reached).
        """
        ret = simplejson.load(self.stream)
        var_names = ret['head']['vars']
        for binding in ret['results']['bindings']:
            # Build a fresh mapping rather than rewriting the parsed JSON
            # dict while iterating over it.
            terms = {}
            for var, val in binding.items():
                kind = val['type']
                if kind == 'uri':
                    terms[var] = URIRef(val['value'])
                elif kind == 'bnode':
                    terms[var] = BNode(val['value'])
                elif kind == 'literal':
                    # The final SPARQL JSON results format reports typed
                    # literals as type "literal" with a "datatype" member,
                    # so honour it when no language tag is present.
                    lang = val.get('xml:lang')
                    datatype = None
                    if not lang:
                        datatype = val.get('datatype')
                    terms[var] = Literal(val['value'], lang=lang,
                                         datatype=datatype)
                elif kind == 'typed-literal':
                    terms[var] = Literal(val['value'],
                                         datatype=val.get('datatype'))
                else:
                    raise AttributeError("Binding type error: %s" % (kind))
            yield tuple([terms.get(var) for var in var_names])
|---|
| 74 | |
|---|
| 75 | |
|---|
# Constants for parsing the SPARQL XML results tree: namespace prefixes in
# ElementTree's "{namespace}" form, and the qualified names built from them.
_S_NS = "{http://www.w3.org/2005/sparql-results#}"
_X_NS = "{http://www.w3.org/XML/1998/namespace}"

# Elements of the sparql-results namespace.
_HEAD = "%shead" % _S_NS
_VARIABLE = "%svariable" % _S_NS
_RESULT = "%sresult" % _S_NS
_BINDING = "%sbinding" % _S_NS
_URI = "%suri" % _S_NS
_BNODE = "%sbnode" % _S_NS
_LITERAL = "%sliteral" % _S_NS

# The xml:lang attribute.
_LANG = "%slang" % _X_NS
|---|
| 87 | |
|---|
| 88 | |
|---|
class _XMLSPARQLHandler(_SPARQLHandler):
    """Parse the results of a SPARQL query returned as XML.

    Note: ``parse`` is a generator, so the first tuple is available as soon
    as it has been sent.  It does **not** need to consume the entire results
    stream before returning results.
    """
    mimetype = 'application/sparql-results+xml'

    def parse(self):
        """Yield one tuple of rdflib terms per ``result`` element.

        Tuples are ordered like the ``variable`` elements of ``head``;
        variables without a binding in a result are ``None``.
        """
        names = []
        row = []
        parser = iter(ET.iterparse(self.stream, events=('start', 'end')))

        # Phase 1: collect the variable names until <head> is closed.
        for action, elem in parser:
            if action == 'end' and elem.tag == _HEAD:
                break
            if action == 'start' and elem.tag == _VARIABLE:
                names.append(elem.get('name'))

        # Phase 2: continue on the same iterator, yielding each result as
        # soon as its closing tag has been parsed.
        for action, elem in parser:
            tag = elem.tag
            if action == 'start':
                if tag == _RESULT:
                    # a new result: start from an all-unbound row
                    row = [None] * len(names)
                elif tag == _BINDING:
                    # remember which column the next term belongs to
                    slot = names.index(elem.get('name'))
                continue
            if action != 'end':
                continue
            if tag == _RESULT:
                elem.clear()
                yield tuple(row)
            elif tag == _LITERAL:
                row[slot] = Literal(elem.text or '',
                                    datatype=elem.get('datatype'),
                                    lang=elem.get(_LANG))
            elif tag == _BNODE:
                row[slot] = BNode(elem.text)
            elif tag == _URI:
                row[slot] = URIRef(elem.text)
|---|
| 127 | |
|---|
| 128 | |
|---|
class ParseError(Exception):
    """Raised when a BRTR result stream does not have the expected layout."""


class _BRTRSPARQLHandler(_SPARQLHandler):
    """Handler for the Sesame binary table result format BRTR_

    .. _BRTR: http://www.openrdf.org/doc/sesame/api/org/openrdf/sesame/query/BinaryTableResultConstants.html
    """

    # Marker returned by getval() for a REPEAT record.
    _REPEAT = 1

    def readint(self):
        """Read 4 bytes from the stream as a big-endian signed integer."""
        return unpack('>i', self.stream.read(4))[0]

    def readstr(self):
        """Read an integer length, then that many bytes decoded as UTF-8."""
        length = self.readint()
        return self.stream.read(length).decode("utf-8")

    def _readbyte(self):
        """Read a single byte from the stream and return its ordinal.

        Raises:
            ParseError: when the stream ends instead of delivering a byte.
        """
        byte = self.stream.read(1)
        if not byte:
            raise ParseError("Unexpected end of stream")
        return ord(byte)

    def parse(self):
        """Yield one tuple of values per row of the binary result table.

        Reads the header into ``self.ver``, ``self.ncols`` and ``self.keys``
        and then yields ``tuple(self.values)`` after each complete row, until
        getval() signals the EOF record.

        Raises:
            ParseError: when the stream does not start with ``BRTR`` or
                contains an undefined record.
            MalformedQueryError, QueryEvaluationError: when the stream
                carries an ERROR record of the corresponding type.
        """
        if self.stream.read(4) != 'BRTR':
            raise ParseError("First 4 bytes in should be BRTR")
        self.ver = self.readint()  # ver of protocol
        self.ncols = self.readint()
        self.keys = tuple(self.readstr() for x in range(self.ncols))
        self.values = [None] * self.ncols
        self.ns = {}  # namespace id -> url, filled in by NAMESPACE records
        while True:
            for i in range(self.ncols):
                try:
                    val = self.getval()
                except StopIteration:
                    # EOF record: end this generator explicitly instead of
                    # letting StopIteration escape from the generator body.
                    return
                # REPEAT here is like skip... the val is already in
                # self.values[i].  The other values getval() returns are
                # None or rdflib terms, never ints, so an equality check
                # restricted to ints cannot misfire and does not depend on
                # integer object identity.
                if isinstance(val, int) and val == self._REPEAT:
                    continue
                self.values[i] = val
            yield tuple(self.values)

    def getval(self):
        """Read records until one produces a value, and return that value.

        Returns:
            ``None`` for a NULL record, ``1`` for a REPEAT record, otherwise
            the URIRef, BNode or Literal described by the record.  NAMESPACE
            records are stored in ``self.ns`` and reading continues.

        Raises:
            StopIteration: on the EOF record.
            MalformedQueryError, QueryEvaluationError: on an ERROR record
                of type 1 or 2 respectively.
            ParseError: on an ERROR record of any other type, an undefined
                record type, or a stream that ends where a record type or
                error type byte is expected.
        """
        while True:
            rtype = self._readbyte()
            if rtype == 0:  # NULL
                return None
            elif rtype == 1:  # REPEAT
                return self._REPEAT
            elif rtype == 2:  # NAMESPACE
                nsid = self.readint()
                url = self.readstr()
                self.ns[nsid] = url
            elif rtype == 3:  # QNAME
                nsid = self.readint()
                localname = self.readstr()
                return URIRef(self.ns[nsid] + localname)
            elif rtype == 4:  # URI
                return URIRef(self.readstr())
            elif rtype == 5:  # BNODE
                return BNode(self.readstr())
            elif rtype == 6:  # PLAIN LITERAL
                return Literal(self.readstr())
            elif rtype == 7:  # LANGUAGE LITERAL
                lit = self.readstr()
                lang = self.readstr()
                return Literal(lit, lang=lang)
            elif rtype == 8:  # DATATYPE LITERAL
                lit = self.readstr()
                # the datatype is itself encoded as a value record
                datatype = self.getval()
                return Literal(lit, datatype=datatype)
            elif rtype == 126:  # ERROR
                errType = self._readbyte()
                errStr = self.readstr()
                if errType == 1:
                    raise MalformedQueryError(errStr)
                elif errType == 2:
                    raise QueryEvaluationError(errStr)
                else:
                    # A bare string cannot be raised; wrap it in a real
                    # exception so the server's message reaches the caller.
                    raise ParseError("Undefined error type %s: %s"
                                     % (errType, errStr))
            elif rtype == 127:  # EOF
                raise StopIteration()
            else:
                raise ParseError("Undefined record type: %s" % rtype)
|---|