1 from rdflib import URIRef , Literal, BNode
2
3 from urllib2 import urlopen, Request, HTTPError
4 from struct import unpack
5
6 import simplejson
7 import logging
8
9 __all__=["_JSONSPARQLHandler","_XMLSPARQLHandler","_BRTRSPARQLHandler"]
10
# Module-level logger, named after this module.
log = logging.getLogger(__name__)


# Try the known ElementTree implementations in order and bind whichever one
# imports first to the name ``ET``.  If none of them is installed the last
# ImportError propagates and the module fails to import.
try:
    import cElementTree as ET
except ImportError:
    try:
        import xml.etree.ElementTree as ET
    except ImportError:
        try:
            import lxml.etree as ET
        except ImportError:
            import elementtree.ElementTree as ET
# Lazy logging arguments: the message is only formatted if DEBUG is enabled.
log.debug('Using ElementTree: %s', ET)
26
27
# NOTE(review): these three exception types are raised by the BRTR handler
# below, but nothing visible in this module defines or imports them.  They are
# defined here so that raising them works; if the project already provides
# classes with these names elsewhere, import those instead.
class ParseError(Exception):
    """Raised when a result stream does not follow the expected format."""


class MalformedQueryError(Exception):
    """Raised for a BRTR error record of type 1."""


class QueryEvaluationError(Exception):
    """Raised for a BRTR error record of type 2."""


class _SPARQLHandler(object):
    """Abstract base class for parsing the response stream of a sparql query.

    Real classes should subclass from here but should **not** do too much
    during `__init__`.

    `__init__` should stop after opening the stream and not read from it, so
    that users have the option to call ``p.stream.read()`` to get the raw
    results.
    """
    # Value sent in the ``Accept`` header; an empty string sends no header.
    mimetype = ""

    def __init__(self, url):
        """Open `url` and keep the response in ``self.stream`` without reading it.

        :param url: address of the query to fetch.
        """
        req = Request(url)
        if self.mimetype:
            req.add_header('Accept', self.mimetype)
        self.stream = urlopen(req)


class _JSONSPARQLHandler(_SPARQLHandler):
    """Parse the results of a sparql query returned as json.

    Note: this uses simplejson.load which will consume the entire
    stream before returning any results.  The XML handler uses a generator
    type return so it returns the first tuple as soon as it's available
    *without* having to consume the entire stream.
    """
    mimetype = 'application/sparql-results+json'

    @staticmethod
    def _term(val):
        """Convert one JSON binding value (a dict) into an rdflib term.

        :raises AttributeError: if ``val['type']`` is not a known binding type.
        """
        kind = val['type']
        if kind == 'uri':
            return URIRef(val['value'])
        if kind == 'bnode':
            return BNode(val['value'])
        if kind == 'literal':
            lang = val.get('xml:lang')
            # SPARQL 1.1 JSON results put the datatype on plain "literal"
            # bindings.  It is only passed on when no language tag is present,
            # so input that carries a language tag behaves exactly as before.
            datatype = val.get('datatype') if lang is None else None
            return Literal(val['value'], lang=lang, datatype=datatype)
        if kind == 'typed-literal':
            return Literal(val['value'], datatype=val.get('datatype'))
        raise AttributeError("Binding type error: %s" % (kind))

    def parse(self):
        """Yield one tuple per result row, ordered like ``head.vars``.

        Variables that are unbound in a row come back as ``None``.

        :raises AttributeError: if a binding has an unknown ``type``.
        """
        ret = simplejson.load(self.stream)
        var_names = ret['head']['vars']
        for binding in ret['results']['bindings']:
            row = {}
            for var, val in binding.items():
                row[var] = self._term(val)
            yield tuple([row.get(var) for var in var_names])


# Fully qualified tag and attribute names used by the XML results parser.
_S_NS = "{http://www.w3.org/2005/sparql-results#}"
_VARIABLE = _S_NS + "variable"
_BNODE = _S_NS + "bnode"
_URI = _S_NS + "uri"
_BINDING = _S_NS + "binding"
_LITERAL = _S_NS + "literal"
_HEAD = _S_NS + "head"
_RESULT = _S_NS + "result"
_X_NS = "{http://www.w3.org/XML/1998/namespace}"
_LANG = _X_NS + "lang"


class _XMLSPARQLHandler(_SPARQLHandler):
    """Parse the results of a sparql query returned as xml.

    Note: returns a generator so that the first tuple is
    available as soon as it is sent.  This does **not** need to consume
    the entire results stream before returning results (that's a good thing :-).
    """
    mimetype = 'application/sparql-results+xml'

    def parse(self):
        """Yield one tuple per ``<result>``, ordered like the ``<head>`` variables.

        Variables without a binding in a result come back as ``None``.
        """
        var_names = []
        bindings = []
        events = iter(ET.iterparse(self.stream, events=('start', 'end')))

        # First pass: collect the variable names, stopping once <head> closes.
        for (event, node) in events:
            if event == 'start' and node.tag == _VARIABLE:
                var_names.append(node.get('name'))
            elif event == 'end' and node.tag == _HEAD:
                break

        # Second pass: continue on the same event iterator to build the rows.
        for (event, node) in events:
            if event == 'start':
                if node.tag == _BINDING:
                    # Remember which column the upcoming term belongs to.
                    idx = var_names.index(node.get('name'))
                elif node.tag == _RESULT:
                    bindings = [None] * len(var_names)
            elif event == 'end':
                # Element text is only read on 'end' events.
                if node.tag == _URI:
                    bindings[idx] = URIRef(node.text)
                elif node.tag == _BNODE:
                    bindings[idx] = BNode(node.text)
                elif node.tag == _LITERAL:
                    bindings[idx] = Literal(node.text or '',
                                            datatype=node.get('datatype'),
                                            lang=node.get(_LANG))
                elif node.tag == _RESULT:
                    # Clear the finished <result> element before yielding.
                    node.clear()
                    yield tuple(bindings)


class _BRTRSPARQLHandler(_SPARQLHandler):
    """Handler for the sesame binary table format BRTR_

    .. _BRTR: http://www.openrdf.org/doc/sesame/api/org/openrdf/sesame/query/BinaryTableResultConstants.html
    """

    # Value `getval` returns for a record of type 1.  `parse` treats it as
    # "leave this column's previous value in place".
    _REPEAT = 1

    def _read(self, size):
        """Return exactly `size` bytes from the stream.

        :raises ParseError: if the stream ends before `size` bytes arrive.
        """
        chunks = []
        remaining = size
        while remaining > 0:
            chunk = self.stream.read(remaining)
            if not chunk:
                raise ParseError(
                    "Unexpected end of stream: %d more byte(s) expected"
                    % remaining)
            chunks.append(chunk)
            remaining -= len(chunk)
        return b"".join(chunks)

    def readint(self):
        """Read a 4-byte big-endian signed integer from the stream."""
        return unpack('>i', self._read(4))[0]

    def readstr(self):
        """Read a length-prefixed UTF-8 string from the stream.

        :raises ParseError: if the length prefix is negative or the stream
            ends early.
        """
        length = self.readint()
        if length < 0:
            # A negative length would otherwise turn into read(-1), which
            # consumes the whole remaining stream.
            raise ParseError("Negative string length: %d" % length)
        return self._read(length).decode("utf-8")

    def parse(self):
        """Yield one tuple of values per table row until the end record.

        Sets ``self.ver``, ``self.ncols``, ``self.keys``, ``self.values`` and
        ``self.ns`` as the header is read.

        :raises ParseError: if the stream does not start with ``BRTR``, ends
            early, or contains an undefined record type.
        """
        if self._read(4) != b'BRTR':
            raise ParseError("First 4 bytes in should be BRTR")
        self.ver = self.readint()
        self.ncols = self.readint()
        self.keys = tuple(self.readstr() for x in range(self.ncols))
        self.values = [None] * self.ncols
        self.ns = {}
        if self.ncols <= 0:
            # With no columns nothing is ever read inside the row loop, so it
            # would yield empty tuples forever.
            return
        while True:
            for i in range(self.ncols):
                try:
                    val = self.getval()
                except StopIteration:
                    # `getval` signals the end record with StopIteration.
                    # Letting that escape a generator is a RuntimeError on
                    # modern Python (PEP 479), so finish explicitly instead.
                    return
                if val is self._REPEAT:
                    continue
                self.values[i] = val
            yield tuple(self.values)

    def getval(self):
        """Read records until one produces a value, and return that value.

        Record types, as handled below: 0 null, 1 repeat, 2 namespace
        declaration (stored in ``self.ns``, then reading continues),
        3 namespace id plus local name, 4 full URI, 5 bnode, 6 plain literal,
        7 literal with language, 8 literal with datatype, 126 error,
        127 end of table.

        :returns: ``None``, ``1`` for a repeat record, or an rdflib term.
        :raises StopIteration: on the end-of-table record.
        :raises MalformedQueryError: on an error record of type 1.
        :raises QueryEvaluationError: on an error record of type 2.
        :raises RuntimeError: on an error record of any other type.
        :raises ParseError: on an undefined record type or a truncated stream.
        """
        while True:
            rtype = ord(self._read(1))
            if rtype == 0:
                return None
            elif rtype == 1:
                return self._REPEAT
            elif rtype == 2:
                # A namespace declaration carries no value: store it and
                # keep reading.
                nsid = self.readint()
                url = self.readstr()
                self.ns[nsid] = url
            elif rtype == 3:
                nsid = self.readint()
                localname = self.readstr()
                return URIRef(self.ns[nsid] + localname)
            elif rtype == 4:
                return URIRef(self.readstr())
            elif rtype == 5:
                return BNode(self.readstr())
            elif rtype == 6:
                return Literal(self.readstr())
            elif rtype == 7:
                lit = self.readstr()
                lang = self.readstr()
                return Literal(lit, lang=lang)
            elif rtype == 8:
                lit = self.readstr()
                # The datatype is itself encoded as a nested value record.
                datatype = self.getval()
                return Literal(lit, datatype=datatype)
            elif rtype == 126:
                errType = ord(self._read(1))
                errStr = self.readstr()
                if errType == 1:
                    raise MalformedQueryError(errStr)
                elif errType == 2:
                    raise QueryEvaluationError(errStr)
                else:
                    # Raising the bare string is itself a TypeError, so wrap
                    # the message in a real exception.
                    raise RuntimeError(errStr)
            elif rtype == 127:
                raise StopIteration()
            else:
                raise ParseError("Undefined record type: %s" % rtype)
197