Package rdfalchemy :: Package sparql :: Module parsers
[hide private]
[frames] | no frames]

Source Code for Module rdfalchemy.sparql.parsers

  1  from rdflib import URIRef , Literal, BNode 
  2   
  3  from urllib2 import urlopen, Request, HTTPError 
  4  from struct import unpack 
  5   
  6  import simplejson 
  7  import logging 
  8   
  9  __all__=["_JSONSPARQLHandler","_XMLSPARQLHandler","_BRTRSPARQLHandler"] 
 10   
 11  log=logging.getLogger(__name__) 
 12   
 13  # use a fast ElementTree 
 14  # TODO: test these each for iterparse compatability and relative speed 
 15  try: 
 16      import cElementTree as ET # effbot's C module 
 17  except ImportError: 
 18      try: 
 19          import xml.etree.ElementTree as ET # in python >=2.5 
 20      except ImportError: 
 21          try: 
 22              import lxml.etree as ET # ElementTree API using libxml2 
 23          except ImportError: 
 24              import elementtree.ElementTree as ET # effbot's pure Python module 
 25  log.debug('Using ElementTree: %s' % ET) 
 26   
 27   
class _SPARQLHandler(object):
    """Common base for classes that parse the response of a SPARQL query.

    Concrete handlers derive from this class and set `mimetype` to the
    result format they understand.  Subclasses should **not** do much work
    in `__init__`: it stops as soon as the response stream is open and does
    not read from it, so that callers keep the option of calling
    ``p.stream.read()`` to obtain the raw results themselves.
    """
    # Value sent in the HTTP ``Accept`` header; an empty string sends none.
    mimetype = ""

    def __init__(self, url):
        """Open `url` and keep the unread response as ``self.stream``.

        :param url: the complete query URL to request.
        """
        headers = {}
        if self.mimetype:
            headers['Accept'] = self.mimetype
        self.stream = urlopen(Request(url, headers=headers))
42 43
class _JSONSPARQLHandler(_SPARQLHandler):
    """Parse the results of a sparql query returned as json.

    Note: this uses simplejson.load which will consume the entire
    stream before returning any results.  The XML handler uses a generator
    type return so it returns the first tuple as soon as it's available
    *without* having to consume the entire stream.
    """
    mimetype = 'application/sparql-results+json'

    def _term(self, val):
        """Convert one JSON binding value into an rdflib term.

        :param val: dict with at least the keys ``type`` and ``value``.
        :returns: a `URIRef`, `BNode` or `Literal`.
        :raises AttributeError: if ``val['type']`` is not a known type.
        """
        kind = val['type']
        if kind == 'uri':
            return URIRef(val['value'])
        if kind == 'bnode':
            return BNode(val['value'])
        if kind in ('literal', 'typed-literal'):
            # Typed literals arrive either as "typed-literal" (older
            # servers) or as a plain "literal" carrying a "datatype" key,
            # so both optional keys are honoured for both spellings.  This
            # matches what the XML handler does.
            return Literal(val['value'],
                           lang=val.get('xml:lang'),
                           datatype=val.get('datatype'))
        raise AttributeError("Binding type error: %s" % (kind))

    def parse(self):
        """Yield one tuple of terms per result row.

        Each tuple is ordered like ``head.vars`` in the response; a
        variable that is unbound in a row is reported as ``None``.

        :raises AttributeError: if a binding has an unknown ``type``.
        """
        ret = simplejson.load(self.stream)
        var_names = ret['head']['vars']
        for binding in ret['results']['bindings']:
            # Convert every binding in the row (not only the ones named in
            # the head) so that a bad type is always reported.
            terms = {}
            for var, val in binding.items():
                terms[var] = self._term(val)
            yield tuple([terms.get(var) for var in var_names])
72 73 74 # some constants for parsing the xml tree 75 _S_NS = "{http://www.w3.org/2005/sparql-results#}" 76 _VARIABLE= _S_NS+"variable" 77 _BNODE = _S_NS+"bnode" 78 _URI = _S_NS+"uri" 79 _BINDING = _S_NS+"binding" 80 _LITERAL = _S_NS+"literal" 81 _HEAD = _S_NS+"head" 82 _RESULT = _S_NS+"result" 83 _X_NS = "{http://www.w3.org/XML/1998/namespace}" 84 _LANG = _X_NS+"lang" 85 86
class _XMLSPARQLHandler(_SPARQLHandler):
    """Parse the results of a sparql query returned as xml.

    Note: `parse` is a generator, so the first tuple is handed out as soon
    as it has been received.  The results stream does **not** have to be
    consumed completely before results become available.
    """
    mimetype = 'application/sparql-results+xml'

    def parse(self):
        """Yield one tuple of terms per ``result`` element.

        Each tuple is ordered like the ``variable`` elements in the
        document head; a variable without a binding in a row is ``None``.
        """
        columns = []
        row = []
        parser_events = iter(ET.iterparse(self.stream,
                                          events=('start', 'end')))

        # First pass over the shared iterator: collect the variable names
        # and stop as soon as the head element has been closed.
        for kind, elem in parser_events:
            if kind == 'end':
                if elem.tag == _HEAD:
                    break
            elif kind == 'start' and elem.tag == _VARIABLE:
                columns.append(elem.get('name'))

        # Second pass continues where the first one stopped and emits each
        # row as soon as its closing tag has been seen.
        for kind, elem in parser_events:
            tag = elem.tag
            if kind == 'start':
                if tag == _BINDING:
                    # remember which column the upcoming term belongs to
                    slot = columns.index(elem.get('name'))
                elif tag == _RESULT:
                    row = [None] * len(columns)
                continue
            if kind != 'end':
                continue
            if tag == _URI:
                row[slot] = URIRef(elem.text)
            elif tag == _BNODE:
                row[slot] = BNode(elem.text)
            elif tag == _LITERAL:
                row[slot] = Literal(elem.text or '',
                                    datatype=elem.get('datatype'),
                                    lang=elem.get(_LANG))
            elif tag == _RESULT:
                # drop the finished subtree before handing out the row
                elem.clear()
                yield tuple(row)
125 126
# The handler below raises these three exceptions, but the module neither
# imports nor defines them, so every error path used to end in a NameError.
class ParseError(Exception):
    """Raised when a BRTR stream does not follow the expected layout."""


class MalformedQueryError(Exception):
    """Raised for a BRTR ERROR record whose error type is 1."""


class QueryEvaluationError(Exception):
    """Raised for a BRTR ERROR record whose error type is 2."""


class _BRTRSPARQLHandler(_SPARQLHandler):
    """Handler for the sesame binary table format BRTR_

    .. _BRTR: http://www.openrdf.org/doc/sesame/api/org/openrdf/sesame/query/BinaryTableResultConstants.html
    """

    # Marker handed back by `getval` for a REPEAT record.  `parse` compares
    # by identity against this very attribute, so the check does not rely on
    # the interpreter caching small integers.
    _REPEAT = 1

    def readint(self):
        """Read a big-endian signed 32 bit integer from the stream."""
        return unpack('>i', self.stream.read(4))[0]

    def readstr(self):
        """Read a length-prefixed UTF-8 string from the stream.

        The length is a 32 bit integer (see `readint`) giving the number of
        bytes that follow.
        """
        length = self.readint()
        return self.stream.read(length).decode("utf-8")

    def parse(self):
        """Yield one tuple of terms per result row.

        Reads the header (magic bytes, protocol version, column count and
        column names) and then rows of `ncols` values each until the EOF
        record is met.  Sets `ver`, `ncols`, `keys`, `values` and `ns` on
        the instance as a side effect.

        :raises ParseError: if the stream does not start with ``BRTR`` or
            contains an unknown record type.
        :raises MalformedQueryError: see `getval`.
        :raises QueryEvaluationError: see `getval`.
        """
        if self.stream.read(4) != 'BRTR':
            raise ParseError("First 4 bytes in should be BRTR")
        self.ver = self.readint()  # ver of protocol
        self.ncols = self.readint()
        self.keys = tuple([self.readstr() for _ in range(self.ncols)])
        self.values = [None] * self.ncols
        self.ns = {}
        if self.ncols < 1:
            # Without columns the row loop below would never read from the
            # stream and therefore could never reach the EOF record.
            return
        while True:
            for i in range(self.ncols):
                try:
                    val = self.getval()
                except StopIteration:
                    # EOF record: finish the generator explicitly instead
                    # of letting StopIteration leak through its frame.
                    return
                if val is self._REPEAT:
                    # REPEAT here is like skip...the val is already in
                    # self.values[i]
                    continue
                self.values[i] = val
            yield tuple(self.values)

    def getval(self):
        """Read records until one produces a value and return that value.

        NAMESPACE records only update `self.ns` and are skipped over.

        :returns: ``None`` for a NULL record, ``1`` for a REPEAT record,
            otherwise a `URIRef`, `BNode` or `Literal`.
        :raises StopIteration: on the EOF record.
        :raises MalformedQueryError: on an ERROR record of type 1.
        :raises QueryEvaluationError: on an ERROR record of type 2.
        :raises Exception: on an ERROR record of any other type.
        :raises ParseError: on an unknown record type or if the stream ends
            before an EOF record was read.
        """
        while True:
            marker = self.stream.read(1)
            if not marker:
                raise ParseError("Unexpected end of stream")
            rtype = ord(marker)
            if rtype == 0:  # NULL
                return None
            elif rtype == 1:  # REPEAT
                return self._REPEAT
            elif rtype == 2:  # NAMESPACE
                nsid = self.readint()
                url = self.readstr()
                self.ns[nsid] = url
            elif rtype == 3:  # QNAME
                nsid = self.readint()
                localname = self.readstr()
                return URIRef(self.ns[nsid] + localname)
            elif rtype == 4:  # URI
                return URIRef(self.readstr())
            elif rtype == 5:  # BNODE
                return BNode(self.readstr())
            elif rtype == 6:  # PLAIN LITERAL
                return Literal(self.readstr())
            elif rtype == 7:  # LANGUAGE LITERAL
                lit = self.readstr()
                lang = self.readstr()
                return Literal(lit, lang=lang)
            elif rtype == 8:  # DATATYPE LITERAL
                lit = self.readstr()
                datatype = self.getval()
                return Literal(lit, datatype=datatype)
            elif rtype == 126:  # ERROR
                errType = ord(self.stream.read(1))
                errStr = self.readstr()
                if errType == 1:
                    raise MalformedQueryError(errStr)
                elif errType == 2:
                    raise QueryEvaluationError(errStr)
                else:
                    # a bare string cannot be raised; wrap the message
                    raise Exception(errStr)
            elif rtype == 127:  # EOF
                raise StopIteration()
            else:
                raise ParseError("Undefined record type: %s" % rtype)
197