1 from rdfalchemy import Literal, BNode, Namespace, URIRef
2 from rdfalchemy.sparql import SPARQLGraph, DumpSink
3 from rdfalchemy.sparql.parsers import _BRTRSPARQLHandler,_XMLSPARQLHandler,_JSONSPARQLHandler
4
5 from rdflib.syntax.parsers.ntriples import NTriplesParser
6
7 from urllib2 import urlopen, Request, HTTPError
8 from urllib import urlencode
9
10 from rdfalchemy.exceptions import MalformedQueryError, QueryEvaluationError
11
12 import os
13 import re
14 import simplejson
15 import logging
16
17 __all__=["SesameGraph"]
18
19 log=logging.getLogger(__name__)
20
22 """openrdf-sesame graph via http
23 Uses the sesame2 HTTP communication protocol
24 to provide rdflib type api constructor takes http endpoint and repository name
25 e.g. SesameGraph('http://www.openvest.org:8080/sesame/repositories/Test')"""
26
27 parsers = {'xml': _XMLSPARQLHandler, 'json': _JSONSPARQLHandler,'brtr': _BRTRSPARQLHandler}
28
30 self.url = url
31 self.context=context
32
34 """Namespaces dict"""
35 try:
36 return self._namespaces
37 except:
38 pass
39 req = Request(self.url+'/namespaces')
40 req.add_header('Accept','application/sparql-results+json')
41 log.debug("opening url: %s\n with headers: %s" % (req.get_full_url(), req.header_items()))
42 ret=simplejson.load(urlopen(req))
43 bindings=ret['results']['bindings']
44 self._namespaces = dict([(b['prefix']['value'],b['namespace']['value']) for b in bindings])
45 return self._namespaces
46 namespaces=property(get_namespaces)
47
def get_contexts(self):
    """Return the list of context identifiers known to the repository.

    The list is requested from ``<self.url>/contexts`` the first time it is
    needed and cached on the instance as ``_contexts``; later calls return
    the cached list without contacting the server (the request is noted by
    the original author as "pretty slow").

    :returns: list of the ``contextID`` values found in the JSON result
        bindings
    """
    try:
        return self._contexts
    except AttributeError:
        # Nothing cached yet: fall through and ask the server.  Only the
        # missing attribute is expected here, so nothing else is swallowed.
        pass
    req = Request(self.url + '/contexts')
    req.add_header('Accept', 'application/sparql-results+json')
    response = urlopen(req)
    try:
        ret = simplejson.load(response)
    finally:
        # Release the HTTP response even if the payload cannot be decoded.
        response.close()
    bindings = ret['results']['bindings']
    self._contexts = [b['contextID']['value'] for b in bindings]
    return self._contexts
contexts = property(get_contexts)
61
63 """helper function to encode triples to sesame statement uri's"""
64 query = {}
65 url = self.url+'/statements'
66 if s:
67 query['subj'] = s.n3()
68 if p:
69 query['pred'] = p.n3()
70 if o:
71 query['obj'] = o.n3()
72 if context:
73
74 query['context'] = "<%s>"%context
75 if query:
76 url = url+"?"+urlencode(query)
77 return url
78
79 - def add(self, (s, p, o), context=None):
80 """Add a triple with optional context"""
81 url = self.url+'/statements'
82 ctx = context or self.context
83 if ctx:
84 url = url+"?"+urlencode(dict(context=ctx))
85 req = Request(url)
86 req.data = "%s %s %s .\n" % (s.n3(), p.n3(), o.n3())
87 req.add_header('Content-Type','text/rdf+n3')
88 try:
89 result = urlopen(req).read()
90 except HTTPError, e:
91 if e.code == 204:
92 return
93 else:
94 log.error(e)
95 return result
96
97 - def remove(self, (s, p, o), context=None):
98 """Remove a triple from the graph
99
100 If the triple does not provide a context attribute, removes the triple
101 from all contexts.
102 """
103 url = self._statement_encode((s, p, o), context)
104 req = Request(url)
105 req.get_method=lambda : 'DELETE'
106 try:
107 result = urlopen(req).read()
108 except HTTPError, e:
109 if e.code == 204:
110 return
111 else:
112 log.error(e)
113 return result
114
115 - def triples(self, (s, p, o), context=None):
116 """Generator over the triple store
117
118 Returns triples that match the given triple pattern. If triple pattern
119 does not provide a context, all contexts will be searched.
120 """
121 url = self._statement_encode((s, p, o), context)
122 req = Request(url)
123 req.add_header('Accept','text/plain')
124 log.debug("Request: %s" % req.get_full_url())
125 dumper=DumpSink()
126 parser=NTriplesParser(dumper)
127
128 for l in urlopen(req):
129 log.debug('line: %s'%l)
130 parser.parsestring(l)
131 yield dumper.get_triple()
132
134 """Returns the number of triples in the graph
135 calls http://{self.url}/size very fast
136 """
137 return int(urlopen(self.url+"/size").read())
138
139 - def set(self, (subject, predicate, object)):
140 """Convenience method to update the value of object
141
142 Remove any existing triples for subject and predicate before adding
143 (subject, predicate, object).
144 """
145 self.remove((subject, predicate, None))
146 self.add((subject, predicate, object))
147
148
150 """turn uri into a qname given self.namespaces"""
151 for p,n in self.namespaces.items():
152 if uri.startswith(n):
153 return "%s:%s"%(p,uri[len(n):])
154 return uri
155
def query(self, strOrQuery, initBindings={}, initNs={}, resultMethod="brtr",
          processor="sparql", rawResults=False):
    """Run a query against this graph by delegating to the parent class.

    This override only changes the defaults; every argument is handed on
    unchanged, in the same order, to the superclass ``query``.

    :param strOrQuery: a string containing the query
    :param initBindings: *optional* mapping from a Variable to an RDFLib
        term (used as initial bindings for the query)
    :param initNs: *optional* mapping from a namespace prefix to a namespace
    :param resultMethod: result format to request; must be 'xml', 'json' or
        'brtr'.  xml streams over the result set, whereas json must read
        the entire set to succeed
    :param processor: the kind of RDF query (must be 'sparql' or 'serql')
    :param rawResults: if set to `True`, return the raw xml or json stream
        rather than the parsed results
    """
    parent = super(SesameGraph, self)
    return parent.query(strOrQuery, initBindings, initNs,
                        resultMethod, processor, rawResults)
170
171
172 - def parse(self, source, publicID=None, format="xml", method='POST'):
173 """
174 Parse source into Graph
175
176 Graph will get loaded into it's own context (sub graph).
177 Format defaults to 'xml' (AKA: rdf/xml).
178
179 :returns: Returns the context into which the source was parsed.
180
181 :param source: source file in the form of "http://....." or "~/dir/file.rdf"
182 :param publicID: *optional* the logical URI if it's different from the physical source URI.
183 :param format: must be one of 'xml' or 'n3'
184 :param method: must be one of
185
186 * 'POST' -- method adds data to a context
187 * 'PUT' -- method replaces data in a context
188 """
189 url = self.url+'/statements'
190 if not (source.startswith('http://') or source.startswith('file://')):
191 source = 'file://'+os.path.abspath(os.path.expanduser(source))
192 ctx = "<%s>" % (publicID or source)
193 url = url+"?"+urlencode(dict(context=ctx))
194
195 req = Request(url)
196 req.get_method = lambda : method
197
198 if format=='xml':
199 req.add_header('Content-Type','application/rdf+xml')
200 elif format=='n3':
201 req.add_header('Content-Type','text/rdf+n3')
202 else:
203 raise "Unknown format: %s"% format
204
205 req.data = urlopen(source).read()
206 log.debug("Request: %s" % req.get_full_url())
207 try:
208 result = urlopen(req).read()
209 log.debug("Result: "+result)
210 except HTTPError, e:
211
212 if e.code == 204:
213 return
214 log.error(e)
215 raise HTTPError, e
216 return result
217
def load(self, source, publicID=None, format="xml"):
    """Parse ``source`` into the graph, discarding the server response.

    Thin wrapper that forwards its arguments to ``parse`` and leaves the
    HTTP method at ``parse``'s default; always returns ``None``.

    :param source: passed through to ``parse``
    :param publicID: *optional* passed through to ``parse``
    :param format: passed through to ``parse``; defaults to 'xml'
    """
    self.parse(source, publicID, format)
220