| 1 | #!/usr/bin/env python |
|---|
| 2 | """ |
|---|
| 3 | rdfalchemy.py - a Simple API for RDF |
|---|
| 4 | |
|---|
| 5 | |
|---|
| 6 | Requires rdflib <http://www.rdflib.net/> version 2.3 ??. |
|---|
| 7 | |
|---|
| 8 | """ |
|---|
| 9 | |
|---|
| 10 | from rdflib import ConjunctiveGraph |
|---|
| 11 | from rdflib import BNode, Namespace, URIRef, RDF |
|---|
| 12 | from rdflib.Identifier import Identifier |
|---|
| 13 | from rdfalchemy.exceptions import RDFAlchemyError |
|---|
| 14 | from rdfalchemy.Literal import Literal |
|---|
| 15 | import re |
|---|
| 16 | |
|---|
| 17 | try: |
|---|
| 18 | from hashlib import md5 |
|---|
| 19 | except ImportError: |
|---|
| 20 | from md5 import md5 |
|---|
| 21 | |
|---|
| 22 | import logging |
|---|
| 23 | #console = logging.StreamHandler() |
|---|
| 24 | #formatter = logging.Formatter('%(name)-12s: %(levelname)-8s %(message)s') |
|---|
| 25 | #console.setFormatter(formatter) |
|---|
| 26 | |
|---|
# Module-level logger named after this module.  No handler or level is
# configured here; the lines that would do so are left commented out.
log=logging.getLogger(__name__)
#log.setLevel(logging.DEBUG)
#log.addHandler(console)

# Splits a string at its last '/' or '#': group 1 is everything up to and
# including that separator, group 2 is the remainder.
# NOTE(review): presumably used to split a URI into namespace and local
# name -- no use of it is visible in this part of the file.
re_ns_n = re.compile('(.*[/#])(.*)')
|---|
| 32 | |
|---|
| 33 | # Note: Non data descriptors (get only) lookup in obj.__dict__ first |
|---|
| 34 | # Data descriptors (get and set) use the __get__ first |
|---|
| 35 | |
|---|
| 36 | ################################################################################## |
|---|
| 37 | # define our Base Class for all "subjects" in python |
|---|
| 38 | ################################################################################## |
|---|
| 39 | |
|---|
class rdfSubject(object):
    """Base class for Python objects that wrap one node of an RDF graph.

    Every instance carries a ``resUri`` (a ``BNode`` or a ``URIRef``) and
    reads and writes triples through ``db``.  Keyword lookups
    (``get_by``, ``filter_by``, constructor ``**kwargs``) resolve each key
    to a class attribute found through the MRO; that attribute is expected
    to expose ``pred`` (the RDF predicate) and ``__set__``.
    """
    db=ConjunctiveGraph()
    """Default graph for access to instances of this type"""
    rdf_type=None
    """rdf:type of instances of this class"""

    def __init__(self, resUri = None, **kwargs):
        """The constructor tries hard to return you an rdfSubject

        :param resUri: the "resource uri".  If `None` (or otherwise empty)
                       then create an instance with a fresh BNode resUri.
                       Can be given as one of:

                       * an instance of an rdfSubject
                       * an instance of a BNode or a URIRef
                       * an n3 uriref string like: "<urn:isbn:1234567890>"
                       * an n3 bnode string like: "_:xyz1234"

        :param kwargs: is a set of values that will be set using the keys
                       to find the appropriate descriptor
        :raises AttributeError: if `resUri` is none of the forms above"""

        if not resUri:  # create a bnode
            self.resUri = BNode()
            if self.rdf_type:
                self.db.add((self.resUri, RDF.type, self.rdf_type))
        elif isinstance(resUri, (BNode, URIRef)):  # use the identifier passed in
            self.resUri = resUri
            type_triple = (self.resUri, RDF.type, self.rdf_type)
            # only assert the type when the graph does not already hold it
            if self.rdf_type and not self._has_triple(self.db, type_triple):
                self.db.add(type_triple)
        elif isinstance(resUri, rdfSubject):  # share node and graph of the subject passed in
            self.resUri = resUri.resUri
            self.db = resUri.db
        elif isinstance(resUri, (str, unicode)):  # create one from a <uri> or _:bnode string
            if resUri[0] == "<" and resUri[-1] == ">":
                self.resUri = URIRef(resUri[1:-1])
            elif resUri.startswith("_:"):
                self.resUri = BNode(resUri[2:])
            else:
                raise AttributeError("cannot construct rdfSubject from %s"%(str(resUri)))
        else:
            # Without this branch an unsupported argument type would yield an
            # instance that has no resUri at all and fails on first use.
            raise AttributeError("cannot construct rdfSubject from %s"%(str(resUri)))

        if kwargs:
            self._set_with_dict(kwargs)

    @staticmethod
    def _has_triple(db, pattern):
        """Return True if at least one triple in `db` matches `pattern`.

        Only relies on ``db.triples(pattern)`` being iterable; stops at the
        first match.
        """
        for _match in db.triples(pattern):
            return True
        return False

    def n3(self):
        """n3 repr of this node"""
        return self.resUri.n3()

    @classmethod
    def _getdescriptor(cls, key):
        """Return the descriptor stored under `key` on this class.

        Walks ``cls.mro()`` and returns ``kls.__dict__[key]`` from the first
        class that defines it, i.e. the raw attribute without invoking the
        descriptor protocol.

        :raises AttributeError: if no class in the MRO defines `key`"""
        # NOT SURE if mro is the way to do this or if we should call super or bases?
        for kls in cls.mro():
            if key in kls.__dict__:
                return kls.__dict__[key]
        raise AttributeError("descriptor %s not found for class %s" % (key,cls))

    #short term hack.  Need to go to a sqlalchemy 0.4 style query method
    # obj.query.get_by should map to obj.get_by  ..same for fetch_by
    @classmethod
    def query(cls):
        """Return the class itself, so ``cls.query().get_by(...)`` works."""
        return cls

    @classmethod
    def get_by(cls, **kwargs):
        """Class Method, returns a single instance of the class
        by a single kwarg.  the keyword must be a descriptor of the
        class.
        example:

            bigBlue = Company.get_by(symbol='IBM')

        :Note:
            the keyword should map to an rdf predicate
            that is of type owl:InverseFunctional

        :raises ValueError: unless exactly one keyword is given
        :raises AttributeError: if the keyword is not a descriptor of the class
        :raises LookupError: if no subject has that value"""
        if len(kwargs) != 1:
            raise ValueError("get_by wanted exactly 1 but got  %i args\nMaybe you wanted filter_by"%(len(kwargs)))
        # list() so this also works where items() is a view, not a list
        key, value = list(kwargs.items())[0]
        if isinstance(value, (URIRef,BNode,Literal)):
            o = value
        else:
            o = Literal(value)
        pred = cls._getdescriptor(key).pred
        uri = cls.db.value(None, pred, o)
        if uri:
            return cls(uri)
        raise LookupError("%s = %s not found"%(key,value))

    @classmethod
    def filter_by(cls, **kwargs):
        """Class method returns a generator over class instances
        meeting the kwargs conditions.

        Each keyword must be a class descriptor

        filter by RDF.type == cls.rdf_type is implicit

        Order helps, the first keyword should be the most restrictive
        """
        filters = []
        for key, value in kwargs.items():
            pred = cls._getdescriptor(key).pred
            # try to make the value be OK for the triple query as an object
            if isinstance(value, Identifier):
                obj = value
            else:
                obj = Literal(value)
            filters.append((pred, obj))
        # make sure we filter by type
        type_filter = (RDF.type, cls.rdf_type)
        if type_filter not in filters:
            filters.append(type_filter)
        # the first filter produces the candidates, the rest are checked per candidate
        first_pred, first_obj = filters[0]
        log.debug("Checking %s, %s", first_pred, first_obj)
        for sub in cls.db.subjects(first_pred, first_obj):
            log.debug("maybe %s", sub)
            for pred, obj in filters[1:]:
                log.debug("Checking %s, %s", pred, obj)
                # Explicit existence test: errors raised by the graph now
                # propagate instead of being mistaken for "no match".
                if not cls._has_triple(cls.db, (sub, pred, obj)):
                    log.warning("No %s", sub)
                    break
            else:
                yield cls(sub)

    @classmethod
    def ClassInstances(cls):
        """return a generator for instances of this rdf:type
        you can look in MyClass.rdf_type to see the type being matched;
        each subject is yielded once even if the graph reports it repeatedly"""
        beenthere = set()
        for i in cls.db.subjects(RDF.type, cls.rdf_type):
            if i not in beenthere:
                yield cls(i)
                beenthere.add(i)

    @classmethod
    def GetRandom(cls):
        """for development just returns a random instance of this class

        :raises IndexError: (from ``random.choice``) if there are no instances"""
        from random import choice
        xii = list(cls.ClassInstances())
        return choice(xii)

    def __hash__(self):
        # salted so a subject does not hash like the plain n3 string of its node
        return hash("ranD0Mi$h_"+self.n3())

    def __cmp__(self, other):
        """Order subjects by their n3 form; `other` must provide ``n3()``."""
        return cmp(self.n3(), other.n3())

    def __repr__(self):
        return """%s('%s')""" % (self.__class__.__name__, self.n3())

    def __str__(self):
        return str(self.resUri)

    def __getitem__(self, pred):
        """Return one value of predicate `pred` for this subject.

        Literals are converted with ``toPython()``; BNode/URIRef objects are
        wrapped in an ``rdfSubject``; anything else (including the ``None``
        returned when there is no value) is passed through unchanged.
        """
        log.debug("Getting with __getitem__ %s for %s", pred, self.n3())
        val = self.db.value(self.resUri, pred)
        if isinstance(val, Literal):
            val = val.toPython()
        elif isinstance(val, (BNode,URIRef)):
            val = rdfSubject(val)
        return val

    def __delitem__(self, pred):
        """Remove every (self, pred, *) triple, cascading into bnode objects."""
        log.debug("Deleting with __delitem__ %s for %s", pred, self)
        # snapshot first so the removals cannot disturb the iteration
        for s, p, o in list(self.db.triples((self.resUri, pred, None))):
            self.db.remove((s, p, o))
            #finally if the object in the triple was a bnode
            #cascade delete the thing it referenced
            # ?? FIXME Do we really want to cascade if it's an rdfSubject??
            if isinstance(o, (BNode, rdfSubject)):
                rdfSubject(o)._remove(db=self.db, cascade='bnode')

    def _set_with_dict(self, kv):
        """
        :param kv: a dict

          for each key,value pair in dict kv
               set self.key = value

        :raises AttributeError: if a key is not a descriptor of the class
        """
        for key, value in kv.items():
            descriptor = self.__class__._getdescriptor(key)
            descriptor.__set__(self, value)

    def _remove(self, db=None, cascade = 'bnode', bnodeCheck=True):
        """remove all triples where this rdfSubject is the subject of the triple

        :param db: limit the remove operation to this graph
                   (`None` means ``self.db``)
        :param cascade: must be one of:

                    * none --  remove none
                    * bnode -- (default) remove all unreferenced bnodes
                    * all -- remove all unreferenced bnode(s) AND uri(s)

        :param bnodeCheck: boolean

                    * True -- (default) check bnodes and raise exception if there are
                              still references to this node
                    * False --  do not check.  This can leave orphaned object reference
                                in triples.  Use only if you are resetting the value in
                                the same transaction

        :raises RDFAlchemyError: if `bnodeCheck` is set and this bnode is still
                                 the object of some triple
        :raises AttributeError: if `cascade` is not one of the values above
        """
        noderef = self.resUri
        log.debug("Called remove on %s", self)
        # "is None", not truthiness: an empty graph passed explicitly must
        # not be swapped for self.db
        if db is None:
            db = self.db

        # we cannot delete a bnode if it is still referenced,
        # i.e. if it is the o of a s,p,o
        if bnodeCheck and isinstance(noderef, BNode):
            for s, p, o in db.triples((None, None, noderef)):
                raise RDFAlchemyError("Cannot delete a bnode %s becuase %s still references it" % (noderef.n3(), s.n3()))

        # node types that are never followed for each cascade mode
        if cascade == 'bnode':
            never_cascade = (URIRef, Literal)
        elif cascade == 'none':
            never_cascade = ()
        elif cascade == 'all':
            never_cascade = (Literal,)
        else:
            raise AttributeError("unknown cascade argument")

        def test(node):
            """True if `node` should be removed as part of the cascade."""
            if cascade == 'none' or isinstance(node, never_cascade):
                return False
            # a node that is still the object of some triple must stay
            return not self._has_triple(db, (None, None, node))

        # snapshot first so the removals cannot disturb the iteration
        for s, p, o in list(db.triples((noderef, None, None))):
            db.remove((s, p, o))
            if test(o):
                rdfSubject(o)._remove(db=db, cascade=cascade)

    def _rename(self, name, db=None):
        """rename a node: move every triple that has this node as subject
        or as object over to `name`, then adopt `name` as ``resUri``

        :param name: the new identifier, a BNode or URIRef
        :param db: graph to operate on (`None` means ``self.db``)
        :raises AttributeError: if `name` is not a BNode or URIRef"""
        if db is None:
            db = self.db
        if not isinstance(name, (BNode,URIRef)):
            raise AttributeError("cannot rename to %s" % name)
        for s, p, o in list(db.triples((self.resUri, None, None))):
            db.remove((s, p, o))
            db.add((name, p, o))
        for s, p, o in list(db.triples((None, None, self.resUri))):
            # remove/add exactly this triple; Graph.set would also drop any
            # other object stored for the same subject and predicate
            db.remove((s, p, o))
            db.add((s, p, name))
        self.resUri = name

    def _ppo(self, db=None):
        """Like pretty print...
        Print a 'pretty predicate,object' of self:
        all predicate object pairs with qnames, then a closing line
        holding a single space

        :param db: graph to read from (`None` means ``self.db``)"""
        if db is None:
            db = self.db
        for p, o in db.predicate_objects(self.resUri):
            print("%20s = %s" % (db.qname(p), str(o)))
        print(" ")

    def md5_term_hash(self):
        """Not sure what good this method is but it's defined for
        rdflib.Identifiers so it's here for now"""
        return self.resUri.md5_term_hash()
|---|
| 312 | |
|---|
| 313 | |
|---|
| 314 | |
|---|