| 1 | #!/usr/bin/env python |
|---|
| 2 | """ |
|---|
| 3 | rdfalchemy.py - a Simple API for RDF |
|---|
| 4 | |
|---|
| 5 | |
|---|
| 6 | Requires rdflib <http://www.rdflib.net/> version 2.3 ??. |
|---|
| 7 | |
|---|
| 8 | """ |
|---|
| 9 | |
|---|
| 10 | from rdflib import ConjunctiveGraph |
|---|
| 11 | from rdflib import BNode, Namespace, URIRef, RDF |
|---|
| 12 | from rdflib.Identifier import Identifier |
|---|
| 13 | from rdfalchemy.exceptions import RDFAlchemyError |
|---|
| 14 | from rdfalchemy.Literal import Literal |
|---|
| 15 | import re |
|---|
| 16 | |
|---|
| 17 | try: |
|---|
| 18 | from hashlib import md5 |
|---|
| 19 | except ImportError: |
|---|
| 20 | from md5 import md5 |
|---|
| 21 | |
|---|
| 22 | import logging |
|---|
| 23 | #console = logging.StreamHandler() |
|---|
| 24 | #formatter = logging.Formatter('%(name)-12s: %(levelname)-8s %(message)s') |
|---|
| 25 | #console.setFormatter(formatter) |
|---|
| 26 | |
|---|
| 27 | log=logging.getLogger(__name__) |
|---|
| 28 | #log.setLevel(logging.DEBUG) |
|---|
| 29 | #log.addHandler(console) |
|---|
| 30 | |
|---|
| 31 | re_ns_n = re.compile('(.*[/#])(.*)') |
|---|
| 32 | |
|---|
| 33 | # Note: Non data descriptors (get only) lookup in obj.__dict__ first |
|---|
| 34 | # Data descriptors (get and set) use the __get__ first |
|---|
| 35 | |
|---|
| 36 | ################################################################################## |
|---|
| 37 | # define our Base Class for all "subjects" in python |
|---|
| 38 | ################################################################################## |
|---|
| 39 | |
|---|
| 40 | #class rdfSubject(object): |
|---|
| 41 | class rdfSubject(object): |
|---|
| 42 | db=ConjunctiveGraph() |
|---|
| 43 | """Default graph for access to instances of this type""" |
|---|
| 44 | rdf_type=None |
|---|
| 45 | """rdf:type of instances of this class""" |
|---|
| 46 | |
|---|
def __init__(self, resUri=None, **kwargs):
    """Bind this instance to a resource identifier.

    :param resUri: the "resource uri".  If it is ``None`` (or otherwise
        falsy) a fresh ``BNode`` is created.  Otherwise it can be one of:

        * an instance of an rdfSubject (its ``resUri`` and ``db`` are reused)
        * an instance of a BNode or a URIRef
        * an n3 uriref string like: "<urn:isbn:1234567890>"
        * an n3 bnode string like: "_:xyz1234"

    :param kwargs: values to assign after construction; each key is looked
        up with ``_getdescriptor`` and set through that descriptor.
    :raises AttributeError: if ``resUri`` is of an unsupported type or is a
        string in neither of the two n3 forms above.
    """
    if not resUri:
        # Nothing usable was passed: mint a blank node and, when the class
        # declares an rdf_type, record that type in the graph.
        self.resUri = BNode()
        if self.rdf_type:
            self.db.add((self.resUri, RDF.type, self.rdf_type))
    elif isinstance(resUri, (BNode, URIRef)):
        # Use the identifier exactly as given.
        self.resUri = resUri
    elif isinstance(resUri, rdfSubject):
        # Share both the identifier and the graph of the other subject.
        self.resUri = resUri.resUri
        self.db = resUri.db
    else:
        node = None
        if isinstance(resUri, (str, unicode)):
            if resUri[0] == "<" and resUri[-1] == ">":
                node = URIRef(resUri[1:-1])
            elif resUri.startswith("_:"):
                node = BNode(resUri[2:])
        if node is None:
            # Covers unsupported types and malformed strings alike, so the
            # instance is never left without a resUri attribute.
            raise AttributeError("cannot construct rdfSubject from %s" % (str(resUri)))
        self.resUri = node

    if kwargs:
        self._set_with_dict(kwargs)
|---|
| 79 | |
|---|
| 80 | |
|---|
def n3(self):
    """Return the n3 serialisation of this subject's identifier."""
    node = self.resUri
    return node.n3()
|---|
| 84 | |
|---|
| 85 | |
|---|
@classmethod
def _getdescriptor(cls, key):
    """Return the raw class attribute stored under ``key``.

    Each class in ``cls.mro()`` is searched in order and the first
    ``__dict__`` entry named ``key`` is returned as-is (no descriptor
    protocol is invoked).

    :raises AttributeError: if no class in the MRO defines ``key``.
    """
    for klass in cls.mro():
        namespace = klass.__dict__
        if key in namespace:
            return namespace[key]
    raise AttributeError("descriptor %s not found for class %s" % (key,cls))
|---|
| 95 | |
|---|
# Short term hack: the goal is a sqlalchemy 0.4 style query method, so that
# obj.query.get_by maps to obj.get_by (and likewise for fetch_by).
@classmethod
def query(cls):
    """Return the class itself so query-style calls can be chained on it."""
    return cls
|---|
| 101 | |
|---|
| 102 | |
|---|
@classmethod
def get_by(cls, **kwargs):
    """Return one instance of the class selected by a single keyword.

    The keyword must name a descriptor of the class; the descriptor's
    ``pred`` is used as the predicate of the lookup.  Example:

        bigBlue = Company.get_by(symbol='IBM')

    :Note:
        the keyword should map to an rdf predicate
        that is of type owl:InverseFunctional

    :raises ValueError: unless exactly one keyword is given
    :raises LookupError: if the graph holds no matching subject
    """
    count = len(kwargs)
    if count != 1:
        raise ValueError("get_by wanted exactly 1 but got %i args\nMaybe you wanted filter_by"%(count))
    (key, value), = kwargs.items()
    # URIRef / BNode / Literal values are used as the triple object directly;
    # any other value is wrapped in a Literal first.
    if isinstance(value, (URIRef, BNode, Literal)):
        o = value
    else:
        o = Literal(value)
    pred = cls._getdescriptor(key).pred
    uri = cls.db.value(None, pred, o)
    if not uri:
        raise LookupError("%s = %s not found"%(key,value))
    return cls(uri)
|---|
| 128 | |
|---|
@classmethod
def filter_by(cls, **kwargs):
    """Generate instances of the class that satisfy every keyword condition.

    Each keyword must name a descriptor of the class; its ``pred`` becomes
    the predicate of a ``(pred, value)`` filter.  A filter on
    ``RDF.type == cls.rdf_type`` is added implicitly when not already
    present.

    The first filter drives the scan over candidate subjects and the
    remaining filters are checked per candidate, so the most restrictive
    condition should come first.
    NOTE(review): filters are taken in ``kwargs.items()`` order; confirm
    that this reflects call-site keyword order on the target interpreter.
    """
    filters = []
    for key, value in kwargs.items():
        pred = cls._getdescriptor(key).pred
        # Identifiers can be used as the object of a triple pattern as they
        # are; everything else is wrapped in a Literal.
        if isinstance(value, Identifier):
            obj = value
        else:
            obj = Literal(value)
        filters.append((pred, obj))

    # Make sure we always filter by type.
    type_filter = (RDF.type, cls.rdf_type)
    if type_filter not in filters:
        filters.append(type_filter)

    first_pred, first_obj = filters[0]
    remaining = filters[1:]
    log.debug("Checking %s, %s", first_pred, first_obj)
    for sub in cls.db.subjects(first_pred, first_obj):
        log.debug("maybe %s", sub)
        for pred, obj in remaining:
            log.debug("Checking %s, %s", pred, obj)
            # Probe for at least one matching triple.  Iterating instead of
            # calling .next() inside a bare except means genuine errors
            # raised by the store are no longer silently swallowed.
            matched = False
            for _triple in cls.db.triples((sub, pred, obj)):
                matched = True
                break
            if not matched:
                log.warning("No %s", sub)
                break
        else:
            # Every remaining filter matched this candidate.
            yield cls(sub)
|---|
| 165 | |
|---|
@classmethod
def ClassInstances(cls):
    """Generate one instance per distinct subject typed as this class.

    Subjects are those returned by ``cls.db.subjects(RDF.type, cls.rdf_type)``;
    look at ``MyClass.rdf_type`` to see the type being matched.
    """
    seen = set()
    for subject in cls.db.subjects(RDF.type, cls.rdf_type):
        if subject in seen:
            # already yielded once; skip the repeat
            continue
        yield cls(subject)
        seen.add(subject)
|---|
| 175 | |
|---|
@classmethod
def GetRandom(cls):
    """Development helper: return a randomly chosen instance of this class."""
    import random
    candidates = list(cls.ClassInstances())
    return random.choice(candidates)
|---|
| 182 | |
|---|
def __hash__(self):
    """Hash on the n3 form of the identifier, with a fixed string prefix."""
    salted = "ranD0Mi$h_" + self.n3()
    return hash(salted)
|---|
| 185 | |
|---|
def __cmp__(self, other):
    """Order two subjects by comparing their n3 representations.

    ``other`` must provide an ``n3()`` method.
    """
    mine = self.n3()
    theirs = other.n3()
    return cmp(mine, theirs)
|---|
| 188 | |
|---|
def __repr__(self):
    """Return ``ClassName('<n3 of the identifier>')``."""
    class_name = self.__class__.__name__
    return "%s('%s')" % (class_name, self.n3())
|---|
| 191 | |
|---|
def __getitem__(self, pred):
    """Look up one value of ``pred`` for this subject in ``self.db``.

    A Literal result is converted with ``toPython()``; a BNode or URIRef
    result is wrapped in an ``rdfSubject``; anything else (including a
    missing value) is returned as the graph gave it.
    """
    log.debug("Getting with __getitem__ %s for %s"%(pred,self.n3()))
    found = self.db.value(self.resUri, pred)
    if isinstance(found, Literal):
        return found.toPython()
    if isinstance(found, (BNode, URIRef)):
        return rdfSubject(found)
    return found
|---|
| 200 | |
|---|
| 201 | |
|---|
def __delitem__(self, pred):
    """Remove every ``(self.resUri, pred, *)`` triple from ``self.db``.

    When the object of a removed triple is a BNode, the node it refers to
    is removed as well via ``_remove(cascade='bnode')``.
    """
    log.debug("Deleting with __delitem__ %s for %s", pred, self)
    # Snapshot the matches before mutating: removing triples while the
    # graph's own iterator is still being consumed is not safe in general.
    matches = list(self.db.triples((self.resUri, pred, None)))
    for s, p, o in matches:
        self.db.remove((s, p, o))
        # Finally, if the object in the triple was a bnode, cascade delete
        # the thing it referenced.
        # ?? FIXME Do we really want to cascade if it's an rdfSubject??
        if isinstance(o, (BNode, rdfSubject)):
            rdfSubject(o)._remove(db=self.db, cascade='bnode')
|---|
| 211 | |
|---|
def _set_with_dict(self, kv):
    """Assign each value in ``kv`` through the descriptor named by its key.

    :param kv: a dict mapping descriptor names to the values to set

    For every ``name, value`` pair the class-level descriptor is fetched
    with ``_getdescriptor`` and its ``__set__`` is called on this instance.
    """
    owner = self.__class__
    for name, new_value in kv.items():
        owner._getdescriptor(name).__set__(self, new_value)
|---|
| 223 | |
|---|
| 224 | |
|---|
def _remove(self, db=None, cascade='bnode', bnodeCheck=True):
    """Remove all triples where this rdfSubject is the subject of the triple.

    :param db: limit the remove operation to this graph; ``None`` (the
        default) means ``self.db``
    :param cascade: must be one of:

        * none  -- remove none
        * bnode -- (default) remove all unreferenced bnodes
        * all   -- remove all unreferenced bnode(s) AND uri(s)

    :param bnodeCheck: boolean

        * True  -- (default) check bnodes and raise exception if there are
          still references to this node
        * False -- do not check.  This can leave orphaned object reference
          in triples.  Use only if you are resetting the value in
          the same transaction

    :raises RDFAlchemyError: if ``bnodeCheck`` is true, this node is a
        BNode, and some triple still has it as object
    :raises AttributeError: if ``cascade`` is not one of the values above
    """
    noderef = self.resUri
    log.debug("Called remove on %s", self)
    # Compare against None explicitly: a graph object that defines __len__
    # is falsy while empty, and `not db` would then silently redirect the
    # removal to self.db.
    if db is None:
        db = self.db

    # We cannot delete a bnode if it is still referenced,
    # i.e. if it is the o of a s,p,o
    if bnodeCheck and isinstance(noderef, BNode):
        for s, p, o in db.triples((None, None, noderef)):
            raise RDFAlchemyError("Cannot delete a bnode %s becuase %s still references it" % (noderef.n3(), s.n3()))

    # Node types that are never cascaded into for each mode; None means
    # "never cascade at all".
    if cascade == 'bnode':
        never_cascade = (URIRef, Literal)
    elif cascade == 'all':
        never_cascade = (Literal,)
    elif cascade == 'none':
        never_cascade = None
    else:
        raise AttributeError("unknown cascade argument")

    def test(node):
        """True when `node` may be removed as part of the cascade."""
        if never_cascade is None or isinstance(node, never_cascade):
            return False
        # Still the object of some triple -> keep it.
        for _triple in db.triples((None, None, node)):
            return False
        return True

    # Snapshot the outgoing triples first; the loop body mutates the graph
    # (directly and through the recursive cascade).
    for s, p, o in list(db.triples((noderef, None, None))):
        db.remove((s, p, o))
        if test(o):
            rdfSubject(o)._remove(db=db, cascade=cascade)
|---|
| 279 | |
|---|
| 280 | |
|---|
def _rename(self, name, db=None):
    """Rename this node to ``name`` in the graph and on the instance.

    Every triple with the current ``resUri`` as subject or as object is
    replaced by the same triple with ``name`` in that position; finally
    ``self.resUri`` is updated.

    :param name: the new identifier, a BNode or URIRef
    :param db: graph to operate on; ``None`` (the default) means ``self.db``
    :raises AttributeError: if ``name`` is neither a BNode nor a URIRef
    """
    # Explicit None test: an empty graph object may be falsy, and must not
    # be mistaken for "no graph given".
    if db is None:
        db = self.db
    if not isinstance(name, (BNode, URIRef)):
        raise AttributeError("cannot rename to %s" % name)

    old = self.resUri
    # Snapshots are taken before each pass because the passes mutate the
    # graph they are reading from.
    for s, p, o in list(db.triples((old, None, None))):
        db.remove((s, p, o))
        db.add((name, p, o))
    # Swap only the one triple that points at the old node.  Using
    # db.set() here would, per rdflib's Graph.set contract, drop every
    # other object of (s, p) as well.
    for s, p, o in list(db.triples((None, None, old))):
        db.remove((s, p, o))
        db.add((s, p, name))
    self.resUri = name
|---|
| 293 | |
|---|
| 294 | |
|---|
def _ppo(self, db=None):
    """Like pretty print...

    Print a 'pretty predicate, object' listing of self: one line per
    predicate/object pair of ``self.resUri``, with the predicate shown as
    the qname given by ``db.qname``.

    :param db: graph to read from; ``None`` (the default) means ``self.db``
    """
    # Explicit None test so that an empty (falsy) graph passed by the
    # caller is not replaced by self.db.
    if db is None:
        db = self.db
    for p, o in db.predicate_objects(self.resUri):
        print("%20s = %s" % (db.qname(p), str(o)))
    # trailing separator line after the listing
    print(" ")
|---|
| 303 | |
|---|
def md5_term_hash(self):
    """Delegate to ``md5_term_hash()`` of the underlying identifier.

    Kept because rdflib identifiers define it; its usefulness here is
    unclear.
    """
    node = self.resUri
    return node.md5_term_hash()
|---|
| 308 | |
|---|
| 309 | |
|---|