| 1 | #!/usr/bin/env python |
|---|
| 2 | # encoding: utf-8 |
|---|
| 3 | """ |
|---|
| 4 | rdfsSubject.py |
|---|
| 5 | |
|---|
| 6 | rdfsSubject is similar to rdfSubject but includes more |
|---|
| 7 | processing and *magic* based on an `RDF Schema`__ |
|---|
| 8 | |
|---|
| 9 | __ ::http://www.w3.org/TR/rdf-schema/ |
|---|
| 10 | |
|---|
| 11 | Created by Philip Cooper on 2008-05-14. |
|---|
| 12 | Copyright (c) 2008 Openvest. All rights reserved. |
|---|
| 13 | """ |
|---|
| 14 | |
|---|
| 15 | from rdfalchemy import rdfSubject, RDF, RDFS, Namespace, BNode, URIRef |
|---|
| 16 | from rdflib.Identifier import Identifier |
|---|
| 17 | from descriptors import * |
|---|
| 18 | from orm import mapper, allsub |
|---|
| 19 | |
|---|
| 20 | import logging |
|---|
| 21 | |
|---|
| 22 | log=logging.getLogger(__name__) |
|---|
| 23 | #log.debug("logger is %s",log.name) |
|---|
| 24 | log.setLevel(logging.INFO) |
|---|
| 25 | |
|---|
| 26 | |
|---|
| 27 | from weakref import WeakValueDictionary |
|---|
| 28 | import re |
|---|
| 29 | |
|---|
# Namespace object for the OWL vocabulary; terms are read from it below both
# as attributes (OWL.ObjectProperty) and by key (OWL["Class"]).
OWL = Namespace("http://www.w3.org/2002/07/owl#")

# NOTE(review): this is spelled `_all_`, not `__all__`, so Python does not
# consult it for `from <module> import *`; as written it is only a plain list.
# Presumably `__all__` was intended -- confirm before renaming, because doing
# so would stop star-imports from exporting every name missing from the list
# (e.g. owlClass and OWL).
_all_ = ['rdfsSubject','rdfsClass','rdfsProperty',
         'owlObjectProperty','owlDatatypeProperty',
         'owlSymetricProperty', 'owlTransitiveProperty',
         'owlFunctionalProperty','owlInverseFunctionalProperty']


# Splits a URI string into two groups: everything up to and including the
# last '/' or '#', and whatever follows it.
re_ns_n = re.compile(r'(.*[/#])(.*)')
|---|
| 39 | |
|---|
| 40 | |
|---|
class rdfsSubject(rdfSubject, Identifier):
    """An ``rdfSubject`` that is itself an identifier and honours ``rdf:type``.

    Constructing an instance first builds the underlying ``BNode`` or
    ``URIRef`` value.  If the node already has an ``rdf:type`` value, the
    Python subclasses reported by ``allsub(cls)`` are searched for one whose
    ``rdf_type`` matches, and an instance of that subclass is returned
    instead.  Instances are shared through a weak-value cache, so asking for
    the same node again returns the same object while it is still alive.
    """

    # Live instances keyed by ``md5_term_hash()``; entries vanish once the
    # instance is no longer referenced anywhere else.
    _weakrefs = WeakValueDictionary()

    def __new__(cls, resUri=None, schemaGraph=None, *args, **kwargs):
        """Create (or fetch from the cache) the subject for ``resUri``.

        :param resUri: one of

            * a false value or a ``BNode`` -- a blank node is created,
            * a ``URIRef`` -- used as the identifier,
            * an ``rdfSubject`` -- its ``resUri`` is reused,
            * a string of the form ``<uri>`` or ``_:bnodeid``.
        :param schemaGraph: accepted but not used by this method.
        :raises AttributeError: if ``resUri`` is of none of the forms above.
        """
        if not resUri or isinstance(resUri, BNode) or issubclass(cls, BNode):
            # create a bnode
            obj = BNode.__new__(cls, resUri)
            obj._nodetype = BNode
        elif isinstance(resUri, URIRef) or issubclass(cls, URIRef):
            # use the identifier passed in
            obj = URIRef.__new__(cls, resUri)
            obj._nodetype = URIRef
        elif isinstance(resUri, rdfSubject):
            # use the resUri of the subject passed in
            obj = type(resUri.resUri).__new__(cls, resUri.resUri)
            obj._nodetype = type(resUri.resUri)
        elif isinstance(resUri, (str, unicode)):
            # create one from a <uri> or _:bnode string
            if resUri[0] == "<" and resUri[-1] == ">":
                obj = URIRef.__new__(cls, resUri[1:-1])
                obj._nodetype = URIRef
            elif resUri.startswith("_:"):
                obj = BNode.__new__(cls, resUri[2:])
                obj._nodetype = BNode
            else:
                # A string in neither notation used to fall through with
                # ``obj`` unbound; fail with the documented error instead.
                raise AttributeError("cannot construct rdfSubject from %s"%(str(resUri)))
        else:
            raise AttributeError("cannot construct rdfSubject from %s"%(str(resUri)))

        # At this point we have an obj to return...but we might want to look
        # deeper: if there is an RDF:type entry on the graph, find the mapped
        # subclass and return an object of that type.
        subclass = cls
        if resUri:
            rdf_type = obj[RDF.type]
            if rdf_type:
                class_dict = dict((str(cl.rdf_type), cl)
                                  for cl in allsub(cls) if cl.rdf_type)
                subclass = class_dict.get(str(rdf_type.resUri), cls)

        # improve this to do some kind of hash with classname??
        # _weakrefs lets us return an existing object rather than a copy.
        md5id = obj.md5_term_hash()
        newobj = rdfsSubject._weakrefs.get(md5id, None)
        log.debug("looking for weakref %s found %s",md5id,newobj)
        if newobj:
            return newobj
        newobj = super(rdfSubject, obj).__new__(subclass, obj.resUri)
        log.debug("add a weakref %s", newobj)
        newobj._nodetype = obj._nodetype
        rdfsSubject._weakrefs[newobj.md5_term_hash()] = newobj
        return newobj

    def __init__(self, resUri=None, **kwargs):
        """Record the class's ``rdf_type`` for this node and apply ``kwargs``.

        If the node has no ``rdf:type`` value yet and the class declares an
        ``rdf_type``, the triple ``(resUri, RDF.type, rdf_type)`` is added to
        ``self.db``.  Any keyword arguments are handed to ``_set_with_dict``.
        """
        if not self[RDF.type] and self.rdf_type:
            self.db.add((self.resUri, RDF.type, self.rdf_type))
        if kwargs:
            self._set_with_dict(kwargs)

    @property
    def resUri(self):
        """This subject's value rebuilt as a plain ``BNode``/``URIRef``."""
        return self._nodetype(unicode(self))

    def _splitname(self):
        """Return ``(namespace, localname)``, split at the last '/' or '#'.

        NOTE(review): an identifier containing neither character does not
        match the pattern, so this raises ``AttributeError`` on ``None``
        (unchanged from the previous inline regex).
        """
        return re_ns_n.match(self.resUri).groups()

    @classmethod
    def ClassInstances(cls):
        """Return a generator over the instances of this rdf:type.

        Yields ``cls(node)`` for every subject in ``cls.db`` typed with
        ``cls.rdf_type``, followed by whatever ``ClassInstances()`` of each
        Python subclass (from ``allsub``) yields.  Each value is yielded at
        most once.
        """
        # Start with all things of "my" type in the db
        beenthere = set()
        for i in cls.db.subjects(RDF.type, cls.rdf_type):
            if i not in beenthere:
                yield cls(i)
                beenthere.add(i)

        # for all subclasses of me in python do the same (recursively)
        pySubClasses = allsub(cls)
        for sub in pySubClasses:
            for i in sub.ClassInstances():
                if i not in beenthere:
                    yield i
                    beenthere.add(i)

        # not done yet, for all db subclasses that I have not processed
        # already...get them too
        dbSubClasses = rdfsClass(cls.rdf_type).transitive_subClasses
        # Loop-invariant, so build it once rather than once per db subclass.
        mappedTypes = [pysub.rdf_type for pysub in pySubClasses]
        moreSubClasses = [dbsub.resUri for dbsub in dbSubClasses
                          if dbsub.resUri not in mappedTypes]
        for sub in moreSubClasses:
            for i in cls.db.subjects(RDF.type, sub):
                # NOTE(review): the leading '' makes this condition always
                # false, so instances of db-only subclasses are never
                # yielded.  It looks like a deliberate or leftover
                # off-switch; it is preserved because enabling it would
                # change what callers receive (and would yield raw nodes,
                # not ``cls`` instances).  Confirm the intent.
                if '' and i not in beenthere:
                    yield i
                    beenthere.add(i)
|---|
| 133 | |
|---|
| 134 | |
|---|
| 135 | |
|---|
class rdfsClass(rdfsSubject):
    """rdfsSubject mapped to ``rdfs:Class``, with some RDF Schema add-ons.

    *Some* inferencing is implied.
    Bleeding edge: be careful.
    """
    rdf_type = RDFS.Class
    comment = rdfSingle(RDFS.comment)
    label = rdfSingle(RDFS.label)
    subClassOf = rdfMultiple(RDFS.subClassOf, range_type = RDFS.Class)

    @property
    def transitive_subClassOf(self):
        """``rdfsClass`` wrappers for ``db.transitive_objects`` over rdfs:subClassOf."""
        return [rdfsClass(s) for s in self.db.transitive_objects(self.resUri,RDFS.subClassOf)]

    @property
    def transitive_subClasses(self):
        """``rdfsClass`` wrappers for ``db.transitive_subjects`` over rdfs:subClassOf."""
        return [rdfsClass(s) for s in self.db.transitive_subjects(RDFS.subClassOf, self.resUri)]

    @property
    def properties(self):
        """List the ``rdfsProperty`` instances whose ``domain`` equals this class."""
        # this doesn't get the rdfsProperty subclasses
        #   return list(rdfsProperty.filter_by(domain=self.resUri))
        # TODO: why iterate all rdfsProperty subclasses
        #   try self.db.subjects(RDFS.domain,self.resUri)
        return [x for x in rdfsProperty.ClassInstances() if x.domain == self]

    def _emit_rdfSubject(self, visitedNS={}, visitedClass=set([])):
        """Produce the text that might be used for a .py file.

        Emits a Python class definition for this RDFS class, preceded by the
        definitions of its superclasses that live in the same namespace.
        When ``visitedNS`` is empty, an import header and one ``Namespace``
        assignment per namespace bound in ``self.db`` are emitted first.

        :param visitedNS: maps namespace URI -> Python variable name already
            emitted for it; filled in by this method.
        :param visitedClass: accepted but not used.
        :returns: the generated source text, ending in one ``mapper()`` call.
        :raises Exception: if the qname of this class is not ``prefix:local``.

        NOTE(review): both defaults are mutable and therefore shared between
        calls, so the header is only produced by the first default-argument
        call in a process.  That may be relied upon by callers that
        concatenate the output for several classes, so it is left unchanged.

        TODO: This code should probably move into the commands module since
        that's the only place it's used.
        """
        ns, loc = self._splitname()
        qname = self.db.qname(self.resUri)
        try:
            # Only validates the qname shape; the parts themselves are unused.
            _prefix, _qloc = qname.split(':')
        except ValueError:
            raise Exception("don't know how to handle a qname like %s" % qname)

        if not visitedNS:
            # rdfMultiple is imported here because the class bodies emitted
            # below refer to it.
            src = """
from rdfalchemy import rdfSubject, Namespace, URIRef
from rdfalchemy.rdfsSubject import rdfsSubject
from rdfalchemy.descriptors import rdfMultiple
from rdfalchemy.orm import mapper

"""
            for k, v in self.db.namespaces():
                # Remember exactly the identifier that is emitted, so later
                # references agree with it for prefixes containing '-'.
                nsname = k.upper().replace('-','_')
                visitedNS[str(v)] = nsname
                src += '%s = Namespace("%s")\n' % (nsname, v)
        else:
            src = ""

        mySupers = []
        for mySuper in self.subClassOf:
            sns, sloc = mySuper._splitname()
            if ns == sns:
                src += mySuper._emit_rdfSubject(visitedNS=visitedNS)
                mySupers.append(sloc.replace('-','_'))

        mySupers = ",".join(mySupers) or "rdfsSubject"
        src += '\nclass %s(%s):\n'%(loc.replace('-','_'), mySupers)
        src += '\t"""%s %s"""\n'%(self.label, self.comment)
        src += '\trdf_type = %s["%s"]\n' % (visitedNS[ns],loc)

        for p in self.properties:
            pns, ploc = p._splitname()
            ppy = '%s["%s"]' % (visitedNS[pns],ploc)
            range_type = ''
            try:
                # rdfs.Class and owl.Class; an explicit test rather than an
                # assert so the check survives ``python -O``.
                if str(p.range[RDF.type].resUri).endswith('Class'):
                    rns, rloc = rdfsSubject(p.range)._splitname()
                    range_type = ', range_type = %s["%s"]' % (visitedNS[rns],rloc)
            except Exception:
                # Best effort: any failure resolving the range simply means
                # no range_type is emitted for this property.
                range_type = ''
            src += '\t%s = rdfMultiple(%s%s)\n' % (ploc.replace('-','_') ,ppy,range_type)

        # Just want this once at the end: drop the calls contributed by the
        # recursive superclass emissions (str.replace returns a new string).
        src = src.replace("mapper()\n","")
        src += "mapper()\n"

        return src
|---|
| 217 | |
|---|
class rdfsProperty(rdfsSubject):
    """rdfsSubject mapped to ``rdf:Property``.

    Exposes ``rdfs:domain``, ``rdfs:range`` and ``rdfs:subPropertyOf`` through
    the ``rdfSingle``/``rdfMultiple`` descriptors.
    """
    rdf_type = RDF.Property
    domain = rdfSingle(RDFS.domain, range_type=RDFS.Class)
    range = rdfSingle(RDFS.range)
    subPropertyOf = rdfMultiple(RDFS.subPropertyOf)
    # Descriptor class associated with this kind of property.
    # NOTE(review): nothing in this file reads default_descriptor; presumably
    # it is consumed by code-generation tooling elsewhere -- confirm.
    default_descriptor = rdfMultiple
|---|
| 224 | |
|---|
| 225 | ##################################################################### |
|---|
| 226 | # Beginnings of an OWL package |
|---|
| 227 | |
|---|
class owlClass(rdfsClass):
    """rdfsClass mapped to ``owl:Class``, with descriptors for OWL class axioms.

    *Some* inferencing is implied.
    Bleeding edge: be careful.
    """
    rdf_type = OWL["Class"]
    disjointWith = rdfMultiple(OWL["disjointWith"], range_type = OWL["Class"])
    equivalentClass = rdfMultiple(OWL["equivalentClass"], range_type = OWL["Class"])
    # No range_type is declared for the next two descriptors.
    intersectionOf = rdfMultiple(OWL["intersectionOf"])
    unionOf = rdfMultiple(OWL["unionOf"])
    complementOf = rdfMultiple(OWL["complementOf"], range_type = OWL["Class"])
|---|
| 238 | |
|---|
| 239 | |
|---|
| 240 | ######################################## |
|---|
| 241 | # properties |
|---|
| 242 | |
|---|
class owlFunctionalProperty(rdfsProperty):
    """rdfsProperty mapped to ``owl:FunctionalProperty``."""
    rdf_type = OWL.FunctionalProperty
    # Overrides rdfsProperty's rdfMultiple with the single-valued descriptor.
    default_descriptor = rdfSingle
|---|
| 246 | |
|---|
class owlDatatypeProperty(rdfsProperty):
    """rdfsProperty mapped to ``owl:DatatypeProperty``."""
    rdf_type = OWL.DatatypeProperty
    # Redeclares rdfsProperty.range, this time with range_type=RDFS.Class.
    range = rdfSingle(RDFS.range, range_type = RDFS.Class)
    default_descriptor = rdfMultiple
|---|
| 251 | |
|---|
| 252 | ######################################## |
|---|
| 253 | # Object properties |
|---|
class owlObjectProperty(rdfsProperty):
    """rdfsProperty mapped to ``owl:ObjectProperty``; adds ``owl:inverseOf``."""
    rdf_type = OWL.ObjectProperty
    # Redeclares rdfsProperty.range, this time with range_type=RDFS.Class.
    range = rdfSingle(RDFS.range, range_type = RDFS.Class)
    inverseOf = rdfSingle(OWL.inverseOf, range_type = OWL.ObjectProperty)
    default_descriptor = rdfMultiple
|---|
| 259 | |
|---|
class owlInverseFunctionalProperty(owlObjectProperty):
    """owlObjectProperty mapped to ``owl:InverseFunctionalProperty``."""
    rdf_type = OWL.InverseFunctionalProperty
    # Overrides owlObjectProperty's rdfMultiple with the single-valued descriptor.
    default_descriptor = rdfSingle
|---|
| 263 | |
|---|
class owlSymetricProperty(owlObjectProperty):
    """owlObjectProperty mapped to ``owl:SymmetricProperty``.

    The Python class name keeps its historical spelling so existing imports
    keep working; only the RDF term is spelled the way the OWL vocabulary
    defines it (two m's), so that resources typed ``owl:SymmetricProperty``
    are matched.
    """
    rdf_type = OWL.SymmetricProperty
    default_descriptor = rdfMultiple
|---|
| 267 | |
|---|
class owlTransitiveProperty(owlObjectProperty):
    """owlObjectProperty mapped to ``owl:TransitiveProperty``."""
    rdf_type = OWL.TransitiveProperty
    # owlTransitive is not defined in this file; presumably it is supplied by
    # the star import from the descriptors module -- confirm.
    default_descriptor = owlTransitive
|---|
| 271 | |
|---|
| 272 | # this maps the return type of subClassOf back to rdfsClass |
|---|
| 273 | mapper() |
|---|
| 274 | |
|---|