1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29 package org.openrdf.elmo.codegen;
30
31 import info.aduna.iteration.CloseableIteration;
32
33 import java.util.Arrays;
34 import java.util.HashMap;
35 import java.util.HashSet;
36 import java.util.Map;
37 import java.util.Set;
38 import java.util.TreeSet;
39 import java.util.regex.Matcher;
40 import java.util.regex.Pattern;
41
42 import javax.xml.namespace.QName;
43
44 import org.openrdf.concepts.owl.AllDifferent;
45 import org.openrdf.concepts.owl.Class;
46 import org.openrdf.concepts.owl.DatatypeProperty;
47 import org.openrdf.concepts.owl.ObjectProperty;
48 import org.openrdf.concepts.owl.Ontology;
49 import org.openrdf.concepts.rdf.List;
50 import org.openrdf.concepts.rdf.Property;
51 import org.openrdf.concepts.rdfs.Datatype;
52 import org.openrdf.elmo.ElmoModule;
53 import org.openrdf.elmo.Entity;
54 import org.openrdf.elmo.exceptions.ElmoIOException;
55 import org.openrdf.elmo.sesame.SesameManager;
56 import org.openrdf.elmo.sesame.SesameManagerFactory;
57 import org.openrdf.elmo.sesame.roles.SesameEntity;
58 import org.openrdf.model.BNode;
59 import org.openrdf.model.Literal;
60 import org.openrdf.model.Namespace;
61 import org.openrdf.model.Resource;
62 import org.openrdf.model.Statement;
63 import org.openrdf.model.URI;
64 import org.openrdf.model.Value;
65 import org.openrdf.model.ValueFactory;
66 import org.openrdf.model.vocabulary.OWL;
67 import org.openrdf.model.vocabulary.RDF;
68 import org.openrdf.model.vocabulary.RDFS;
69 import org.openrdf.model.vocabulary.XMLSchema;
70 import org.openrdf.query.BindingSet;
71 import org.openrdf.query.MalformedQueryException;
72 import org.openrdf.query.QueryEvaluationException;
73 import org.openrdf.query.TupleQuery;
74 import org.openrdf.query.TupleQueryResult;
75 import org.openrdf.repository.Repository;
76 import org.openrdf.repository.RepositoryException;
77 import org.openrdf.repository.RepositoryResult;
78 import org.openrdf.repository.contextaware.ContextAwareConnection;
79 import org.slf4j.Logger;
80 import org.slf4j.LoggerFactory;
81
82
83
84
85
86
87
88
89
90 public class OwlNormalizer {
91 private final Logger logger = LoggerFactory.getLogger(OwlNormalizer.class);
92
93 private static final String PREFIX = "PREFIX rdf: <" + RDF.NAMESPACE
94 + "> PREFIX rdfs: <" + RDFS.NAMESPACE + "> ";
95
96 private static final String SELECT_DEFINED = PREFIX + "SELECT ?bean "
97 + "WHERE { ?bean rdfs:isDefinedBy ?ont "
98 + "FILTER ( ?bean != ?ont ) } ";
99
100 private static final String SELECT_ORPHANS = PREFIX
101 + "SELECT DISTINCT ?bean" + " WHERE { ?bean rdf:type ?type"
102 + " OPTIONAL { ?bean rdfs:isDefinedBy ?ont }"
103 + " FILTER ( isURI(?bean) && ! bound(?ont) ) }";
104
105 private static final String BEAN_DEFINED_BY = PREFIX
106 + "SELECT ?bean WHERE { ?bean rdfs:isDefinedBy ?ont }";
107
108 private SesameManagerFactory factory;
109
110 private SesameManager manager;
111
112 private Set<URI> anonymousClasses = new HashSet<URI>();
113
114 private Map<URI, URI> aliases = new HashMap<URI, URI>();
115
116 private Map<String, Ontology> ontologies;
117
118 private Set<String> commonNS = new HashSet<String>(Arrays.asList(
119 RDF.NAMESPACE, RDFS.NAMESPACE, OWL.NAMESPACE));
120
121 private static final Pattern NS_PREFIX = Pattern
122 .compile("^.*[/#](\\w+)[/#]?$");
123
124 public void setRepository(Repository repository) {
125 this.factory = new SesameManagerFactory(new ElmoModule(), repository);
126 }
127
128 public URI getOriginal(URI alias) {
129 if (anonymousClasses.contains(alias))
130 return null;
131 if (aliases.containsKey(alias))
132 return aliases.get(alias);
133 return alias;
134 }
135
136 public void normalize() throws Exception {
137 infer(factory.getRepository());
138 this.manager = factory.createElmoManager();
139 try {
140 manager.getTransaction().begin();
141 ontologies = findOntologies();
142 commit();
143 checkNamespacePrefixes();
144 commit();
145 checkPropertyDomains(Property.class);
146 checkPropertyDomains(ObjectProperty.class);
147 checkPropertyDomains(DatatypeProperty.class);
148 commit();
149 renameAnonymousClasses();
150 commit();
151 mergeUnionClasses();
152 commit();
153 processAllDifferent();
154 commit();
155 moveForiegnDomains(manager.getConnection());
156 manager.getTransaction().commit();
157 } finally {
158 manager.close();
159 }
160 }
161
162 private void commit() {
163 manager.getTransaction().commit();
164 manager.getTransaction().begin();
165 }
166
167 private void infer(Repository rep) throws RepositoryException {
168 logger.debug("inferring");
169 ValueFactory vf = rep.getValueFactory();
170 ContextAwareConnection conn = new ContextAwareConnection(rep);
171 conn.setAutoCommit(false);
172 symmetric(conn, OWL.INVERSEOF);
173 symmetric(conn, OWL.EQUIVALENTCLASS);
174 symmetric(conn, OWL.EQUIVALENTPROPERTY);
175 symmetric(conn, OWL.DISJOINTWITH);
176 setSubjectType(conn, RDF.FIRST, null, RDF.LIST);
177 setSubjectType(conn, RDF.REST, null, RDF.LIST);
178 setSubjectType(conn, OWL.UNIONOF, null, OWL.CLASS);
179 setSubjectType(conn, OWL.DISJOINTWITH, null, OWL.CLASS);
180 setSubjectType(conn, OWL.COMPLEMENTOF, null, OWL.CLASS);
181 setSubjectType(conn, OWL.EQUIVALENTCLASS, null, OWL.CLASS);
182 setSubjectType(conn, OWL.INTERSECTIONOF, null, OWL.CLASS);
183 setSubjectType(conn, RDF.TYPE, RDFS.CLASS, OWL.CLASS);
184 setObjectType(conn, OWL.UNIONOF, RDF.LIST);
185 setObjectType(conn, RDFS.ISDEFINEDBY, OWL.ONTOLOGY);
186 setSubjectType(conn, OWL.INVERSEOF, null, OWL.OBJECTPROPERTY);
187 setObjectType(conn, OWL.INVERSEOF, OWL.OBJECTPROPERTY);
188 setDatatype(vf, conn, OWL.CARDINALITY, XMLSchema.NON_NEGATIVE_INTEGER);
189 setDatatype(vf, conn, OWL.MINCARDINALITY,
190 XMLSchema.NON_NEGATIVE_INTEGER);
191 setDatatype(vf, conn, OWL.MAXCARDINALITY,
192 XMLSchema.NON_NEGATIVE_INTEGER);
193 conn.commit();
194 conn.close();
195 }
196
197 private Map<String, Ontology> findOntologies()
198 throws MalformedQueryException, RepositoryException,
199 QueryEvaluationException {
200 Map<String, Ontology> ontologies = new HashMap<String, Ontology>();
201 ContextAwareConnection conn = manager.getConnection();
202 TupleQuery query = conn.prepareTupleQuery(BEAN_DEFINED_BY);
203 for (Ontology ont : manager.findAll(Ontology.class)) {
204 logger.debug("found ontology {}", ont);
205 ontologies.put(ont.toString(), ont);
206 ontologies.put(ont.getQName().getNamespaceURI(), ont);
207 ontologies.put(ont.toString() + '#', ont);
208 Set<String> spaces = new HashSet<String>();
209 query.setBinding("ont", ((SesameEntity) ont).getSesameResource());
210 TupleQueryResult result = query.evaluate();
211 try {
212 while (result.hasNext()) {
213 BindingSet tuple = result.next();
214 Value bean = tuple.getBinding("bean").getValue();
215 if (bean instanceof URI)
216 spaces.add(((URI) bean).getNamespace());
217 }
218 } finally {
219 result.close();
220 }
221 if (spaces.size() > 0) {
222 for (String ns : spaces) {
223 ontologies.put(ns, ont);
224 }
225 } else {
226 ontologies.put(guessNamespace(ont), ont);
227 }
228 }
229 query = conn.prepareTupleQuery(SELECT_ORPHANS);
230 TupleQueryResult result = query.evaluate();
231 try {
232 while (result.hasNext()) {
233 BindingSet tuple = result.next();
234 URI uri = (URI) tuple.getBinding("bean").getValue();
235 String ns = uri.getNamespace();
236 Ontology ont = findOntology(ns, ontologies);
237 logger.debug("assigning {} {}", uri, ont);
238 conn.add(uri, RDFS.ISDEFINEDBY, ((SesameEntity) ont)
239 .getSesameResource());
240 }
241 } finally {
242 result.close();
243 }
244 return ontologies;
245 }
246
247 private String guessNamespace(Entity bean) {
248 QName qname = bean.getQName();
249 String ns = qname.getNamespaceURI();
250 String local = qname.getLocalPart();
251 if (local.endsWith("#") || local.endsWith("/")) {
252 return ns + local;
253 }
254 if (ns.endsWith("#")) {
255 return ns;
256 }
257 return ns + local + "#";
258 }
259
260 private Ontology findOntology(String ns, Map<String, Ontology> ontologies) {
261 if (ontologies.containsKey(ns)) {
262 return ontologies.get(ns);
263 }
264 for (Map.Entry<String, Ontology> e : ontologies.entrySet()) {
265 String key = e.getKey();
266 if (ns.startsWith(key.substring(0, key.length() - 1)))
267 return e.getValue();
268 }
269 QName qname = new QName(ns);
270 if (ns.endsWith("#")) {
271 qname = new QName(ns.substring(0, ns.length() - 1));
272 }
273 Ontology ont = manager.designate(Ontology.class, qname);
274 ontologies.put(ns, ont);
275 return ont;
276 }
277
278 private void processAllDifferent() {
279 for (AllDifferent different : manager.findAll(AllDifferent.class)) {
280 for (Class c : different.getOwlDistinctMembers()) {
281 for (Class d : different.getOwlDistinctMembers()) {
282 if (!c.equals(d))
283 c.getOwlDisjointWith().add(d);
284 }
285 }
286 }
287 }
288
289 private void symmetric(ContextAwareConnection conn, URI pred)
290 throws RepositoryException {
291 CloseableIteration<? extends Statement, RepositoryException> stmts;
292 stmts = conn.getStatements(null, pred, null);
293 try {
294 while (stmts.hasNext()) {
295 Statement stmt = stmts.next();
296 Resource subj = (Resource) stmt.getObject();
297 conn.add(subj, pred, stmt.getSubject());
298 }
299 } finally {
300 stmts.close();
301 }
302 }
303
304 private void setSubjectType(ContextAwareConnection conn, URI pred,
305 Value obj, URI type) throws RepositoryException {
306 CloseableIteration<? extends Statement, RepositoryException> stmts;
307 stmts = conn.getStatements(null, pred, obj);
308 try {
309 while (stmts.hasNext()) {
310 conn.add(stmts.next().getSubject(), RDF.TYPE, type);
311 }
312 } finally {
313 stmts.close();
314 }
315 }
316
317 private void setObjectType(ContextAwareConnection conn, URI pred, URI type)
318 throws RepositoryException {
319 CloseableIteration<? extends Statement, RepositoryException> stmts;
320 stmts = conn.getStatements(null, pred, null);
321 try {
322 while (stmts.hasNext()) {
323 Resource subj = (Resource) stmts.next().getObject();
324 conn.add(subj, RDF.TYPE, type);
325 }
326 } finally {
327 stmts.close();
328 }
329 }
330
331 private <C extends org.openrdf.concepts.rdfs.Class> void addBaseClass(
332 Class base, java.lang.Class<C> type) {
333 String ns = base.getQName().getNamespaceURI();
334 for (C c : manager.findAll(type)) {
335 if (c.equals(base))
336 continue;
337 QName qname = c.getQName();
338 if (qname != null && qname.getNamespaceURI().equals(ns)) {
339 boolean isBase = true;
340 for (org.openrdf.concepts.rdfs.Class e : c.getRdfsSubClassOf()) {
341 QName bname = e.getQName();
342 if (bname != null && bname.getNamespaceURI().equals(ns))
343 isBase = false;
344 }
345 if (isBase) {
346 logger.debug("extending {} {}", c, base);
347 c.getRdfsSubClassOf().add(base);
348 }
349 }
350 }
351 }
352
353 private void setDatatype(ValueFactory vf, ContextAwareConnection conn,
354 URI pred, URI datatype) throws RepositoryException {
355 CloseableIteration<? extends Statement, RepositoryException> stmts;
356 stmts = conn.getStatements(null, pred, null);
357 try {
358 while (stmts.hasNext()) {
359 Statement stmt = stmts.next();
360 String label = ((Literal) stmt.getObject()).getLabel();
361 Literal literal = vf.createLiteral(label, datatype);
362 conn.remove(stmt);
363 conn.add(stmt.getSubject(), stmt.getPredicate(), literal);
364 }
365 } finally {
366 stmts.close();
367 }
368 }
369
370 private void checkPropertyDomains(java.lang.Class<? extends Property> type) {
371 for (Property p : manager.findAll(type)) {
372 if (p.getRdfsDomains().isEmpty()) {
373 boolean found = false;
374 for (Property sup : p.getRdfsSubPropertyOf()) {
375 if (!sup.getRdfsDomains().isEmpty()) {
376 found = true;
377 p.getRdfsDomains().addAll(sup.getRdfsDomains());
378 }
379 }
380 if (!found) {
381 Class res = manager.designate(RDFS.RESOURCE, Class.class);
382 p.getRdfsDomains().add(res);
383 }
384 }
385 }
386 }
387
388 private void moveForiegnDomains(ContextAwareConnection conn)
389 throws RepositoryException {
390 CloseableIteration<? extends Statement, RepositoryException> stmts;
391 stmts = conn.getStatements(null, RDFS.DOMAIN, null);
392 try {
393 while (stmts.hasNext()) {
394 Statement stmt = stmts.next();
395 if (stmt.getSubject() instanceof URI
396 && stmt.getObject() instanceof URI) {
397 URI subj = (URI) stmt.getSubject();
398 URI obj = (URI) stmt.getObject();
399 for (Map.Entry<String, Ontology> e : ontologies.entrySet()) {
400 String ns = e.getKey();
401 Ontology ont = e.getValue();
402 if (subj.getNamespace().equals(ns)
403 && !subj.getNamespace().equals(
404 obj.getNamespace())) {
405 URI nc = createLocalClass(ns, obj);
406 logger.debug("moving {} {}", subj, nc);
407 conn.remove(stmt);
408 conn.add(subj, RDFS.DOMAIN, nc);
409 conn.add(nc, RDF.TYPE, OWL.CLASS);
410 conn.add(nc, RDFS.SUBCLASSOF, obj);
411 conn.add(nc, RDFS.ISDEFINEDBY, ((SesameEntity) ont)
412 .getSesameResource());
413 }
414 }
415 }
416 }
417 } finally {
418 stmts.close();
419 }
420 }
421
422 private URI createLocalClass(String ns, URI obj) throws RepositoryException {
423 String localName = obj.getLocalName();
424 ValueFactory vf = factory.getRepository().getValueFactory();
425 String prefix = findPrefix(ns);
426 if (prefix == null)
427 return vf.createURI(ns, localName);
428 return vf.createURI(ns, initcap(prefix) + initcap(localName));
429 }
430
431 private String findPrefix(String ns) throws RepositoryException {
432 RepositoryResult<Namespace> spaces;
433 spaces = manager.getConnection().getNamespaces();
434 try {
435 while (spaces.hasNext()) {
436 Namespace next = spaces.next();
437 if (next.getName().equals(ns))
438 return next.getPrefix();
439 }
440 } finally {
441 spaces.close();
442 }
443 return null;
444 }
445
446 private void renameClass(ContextAwareConnection conn, URI obj, URI nc)
447 throws RepositoryException {
448 logger.debug("renaming {} {}", obj, nc);
449 aliases.put(nc, obj);
450 CloseableIteration<? extends Statement, RepositoryException> stmts;
451 stmts = conn.getStatements(null, null, obj);
452 try {
453 while (stmts.hasNext()) {
454 Statement stmt = stmts.next();
455 if (isLocal(nc.getNamespace(), stmt.getSubject())) {
456 if (!stmt.getPredicate().equals(RDFS.RANGE)
457 || !stmt.getObject().equals(RDFS.RESOURCE)) {
458 if (!stmt.getPredicate().equals(RDF.TYPE))
459 conn.remove(stmt);
460 conn.add(stmt.getSubject(), stmt.getPredicate(), nc);
461 }
462 }
463 }
464 } finally {
465 stmts.close();
466 }
467 if (obj.equals(RDFS.RESOURCE)) {
468 Class base = manager.designate(nc, Class.class);
469 addBaseClass(base, org.openrdf.concepts.rdfs.Class.class);
470 addBaseClass(base, Class.class);
471 }
472 }
473
474 private boolean isLocal(String ns, Value obj) {
475 if (obj instanceof BNode)
476 return true;
477 URI uri = (URI) obj;
478 return uri.getNamespace().equals(ns);
479 }
480
481 private void renameAnonymousClasses() throws RepositoryException {
482 Iterable<Class> classes = manager.findAll(Class.class);
483 for (Class clazz : classes) {
484 Resource res = ((SesameEntity) clazz).getSesameResource();
485 if (res instanceof URI)
486 continue;
487 List<Class> unionOf = clazz.getOwlUnionOf();
488 if (unionOf != null) {
489 renameClass(clazz, "Or", unionOf);
490 }
491 List<Class> intersectionOf = clazz.getOwlIntersectionOf();
492 if (intersectionOf != null) {
493 renameClass(clazz, "And", intersectionOf);
494 }
495 }
496 commit();
497 for (Class clazz : classes) {
498 Resource res = ((SesameEntity) clazz).getSesameResource();
499 if (res instanceof URI)
500 continue;
501 Class complement = clazz.getOwlComplementOf();
502 if (complement != null) {
503 QName comp = complement.getQName();
504 String name = "Not" + comp.getLocalPart();
505 QName qname = new QName(comp.getNamespaceURI(), name);
506 rename(clazz, qname);
507 }
508 }
509 }
510
511 private void mergeUnionClasses() throws RepositoryException {
512 Iterable<Class> classes = manager.findAll(Class.class);
513 for (Class clazz : classes) {
514 List<? extends Entity> unionOf = clazz.getOwlUnionOf();
515 if (unionOf != null) {
516 Value clazzValue = ((SesameEntity) clazz).getSesameResource();
517 if (clazzValue instanceof URI) {
518 anonymousClasses.add((URI) clazzValue);
519 }
520 QName qName = clazz.getQName();
521 for (Entity bean : unionOf) {
522 Class of = manager.designate(Class.class, bean.getQName());
523 Value ofValue = ((SesameEntity) of).getSesameResource();
524 if (bean instanceof Datatype) {
525
526 rename(clazz, bean.getQName());
527 } else if (isLocal(qName.getNamespaceURI(), ofValue)) {
528 of.getRdfsSubClassOf().add(clazz);
529 } else {
530 URI nc = createLocalClass(qName.getNamespaceURI(),
531 (URI) ofValue);
532 ContextAwareConnection conn = manager.getConnection();
533 conn.add(nc, RDF.TYPE, OWL.CLASS);
534 conn.add(nc, RDFS.SUBCLASSOF, ofValue);
535 conn.add(nc, RDFS.SUBCLASSOF, clazzValue);
536 Ontology ont = findOntology(nc.getNamespace(),
537 ontologies);
538 conn.add(nc, RDFS.ISDEFINEDBY, ((SesameEntity) ont)
539 .getSesameResource());
540 renameClass(conn, (URI) ofValue, nc);
541 }
542 }
543 }
544 }
545 }
546
547 private Class renameClass(Class clazz, String and, List<? extends Entity> list)
548 throws RepositoryException {
549 String namespace = null;
550 Set<String> names = new TreeSet<String>();
551 for (Entity of : list) {
552 QName qname = of.getQName();
553 if (namespace == null || commonNS.contains(namespace)) {
554 namespace = qname.getNamespaceURI();
555 }
556 names.add(qname.getLocalPart());
557 }
558 StringBuilder sb = new StringBuilder();
559 for (String localPart : names) {
560 sb.append(initcap(localPart));
561 sb.append(and);
562 }
563 sb.setLength(sb.length() - and.length());
564 return rename(clazz, new QName(namespace, sb.toString()));
565 }
566
567 private Class rename(Class clazz, QName qname) throws RepositoryException {
568 logger.debug("renaming {} {}", clazz, qname);
569 Class copy = manager.designate(clazz.getClass(), qname);
570 Ontology ont = findOntology(qname.getNamespaceURI(), ontologies);
571 copy.getRdfsIsDefinedBy().add(ont);
572 ContextAwareConnection conn = manager.getConnection();
573 CloseableIteration<? extends Statement, RepositoryException> stmts;
574 org.openrdf.model.Resource orig = ((SesameEntity) clazz)
575 .getSesameResource();
576 org.openrdf.model.Resource dest = ((SesameEntity) copy)
577 .getSesameResource();
578 stmts = conn.getStatements(orig, null, null);
579 try {
580 while (stmts.hasNext()) {
581 Statement stmt = stmts.next();
582 conn.add(dest, stmt.getPredicate(), stmt.getObject());
583 conn.remove(stmt);
584 }
585 } finally {
586 stmts.close();
587 }
588 stmts = conn.getStatements(null, null, orig);
589 try {
590 while (stmts.hasNext()) {
591 Statement stmt = stmts.next();
592 conn.add(stmt.getSubject(), stmt.getPredicate(), dest);
593 conn.remove(stmt);
594 }
595 } finally {
596 stmts.close();
597 }
598 return copy;
599 }
600
601 private String initcap(String str) {
602 return str.substring(0, 1).toUpperCase() + str.substring(1);
603 }
604
605 private void checkNamespacePrefixes() throws Exception {
606 ContextAwareConnection conn;
607 conn = manager.getConnection();
608 TupleQuery query = conn.prepareTupleQuery(SELECT_DEFINED);
609 TupleQueryResult result = query.evaluate();
610 try {
611 while (result.hasNext()) {
612 BindingSet tuple = result.next();
613 Value value = tuple.getBinding("bean").getValue();
614 if (value instanceof BNode)
615 continue;
616 String ns = ((URI) value).getNamespace();
617 String prefix = getPrefix(ns);
618 if (prefix == null) {
619 Matcher matcher = NS_PREFIX.matcher(ns);
620 if (matcher.find()) {
621 prefix = matcher.group(1);
622 logger.debug("creating prefix {} {}", prefix, ns);
623 conn.setNamespace(prefix, ns);
624 }
625 }
626 }
627 } finally {
628 result.close();
629 }
630 }
631
632 private String getPrefix(String namespace) {
633 CloseableIteration<? extends Namespace, RepositoryException> namespaces = null;
634 try {
635 try {
636 ContextAwareConnection conn;
637 conn = manager.getConnection();
638 namespaces = conn.getNamespaces();
639 while (namespaces.hasNext()) {
640 Namespace ns = namespaces.next();
641 if (namespace.equals(ns.getName()))
642 return ns.getPrefix();
643 }
644 return null;
645 } finally {
646 if (namespaces != null)
647 namespaces.close();
648 }
649 } catch (RepositoryException e) {
650 throw new ElmoIOException(e);
651 }
652 }
653 }