1 /*
2 * Copyright (c) 2007, Peter Mika All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * - Redistributions of source code must retain the above copyright notice, this
8 * list of conditions and the following disclaimer.
9 * - Redistributions in binary form must reproduce the above copyright notice,
10 * this list of conditions and the following disclaimer in the documentation
11 * and/or other materials provided with the distribution.
12 * - Neither the name of the openrdf.org nor the names of its contributors may
13 * be used to endorse or promote products derived from this software without
14 * specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 *
28 */
29 package org.openrdf.elmo.scutter;
30
31 import java.net.URL;
32
33 import org.openrdf.concepts.foaf.Person;
34 import org.openrdf.model.Resource;
35 import org.openrdf.model.URI;
36 import org.openrdf.model.Value;
37 import org.openrdf.model.vocabulary.RDF;
38 import org.openrdf.repository.Repository;
39 import org.slf4j.Logger;
40 import org.slf4j.LoggerFactory;
41
42
43 /**
44 * This retriever is specific to crawling for FOAF data. It only collects
45 * statements in the RDF, RDF-S, FOAF and GEO namespaces and doesn't follow
46 * seeAlso links in documents that do not contain foaf:Person instances.
47 *
48 * @author Peter Mika (pmika@cs.vu.nl)
49 *
50 */
51 public class FoafRetriever extends SimpleRetriever implements Retriever {
52
53
54 protected final static Logger _logger = LoggerFactory.getLogger(FoafRetriever.class);
55
56 public FoafRetriever(final URL url, final Repository repository,
57 final Scutter scutter) {
58 super(url, repository, scutter);
59
60 setFilter(new FOAFStatementFilter());
61 _handler = new FOAFDocumentHandler((FOAFStatementFilter)_filter);
62 }
63
64 static class FOAFStatementFilter implements StatementFilter {
65 public final static String RDF_NS = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
66 public final static String RDFS_NS = "http://www.w3.org/2000/01/rdf-schema#";
67 public final static String FOAF_NS = "http://xmlns.com/foaf/0.1/";
68 public final static String GEO_NS = "http://www.w3.org/2003/01/geo/wgs84_pos#";
69
70 // TODO: factor our openacademia specific parts
71 public final static String SWRC_NS = "http://swrc.ontoware.org/ontology#";
72 public final static String SWRC_EXT_NS = "http://www.cs.vu.nl/~mcaklein/onto/swrc-ext/2005/05#";
73 public final static String SOCIONET_NS = "http://www.cs.vu.nl/~pmika/socionet#";
74 public final static String OPENACADEMIA_NS = "http://www.openacademia.org#";
75
76 private boolean foundRelevant = false;
77
78 public boolean allowStatement(Resource subject, URI predicate, Value object) {
79 boolean relevant = false;
80
81 // if we find a Person definition or any of the namespace
82 // used by openacademia then there is relevant data in the
83 if (!foundRelevant && (predicate.equals(RDF.TYPE) &&
84 object.equals(Util.getType(Person.class))) ||
85 // TODO: factor out openacademia specific parts
86 predicate.toString().startsWith(SWRC_NS) ||
87 predicate.toString().startsWith(SWRC_EXT_NS) ||
88 predicate.toString().startsWith(SOCIONET_NS) ||
89 predicate.toString().startsWith(OPENACADEMIA_NS)
90 ) {
91 relevant = true;
92 foundRelevant = true;
93 }
94 // What is relevant is a subset of the statements we allow through
95 if (predicate.toString().startsWith(RDF_NS) ||
96 predicate.toString().startsWith(RDFS_NS) ||
97 predicate.toString().startsWith(FOAF_NS) ||
98 predicate.toString().startsWith(GEO_NS) ||
99 relevant
100 ) {
101 return true;
102 } else {
103 return false;
104 }
105 }
106
107
108 }
109
110 class FOAFDocumentHandler extends SimpleDocumentHandler {
111
112
113 public FOAFDocumentHandler(FOAFStatementFilter filter) {
114 super(filter);
115 }
116
117
118 public boolean followLinks() {
119 return ((FOAFStatementFilter) _filter).foundRelevant;
120 }
121
122 public boolean aggregateContent() {
123 return true;
124 }
125 }
126
127 }
128
129
130