1 package eu.fbk.dkm.pikes.rdf.util;
2
3 import java.util.Arrays;
4 import java.util.Collections;
5 import java.util.Comparator;
6 import java.util.List;
7
8 import com.google.common.base.Preconditions;
9 import com.google.common.collect.ImmutableList;
10 import com.google.common.collect.Lists;
11
12
13 import com.google.common.collect.Ordering;
14 import eu.fbk.rdfpro.util.Namespaces;
15 import org.eclipse.rdf4j.model.BNode;
16 import org.eclipse.rdf4j.model.Literal;
17 import org.eclipse.rdf4j.model.Resource;
18 import org.eclipse.rdf4j.model.Statement;
19 import org.eclipse.rdf4j.model.IRI;
20 import org.eclipse.rdf4j.model.Value;
21 import org.eclipse.rdf4j.model.vocabulary.RDF;
22 import org.eclipse.rdf4j.model.vocabulary.XMLSchema;
23 import org.eclipse.rdf4j.rio.RDFHandler;
24 import org.eclipse.rdf4j.rio.RDFHandlerException;
25
26 import eu.fbk.rdfpro.Mapper;
27 import eu.fbk.rdfpro.RDFProcessor;
28 import eu.fbk.rdfpro.RDFProcessors;
29 import eu.fbk.rdfpro.Reducer;
30 import eu.fbk.rdfpro.util.Hash;
31 import eu.fbk.rdfpro.util.Options;
32 import eu.fbk.rdfpro.util.Statements;
33
34 import javax.annotation.Nullable;
35
36 public final class ProcessorASNorm implements RDFProcessor {
37
38 private final String namespace;
39
40 private final Mapper checkedMapper;
41
42 private final Mapper uncheckedMapper;
43
44 private final Reducer factReducer;
45
46 private final Reducer metaReducer;
47
48 private static final Ordering<Value> DEFAULT_VALUE_ORDERING = new ValueOrdering(null);
49
50 private static final Ordering<Statement> DEFAULT_STATEMENT_ORDERING = new StatementOrdering(
51 "spoc", new ValueOrdering(ImmutableList.of(RDF.NAMESPACE)));
52
53 static RDFProcessor create(final String name, final String... args) {
54 final Options options = Options.parse("!", args);
55 final String namespace = options.getPositionalArg(0, String.class);
56 return new ProcessorASNorm(namespace);
57 }
58
/**
 * Creates a processor operating on IDs in the supplied namespace.
 *
 * @param namespace
 *            the namespace of the IRIs to treat as IDs, not null
 * @throws NullPointerException
 *             if {@code namespace} is null
 */
public ProcessorASNorm(final String namespace) {
    // Fail fast with a clear message: a null namespace would otherwise surface as an
    // anonymous NullPointerException while the MetaReducer builds its ranked-namespace list.
    this.namespace = Preconditions.checkNotNull(namespace, "namespace must not be null");
    this.checkedMapper = new CheckedMapper();
    this.uncheckedMapper = new UncheckedMapper();
    this.factReducer = new FactReducer();
    // Must come after this.namespace is set: MetaReducer reads it in a field initializer.
    this.metaReducer = new MetaReducer();
}
66
67 @Override
68 public RDFHandler wrap(final RDFHandler handler) {
69 return RDFProcessors.mapReduce(this.checkedMapper, this.factReducer, true).wrap(
70 RDFProcessors.mapReduce(this.uncheckedMapper, this.metaReducer, true)
71 .wrap(handler));
72 }
73
74 private boolean match(final Value value) {
75 return value instanceof IRI && ((IRI) value).getNamespace().equals(this.namespace);
76 }
77
78 private IRI hash(final Resource subject, final IRI predicate, final Value object) {
79 final List<String> list = Lists.newArrayList();
80 for (final Value value : new Value[] { subject, predicate, object }) {
81 if (value instanceof IRI) {
82 list.add("\u0001");
83 list.add(value.stringValue());
84 } else if (value instanceof BNode) {
85 list.add("\u0002");
86 list.add(((BNode) value).getID());
87 } else if (value instanceof Literal) {
88 final Literal l = (Literal) value;
89 list.add("\u0003");
90 list.add(l.getLabel());
91 if (!l.getDatatype().equals(XMLSchema.STRING)) {
92 list.add(l.getDatatype().stringValue());
93 } else if (l.getLanguage().isPresent()) {
94 list.add(l.getLanguage().get());
95 }
96 }
97 }
98 final String hash = Hash.murmur3(list.toArray(new String[list.size()])).toString();
99 return Statements.VALUE_FACTORY.createIRI(this.namespace, hash);
100 }
101
102 private IRI hash(final IRI id, final Iterable<Statement> statements) {
103 final List<String> list = Lists.newArrayList();
104 for (final Statement stmt : statements) {
105 for (final Value value : new Value[] { stmt.getSubject(), stmt.getPredicate(),
106 stmt.getObject(), stmt.getContext() }) {
107 if (value == null) {
108 list.add("\u0004");
109 } else if (value.equals(id)) {
110 list.add("\u0005");
111 } else if (value instanceof IRI) {
112 list.add("\u0001");
113 list.add(value.stringValue());
114 } else if (value instanceof BNode) {
115 list.add("\u0002");
116 list.add(((BNode) value).getID());
117 } else if (value instanceof Literal) {
118 final Literal l = (Literal) value;
119 list.add("\u0003");
120 list.add(l.getLabel());
121 if (!l.getDatatype().equals(XMLSchema.STRING)) {
122 list.add(l.getDatatype().stringValue());
123 } else if (l.getLanguage().isPresent()) {
124 list.add(l.getLanguage().get());
125 }
126 }
127 }
128 }
129 final String hash = Hash.murmur3(list.toArray(new String[list.size()])).toString();
130 return Statements.VALUE_FACTORY.createIRI(this.namespace, hash);
131 }
132
133 @SuppressWarnings("unchecked")
134 private <T extends Value, R extends Value> T replace(final T value, final R matchedValue,
135 final R newValue) {
136 if (value != null && value.equals(matchedValue)) {
137 return (T) newValue;
138 } else {
139 return value;
140 }
141 }
142
143 private void emit(final RDFHandler handler, final Value oldID, final IRI newID,
144 final Statement factStmt, final Iterable<Statement> metaStmts)
145 throws RDFHandlerException {
146
147 if (oldID.equals(newID)) {
148
149 handler.handleStatement(factStmt);
150 for (final Statement metaStmt : metaStmts) {
151 handler.handleStatement(metaStmt);
152 }
153
154 } else {
155
156 handler.handleStatement(Statements.VALUE_FACTORY.createStatement(
157 factStmt.getSubject(), factStmt.getPredicate(), factStmt.getObject(), newID));
158 for (final Statement metaStmt : metaStmts) {
159 final Resource metaSubj = replace(metaStmt.getSubject(), oldID, newID);
160 final IRI metaPred = replace(metaStmt.getPredicate(), oldID, newID);
161 final Value metaObj = replace(metaStmt.getObject(), oldID, newID);
162 final Resource metaCtx = replace(metaStmt.getContext(), oldID, newID);
163 if (metaCtx == null) {
164 handler.handleStatement(Statements.VALUE_FACTORY.createStatement(metaSubj,
165 metaPred, metaObj));
166 } else {
167 handler.handleStatement(Statements.VALUE_FACTORY.createStatement(metaSubj,
168 metaPred, metaObj, metaCtx));
169 }
170 }
171 }
172 }
173
174 private final class CheckedMapper implements eu.fbk.rdfpro.Mapper {
175
176 @Override
177 public Value[] map(final Statement statement) throws RDFHandlerException {
178 final String message = "Multiple annotation IDs in same statement";
179 Value key = null;
180 if (match(statement.getSubject())) {
181 key = statement.getSubject();
182 }
183 if (match(statement.getContext())) {
184 Preconditions.checkArgument(key == null, message);
185 key = statement.getContext();
186 }
187 if (match(statement.getObject())) {
188 Preconditions.checkArgument(key == null, message);
189 key = statement.getObject();
190 }
191 if (match(statement.getPredicate())) {
192 Preconditions.checkArgument(key == null, message);
193 key = statement.getPredicate();
194 }
195 if (key == null) {
196 key = Mapper.BYPASS_KEY;
197 }
198 return new Value[] { key };
199 }
200
201 }
202
203 private final class UncheckedMapper implements eu.fbk.rdfpro.Mapper {
204
205 @Override
206 public Value[] map(final Statement statement) throws RDFHandlerException {
207 if (match(statement.getSubject())) {
208 return new Value[] { statement.getSubject() };
209 } else if (match(statement.getContext())) {
210 return new Value[] { statement.getContext() };
211 } else if (match(statement.getObject())) {
212 return new Value[] { statement.getObject() };
213 } else if (match(statement.getPredicate())) {
214 return new Value[] { statement.getPredicate() };
215 } else {
216 return new Value[] { eu.fbk.rdfpro.Mapper.BYPASS_KEY };
217 }
218 }
219
220 }
221
222 private final class FactReducer implements Reducer {
223
224 @Override
225 public void reduce(final Value id, final Statement[] stmts, final RDFHandler handler)
226 throws RDFHandlerException {
227
228
229 final List<Statement> factStmts = Lists.newArrayListWithCapacity(stmts.length);
230 final List<Statement> metaStmts = Lists.newArrayListWithCapacity(stmts.length);
231 for (final Statement stmt : stmts) {
232 if (id.equals(stmt.getContext())) {
233 factStmts.add(stmt);
234 } else {
235 metaStmts.add(stmt);
236 }
237 }
238
239
240 for (final Statement factStmt : factStmts) {
241 final IRI newID = hash(factStmt.getSubject(), factStmt.getPredicate(),
242 factStmt.getObject());
243 emit(handler, id, newID, factStmt, metaStmts);
244 }
245 }
246
247 }
248
249 private final class MetaReducer implements Reducer {
250
251 private final Comparator<Statement> comparator = statementOrdering("spoc",
252 valueOrdering(ProcessorASNorm.this.namespace));
253
254 @Override
255 public void reduce(final Value key, final Statement[] stmts, final RDFHandler handler)
256 throws RDFHandlerException {
257
258
259 Statement factStmt = null;
260 final List<Statement> metaStmts = Lists.newArrayListWithCapacity(stmts.length);
261 for (final Statement stmt : stmts) {
262 if (key.equals(stmt.getContext())) {
263 assert factStmt == null;
264 factStmt = stmt;
265 } else {
266 metaStmts.add(stmt);
267 }
268 }
269 assert factStmt != null;
270
271
272 Collections.sort(metaStmts, this.comparator);
273 final IRI metadataID = hash((IRI) key, metaStmts);
274 emit(handler, key, metadataID, factStmt, metaStmts);
275 }
276
277 }
278
279 private static Ordering<Statement> statementOrdering(@Nullable final String components,
280 @Nullable final Comparator<? super Value> valueComparator) {
281 if (components == null) {
282 return valueComparator == null ? DEFAULT_STATEMENT_ORDERING
283 : new StatementOrdering("spoc", valueComparator);
284 } else {
285 return new StatementOrdering(components,
286 valueComparator == null ? DEFAULT_VALUE_ORDERING : valueComparator);
287 }
288 }
289
290 public static Ordering<Value> valueOrdering(final String... rankedNamespaces) {
291 return rankedNamespaces == null || rankedNamespaces.length == 0 ? DEFAULT_VALUE_ORDERING
292 : new ValueOrdering(Arrays.asList(rankedNamespaces));
293 }
294
295
296 private static final class ValueOrdering extends Ordering<Value> {
297
298 private final List<String> rankedNamespaces;
299
300 public ValueOrdering(@Nullable final Iterable<? extends String> rankedNamespaces) {
301 this.rankedNamespaces = rankedNamespaces == null ? ImmutableList.of() : ImmutableList
302 .copyOf(rankedNamespaces);
303 }
304
305 @Override
306 public int compare(final Value v1, final Value v2) {
307 if (v1 instanceof IRI) {
308 if (v2 instanceof IRI) {
309 final int rank1 = this.rankedNamespaces.indexOf(((IRI) v1).getNamespace());
310 final int rank2 = this.rankedNamespaces.indexOf(((IRI) v2).getNamespace());
311 if (rank1 >= 0 && (rank1 < rank2 || rank2 < 0)) {
312 return -1;
313 } else if (rank2 >= 0 && (rank2 < rank1 || rank1 < 0)) {
314 return 1;
315 }
316 final String string1 = Statements.formatValue(v1, Namespaces.DEFAULT);
317 final String string2 = Statements.formatValue(v2, Namespaces.DEFAULT);
318 return string1.compareTo(string2);
319 } else {
320 return -1;
321 }
322 } else if (v1 instanceof BNode) {
323 if (v2 instanceof BNode) {
324 return ((BNode) v1).getID().compareTo(((BNode) v2).getID());
325 } else if (v2 instanceof IRI) {
326 return 1;
327 } else {
328 return -1;
329 }
330 } else if (v1 instanceof Literal) {
331 if (v2 instanceof Literal) {
332 return ((Literal) v1).getLabel().compareTo(((Literal) v2).getLabel());
333 } else if (v2 instanceof Resource) {
334 return 1;
335 } else {
336 return -1;
337 }
338 } else {
339 if (v1 == v2) {
340 return 0;
341 } else {
342 return 1;
343 }
344 }
345 }
346
347 }
348
349 private static final class StatementOrdering extends Ordering<Statement> {
350
351 private final String components;
352
353 private final Comparator<? super Value> valueComparator;
354
355 public StatementOrdering(final String components,
356 final Comparator<? super Value> valueComparator) {
357 this.components = components.trim().toLowerCase();
358 this.valueComparator = Preconditions.checkNotNull(valueComparator);
359 for (int i = 0; i < this.components.length(); ++i) {
360 final char c = this.components.charAt(i);
361 if (c != 's' && c != 'p' && c != 'o' && c != 'c') {
362 throw new IllegalArgumentException("Invalid components: " + components);
363 }
364 }
365 }
366
367 @Override
368 public int compare(final Statement s1, final Statement s2) {
369 for (int i = 0; i < this.components.length(); ++i) {
370 final char c = this.components.charAt(i);
371 final Value v1 = getValue(s1, c);
372 final Value v2 = getValue(s2, c);
373 final int result = this.valueComparator.compare(v1, v2);
374 if (result != 0) {
375 return result;
376 }
377 }
378 return 0;
379 }
380
381 private Value getValue(final Statement statement, final char component) {
382 switch (component) {
383 case 's':
384 return statement.getSubject();
385 case 'p':
386 return statement.getPredicate();
387 case 'o':
388 return statement.getObject();
389 case 'c':
390 return statement.getContext();
391 default:
392 throw new Error();
393 }
394 }
395
396 }
397 }