1   package eu.fbk.dkm.pikes.rdf.util;
2   
3   import java.util.Arrays;
4   import java.util.Collections;
5   import java.util.Comparator;
6   import java.util.List;
7   
8   import com.google.common.base.Preconditions;
9   import com.google.common.collect.ImmutableList;
10  import com.google.common.collect.Lists;
11  
12  
13  import com.google.common.collect.Ordering;
14  import eu.fbk.rdfpro.util.Namespaces;
15  import org.eclipse.rdf4j.model.BNode;
16  import org.eclipse.rdf4j.model.Literal;
17  import org.eclipse.rdf4j.model.Resource;
18  import org.eclipse.rdf4j.model.Statement;
19  import org.eclipse.rdf4j.model.IRI;
20  import org.eclipse.rdf4j.model.Value;
21  import org.eclipse.rdf4j.model.vocabulary.RDF;
22  import org.eclipse.rdf4j.model.vocabulary.XMLSchema;
23  import org.eclipse.rdf4j.rio.RDFHandler;
24  import org.eclipse.rdf4j.rio.RDFHandlerException;
25  
26  import eu.fbk.rdfpro.Mapper;
27  import eu.fbk.rdfpro.RDFProcessor;
28  import eu.fbk.rdfpro.RDFProcessors;
29  import eu.fbk.rdfpro.Reducer;
30  import eu.fbk.rdfpro.util.Hash;
31  import eu.fbk.rdfpro.util.Options;
32  import eu.fbk.rdfpro.util.Statements;
33  
34  import javax.annotation.Nullable;
35  
36  public final class ProcessorASNorm implements RDFProcessor {
37  
38      private final String namespace;
39  
40      private final Mapper checkedMapper;
41  
42      private final Mapper uncheckedMapper;
43  
44      private final Reducer factReducer;
45  
46      private final Reducer metaReducer;
47  
48      private static final Ordering<Value> DEFAULT_VALUE_ORDERING = new ValueOrdering(null);
49  
50      private static final Ordering<Statement> DEFAULT_STATEMENT_ORDERING = new StatementOrdering(
51              "spoc", new ValueOrdering(ImmutableList.of(RDF.NAMESPACE)));
52  
53      static RDFProcessor create(final String name, final String... args) {
54          final Options options = Options.parse("!", args);
55          final String namespace = options.getPositionalArg(0, String.class);
56          return new ProcessorASNorm(namespace);
57      }
58  
59      public ProcessorASNorm(final String namespace) {
60          this.namespace = namespace;
61          this.checkedMapper = new CheckedMapper();
62          this.uncheckedMapper = new UncheckedMapper();
63          this.factReducer = new FactReducer();
64          this.metaReducer = new MetaReducer();
65      }
66  
67      @Override
68      public RDFHandler wrap(final RDFHandler handler) {
69          return RDFProcessors.mapReduce(this.checkedMapper, this.factReducer, true).wrap(
70                  RDFProcessors.mapReduce(this.uncheckedMapper, this.metaReducer, true)
71                          .wrap(handler));
72      }
73  
74      private boolean match(final Value value) {
75          return value instanceof IRI && ((IRI) value).getNamespace().equals(this.namespace);
76      }
77  
78      private IRI hash(final Resource subject, final IRI predicate, final Value object) {
79          final List<String> list = Lists.newArrayList();
80          for (final Value value : new Value[] { subject, predicate, object }) {
81              if (value instanceof IRI) {
82                  list.add("\u0001");
83                  list.add(value.stringValue());
84              } else if (value instanceof BNode) {
85                  list.add("\u0002");
86                  list.add(((BNode) value).getID());
87              } else if (value instanceof Literal) {
88                  final Literal l = (Literal) value;
89                  list.add("\u0003");
90                  list.add(l.getLabel());
91                  if (!l.getDatatype().equals(XMLSchema.STRING)) {
92                      list.add(l.getDatatype().stringValue());
93                  } else if (l.getLanguage().isPresent()) {
94                      list.add(l.getLanguage().get());
95                  }
96              }
97          }
98          final String hash = Hash.murmur3(list.toArray(new String[list.size()])).toString();
99          return Statements.VALUE_FACTORY.createIRI(this.namespace, hash);
100     }
101 
102     private IRI hash(final IRI id, final Iterable<Statement> statements) {
103         final List<String> list = Lists.newArrayList();
104         for (final Statement stmt : statements) {
105             for (final Value value : new Value[] { stmt.getSubject(), stmt.getPredicate(),
106                     stmt.getObject(), stmt.getContext() }) {
107                 if (value == null) {
108                     list.add("\u0004");
109                 } else if (value.equals(id)) {
110                     list.add("\u0005");
111                 } else if (value instanceof IRI) {
112                     list.add("\u0001");
113                     list.add(value.stringValue());
114                 } else if (value instanceof BNode) {
115                     list.add("\u0002");
116                     list.add(((BNode) value).getID());
117                 } else if (value instanceof Literal) {
118                     final Literal l = (Literal) value;
119                     list.add("\u0003");
120                     list.add(l.getLabel());
121                     if (!l.getDatatype().equals(XMLSchema.STRING)) {
122                         list.add(l.getDatatype().stringValue());
123                     } else if (l.getLanguage().isPresent()) {
124                         list.add(l.getLanguage().get());
125                     }
126                 }
127             }
128         }
129         final String hash = Hash.murmur3(list.toArray(new String[list.size()])).toString();
130         return Statements.VALUE_FACTORY.createIRI(this.namespace, hash);
131     }
132 
133     @SuppressWarnings("unchecked")
134     private <T extends Value, R extends Value> T replace(final T value, final R matchedValue,
135             final R newValue) {
136         if (value != null && value.equals(matchedValue)) {
137             return (T) newValue;
138         } else {
139             return value;
140         }
141     }
142 
143     private void emit(final RDFHandler handler, final Value oldID, final IRI newID,
144             final Statement factStmt, final Iterable<Statement> metaStmts)
145             throws RDFHandlerException {
146 
147         if (oldID.equals(newID)) {
148             // If annotation ID equal to old ID, emit fact and metadata unchanged
149             handler.handleStatement(factStmt);
150             for (final Statement metaStmt : metaStmts) {
151                 handler.handleStatement(metaStmt);
152             }
153 
154         } else {
155             // Else, replace old ID with new one
156             handler.handleStatement(Statements.VALUE_FACTORY.createStatement(
157                     factStmt.getSubject(), factStmt.getPredicate(), factStmt.getObject(), newID));
158             for (final Statement metaStmt : metaStmts) {
159                 final Resource metaSubj = replace(metaStmt.getSubject(), oldID, newID);
160                 final IRI metaPred = replace(metaStmt.getPredicate(), oldID, newID);
161                 final Value metaObj = replace(metaStmt.getObject(), oldID, newID);
162                 final Resource metaCtx = replace(metaStmt.getContext(), oldID, newID);
163                 if (metaCtx == null) {
164                     handler.handleStatement(Statements.VALUE_FACTORY.createStatement(metaSubj,
165                             metaPred, metaObj));
166                 } else {
167                     handler.handleStatement(Statements.VALUE_FACTORY.createStatement(metaSubj,
168                             metaPred, metaObj, metaCtx));
169                 }
170             }
171         }
172     }
173 
174     private final class CheckedMapper implements eu.fbk.rdfpro.Mapper {
175 
176         @Override
177         public Value[] map(final Statement statement) throws RDFHandlerException {
178             final String message = "Multiple annotation IDs in same statement";
179             Value key = null;
180             if (match(statement.getSubject())) {
181                 key = statement.getSubject();
182             }
183             if (match(statement.getContext())) {
184                 Preconditions.checkArgument(key == null, message);
185                 key = statement.getContext();
186             }
187             if (match(statement.getObject())) {
188                 Preconditions.checkArgument(key == null, message);
189                 key = statement.getObject();
190             }
191             if (match(statement.getPredicate())) {
192                 Preconditions.checkArgument(key == null, message);
193                 key = statement.getPredicate();
194             }
195             if (key == null) {
196                 key = Mapper.BYPASS_KEY;
197             }
198             return new Value[] { key };
199         }
200 
201     }
202 
203     private final class UncheckedMapper implements eu.fbk.rdfpro.Mapper {
204 
205         @Override
206         public Value[] map(final Statement statement) throws RDFHandlerException {
207             if (match(statement.getSubject())) {
208                 return new Value[] { statement.getSubject() };
209             } else if (match(statement.getContext())) {
210                 return new Value[] { statement.getContext() };
211             } else if (match(statement.getObject())) {
212                 return new Value[] { statement.getObject() };
213             } else if (match(statement.getPredicate())) {
214                 return new Value[] { statement.getPredicate() };
215             } else {
216                 return new Value[] { eu.fbk.rdfpro.Mapper.BYPASS_KEY };
217             }
218         }
219 
220     }
221 
222     private final class FactReducer implements Reducer {
223 
224         @Override
225         public void reduce(final Value id, final Statement[] stmts, final RDFHandler handler)
226                 throws RDFHandlerException {
227 
228             // Split statements into facts and meta
229             final List<Statement> factStmts = Lists.newArrayListWithCapacity(stmts.length);
230             final List<Statement> metaStmts = Lists.newArrayListWithCapacity(stmts.length);
231             for (final Statement stmt : stmts) {
232                 if (id.equals(stmt.getContext())) {
233                     factStmts.add(stmt);
234                 } else {
235                     metaStmts.add(stmt);
236                 }
237             }
238 
239             // Emit each fact statement with its own metadata statements, possibly changing IDs
240             for (final Statement factStmt : factStmts) {
241                 final IRI newID = hash(factStmt.getSubject(), factStmt.getPredicate(),
242                         factStmt.getObject());
243                 emit(handler, id, newID, factStmt, metaStmts);
244             }
245         }
246 
247     }
248 
249     private final class MetaReducer implements Reducer {
250 
251         private final Comparator<Statement> comparator = statementOrdering("spoc",
252                 valueOrdering(ProcessorASNorm.this.namespace));
253 
254         @Override
255         public void reduce(final Value key, final Statement[] stmts, final RDFHandler handler)
256                 throws RDFHandlerException {
257 
258             // Split statements into fact (unique) and meta
259             Statement factStmt = null;
260             final List<Statement> metaStmts = Lists.newArrayListWithCapacity(stmts.length);
261             for (final Statement stmt : stmts) {
262                 if (key.equals(stmt.getContext())) {
263                     assert factStmt == null;
264                     factStmt = stmt;
265                 } else {
266                     metaStmts.add(stmt);
267                 }
268             }
269             assert factStmt != null;
270 
271             // Emit statements changing the annotation ID
272             Collections.sort(metaStmts, this.comparator);
273             final IRI metadataID = hash((IRI) key, metaStmts);
274             emit(handler, key, metadataID, factStmt, metaStmts);
275         }
276 
277     }
278 
279     private  static Ordering<Statement> statementOrdering(@Nullable final String components,
280                                                           @Nullable final Comparator<? super Value> valueComparator) {
281         if (components == null) {
282             return valueComparator == null ? DEFAULT_STATEMENT_ORDERING //
283                     : new StatementOrdering("spoc", valueComparator);
284         } else {
285             return new StatementOrdering(components,
286                     valueComparator == null ? DEFAULT_VALUE_ORDERING : valueComparator);
287         }
288     }
289 
290     public static Ordering<Value> valueOrdering(final String... rankedNamespaces) {
291         return rankedNamespaces == null || rankedNamespaces.length == 0 ? DEFAULT_VALUE_ORDERING
292                 : new ValueOrdering(Arrays.asList(rankedNamespaces));
293     }
294 
295 
296     private static final class ValueOrdering extends Ordering<Value> {
297 
298         private final List<String> rankedNamespaces;
299 
300         public ValueOrdering(@Nullable final Iterable<? extends String> rankedNamespaces) {
301             this.rankedNamespaces = rankedNamespaces == null ? ImmutableList.of() : ImmutableList
302                     .copyOf(rankedNamespaces);
303         }
304 
305         @Override
306         public int compare(final Value v1, final Value v2) {
307             if (v1 instanceof IRI) {
308                 if (v2 instanceof IRI) {
309                     final int rank1 = this.rankedNamespaces.indexOf(((IRI) v1).getNamespace());
310                     final int rank2 = this.rankedNamespaces.indexOf(((IRI) v2).getNamespace());
311                     if (rank1 >= 0 && (rank1 < rank2 || rank2 < 0)) {
312                         return -1;
313                     } else if (rank2 >= 0 && (rank2 < rank1 || rank1 < 0)) {
314                         return 1;
315                     }
316                     final String string1 = Statements.formatValue(v1, Namespaces.DEFAULT);
317                     final String string2 = Statements.formatValue(v2, Namespaces.DEFAULT);
318                     return string1.compareTo(string2);
319                 } else {
320                     return -1;
321                 }
322             } else if (v1 instanceof BNode) {
323                 if (v2 instanceof BNode) {
324                     return ((BNode) v1).getID().compareTo(((BNode) v2).getID());
325                 } else if (v2 instanceof IRI) {
326                     return 1;
327                 } else {
328                     return -1;
329                 }
330             } else if (v1 instanceof Literal) {
331                 if (v2 instanceof Literal) {
332                     return ((Literal) v1).getLabel().compareTo(((Literal) v2).getLabel());
333                 } else if (v2 instanceof Resource) {
334                     return 1;
335                 } else {
336                     return -1;
337                 }
338             } else {
339                 if (v1 == v2) {
340                     return 0;
341                 } else {
342                     return 1;
343                 }
344             }
345         }
346 
347     }
348 
349     private static final class StatementOrdering extends Ordering<Statement> {
350 
351         private final String components;
352 
353         private final Comparator<? super Value> valueComparator;
354 
355         public StatementOrdering(final String components,
356                                  final Comparator<? super Value> valueComparator) {
357             this.components = components.trim().toLowerCase();
358             this.valueComparator = Preconditions.checkNotNull(valueComparator);
359             for (int i = 0; i < this.components.length(); ++i) {
360                 final char c = this.components.charAt(i);
361                 if (c != 's' && c != 'p' && c != 'o' && c != 'c') {
362                     throw new IllegalArgumentException("Invalid components: " + components);
363                 }
364             }
365         }
366 
367         @Override
368         public int compare(final Statement s1, final Statement s2) {
369             for (int i = 0; i < this.components.length(); ++i) {
370                 final char c = this.components.charAt(i);
371                 final Value v1 = getValue(s1, c);
372                 final Value v2 = getValue(s2, c);
373                 final int result = this.valueComparator.compare(v1, v2);
374                 if (result != 0) {
375                     return result;
376                 }
377             }
378             return 0;
379         }
380 
381         private Value getValue(final Statement statement, final char component) {
382             switch (component) {
383                 case 's':
384                     return statement.getSubject();
385                 case 'p':
386                     return statement.getPredicate();
387                 case 'o':
388                     return statement.getObject();
389                 case 'c':
390                     return statement.getContext();
391                 default:
392                     throw new Error();
393             }
394         }
395 
396     }
397 }