1   package eu.fbk.dkm.pikes.eval;
2   
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Collection;
import java.util.Iterator;
import java.util.Locale;
import java.util.Map;
import java.util.Set;

import com.google.common.base.Preconditions;
import com.google.common.base.Predicate;
import com.google.common.base.Predicates;
import com.google.common.collect.Iterables;
import com.google.common.collect.LinkedHashMultimap;
import com.google.common.collect.Maps;
import com.google.common.collect.Multimap;
import com.google.common.collect.Sets;

import org.eclipse.rdf4j.model.BNode;
import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.Literal;
import org.eclipse.rdf4j.model.Resource;
import org.eclipse.rdf4j.model.Statement;
import org.eclipse.rdf4j.model.Value;
import org.eclipse.rdf4j.model.vocabulary.RDF;
import org.eclipse.rdf4j.model.vocabulary.XMLSchema;
import org.eclipse.rdf4j.rio.RDFFormat;
import org.eclipse.rdf4j.rio.RDFHandlerException;
import org.eclipse.rdf4j.rio.RDFWriterFactory;
import org.eclipse.rdf4j.rio.turtle.TurtleWriter;
34  
/**
 * An RDF4J Rio extension (writer factory) for emitting 'prettified' Turtle output.
 */
38  public final class PrettyTurtle implements RDFWriterFactory
39  {
40  
41      public static PrettyTurtle INSTANCE = new PrettyTurtle();
42  
43      public static final RDFFormat FORMAT = new RDFFormat("Pretty Turtle",
44              "text/turtle;prettify=true", Charset.forName("UTF-8"), "ttl", true, true);
45  
46      private volatile Predicate<? super BNode> defaultBNodePreservationPolicy;
47  
48      private volatile boolean defaultGeneratePrefixes;
49  
50  //    static {
51  //        RDFFormat.register(FORMAT);
52  //    }
53  
54      public PrettyTurtle()
55      {
56          this.defaultBNodePreservationPolicy = Predicates.alwaysFalse();
57          this.defaultGeneratePrefixes = false;
58      }
59  
60      public Predicate<? super BNode> getDefaultBNodePreservationPolicy()
61      {
62          return this.defaultBNodePreservationPolicy;
63      }
64  
65      public void setDefaultBNodePreservationPolicy(
66              final Predicate<? super BNode> defaultBNodePreservationPolicy)
67      {
68          if (defaultBNodePreservationPolicy != null) {
69              this.defaultBNodePreservationPolicy = defaultBNodePreservationPolicy;
70          } else {
71              this.defaultBNodePreservationPolicy = Predicates.alwaysFalse();
72          }
73      }
74  
75      public boolean isDefaultGeneratePrefixes()
76      {
77          return this.defaultGeneratePrefixes;
78      }
79  
80      public void setDefaultGeneratePrefixes(final boolean defaultGeneratePrefixes)
81      {
82          this.defaultGeneratePrefixes = defaultGeneratePrefixes;
83      }
84  
85      @Override
86      public RDFFormat getRDFFormat()
87      {
88          return FORMAT;
89      }
90  
91      @Override
92      public RDFWriter getWriter(final OutputStream stream)
93      {
94          return getWriter(stream, this.defaultBNodePreservationPolicy, this.defaultGeneratePrefixes);
95      }
96  
97      public RDFWriter getWriter(final OutputStream stream,
98              final Predicate<? super BNode> bnodePreservationPolicy, final boolean generatePrefixes)
99      {
100         return new RDFWriter(stream, bnodePreservationPolicy, generatePrefixes);
101     }
102 
103     @Override
104     public RDFWriter getWriter(final Writer writer)
105     {
106         return getWriter(writer, this.defaultBNodePreservationPolicy, this.defaultGeneratePrefixes);
107     }
108 
109     public RDFWriter getWriter(final Writer writer,
110             final Predicate<? super BNode> bnodePreservationPolicy, final boolean generatePrefixes)
111     {
112         return new RDFWriter(writer, bnodePreservationPolicy, generatePrefixes);
113     }
114 
115     private static class RDFWriter extends TurtleWriter
116     {
117 
        // Selects the blank nodes whose identifier is kept: nodes accepted by this predicate
        // are neither tracked for inlining nor anonymised.
        private final Predicate<? super BNode> bnodePreservationPolicy;

        // Whether endRDF() tries to derive prefixes for mentioned namespaces lacking one.
        private final boolean generatePrefixes;

        // Buffered statements grouped by subject and then by predicate, in insertion order.
        private final Map<Resource, Multimap<IRI, Value>> subjectProperties;

        // Blank nodes seen in object position, except those accepted by the preservation
        // policy. The value is true if the bnode must be emitted as a subject of its own (seen
        // as object more than once, or as object of itself) and false if it may be inlined.
        private final Map<BNode, Boolean> objectBNodes;

        // Namespaces of the IRIs encountered in the handled statements.
        private final Set<String> mentionedNamespaces;

        // Blank nodes written inline since writeStatements() last cleared this set.
        private final Set<BNode> inlinedBNodes;
130 
131         public RDFWriter(final OutputStream out,
132                 final Predicate<? super BNode> bnodePreservationPolicy,
133                 final boolean generatePrefixes)
134         {
135             // Assume UTF-8 is the encoding, as for specification.
136             this(new OutputStreamWriter(out, Charset.forName("UTF-8")), bnodePreservationPolicy,
137                     generatePrefixes);
138         }
139 
140         public RDFWriter(final Writer writer,
141                 final Predicate<? super BNode> bnodePreservationPolicy,
142                 final boolean generatePrefixes)
143         {
144             super(writer);
145 
146             this.bnodePreservationPolicy = bnodePreservationPolicy;
147             this.generatePrefixes = generatePrefixes;
148 
149             this.subjectProperties = Maps.newLinkedHashMap();
150             this.objectBNodes = Maps.newHashMap();
151             this.mentionedNamespaces = Sets.newHashSet();
152             this.inlinedBNodes = Sets.newLinkedHashSet();
153         }
154 
155         @Override
156         public RDFFormat getRDFFormat()
157         {
158             return FORMAT;
159         }
160 
161         @Override
162         public void startRDF() throws RDFHandlerException
163         {
164             Preconditions.checkState(!this.writingStarted, "Writing already started");
165             this.writingStarted = true;
166         }
167 
168         @Override
169         public void handleNamespace(final String prefix, final String uri)
170                 throws RDFHandlerException
171         {
172             Preconditions.checkState(this.writingStarted, "Writing not yet started");
173 
174             // Temporarily change the state, in order for the overridden method to work properly
175             // (if writingStarted, namespaces will be emitted)
176             final boolean writingStarted = this.writingStarted;
177             this.writingStarted = false;
178 
179             super.handleNamespace(prefix, uri);
180 
181             this.writingStarted = writingStarted;
182         }
183 
184         @Override
185         public void handleStatement(final Statement statement) throws RDFHandlerException
186         {
187             Preconditions.checkState(this.writingStarted, "Writing not yet started");
188 
189             final Resource subject = statement.getSubject();
190             final IRI predicate = statement.getPredicate();
191             final Value object = statement.getObject();
192 
193             registerMentionedNamespaces(subject);
194             registerMentionedNamespaces(predicate);
195             registerMentionedNamespaces(object);
196             if (object instanceof Literal && !((Literal) object).getDatatype().equals(XMLSchema.STRING)) {
197                 registerMentionedNamespaces(((Literal) object).getDatatype());
198             }
199 
200             Multimap<IRI, Value> properties = this.subjectProperties.get(subject);
201             if (properties == null) {
202                 properties = LinkedHashMultimap.create();
203                 this.subjectProperties.put(subject, properties);
204             }
205             properties.put(predicate, object);
206 
207             if (object instanceof BNode && !this.bnodePreservationPolicy.apply((BNode) object)) {
208                 this.objectBNodes.put((BNode) object, this.objectBNodes.containsKey(object)
209                         || object.equals(subject) ? Boolean.TRUE : Boolean.FALSE);
210             }
211         }
212 
        /**
         * Accepts a comment without recording or writing it: comments are currently dropped
         * from the output.
         *
         * @param comment
         *            the comment, ignored
         */
        @Override
        public void handleComment(final String comment) throws RDFHandlerException
        {
            // FIXME Comments currently ignored.
        }
218 
219         @Override
220         public void endRDF() throws RDFHandlerException
221         {
222             Preconditions.checkState(this.writingStarted, "Writing not yet started");
223 
224             if (this.generatePrefixes) {
225                 generatePrefixes();
226             }
227 
228             this.writingStarted = false;
229             try {
230                 writeNamespaces();
231                 writeStatements();
232                 this.writer.flush();
233 
234             } catch (final IOException ex) {
235                 throw new RDFHandlerException(ex);
236             }
237         }
238 
239         private void registerMentionedNamespaces(final Value value)
240         {
241             if (value instanceof IRI) {
242                 this.mentionedNamespaces.add(((IRI) value).getNamespace());
243             }
244         }
245 
246         private void generatePrefixes() throws RDFHandlerException
247         {
248             final boolean writingStarted = this.writingStarted;
249             this.writingStarted = false;
250 
251             for (final String namespace : Sets.difference(this.mentionedNamespaces,
252                     this.namespaceTable.keySet())) {
253                 final int endIndex = Math.max(namespace.lastIndexOf(':'),
254                         Math.max(namespace.lastIndexOf('/'), namespace.lastIndexOf('#')));
255                 int startIndex = endIndex;
256                 while (startIndex > 0 && Character.isLetter(namespace.charAt(startIndex - 1))) {
257                     --startIndex;
258                 }
259                 if (startIndex >= endIndex) {
260                     continue;
261                 }
262                 final String candidatePrefix = namespace.substring(startIndex, endIndex)
263                         .toLowerCase();
264                 if (!this.namespaceTable.containsKey(candidatePrefix)) {
265                     super.handleNamespace(candidatePrefix, namespace);
266                 }
267             }
268 
269             this.writingStarted = writingStarted;
270         }
271 
272         private void writeNamespaces() throws IOException
273         {
274             if (!this.namespaceTable.isEmpty()) {
275                 for (final Map.Entry<String, String> namespace : this.namespaceTable.entrySet()) {
276                     final String prefix = namespace.getValue();
277                     final String uri = namespace.getKey();
278                     if (this.mentionedNamespaces.contains(uri)) {
279                         writeNamespace(prefix, uri);
280                     }
281                 }
282             }
283         }
284 
285         private void writeStatements() throws IOException
286         {
287             // Keep track of BNodes not emitted as subjects.
288             final Set<BNode> skippedBNodes = Sets.newLinkedHashSet();
289 
290             // Emit subjects and their properties, skipping bnodes that can be potentially inlined
291             boolean first = true;
292             for (Resource subject : Util.VALUE_ORDERING.sortedCopy(subjectProperties.keySet())) {
293 
294                 final Multimap<IRI, Value> properties = subjectProperties.get(subject);
295 
296                 final boolean emitSubject = !(subject instanceof BNode)
297                         || this.bnodePreservationPolicy.apply((BNode) subject)
298                         || this.objectBNodes.get(subject) != Boolean.FALSE;
299 
300                 if (emitSubject) {
301                     if (!first) {
302                         this.writer.writeEOL();
303                     }
304                     writeSubject(subject, properties);
305                     first = false;
306 
307                 } else {
308                     skippedBNodes.add((BNode) subject);
309                 }
310             }
311 
312             // Emit bnodes skipped as subject but not inlined as objects.
313             while (true) {
314                 skippedBNodes.removeAll(this.inlinedBNodes);
315                 this.inlinedBNodes.clear();
316                 if (skippedBNodes.isEmpty()) {
317                     break;
318                 }
319                 if (!first) {
320                     this.writer.writeEOL();
321                 }
322                 final Iterator<BNode> iterator = skippedBNodes.iterator();
323                 final BNode node = iterator.next();
324                 iterator.remove();
325                 writeSubject(node, this.subjectProperties.get(node));
326                 first = false;
327             }
328         }
329 
330         private void writeSubject(final Resource subject, final Multimap<IRI, Value> properties)
331                 throws IOException
332         {
333             this.writer.writeEOL();
334 
335             if (!(subject instanceof BNode) || this.bnodePreservationPolicy.apply((BNode) subject)
336                     || this.objectBNodes.containsKey(subject)) {
337                 writeResource(subject,false);
338                 this.writer.write(" ");
339             } else {
340                 this.writer.write("[] ");
341             }
342 
343             this.writer.increaseIndentation();
344             writeProperties(properties);
345             this.writer.write(" .");
346             this.writer.decreaseIndentation();
347         }
348 
349         private void writeProperties(final Multimap<IRI, Value> properties) throws IOException
350         {
351             boolean first = true;
352             for (final Map.Entry<IRI, Collection<Value>> entry : properties.asMap().entrySet()) {
353                 if (!first) {
354                     this.writer.write(" ;");
355                     this.writer.writeEOL();
356                 }
357                 writeProperty(entry.getKey(), entry.getValue());
358                 first = false;
359             }
360         }
361 
362         private void writeProperty(final IRI predicate, final Collection<Value> values)
363                 throws IOException
364         {
365             if (predicate.equals(RDF.TYPE)) {
366                 this.writer.write("a");
367             } else {
368                 writeURI(predicate);
369             }
370             this.writer.write(" ");
371 
372             // Emit the property values in two phases. First, IRIs, literals and BNodes whose ID
373             // must be preserved are emitted (phase = 0). Then, BNodes that can be expanded inline
374             // are emitted. The expansion check is done here and passed to writeObject() as hint.
375             boolean first = true;
376             for (int phase = 0; phase < 2 && !values.isEmpty(); ++phase) {
377                 for (final Iterator<Value> iterator = values.iterator(); iterator.hasNext();) {
378                     final Value value = iterator.next();
379                     final boolean bnodeExpansion = value instanceof BNode
380                             && !this.bnodePreservationPolicy.apply((BNode) value)
381                             && this.objectBNodes.get(value) != Boolean.TRUE;
382                     if (!bnodeExpansion && phase == 0 || bnodeExpansion && phase == 1) {
383                         if (!first) {
384                             this.writer.write(" , ");
385                         }
386                         writeObject(value, bnodeExpansion);
387                         first = false;
388                     }
389                 }
390             }
391         }
392 
393         private void writeObject(final Value value, final Boolean bnodeExpansionHint)
394                 throws IOException
395         {
396             // Determine whether a BNode expansion must occur, possibly reusing the supplied hint.
397             final boolean bnodeExpansion = bnodeExpansionHint != null ? bnodeExpansionHint
398                     .booleanValue() : value instanceof BNode
399                     && !this.bnodePreservationPolicy.apply((BNode) value)
400                     && this.objectBNodes.get(value) != Boolean.TRUE;
401 
402             if (!bnodeExpansion) {
403                 writeValue(value,false);
404             } else {
405                 this.inlinedBNodes.add((BNode) value);
406                 Multimap<IRI, Value> properties = this.subjectProperties.get(value);
407 
408                 if (properties == null) {
409                     // No properties: emit an empty blank node.
410                     this.writer.write("[]");
411 
412                 } else if (!properties.containsKey(RDF.FIRST)) {
413                     // Some properties, not a collection: emit the properties inline.
414                     this.writer.write("[");
415                     this.writer.increaseIndentation();
416                     this.writer.writeEOL();
417                     writeProperties(properties);
418                     this.writer.decreaseIndentation();
419                     this.writer.writeEOL();
420                     this.writer.write("]");
421 
422                 } else {
423                     // A collection: emit it inline.
424                     this.writer.write("(");
425                     Value node = value;
426                     while (true) {
427                         this.writer.write(" ");
428                         final Value element = Iterables.getFirst(properties.get(RDF.FIRST), null);
429                         writeObject(element, null); // no expansion hint here
430                         node = Iterables.getFirst(properties.get(RDF.REST), null);
431                         if (node != null && !node.equals(RDF.NIL)) {
432                             properties = this.subjectProperties.get(node);
433                             this.inlinedBNodes.add((BNode) node);
434                         } else {
435                             break;
436                         }
437                     }
438                     this.writer.write(" )");
439                 }
440             }
441         }
442 
443     }
444 
445 }