1 package eu.fbk.dkm.pikes.eval;
2
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.nio.charset.Charset;
import java.util.Collection;
import java.util.Iterator;
import java.util.Locale;
import java.util.Map;
import java.util.Set;

import com.google.common.base.Preconditions;
import com.google.common.base.Predicate;
import com.google.common.base.Predicates;
import com.google.common.collect.Iterables;
import com.google.common.collect.LinkedHashMultimap;
import com.google.common.collect.Maps;
import com.google.common.collect.Multimap;
import com.google.common.collect.Sets;

import org.eclipse.rdf4j.model.BNode;
import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.Literal;
import org.eclipse.rdf4j.model.Resource;
import org.eclipse.rdf4j.model.Statement;
import org.eclipse.rdf4j.model.Value;
import org.eclipse.rdf4j.model.vocabulary.RDF;
import org.eclipse.rdf4j.model.vocabulary.XMLSchema;
import org.eclipse.rdf4j.rio.RDFFormat;
import org.eclipse.rdf4j.rio.RDFHandlerException;
import org.eclipse.rdf4j.rio.RDFWriterFactory;
import org.eclipse.rdf4j.rio.turtle.TurtleWriter;
34
35
36
37
38 public final class PrettyTurtle implements RDFWriterFactory
39 {
40
41 public static PrettyTurtle INSTANCE = new PrettyTurtle();
42
43 public static final RDFFormat FORMAT = new RDFFormat("Pretty Turtle",
44 "text/turtle;prettify=true", Charset.forName("UTF-8"), "ttl", true, true);
45
46 private volatile Predicate<? super BNode> defaultBNodePreservationPolicy;
47
48 private volatile boolean defaultGeneratePrefixes;
49
50
51
52
53
54 public PrettyTurtle()
55 {
56 this.defaultBNodePreservationPolicy = Predicates.alwaysFalse();
57 this.defaultGeneratePrefixes = false;
58 }
59
60 public Predicate<? super BNode> getDefaultBNodePreservationPolicy()
61 {
62 return this.defaultBNodePreservationPolicy;
63 }
64
65 public void setDefaultBNodePreservationPolicy(
66 final Predicate<? super BNode> defaultBNodePreservationPolicy)
67 {
68 if (defaultBNodePreservationPolicy != null) {
69 this.defaultBNodePreservationPolicy = defaultBNodePreservationPolicy;
70 } else {
71 this.defaultBNodePreservationPolicy = Predicates.alwaysFalse();
72 }
73 }
74
75 public boolean isDefaultGeneratePrefixes()
76 {
77 return this.defaultGeneratePrefixes;
78 }
79
80 public void setDefaultGeneratePrefixes(final boolean defaultGeneratePrefixes)
81 {
82 this.defaultGeneratePrefixes = defaultGeneratePrefixes;
83 }
84
85 @Override
86 public RDFFormat getRDFFormat()
87 {
88 return FORMAT;
89 }
90
91 @Override
92 public RDFWriter getWriter(final OutputStream stream)
93 {
94 return getWriter(stream, this.defaultBNodePreservationPolicy, this.defaultGeneratePrefixes);
95 }
96
97 public RDFWriter getWriter(final OutputStream stream,
98 final Predicate<? super BNode> bnodePreservationPolicy, final boolean generatePrefixes)
99 {
100 return new RDFWriter(stream, bnodePreservationPolicy, generatePrefixes);
101 }
102
103 @Override
104 public RDFWriter getWriter(final Writer writer)
105 {
106 return getWriter(writer, this.defaultBNodePreservationPolicy, this.defaultGeneratePrefixes);
107 }
108
109 public RDFWriter getWriter(final Writer writer,
110 final Predicate<? super BNode> bnodePreservationPolicy, final boolean generatePrefixes)
111 {
112 return new RDFWriter(writer, bnodePreservationPolicy, generatePrefixes);
113 }
114
115 private static class RDFWriter extends TurtleWriter
116 {
117
118 private final Predicate<? super BNode> bnodePreservationPolicy;
119
120 private final boolean generatePrefixes;
121
122 private final Map<Resource, Multimap<IRI, Value>> subjectProperties;
123
124
125 private final Map<BNode, Boolean> objectBNodes;
126
127 private final Set<String> mentionedNamespaces;
128
129 private final Set<BNode> inlinedBNodes;
130
131 public RDFWriter(final OutputStream out,
132 final Predicate<? super BNode> bnodePreservationPolicy,
133 final boolean generatePrefixes)
134 {
135
136 this(new OutputStreamWriter(out, Charset.forName("UTF-8")), bnodePreservationPolicy,
137 generatePrefixes);
138 }
139
140 public RDFWriter(final Writer writer,
141 final Predicate<? super BNode> bnodePreservationPolicy,
142 final boolean generatePrefixes)
143 {
144 super(writer);
145
146 this.bnodePreservationPolicy = bnodePreservationPolicy;
147 this.generatePrefixes = generatePrefixes;
148
149 this.subjectProperties = Maps.newLinkedHashMap();
150 this.objectBNodes = Maps.newHashMap();
151 this.mentionedNamespaces = Sets.newHashSet();
152 this.inlinedBNodes = Sets.newLinkedHashSet();
153 }
154
155 @Override
156 public RDFFormat getRDFFormat()
157 {
158 return FORMAT;
159 }
160
161 @Override
162 public void startRDF() throws RDFHandlerException
163 {
164 Preconditions.checkState(!this.writingStarted, "Writing already started");
165 this.writingStarted = true;
166 }
167
168 @Override
169 public void handleNamespace(final String prefix, final String uri)
170 throws RDFHandlerException
171 {
172 Preconditions.checkState(this.writingStarted, "Writing not yet started");
173
174
175
176 final boolean writingStarted = this.writingStarted;
177 this.writingStarted = false;
178
179 super.handleNamespace(prefix, uri);
180
181 this.writingStarted = writingStarted;
182 }
183
184 @Override
185 public void handleStatement(final Statement statement) throws RDFHandlerException
186 {
187 Preconditions.checkState(this.writingStarted, "Writing not yet started");
188
189 final Resource subject = statement.getSubject();
190 final IRI predicate = statement.getPredicate();
191 final Value object = statement.getObject();
192
193 registerMentionedNamespaces(subject);
194 registerMentionedNamespaces(predicate);
195 registerMentionedNamespaces(object);
196 if (object instanceof Literal && !((Literal) object).getDatatype().equals(XMLSchema.STRING)) {
197 registerMentionedNamespaces(((Literal) object).getDatatype());
198 }
199
200 Multimap<IRI, Value> properties = this.subjectProperties.get(subject);
201 if (properties == null) {
202 properties = LinkedHashMultimap.create();
203 this.subjectProperties.put(subject, properties);
204 }
205 properties.put(predicate, object);
206
207 if (object instanceof BNode && !this.bnodePreservationPolicy.apply((BNode) object)) {
208 this.objectBNodes.put((BNode) object, this.objectBNodes.containsKey(object)
209 || object.equals(subject) ? Boolean.TRUE : Boolean.FALSE);
210 }
211 }
212
213 @Override
214 public void handleComment(final String comment) throws RDFHandlerException
215 {
216
217 }
218
219 @Override
220 public void endRDF() throws RDFHandlerException
221 {
222 Preconditions.checkState(this.writingStarted, "Writing not yet started");
223
224 if (this.generatePrefixes) {
225 generatePrefixes();
226 }
227
228 this.writingStarted = false;
229 try {
230 writeNamespaces();
231 writeStatements();
232 this.writer.flush();
233
234 } catch (final IOException ex) {
235 throw new RDFHandlerException(ex);
236 }
237 }
238
239 private void registerMentionedNamespaces(final Value value)
240 {
241 if (value instanceof IRI) {
242 this.mentionedNamespaces.add(((IRI) value).getNamespace());
243 }
244 }
245
246 private void generatePrefixes() throws RDFHandlerException
247 {
248 final boolean writingStarted = this.writingStarted;
249 this.writingStarted = false;
250
251 for (final String namespace : Sets.difference(this.mentionedNamespaces,
252 this.namespaceTable.keySet())) {
253 final int endIndex = Math.max(namespace.lastIndexOf(':'),
254 Math.max(namespace.lastIndexOf('/'), namespace.lastIndexOf('#')));
255 int startIndex = endIndex;
256 while (startIndex > 0 && Character.isLetter(namespace.charAt(startIndex - 1))) {
257 --startIndex;
258 }
259 if (startIndex >= endIndex) {
260 continue;
261 }
262 final String candidatePrefix = namespace.substring(startIndex, endIndex)
263 .toLowerCase();
264 if (!this.namespaceTable.containsKey(candidatePrefix)) {
265 super.handleNamespace(candidatePrefix, namespace);
266 }
267 }
268
269 this.writingStarted = writingStarted;
270 }
271
272 private void writeNamespaces() throws IOException
273 {
274 if (!this.namespaceTable.isEmpty()) {
275 for (final Map.Entry<String, String> namespace : this.namespaceTable.entrySet()) {
276 final String prefix = namespace.getValue();
277 final String uri = namespace.getKey();
278 if (this.mentionedNamespaces.contains(uri)) {
279 writeNamespace(prefix, uri);
280 }
281 }
282 }
283 }
284
285 private void writeStatements() throws IOException
286 {
287
288 final Set<BNode> skippedBNodes = Sets.newLinkedHashSet();
289
290
291 boolean first = true;
292 for (Resource subject : Util.VALUE_ORDERING.sortedCopy(subjectProperties.keySet())) {
293
294 final Multimap<IRI, Value> properties = subjectProperties.get(subject);
295
296 final boolean emitSubject = !(subject instanceof BNode)
297 || this.bnodePreservationPolicy.apply((BNode) subject)
298 || this.objectBNodes.get(subject) != Boolean.FALSE;
299
300 if (emitSubject) {
301 if (!first) {
302 this.writer.writeEOL();
303 }
304 writeSubject(subject, properties);
305 first = false;
306
307 } else {
308 skippedBNodes.add((BNode) subject);
309 }
310 }
311
312
313 while (true) {
314 skippedBNodes.removeAll(this.inlinedBNodes);
315 this.inlinedBNodes.clear();
316 if (skippedBNodes.isEmpty()) {
317 break;
318 }
319 if (!first) {
320 this.writer.writeEOL();
321 }
322 final Iterator<BNode> iterator = skippedBNodes.iterator();
323 final BNode node = iterator.next();
324 iterator.remove();
325 writeSubject(node, this.subjectProperties.get(node));
326 first = false;
327 }
328 }
329
330 private void writeSubject(final Resource subject, final Multimap<IRI, Value> properties)
331 throws IOException
332 {
333 this.writer.writeEOL();
334
335 if (!(subject instanceof BNode) || this.bnodePreservationPolicy.apply((BNode) subject)
336 || this.objectBNodes.containsKey(subject)) {
337 writeResource(subject,false);
338 this.writer.write(" ");
339 } else {
340 this.writer.write("[] ");
341 }
342
343 this.writer.increaseIndentation();
344 writeProperties(properties);
345 this.writer.write(" .");
346 this.writer.decreaseIndentation();
347 }
348
349 private void writeProperties(final Multimap<IRI, Value> properties) throws IOException
350 {
351 boolean first = true;
352 for (final Map.Entry<IRI, Collection<Value>> entry : properties.asMap().entrySet()) {
353 if (!first) {
354 this.writer.write(" ;");
355 this.writer.writeEOL();
356 }
357 writeProperty(entry.getKey(), entry.getValue());
358 first = false;
359 }
360 }
361
362 private void writeProperty(final IRI predicate, final Collection<Value> values)
363 throws IOException
364 {
365 if (predicate.equals(RDF.TYPE)) {
366 this.writer.write("a");
367 } else {
368 writeURI(predicate);
369 }
370 this.writer.write(" ");
371
372
373
374
375 boolean first = true;
376 for (int phase = 0; phase < 2 && !values.isEmpty(); ++phase) {
377 for (final Iterator<Value> iterator = values.iterator(); iterator.hasNext();) {
378 final Value value = iterator.next();
379 final boolean bnodeExpansion = value instanceof BNode
380 && !this.bnodePreservationPolicy.apply((BNode) value)
381 && this.objectBNodes.get(value) != Boolean.TRUE;
382 if (!bnodeExpansion && phase == 0 || bnodeExpansion && phase == 1) {
383 if (!first) {
384 this.writer.write(" , ");
385 }
386 writeObject(value, bnodeExpansion);
387 first = false;
388 }
389 }
390 }
391 }
392
393 private void writeObject(final Value value, final Boolean bnodeExpansionHint)
394 throws IOException
395 {
396
397 final boolean bnodeExpansion = bnodeExpansionHint != null ? bnodeExpansionHint
398 .booleanValue() : value instanceof BNode
399 && !this.bnodePreservationPolicy.apply((BNode) value)
400 && this.objectBNodes.get(value) != Boolean.TRUE;
401
402 if (!bnodeExpansion) {
403 writeValue(value,false);
404 } else {
405 this.inlinedBNodes.add((BNode) value);
406 Multimap<IRI, Value> properties = this.subjectProperties.get(value);
407
408 if (properties == null) {
409
410 this.writer.write("[]");
411
412 } else if (!properties.containsKey(RDF.FIRST)) {
413
414 this.writer.write("[");
415 this.writer.increaseIndentation();
416 this.writer.writeEOL();
417 writeProperties(properties);
418 this.writer.decreaseIndentation();
419 this.writer.writeEOL();
420 this.writer.write("]");
421
422 } else {
423
424 this.writer.write("(");
425 Value node = value;
426 while (true) {
427 this.writer.write(" ");
428 final Value element = Iterables.getFirst(properties.get(RDF.FIRST), null);
429 writeObject(element, null);
430 node = Iterables.getFirst(properties.get(RDF.REST), null);
431 if (node != null && !node.equals(RDF.NIL)) {
432 properties = this.subjectProperties.get(node);
433 this.inlinedBNodes.add((BNode) node);
434 } else {
435 break;
436 }
437 }
438 this.writer.write(" )");
439 }
440 }
441 }
442
443 }
444
445 }