1   package eu.fbk.dkm.pikes.eval;
2   
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.function.Function;

import javax.annotation.Nullable;

import com.google.common.collect.HashMultimap;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Multimap;
import com.google.common.collect.Ordering;
import com.google.common.collect.Sets;

import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.Literal;
import org.eclipse.rdf4j.model.Resource;
import org.eclipse.rdf4j.model.Statement;
import org.eclipse.rdf4j.model.Value;
import org.eclipse.rdf4j.model.ValueFactory;
import org.eclipse.rdf4j.model.vocabulary.DCTERMS;
import org.eclipse.rdf4j.model.vocabulary.OWL;
import org.eclipse.rdf4j.model.vocabulary.RDF;
import org.eclipse.rdf4j.model.vocabulary.RDFS;
import org.eclipse.rdf4j.query.BindingSet;
import org.eclipse.rdf4j.query.algebra.TupleExpr;
import org.eclipse.rdf4j.rio.RDFHandler;
import org.slf4j.LoggerFactory;

import eu.fbk.dkm.pikes.rdf.vocab.NIF;
import eu.fbk.rdfpro.RDFHandlers;
import eu.fbk.rdfpro.RDFSources;
import eu.fbk.rdfpro.util.QuadModel;
import eu.fbk.rdfpro.util.Statements;
import eu.fbk.utils.core.CommandLine;
import eu.fbk.utils.core.CommandLine.Type;
46  
47  public class Converter {
48  
49      private static final Set<String> AM_ROLES = ImmutableSet.of("dir", "loc", "mnr", "ext", "rec",
50              "prd", "pnc", "cau", "dis", "adv", "mod", "neg");
51  
52      private static final IRI DUL_ASSOCIATED_WITH = Statements.VALUE_FACTORY
53              .createIRI("http://www.ontologydesignpatterns.org/ont/dul/DUL.owl#associatedWith");
54  
55      public static final Converter FRED_CONVERTER = new Converter(
56              "fred",
57              "" //
58                      + "SELECT ?uri (REPLACE(?t, '_', ' ') AS ?text)\n" //
59                      + "WHERE { ?uri a nif:Context ; nif:isString ?t . }\n",
60              "" //
61                      + "PREFIX fsem: <http://ontologydesignpatterns.org/cp/owl/semiotics.owl#>\n"
62                      + "PREFIX eval: <http://pikes.fbk.eu/ontologies/eval#>\n"
63                      + "SELECT ?node ?begin ?end ?head\n"
64                      + "WHERE {\n"
65                      + "  ?m fsem:denotes|fsem:hasInterpretant ?node ;\n"
66                      + "  nif:beginIndex ?begin ;\n"
67                      + "  nif:endIndex ?end ;\n"
68                      + "  OPTIONAL { ?m eval:head ?head }\n"
69                      + "  FILTER EXISTS { ?node ?p ?o }\n"
70                      + "  FILTER NOT EXISTS { ?s ?node ?o }\n"
71                      + "  FILTER NOT EXISTS { ?s a ?node }\n"
72                      + "  FILTER NOT EXISTS { ?node a owl:Class }\n" //
73                      + "}\n" //
74                      + "ORDER BY ?m", //
75              (final IRI uri) -> {
76                  String ns = uri.getNamespace();
77                  String name = uri.getLocalName();
78                  if (ns.equals("http://www.ontologydesignpatterns.org/ont/vn/abox/role/")
79                          || ns.equals("http://www.ontologydesignpatterns.org/ont/boxer/boxer.owl#")
80                          && (name.equals("agent") || name.equals("patient") || name.equals("theme"))) {
81                      ns = "http://pikes.fbk.eu/ontologies/verbnet#";
82                      name = name.toLowerCase();
83                  } else if (ns.equals("http://www.ontologydesignpatterns.org/ont/vn/data/")) {
84                      ns = "http://pikes.fbk.eu/ontologies/verbnet#";
85                      final String code = name.substring(name.lastIndexOf('_') + 1);
86                      final int l = code.length();
87                      final int n1 = l < 2 ? 0 : Integer.parseInt(code.substring(0, 2));
88                      final int n2 = l < 4 ? 0 : Integer.parseInt(code.substring(2, 4));
89                      final int n3 = l < 5 ? 0 : Character.digit(code.charAt(4), 10);
90                      final int n4 = l < 6 ? 0 : Character.digit(code.charAt(5), 10);
91                      final int n5 = l < 7 ? 0 : Character.digit(code.charAt(6), 10);
92                      final int n6 = l < 8 ? 0 : Character.digit(code.charAt(7), 10);
93                      final StringBuilder b = new StringBuilder().append(n1);
94                      assert n1 >= 0 && n2 >= 0 && n3 >= 0 && n4 >= 0 && n5 >= 0 && n6 >= 0;
95                      if (n2 != 0) {
96                          b.append('.').append(n2);
97                          if (n3 != 0) {
98                              b.append('.').append(n3);
99                          }
100                     }
101                     if (n4 != 0) {
102                         b.append('-').append(n4);
103                         if (n5 != 0) {
104                             b.append('-').append(n5);
105                             if (n6 != 0) {
106                                 b.append('-').append(n6);
107                             }
108                         }
109                     }
110                     name = b.toString();
111                 }
112                 return Statements.VALUE_FACTORY.createIRI(ns, name);
113             }, "PREFIX fsem: <http://ontologydesignpatterns.org/cp/owl/semiotics.owl#>\n"
114                     + "SELECT ?s (owl:sameAs AS ?p) ?o\n "
115                     + "WHERE { ?s fsem:denotes ?o. FILTER EXISTS { ?m fsem:denotes ?s } }");
116 
117     public static final Converter GOLD_CONVERTER = new Converter("gold", "" //
118             + "SELECT ?uri ?text\n" //
119             + "WHERE { ?uri rdfs:label ?text . }\n", "" //
120             + "PREFIX fsem: <http://ontologydesignpatterns.org/cp/owl/semiotics.owl#>\n"
121             + "PREFIX eval: <http://pikes.fbk.eu/ontologies/eval#>\n"
122             + "SELECT DISTINCT ?node (?node AS ?head)\n" //
123             + "WHERE {\n"
124             + "  { ?node a eval:Node } UNION\n"
125             + "  { ?node a eval:Entity } UNION\n"
126             + "  { ?node a eval:Frame } UNION\n"
127             + "  { ?node a eval:Quality }\n" //
128             + "}\n" //
129             + "ORDER BY ?m", //
130             (final IRI uri) -> {
131                 final String ns = uri.getNamespace();
132                 String name = uri.getLocalName();
133                 if (ns.equals("http://pikes.fbk.eu/ontologies/verbnet#")) {
134                     final int index = name.indexOf('-');
135                     if (index > 0) {
136                         name = name.substring(index + 1);
137                     }
138                 }
139                 return Statements.VALUE_FACTORY.createIRI(ns, name);
140             });
141 
142     public static final Converter PIKES_CONVERTER = new Converter("pikes", "" //
143             + "PREFIX eval: <http://pikes.fbk.eu/ontologies/eval#>\n" //
144             + "SELECT ?uri ?text\n" //
145             + "WHERE { ?uri a eval:Sentence ; rdfs:label ?text . }\n", "" //
146             + "PREFIX gaf: <http://groundedannotationframework.org/gaf#>\n"
147             + "PREFIX eval: <http://pikes.fbk.eu/ontologies/eval#>\n"
148             + "SELECT ?node ?begin ?end ?head (?m AS ?sentence)\n" //
149             + "WHERE {\n" //
150             + "  ?node gaf:denotedBy ?m .\n"
151             + "  ?m nif:beginIndex ?begin ;\n"
152             + "     nif:endIndex ?end ;\n" //
153             + "  OPTIONAL { ?m eval:head ?head }\n" + "}\n" //
154             + "ORDER BY ?m", //
155             (final IRI uri) -> {
156                 String ns = uri.getNamespace();
157                 String name = uri.getLocalName();
158                 boolean rewriteName = false;
159                 if (ns.equals("http://www.newsreader-project.eu/ontologies/propbank/")) {
160                     ns = "http://pikes.fbk.eu/ontologies/propbank#";
161                     rewriteName = true;
162                 } else if (ns.equals("http://www.newsreader-project.eu/ontologies/nombank/")) {
163                     ns = "http://pikes.fbk.eu/ontologies/nombank#";
164                     rewriteName = true;
165                 } else if (ns.equals("http://www.newsreader-project.eu/ontologies/verbnet/")) {
166                     ns = "http://pikes.fbk.eu/ontologies/verbnet#";
167                     final int index = name.indexOf('-');
168                     if (index > 0) {
169                         name = name.substring(index + 1);
170                     }
171                 } else if (ns.equals("http://www.newsreader-project.eu/ontologies/framenet/")) {
172                     ns = "http://pikes.fbk.eu/ontologies/framenet#";
173                 } else if (ns.equals("http://dkm.fbk.eu/ontologies/knowledgestore#")
174                         && name.equals("mod")) {
175                     ns = "http://www.ontologydesignpatterns.org/ont/dul/DUL.owl#";
176                     name = "associatedWith";
177                 }
178                 if (rewriteName) {
179                     if (AM_ROLES.contains(name.toLowerCase())) {
180                         name = "am-" + name.toLowerCase();
181                     } else if (name.endsWith("_0") || name.endsWith("_1") || name.endsWith("_2")
182                             || name.endsWith("_3") || name.endsWith("_4") || name.endsWith("_5")) {
183                         name = "a" + name.charAt(name.length() - 1);
184                     }
185                 }
186                 return Statements.VALUE_FACTORY.createIRI(ns, name);
187             });
188 
189     private static final Set<IRI> IGNORABLE_TERMS = ImmutableSet.of( //
190             Statements.VALUE_FACTORY.createIRI("http://www.newsreader-project.eu/ontologies/propbank/adv"), //
191             Statements.VALUE_FACTORY.createIRI("http://www.newsreader-project.eu/ontologies/nombank/adv"), //
192             Statements.VALUE_FACTORY.createIRI("http://groundedannotationframework.org/gaf#denotedBy"), //
193             Statements.VALUE_FACTORY.createIRI("http://www.ontologydesignpatterns.org/ont/fred/pos.owl#boxerpos"), //
194             Statements.VALUE_FACTORY.createIRI("http://ontologydesignpatterns.org/cp/owl/semiotics.owl#denotes"), //
195             Statements.VALUE_FACTORY.createIRI("http://ontologydesignpatterns.org/cp/owl/semiotics.owl#hasInterpretant"), //
196             NIF.OFFSET_BASED_STRING, NIF.BEGIN_INDEX, NIF.END_INDEX, NIF.REFERENCE_CONTEXT);
197 
198     private final String creator;
199 
200     private final TupleExpr textQuery;
201 
202     private final TupleExpr nodeQuery;
203 
204     private final Function<IRI, IRI> uriRewriter;
205 
206     private final TupleExpr[] expandQueries;
207 
208     public Converter(final String creator, final String textQuery, final String nodeQuery,
209             @Nullable final Function<IRI, IRI> uriRewriter, final String... expandQueries) {
210         this.creator = Objects.requireNonNull(creator);
211         this.textQuery = Util.parse(textQuery);
212         this.nodeQuery = Util.parse(nodeQuery);
213         this.uriRewriter = uriRewriter;
214         this.expandQueries = new TupleExpr[expandQueries.length];
215         for (int i = 0; i < expandQueries.length; ++i) {
216             this.expandQueries[i] = Util.parse(expandQueries[i]);
217         }
218 
219     }
220 
221     public QuadModel convert(final QuadModel model) throws Throwable {
222 
223         final ValueFactory vf = Statements.VALUE_FACTORY;
224         final QuadModel result = QuadModel.create();
225 
226         final Map<IRI, Sentence> sentences = new HashMap<>();
227         for (final BindingSet binding : Util.query(model, this.textQuery)) {
228             final IRI uri = vf.createIRI(((IRI) binding.getValue("uri")).getNamespace());
229             final String text = binding.getValue("text").stringValue().trim();
230             sentences.put(uri, new Sentence(text));
231         }
232 
233         final Map<Value, IRI> nodeSentences = Maps.newHashMap();
234         final Multimap<Value, String> nodeTerms = HashMultimap.create();
235         for (final BindingSet binding : Util.query(model, this.nodeQuery)) {
236             final IRI node = (IRI) binding.getValue("node");
237             final IRI head = (IRI) binding.getValue("head");
238             IRI sentenceIRI = (IRI) binding.getValue("sentence");
239             sentenceIRI = sentenceIRI != null ? vf.createIRI(sentenceIRI.getNamespace()) : vf
240                     .createIRI(node.getNamespace());
241             final Sentence sentence = sentences.get(sentenceIRI);
242             final String term;
243             if (head != null) {
244                 term = sentence.getTerm(head.getLocalName());
245             } else {
246                 final int begin = ((Literal) binding.getValue("begin")).intValue();
247                 final int end = ((Literal) binding.getValue("end")).intValue();
248                 term = sentence.getTerm(begin, end);
249             }
250             nodeTerms.put(node, term);
251             nodeSentences.put(node, sentenceIRI);
252         }
253 
254         final Set<Statement> splittingStmts = Sets.newHashSet();
255         for (final Statement stmt : model) {
256             if (EVAL.METADATA.equals(stmt.getContext())) {
257                 splittingStmts.add(stmt);
258             }
259         }
260 
261         for (final Map.Entry<IRI, Sentence> entry : sentences.entrySet()) {
262             final IRI sentenceIRI = entry.getKey();
263             final IRI graphIRI = vf.createIRI(sentenceIRI + "graph_" + this.creator);
264             result.add(sentenceIRI, RDF.TYPE, EVAL.SENTENCE, EVAL.METADATA);
265             result.add(sentenceIRI, RDFS.LABEL, vf.createLiteral(entry.getValue().getText()),
266                     EVAL.METADATA);
267             result.add(graphIRI, RDF.TYPE, EVAL.KNOWLEDGE_GRAPH, EVAL.METADATA);
268             result.add(graphIRI, DCTERMS.SOURCE, sentenceIRI, EVAL.METADATA);
269             result.add(graphIRI, DCTERMS.CREATOR, vf.createLiteral(this.creator), EVAL.METADATA);
270         }
271 
272         for (final Value node : nodeTerms.keySet()) {
273             final IRI sentenceIRI = nodeSentences.get(node);
274             final IRI graphIRI = vf.createIRI(sentenceIRI + "graph_" + this.creator);
275             final Collection<String> terms = nodeTerms.get(node);
276             for (final String term : terms) {
277                 final IRI termIRI = vf.createIRI(sentenceIRI + "term_" + term);
278                 final IRI nodeIRI = terms.size() == 1 ? (IRI) node : vf.createIRI(node + "_"
279                         + term);
280                 result.add(nodeIRI, RDF.TYPE, EVAL.NODE, graphIRI);
281                 result.add(nodeIRI, EVAL.DENOTED_BY, termIRI, graphIRI);
282             }
283         }
284 
285         final Set<Statement> expanded = Sets.newHashSet();
286         for (final TupleExpr expandQuery : this.expandQueries) {
287             for (final BindingSet bindings : Util.query(model, expandQuery)) {
288                 final Value s = bindings.getValue("s");
289                 final Value p = bindings.getValue("p");
290                 final Value o = bindings.getValue("o");
291                 if (s instanceof Resource && p instanceof IRI && o instanceof Value) {
292                     expanded.add(vf.createStatement((Resource) s, (IRI) p, o));
293                 }
294             }
295         }
296 
297         for (final Statement stmt : Iterables.concat(model, expanded)) {
298             IRI pred = stmt.getPredicate();
299             Value obj = stmt.getObject();
300             if (EVAL.METADATA.equals(stmt.getContext())) {
301                 continue;
302             }
303             final Resource subj = stmt.getSubject();
304             if (IGNORABLE_TERMS.contains(pred) || pred.equals(RDF.TYPE)
305                     && IGNORABLE_TERMS.contains(obj)) {
306                 continue;
307             }
308             if (this.uriRewriter != null) {
309                 pred = this.uriRewriter.apply(pred);
310                 if (pred.equals(RDF.TYPE) && obj instanceof IRI) {
311                     obj = this.uriRewriter.apply((IRI) obj);
312                 }
313             }
314             final Collection<String> subjTerms = nodeTerms.get(subj);
315             if (!subjTerms.isEmpty()) {
316                 final IRI sentenceIRI = nodeSentences.get(subj);
317                 final IRI graphIRI = vf.createIRI(sentenceIRI + "graph_" + this.creator);
318                 final List<Value> subjIRIs = split(subj, subjTerms);
319                 final List<Value> objValues = split(obj, nodeTerms.get(obj));
320                 corefer(result, graphIRI, subjIRIs);
321                 corefer(result, graphIRI, objValues);
322                 boolean added = false;
323                 final boolean splitting = subjIRIs.size() > 1 || objValues.size() > 1;
324                 for (final Value subjIRI : subjIRIs) {
325                     for (final Value objValue : objValues) {
326                         final Statement s = vf.createStatement((IRI) subjIRI, pred, objValue,
327                                 graphIRI);
328                         if (!splitting || splittingStmts.contains(s)) {
329                             result.add(s);
330                             added = true;
331                         }
332                     }
333                 }
334                 if (!added) {
335                     throw new IllegalArgumentException("Could not split statement: "
336                             + vf.createStatement(subj, pred, obj, stmt.getContext()) + "\nsubj: "
337                             + subjIRIs + "\nobj: " + objValues);
338                 }
339             }
340         }
341 
342         return result;
343     }
344 
345     public static void replaceNominalFrames(final QuadModel model) {
346 
347         for (final Resource graphID : model.contexts()) {
348 
349             final Map<IRI, IRI> terms = Maps.newHashMap();
350             for (final Statement stmt : model.filter(null, EVAL.DENOTED_BY, null, graphID)) {
351                 terms.put((IRI) stmt.getSubject(), (IRI) stmt.getObject());
352             }
353 
354             final Set<IRI> allPreds = Sets.newHashSet();
355             final Set<IRI> nbPreds = Sets.newHashSet();
356             final Set<IRI> pbPreds = Sets.newHashSet();
357             for (final Statement stmt : model.filter(null, RDF.TYPE, null, graphID)) {
358                 if (stmt.getObject() instanceof IRI) {
359                     final String ns = ((IRI) stmt.getObject()).getNamespace();
360                     if (isFrameNS(ns)) {
361                         final IRI pred = (IRI) stmt.getSubject();
362                         allPreds.add(pred);
363                         if (ns.equals("http://pikes.fbk.eu/ontologies/propbank#")) {
364                             pbPreds.add(pred);
365                         }
366                         if (ns.equals("http://pikes.fbk.eu/ontologies/nombank#")) {
367                             nbPreds.add(pred);
368                         }
369                     }
370                 }
371             }
372             final Set<IRI> nomPreds = Sets.newHashSet();
373             nomPreds.addAll(nbPreds);
374             nomPreds.addAll(Sets.difference(allPreds, pbPreds));
375 
376             for (final IRI pred : nomPreds) {
377                 final IRI predTerm = terms.get(pred);
378                 final List<Statement> stmts = Lists.newArrayList(model.filter(pred, null, null,
379                         graphID));
380                 IRI newSubj = pred;
381                 for (final Statement stmt : stmts) {
382                     final IRI argTerm = terms.get(stmt.getObject());
383                     if (predTerm.equals(argTerm)) {
384                         newSubj = (IRI) stmt.getObject();
385                         break;
386                     }
387                 }
388                 for (final Statement stmt : stmts) {
389                     final boolean isFrameRole = isFrameNS(stmt.getPredicate().getNamespace());
390                     final boolean isFrameType = !isFrameRole && stmt.getObject() instanceof IRI
391                             && isFrameNS(((IRI) stmt.getObject()).getNamespace());
392                     if (isFrameRole && !newSubj.equals(stmt.getObject())) {
393                         model.add(newSubj, DUL_ASSOCIATED_WITH, stmt.getObject(), graphID);
394                     }
395                     if (isFrameRole || isFrameType || newSubj != pred) {
396                         model.remove(stmt);
397                     }
398                 }
399             }
400         }
401     }
402 
403     private static boolean isFrameNS(final String ns) {
404         return ns.equals("http://pikes.fbk.eu/ontologies/propbank#")
405                 || ns.equals("http://pikes.fbk.eu/ontologies/nombank#")
406                 || ns.equals("http://pikes.fbk.eu/ontologies/verbnet#")
407                 || ns.equals("http://pikes.fbk.eu/ontologies/framenet#");
408     }
409 
410     private static List<Value> split(final Value value, final Collection<String> terms) {
411         if (terms.size() <= 1) {
412             return ImmutableList.of(value);
413         } else {
414             final List<Value> values = Lists.newArrayListWithCapacity(terms.size());
415             for (final String term : terms) {
416                 values.add(Statements.VALUE_FACTORY.createIRI(value + "_" + term));
417             }
418             return ImmutableList.copyOf(values);
419         }
420     }
421 
422     private static void corefer(final QuadModel model, final Resource graph,
423             @Nullable final Collection<Value> values) {
424         if (values != null && values.size() > 1) {
425             for (final Value value1 : values) {
426                 for (final Value value2 : values) {
427                     if (Util.VALUE_ORDERING.compare(value1, value2) < 0) {
428                         model.add((Resource) value1, OWL.SAMEAS, (Resource) value2, graph);
429                     }
430                 }
431             }
432         }
433     }
434 
435     public static void main(final String... args) {
436 
437         try {
438             // Parse command line
439             final CommandLine cmd = CommandLine
440                     .parser()
441                     .withName("eval-converter")
442                     .withHeader("Convert a tool output in the format used for the evaluation.")
443                     .withOption("o", "output", "the output file", "FILE", Type.STRING, true,
444                             false, true)
445                     .withOption("f", "format", "the format (fred, pikes, gold)", "FMT",
446                             Type.STRING, true, false, true)
447                     .withOption("n", "replace-nominal",
448                             "replaces nominal frames with association " //
449                                     + " relations (for FRED compatibility)")
450                     .withLogger(LoggerFactory.getLogger("eu.fbk")) //
451                     .parse(args);
452 
453             // Extract options
454             final String format = cmd.getOptionValue("f", String.class).trim().toLowerCase();
455             final String outputFile = cmd.getOptionValue("o", String.class);
456             final List<String> inputFiles = cmd.getArgs(String.class);
457             final boolean replaceNominalFrames = cmd.hasOption("n");
458 
459             // Obtain the converter corresponding to the format specified
460             Converter converter;
461             if (format.equalsIgnoreCase("fred")) {
462                 converter = FRED_CONVERTER;
463             } else if (format.equalsIgnoreCase("gold")) {
464                 converter = GOLD_CONVERTER;
465             } else if (format.equalsIgnoreCase("pikes")) {
466                 converter = PIKES_CONVERTER;
467             } else {
468                 throw new IllegalArgumentException("Unknown format: " + format);
469             }
470 
471             // Read the input
472             final Map<String, String> namespaces = Maps.newHashMap();
473             final QuadModel input = QuadModel.create();
474             RDFSources.read(false, false, null, null, null, true,
475                     inputFiles.toArray(new String[inputFiles.size()])).emit(
476                     RDFHandlers.wrap(input, namespaces), 1);
477 
478             // Perform the conversion
479             final QuadModel output = converter.convert(input);
480 
481             // Replace nominal frames if requested
482             if (replaceNominalFrames) {
483                 replaceNominalFrames(output);
484             }
485 
486             // Write the output
487             final RDFHandler out = RDFHandlers.write(null, 1000, outputFile);
488             out.startRDF();
489             namespaces.put(DCTERMS.PREFIX, DCTERMS.NAMESPACE);
490             namespaces.put("pb", "http://pikes.fbk.eu/ontologies/propbank#");
491             namespaces.put("nb", "http://pikes.fbk.eu/ontologies/nombank#");
492             namespaces.put("vn", "http://pikes.fbk.eu/ontologies/verbnet#");
493             namespaces.put("fn", "http://pikes.fbk.eu/ontologies/framenet#");
494             namespaces.put("dul", "http://www.ontologydesignpatterns.org/ont/dul/DUL.owl#");
495             final Set<String> outputNS = Sets.newHashSet();
496             collectNS(outputNS, output);
497             for (final Map.Entry<String, String> entry : namespaces.entrySet()) {
498                 if (!entry.getKey().isEmpty() && outputNS.contains(entry.getValue())) {
499                     out.handleNamespace(entry.getKey(), entry.getValue());
500                 }
501             }
502             for (final Statement stmt : Ordering.from(
503                     Statements.statementComparator("cspo",
504                             Statements.valueComparator(RDF.NAMESPACE))).sortedCopy(output)) {
505                 out.handleStatement(stmt);
506             }
507             out.endRDF();
508 
509         } catch (final Throwable ex) {
510             // Display error information and terminate
511             CommandLine.fail(ex);
512         }
513     }
514 
515     private static void collectNS(final Collection<String> ns, final Iterable<Statement> stmts) {
516         for (final Statement stmt : stmts) {
517             collectNS(ns, stmt.getSubject());
518             collectNS(ns, stmt.getPredicate());
519             collectNS(ns, stmt.getObject());
520             collectNS(ns, stmt.getContext());
521         }
522     }
523 
524     private static void collectNS(final Collection<String> ns, @Nullable final Value value) {
525         if (value instanceof IRI) {
526             ns.add(((IRI) value).getNamespace());
527         }
528     }
529 
530     private static class Sentence {
531 
532         private final String text;
533 
534         private final int[] beginIndexes;
535 
536         private final int[] endIndexes;
537 
538         private final List<String> termList;
539 
540         private final Set<String> termSet;
541 
542         public Sentence(final String text) {
543 
544             final int[] begins = new int[text.length()];
545             final int[] ends = new int[text.length()];
546             final List<String> termList = Lists.newArrayList();
547             final Set<String> termSet = Sets.newHashSet();
548             int count = 0;
549 
550             final Set<String> ambiguousTerms = Sets.newHashSet();
551             boolean insideTerm = false;
552             for (int i = 0; i < text.length(); ++i) {
553                 final char ch = text.charAt(i);
554                 final boolean letter = Character.isLetter(ch) || ch == '-' || ch == '_';
555                 if (letter && !insideTerm) {
556                     begins[count] = i;
557                     insideTerm = true;
558                 } else if (!letter && insideTerm) {
559                     ends[count] = i;
560                     final String term = text.substring(begins[count], ends[count]);
561                     termList.add(term);
562                     if (!termSet.add(term)) {
563                         ambiguousTerms.add(term);
564                     }
565                     ++count;
566                     insideTerm = false;
567                 }
568             }
569 
570             for (final String term : ambiguousTerms) {
571                 int index = 0;
572                 termSet.remove(term);
573                 for (int i = 0; i < termList.size(); ++i) {
574                     if (termList.get(i).equals(term)) {
575                         final String t = term + "_" + (++index);
576                         termList.set(i, t);
577                         termSet.add(t);
578                     }
579                 }
580             }
581 
582             this.text = text;
583             this.beginIndexes = Arrays.copyOfRange(begins, 0, count);
584             this.endIndexes = Arrays.copyOfRange(ends, 0, count);
585             this.termList = termList;
586             this.termSet = termSet;
587         }
588 
589         public String getText() {
590             return this.text;
591         }
592 
593         public String getTerm(final String localName) {
594             int index = localName.length();
595             while (true) {
596                 final String candidate = localName.substring(0, index);
597                 for (final String term : this.termList) {
598                     if (candidate.equalsIgnoreCase(term)) {
599                         return term;
600                     }
601                 }
602                 index = localName.lastIndexOf('_', index);
603                 if (index < 0) {
604                     throw new IllegalArgumentException("Cannot map " + localName
605                             + " to a term\nterms: " + this.termSet);
606                 }
607             }
608         }
609 
610         public String getTerm(final int beginIndex, final int endIndex) {
611             final List<String> matches = Lists.newArrayList();
612             for (int i = 0; i < this.beginIndexes.length; ++i) {
613                 if (beginIndex < this.endIndexes[i] && endIndex > this.beginIndexes[i]) {
614                     matches.add(this.termList.get(i));
615                 }
616             }
617             if (matches.size() == 0) {
618                 throw new IllegalArgumentException("No term matching indexes " + beginIndex + ", "
619                         + endIndex);
620             }
621             if (matches.size() > 1) {
622                 throw new IllegalArgumentException("Multiple terms matching indexes " + beginIndex
623                         + ", " + endIndex + "\ntext: " + this.text + "\nbegins: "
624                         + Arrays.toString(this.beginIndexes) + "\nends: "
625                         + Arrays.toString(this.endIndexes));
626             }
627             return matches.get(0);
628         }
629 
630     }
631 
632 }