1   package eu.fbk.dkm.pikes.eval;
2   
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
9   
10  import javax.annotation.Nullable;
11  
12  import com.google.common.base.Joiner;
13  import com.google.common.base.MoreObjects;
14  import com.google.common.base.Strings;
15  import com.google.common.collect.HashBasedTable;
16  import com.google.common.collect.HashMultimap;
17  import com.google.common.collect.ImmutableList;
18  import com.google.common.collect.ImmutableMap;
19  import com.google.common.collect.ImmutableSet;
20  import com.google.common.collect.Iterables;
21  import com.google.common.collect.Lists;
22  import com.google.common.collect.Maps;
23  import com.google.common.collect.Multimap;
24  import com.google.common.collect.Ordering;
25  import com.google.common.collect.Sets;
26  import com.google.common.collect.Table;
27  import com.google.common.html.HtmlEscapers;
28  
29  import org.eclipse.rdf4j.model.Resource;
30  import org.eclipse.rdf4j.model.Statement;
31  import org.eclipse.rdf4j.model.IRI;
32  import org.eclipse.rdf4j.model.Value;
33  import org.eclipse.rdf4j.model.ValueFactory;
34  import org.eclipse.rdf4j.model.vocabulary.DCTERMS;
35  import org.eclipse.rdf4j.model.vocabulary.OWL;
36  import org.eclipse.rdf4j.model.vocabulary.RDF;
37  import org.eclipse.rdf4j.query.BindingSet;
38  import org.eclipse.rdf4j.query.algebra.TupleExpr;
39  import org.slf4j.Logger;
40  import org.slf4j.LoggerFactory;
41  
42  import eu.fbk.utils.core.CommandLine;
43  import eu.fbk.utils.eval.PrecisionRecall;
44  import eu.fbk.rdfpro.RDFHandlers;
45  import eu.fbk.rdfpro.RDFSources;
46  import eu.fbk.rdfpro.util.QuadModel;
47  import eu.fbk.rdfpro.util.Statements;
48  
49  public final class Evaluation {
50  
51      private static final Logger LOGGER = LoggerFactory.getLogger(Evaluation.class);
52  
53      private final Stats nodeStats;
54  
55      private final Stats unlabelledStats;
56  
57      private final Stats labelledStats;
58  
59      private final Map<String, Stats> labelledStatsByNS;
60  
61      private final Stats typeStats;
62  
63      private final Map<String, Stats> typeStatsByNS;
64  
65      private final Stats linkingStats;
66  
67      private final Stats triplesStats;
68  
69      private final String report;
70  
71      private Evaluation(final Stats nodeStats, final Stats unlabelledStats,
72              final Stats labelledStats, final Map<String, Stats> labelledStatsByNS,
73              final Stats typeStats, final Map<String, Stats> typeStatsByNS,
74              final Stats linkingStats, final Stats triplesStats, final String report) {
75          this.nodeStats = Objects.requireNonNull(nodeStats);
76          this.unlabelledStats = Objects.requireNonNull(unlabelledStats);
77          this.labelledStats = Objects.requireNonNull(labelledStats);
78          this.labelledStatsByNS = ImmutableMap.copyOf(labelledStatsByNS);
79          this.typeStats = Objects.requireNonNull(typeStats);
80          this.typeStatsByNS = ImmutableMap.copyOf(typeStatsByNS);
81          this.linkingStats = Objects.requireNonNull(linkingStats);
82          this.triplesStats = Objects.requireNonNull(triplesStats);
83          this.report = Objects.requireNonNull(report);
84      }
85  
86      public static Evaluation evaluate(final Iterable<Statement> model, final boolean simplified) {
87          return new Evaluator(model, simplified).get();
88      }
89  
90      public Stats getNodeStats() {
91          return this.nodeStats;
92      }
93  
94      public Stats getUnlabelledStats() {
95          return this.unlabelledStats;
96      }
97  
98      public Stats getLabelledStats() {
99          return this.labelledStats;
100     }
101 
102     public Map<String, Stats> getLabelledStatsByNS() {
103         return this.labelledStatsByNS;
104     }
105 
106     public Stats getTypeStats() {
107         return this.typeStats;
108     }
109 
110     public Map<String, Stats> getTypeStatsByNS() {
111         return this.typeStatsByNS;
112     }
113 
114     public Stats getLinkingStats() {
115         return this.linkingStats;
116     }
117 
118     public Stats getTriplesStats() {
119         return this.triplesStats;
120     }
121 
122     public String getReport() {
123         return this.report;
124     }
125 
126     public static final class Stats {
127 
128         private final Map<String, PrecisionRecall> goldPRs;
129 
130         private final Map<String, PrecisionRecall> unionPRs;
131 
132         @Nullable
133         private final String report;
134 
135         Stats(final Map<String, PrecisionRecall> goldPRs,
136                 final Map<String, PrecisionRecall> unionPRs, final String report) {
137             this.goldPRs = goldPRs;
138             this.unionPRs = unionPRs;
139             this.report = report;
140         }
141 
142         public static Stats aggregate(final Iterable<Stats> sources) {
143             final Map<String, PrecisionRecall.Evaluator> goldEvaluators = Maps.newHashMap();
144             final Map<String, PrecisionRecall.Evaluator> unionEvaluators = Maps.newHashMap();
145             for (final Stats source : sources) {
146                 updateHelper(source.goldPRs, goldEvaluators);
147                 updateHelper(source.unionPRs, unionEvaluators);
148             }
149             final Map<String, PrecisionRecall> goldPRs = Maps.newHashMap();
150             final Map<String, PrecisionRecall> unionPRs = Maps.newHashMap();
151             for (final Map.Entry<String, PrecisionRecall.Evaluator> entry : goldEvaluators
152                     .entrySet()) {
153                 goldPRs.put(entry.getKey(), entry.getValue().getResult());
154             }
155             for (final Map.Entry<String, PrecisionRecall.Evaluator> entry : unionEvaluators
156                     .entrySet()) {
157                 unionPRs.put(entry.getKey(), entry.getValue().getResult());
158             }
159             return new Stats(goldPRs, unionPRs, null);
160         }
161 
162         private static void updateHelper(final Map<String, PrecisionRecall> prs,
163                 final Map<String, PrecisionRecall.Evaluator> evaluators) {
164             for (final Map.Entry<String, PrecisionRecall> entry : prs.entrySet()) {
165                 final String system = entry.getKey();
166                 final PrecisionRecall pr = entry.getValue();
167                 if (pr != null) {
168                     PrecisionRecall.Evaluator evaluator = evaluators.get(system);
169                     if (evaluator == null) {
170                         evaluator = PrecisionRecall.evaluator();
171                         evaluators.put(system, evaluator);
172                     }
173                     evaluator.add(pr);
174                 }
175             }
176         }
177 
178         public List<String> getSystems() {
179             return Ordering.natural().immutableSortedCopy(this.goldPRs.keySet());
180         }
181 
182         public Map<String, PrecisionRecall> getGoldPRs() {
183             return this.goldPRs;
184         }
185 
186         public Map<String, PrecisionRecall> getUnionPRs() {
187             return this.unionPRs;
188         }
189 
190         @Nullable
191         public String getReport() {
192             return this.report;
193         }
194 
195     }
196 
197     private static final class Evaluator {
198 
199         private static final TupleExpr RELATION_QUERY = Util.parse("" //
200                 + "PREFIX eval: <http://pikes.fbk.eu/ontologies/eval#>\n"
201                 + "SELECT DISTINCT ?g ?s ?o ?sm ?om\n"
202                 + "WHERE {\n" //
203                 + "  GRAPH ?g {\n" //
204                 + "    ?s a eval:Node .\n"
205                 + "    ?o a eval:Node .\n"
206                 + "    FILTER EXISTS {\n"
207                 + "      { ?s ?p ?o } UNION { ?o ?p ?s }\n"
208                 + "      FILTER (?p != eval:associableTo && ?p != eval:notAssociableTo &&\n"
209                 + "              ?p != eval:classifiableAs && ?p != eval:mappedTo)\n"
210                 + "    }\n"
211                 + "    FILTER (MD5(STR(?s)) < MD5(STR(?o)))\n"
212                 + "    OPTIONAL { ?s eval:mappedTo ?sm }\n"
213                 + "    OPTIONAL { ?o eval:mappedTo ?om }\n" //
214                 + "  }\n" //
215                 + "}\n");
216 
217         private static final TupleExpr LABELLED_QUERY = Util.parse("" //
218                 + "PREFIX eval: <http://pikes.fbk.eu/ontologies/eval#>\n"
219                 + "SELECT DISTINCT ?g ?s ?p ?o ?sm ?om\n"
220                 + "WHERE {\n" //
221                 + "  GRAPH ?g {\n" //
222                 + "    ?s a eval:Node .\n"
223                 + "    ?o a eval:Node .\n"
224                 + "    ?s ?p ?o .\n"
225                 + "    FILTER (?p != eval:associableTo && ?p != eval:notAssociableTo &&\n"
226                 + "            ?p != eval:classifiableAs && ?p != eval:mappedTo)\n"
227                 + "    FILTER (?s != ?o)\n"
228                 + "    OPTIONAL { ?s eval:mappedTo ?sm }\n"
229                 + "    OPTIONAL { ?o eval:mappedTo ?om }\n" //
230                 + "  }\n" //
231                 + "}\n");
232 
233         private static final TupleExpr ATTRIBUTE_QUERY = Util
234                 .parse("" //
235                         + "PREFIX eval: <http://pikes.fbk.eu/ontologies/eval#>\n"
236                         + "SELECT DISTINCT ?g ?s ?p ?o ?sm\n"
237                         + "WHERE {\n" //
238                         + "  GRAPH ?g {\n" //
239                         + "    ?s a eval:Node .\n"
240                         + "    ?s ?p ?o .\n" //
241                         + "    FILTER NOT EXISTS { ?o a eval:Node }\n"
242                         + "    FILTER (?o != eval:Node && ?p != eval:mappedTo && ?p != eval:denotedBy &&\n"
243                         + "            ?p != eval:associableTo && ?p != eval:notAssociableTo &&\n"
244                         + "            ?p != eval:classifiableAs)\n"
245                         + "    OPTIONAL { ?s eval:mappedTo ?sm }\n" //
246                         + "  }\n" //
247                         + "}\n");
248 
249         private final QuadModel model;
250 
251         private final Map<IRI, String> systemMap;
252 
253         private final Map<IRI, IRI> sentenceMap;
254 
255         private final Map<IRI, String> sentenceLabels;
256 
257         private final List<String> systems;
258 
259         private final Multimap<IRI, Relation> ignorableRelations;
260 
261         private final Multimap<IRI, Relation> forbiddenRelations;
262 
263         private final Multimap<IRI, Relation> ignorableTypes;
264 
265         private final Evaluation evaluation;
266 
267         Evaluator(final Iterable<Statement> alignedStmts, final boolean simplified) {
268 
269             this.model = alignedStmts instanceof QuadModel ? (QuadModel) alignedStmts : QuadModel
270                     .create(alignedStmts);
271 
272             final Set<IRI> sentenceIRIs = Sets.newHashSet();
273             this.systemMap = Maps.newHashMap();
274             this.sentenceMap = Maps.newHashMap();
275             for (final Resource graphID : this.model.filter(null, RDF.TYPE, EVAL.KNOWLEDGE_GRAPH,
276                     EVAL.METADATA).subjects()) {
277                 final String system = this.model.filter(graphID, DCTERMS.CREATOR, null, //
278                         EVAL.METADATA).objectLiteral().stringValue();
279                 final IRI sentenceIRI = this.model.filter(graphID, DCTERMS.SOURCE, null, //
280                         EVAL.METADATA).objectURI();
281                 this.systemMap.put((IRI) graphID, system);
282                 this.sentenceMap.put((IRI) graphID, sentenceIRI);
283                 sentenceIRIs.add(sentenceIRI);
284             }
285 
286             this.sentenceLabels = Maps.newHashMap();
287             int index = 1;
288             for (final IRI sentenceIRI : Ordering.from(Statements.valueComparator()).sortedCopy(
289                     sentenceIRIs)) {
290                 this.sentenceLabels.put(sentenceIRI, "S" + index++);
291             }
292 
293             this.systems = Lists.newArrayList(Sets.newHashSet(this.systemMap.values()));
294             this.systems.remove("gold");
295             // this.systems.remove("fred");
296             Collections.sort(this.systems);
297 
298             this.ignorableRelations = HashMultimap.create();
299             this.forbiddenRelations = HashMultimap.create();
300             this.ignorableTypes = HashMultimap.create();
301             for (final Statement stmt : Iterables.concat(
302                     this.model.filter(null, EVAL.ASSOCIABLE_TO, null),
303                     this.model.filter(null, EVAL.NOT_ASSOCIABLE_TO, null),
304                     this.model.filter(null, EVAL.CLASSIFIABLE_AS, null))) {
305                 final IRI sentenceID = this.sentenceMap.get(stmt.getContext());
306                 final String system = this.systemMap.get(stmt.getContext());
307                 if (sentenceID != null && system.equals("gold")) {
308                     final IRI p = stmt.getPredicate();
309                     final Relation relation = new Relation((IRI) stmt.getSubject(),
310                             (IRI) stmt.getObject(), false);
311                     (EVAL.CLASSIFIABLE_AS.equals(p) ? this.ignorableTypes : EVAL.ASSOCIABLE_TO
312                             .equals(p) ? this.ignorableRelations : this.forbiddenRelations).put(
313                             sentenceID, relation);
314                 }
315             }
316 
317             final Stats nodeStats = nodeEvaluation();
318             final Stats unlabelledStats = unlabelledEvaluation();
319 
320             final Set<String> labelledPrefixes = simplified ? ImmutableSet.of("vn", "owl")
321                     : ImmutableSet.of("vn", "fn", "pb", "nb", "owl");
322             final Set<String> labelledNS = namespacesFor(labelledPrefixes.toArray(new String[] {}));
323             final Stats labelledStats = labelledEvaluation(labelledNS, "Labelled");
324             final Map<String, Stats> labelledStatsByNS = Maps.newHashMap();
325             for (final String prefix : labelledPrefixes) {
326                 final String ns = Util.NAMESPACES.uriFor(prefix);
327                 labelledStatsByNS.put(
328                         ns,
329                         labelledEvaluation(
330                                 ImmutableSet.of(ns),
331                                 prefix.equals("owl") ? "owl:sameAs" : "Roles ("
332                                         + prefix.toUpperCase() + ")"));
333             }
334 
335             final Set<String> typePrefixes = simplified ? ImmutableSet.of("vn", "fn")
336                     : ImmutableSet.of("vn", "fn", "pb", "nb");
337             final Set<String> typeNS = namespacesFor(typePrefixes.toArray(new String[] {}));
338             final Stats typeStats = attributeEvaluation(RDF.TYPE, typeNS, "Types");
339             final Map<String, Stats> typeStatsByNS = Maps.newHashMap();
340             for (final String prefix : typePrefixes) {
341                 final String ns = Util.NAMESPACES.uriFor(prefix);
342                 typeStatsByNS.put(
343                         ns,
344                         attributeEvaluation(RDF.TYPE, ImmutableSet.of(ns),
345                                 "Types (" + prefix.toUpperCase() + ")"));
346             }
347 
348             final Stats linkingStats = attributeEvaluation(OWL.SAMEAS, null, "DBpedia links");
349 
350             final Stats triplesStats = Stats.aggregate(ImmutableList.of(labelledStats, typeStats,
351                     linkingStats));
352 
353             final StringBuilder out = new StringBuilder();
354             emitSection(out, "NODES");
355             out.append(nodeStats.getReport());
356             emitSection(out, "UNLABELLED");
357             out.append(unlabelledStats.getReport());
358             //            emitSection(out, "LABELLED (ALL)");
359             //            out.append(labelledStats.getReport());
360             for (final String ns : labelledNS) {
361                 emitSection(out, "LABELLED (" + Util.NAMESPACES.prefixFor(ns).toUpperCase() + ")");
362                 out.append(labelledStatsByNS.get(ns).getReport());
363             }
364             //            emitSection(out, "TYPES (ALL)");
365             //            out.append(typeStats.getReport());
366             for (final String ns : typeNS) {
367                 emitSection(out, "TYPES (" + Util.NAMESPACES.prefixFor(ns).toUpperCase() + ")");
368                 out.append(typeStatsByNS.get(ns).getReport());
369             }
370             emitSection(out, "LINKING");
371             out.append(linkingStats.getReport());
372             emitSection(out, "SUMMARY");
373             emitStatsHeader(out, this.systems);
374             emitStats(out, nodeStats, "instances");
375             emitStats(out, unlabelledStats, "unlabelled");
376             emitStats(out, labelledStats, "labelled");
377             for (final String ns : labelledNS) {
378                 emitStats(out, labelledStatsByNS.get(ns), "  " + Util.NAMESPACES.prefixFor(ns));
379             }
380             emitStats(out, typeStats, "types");
381             for (final String ns : typeNS) {
382                 emitStats(out, typeStatsByNS.get(ns), "  " + Util.NAMESPACES.prefixFor(ns));
383             }
384             emitStats(out, linkingStats, "linking");
385             emitStats(out, triplesStats, "triples");
386 
387             this.evaluation = new Evaluation(nodeStats, unlabelledStats, labelledStats,
388                     labelledStatsByNS, typeStats, typeStatsByNS, linkingStats, triplesStats,
389                     out.toString());
390         }
391 
392         Evaluation get() {
393             return this.evaluation;
394         }
395 
396         private String escape(final String string) {
397             return HtmlEscapers.htmlEscaper().escape(string);
398         }
399 
400         private Stats nodeEvaluation() {
401 
402             final StringBuilder out = new StringBuilder();
403 
404             final Table<IRI, String, List<IRI>> nodesTable = HashBasedTable.create();
405             for (final Statement stmt : this.model.filter(null, RDF.TYPE, EVAL.NODE)) {
406                 final IRI sentenceID = this.sentenceMap.get(stmt.getContext());
407                 final String system = this.systemMap.get(stmt.getContext());
408                 if (sentenceID != null && system != null) {
409                     getList(nodesTable, sentenceID, system).add((IRI) stmt.getSubject());
410                 }
411             }
412 
413             final Table<IRI, String, Multimap<IRI, IRI>> alignmentTable = HashBasedTable.create();
414             for (final Statement stmt : this.model.filter(null, EVAL.MAPPED_TO, null)) {
415                 final IRI graphID = (IRI) stmt.getContext();
416                 final IRI sentenceID = this.sentenceMap.get(graphID);
417                 final String system = this.systemMap.get(graphID);
418                 if (sentenceID != null && system != null) {
419                     final IRI goldNode = (IRI) stmt.getObject();
420                     final IRI testNode = (IRI) stmt.getSubject();
421                     getMultimap(alignmentTable, sentenceID, system).put(goldNode, testNode);
422                 }
423             }
424 
425             final Map<String, PrecisionRecall.Evaluator> goldEvaluators = initPR();
426             final Map<String, PrecisionRecall.Evaluator> unionEvaluators = initPR();
427             emitHeader(out, "Instances");
428 
429             String sentenceIRICell = "";
430             for (final IRI sentenceIRI : Util.VALUE_ORDERING.sortedCopy(nodesTable.rowKeySet())) {
431                 final Multimap<String, IRI> alignedNodes = HashMultimap.create();
432                 sentenceIRICell = this.sentenceLabels.get(sentenceIRI);
433                 out.append("\n<!-- sentence " + escape(sentenceIRICell) + " -->");
434                 final List<IRI> goldNodes = Util.VALUE_ORDERING.sortedCopy(nodesTable.get(
435                         sentenceIRI, "gold"));
436                 String style = " style=\"border-top: 4px solid #dddddd\"";
437                 for (final IRI goldNode : goldNodes) {
438                     out.append(String.format("\n<tr%s><td>%s</td><td>%s", style,
439                             escape(sentenceIRICell), escape(Util.format(sentenceIRI, goldNode))));
440                     style = "";
441                     sentenceIRICell = "";
442                     for (final String system : this.systems) {
443                         final Multimap<IRI, IRI> alignments = alignmentTable.get(sentenceIRI,
444                                 system);
445                         final Collection<IRI> testNodes = alignments == null ? ImmutableSet.of()
446                                 : alignments.get(goldNode);
447                         if (testNodes.isEmpty()) {
448                             goldEvaluators.get(system).addFN(1);
449                         } else {
450                             goldEvaluators.get(system).addTP(1);
451                             unionEvaluators.get(system).addTP(1);
452                             alignedNodes.putAll(system, testNodes);
453                         }
454                         out.append(String.format("</td><td>%s",
455                                 escape(Util.format(sentenceIRI, testNodes.toArray()))));
456                     }
457                     out.append("</td></tr>");
458                 }
459                 for (final String system : this.systems) {
460                     final Set<IRI> testNodes = Sets
461                             .newHashSet(nodesTable.get(sentenceIRI, system));
462                     testNodes.removeAll(alignedNodes.get(system));
463                     goldEvaluators.get(system).addFP(testNodes.size());
464                     unionEvaluators.get(system).addFP(testNodes.size());
465                     for (final IRI testNode : Util.VALUE_ORDERING.sortedCopy(testNodes)) {
466                         out.append("\n<tr><td></td><td>");
467                         for (final String s : this.systems) {
468                             out.append(String.format("</td><td>%s",
469                                     s.equals(system) ? escape(Util.format(sentenceIRI, testNode))
470                                             : ""));
471                         }
472                         out.append("</td></tr>");
473                     }
474                 }
475                 final Set<IRI> union = Sets.newHashSet();
476                 for (final String system : this.systems) {
477                     union.addAll(getMultimap(alignmentTable, sentenceIRI, system).keySet());
478                 }
479                 union.retainAll(goldNodes);
480                 for (final String system : this.systems) {
481                     unionEvaluators.get(system).addFN(
482                             Sets.difference(union,
483                                     getMultimap(alignmentTable, sentenceIRI, system).keySet())
484                                     .size());
485                 }
486             }
487 
488             final Map<String, PrecisionRecall> goldPRs = finalizePR(goldEvaluators);
489             final Map<String, PrecisionRecall> unionPRs = finalizePR(unionEvaluators);
490             emitPR(out, goldPRs, unionPRs);
491             return new Stats(goldPRs, unionPRs, out.toString());
492         }
493 
494         private Stats unlabelledEvaluation() {
495 
496             final StringBuilder out = new StringBuilder();
497 
498             final Table<IRI, String, List<Relation>> relationTable = HashBasedTable.create();
499             final Table<IRI, String, Multimap<Relation, Relation>> mappingTable = HashBasedTable
500                     .create();
501             for (final BindingSet bindings : Util.query(this.model, RELATION_QUERY)) {
502                 final IRI g = (IRI) bindings.getValue("g");
503                 final IRI sentenceID = this.sentenceMap.get(g);
504                 final String system = this.systemMap.get(g);
505                 if (sentenceID != null && system != null) {
506                     final IRI s = (IRI) bindings.getValue("s");
507                     final IRI o = (IRI) bindings.getValue("o");
508                     final IRI sm = (IRI) bindings.getValue("sm");
509                     final IRI om = (IRI) bindings.getValue("om");
510                     if (sm != null && om != null && sm.equals(om)) {
511                         continue; // self relation after mapping
512                     }
513                     final Relation r = new Relation(s, o, true);
514                     final Relation rm = new Relation(sm != null ? sm : s, om != null ? om : o,
515                             sm == null || om == null);
516                     getList(relationTable, sentenceID, system).add(rm);
517                     getMultimap(mappingTable, sentenceID, system).put(rm, r);
518                 }
519             }
520 
521             final Map<String, PrecisionRecall.Evaluator> goldEvaluators = initPR();
522             final Map<String, PrecisionRecall.Evaluator> unionEvaluators = initPR();
523             emitHeader(out, "Edges");
524 
525             String sentenceIRICell = "";
526             for (final IRI sentenceIRI : Util.VALUE_ORDERING.sortedCopy(relationTable.rowKeySet())) {
527 
528                 sentenceIRICell = this.sentenceLabels.get(sentenceIRI);
529                 out.append("\n<!-- sentence " + escape(sentenceIRICell) + " -->");
530 
531                 String style = " style=\"border-top: 4px solid #dddddd\"";
532                 final List<Relation> goldRelations = relationTable.get(sentenceIRI, "gold");
533                 for (final Relation goldRelation : Ordering.natural().sortedCopy(goldRelations)) {
534                     out.append(String.format("\n<tr%s><td>%s</td><td>%s", style,
535                             escape(sentenceIRICell), escape(goldRelation.toString(sentenceIRI))));
536                     style = "";
537                     sentenceIRICell = "";
538                     for (final String system : this.systems) {
539                         final Multimap<Relation, Relation> alignments = getMultimap(mappingTable,
540                                 sentenceIRI, system);
541                         final Collection<Relation> testRelations = alignments == null ? ImmutableSet
542                                 .of() : alignments.get(goldRelation);
543                         if (testRelations.isEmpty()) {
544                             goldEvaluators.get(system).addFN(1);
545                         } else {
546                             goldEvaluators.get(system).addTP(1);
547                             unionEvaluators.get(system).addTP(1);
548                         }
549                         out.append(String.format("</td><td>%s",
550                                 escape(Util.format(sentenceIRI, testRelations.toArray()))));
551                     }
552                     out.append("</td></tr>");
553                 }
554 
555                 final Set<Relation> unknownRelations = Sets.newHashSet();
556                 final Set<Relation> goldRelationSet = ImmutableSet.copyOf(goldRelations);
557                 for (final String system : this.systems) {
558                     final Multimap<Relation, Relation> multimap = getMultimap(mappingTable,
559                             sentenceIRI, system);
560                     for (final Relation keyRelation : Ordering.natural().sortedCopy(
561                             multimap.keySet())) {
562                         if (!goldRelationSet.contains(keyRelation)) {
563                             final boolean ignore = keyRelation.isExtra()
564                                     || this.ignorableRelations.containsEntry(sentenceIRI,
565                                             keyRelation);
566                             if (!ignore
567                                     && !this.forbiddenRelations.containsEntry(sentenceIRI,
568                                             keyRelation)) {
569                                 unknownRelations.add(keyRelation);
570                             }
571                             if (!ignore) {
572                                 goldEvaluators.get(system).addFP(1);
573                                 unionEvaluators.get(system).addFP(1);
574                             }
575                             out.append(String.format("\n<tr><td></td><td>"));
576                             for (final String s : this.systems) {
577                                 out.append(String.format(
578                                         "</td><td>%s",
579                                         !s.equals(system) ? "" : (ignore ? "* " : "")
580                                                 + escape(Util.format(sentenceIRI,
581                                                         multimap.get(keyRelation).toArray()))));
582                             }
583                             out.append("</td></tr>");
584                         }
585                     }
586                 }
587 
588                 if (!unknownRelations.isEmpty()) {
589                     LOGGER.warn("Unknown relations for sentence " + sentenceIRI + ":\n"
590                             + Joiner.on('\n').join(unknownRelations));
591                 }
592 
593                 final Set<Relation> union = Sets.newHashSet();
594                 for (final String system : this.systems) {
595                     union.addAll(getList(relationTable, sentenceIRI, system));
596                 }
597                 union.retainAll(goldRelationSet);
598                 for (final String system : this.systems) {
599                     unionEvaluators.get(system).addFN(
600                             Sets.difference(union,
601                                     getMultimap(mappingTable, sentenceIRI, system).keySet())
602                                     .size());
603                 }
604             }
605 
606             final Map<String, PrecisionRecall> goldPRs = finalizePR(goldEvaluators);
607             final Map<String, PrecisionRecall> unionPRs = finalizePR(unionEvaluators);
608             emitPR(out, goldPRs, unionPRs);
609             return new Stats(goldPRs, unionPRs, out.toString());
610         }
611 
        /**
         * Evaluates the statements returned by {@code LABELLED_QUERY} and renders the outcome as
         * HTML table rows.
         * <p>
         * Every query solution whose graph {@code g} is associated to both a sentence and a system
         * contributes an original statement {@code (s, p, o)} and a "mapped" statement where
         * subject and object are replaced by {@code sm} / {@code om} when those bindings are
         * present. Mapped statements are grouped by sentence and system; the group stored under the
         * {@code "gold"} key is the reference against which each entry of {@code this.systems} is
         * scored.
         * </p>
         *
         * @param namespaces
         *            if not null, solutions whose predicate namespace is not in this set are
         *            skipped
         * @param type
         *            label forwarded to {@code emitHeader}
         * @return a {@code Stats} object built from the per-system results w.r.t. gold, the
         *         per-system results w.r.t. the union of correct answers, and the generated text
         */
        private Stats labelledEvaluation(@Nullable final Set<String> namespaces, final String type) {

            final StringBuilder out = new StringBuilder();

            final ValueFactory vf = Statements.VALUE_FACTORY;
            // Context used to tag mapped statements for which at least one node mapping is missing
            final IRI extraCtx = vf.createIRI("eval:Extra");

            // (sentence, system) -> mapped statements, and (sentence, system) -> mapped statement
            // -> original statements it was derived from
            final Table<IRI, String, List<Statement>> stmtTable = HashBasedTable.create();
            final Table<IRI, String, Multimap<Statement, Statement>> mappingTable = HashBasedTable
                    .create();
            for (final BindingSet bindings : Util.query(this.model, LABELLED_QUERY)) {
                final IRI g = (IRI) bindings.getValue("g");
                final IRI sentenceID = this.sentenceMap.get(g);
                final String system = this.systemMap.get(g);
                // Solutions from graphs not tied to a known sentence and system are dropped
                if (sentenceID != null && system != null) {
                    final IRI s = (IRI) bindings.getValue("s");
                    final IRI p = (IRI) bindings.getValue("p");
                    final IRI o = (IRI) bindings.getValue("o");
                    final IRI sm = (IRI) bindings.getValue("sm");
                    final IRI om = (IRI) bindings.getValue("om");
                    if (namespaces != null && !namespaces.contains(p.getNamespace())) {
                        continue;
                    }
                    if (sm != null && om != null && sm.equals(om)) {
                        continue; // self relation after mapping
                    }
                    final Statement stmt = vf.createStatement(s, p, o);
                    // Fall back to the unmapped node where no mapping exists; in that case the
                    // statement is placed in extraCtx so it can be recognized later
                    final Statement stmtm = vf.createStatement(sm != null ? sm : s, p,
                            om != null ? om : o, sm == null || om == null ? extraCtx : null);
                    getList(stmtTable, sentenceID, system).add(stmtm);
                    getMultimap(mappingTable, sentenceID, system).put(stmtm, stmt);
                }
            }

            final Map<String, PrecisionRecall.Evaluator> goldEvaluators = initPR();
            final Map<String, PrecisionRecall.Evaluator> unionEvaluators = initPR();
            emitHeader(out, type);

            String sentenceIRICell = "";
            for (final IRI sentenceIRI : Util.VALUE_ORDERING.sortedCopy(stmtTable.rowKeySet())) {

                sentenceIRICell = this.sentenceLabels.get(sentenceIRI);
                out.append("\n<!-- sentence " + escape(sentenceIRICell) + " -->");

                // Part 1: one row per gold statement, with one cell per system listing the
                // original system statements whose mapped form equals the gold statement
                String style = " style=\"border-top: 4px solid #dddddd\"";
                final List<Statement> goldStmts = MoreObjects.firstNonNull(
                        stmtTable.get(sentenceIRI, "gold"), ImmutableList.<Statement>of());
                for (final Statement goldStmt : Util.STMT_ORDERING.sortedCopy(goldStmts)) {
                    out.append(String.format("\n<tr%s><td>%s</td><td>%s", style,
                            escape(sentenceIRICell), escape(Util.format(sentenceIRI, goldStmt))));
                    // Top border and sentence label are emitted only on the first gold row
                    style = "";
                    sentenceIRICell = "";
                    for (final String system : this.systems) {
                        // getMultimap creates and stores an empty multimap when none exists, so
                        // alignments is never null and the null branch below is not taken
                        final Multimap<Statement, Statement> alignments = getMultimap(
                                mappingTable, sentenceIRI, system);
                        final Collection<Statement> testStmts = alignments == null ? ImmutableSet
                                .of() : alignments.get(goldStmt);
                        if (testStmts.isEmpty()) {
                            goldEvaluators.get(system).addFN(1);
                        } else {
                            goldEvaluators.get(system).addTP(1);
                            unionEvaluators.get(system).addTP(1);
                        }
                        out.append(String.format("</td><td>%s",
                                escape(Util.format(sentenceIRI, testStmts.toArray()))));
                    }
                    out.append("</td></tr>");
                }

                // Part 2: one row per system statement whose mapped form is not a gold statement
                final Set<Statement> goldStmtSet = ImmutableSet.copyOf(goldStmts);
                for (final String system : this.systems) {
                    final Multimap<Statement, Statement> multimap = getMultimap(mappingTable,
                            sentenceIRI, system);
                    for (final Statement keyStmt : Util.STMT_ORDERING
                            .sortedCopy(multimap.keySet())) {
                        if (!goldStmtSet.contains(keyStmt)) {
                            final Relation keyRelation = keyStmt.getSubject() instanceof IRI
                                    && keyStmt.getObject() instanceof IRI ? new Relation(
                                    (IRI) keyStmt.getSubject(), (IRI) keyStmt.getObject(), false)
                                    : null;
                            // Not counted as false positive if a node mapping was missing or the
                            // relation is registered as ignorable for this sentence
                            final boolean ignore = extraCtx.equals(keyStmt.getContext())
                                    || this.ignorableRelations.containsEntry(sentenceIRI,
                                            keyRelation);
                            if (!ignore) {
                                goldEvaluators.get(system).addFP(1);
                                unionEvaluators.get(system).addFP(1);
                            }
                            out.append(String.format("\n<tr><td>%s</td><td>%s", "", ""));
                            // Only the column of the current system is filled; ignored entries
                            // are prefixed with "* "
                            for (final String s : this.systems) {
                                out.append(String.format(
                                        "</td><td>%s",
                                        !s.equals(system) ? "" : (ignore ? "* " : "")
                                                + escape(Util.format(sentenceIRI,
                                                        multimap.get(keyStmt).toArray()))));
                            }
                            out.append("</td></tr>");
                        }
                    }
                }

                // Part 3: gold statements returned by at least one system form the "union"; each
                // system gets a union false negative for every union statement it did not return
                final Set<Statement> union = Sets.newHashSet();
                for (final String system : this.systems) {
                    union.addAll(getList(stmtTable, sentenceIRI, system));
                }
                union.retainAll(goldStmtSet);
                for (final String system : this.systems) {
                    unionEvaluators.get(system).addFN(
                            Sets.difference(union,
                                    getMultimap(mappingTable, sentenceIRI, system).keySet())
                                    .size());
                }
            }

            final Map<String, PrecisionRecall> goldPRs = finalizePR(goldEvaluators);
            final Map<String, PrecisionRecall> unionPRs = finalizePR(unionEvaluators);
            emitPR(out, goldPRs, unionPRs);
            return new Stats(goldPRs, unionPRs, out.toString());
        }
730 
        /**
         * Evaluates the statements returned by {@code ATTRIBUTE_QUERY} and renders the outcome as
         * HTML table rows.
         * <p>
         * Every query solution whose graph {@code g} is associated to both a sentence and a system
         * contributes an original statement {@code (s, p, o)} and a "mapped" statement whose
         * subject is replaced by {@code sm} when that binding is present (the object is never
         * mapped here). Mapped statements are grouped by sentence and system; the group stored
         * under the {@code "gold"} key is the reference against which each entry of
         * {@code this.systems} is scored.
         * </p>
         *
         * @param predicate
         *            if not null, solutions with a different predicate are skipped
         * @param valueNS
         *            if not null, solutions are skipped unless the object is an IRI whose
         *            namespace is in this set
         * @param type
         *            label forwarded to {@code emitHeader}
         * @return a {@code Stats} object built from the per-system results w.r.t. gold, the
         *         per-system results w.r.t. the union of correct answers, and the generated text
         */
        private Stats attributeEvaluation(@Nullable final IRI predicate,
                @Nullable final Set<String> valueNS, final String type) {

            final StringBuilder out = new StringBuilder();

            final ValueFactory vf = Statements.VALUE_FACTORY;
            // Context used to tag mapped statements whose subject mapping is missing
            final IRI extraCtx = vf.createIRI("eval:Extra");

            // (sentence, system) -> mapped statements, and (sentence, system) -> mapped statement
            // -> original statements it was derived from
            final Table<IRI, String, List<Statement>> stmtTable = HashBasedTable.create();
            final Table<IRI, String, Multimap<Statement, Statement>> mappingTable = HashBasedTable
                    .create();
            for (final BindingSet bindings : Util.query(this.model, ATTRIBUTE_QUERY)) {
                final IRI g = (IRI) bindings.getValue("g");
                final IRI sentenceID = this.sentenceMap.get(g);
                final String system = this.systemMap.get(g);
                // Solutions from graphs not tied to a known sentence and system are dropped
                if (sentenceID != null && system != null) {
                    final IRI s = (IRI) bindings.getValue("s");
                    final IRI p = (IRI) bindings.getValue("p");
                    final Value o = bindings.getValue("o");
                    final IRI sm = (IRI) bindings.getValue("sm");
                    // Skip on predicate mismatch, or when a value namespace filter is given and
                    // the object is not an IRI in one of the accepted namespaces
                    if (predicate != null && !p.equals(predicate) //
                            || valueNS != null && (!(o instanceof IRI) || //
                            !valueNS.contains(((IRI) o).getNamespace()))) {
                        continue;
                    }
                    final Statement stmt = vf.createStatement(s, p, o);
                    // Fall back to the unmapped subject where no mapping exists; in that case the
                    // statement is placed in extraCtx so it can be recognized later
                    final Statement stmtm = vf.createStatement(sm != null ? sm : s, p, o,
                            sm == null ? extraCtx : null);
                    getList(stmtTable, sentenceID, system).add(stmtm);
                    getMultimap(mappingTable, sentenceID, system).put(stmtm, stmt);
                }
            }

            final Map<String, PrecisionRecall.Evaluator> goldEvaluators = initPR();
            final Map<String, PrecisionRecall.Evaluator> unionEvaluators = initPR();
            emitHeader(out, type);

            String sentenceIRICell = "";
            for (final IRI sentenceIRI : Util.VALUE_ORDERING.sortedCopy(stmtTable.rowKeySet())) {

                sentenceIRICell = this.sentenceLabels.get(sentenceIRI);
                out.append("\n<!-- sentence " + escape(sentenceIRICell) + " -->");

                // Part 1: one row per gold statement, with one cell per system listing the
                // original system statements whose mapped form equals the gold statement
                String style = " style=\"border-top: 4px solid #dddddd\"";
                final List<Statement> goldStmts = MoreObjects.firstNonNull(
                        stmtTable.get(sentenceIRI, "gold"), ImmutableList.<Statement>of());
                for (final Statement goldStmt : Util.STMT_ORDERING.sortedCopy(goldStmts)) {
                    out.append(String.format("\n<tr%s><td>%s</td><td>%s", style,
                            escape(sentenceIRICell), escape(Util.format(sentenceIRI, goldStmt))));
                    // Top border and sentence label are emitted only on the first gold row
                    style = "";
                    sentenceIRICell = "";
                    for (final String system : this.systems) {
                        // getMultimap creates and stores an empty multimap when none exists, so
                        // alignments is never null and the null branch below is not taken
                        final Multimap<Statement, Statement> alignments = getMultimap(
                                mappingTable, sentenceIRI, system);
                        final Collection<Statement> testStmts = alignments == null ? ImmutableSet
                                .of() : alignments.get(goldStmt);
                        if (testStmts.isEmpty()) {
                            goldEvaluators.get(system).addFN(1);
                        } else {
                            goldEvaluators.get(system).addTP(1);
                            unionEvaluators.get(system).addTP(1);
                        }
                        out.append(String.format("</td><td>%s",
                                escape(Util.format(sentenceIRI, testStmts.toArray()))));
                    }
                    out.append("</td></tr>");
                }

                // Part 2: one row per system statement whose mapped form is not a gold statement
                final Set<Statement> goldStmtSet = ImmutableSet.copyOf(goldStmts);
                for (final String system : this.systems) {
                    final Multimap<Statement, Statement> multimap = getMultimap(mappingTable,
                            sentenceIRI, system);
                    for (final Statement keyStmt : Util.STMT_ORDERING
                            .sortedCopy(multimap.keySet())) {
                        if (!goldStmtSet.contains(keyStmt)) {
                            // keyRelation is null whenever the object is not an IRI (e.g. a
                            // literal attribute value)
                            final Relation keyRelation = keyStmt.getSubject() instanceof IRI
                                    && keyStmt.getObject() instanceof IRI ? new Relation(
                                    (IRI) keyStmt.getSubject(), (IRI) keyStmt.getObject(), false)
                                    : null;
                            // NOTE(review): ignorableTypes is probed with a Relation value here,
                            // exactly as ignorableRelations is in labelledEvaluation; confirm that
                            // ignorableTypes really holds Relation values, otherwise this lookup
                            // can never match
                            final boolean ignore = extraCtx.equals(keyStmt.getContext())
                                    || this.ignorableTypes.containsEntry(sentenceIRI, keyRelation);
                            if (!ignore) {
                                goldEvaluators.get(system).addFP(1);
                                unionEvaluators.get(system).addFP(1);
                            }
                            out.append(String.format("\n<tr><td></td><td>"));
                            // Only the column of the current system is filled; ignored entries
                            // are prefixed with "* "
                            for (final String s : this.systems) {
                                out.append(String.format(
                                        "</td><td>%s",
                                        !s.equals(system) ? "" : (ignore ? "* " : "")
                                                + escape(Util.format(sentenceIRI,
                                                        multimap.get(keyStmt).toArray()))));
                            }
                            out.append("</td></tr>");
                        }
                    }
                }

                // Part 3: gold statements returned by at least one system form the "union"; each
                // system gets a union false negative for every union statement it did not return
                final Set<Statement> union = Sets.newHashSet();
                for (final String system : this.systems) {
                    union.addAll(getList(stmtTable, sentenceIRI, system));
                }
                union.retainAll(goldStmtSet);
                for (final String system : this.systems) {
                    unionEvaluators.get(system).addFN(
                            Sets.difference(union,
                                    getMultimap(mappingTable, sentenceIRI, system).keySet())
                                    .size());
                }
            }

            final Map<String, PrecisionRecall> goldPRs = finalizePR(goldEvaluators);
            final Map<String, PrecisionRecall> unionPRs = finalizePR(unionEvaluators);
            emitPR(out, goldPRs, unionPRs);
            return new Stats(goldPRs, unionPRs, out.toString());
        }
847 
848         private Map<String, PrecisionRecall.Evaluator> initPR() {
849             final Map<String, PrecisionRecall.Evaluator> evaluators = Maps.newHashMap();
850             for (final String system : this.systems) {
851                 evaluators.put(system, PrecisionRecall.evaluator());
852             }
853             return evaluators;
854         }
855 
856         private Map<String, PrecisionRecall> finalizePR(
857                 final Map<String, PrecisionRecall.Evaluator> evaluators) {
858             final ImmutableMap.Builder<String, PrecisionRecall> builder = ImmutableMap.builder();
859             for (final String system : this.systems) {
860                 builder.put(system, evaluators.get(system).getResult());
861             }
862             return builder.build();
863         }
864 
865         private void emitSection(final StringBuilder out, final String name) {
866             out.append(String.format("\n\n\n\n=== %s ===\n\n\n", name));
867         }
868 
869         private void emitHeader(final StringBuilder out, final String type) {
870 
871             final String title = (this.systems.size() <= 1 ? "Separate" : "Comparative")
872                     + " evaluation - " + type;
873 
874             out.append("<!DOCTYPE html>");
875             out.append("\n<html>");
876             out.append("\n<head>");
877             out.append("\n<title>").append(title).append("</title>");
878             out.append("\n<meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\">");
879             out.append("\n<link rel=\"stylesheet\" href=\"https://maxcdn.bootstrapcdn.com/bootstrap/3.3.5/css/bootstrap.min.css\">");
880             out.append("\n<link rel=\"stylesheet\" href=\"https://maxcdn.bootstrapcdn.com/bootstrap/3.3.5/css/bootstrap-theme.min.css\">");
881             out.append("\n<script src=\"https://code.jquery.com/jquery-1.11.3.min.js\"></script>");
882             out.append("\n<script src=\"https://maxcdn.bootstrapcdn.com/bootstrap/3.3.5/js/bootstrap.min.js\"></script>");
883             out.append("\n</head>");
884             out.append("\n<body>");
885             out.append("\n<div class=\"container\">");
886             out.append("\n<h1>").append(title).append("</h1>");
887             out.append("\n<p>* irrelevant element returned by evaluated system, not considered as false positive</p>");
888 
889             final String width = (90 / (this.systems.size() + 1)) + "%";
890             out.append("\n<table class=\"table table-striped table-bordered table-hover table-condensed\">");
891             out.append("\n<thead>");
892             out.append("\n<tr><th width=\"10%\">Sentence</th><th width=\"").append(width)
893                     .append("\">Gold");
894             for (final String system : this.systems) {
895                 out.append("</th><th width=\"").append(width).append("\">")
896                         .append(system.toUpperCase());
897             }
898             out.append("</th></tr>");
899             out.append("\n</thead>");
900             out.append("\n<tbody>");
901         }
902 
903         private void emitPR(final StringBuilder out,
904                 @Nullable final Map<String, PrecisionRecall> goldPRs,
905                 @Nullable final Map<String, PrecisionRecall> unionPRs) {
906             out.append("\n<!-- results -->");
907             if (goldPRs != null) {
908                 out.append("\n<tr style=\"border-top: 4px solid #dddddd\"><td colspan=\"2\">Results w.r.t. gold standard");
909                 for (final String system : this.systems) {
910                     final PrecisionRecall pr = goldPRs.get(system);
911                     out.append(String.format("</td><td>%s", escape(Util.format(null, pr))));
912                 }
913                 out.append("</td></tr>");
914             }
915             if (unionPRs != null && this.systems.size() > 1) {
916                 out.append("\n<tr><td colspan=\"2\">Results w.r.t. union of correct answers");
917                 for (final String system : this.systems) {
918                     final PrecisionRecall pr = unionPRs.get(system);
919                     out.append(String.format("</td><td>%s", escape(Util.format(null, pr))));
920                 }
921                 out.append("</td></tr>");
922             }
923             out.append("\n</tbody>");
924             out.append("\n</table>");
925         }
926 
927         private void emitStatsHeader(final StringBuilder out, final List<String> systems) {
928             final String blank = Strings.repeat(" ", 16);
929             final String prStr = "  p     r     f1   ";
930             final String prStrs = Strings.repeat(prStr, systems.size());
931             final int count = prStrs.length() + 4;
932             out.append(String.format("%s  %-" + count + "s  %-" + count + "s\n", blank,
933                     "gold p/r", "union p/r"));
934             out.append(blank);
935             for (int i = 0; i < 2; ++i) {
936                 out.append("  gold");
937                 for (final String system : systems) {
938                     out.append(String.format("  %-" + (prStr.length() - 2) + "s", system));
939                 }
940             }
941             out.append(String.format("\n%s     #%s     #%s\n", blank, prStrs, prStrs));
942             out.append(Strings.repeat("-", 16 + prStrs.length() * 2 + 8 + 4));
943             out.append("\n");
944         }
945 
946         @SuppressWarnings({ "unchecked" })
947         private void emitStats(final StringBuilder out, final Stats stats, final String label) {
948             out.append(String.format("%-16s", label));
949             for (final Map<String, PrecisionRecall> map : new Map[] { stats.getGoldPRs(),
950                     stats.getUnionPRs() }) {
951                 final PrecisionRecall countPR = map.values().iterator().next();
952                 final int count = (int) (countPR.getTP() + countPR.getFN());
953                 out.append("  ").append(String.format("%4d", count));
954                 for (final String system : stats.getSystems()) {
955                     final PrecisionRecall pr = map.get(system);
956                     out.append("  ");
957                     // out.append(String.format("%5.0f %5.0f %5.0f", pr.getTP(),
958                     // pr.getFP(), pr.getFN()));
959                     out.append(String.format("%5.3f %5.3f %5.3f", pr.getPrecision(),
960                             pr.getRecall(), pr.getF1()));
961                 }
962             }
963             out.append("\n");
964         }
965 
966         private static <R, C, T> List<T> getList(final Table<R, C, List<T>> table, final R row,
967                 final C col) {
968             List<T> list = table.get(row, col);
969             if (list == null) {
970                 list = Lists.newArrayList();
971                 table.put(row, col, list);
972             }
973             return list;
974         }
975 
976         private static <R, C, K, V> Multimap<K, V> getMultimap(
977                 final Table<R, C, Multimap<K, V>> table, final R row, final C col) {
978             Multimap<K, V> multimap = table.get(row, col);
979             if (multimap == null) {
980                 multimap = HashMultimap.create();
981                 table.put(row, col, multimap);
982             }
983             return multimap;
984         }
985 
986         private static Set<String> namespacesFor(final String... elements) {
987             final Set<String> set = Sets.newHashSet();
988             for (final String element : elements) {
989                 final String uri = Util.NAMESPACES.uriFor(element);
990                 set.add(uri != null ? uri : element);
991             }
992             return set;
993         }
994 
995     }
996 
997     public static void main(final String[] args) {
998         try {
999             // Parse command line
1000             final CommandLine cmd = CommandLine.parser().withName("eval-evaluate")
1001                     .withOption("s", "simplified", "use simplified gold standard")
1002                     .withHeader("Evaluates precision/recall given aligned data.").parse(args);
1003 
1004             // Extract options
1005             final List<String> inputFiles = cmd.getArgs(String.class);
1006             final boolean simplified = cmd.hasOption("s");
1007 
1008             // Read the input
1009             final Map<String, String> namespaces = Maps.newHashMap();
1010             final List<Statement> stmts = Lists.newArrayList();
1011             RDFSources.read(false, false, null, null,null,true,
1012                     inputFiles.toArray(new String[inputFiles.size()])).emit(
1013                     RDFHandlers.wrap(stmts, namespaces), 1);
1014 
1015             // Perform the evaluation
1016             final Evaluation evaluation = Evaluation.evaluate(stmts, simplified);
1017             LOGGER.info("Evaluation results:\n\n{}", evaluation.getReport());
1018 
1019         } catch (final Throwable ex) {
1020             // Display error information and terminate
1021             CommandLine.fail(ex);
1022         }
1023     }
1024 
1025 }