1 package eu.fbk.dkm.pikes.resources.boxer;
2
import eu.fbk.dkm.pikes.resources.mpqa.CorpusAnnotator;
import eu.fbk.utils.core.CommandLine;
import eu.fbk.utils.eval.PrecisionRecall;
import ixa.kaflib.ExternalRef;
import ixa.kaflib.KAFDocument;
import ixa.kaflib.Predicate;
import ixa.kaflib.Term;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.FilenameUtils;
import org.slf4j.LoggerFactory;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.*;
18
19
20
21
22
23 public class CorpusEvaluator {
24
25 private static final org.slf4j.Logger LOGGER = LoggerFactory.getLogger(CorpusEvaluator.class);
26
27 public static void main(String[] args) {
28 try {
29 final CommandLine cmd = CommandLine
30 .parser()
31 .withName("eu.fbk.dkm.pikes.resources.darmstadt-loader")
32 .withHeader("Load Boxer corpus and split it")
33 .withOption("i", "input-folder", "input folder", "DIR", CommandLine.Type.DIRECTORY_EXISTING, true, false, true)
34 .withOption("a", "annotation", "annotation file", "DIR", CommandLine.Type.FILE_EXISTING, true, false, true)
35 .withLogger(LoggerFactory.getLogger("eu.fbk.fssa")).parse(args);
36
37 final File inputFolder = cmd.getOptionValue("i", File.class);
38 final File annotationFile = cmd.getOptionValue("a", File.class);
39
40 List<String> extensions = null;
41 extensions = CorpusAnnotator.DEFAULT_NAF_EXTENSIONS;
42
43 ArrayList<String> lines = new ArrayList<>();
44
45 BufferedReader reader = new BufferedReader(new FileReader(annotationFile));
46 String line;
47 while ((line = reader.readLine()) != null) {
48 line = line.trim();
49 if (line.length() == 0) {
50 continue;
51 }
52 lines.add(line);
53 }
54 reader.close();
55
56 PrecisionRecall.Evaluator evaluator = PrecisionRecall.evaluator();
57
58 Iterator<File> fileIterator = FileUtils.iterateFiles(inputFolder, extensions.toArray(new String[extensions.size()]), true);
59 while (fileIterator.hasNext()) {
60 File file = fileIterator.next();
61 LOGGER.info("Loading file {}", file.getAbsolutePath());
62 KAFDocument document = KAFDocument.createFromFile(file);
63
64 HashMap<Term, String> fnTerms = new HashMap<>();
65 for (Predicate predicate : document.getPredicates()) {
66 HashSet<String> frameNets = new HashSet<>();
67 for (ExternalRef externalRef : predicate.getExternalRefs()) {
68 if (!externalRef.getResource().equals("eu.fbk.dkm.pikes.resources.FrameNet")) {
69 continue;
70 }
71 frameNets.add(externalRef.getReference());
72 }
73
74 if (frameNets.size() != 1) {
75 continue;
76 }
77 String fn = null;
78 for (String fn1 : frameNets) {
79 fn = fn1;
80 }
81 if (fn == null) {
82 continue;
83 }
84
85 for (Term predicateTerm : predicate.getTerms()) {
86 fnTerms.put(predicateTerm, fn);
87 }
88 }
89
90 int last = Integer.parseInt(FilenameUtils.getBaseName(file.getAbsolutePath()));
91 int start = last - (last - 1) % CorpusSplitter.sentencesPerCluster;
92 for (int i = start; i < last; i++) {
93 int j = i - 1;
94 int sent = j - start + 1;
95
96 String[] parts = lines.get(j).split(":");
97 String lemma = parts[0];
98 List<Term> terms = document.getSentenceTerms(sent + 1);
99 Term mainTerm = null;
100
101 LOGGER.debug(" " + sent + " " + j);
102 LOGGER.debug(Arrays.toString(parts));
103 LOGGER.debug(lemma);
104 LOGGER.debug(terms.toString());
105
106 for (Term term : terms) {
107 if (term.getLemma().equals(lemma)) {
108 mainTerm = term;
109 }
110 }
111
112 if (mainTerm == null) {
113
114
115
116
117 evaluator.addFN(1);
118 continue;
119 }
120
121 if (!mainTerm.getPos().equals("V")) {
122 continue;
123 }
124
125 if (fnTerms.get(mainTerm) == null) {
126
127
128
129
130 evaluator.addFN(1);
131 continue;
132 }
133
134
135
136 if (fnTerms.get(mainTerm).equals(parts[1])) {
137 evaluator.addTP(1);
138 continue;
139 }
140
141 evaluator.addFN(1);
142 evaluator.addFP(1);
143 }
144 }
145
146 PrecisionRecall precisionRecall = evaluator.getResult();
147 System.out.println(precisionRecall.toString());
148 } catch (final Throwable ex) {
149 CommandLine.fail(ex);
150 }
151
152 }
153 }