1 package eu.fbk.dkm.pikes.raid;
2
3 import com.google.common.collect.Sets;
4 import eu.fbk.utils.core.CommandLine;
5 import eu.fbk.utils.eval.SetPrecisionRecall;
6 import org.slf4j.LoggerFactory;
7
8 import java.io.BufferedReader;
9 import java.io.File;
10 import java.io.FileReader;
11 import java.util.HashSet;
12 import java.util.Set;
13
14
15
16
17
18 public class Evaluation {
19
20 private static final org.slf4j.Logger LOGGER = LoggerFactory.getLogger(Evaluation.class);
21
22 public static void main(String[] args) {
23 try {
24 final CommandLine cmd = CommandLine
25 .parser()
26 .withName("yamcha-evaluator")
27 .withHeader("Evaluate YAMCHA classification")
28 .withOption("i", "input-file", "the test file annotated", "FILE", CommandLine.Type.FILE_EXISTING, true, false, true)
29
30
31 .withLogger(LoggerFactory.getLogger("eu.fbk")).parse(args);
32
33 File testFile = cmd.getOptionValue("i", File.class);
34
35
36
37 SetPrecisionRecall.Evaluator e = SetPrecisionRecall.evaluator();
38
39 BufferedReader reader = new BufferedReader(new FileReader(testFile));
40
41 String line;
42 Set<Set<Integer>> goldSpans = Sets.newHashSet();
43 Set<Set<Integer>> testSpans = Sets.newHashSet();
44 Set<Integer> thisGoldSpan = Sets.newHashSet();
45 Set<Integer> thisTestSpan = Sets.newHashSet();
46
47
48
49
50 int totSents = 0;
51 int okSents = 0;
52 int okSentLen = 0;
53 int noSents = 0;
54 int noSentLen = 0;
55
56 int i = 0;
57 int tokCount = 0;
58 while ((line = reader.readLine()) != null) {
59 i++;
60
61 if (line.trim().length() > 0) {
62 LOGGER.debug("{} --- {} - {}", i,
63 line.substring(0, Math.min(20, line.length())),
64 line.substring(Math.max(0, line.length() - 10)));
65 }
66
67 if (line.trim().length() == 0) {
68 LOGGER.debug("Sentence token count: {}", tokCount);
69
70 LOGGER.debug("Gold: {}", goldSpans.toString());
71 LOGGER.debug("Test: {}", testSpans.toString());
72
73 HashSet<Integer> allGold = new HashSet<>();
74 for (Set<Integer> goldSpan : goldSpans) {
75 allGold.addAll(goldSpan);
76 }
77 HashSet<Integer> allTest = new HashSet<>();
78 for (Set<Integer> testSpan : testSpans) {
79 allTest.addAll(testSpan);
80 }
81
82
83
84 totSents++;
85 if (allTest.equals(allGold)) {
86 okSents++;
87 okSentLen += allTest.size();
88 LOGGER.debug("CORRECT");
89 }
90 else {
91 noSents++;
92 noSentLen += allTest.size();
93 LOGGER.debug("WRONG");
94 }
95
96 e.add(goldSpans, testSpans);
97 goldSpans = Sets.newHashSet();
98 testSpans = Sets.newHashSet();
99
100 }
101
102 String[] parts = line.split("\\s");
103 if (parts.length < 2) {
104 continue;
105 }
106
107 int testCol = parts.length - 1;
108 int goldCol = parts.length - 2;
109 tokCount++;
110
111
112
113
114
115
116
117
118
119
120
121
122
123 if (parts[goldCol].equals("O")) {
124 if (thisGoldSpan.size() > 0) {
125 goldSpans.add(thisGoldSpan);
126 }
127 thisGoldSpan = Sets.newHashSet();
128 }
129 else {
130 thisGoldSpan.add(i);
131 }
132
133 if (parts[testCol].equals("O")) {
134 if (thisTestSpan.size() > 0) {
135 testSpans.add(thisTestSpan);
136 }
137 thisTestSpan = Sets.newHashSet();
138 }
139 else {
140 thisTestSpan.add(i);
141 }
142
143
144
145
146 }
147 e.add(goldSpans, testSpans);
148
149 SetPrecisionRecall spr = e.getResult();
150 System.out.println(totSents);
151 System.out.println(tokCount);
152
153 System.out.println(okSents);
154 System.out.println(okSentLen);
155 System.out.println((double) okSentLen / (double) okSents);
156
157 System.out.println(noSents);
158 System.out.println(noSentLen);
159 System.out.println((double) noSentLen / (double) noSents);
160
161 System.out.println(spr);
162
163 reader.close();
164 } catch (final Throwable ex) {
165 CommandLine.fail(ex);
166 }
167
168 }
169 }