1   package eu.fbk.dkm.pikes.raid;
2   
3   import com.google.common.collect.Sets;
4   import eu.fbk.utils.core.CommandLine;
5   import eu.fbk.utils.eval.SetPrecisionRecall;
6   import org.slf4j.LoggerFactory;
7   
8   import java.io.BufferedReader;
9   import java.io.File;
10  import java.io.FileReader;
11  import java.util.HashSet;
12  import java.util.Set;
13  
14  /**
15   * Created by alessio on 08/05/15.
16   */
17  
18  public class Evaluation {
19  
20  	private static final org.slf4j.Logger LOGGER = LoggerFactory.getLogger(Evaluation.class);
21  
22  	public static void main(String[] args) {
23  		try {
24  			final CommandLine cmd = CommandLine
25  					.parser()
26  					.withName("yamcha-evaluator")
27  					.withHeader("Evaluate YAMCHA classification")
28  					.withOption("i", "input-file", "the test file annotated", "FILE", CommandLine.Type.FILE_EXISTING, true, false, true)
29  //					.withOption("g", "gold", "gold column (starting from 0)", "NUM", CommandLine.Type.POSITIVE_INTEGER, true, false, true)
30  //					.withOption("t", "test", "test column (starting from 0)", "NUM", CommandLine.Type.POSITIVE_INTEGER, true, false, true)
31  					.withLogger(LoggerFactory.getLogger("eu.fbk")).parse(args);
32  
33  			File testFile = cmd.getOptionValue("i", File.class);
34  //			Integer goldCol = cmd.getOptionValue("g", Integer.class);
35  //			Integer testCol = cmd.getOptionValue("t", Integer.class);
36  
37  			SetPrecisionRecall.Evaluator e = SetPrecisionRecall.evaluator();
38  
39  			BufferedReader reader = new BufferedReader(new FileReader(testFile));
40  
41  			String line;
42  			Set<Set<Integer>> goldSpans = Sets.newHashSet();
43  			Set<Set<Integer>> testSpans = Sets.newHashSet();
44  			Set<Integer> thisGoldSpan = Sets.newHashSet();
45  			Set<Integer> thisTestSpan = Sets.newHashSet();
46  
47  //			Integer goldCol = -1;
48  //			Integer testCol = -1;
49  
50  			int totSents = 0;
51  			int okSents = 0;
52  			int okSentLen = 0;
53  			int noSents = 0;
54  			int noSentLen = 0;
55  
56  			int i = 0;
57  			int tokCount = 0;
58  			while ((line = reader.readLine()) != null) {
59  				i++;
60  
61  				if (line.trim().length() > 0) {
62  					LOGGER.debug("{} --- {} - {}", i,
63  							line.substring(0, Math.min(20, line.length())),
64  							line.substring(Math.max(0, line.length() - 10)));
65  				}
66  
67  				if (line.trim().length() == 0) {
68  					LOGGER.debug("Sentence token count: {}", tokCount);
69  
70  					LOGGER.debug("Gold: {}", goldSpans.toString());
71  					LOGGER.debug("Test: {}", testSpans.toString());
72  
73  					HashSet<Integer> allGold = new HashSet<>();
74  					for (Set<Integer> goldSpan : goldSpans) {
75  						allGold.addAll(goldSpan);
76  					}
77  					HashSet<Integer> allTest = new HashSet<>();
78  					for (Set<Integer> testSpan : testSpans) {
79  						allTest.addAll(testSpan);
80  					}
81  
82  //					LOGGER.debug(allTest.toString());
83  //					LOGGER.debug(allGold.toString());
84  					totSents++;
85  					if (allTest.equals(allGold)) {
86  						okSents++;
87  						okSentLen += allTest.size();
88  						LOGGER.debug("CORRECT");
89  					}
90  					else {
91  						noSents++;
92  						noSentLen += allTest.size();
93  						LOGGER.debug("WRONG");
94  					}
95  
96  					e.add(goldSpans, testSpans);
97  					goldSpans = Sets.newHashSet();
98  					testSpans = Sets.newHashSet();
99  //					tokCount = 0;
100 				}
101 
102 				String[] parts = line.split("\\s");
103 				if (parts.length < 2) {
104 					continue;
105 				}
106 
107 				int testCol = parts.length - 1;
108 				int goldCol = parts.length - 2;
109 				tokCount++;
110 
111 //				if (parts.length > 0 && parts[0].trim().length() > 0 && goldCol.equals(-1) && testCol.equals(-1)) {
112 //					testCol = parts.length - 1;
113 //					goldCol = parts.length - 2;
114 //				}
115 //
116 //				if (parts.length <= Math.max(testCol, goldCol)) {
117 //					if (parts.length > 0 && parts[0].trim().length() > 0) {
118 //						LOGGER.warn("Column count problem in line {}", i);
119 //					}
120 //					continue;
121 //				}
122 
123 				if (parts[goldCol].equals("O")) {
124 					if (thisGoldSpan.size() > 0) {
125 						goldSpans.add(thisGoldSpan);
126 					}
127 					thisGoldSpan = Sets.newHashSet();
128 				}
129 				else {
130 					thisGoldSpan.add(i);
131 				}
132 
133 				if (parts[testCol].equals("O")) {
134 					if (thisTestSpan.size() > 0) {
135 						testSpans.add(thisTestSpan);
136 					}
137 					thisTestSpan = Sets.newHashSet();
138 				}
139 				else {
140 					thisTestSpan.add(i);
141 				}
142 
143 //				System.out.println(thisGoldSpan);
144 //				System.out.println(thisTestSpan);
145 //				System.out.println();
146 			}
147 			e.add(goldSpans, testSpans);
148 
149 			SetPrecisionRecall spr = e.getResult();
150 			System.out.println(totSents);
151 			System.out.println(tokCount);
152 
153 			System.out.println(okSents);
154 			System.out.println(okSentLen);
155 			System.out.println((double) okSentLen / (double) okSents);
156 
157 			System.out.println(noSents);
158 			System.out.println(noSentLen);
159 			System.out.println((double) noSentLen / (double) noSents);
160 
161 			System.out.println(spr);
162 
163 			reader.close();
164 		} catch (final Throwable ex) {
165 			CommandLine.fail(ex);
166 		}
167 
168 	}
169 }