1   package eu.fbk.dkm.pikes.raid;
2   
3   import com.google.common.collect.Iterables;
4   import eu.fbk.utils.core.CommandLine;
5   import eu.fbk.utils.core.ValueComparator;
6   import eu.fbk.utils.eval.PrecisionRecallStats;
7   import ixa.kaflib.KAFDocument;
8   import ixa.kaflib.Opinion;
9   import ixa.kaflib.Opinion.OpinionExpression;
10  import ixa.kaflib.Opinion.OpinionTarget;
11  import ixa.kaflib.Term;
12  import org.apache.commons.io.FileUtils;
13  import org.slf4j.LoggerFactory;
14  
15  import java.io.File;
16  import java.io.IOException;
17  import java.util.*;
18  
19  /**
20   * Created by alessio on 02/04/15.
21   */
22  
23  public class EvaluateOnStanford {
24  
25  	private static final org.slf4j.Logger LOGGER = LoggerFactory.getLogger(EvaluateOnStanford.class);
26  	private static final String STANFORD_LABEL = "stanford-sentiment";
27  
28  	// todo: move to a utility class
29  	private static final String DEFAULT_NAF_PARSED_DIR = "NAF-parsed";
30  	public static List<String> DEFAULT_NAF_EXTENSIONS = new ArrayList<>();
31  
32  	static {
33  		DEFAULT_NAF_EXTENSIONS.add("xml");
34  		DEFAULT_NAF_EXTENSIONS.add("naf");
35  	}
36  
37  	public static Map sortByValue(Map unsortedMap, boolean desc) {
38  		Map sortedMap = new TreeMap(new ValueComparator(unsortedMap, desc));
39  		sortedMap.putAll(unsortedMap);
40  		return sortedMap;
41  	}
42  
43  	private static void addOpinionToMap(Map<Opinion, Integer> map, Opinion opinion) {
44  		map.put(opinion, opinion.getOpinionExpression().getTerms().size());
45  	}
46  
47  	public static void main(String[] args) {
48  		CommandLine cmd = null;
49  		try {
50  			cmd = CommandLine
51  					.parser()
52  					.withName("evaluate")
53  					.withHeader("Calculate p/r on a dataset")
54  					.withOption("i", "input-path", "the base path of the corpus", "DIR",
55  							CommandLine.Type.DIRECTORY_EXISTING, true, false, true)
56  					.withOption("p", "parsed-dir",
57  							String.format("folder with the parsed NAFS, default [basedir]/%s", DEFAULT_NAF_PARSED_DIR),
58  							"DIR", CommandLine.Type.DIRECTORY_EXISTING, true, false, false)
59  					.withOption("e", "extensions", String.format("Input extensions (default %s)", DEFAULT_NAF_EXTENSIONS), "EXTS", CommandLine.Type.STRING, true, true, false)
60  					.withOption("t", "threshold", "Threshold for neutral", "NUM", CommandLine.Type.NON_NEGATIVE_INTEGER, true, false, false)
61  					.withLogger(LoggerFactory.getLogger("eu.fbk.fssa")).parse(args);
62  
63  			File mainFolder = cmd.getOptionValue("i", File.class);
64  			File input = new File(mainFolder.getAbsolutePath() + File.separator + DEFAULT_NAF_PARSED_DIR);
65  			if (cmd.hasOption("p")) {
66  				input = cmd.getOptionValue("p", File.class);
67  			}
68  
69  			Integer threshold = cmd.getOptionValue("t", Integer.class);
70  
71  			List<String> extensions = null;
72  			if (cmd.hasOption("e")) {
73  				extensions = cmd.getOptionValues("e", String.class);
74  			}
75  			if (extensions == null) {
76  				extensions = DEFAULT_NAF_EXTENSIONS;
77  			}
78  
79  			try {
80  				if (!input.exists()) {
81  					throw new IOException(String.format("Folder %s does not exist", input.getAbsolutePath()));
82  				}
83  
84  				LOGGER.info("Loading file list");
85  				Iterator<File> fileIterator = FileUtils.iterateFiles(input, extensions.toArray(new String[extensions.size()]), true);
86  
87  				PrecisionRecallStats precisionRecallStats = new PrecisionRecallStats();
88  
89  				int goldOpinionCount = 0;
90  
91  				int numFiles = 0;
92  
93  				while (fileIterator.hasNext()) {
94  					File file = fileIterator.next();
95  					LOGGER.info(String.format("Loading file %s", file));
96  
97  					KAFDocument document = KAFDocument.createFromFile(file);
98  					++numFiles;
99  
100 					OpinionSet mpqaOpinions = new OpinionSet();
101 					OpinionSet stanfordOpinions = new OpinionSet(true);
102 
103 					for (Opinion opinion : document.getOpinions()) {
104 						if (opinion.getLabel() == null || opinion.getLabel().toLowerCase().contains("gold")) {
105 							if (isValidOpinion(opinion)) {
106 								mpqaOpinions.add(opinion);
107 								++goldOpinionCount;
108 							}
109 						}
110 						if (opinion.getLabel() != null && opinion.getLabel().toLowerCase().contains("stanford")) {
111 							stanfordOpinions.add(opinion);
112 						}
113 					}
114 
115 //					System.out.println(stanfordOpinions.size());
116 //					System.out.println(mpqaOpinions);
117 
118 					entryLoop:
119 					for (OpinionSet.OpinionEntry entry : mpqaOpinions) {
120 						Opinion opinion = entry.getOpinion();
121 						HashSet<Term> terms = new HashSet<>(opinion.getOpinionExpression().getTerms());
122 						LOGGER.debug("Finding {}", opinion.getOpinionExpression().getSpan().getStr());
123 						for (OpinionSet.OpinionEntry checkEntry : stanfordOpinions) {
124 							Opinion checkOpinion = checkEntry.getOpinion();
125 							LOGGER.trace("Checking {}", checkOpinion.getOpinionExpression().getSpan().getStr());
126 							HashSet<Term> checkTerms = new HashSet<>(checkOpinion.getOpinionExpression().getTerms());
127 							int sizeBefore = checkTerms.size();
128 							checkTerms.retainAll(terms);
129 							if (checkTerms.size() == sizeBefore) {
130 								LOGGER.debug("Found! {} === {}", opinion.getOpinionExpression().getSpan().getStr(), checkOpinion.getOpinionExpression().getSpan().getStr());
131 
132 								String stanfordPolarity;
133 								String goldPolarity = normalizePolarity(opinion.getOpinionExpression().getPolarity());
134 
135 								String stanfordPolarities = checkOpinion.getOpinionExpression().getStrength();
136 								if (stanfordPolarities != null && stanfordPolarities.length() > 0) {
137 									String[] parts = stanfordPolarities.split("\\|");
138 									Double neg = Double.parseDouble(parts[0].replace(',', '.')) + Double.parseDouble(parts[1].replace(',', '.'));
139 									Double neu = Double.parseDouble(parts[2].replace(',', '.'));
140 									Double pos = Double.parseDouble(parts[3].replace(',', '.')) + Double.parseDouble(parts[4].replace(',', '.'));
141 									if (threshold == null || 100 * neu > threshold) {
142 										if (neg > neu && neg > pos) {
143 											stanfordPolarity = "negative";
144 										}
145 										else if (pos > neu && pos > neg) {
146 											stanfordPolarity = "positive";
147 										}
148 										else {
149 											stanfordPolarity = "neutral";
150 										}
151 									}
152 									else {
153 										if (pos > neg) {
154 											stanfordPolarity = "positive";
155 										}
156 										else if (pos < neg) {
157 											stanfordPolarity = "negative";
158 										}
159 										else {
160 											stanfordPolarity = "neutral";
161 										}
162 									}
163 								}
164 								else {
165 									stanfordPolarity = checkOpinion.getOpinionExpression().getPolarity().toLowerCase();
166 								}
167 
168 								if (stanfordPolarity.equals("neutral")) {
169 									precisionRecallStats.incrementFN();
170 								}
171 								else {
172 									if (stanfordPolarity.contains(goldPolarity)) {
173 										precisionRecallStats.incrementTP();
174 									}
175 									else {
176 										precisionRecallStats.incrementFP();
177 									}
178 								}
179 								LOGGER.debug("Comparing -{}- and -{}-", opinion.getOpinionExpression().getPolarity(), checkOpinion.getOpinionExpression().getPolarity());
180 								continue entryLoop;
181 							}
182 						}
183 						LOGGER.debug("Not found");
184 					}
185 				}
186 
187 				LOGGER.info("Precision: {}", precisionRecallStats.getPrecision());
188 				LOGGER.info("Recall: {}", precisionRecallStats.getRecall());
189 				LOGGER.info("F1: {}", precisionRecallStats.getFMeasure());
190 				LOGGER.info("(computed on {} gold opinions and {} files)", goldOpinionCount, numFiles);
191 
192 			} catch (Exception e) {
193 				LOGGER.error(e.getMessage());
194 				e.printStackTrace();
195 			}
196 		} catch (Exception e) {
197 			CommandLine.fail(e);
198 		}
199 	}
200 
201 	private static String normalizePolarity(String polarity) {
202 		String p = polarity.toLowerCase();
203 		if (p.contains("pos")) {
204 			return "positive";
205 		}
206 		else if (p.contains("neg")) {
207 			return "negative";
208 		}
209 		else {
210 			return "neutral";
211 		}
212 	}
213 
214 
215 	private static boolean isValidOpinion(final Opinion opinion) {
216 		final OpinionTarget target = opinion.getOpinionTarget();
217 		final OpinionExpression exp = opinion.getOpinionExpression();
218 		if (exp != null && target != null && exp.getPolarity() != null && exp.getSpan() != null
219 				&& exp.getSpan().size() > 0 && target.getSpan() != null
220 				&& target.getSpan().size() > 0) {
221 			final int id = opinion.getOpinionTarget().getSpan().getTargets().get(0).getSent();
222 			for (final Term term : Iterables.concat(exp.getTerms(), target.getTerms())) {
223 				if (term.getSent() != id) {
224 					return false;
225 				}
226 			}
227 			if (normalizePolarity(exp.getPolarity()).equals("neutral")) {
228 				return false;
229 			}
230 			return true;
231 		}
232 		return false;
233 	}
234 
235 }