1 package eu.fbk.dkm.pikes.raid;
2
3 import com.google.common.collect.Iterables;
4 import eu.fbk.utils.core.CommandLine;
5 import eu.fbk.utils.core.ValueComparator;
6 import eu.fbk.utils.eval.PrecisionRecallStats;
7 import ixa.kaflib.KAFDocument;
8 import ixa.kaflib.Opinion;
9 import ixa.kaflib.Opinion.OpinionExpression;
10 import ixa.kaflib.Opinion.OpinionTarget;
11 import ixa.kaflib.Term;
12 import org.apache.commons.io.FileUtils;
13 import org.slf4j.LoggerFactory;
14
15 import java.io.File;
16 import java.io.IOException;
17 import java.util.*;
18
19
20
21
22
23 public class EvaluateOnStanford {
24
25 private static final org.slf4j.Logger LOGGER = LoggerFactory.getLogger(EvaluateOnStanford.class);
26 private static final String STANFORD_LABEL = "stanford-sentiment";
27
28
29 private static final String DEFAULT_NAF_PARSED_DIR = "NAF-parsed";
30 public static List<String> DEFAULT_NAF_EXTENSIONS = new ArrayList<>();
31
32 static {
33 DEFAULT_NAF_EXTENSIONS.add("xml");
34 DEFAULT_NAF_EXTENSIONS.add("naf");
35 }
36
37 public static Map sortByValue(Map unsortedMap, boolean desc) {
38 Map sortedMap = new TreeMap(new ValueComparator(unsortedMap, desc));
39 sortedMap.putAll(unsortedMap);
40 return sortedMap;
41 }
42
43 private static void addOpinionToMap(Map<Opinion, Integer> map, Opinion opinion) {
44 map.put(opinion, opinion.getOpinionExpression().getTerms().size());
45 }
46
47 public static void main(String[] args) {
48 CommandLine cmd = null;
49 try {
50 cmd = CommandLine
51 .parser()
52 .withName("evaluate")
53 .withHeader("Calculate p/r on a dataset")
54 .withOption("i", "input-path", "the base path of the corpus", "DIR",
55 CommandLine.Type.DIRECTORY_EXISTING, true, false, true)
56 .withOption("p", "parsed-dir",
57 String.format("folder with the parsed NAFS, default [basedir]/%s", DEFAULT_NAF_PARSED_DIR),
58 "DIR", CommandLine.Type.DIRECTORY_EXISTING, true, false, false)
59 .withOption("e", "extensions", String.format("Input extensions (default %s)", DEFAULT_NAF_EXTENSIONS), "EXTS", CommandLine.Type.STRING, true, true, false)
60 .withOption("t", "threshold", "Threshold for neutral", "NUM", CommandLine.Type.NON_NEGATIVE_INTEGER, true, false, false)
61 .withLogger(LoggerFactory.getLogger("eu.fbk.fssa")).parse(args);
62
63 File mainFolder = cmd.getOptionValue("i", File.class);
64 File input = new File(mainFolder.getAbsolutePath() + File.separator + DEFAULT_NAF_PARSED_DIR);
65 if (cmd.hasOption("p")) {
66 input = cmd.getOptionValue("p", File.class);
67 }
68
69 Integer threshold = cmd.getOptionValue("t", Integer.class);
70
71 List<String> extensions = null;
72 if (cmd.hasOption("e")) {
73 extensions = cmd.getOptionValues("e", String.class);
74 }
75 if (extensions == null) {
76 extensions = DEFAULT_NAF_EXTENSIONS;
77 }
78
79 try {
80 if (!input.exists()) {
81 throw new IOException(String.format("Folder %s does not exist", input.getAbsolutePath()));
82 }
83
84 LOGGER.info("Loading file list");
85 Iterator<File> fileIterator = FileUtils.iterateFiles(input, extensions.toArray(new String[extensions.size()]), true);
86
87 PrecisionRecallStats precisionRecallStats = new PrecisionRecallStats();
88
89 int goldOpinionCount = 0;
90
91 int numFiles = 0;
92
93 while (fileIterator.hasNext()) {
94 File file = fileIterator.next();
95 LOGGER.info(String.format("Loading file %s", file));
96
97 KAFDocument document = KAFDocument.createFromFile(file);
98 ++numFiles;
99
100 OpinionSet mpqaOpinions = new OpinionSet();
101 OpinionSet stanfordOpinions = new OpinionSet(true);
102
103 for (Opinion opinion : document.getOpinions()) {
104 if (opinion.getLabel() == null || opinion.getLabel().toLowerCase().contains("gold")) {
105 if (isValidOpinion(opinion)) {
106 mpqaOpinions.add(opinion);
107 ++goldOpinionCount;
108 }
109 }
110 if (opinion.getLabel() != null && opinion.getLabel().toLowerCase().contains("stanford")) {
111 stanfordOpinions.add(opinion);
112 }
113 }
114
115
116
117
118 entryLoop:
119 for (OpinionSet.OpinionEntry entry : mpqaOpinions) {
120 Opinion opinion = entry.getOpinion();
121 HashSet<Term> terms = new HashSet<>(opinion.getOpinionExpression().getTerms());
122 LOGGER.debug("Finding {}", opinion.getOpinionExpression().getSpan().getStr());
123 for (OpinionSet.OpinionEntry checkEntry : stanfordOpinions) {
124 Opinion checkOpinion = checkEntry.getOpinion();
125 LOGGER.trace("Checking {}", checkOpinion.getOpinionExpression().getSpan().getStr());
126 HashSet<Term> checkTerms = new HashSet<>(checkOpinion.getOpinionExpression().getTerms());
127 int sizeBefore = checkTerms.size();
128 checkTerms.retainAll(terms);
129 if (checkTerms.size() == sizeBefore) {
130 LOGGER.debug("Found! {} === {}", opinion.getOpinionExpression().getSpan().getStr(), checkOpinion.getOpinionExpression().getSpan().getStr());
131
132 String stanfordPolarity;
133 String goldPolarity = normalizePolarity(opinion.getOpinionExpression().getPolarity());
134
135 String stanfordPolarities = checkOpinion.getOpinionExpression().getStrength();
136 if (stanfordPolarities != null && stanfordPolarities.length() > 0) {
137 String[] parts = stanfordPolarities.split("\\|");
138 Double neg = Double.parseDouble(parts[0].replace(',', '.')) + Double.parseDouble(parts[1].replace(',', '.'));
139 Double neu = Double.parseDouble(parts[2].replace(',', '.'));
140 Double pos = Double.parseDouble(parts[3].replace(',', '.')) + Double.parseDouble(parts[4].replace(',', '.'));
141 if (threshold == null || 100 * neu > threshold) {
142 if (neg > neu && neg > pos) {
143 stanfordPolarity = "negative";
144 }
145 else if (pos > neu && pos > neg) {
146 stanfordPolarity = "positive";
147 }
148 else {
149 stanfordPolarity = "neutral";
150 }
151 }
152 else {
153 if (pos > neg) {
154 stanfordPolarity = "positive";
155 }
156 else if (pos < neg) {
157 stanfordPolarity = "negative";
158 }
159 else {
160 stanfordPolarity = "neutral";
161 }
162 }
163 }
164 else {
165 stanfordPolarity = checkOpinion.getOpinionExpression().getPolarity().toLowerCase();
166 }
167
168 if (stanfordPolarity.equals("neutral")) {
169 precisionRecallStats.incrementFN();
170 }
171 else {
172 if (stanfordPolarity.contains(goldPolarity)) {
173 precisionRecallStats.incrementTP();
174 }
175 else {
176 precisionRecallStats.incrementFP();
177 }
178 }
179 LOGGER.debug("Comparing -{}- and -{}-", opinion.getOpinionExpression().getPolarity(), checkOpinion.getOpinionExpression().getPolarity());
180 continue entryLoop;
181 }
182 }
183 LOGGER.debug("Not found");
184 }
185 }
186
187 LOGGER.info("Precision: {}", precisionRecallStats.getPrecision());
188 LOGGER.info("Recall: {}", precisionRecallStats.getRecall());
189 LOGGER.info("F1: {}", precisionRecallStats.getFMeasure());
190 LOGGER.info("(computed on {} gold opinions and {} files)", goldOpinionCount, numFiles);
191
192 } catch (Exception e) {
193 LOGGER.error(e.getMessage());
194 e.printStackTrace();
195 }
196 } catch (Exception e) {
197 CommandLine.fail(e);
198 }
199 }
200
201 private static String normalizePolarity(String polarity) {
202 String p = polarity.toLowerCase();
203 if (p.contains("pos")) {
204 return "positive";
205 }
206 else if (p.contains("neg")) {
207 return "negative";
208 }
209 else {
210 return "neutral";
211 }
212 }
213
214
215 private static boolean isValidOpinion(final Opinion opinion) {
216 final OpinionTarget target = opinion.getOpinionTarget();
217 final OpinionExpression exp = opinion.getOpinionExpression();
218 if (exp != null && target != null && exp.getPolarity() != null && exp.getSpan() != null
219 && exp.getSpan().size() > 0 && target.getSpan() != null
220 && target.getSpan().size() > 0) {
221 final int id = opinion.getOpinionTarget().getSpan().getTargets().get(0).getSent();
222 for (final Term term : Iterables.concat(exp.getTerms(), target.getTerms())) {
223 if (term.getSent() != id) {
224 return false;
225 }
226 }
227 if (normalizePolarity(exp.getPolarity()).equals("neutral")) {
228 return false;
229 }
230 return true;
231 }
232 return false;
233 }
234
235 }