1 package eu.fbk.dkm.pikes.resources.goodbadfor;
2
import eu.fbk.utils.core.CommandLine;
import eu.fbk.utils.core.FrequencyHashSet;
import ixa.kaflib.KAFDocument;
import ixa.kaflib.Opinion;
import ixa.kaflib.Span;
import ixa.kaflib.Term;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.FilenameUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
17
18
19
20
21
22 public class CorpusAnalyzer {
23
24 private static final Logger LOGGER = LoggerFactory.getLogger(CorpusAnalyzer.class);
25
26 public static String spanToLemmas(Span<Term> span) {
27 StringBuffer stringBuffer = new StringBuffer();
28 for (Term term : span.getTargets()) {
29 stringBuffer.append(term.getLemma().toLowerCase());
30 stringBuffer.append(" ");
31 }
32 return stringBuffer.toString().trim();
33 }
34
35 public static void main(String[] args) {
36 try {
37 final CommandLine cmd = CommandLine
38 .parser()
39 .withName("eu.fbk.dkm.pikes.resources.goodbadfor-analyzer")
40 .withHeader("Analyze the corpus and makes statistics")
41 .withOption("i", "input-path", "the base path of the corpus", "DIR", CommandLine.Type.DIRECTORY_EXISTING, true, false, true)
42 .withOption("t", "test", "test only on this file", "FILE", CommandLine.Type.STRING, true, false, false)
43 .withLogger(LoggerFactory.getLogger("eu.fbk.fssa")).parse(args);
44
45 final File inputPath = cmd.getOptionValue("i", File.class);
46
47 File nafFolder = new File(inputPath.getAbsolutePath() + File.separator + "NAF-parsed" + File.separator);
48 String testFile = cmd.getOptionValue("t", String.class);
49
50 if (!nafFolder.exists()) {
51 LOGGER.error("Folder {} does not exist", nafFolder.getAbsolutePath());
52 }
53
54 Iterator<File> fileIterator;
55 fileIterator = FileUtils.iterateFiles(nafFolder, new String[]{"naf"}, false);
56
57 FrequencyHashSet influenceRet = new FrequencyHashSet();
58 FrequencyHashSet influenceRev = new FrequencyHashSet();
59
60 FrequencyHashSet goodFor = new FrequencyHashSet();
61 FrequencyHashSet badFor = new FrequencyHashSet();
62
63 while (fileIterator.hasNext()) {
64 File file = fileIterator.next();
65 String fileBaseName = FilenameUtils.removeExtension(file.getName());
66
67 if (testFile != null && !testFile.equals(fileBaseName)) {
68 continue;
69 }
70
71 LOGGER.debug(String.format("Loading file %s", file));
72 try {
73 KAFDocument document = KAFDocument.createFromFile(file);
74 List<Opinion> opinionList = document.getOpinions();
75 for (Opinion opinion : opinionList) {
76 if (opinion.getLabel().equals("gold-influencer")) {
77 Opinion.OpinionExpression expression = opinion.getOpinionExpression();
78 if (expression.getPolarity().equals("reverse")) {
79 influenceRev.add(spanToLemmas(expression.getSpan()));
80 }
81 if (expression.getPolarity().equals("retain")) {
82 influenceRet.add(spanToLemmas(expression.getSpan()));
83 }
84 }
85
86 if (opinion.getLabel().equals("gold-gfbf")) {
87 Opinion.OpinionExpression expression = opinion.getOpinionExpression();
88 if (expression.getPolarity().equals("goodfor")) {
89 goodFor.add(spanToLemmas(expression.getSpan()));
90 }
91 if (expression.getPolarity().equals("badfor")) {
92 badFor.add(spanToLemmas(expression.getSpan()));
93 }
94 }
95 }
96 } catch (Exception e) {
97 LOGGER.error(e.getMessage());
98 }
99 }
100
101 System.out.println(influenceRet.getSorted());
102 System.out.println(influenceRev.getSorted());
103 System.out.println(goodFor.getSorted());
104 System.out.println(badFor.getSorted());
105 } catch (final Throwable ex) {
106 CommandLine.fail(ex);
107 }
108
109 }
110 }