1   package eu.fbk.dkm.pikes.resources.goodbadfor;
2   
import eu.fbk.utils.core.CommandLine;
import eu.fbk.utils.core.FrequencyHashSet;
import ixa.kaflib.KAFDocument;
import ixa.kaflib.Opinion;
import ixa.kaflib.Span;
import ixa.kaflib.Term;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.FilenameUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
17  
18  /**
19   * Created by alessio on 01/04/15.
20   */
21  
22  public class CorpusAnalyzer {
23  
24  	private static final Logger LOGGER = LoggerFactory.getLogger(CorpusAnalyzer.class);
25  
26  	public static String spanToLemmas(Span<Term> span) {
27  		StringBuffer stringBuffer = new StringBuffer();
28  		for (Term term : span.getTargets()) {
29  			stringBuffer.append(term.getLemma().toLowerCase());
30  			stringBuffer.append(" ");
31  		}
32  		return stringBuffer.toString().trim();
33  	}
34  
35  	public static void main(String[] args) {
36  		try {
37  			final CommandLine cmd = CommandLine
38  					.parser()
39  					.withName("eu.fbk.dkm.pikes.resources.goodbadfor-analyzer")
40  					.withHeader("Analyze the corpus and makes statistics")
41  					.withOption("i", "input-path", "the base path of the corpus", "DIR", CommandLine.Type.DIRECTORY_EXISTING, true, false, true)
42  					.withOption("t", "test", "test only on this file", "FILE", CommandLine.Type.STRING, true, false, false)
43  					.withLogger(LoggerFactory.getLogger("eu.fbk.fssa")).parse(args);
44  
45  			final File inputPath = cmd.getOptionValue("i", File.class);
46  
47  			File nafFolder = new File(inputPath.getAbsolutePath() + File.separator + "NAF-parsed" + File.separator);
48  			String testFile = cmd.getOptionValue("t", String.class);
49  
50  			if (!nafFolder.exists()) {
51  				LOGGER.error("Folder {} does not exist", nafFolder.getAbsolutePath());
52  			}
53  
54  			Iterator<File> fileIterator;
55  			fileIterator = FileUtils.iterateFiles(nafFolder, new String[]{"naf"}, false);
56  
57  			FrequencyHashSet influenceRet = new FrequencyHashSet();
58  			FrequencyHashSet influenceRev = new FrequencyHashSet();
59  
60  			FrequencyHashSet goodFor = new FrequencyHashSet();
61  			FrequencyHashSet badFor = new FrequencyHashSet();
62  
63  			while (fileIterator.hasNext()) {
64  				File file = fileIterator.next();
65  				String fileBaseName = FilenameUtils.removeExtension(file.getName());
66  
67  				if (testFile != null && !testFile.equals(fileBaseName)) {
68  					continue;
69  				}
70  
71  				LOGGER.debug(String.format("Loading file %s", file));
72  				try {
73  					KAFDocument document = KAFDocument.createFromFile(file);
74  					List<Opinion> opinionList = document.getOpinions();
75  					for (Opinion opinion : opinionList) {
76  						if (opinion.getLabel().equals("gold-influencer")) {
77  							Opinion.OpinionExpression expression = opinion.getOpinionExpression();
78  							if (expression.getPolarity().equals("reverse")) {
79  								influenceRev.add(spanToLemmas(expression.getSpan()));
80  							}
81  							if (expression.getPolarity().equals("retain")) {
82  								influenceRet.add(spanToLemmas(expression.getSpan()));
83  							}
84  						}
85  
86  						if (opinion.getLabel().equals("gold-gfbf")) {
87  							Opinion.OpinionExpression expression = opinion.getOpinionExpression();
88  							if (expression.getPolarity().equals("goodfor")) {
89  								goodFor.add(spanToLemmas(expression.getSpan()));
90  							}
91  							if (expression.getPolarity().equals("badfor")) {
92  								badFor.add(spanToLemmas(expression.getSpan()));
93  							}
94  						}
95  					}
96  				} catch (Exception e) {
97  					LOGGER.error(e.getMessage());
98  				}
99  			}
100 
101 			System.out.println(influenceRet.getSorted());
102 			System.out.println(influenceRev.getSorted());
103 			System.out.println(goodFor.getSorted());
104 			System.out.println(badFor.getSorted());
105 		} catch (final Throwable ex) {
106 			CommandLine.fail(ex);
107 		}
108 
109 	}
110 }