1   package eu.fbk.dkm.pikes.resources.boxer;
2   
3   import eu.fbk.dkm.pikes.resources.mpqa.CorpusAnnotator;
4   import eu.fbk.utils.core.CommandLine;
5   import eu.fbk.utils.eval.PrecisionRecall;
6   import ixa.kaflib.ExternalRef;
7   import ixa.kaflib.KAFDocument;
8   import ixa.kaflib.Predicate;
9   import ixa.kaflib.Term;
10  import org.apache.commons.io.FileUtils;
11  import org.apache.commons.io.FilenameUtils;
12  import org.slf4j.LoggerFactory;
13  
14  import java.io.BufferedReader;
15  import java.io.File;
16  import java.io.FileReader;
17  import java.util.*;
18  
19  /**
20   * Created by alessio on 05/05/15.
21   */
22  
23  public class CorpusEvaluator {
24  
25  	private static final org.slf4j.Logger LOGGER = LoggerFactory.getLogger(CorpusEvaluator.class);
26  
27  	public static void main(String[] args) {
28  		try {
29  			final CommandLine cmd = CommandLine
30  					.parser()
31  					.withName("eu.fbk.dkm.pikes.resources.darmstadt-loader")
32  					.withHeader("Load Boxer corpus and split it")
33  					.withOption("i", "input-folder", "input folder", "DIR", CommandLine.Type.DIRECTORY_EXISTING, true, false, true)
34  					.withOption("a", "annotation", "annotation file", "DIR", CommandLine.Type.FILE_EXISTING, true, false, true)
35  					.withLogger(LoggerFactory.getLogger("eu.fbk.fssa")).parse(args);
36  
37  			final File inputFolder = cmd.getOptionValue("i", File.class);
38  			final File annotationFile = cmd.getOptionValue("a", File.class);
39  
40  			List<String> extensions = null;
41  			extensions = CorpusAnnotator.DEFAULT_NAF_EXTENSIONS;
42  
43  			ArrayList<String> lines = new ArrayList<>();
44  
45  			BufferedReader reader = new BufferedReader(new FileReader(annotationFile));
46  			String line;
47  			while ((line = reader.readLine()) != null) {
48  				line = line.trim();
49  				if (line.length() == 0) {
50  					continue;
51  				}
52  				lines.add(line);
53  			}
54  			reader.close();
55  
56  			PrecisionRecall.Evaluator evaluator = PrecisionRecall.evaluator();
57  
58  			Iterator<File> fileIterator = FileUtils.iterateFiles(inputFolder, extensions.toArray(new String[extensions.size()]), true);
59  			while (fileIterator.hasNext()) {
60  				File file = fileIterator.next();
61  				LOGGER.info("Loading file {}", file.getAbsolutePath());
62  				KAFDocument document = KAFDocument.createFromFile(file);
63  
64  				HashMap<Term, String> fnTerms = new HashMap<>();
65  				for (Predicate predicate : document.getPredicates()) {
66  					HashSet<String> frameNets = new HashSet<>();
67  					for (ExternalRef externalRef : predicate.getExternalRefs()) {
68  						if (!externalRef.getResource().equals("eu.fbk.dkm.pikes.resources.FrameNet")) {
69  							continue;
70  						}
71  						frameNets.add(externalRef.getReference());
72  					}
73  
74  					if (frameNets.size() != 1) {
75  						continue;
76  					}
77  					String fn = null;
78  					for (String fn1 : frameNets) {
79  						fn = fn1;
80  					}
81  					if (fn == null) {
82  						continue;
83  					}
84  
85  					for (Term predicateTerm : predicate.getTerms()) {
86  						fnTerms.put(predicateTerm, fn);
87  					}
88  				}
89  
90  				int last = Integer.parseInt(FilenameUtils.getBaseName(file.getAbsolutePath()));
91  				int start = last - (last - 1) % CorpusSplitter.sentencesPerCluster;
92  				for (int i = start; i < last; i++) {
93  					int j = i - 1;
94  					int sent = j - start + 1;
95  
96  					String[] parts = lines.get(j).split(":");
97  					String lemma = parts[0];
98  					List<Term> terms = document.getSentenceTerms(sent + 1);
99  					Term mainTerm = null;
100 
101 					LOGGER.debug(" " + sent + " " + j);
102 					LOGGER.debug(Arrays.toString(parts));
103 					LOGGER.debug(lemma);
104 					LOGGER.debug(terms.toString());
105 
106 					for (Term term : terms) {
107 						if (term.getLemma().equals(lemma)) {
108 							mainTerm = term;
109 						}
110 					}
111 
112 					if (mainTerm == null) {
113 //						LOGGER.info(" " + sent + " " + j);
114 //						LOGGER.info(Arrays.toString(parts));
115 //						LOGGER.info(lemma);
116 //						LOGGER.info(terms.toString());
117 						evaluator.addFN(1);
118 						continue;
119 					}
120 
121 					if (!mainTerm.getPos().equals("V")) {
122 						continue;
123 					}
124 
125 					if (fnTerms.get(mainTerm) == null) {
126 //						LOGGER.info(" " + sent + " " + j);
127 //						LOGGER.info(Arrays.toString(parts));
128 //						LOGGER.info(lemma);
129 //						LOGGER.info(terms.toString());
130 						evaluator.addFN(1);
131 						continue;
132 					}
133 
134 //					evaluator.addTP(1);
135 
136 					if (fnTerms.get(mainTerm).equals(parts[1])) {
137 						evaluator.addTP(1);
138 						continue;
139 					}
140 
141 					evaluator.addFN(1);
142 					evaluator.addFP(1);
143 				}
144 			}
145 
146 			PrecisionRecall precisionRecall = evaluator.getResult();
147 			System.out.println(precisionRecall.toString());
148 		} catch (final Throwable ex) {
149 			CommandLine.fail(ex);
150 		}
151 
152 	}
153 }