1 package eu.fbk.dkm.pikes.resources.vuaopinion;
2
3 import eu.fbk.dkm.pikes.resources.NAFFilter;
4 import eu.fbk.utils.core.CommandLine;
5 import ixa.kaflib.*;
6 import org.apache.commons.io.FileUtils;
7 import org.apache.commons.io.FilenameUtils;
8 import org.slf4j.LoggerFactory;
9
10 import java.io.File;
11 import java.io.IOException;
12 import java.util.HashMap;
13 import java.util.Iterator;
14
15
16
17
18
19
20 public class CorpusAnnotator {
21
22 private static final org.slf4j.Logger LOGGER = LoggerFactory.getLogger(CorpusAnnotator.class);
23
24 public static void main(String[] args) {
25 try {
26 CommandLine cmd = null;
27 cmd = CommandLine
28 .parser()
29 .withName("corpus-postprocessor")
30 .withHeader(
31 "Add opinion layers to the parsed NAFs")
32 .withOption("i", "input-path", "the base EN path of the corpus", "DIR",
33 CommandLine.Type.DIRECTORY_EXISTING, true, false, true)
34 .withOption("f", "force", "Force opinion")
35 .withLogger(LoggerFactory.getLogger("eu.fbk.fssa")).parse(args);
36
37 boolean forceOpinion = cmd.hasOption("f");
38
39 final File inputPath = cmd.getOptionValue("i", File.class);
40 if (!inputPath.exists()) {
41 throw new IOException(String.format("Folder %s does not exist", inputPath.getAbsolutePath()));
42 }
43
44 File kafPath = new File(inputPath.getAbsolutePath() + File.separator + "kaf");
45 if (!kafPath.exists()) {
46 throw new IOException(String.format("Folder %s does not exist", kafPath.getAbsolutePath()));
47 }
48 File nafPath = new File(inputPath.getAbsolutePath() + File.separator + "naf-parsed");
49 if (!nafPath.exists()) {
50 throw new IOException(String.format("Folder %s does not exist", nafPath.getAbsolutePath()));
51 }
52
53 Iterator<File> fileIterator;
54 fileIterator = FileUtils.iterateFiles(kafPath, new String[]{"kaf"}, false);
55
56 while (fileIterator.hasNext()) {
57 File file = fileIterator.next();
58 String fileBaseName = FilenameUtils.removeExtension(file.getName());
59 KAFDocument document = KAFDocument.createFromFile(file);
60
61 File nafFile = new File(nafPath.getAbsolutePath() + File.separator + fileBaseName + ".naf");
62 if (!nafFile.exists()) {
63 LOGGER.warn(String.format("File %s does not exist", nafFile.getAbsolutePath()));
64 continue;
65 }
66 KAFDocument nafDoc = KAFDocument.createFromFile(nafFile);
67 HashMap<String, Term> nafTerms = new HashMap<>();
68 for (Term term : nafDoc.getTerms()) {
69 nafTerms.put(term.getId(), term);
70 }
71
72 HashMap<String, String> idConverter = new HashMap<>();
73 int i = 0;
74 for (WF wf : document.getWFs()) {
75 String id = wf.getId();
76 id = id.replace('w', 't');
77 idConverter.put(id, "t" + Integer.toString(++i));
78 }
79
80 boolean hasGoldOpinions = false;
81 for (Opinion opinion : document.getOpinions()) {
82 if ("gold-vua-opinion".equals(opinion.getLabel())) {
83 hasGoldOpinions = true;
84 break;
85 }
86 }
87
88 if (hasGoldOpinions && !forceOpinion) {
89 LOGGER.info("Opinions already present, skipping...");
90
91 } else {
92 for (Opinion opinion : document.getOpinions()) {
93 Opinion newOpinion = nafDoc.newOpinion();
94 newOpinion.setLabel("gold-vua-opinion");
95
96 Span<Term> termSpan;
97
98
99 if (opinion.getOpinionExpression() != null) {
100 termSpan = KAFDocument.newTermSpan();
101 for (Term term : opinion.getOpinionExpression().getTerms()) {
102 termSpan.addTarget(nafTerms.get(idConverter.get(term.getId())));
103 }
104 Opinion.OpinionExpression expression = newOpinion.createOpinionExpression(termSpan);
105 expression.setPolarity(opinion.getOpinionExpression().getPolarity());
106 }
107
108
109 if (opinion.getOpinionHolder() != null) {
110 termSpan = KAFDocument.newTermSpan();
111 for (Term term : opinion.getOpinionHolder().getTerms()) {
112 termSpan.addTarget(nafTerms.get(idConverter.get(term.getId())));
113 }
114 newOpinion.createOpinionHolder(termSpan);
115 }
116
117
118 if (opinion.getOpinionTarget() != null) {
119 termSpan = KAFDocument.newTermSpan();
120 for (Term term : opinion.getOpinionTarget().getTerms()) {
121 termSpan.addTarget(nafTerms.get(idConverter.get(term.getId())));
122 }
123 newOpinion.createOpinionTarget(termSpan);
124 }
125 }
126 }
127
128 NAFFilter.builder(false).withSRLRoleLinking(true, true)
129 .withOpinionLinking(true, true).build().filter(document);
130
131 nafDoc.save(nafFile.getAbsolutePath());
132 }
133
134 } catch (final Throwable ex) {
135 CommandLine.fail(ex);
136 }
137 }
138 }