1 package eu.fbk.dkm.pikes.resources.mpqa;
2
3 import com.google.common.collect.HashMultimap;
4 import eu.fbk.dkm.pikes.resources.NAFFilter;
5 import eu.fbk.dkm.pikes.resources.reader.*;
6 import eu.fbk.utils.core.CommandLine;
7 import ixa.kaflib.KAFDocument;
8 import ixa.kaflib.Opinion;
9 import ixa.kaflib.Span;
10 import ixa.kaflib.Term;
11 import org.slf4j.Logger;
12 import org.slf4j.LoggerFactory;
13
14 import javax.annotation.Nullable;
15 import java.io.File;
16 import java.util.*;
17 import java.util.regex.Pattern;
18
19
20
21
22
23 public class JohanssonAnnotator {
24
25 private static final Logger LOGGER = LoggerFactory.getLogger(JohanssonAnnotator.class);
26 public static final String DEFAULT_NAF_PARSED_DIR = "NAF-parsed";
27
28 static Pattern keyValuePatt = Pattern.compile("^([^=]+)=(.*)$");
29 static Pattern spanPatt = Pattern.compile("^([^,]*),([^,]*)$");
30 public static List<String> DEFAULT_NAF_EXTENSIONS = new ArrayList<>();
31
32 public static String GOLD_LABEL = "gold-eu.fbk.dkm.pikes.resources.mpqa";
33
34 static {
35 DEFAULT_NAF_EXTENSIONS.add("xml");
36 DEFAULT_NAF_EXTENSIONS.add("naf");
37 }
38
39 private static Span<Term> getSpanFromEntity(LKAnnotationEntity entity, KAFDocument document) {
40
41 Span<Term> returnSpan = KAFDocument.newTermSpan();
42
43 if (entity.referred != null) {
44 for (LKAnnotationEntity referredEntity : entity.referred) {
45 Integer termID = Integer.parseInt(referredEntity.localURI);
46 Term term = document.getTerms().get(termID - 1);
47 returnSpan.addTarget(term);
48 }
49 }
50
51 return returnSpan;
52 }
53
54 @Nullable
55 private static Integer sentenceForSpan(Span<Term> termSpan) {
56 HashSet<Integer> sentences = new HashSet<>();
57 for (Term term : termSpan.getTargets()) {
58 sentences.add(term.getSent());
59 }
60
61 if (sentences.size() != 1) {
62 return null;
63 }
64
65 for (Integer sentence : sentences) {
66 return sentence;
67 }
68
69 return null;
70 }
71
72 public static void main(String[] args) {
73 CommandLine cmd = null;
74 try {
75 cmd = CommandLine
76 .parser()
77 .withName("eu.fbk.dkm.pikes.resources.mpqa-annotator")
78 .withHeader("Annotated files with MPQA annotations")
79 .withOption("i", "input-path", "the J-M dataset input dir", "DIR", CommandLine.Type.DIRECTORY_EXISTING, true, false, true)
80 .withOption("o", "output-path", "the NAF dir", "DIR", CommandLine.Type.DIRECTORY_EXISTING, true, false, true)
81
82
83
84
85
86 .withLogger(LoggerFactory.getLogger("eu.fbk")).parse(args);
87
88 File lkFolder = cmd.getOptionValue("i", File.class);
89 File outputFolder = cmd.getOptionValue("o", File.class);
90
91 LKCollectionReader r = new LKCollectionReader(lkFolder.getAbsolutePath());
92 while (r.hasNext()) {
93 LKAnnotatedText annotatedText = r.next();
94 LKAnnotationLayer agentsLayer = annotatedText.getLayer("MPQA-agents");
95 String fileName = agentsLayer.scopeFile;
96 File nafFile = new File(outputFolder.getAbsolutePath() + File.separator + fileName);
97
98 if (!nafFile.exists()) {
99 LOGGER.error("File {} does not exist", nafFile.getCanonicalPath());
100 continue;
101 }
102
103 LOGGER.debug("Loading file {}", nafFile.getAbsolutePath());
104 KAFDocument document = KAFDocument.createFromFile(nafFile);
105
106 HashMap<String, String> hiddenAgents = new HashMap<>();
107
108 HashMultimap<String, Span<Term>> agents = HashMultimap.create();
109 for (LKAnnotationEntity entity : agentsLayer.entityList) {
110 DataElementNode expressionNode = (DataElementNode) entity.data.children.get(0);
111
112 String implicit = expressionNode.attributes.get("imp");
113 if (implicit != null && implicit.equals("true")) {
114 hiddenAgents.put(entity.localURI, "implicit");
115 continue;
116 }
117
118 String writer = expressionNode.attributes.get("w");
119 if (writer != null && writer.equals("true")) {
120 hiddenAgents.put(entity.localURI, "writer");
121 continue;
122 }
123
124 Span<Term> agentSpan = getSpanFromEntity(entity, document);
125
126 if (agentSpan.size() == 0) {
127 LOGGER.debug("Agent span is empty [{}/{}]", nafFile.getName(), entity.localURI);
128 continue;
129 }
130
131 agents.put(entity.localURI, agentSpan);
132
133 String ns = expressionNode.attributes.get("ns");
134 if (ns != null) {
135 String[] ids = ns.split(",");
136 String last = ids[ids.length - 1].replaceAll("#", "");
137 agents.put(last, agentSpan);
138 }
139 }
140
141 LKAnnotationLayer targetsLayer = annotatedText.getLayer("MPQA-target");
142 HashMap<String, Span<Term>> targets = new HashMap<>();
143 for (LKAnnotationEntity entity : targetsLayer.entityList) {
144 Span<Term> agentSpan = getSpanFromEntity(entity, document);
145
146 if (agentSpan.size() == 0) {
147 LOGGER.debug("Agent span is empty [{}/{}]", nafFile.getName(), entity.localURI);
148 continue;
149 }
150
151 targets.put(entity.localURI, agentSpan);
152
153
154
155
156
157
158
159
160 }
161
162 LKAnnotationLayer attLayer = annotatedText.getLayer("MPQA-attitude");
163 HashMap<String, HashMap<String, Object>> attitudes = new HashMap<>();
164 for (LKAnnotationEntity entity : attLayer.entityList) {
165 HashMap<String, Object> thisAttitude = new HashMap<>();
166 DataElementNode expressionNode = (DataElementNode) entity.data.children.get(0);
167 Span<Term> expressionSpan = getSpanFromEntity(entity, document);
168
169 thisAttitude.put("type", expressionNode.attributes.get("type"));
170 thisAttitude.put("expression", expressionSpan);
171
172 String targetID = expressionNode.attributes.get("tl");
173 if (targetID != null) {
174 targetID = targetID.replaceAll("#", "");
175 Span<Term> targetSpan = targets.get(targetID);
176 if (targetSpan != null) {
177 thisAttitude.put("target", targetSpan);
178 }
179 }
180
181 attitudes.put(entity.localURI, thisAttitude);
182 }
183
184 LKAnnotationLayer dseLayer = annotatedText.getLayer("MPQA-direct-subjective");
185 for (LKAnnotationEntity entity : dseLayer.entityList) {
186
187 Span<Term> expressionSpan = getSpanFromEntity(entity, document);
188 Span<Term> holderSpan = KAFDocument.newTermSpan();
189
190
191 if (expressionSpan.size() == 0) {
192 LOGGER.debug("Expression span is empty [{}/{}]", nafFile.getName(), entity.localURI);
193 continue;
194 }
195
196 Integer sentence = sentenceForSpan(expressionSpan);
197 if (sentence == null) {
198 LOGGER.warn("Expression span is not in sentence [{}/{}]", nafFile.getName(), entity.localURI);
199 continue;
200 }
201
202 Opinion opinion = document.newOpinion();
203 opinion.setLabel("gold-eu.fbk.dkm.pikes.resources.mpqa-subjective");
204
205 Opinion.OpinionExpression opinionExpression = opinion.createOpinionExpression(expressionSpan);
206 DataElementNode expressionNode = (DataElementNode) entity.data.children.get(0);
207 opinionExpression.setPolarity(expressionNode.attributes.get("pol"));
208 opinionExpression.setStrength(expressionNode.attributes.get("int"));
209
210 String holderString = expressionNode.attributes.get("ns");
211 if (holderString != null) {
212 String[] parts = holderString.split(",");
213
214 holders:
215 for (int i = parts.length - 1; i >= 0; i--) {
216 String agentID = parts[i].replaceAll("[^0-9]", "");
217
218 if (hiddenAgents.containsKey(agentID)) {
219 opinionExpression.setSentimentProductFeature(hiddenAgents.get(agentID));
220 break holders;
221 }
222
223 Set<Span<Term>> spans = agents.get(agentID);
224 for (Span<Term> termSpan : spans) {
225 Integer agentSentence = sentenceForSpan(termSpan);
226 if (agentSentence == null) {
227 continue;
228 }
229 if (!agentSentence.equals(sentence)) {
230 continue;
231 }
232 if (termSpan == null) {
233 continue;
234 }
235 holderSpan = termSpan;
236 break holders;
237 }
238 }
239 }
240
241 if (holderSpan.size() > 0) {
242 Opinion.OpinionHolder opinionHolder = opinion.createOpinionHolder(holderSpan);
243 }
244
245 String al = expressionNode.attributes.get("al");
246 if (al != null) {
247 al = al.replaceAll("#", "");
248
249 HashMap<String, Object> target = attitudes.get(al);
250 if (target != null && target.get("expression") != null) {
251 Opinion attitude = document.newOpinion();
252 String type = (String) target.get("type");
253 attitude.setLabel("gold-eu.fbk.dkm.pikes.resources.mpqa-attitude-" + type);
254 Opinion.OpinionExpression attitudeExpression = attitude.createOpinionExpression((Span<Term>) target.get("expression"));
255 attitudeExpression.setPolarity(expressionNode.attributes.get("pol"));
256 attitudeExpression.setStrength(expressionNode.attributes.get("int"));
257
258
259 if (holderSpan.size() > 0) {
260 Opinion.OpinionHolder opinionHolder = attitude.createOpinionHolder(holderSpan);
261 }
262
263 if (target.get("target") != null) {
264 Opinion.OpinionTarget opinionTarget = attitude.createOpinionTarget((Span<Term>) target.get("target"));
265 }
266 }
267 }
268
269 }
270
271 LKAnnotationLayer eseLayer = annotatedText.getLayer("MPQA-expressive-subjectivity");
272 for (LKAnnotationEntity entity : eseLayer.entityList) {
273
274 Span<Term> expressionSpan = getSpanFromEntity(entity, document);
275 Span<Term> holderSpan = KAFDocument.newTermSpan();
276
277 if (expressionSpan.size() == 0) {
278 LOGGER.debug("Expression span is empty [{}/{}]", nafFile.getName(), entity.localURI);
279 continue;
280 }
281
282 Integer sentence = sentenceForSpan(expressionSpan);
283 if (sentence == null) {
284 LOGGER.warn("Expression span is not in sentence [{}/{}]", nafFile.getName(), entity.localURI);
285 continue;
286 }
287
288 Opinion opinion = document.newOpinion();
289 opinion.setLabel("gold-eu.fbk.dkm.pikes.resources.mpqa-expressive");
290
291 Opinion.OpinionExpression opinionExpression = opinion.createOpinionExpression(expressionSpan);
292 DataElementNode expressionNode = (DataElementNode) entity.data.children.get(0);
293 opinionExpression.setPolarity(expressionNode.attributes.get("pol"));
294 opinionExpression.setStrength(expressionNode.attributes.get("int"));
295
296 String holderString = expressionNode.attributes.get("ns");
297 if (holderString != null) {
298 String[] parts = holderString.split(",");
299
300 holders:
301 for (int i = parts.length - 1; i >= 0; i--) {
302 String agentID = parts[i].replaceAll("[^0-9]", "");
303
304 if (hiddenAgents.containsKey(agentID)) {
305 opinionExpression.setSentimentProductFeature(hiddenAgents.get(agentID));
306 break holders;
307 }
308
309 Set<Span<Term>> spans = agents.get(agentID);
310 for (Span<Term> termSpan : spans) {
311 Integer agentSentence = sentenceForSpan(termSpan);
312 if (agentSentence == null) {
313 continue;
314 }
315 if (!agentSentence.equals(sentence)) {
316 continue;
317 }
318 if (termSpan == null) {
319 continue;
320 }
321 holderSpan = termSpan;
322 break holders;
323 }
324 }
325 }
326
327 if (holderSpan.size() > 0) {
328 Opinion.OpinionHolder opinionHolder = opinion.createOpinionHolder(holderSpan);
329 }
330 }
331
332 LKAnnotationLayer oseLayer = annotatedText.getLayer("MPQA-objective-speech-event");
333 for (LKAnnotationEntity entity : oseLayer.entityList) {
334
335 Span<Term> expressionSpan = getSpanFromEntity(entity, document);
336 Span<Term> holderSpan = KAFDocument.newTermSpan();
337
338 if (expressionSpan.size() == 0) {
339 LOGGER.debug("Expression span is empty [{}/{}]", nafFile.getName(), entity.localURI);
340 continue;
341 }
342
343 Integer sentence = sentenceForSpan(expressionSpan);
344 if (sentence == null) {
345 LOGGER.warn("Expression span is not in sentence [{}/{}]", nafFile.getName(), entity.localURI);
346 continue;
347 }
348
349 Opinion opinion = document.newOpinion();
350 opinion.setLabel("gold-eu.fbk.dkm.pikes.resources.mpqa-objective");
351
352 Opinion.OpinionExpression opinionExpression = opinion.createOpinionExpression(expressionSpan);
353 DataElementNode expressionNode = (DataElementNode) entity.data.children.get(0);
354
355 String holderString = expressionNode.attributes.get("ns");
356 if (holderString != null) {
357 String[] parts = holderString.split(",");
358
359 holders:
360 for (int i = parts.length - 1; i >= 0; i--) {
361 String agentID = parts[i].replaceAll("[^0-9]", "");
362
363 if (hiddenAgents.containsKey(agentID)) {
364 opinionExpression.setSentimentProductFeature(hiddenAgents.get(agentID));
365 break holders;
366 }
367
368 Set<Span<Term>> spans = agents.get(agentID);
369 for (Span<Term> termSpan : spans) {
370 Integer agentSentence = sentenceForSpan(termSpan);
371 if (agentSentence == null) {
372 continue;
373 }
374 if (!agentSentence.equals(sentence)) {
375 continue;
376 }
377 if (termSpan == null) {
378 continue;
379 }
380 holderSpan = termSpan;
381 break holders;
382 }
383 }
384 }
385
386 if (holderSpan.size() > 0) {
387 Opinion.OpinionHolder opinionHolder = opinion.createOpinionHolder(holderSpan);
388 }
389
390 }
391
392 NAFFilter.builder(false).withSRLRoleLinking(true, true)
393 .withOpinionLinking(true, true).build().filter(document);
394
395 document.save(nafFile.getAbsolutePath());
396
397
398
399 }
400
401 } catch (Throwable ex) {
402 CommandLine.fail(ex);
403 System.exit(1);
404 }
405
406 }
407 }