1   package eu.fbk.dkm.pikes.resources.mpqa;
2   
3   import com.google.common.collect.HashMultimap;
4   import eu.fbk.dkm.pikes.resources.NAFFilter;
5   import eu.fbk.dkm.pikes.resources.reader.*;
6   import eu.fbk.utils.core.CommandLine;
7   import ixa.kaflib.KAFDocument;
8   import ixa.kaflib.Opinion;
9   import ixa.kaflib.Span;
10  import ixa.kaflib.Term;
11  import org.slf4j.Logger;
12  import org.slf4j.LoggerFactory;
13  
14  import javax.annotation.Nullable;
15  import java.io.File;
16  import java.util.*;
17  import java.util.regex.Pattern;
18  
19  /**
20   * Created by alessio on 20/03/15.
21   */
22  
23  public class JohanssonAnnotator {
24  
25  	private static final Logger LOGGER = LoggerFactory.getLogger(JohanssonAnnotator.class);
26  	public static final String DEFAULT_NAF_PARSED_DIR = "NAF-parsed";
27  
28  	static Pattern keyValuePatt = Pattern.compile("^([^=]+)=(.*)$");
29  	static Pattern spanPatt = Pattern.compile("^([^,]*),([^,]*)$");
30  	public static List<String> DEFAULT_NAF_EXTENSIONS = new ArrayList<>();
31  
32  	public static String GOLD_LABEL = "gold-eu.fbk.dkm.pikes.resources.mpqa";
33  
34  	static {
35  		DEFAULT_NAF_EXTENSIONS.add("xml");
36  		DEFAULT_NAF_EXTENSIONS.add("naf");
37  	}
38  
39  	private static Span<Term> getSpanFromEntity(LKAnnotationEntity entity, KAFDocument document) {
40  
41  		Span<Term> returnSpan = KAFDocument.newTermSpan();
42  
43  		if (entity.referred != null) {
44  			for (LKAnnotationEntity referredEntity : entity.referred) {
45  				Integer termID = Integer.parseInt(referredEntity.localURI);
46  				Term term = document.getTerms().get(termID - 1);
47  				returnSpan.addTarget(term);
48  			}
49  		}
50  
51  		return returnSpan;
52  	}
53  
54  	@Nullable
55  	private static Integer sentenceForSpan(Span<Term> termSpan) {
56  		HashSet<Integer> sentences = new HashSet<>();
57  		for (Term term : termSpan.getTargets()) {
58  			sentences.add(term.getSent());
59  		}
60  
61  		if (sentences.size() != 1) {
62  			return null;
63  		}
64  
65  		for (Integer sentence : sentences) {
66  			return sentence;
67  		}
68  
69  		return null;
70  	}
71  
72  	public static void main(String[] args) {
73  		CommandLine cmd = null;
74  		try {
75  			cmd = CommandLine
76  					.parser()
77  					.withName("eu.fbk.dkm.pikes.resources.mpqa-annotator")
78  					.withHeader("Annotated files with MPQA annotations")
79  					.withOption("i", "input-path", "the J-M dataset input dir", "DIR", CommandLine.Type.DIRECTORY_EXISTING, true, false, true)
80  					.withOption("o", "output-path", "the NAF dir", "DIR", CommandLine.Type.DIRECTORY_EXISTING, true, false, true)
81  //					.withOption("e", "extensions", String.format("Input extensions (default %s)", DEFAULT_NAF_EXTENSIONS), "EXTS", CommandLine.Type.STRING, true, true, false)
82  //					.withOption("t", "test", "test only on this file", "FILE", CommandLine.Type.STRING, true, false, false)
83  //					.withOption("f", "force", "Force opinion")
84  //					.withOption("F", "fake", "Fake mode, do not write to files")
85  //					.withOption("s", "exclude-source-local-null", "Exclude opinion if source is null")
86  					.withLogger(LoggerFactory.getLogger("eu.fbk")).parse(args);
87  
88  			File lkFolder = cmd.getOptionValue("i", File.class);
89  			File outputFolder = cmd.getOptionValue("o", File.class);
90  
91  			LKCollectionReader r = new LKCollectionReader(lkFolder.getAbsolutePath());
92  			while (r.hasNext()) {
93  				LKAnnotatedText annotatedText = r.next();
94  				LKAnnotationLayer agentsLayer = annotatedText.getLayer("MPQA-agents");
95  				String fileName = agentsLayer.scopeFile;
96  				File nafFile = new File(outputFolder.getAbsolutePath() + File.separator + fileName);
97  
98  				if (!nafFile.exists()) {
99  					LOGGER.error("File {} does not exist", nafFile.getCanonicalPath());
100 					continue;
101 				}
102 
103 				LOGGER.debug("Loading file {}", nafFile.getAbsolutePath());
104 				KAFDocument document = KAFDocument.createFromFile(nafFile);
105 
106 				HashMap<String, String> hiddenAgents = new HashMap<>();
107 
108 				HashMultimap<String, Span<Term>> agents = HashMultimap.create();
109 				for (LKAnnotationEntity entity : agentsLayer.entityList) {
110 					DataElementNode expressionNode = (DataElementNode) entity.data.children.get(0);
111 
112 					String implicit = expressionNode.attributes.get("imp");
113 					if (implicit != null && implicit.equals("true")) {
114 						hiddenAgents.put(entity.localURI, "implicit");
115 						continue;
116 					}
117 
118 					String writer = expressionNode.attributes.get("w");
119 					if (writer != null && writer.equals("true")) {
120 						hiddenAgents.put(entity.localURI, "writer");
121 						continue;
122 					}
123 
124 					Span<Term> agentSpan = getSpanFromEntity(entity, document);
125 
126 					if (agentSpan.size() == 0) {
127 						LOGGER.debug("Agent span is empty [{}/{}]", nafFile.getName(), entity.localURI);
128 						continue;
129 					}
130 
131 					agents.put(entity.localURI, agentSpan);
132 
133 					String ns = expressionNode.attributes.get("ns");
134 					if (ns != null) {
135 						String[] ids = ns.split(",");
136 						String last = ids[ids.length - 1].replaceAll("#", "");
137 						agents.put(last, agentSpan);
138 					}
139 				}
140 
141 				LKAnnotationLayer targetsLayer = annotatedText.getLayer("MPQA-target");
142 				HashMap<String, Span<Term>> targets = new HashMap<>();
143 				for (LKAnnotationEntity entity : targetsLayer.entityList) {
144 					Span<Term> agentSpan = getSpanFromEntity(entity, document);
145 
146 					if (agentSpan.size() == 0) {
147 						LOGGER.debug("Agent span is empty [{}/{}]", nafFile.getName(), entity.localURI);
148 						continue;
149 					}
150 
151 					targets.put(entity.localURI, agentSpan);
152 
153 //					DataElementNode expressionNode = (DataElementNode) entity.data.children.get(0);
154 //					String ns = expressionNode.attributes.get("ns");
155 //					if (ns != null) {
156 //						String[] ids = ns.split(",");
157 //						String last = ids[ids.length - 1].replaceAll("#", "");
158 //						targets.put(last, agentSpan);
159 //					}
160 				}
161 
162 				LKAnnotationLayer attLayer = annotatedText.getLayer("MPQA-attitude");
163 				HashMap<String, HashMap<String, Object>> attitudes = new HashMap<>();
164 				for (LKAnnotationEntity entity : attLayer.entityList) {
165 					HashMap<String, Object> thisAttitude = new HashMap<>();
166 					DataElementNode expressionNode = (DataElementNode) entity.data.children.get(0);
167 					Span<Term> expressionSpan = getSpanFromEntity(entity, document);
168 
169 					thisAttitude.put("type", expressionNode.attributes.get("type"));
170 					thisAttitude.put("expression", expressionSpan);
171 
172 					String targetID = expressionNode.attributes.get("tl");
173 					if (targetID != null) {
174 						targetID = targetID.replaceAll("#", "");
175 						Span<Term> targetSpan = targets.get(targetID);
176 						if (targetSpan != null) {
177 							thisAttitude.put("target", targetSpan);
178 						}
179 					}
180 
181 					attitudes.put(entity.localURI, thisAttitude);
182 				}
183 
184 				LKAnnotationLayer dseLayer = annotatedText.getLayer("MPQA-direct-subjective");
185 				for (LKAnnotationEntity entity : dseLayer.entityList) {
186 
187 					Span<Term> expressionSpan = getSpanFromEntity(entity, document);
188 					Span<Term> holderSpan = KAFDocument.newTermSpan();
189 //					Span<Term> targetSpan = KAFDocument.newTermSpan();
190 
191 					if (expressionSpan.size() == 0) {
192 						LOGGER.debug("Expression span is empty [{}/{}]", nafFile.getName(), entity.localURI);
193 						continue;
194 					}
195 
196 					Integer sentence = sentenceForSpan(expressionSpan);
197 					if (sentence == null) {
198 						LOGGER.warn("Expression span is not in sentence [{}/{}]", nafFile.getName(), entity.localURI);
199 						continue;
200 					}
201 
202 					Opinion opinion = document.newOpinion();
203 					opinion.setLabel("gold-eu.fbk.dkm.pikes.resources.mpqa-subjective");
204 
205 					Opinion.OpinionExpression opinionExpression = opinion.createOpinionExpression(expressionSpan);
206 					DataElementNode expressionNode = (DataElementNode) entity.data.children.get(0);
207 					opinionExpression.setPolarity(expressionNode.attributes.get("pol"));
208 					opinionExpression.setStrength(expressionNode.attributes.get("int"));
209 
210 					String holderString = expressionNode.attributes.get("ns");
211 					if (holderString != null) {
212 						String[] parts = holderString.split(",");
213 
214 						holders:
215 						for (int i = parts.length - 1; i >= 0; i--) {
216 							String agentID = parts[i].replaceAll("[^0-9]", "");
217 
218 							if (hiddenAgents.containsKey(agentID)) {
219 								opinionExpression.setSentimentProductFeature(hiddenAgents.get(agentID));
220 								break holders;
221 							}
222 
223 							Set<Span<Term>> spans = agents.get(agentID);
224 							for (Span<Term> termSpan : spans) {
225 								Integer agentSentence = sentenceForSpan(termSpan);
226 								if (agentSentence == null) {
227 									continue;
228 								}
229 								if (!agentSentence.equals(sentence)) {
230 									continue;
231 								}
232 								if (termSpan == null) {
233 									continue;
234 								}
235 								holderSpan = termSpan;
236 								break holders;
237 							}
238 						}
239 					}
240 
241 					if (holderSpan.size() > 0) {
242 						Opinion.OpinionHolder opinionHolder = opinion.createOpinionHolder(holderSpan);
243 					}
244 
245 					String al = expressionNode.attributes.get("al");
246 					if (al != null) {
247 						al = al.replaceAll("#", "");
248 
249 						HashMap<String, Object> target = attitudes.get(al);
250 						if (target != null && target.get("expression") != null) {
251 							Opinion attitude = document.newOpinion();
252 							String type = (String) target.get("type");
253 							attitude.setLabel("gold-eu.fbk.dkm.pikes.resources.mpqa-attitude-" + type);
254 							Opinion.OpinionExpression attitudeExpression = attitude.createOpinionExpression((Span<Term>) target.get("expression"));
255 							attitudeExpression.setPolarity(expressionNode.attributes.get("pol"));
256 							attitudeExpression.setStrength(expressionNode.attributes.get("int"));
257 //							attitudeExpression.setSentimentSemanticType((String) target.get("type"));
258 
259 							if (holderSpan.size() > 0) {
260 								Opinion.OpinionHolder opinionHolder = attitude.createOpinionHolder(holderSpan);
261 							}
262 
263 							if (target.get("target") != null) {
264 								Opinion.OpinionTarget opinionTarget = attitude.createOpinionTarget((Span<Term>) target.get("target"));
265 							}
266 						}
267 					}
268 
269 				}
270 
271 				LKAnnotationLayer eseLayer = annotatedText.getLayer("MPQA-expressive-subjectivity");
272 				for (LKAnnotationEntity entity : eseLayer.entityList) {
273 
274 					Span<Term> expressionSpan = getSpanFromEntity(entity, document);
275 					Span<Term> holderSpan = KAFDocument.newTermSpan();
276 
277 					if (expressionSpan.size() == 0) {
278 						LOGGER.debug("Expression span is empty [{}/{}]", nafFile.getName(), entity.localURI);
279 						continue;
280 					}
281 
282 					Integer sentence = sentenceForSpan(expressionSpan);
283 					if (sentence == null) {
284 						LOGGER.warn("Expression span is not in sentence [{}/{}]", nafFile.getName(), entity.localURI);
285 						continue;
286 					}
287 
288 					Opinion opinion = document.newOpinion();
289 					opinion.setLabel("gold-eu.fbk.dkm.pikes.resources.mpqa-expressive");
290 
291 					Opinion.OpinionExpression opinionExpression = opinion.createOpinionExpression(expressionSpan);
292 					DataElementNode expressionNode = (DataElementNode) entity.data.children.get(0);
293 					opinionExpression.setPolarity(expressionNode.attributes.get("pol"));
294 					opinionExpression.setStrength(expressionNode.attributes.get("int"));
295 
296 					String holderString = expressionNode.attributes.get("ns");
297 					if (holderString != null) {
298 						String[] parts = holderString.split(",");
299 
300 						holders:
301 						for (int i = parts.length - 1; i >= 0; i--) {
302 							String agentID = parts[i].replaceAll("[^0-9]", "");
303 
304 							if (hiddenAgents.containsKey(agentID)) {
305 								opinionExpression.setSentimentProductFeature(hiddenAgents.get(agentID));
306 								break holders;
307 							}
308 
309 							Set<Span<Term>> spans = agents.get(agentID);
310 							for (Span<Term> termSpan : spans) {
311 								Integer agentSentence = sentenceForSpan(termSpan);
312 								if (agentSentence == null) {
313 									continue;
314 								}
315 								if (!agentSentence.equals(sentence)) {
316 									continue;
317 								}
318 								if (termSpan == null) {
319 									continue;
320 								}
321 								holderSpan = termSpan;
322 								break holders;
323 							}
324 						}
325 					}
326 
327 					if (holderSpan.size() > 0) {
328 						Opinion.OpinionHolder opinionHolder = opinion.createOpinionHolder(holderSpan);
329 					}
330 				}
331 
332 				LKAnnotationLayer oseLayer = annotatedText.getLayer("MPQA-objective-speech-event");
333 				for (LKAnnotationEntity entity : oseLayer.entityList) {
334 
335 					Span<Term> expressionSpan = getSpanFromEntity(entity, document);
336 					Span<Term> holderSpan = KAFDocument.newTermSpan();
337 
338 					if (expressionSpan.size() == 0) {
339 						LOGGER.debug("Expression span is empty [{}/{}]", nafFile.getName(), entity.localURI);
340 						continue;
341 					}
342 
343 					Integer sentence = sentenceForSpan(expressionSpan);
344 					if (sentence == null) {
345 						LOGGER.warn("Expression span is not in sentence [{}/{}]", nafFile.getName(), entity.localURI);
346 						continue;
347 					}
348 
349 					Opinion opinion = document.newOpinion();
350 					opinion.setLabel("gold-eu.fbk.dkm.pikes.resources.mpqa-objective");
351 
352 					Opinion.OpinionExpression opinionExpression = opinion.createOpinionExpression(expressionSpan);
353 					DataElementNode expressionNode = (DataElementNode) entity.data.children.get(0);
354 
355 					String holderString = expressionNode.attributes.get("ns");
356 					if (holderString != null) {
357 						String[] parts = holderString.split(",");
358 
359 						holders:
360 						for (int i = parts.length - 1; i >= 0; i--) {
361 							String agentID = parts[i].replaceAll("[^0-9]", "");
362 
363 							if (hiddenAgents.containsKey(agentID)) {
364 								opinionExpression.setSentimentProductFeature(hiddenAgents.get(agentID));
365 								break holders;
366 							}
367 
368 							Set<Span<Term>> spans = agents.get(agentID);
369 							for (Span<Term> termSpan : spans) {
370 								Integer agentSentence = sentenceForSpan(termSpan);
371 								if (agentSentence == null) {
372 									continue;
373 								}
374 								if (!agentSentence.equals(sentence)) {
375 									continue;
376 								}
377 								if (termSpan == null) {
378 									continue;
379 								}
380 								holderSpan = termSpan;
381 								break holders;
382 							}
383 						}
384 					}
385 
386 					if (holderSpan.size() > 0) {
387 						Opinion.OpinionHolder opinionHolder = opinion.createOpinionHolder(holderSpan);
388 					}
389 
390 				}
391 
392 				NAFFilter.builder(false).withSRLRoleLinking(true, true)
393 						.withOpinionLinking(true, true).build().filter(document);
394 
395 				document.save(nafFile.getAbsolutePath());
396 
397 //				System.out.println(document.toString());
398 //				break;
399 			}
400 
401 		} catch (Throwable ex) {
402 			CommandLine.fail(ex);
403 			System.exit(1);
404 		}
405 
406 	}
407 }