1   package eu.fbk.dkm.pikes.tintop;
2   
3   import com.fasterxml.jackson.databind.DeserializationFeature;
4   import com.fasterxml.jackson.databind.ObjectMapper;
5   import com.google.common.collect.HashMultimap;
6   import com.google.common.collect.ImmutableList;
7   import com.google.common.collect.Ordering;
8   import edu.cmu.cs.lti.ark.fn.parsing.SemaforParseResult;
9   import edu.stanford.nlp.coref.CorefCoreAnnotations;
10  import edu.stanford.nlp.coref.data.CorefChain;
11  import edu.stanford.nlp.ling.CoreAnnotations;
12  import edu.stanford.nlp.ling.CoreLabel;
13  import edu.stanford.nlp.pipeline.Annotation;
14  import edu.stanford.nlp.pipeline.StanfordCoreNLP;
15  import edu.stanford.nlp.trees.CollinsHeadFinder;
16  import edu.stanford.nlp.trees.HeadFinder;
17  import edu.stanford.nlp.trees.Tree;
18  import edu.stanford.nlp.trees.TreeCoreAnnotations;
19  import edu.stanford.nlp.util.CoreMap;
20  import edu.stanford.nlp.util.IntPair;
21  import eu.fbk.dkm.pikes.resources.*;
22  import eu.fbk.dkm.pikes.resources.ontonotes.VerbNetStatisticsExtractor;
23  import eu.fbk.dkm.pikes.tintop.util.NER2SSTtagset;
24  import eu.fbk.dkm.pikes.tintop.util.NerEntity;
25  import eu.fbk.dkm.pikes.twm.LinkingTag;
26  import eu.fbk.dkm.pikes.twm.TWMAnnotations;
27  import eu.fbk.fcw.mate.MateAnnotations;
28  import eu.fbk.fcw.ner.NERConfidenceAnnotator;
29  import eu.fbk.fcw.semafor.Semafor;
30  import eu.fbk.fcw.semafor.SemaforAnnotations;
31  import eu.fbk.fcw.ukb.UKBAnnotations;
32  import eu.fbk.fcw.utils.AnnotatorUtils;
33  import eu.fbk.fcw.wnpos.WNPosAnnotations;
34  import eu.fbk.utils.core.PropertiesUtils;
35  import eu.fbk.utils.corenlp.CustomAnnotations;
36  import eu.fbk.utils.corenlp.outputters.JSONOutputter;
37  import ixa.kaflib.*;
38  import org.apache.commons.lang.StringEscapeUtils;
39  import org.apache.log4j.Logger;
40  import se.lth.cs.srl.corpus.Word;
41  
42  import javax.annotation.Nullable;
43  import java.io.*;
44  import java.util.*;
45  import java.util.Map.Entry;
46  
/**
 * Annotation pipeline: loads the configured models and copies the annotations
 * produced by Stanford CoreNLP into a NAF document ({@code KAFDocument}).
 *
 * Created with IntelliJ IDEA.
 * User: alessio
 * Date: 21/07/14
 * Time: 12:48
 */
54  
55  public class AnnotationPipeline {
56  
57      static Logger logger = Logger.getLogger(AnnotationPipeline.class.getName());
58  
59      enum Models {ONTONOTES, WORDNET, PREDICATE_MATRIX}
60  
61      HashMap<Models, Boolean> modelsLoaded = new HashMap<>();
62  
63      private PredicateMatrix PM;
64      private VerbNetStatisticsExtractor statisticsExtractor = null;
65  
66      private Properties defaultConfig = new Properties();
67  
68      private Map<String, String> nerMap = new HashMap<>();
69  
70      public AnnotationPipeline(@Nullable File configFile, @Nullable Properties additionalProperties) throws IOException {
71          defaultConfig = new Properties();
72          if (configFile != null) {
73              InputStream input = new FileInputStream(configFile);
74              defaultConfig.load(input);
75              input.close();
76          }
77          defaultConfig.putAll(Defaults.classProperties());
78          if (additionalProperties != null) {
79              defaultConfig.putAll(additionalProperties);
80          }
81          Defaults.setNotPresent(defaultConfig);
82  
83          for (Models model : Models.values()) {
84              modelsLoaded.put(model, false);
85          }
86      }
87  
88      public void addToNerMap(String key, String value) {
89          nerMap.put(key, value);
90      }
91  
92      public void deleteFromNerMap(String key) {
93          nerMap.remove(key);
94      }
95  
96      public Properties getDefaultConfig() {
97          return defaultConfig;
98      }
99  
100     public static void addHeads(Tree node) {
101         addHeads(node, null, null);
102     }
103 
104     public static void addHeads(Tree node, Tree parent, HeadFinder headFinder) {
105         if (node == null || node.isLeaf()) {
106             return;
107         }
108 
109         if (headFinder == null) {
110             headFinder = new CollinsHeadFinder();
111         }
112 
113         Tree head = headFinder.determineHead(node, parent);
114         if (!head.isLeaf()) {
115             head.label().setValue(head.label().toString() + ixa.kaflib.Tree.HEAD_MARK);
116         }
117 
118         for (Tree child : node.children()) {
119             addHeads(child, node, headFinder);
120         }
121 
122     }
123 
124     public void loadModels() throws Exception {
125         loadModels(getDefaultConfig());
126     }
127 
128     public void loadModels(Properties properties) throws Exception {
129 
130         boolean enablePM = Defaults.getBoolean(properties.getProperty("enable_predicate_matrix"), false);
131         boolean enableNafFilter = Defaults.getBoolean(properties.getProperty("enable_naf_filter"), false);
132         boolean enableOntoNotesFilter = Defaults.getBoolean(properties.getProperty("enable_on_filter"), false);
133 
134         logger.info("Loading Stanford CoreNLP");
135 
136         Properties stanfordFromConfig = PropertiesUtils.dotConvertedProperties(properties, "stanford");
137         StanfordCoreNLP stanfordPipeline = new StanfordCoreNLP(stanfordFromConfig);
138 
139         // Predicate Matrix
140 
141         if (enablePM && !modelsLoaded.get(Models.PREDICATE_MATRIX)) {
142             logger.info("Loading Predicate Matrix");
143             PM = new PredicateMatrix(properties.getProperty("predicate_matrix", Defaults.PREDICATE_MATRIX));
144             modelsLoaded.put(Models.PREDICATE_MATRIX, true);
145         }
146 
147         // NAF filter
148 
149         if (enableNafFilter && !modelsLoaded.get(Models.WORDNET)) {
150             logger.info("Loading WordNet for NAF filter");
151             WordNet.setPath(properties.getProperty("naf_filter_wordnet_path", Defaults.WN_DICT));
152             WordNet.init();
153             modelsLoaded.put(Models.WORDNET, true);
154         }
155 
156         // OntoNotes
157 
158         if (enableOntoNotesFilter && !modelsLoaded.get(Models.ONTONOTES)) {
159             logger.info("Loading OntoNotes");
160             statisticsExtractor = new VerbNetStatisticsExtractor();
161 //			statisticsExtractor.loadDir(config.getProperty("on_folder"));
162 //			statisticsExtractor.loadFrequencies();
163             statisticsExtractor.loadFrequencies(properties.getProperty("on_frequencies", Defaults.ON_FREQUENCIES));
164             modelsLoaded.put(Models.ONTONOTES, true);
165         }
166     }
167 
168     public void annotateStanford(Properties properties, Annotation document, KAFDocument NAFdocument)
169             throws IOException {
170 
171         boolean enablePM = Defaults.getBoolean(properties.getProperty("enable_predicate_matrix"), false);
172         boolean enableNafFilter = Defaults.getBoolean(properties.getProperty("enable_naf_filter"), false);
173         boolean enableOntoNotesFilter = Defaults.getBoolean(properties.getProperty("enable_on_filter"), false);
174         boolean enableEntityAssignment = Defaults.getBoolean(properties.getProperty("enable_entity_assignment"), false);
175 
176         Map<Integer, CorefChain> coreferenceGraph = document.get(CorefCoreAnnotations.CorefChainAnnotation.class);
177 
178         // Add tmx0
179         try {
180             Timex3 tmx0 = NAFdocument.newTimex3("tmx0", "DATE");
181             tmx0.setValue(NAFdocument.getFileDesc().creationtime.substring(0, 10));
182         } catch (Exception e) {
183             logger.warn("Document creation time is not included in the NAF headers");
184         }
185 
186         logger.info("Getting information");
187         TreeMap<Integer, Integer> sentIndexes = new TreeMap<>();
188         int totTokens = 0;
189         ArrayList<Term> allTerms = new ArrayList<>();
190 
191         HashMap<Integer, Integer> tokenFromStart = new HashMap<>();
192         HashMap<Integer, Integer> tokenFromEnd = new HashMap<>();
193 
194         HashMap<Integer, Integer> offsetToken = new HashMap<>();
195 
196         ArrayList<WF> allTokens = new ArrayList<>();
197         HashMap<Integer, HashSet<LinkingTag>> keywords = new HashMap<>();
198 
199         if (document.containsKey(TWMAnnotations.LinkingAnnotations.class)) {
200             for (LinkingTag e : document.get(TWMAnnotations.LinkingAnnotations.class)) {
201                 int start = e.getOffset();
202                 if (keywords.get(start) == null) {
203                     keywords.put(start, new HashSet<LinkingTag>());
204                 }
205                 keywords.get(start).add(e);
206                 logger.debug("Annotated entity (DS): " + e);
207             }
208         }
209 
210         // Main loop
211         List<CoreMap> get = document.get(CoreAnnotations.SentencesAnnotation.class);
212         for (int sentIndex = 0; sentIndex < get.size(); sentIndex++) {
213             CoreMap stanfordSentence = get.get(sentIndex);
214             List<CoreLabel> tokens = stanfordSentence.get(CoreAnnotations.TokensAnnotation.class);
215 
216             ArrayList<Term> terms = new ArrayList<>();
217             ArrayList<String> ners = new ArrayList<>();
218 
219             sentIndexes.put(sentIndex, totTokens);
220             totTokens += tokens.size();
221 
222             HashMap<Integer, TreeSet<Integer>> children = new HashMap<>();
223 
224             String lastNER = "O";
225             ArrayList<NerEntity> entities = new ArrayList<>();
226 
227             for (int i = 0; i < tokens.size(); i++) {
228                 CoreLabel stanfordToken = tokens.get(i);
229                 String form = stanfordToken.get(CoreAnnotations.TextAnnotation.class);
230                 String lemma = stanfordToken.get(CoreAnnotations.LemmaAnnotation.class);
231                 String pos = stanfordToken.get(CoreAnnotations.PartOfSpeechAnnotation.class);
232 
233                 form = AnnotatorUtils.codeToParenthesis(form);
234                 if (lemma != null) {
235                     lemma = AnnotatorUtils.codeToParenthesis(lemma);
236                 }
237                 pos = AnnotatorUtils.codeToParenthesis(pos);
238 
239                 children.put(i, new TreeSet<Integer>());
240 
241                 // Tokens
242                 WF thisWF = NAFdocument.newWF(form, stanfordToken.beginPosition(), sentIndex + 1);
243                 thisWF.setPara(1); //todo: Always set paragraph 1
244 
245                 Integer tokenID = totTokens - tokens.size() + i;
246 
247                 tokenFromStart.put(stanfordToken.beginPosition(), tokenID);
248                 tokenFromEnd.put(stanfordToken.beginPosition() + thisWF.getLength(), tokenID);
249 
250                 for(int j=stanfordToken.beginPosition();j<stanfordToken.beginPosition() + thisWF.getLength();j++) {
251                     //System.out.println(j + "   " + tokenID);
252                     offsetToken.put(j, tokenID);
253                 }
254 
255                 allTokens.add(tokenID, thisWF);
256 
257                 // Term
258                 Span<WF> thisWFSpan = KAFDocument.newWFSpan();
259                 thisWFSpan.addTarget(thisWF);
260                 Term thisTerm = NAFdocument.newTerm("open", lemma, pos, thisWFSpan);
261                 thisTerm.setMorphofeat(pos);
262 
263                 // Upos
264                 String upos = stanfordToken.get(CustomAnnotations.UPosAnnotation.class);
265                 thisTerm.setUpos(upos);
266 
267                 // WordNet sense
268                 String wnSense = stanfordToken.get(UKBAnnotations.UKBAnnotation.class);
269                 if (wnSense != null) {
270                     thisTerm.setWordnetSense(stanfordToken.get(UKBAnnotations.UKBAnnotation.class));
271                 }
272 
273                 // Simple POS
274                 String simplePos = stanfordToken.get(WNPosAnnotations.WNPosAnnotation.class);
275                 if (simplePos == null) {
276                     simplePos = "O";
277                 }
278                 thisTerm.setPos(simplePos);
279 
280                 // Features
281                 Map<String, Collection<String>> features = stanfordToken.get(CustomAnnotations.FeaturesAnnotation.class);
282                 thisTerm.setFeatures(features);
283 
284                 terms.add(thisTerm);
285                 allTerms.add(thisTerm);
286 
287                 String ne = stanfordToken.get(CoreAnnotations.NamedEntityTagAnnotation.class);
288                 if (nerMap.containsKey(ne)) {
289                     ne = nerMap.get(ne);
290                 }
291                 String normVal = stanfordToken.getString(CoreAnnotations.NormalizedNamedEntityTagAnnotation.class);
292                 if (ne != null) {
293                     if (ne.equals("O")) {
294                         ners.add("0");
295                     } else {
296 
297                         // Alternative string for SST
298                         String alt = NER2SSTtagset.tagset.get(ne);
299                         if (alt == null) {
300                             alt = "MISC";
301                         }
302 
303                         if (ne.equals(lastNER)) {
304                             entities.get(entities.size() - 1).setEndToken(i);
305                             ners.add("I-" + alt);
306                         } else {
307                             NerEntity newEntity = new NerEntity(ne, i, normVal);
308                             newEntity.setScoredLabels(stanfordToken.get(NERConfidenceAnnotator.ScoredNamedEntityTagsAnnotation.class));
309                             entities.add(newEntity);
310                             ners.add("B-" + alt);
311                         }
312                     }
313                     lastNER = ne;
314                 } else {
315                     ners.add("0");
316                 }
317 
318             }
319 
320 
321 //            @todo change next to UD??
322             for (int i = 0; i < tokens.size(); i++) {
323                 CoreLabel stanfordToken = tokens.get(i);
324 
325                 // Dependencies
326                 if (!stanfordToken.containsKey(CoreAnnotations.CoNLLDepParentIndexAnnotation.class)) {
327                     continue;
328                 }
329 
330                 int head = stanfordToken.get(CoreAnnotations.CoNLLDepParentIndexAnnotation.class);
331                 head++;
332                 String depRel = stanfordToken.get(CoreAnnotations.CoNLLDepTypeAnnotation.class);
333                 if (head != 0) {
334                     Term from = terms.get(head - 1);
335                     Term to = terms.get(i);
336                     NAFdocument.newDep(from, to, depRel);
337                 }
338 
339                 Word word = stanfordToken.get(MateAnnotations.MateTokenAnnotation.class);
340                 if (word != null) {
341                     List<Word> toRoot = Word.pathToRoot(word);
342                     for (Word w : toRoot) {
343                         int id = w.getIdx() - 1;
344                         if (id < 0) {
345                             continue;
346                         }
347                         children.get(id).add(i);
348                     }
349                 }
350             }
351 
352             // Opinion
353 
354 //            boolean includeNeutral = config.getProperty("stanford_include_neutral", "0").equals("1");
355 //
356 //            Tree sentimentTree = stanfordSentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class);
357 //            if (sentimentTree != null) {
358 //                HashMap<edu.stanford.nlp.ling.Word, Term> indexedWords = new HashMap<>();
359 //                int wordIndex = -1;
360 //                for (Tree t : sentimentTree.getLeaves()) {
361 //                    wordIndex++;
362 //                    List<edu.stanford.nlp.ling.Word> words = t.yieldWords();
363 //                    for (edu.stanford.nlp.ling.Word w : words) {
364 //                        indexedWords.put(w, terms.get(wordIndex));
365 //                    }
366 //                }
367 //
368 //                for (Tree tree : sentimentTree) {
369 //
370 //                    Integer predictedClass;
371 //                    try {
372 //                        predictedClass = RNNCoreAnnotations.getPredictedClass(tree);
373 //                    } catch (Exception e) {
374 //                        continue;
375 //                    }
376 //
377 //                    if (predictedClass == null) {
378 //                        continue;
379 //                    }
380 //
381 //                    if (!includeNeutral && predictedClass == 2) {
382 //                        continue;
383 //                    }
384 //
385 //                    Span<Term> treeSpan = KAFDocument.newTermSpan();
386 //                    for (edu.stanford.nlp.ling.Word word : tree.yieldWords()) {
387 //                        treeSpan.addTarget(indexedWords.get(word));
388 //                    }
389 //
390 //                    Opinion opinion = NAFdocument.createOpinion();
391 //                    opinion.setLabel("stanford-sentiment");
392 //                    Opinion.OpinionExpression opinionExpression = opinion.createOpinionExpression(treeSpan);
393 //                    opinionExpression.setPolarity(stanfordSentimentLabels[predictedClass]);
394 //
395 //                    NumberFormat nf = NumberFormat.getNumberInstance();
396 //                    nf.setMaximumFractionDigits(2);
397 //
398 //                    SimpleMatrix predictions = RNNCoreAnnotations.getPredictions(tree);
399 //                    StringBuffer stringBuffer = new StringBuffer();
400 //                    stringBuffer.append(nf.format(predictions.get(0)));
401 //                    stringBuffer.append("|");
402 //                    stringBuffer.append(nf.format(predictions.get(1)));
403 //                    stringBuffer.append("|");
404 //                    stringBuffer.append(nf.format(predictions.get(2)));
405 //                    stringBuffer.append("|");
406 //                    stringBuffer.append(nf.format(predictions.get(3)));
407 //                    stringBuffer.append("|");
408 //                    stringBuffer.append(nf.format(predictions.get(4)));
409 //                    opinionExpression.setStrength(stringBuffer.toString());
410 //                }
411 //            }
412 
413             // Entities
414 
415             for (NerEntity entity : entities) {
416 
417                 int startIndex = terms.get(entity.getStartToken()).getWFs().get(0).getOffset();
418                 int endIndex = terms.get(entity.getEndToken()).getWFs()
419                         .get(terms.get(entity.getEndToken()).getWFs().size() - 1).getOffset() +
420                         terms.get(entity.getEndToken()).getWFs()
421                                 .get(terms.get(entity.getEndToken()).getWFs().size() - 1).getLength();
422 
423                 logger.debug("Stanford NER entity: " + entity + "");
424                 logger.debug(String.format("Stanford NER entity: %s (from %d to %d)", entity.getLabel(), startIndex,
425                         endIndex));
426 
427                 Span<Term> thisTermSpan = KAFDocument.newTermSpan();
428                 Span<WF> thisWFSpan = KAFDocument.newWFSpan();
429 
430                 for (int i = entity.getStartToken(); i <= entity.getEndToken(); i++) {
431                     thisTermSpan.addTarget(terms.get(i));
432                     thisWFSpan.addTargets(terms.get(i).getWFs());
433                 }
434 
435                 List<Span<Term>> thisTermList = new LinkedList<>();
436                 List<Span<WF>> thisWFList = new LinkedList<>();
437 
438                 thisTermList.add(thisTermSpan);
439                 thisWFList.add(thisWFSpan);
440 
441                 Entity thisEntity = null;
442                 Timex3 thisTimex = null;
443 
444                 entity.setLabel(entity.getLabel().toUpperCase());
445 
446                 switch (entity.getLabel()) {
447                     case "PERSON":
448                     case "LOCATION":
449                     case "ORGANIZATION":
450 
451                     case "MISC":
452                     case "MONEY":
453                     case "PERCENT":
454 
455                         // Compatibility with Tint
456                     case "PER":
457                     case "LOC":
458                     case "ORG":
459 
460                         thisEntity = NAFdocument.newEntity(thisTermList);
461                         String entityLabel = entity.getLabel().replace("PERSON","PER").replace("ORGANIZATION","ORG").replace("LOCATION","LOC");
462                         thisEntity.setType(entityLabel);
463 
464                         // Normalized value
465                         if (entity.getNormalizedValue() != null && entity.getNormalizedValue().length() > 0) {
466                             thisEntity.addExternalRef(NAFdocument.createExternalRef("value", entity.getNormalizedValue()));
467                         }
468 
469                         if (enableEntityAssignment) {
470                             LinkingTag e = null;
471                             HashSet<LinkingTag> possibleEntities = keywords.get(startIndex);
472                             if (possibleEntities != null) {
473                                 for (LinkingTag loopEntity : possibleEntities) {
474                                     int end = loopEntity.getOffset() + loopEntity.getLength();
475                                     if (end != endIndex) {
476                                         continue;
477                                     }
478                                     if (e == null || e.getScore() < loopEntity.getScore()) {
479                                         e = loopEntity;
480                                     }
481                                 }
482                             }
483 
484                             if (e != null) {
485                                 ExternalRef ext = NAFdocument.newExternalRef(e.getSource(), e.getPage());
486                                 ext.setConfidence((float) e.getScore());
487                                 thisEntity.addExternalRef(ext);
488                             }
489                         }
490 
491                         break;
492 
493                     case "NUMBER":
494                         thisEntity = NAFdocument.newEntity(thisTermList);
495                         thisEntity.setType("CARDINAL");
496                         thisEntity.addExternalRef(NAFdocument.createExternalRef("value", entity.getNormalizedValue()));
497                         break;
498 
499                     case "ORDINAL":
500                         thisEntity = NAFdocument.newEntity(thisTermList);
501                         thisEntity.setType("ORDINAL");
502                         thisEntity.addExternalRef(NAFdocument.createExternalRef("value", entity.getNormalizedValue()));
503                         break;
504 
505                     case "DATE":
506                     case "TIME":
507                         thisTimex = NAFdocument.newTimex3(thisWFSpan, entity.getLabel());
508                         thisTimex.setValue(entity.getNormalizedValue());
509                         break;
510 
511                     case "DURATION":
512                         thisTimex = NAFdocument.newTimex3(thisWFSpan, entity.getLabel());
513                         thisTimex.setValue(entity.getNormalizedValue());
514                         break;
515 
516                     default:
517                         logger.debug(entity.getLabel());
518                 }
519 
520                 if (thisEntity != null && entity.getScoredLabels() != null) {
521                     for (Entry<String, Double> entry : entity.getScoredLabels().entrySet()) {
522                         ExternalRef ref = NAFdocument.createExternalRef("value-confidence",
523                                 entry.getKey().replace("PERSON","PER").replace("ORGANIZATION","ORG").replace("LOCATION","LOC"));
524                         ref.setConfidence(entry.getValue().floatValue());
525                         thisEntity.addExternalRef(ref);
526                     }
527                 }
528             }
529 
530             for (int i = 0; i < tokens.size(); i++) {
531                 CoreLabel stanfordToken = tokens.get(i);
532 
533                 se.lth.cs.srl.corpus.Predicate predicate = stanfordToken.get(MateAnnotations.MateAnnotation.class);
534                 if (predicate != null) {
535                     Span<Term> thisTermSpan = KAFDocument.newTermSpan();
536                     Term thisTerm = terms.get(predicate.getIdx() - 1);
537                     String mateSense = predicate.getSense();
538 
539                     thisTermSpan.addTarget(thisTerm);
540 
541                     Predicate newPred = NAFdocument.newPredicate(thisTermSpan);
542                     newPred.setSource("mate");
543 
544 
545                     boolean verb =true;
546                     String sense = null;
547 
548 
549                     ExternalRef e;
550                     // If it's a verb -> PropBank, if it's a noun -> NomBank
551 //                    @todo change next to UD
552                     if (thisTerm.getPos().equals("V")) {
553                         e = NAFdocument.newExternalRef("PropBank", mateSense);
554                         e.setSource("mate");
555                         sense = mateSense;
556                     } else {
557                         verb=false;
558                         e = NAFdocument.newExternalRef("NomBank", mateSense);
559                         e.setSource("mate");
560                         // check NomBank
561                         NomBank.Roleset roleset = NomBank.getRoleset(mateSense);
562                         try {
563                             sense = roleset.getPBId();
564                         } catch (Exception ex) {
565                             logger.debug(ex.getMessage());
566                         }
567                     }
568                     newPred.addExternalRef(e);
569 
570 
571 
572                     ArrayList<String> vnClasses = new ArrayList<>();
573                     ArrayList<String> fnFrames = new ArrayList<>();
574 
575                     if (enablePM) {
576                         if (sense != null && sense.length() > 0) {
577 
578                             HashSet<String> vnToAdd = new HashSet<>();
579                             String vnFinal = null;
580 
581                             // VerbNet
582                             vnClasses = PM.getVNClasses(sense);
583                             if (!vnClasses.isEmpty()) {
584                                 if (vnClasses.size() == 1 || !enableOntoNotesFilter) {
585                                     for (String vnClass1 : vnClasses) {
586                                         vnToAdd.add(vnClass1);
587                                         vnFinal = vnClass1;
588                                     }
589                                 } else {
590                                     Integer value = 0;
591 
592                                     for (String vnClass : vnClasses) {
593                                         Integer thisValue = statisticsExtractor.getVnTotals().get(vnClass);
594                                         thisValue = thisValue == null ? 0 : thisValue;
595                                         if (thisValue >= value) {
596                                             vnFinal = vnClass;
597                                             value = thisValue;
598                                         }
599                                     }
600 
601                                     // Reset the list of classes
602                                     vnClasses = new ArrayList<>();
603 
604                                     if (vnFinal != null) {
605                                         vnToAdd.add(vnFinal);
606                                         vnClasses.add(vnFinal);
607                                     }
608                                 }
609                             }
610                             //check if the sense maps to a subclass of a selected class
611                             ArrayList<String> vnSubClasses = PM.getVNSubClasses(sense);
612                             if (!vnSubClasses.isEmpty()) {
613                                 for (String vnSubClass1 : vnSubClasses) {
614                                     for (String vnClass : vnClasses) {
615                                         if (!vnSubClass1.startsWith(vnClass)) {
616                                             continue;
617                                         }
618 
619                                         vnToAdd.add(vnSubClass1);
620 
621                                         // Remove upper class
622                                         if (vnFinal != null) {
623                                             if (vnSubClass1.startsWith(vnFinal)) {
624                                                 vnToAdd.remove(vnFinal);
625                                             }
626                                         }
627                                     }
628                                 }
629                             }
630 
631                             for (String vnClass1 : vnToAdd) {
632                                 ExternalRef vnClass = NAFdocument.newExternalRef("VerbNet", vnClass1);
633                                 vnClass.setSource("mate+pm");
634                                 newPred.addExternalRef(vnClass);
635                             }
636 
637                             //added to make consistents thematic roles and vnclasses
638                             vnClasses.clear();
639                             vnClasses.addAll(vnToAdd);
640 
641                             // FrameNet
642                             fnFrames = PM.getFNFrames(sense);
643 
644                             if (enableOntoNotesFilter) {
645                                 HashSet<String> possibleFrames = new HashSet<>();
646                                 for (String vnClass : vnClasses) {
647                                     possibleFrames.addAll(PM.getVNClassesToFN(vnClass));
648                                 }
649 
650 //								System.out.println("vnClasses: " + vnClasses);
651 //								System.out.println("fnFrames (before): " + fnFrames);
652                                 fnFrames.retainAll(possibleFrames);
653 //								System.out.println("fnFrames (after): " + fnFrames);
654 //								System.out.println("Possible frames: " + possibleFrames);
655                             }
656 
657                             if (!fnFrames.isEmpty()) {
658                                 if (fnFrames.size() == 1 || !enableOntoNotesFilter) {
659                                     for (String fnFrame1 : fnFrames) {
660                                         ExternalRef fnFrame = NAFdocument.newExternalRef("FrameNet", fnFrame1);
661                                         fnFrame.setSource("mate+pm");
662                                         newPred.addExternalRef(fnFrame);
663                                     }
664                                 } else {
665                                     Integer value = 0;
666                                     String fnFinal = null;
667 
668                                     for (String fnFrame : fnFrames) {
669                                         Integer thisValue = statisticsExtractor.getFnTotals()
670                                                 .get(fnFrame.toLowerCase());
671                                         thisValue = thisValue == null ? 0 : thisValue;
672                                         if (thisValue >= value) {
673                                             fnFinal = fnFrame;
674                                             value = thisValue;
675                                         }
676                                     }
677 
678                                     // Reset the list of frames
679                                     fnFrames = new ArrayList<>();
680 
681                                     if (fnFinal != null) {
682                                         ExternalRef fnFrame = NAFdocument.newExternalRef("FrameNet", fnFinal);
683                                         fnFrame.setSource("mate+pm");
684                                         newPred.addExternalRef(fnFrame);
685                                         fnFrames.add(fnFinal);
686                                     }
687                                 }
688                             }
689 
690                             if (!verb){
691                                 // PropBank
692                                 ArrayList<String> pbPredicates = PM.getPBPredicates(sense);
693                                 if (!pbPredicates.isEmpty()) {
694                                     for (String pbPredicate1 : pbPredicates) {
695                                         ExternalRef pbPredicate = NAFdocument.newExternalRef("PropBank", pbPredicate1);
696                                         pbPredicate.setSource("mate+nb");
697                                         newPred.addExternalRef(pbPredicate);
698                                     }
699                                 }
700                             }
701 
702                             // ESO
703                             ArrayList<String> esoClasses = PM.getESOClasses(sense);
704                             if (!esoClasses.isEmpty()) {
705                                 for (String esoClass1 : esoClasses) {
706                                     ExternalRef esoClass = NAFdocument.newExternalRef("ESO", esoClass1);
707                                     esoClass.setSource("mate+pm");
708                                     newPred.addExternalRef(esoClass);
709                                 }
710                             }
711 //                            Not in pm1.3
712 //                            // EventType
713 //                            ArrayList<String> eventTypes = PM.getEventTypes(sense);
714 //                            if (!eventTypes.isEmpty()) {
715 //                                for (String eventType1 : eventTypes) {
716 //                                    ExternalRef eventType = NAFdocument.newExternalRef("EventType", eventType1);
717 //                                    eventType.setSource("mate+pm");
718 //                                    newPred.addExternalRef(eventType);
719 //                                }
720 //                            }
721 
722                             // WordNet
723                             ArrayList<String> wnSenses = PM.getWNSenses(sense);
724                             if (!wnSenses.isEmpty()) {
725                                 for (String wnSense1 : wnSenses) {
726                                     ExternalRef wnSense = NAFdocument.newExternalRef("WordNet", wnSense1);
727                                     wnSense.setSource("mate+pm");
728                                     newPred.addExternalRef(wnSense);
729                                 }
730                             }
731 
732                         }
733                     }
734 
735                     for (Word w : predicate.getArgMap().keySet()) {
736                         Span<Term> thisTermSpanForRole = KAFDocument.newTermSpan();
737                         for (int k : children.get(w.getIdx() - 1)) {
738                             thisTermSpanForRole.addTarget(terms.get(k));
739                         }
740                         thisTermSpanForRole.setHead(terms.get(w.getIdx() - 1));
741 
742                         String argument = predicate.getArgMap().get(w);
743                         Predicate.Role newRole = NAFdocument.newRole(newPred, argument, thisTermSpanForRole);
744                         ExternalRef mateRoleRef;
745                         if(verb){
746 
747                             mateRoleRef = NAFdocument
748                                     .newExternalRef("PropBank", mateSense+"@"+argument);
749                             mateRoleRef.setSource("mate");
750 
751 
752                         } else {
753 
754                             mateRoleRef = NAFdocument
755                                     .newExternalRef("NomBank", mateSense+"@"+argument);
756                             mateRoleRef.setSource("mate");
757                         }
758 
759                         newRole.addExternalRef(mateRoleRef);
760 
761 
762                         if (enablePM && PM != null && statisticsExtractor != null) {
763 
764                             // VerbNet
765                             ArrayList<String> vnThematicRoles = PM.getVNThematicRoles(sense + "@" + argument);
766                             if (!vnThematicRoles.isEmpty()) {
767                                 for (String vnThematicRole1 : vnThematicRoles) {
768                                     if (!enableOntoNotesFilter) {
769                                         ExternalRef vnThematicRole = NAFdocument
770                                                 .newExternalRef("VerbNet", vnThematicRole1);
771                                         vnThematicRole.setSource("mate+pm");
772                                         newRole.addExternalRef(vnThematicRole);
773                                     } else {
774                                         String[] parts = vnThematicRole1.split("@");
775                                         if (vnClasses.contains(parts[0])) {
776                                             ExternalRef vnThematicRole = NAFdocument
777                                                     .newExternalRef("VerbNet", vnThematicRole1);
778                                             vnThematicRole.setSource("mate+pm");
779                                             newRole.addExternalRef(vnThematicRole);
780                                         }
781                                     }
782                                 }
783                             }
784 
785                             // FrameNet
786                             ArrayList<String> fnFrameElements = PM.getFNFrameElements(sense + "@" + argument);
787                             if (!fnFrameElements.isEmpty()) {
788                                 for (String fnFrameElement1 : fnFrameElements) {
789                                     if (!enableOntoNotesFilter) {
790                                         ExternalRef fnFrameElement = NAFdocument
791                                                 .newExternalRef("FrameNet", fnFrameElement1);
792                                         fnFrameElement.setSource("mate+pm");
793                                         newRole.addExternalRef(fnFrameElement);
794                                     } else {
795                                         String[] parts = fnFrameElement1.split("@");
796                                         if (fnFrames.contains(parts[0])) {
797                                             ExternalRef fnFrameElement = NAFdocument
798                                                     .newExternalRef("FrameNet", fnFrameElement1);
799                                             fnFrameElement.setSource("mate+pm");
800                                             newRole.addExternalRef(fnFrameElement);
801                                         }
802                                     }
803                                 }
804                             }
805 
806                             // PropBank
807                             if (!verb) {
808                                 ArrayList<String> pbArguments = PM.getPBArguments(sense + "@" + argument);
809                                 if (!pbArguments.isEmpty()) {
810                                     for (String pbArgument1 : pbArguments) {
811                                         ExternalRef pbArgument = NAFdocument.newExternalRef("PropBank", pbArgument1);
812                                         pbArgument.setSource("mate+pm");
813                                         newRole.addExternalRef(pbArgument);
814                                     }
815                                 }
816                             }
817                             // ESO
818                             ArrayList<String> esoRoles = PM.getESORoles(sense + "@" + argument);
819                             if (!esoRoles.isEmpty()) {
820                                 for (String esoRole1 : esoRoles) {
821                                     ExternalRef esoRole = NAFdocument.newExternalRef("ESO", esoRole1);
822                                     esoRole.setSource("mate+pm");
823                                     newRole.addExternalRef(esoRole);
824                                 }
825                             }
826                         }
827 
828                         newPred.addRole(newRole);
829                     }
830 
831                 }
832             }
833 
834             if (stanfordSentence.containsKey(SemaforAnnotations.SemaforAnnotation.class)) {
835                 SemaforParseResult semaforParseResult = stanfordSentence.get(SemaforAnnotations.SemaforAnnotation.class);
836                 ObjectMapper mapper = new ObjectMapper();
837 
838                 mapper.disable(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES);
839                 Semafor.SemaforResponse semaforResponse = mapper
840                         .readValue(semaforParseResult.toJson(), Semafor.SemaforResponse.class);
841                 for (Semafor.SemaforFrame semaforFrame : semaforResponse.getFrames()) {
842                     Semafor.SemaforAnnotation semaforTarget = semaforFrame.getTarget();
843                     if (semaforTarget == null) {
844                         continue;
845                     }
846                     String frameName = semaforTarget.getName();
847 
848                     if (semaforTarget.getSpans().size() == 0) {
849                         continue;
850                     }
851                     if (semaforFrame.getAnnotationSets().size() == 0) {
852                         continue;
853                     }
854 
855                     Semafor.SemaforSpan semaforSpan = semaforTarget.getSpans().get(0);
856                     Semafor.SemaforSet semaforAnnotation = semaforFrame.getAnnotationSets().get(0);
857 
858                     Span<Term> termSpan = KAFDocument.newTermSpan();
859                     for (int i = semaforSpan.getStart(); i < semaforSpan.getEnd(); i++) {
860                         termSpan.addTarget(terms.get(i));
861                     }
862 
863                     if (termSpan.size() == 0) {
864                         continue;
865                     }
866 
867                     Predicate predicate = NAFdocument.newPredicate(termSpan);
868                     predicate.setSource("semafor");
869                     predicate.setConfidence(semaforAnnotation.getScore());
870                     ExternalRef frameNameExt = NAFdocument.createExternalRef("FrameNet", frameName);
871                     frameNameExt.setSource("semafor");
872                     predicate.addExternalRef(frameNameExt);
873 
874                     predicate.setId("f_" + predicate.getId());
875 
876                     for (Semafor.SemaforAnnotation frameAnnotation : semaforAnnotation.getFrameElements()) {
877                         Semafor.SemaforSpan roleSpan = frameAnnotation.getSpans().get(0);
878                         String roleName = frameAnnotation.getName();
879 
880                         Span<Term> roleTermSpan = KAFDocument.newTermSpan();
881                         for (int i = roleSpan.getStart(); i < roleSpan.getEnd(); i++) {
882                             roleTermSpan.addTarget(terms.get(i));
883                         }
884 
885                         if (roleTermSpan.size() == 0) {
886                             continue;
887                         }
888 
889                         Predicate.Role role = NAFdocument.newRole(predicate, "", roleTermSpan);
890 
891 //                        @todo change next to UD
892                         final Term head = NAFUtils.extractHead(NAFdocument, role.getSpan());
893                         if (head != null) {
894                             final Span<Term> newSpan = KAFDocument
895                                     .newTermSpan(Ordering.from(Term.OFFSET_COMPARATOR).sortedCopy(
896                                             NAFdocument.getTermsByDepAncestors(ImmutableList.of(head))));
897                             role.setSpan(newSpan);
898                         }
899                         ExternalRef roleNameExt = NAFdocument.createExternalRef("FrameNet", frameName + "@" + roleName);
900                         roleNameExt.setSource("semafor");
901                         role.addExternalRef(roleNameExt);
902 //                        predicate.setSource("semafor");
903                         predicate.addRole(role);
904                     }
905 
906                 }
907             }
908 
909             // Constituency: do we need it?
910             Tree tree = stanfordSentence.get(TreeCoreAnnotations.TreeAnnotation.class);
911             if (tree != null) {
912                 NAFdocument.addConstituencyString(tree.toString(), sentIndex + 1);
913                 try {
914                     logger.debug("Tree: " + tree.toString());
915 //                    @todo change next to UD
916                     addHeads(tree);
917                     NAFdocument.addConstituencyFromParentheses(tree.toString(), sentIndex + 1);
918                 } catch (Exception e) {
919                     logger.info("Tree: " + tree.toString());
920                     logger.warn(e.getMessage());
921                     e.printStackTrace();
922                 }
923             }
924 
925         } // end sentences loop
926 
927         // Entities
928         for (Integer startIndex : keywords.keySet()) {
929             for (LinkingTag e : keywords.get(startIndex)) {
930                 //int end = e.getOffset() + e.getLength();
931                 int end = e.getOffset() + e.getLength()-1;
932                 //Integer startToken = tokenFromStart.get(e.getOffset());
933                 Integer startToken = offsetToken.get(e.getOffset());
934                 //Integer endToken = tokenFromEnd.get(end);
935                 Integer endToken = offsetToken.get(end);
936                 Span<WF> span = KAFDocument.newWFSpan();
937                 if (startToken != null && endToken != null) {
938                     for (int j = startToken; j <= endToken; j++) {
939                         span.addTarget(allTokens.get(j));
940                     }
941 
942                     try {
943                         LinkedEntity linkedEntity = NAFdocument.newLinkedEntity(span);
944                         linkedEntity.setConfidence(e.getScore());
945                         linkedEntity.setReference(e.getPage());
946                         linkedEntity.setResource(e.getSource());
947                         linkedEntity.setTypes(e.getStringTypes());
948                         linkedEntity.setSpotted(e.isSpotted());
949                     } catch (Exception err) {
950                         logger.error("Error on adding linkedEntity: " + err.getMessage());
951                     }
952                 }
953             }
954         }
955 
956         // Coref
957 
958         // Simple coref
959         HashMultimap<Integer, Integer> simpleCoref = document.get(CustomAnnotations.SimpleCorefAnnotation.class);
960         if (simpleCoref != null) {
961             List<Span<Term>> mentions = new ArrayList<>();
962 
963             for (Integer sentenceID : simpleCoref.keySet()) {
964                 TreeSet<Integer> sortedSet = new TreeSet<>();
965                 sortedSet.addAll(simpleCoref.get(sentenceID));
966 
967                 Span<Term> thisTermSpan = KAFDocument.newTermSpan();
968                 int lastTokenID = -1;
969 
970                 for (Integer tokenID : sortedSet) {
971                     tokenID = tokenID - 1;
972                     int sentenceStartTokenIndex = sentIndexes.get(sentenceID);
973                     int id = sentenceStartTokenIndex + tokenID;
974                     if (tokenID - lastTokenID > 1) {
975                         if (thisTermSpan.size() > 0) {
976                             mentions.add(thisTermSpan);
977                         }
978                         thisTermSpan = KAFDocument.newTermSpan();
979                     }
980                     thisTermSpan.addTarget(allTerms.get(id));
981                     lastTokenID = tokenID;
982                 }
983                 if (thisTermSpan.size() > 0) {
984                     mentions.add(thisTermSpan);
985                 }
986             }
987 
988             if (mentions.size() > 0) {
989                 NAFdocument.newCoref(mentions);
990             }
991         }
992 
993         // Loop through clusters
994         if (coreferenceGraph != null) {
995             for (Object c : coreferenceGraph.keySet()) {
996 
997                 CorefChain chain = coreferenceGraph.get(c);
998                 Map<IntPair, Set<CorefChain.CorefMention>> mentionMap = chain.getMentionMap();
999 
1000                 List<Span<Term>> mentions = new ArrayList<>();
1001 
1002                 // Loop through sentences
1003                 for (IntPair p : mentionMap.keySet()) {
1004 
1005                     Set<CorefChain.CorefMention> corefMentions = mentionMap.get(p);
1006                     if (corefMentions.size() < 2) {
1007                         continue;
1008                     }
1009 
1010                     // Loop through mentions
1011                     for (CorefChain.CorefMention m : corefMentions) {
1012 
1013                         int sentenceStartTokenIndex = sentIndexes.get(m.sentNum - 1);
1014                         int start = sentenceStartTokenIndex + m.startIndex - 1;
1015 
1016                         Span<Term> thisTermSpan = KAFDocument.newTermSpan();
1017                         for (int i = start; i < start + m.endIndex - m.startIndex; i++) {
1018                             thisTermSpan.addTarget(allTerms.get(i));
1019                         }
1020 
1021                         if (thisTermSpan.size() > 0) {
1022                             mentions.add(thisTermSpan);
1023                         }
1024                     }
1025                 }
1026 
1027                 if (mentions.size() > 0) {
1028                     NAFdocument.newCoref(mentions);
1029                 }
1030             }
1031         }
1032 
1033         // NAF filter
1034         if (enableNafFilter) {
1035             logger.info("Applying NAF filter");
1036             Properties nafFilterConfig = PropertiesUtils.dotConvertedProperties(properties, "filter");
1037 
1038             LinguisticProcessor linguisticProcessor = new LinguisticProcessor("naf-filter", "NAF filter");
1039             linguisticProcessor.setBeginTimestamp();
1040             try {
1041                 NAFFilter filter = NAFFilter.builder().withProperties(properties, "filter").build();
1042                 filter.filter(NAFdocument);
1043 
1044                 //NAFFilter.builder().withProperties(properties,"filter").build().filter(NAFdocument);
1045 //                NAFFilter.builder().build().filter(NAFdocument);
1046 //                NAFFilter.builder(false)
1047 //                        .withTermSenseCompletion(true).withSRLRoleLinking(false, false)
1048 //                        .withOpinionLinking(false, false).build()
1049 //                        .filter(NAFdocument);
1050             } catch (Exception e) {
1051                 logger.error("Error applying NAF filter");
1052             }
1053             linguisticProcessor.setEndTimestamp();
1054             NAFdocument.addLinguisticProcessor(linguisticProcessor.getLayer(), linguisticProcessor);
1055         }
1056     }
1057 
1058     private KAFDocument parseAll(KAFDocument NAFdocument) throws Exception {
1059         return parseAll(NAFdocument, new Properties());
1060     }
1061 
1062     private KAFDocument parseAll(KAFDocument NAFdocument, Properties merge) throws Exception {
1063 
1064         String text = NAFdocument.getRawText();
1065         text = StringEscapeUtils.unescapeHtml(text);
1066 
1067         Properties properties = getDefaultConfig();
1068         properties.putAll(merge);
1069 
1070         String maxTextLen = properties.getProperty("max_text_len");
1071         int limit = Integer.parseInt(maxTextLen);
1072         if (text.length() > limit) {
1073             throw new Exception(String.format("Input too long (%d chars, limit is %d)", text.length(), limit));
1074         }
1075 
1076         loadModels(properties);
1077         Properties stanfordConfig = PropertiesUtils.dotConvertedProperties(properties, "stanford");
1078 
1079         // Load pipeline
1080         //Properties thisSessionProps = new Properties(stanfordConfig); NON LO CAPISCO e NON FUNZIONA!!!!
1081         Properties thisSessionProps = stanfordConfig;
1082         StanfordCoreNLP thisPipeline = new StanfordCoreNLP(thisSessionProps);
1083 
1084         // Stanford
1085         logger.info("Annotating with Stanford CoreNLP");
1086         LinguisticProcessor linguisticProcessor = new LinguisticProcessor("text", "Stanford CoreNLP");
1087         linguisticProcessor.setBeginTimestamp();
1088         Annotation document = new Annotation(text);
1089         document.set(CoreAnnotations.DocDateAnnotation.class, NAFdocument.getFileDesc().creationtime);
1090         if (NAFdocument.getFileDesc().title != null) {
1091             document.set(CoreAnnotations.DocTitleAnnotation.class, NAFdocument.getFileDesc().title);
1092         }
1093         thisPipeline.annotate(document);
1094         logger.info(thisPipeline.timingInformation());
1095         linguisticProcessor.setEndTimestamp();
1096         NAFdocument.addLinguisticProcessor(linguisticProcessor.getLayer(), linguisticProcessor);
1097 
1098         annotateStanford(properties, document, NAFdocument);
1099 
1100         logger.info("Parsing finished");
1101         return NAFdocument;
1102     }
1103 
1104     public KAFDocument parseFromNAF(KAFDocument NAFdocument) throws Exception {
1105 
1106         NAFdocument = parseAll(NAFdocument);
1107 
1108         return NAFdocument;
1109     }
1110 
1111     public KAFDocument parseFromString(String textInNafFormat) throws Exception {
1112         logger.debug("Parsing of NAF");
1113 
1114         InputStream is = new ByteArrayInputStream(textInNafFormat.getBytes());
1115         BufferedReader br = new BufferedReader(new InputStreamReader(is));
1116         KAFDocument NAFdocument = KAFDocument.createFromStream(br);
1117 
1118         try {
1119             logger.info("Document: " + NAFdocument.getFileDesc().filename);
1120             logger.info("Title: " + NAFdocument.getFileDesc().title);
1121 //            logger.debug("Text: " + NAFdocument.getRawText());
1122         } catch (Exception e) {
1123             logger.error(e.getMessage());
1124         }
1125 
1126         NAFdocument = parseAll(NAFdocument);
1127 
1128         return NAFdocument;
1129     }
1130 
1131 }