1   package eu.fbk.dkm.pikes.resources;
2   
3   import java.io.IOException;
4   import java.util.Arrays;
5   import java.util.Collection;
6   import java.util.Collections;
7   import java.util.List;
8   import java.util.Map;
9   import java.util.Set;
10  import java.util.function.Consumer;
11  import java.util.regex.Matcher;
12  import java.util.regex.Pattern;
13  
14  import javax.annotation.Nullable;
15  
16  import com.google.common.base.Charsets;
17  import com.google.common.base.Joiner;
18  import com.google.common.base.MoreObjects;
19  import com.google.common.base.Objects;
20  import com.google.common.base.Preconditions;
21  import com.google.common.base.Splitter;
22  import com.google.common.base.Strings;
23  import com.google.common.collect.BiMap;
24  import com.google.common.collect.HashMultimap;
25  import com.google.common.collect.ImmutableBiMap;
26  import com.google.common.collect.ImmutableList;
27  import com.google.common.collect.ImmutableMap;
28  import com.google.common.collect.ImmutableSet;
29  import com.google.common.collect.Lists;
30  import com.google.common.collect.Maps;
31  import com.google.common.collect.Multimap;
32  import com.google.common.collect.Ordering;
33  import com.google.common.collect.Sets;
34  import com.google.common.io.Resources;
35  
36  import eu.fbk.rdfpro.util.Statements;
37  import eu.fbk.utils.svm.Util;
38  import org.eclipse.rdf4j.model.IRI;
39  import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
40  import org.slf4j.Logger;
41  import org.slf4j.LoggerFactory;
42  
43  import ixa.kaflib.Coref;
44  import ixa.kaflib.Dep;
45  import ixa.kaflib.Entity;
46  import ixa.kaflib.ExternalRef;
47  import ixa.kaflib.KAFDocument;
48  import ixa.kaflib.LinkedEntity;
49  import ixa.kaflib.Opinion;
50  import ixa.kaflib.Opinion.OpinionExpression;
51  import ixa.kaflib.Opinion.OpinionHolder;
52  import ixa.kaflib.Opinion.OpinionTarget;
53  import ixa.kaflib.Predicate;
54  import ixa.kaflib.Predicate.Role;
55  import ixa.kaflib.Span;
56  import ixa.kaflib.Term;
57  import ixa.kaflib.Timex3;
58  import ixa.kaflib.WF;
59  
60  
61  /**
62   * A filter for the post-processing of a NAF document.
63   * <p>
 * The filter, configured and created using the builder pattern (see {@link #builder()}), performs
 * several optional and configurable operations on a {@code KAFDocument} that is modified in
 * place. For the supported operations please refer to the javadoc of {@code Builder}.
67   * <p>
68   * This class is thread-safe.
69   * </p>
70   */
71  public final class NAFFilter implements Consumer<KAFDocument> {
72  
73      public static final String SUMO_NAMESPACE = "http://www.ontologyportal.org/SUMO.owl#";
74  
75      public static final IRI SUMO_PROCESS = SimpleValueFactory.getInstance().createIRI(SUMO_NAMESPACE, "Process");
76  //    public static final IRI SUMO_PROCESS = SimpleValueFactory.getInstance().createIRI(SUMO_NAMESPACE, "Process");
77  
78      private static final Logger LOGGER = LoggerFactory.getLogger(NAFFilter.class);
79  
80      private static final Map<String, String> ENTITY_SST_TO_TYPES = ImmutableMap
81              .<String, String>builder().put("person", "PER").put("group", "ORG")
82              .put("location", "LOC").put("quantity", "QUANTITY").put("artifact", "PRODUCT")
83              .put("act", "EVENT").put("event", "EVENT").put("phenomenon", "EVENT")
84              .put("process", "EVENT").put("state", "EVENT").put("animal", "MISC")
85              .put("plant", "MISC").put("body", "MISC").put("shape", "MISC").put("motive", "MISC")
86              .put("object", "MISC").put("substance", "MISC").build();
87      // .put("cognition", "EVENT")
88  
89      private static final Pattern SRL_ROLE_PATTERN = Pattern.compile("A(\\d).*");
90  
91      private static final String PARTICIPATION_REGEX = ""
92              + "SUB? (COORD CONJ?)* (PMOD (COORD CONJ?)*)? ((VC OPRD?)|(IM OPRD?))*";
93  
94      private static final String[] LINKING_STOP_WORDS;
95  
96      private static final BiMap<String, String> MAPPING_PREFIXES = ImmutableBiMap.of("propbank",
97              "pb", "nombank", "nb", "verbnet", "vn", "framenet", "fn");
98  
99      private static final Multimap<String, String> MAPPING_PREDICATES;
100 
101     private static final Multimap<String, String> MAPPING_ARGUMENTS;
102 
103     public static final NAFFilter DEFAULT = NAFFilter.builder().build();
104 
105     static {
106         List<String> stopwords = Collections.emptyList();
107         try {
108             stopwords = Resources.readLines(NAFFilter.class.getResource("linking_stopwords"),
109                     Charsets.UTF_8);
110             LOGGER.info("Loaded {} linking stopwords", stopwords.size());
111         } catch (final IOException ex) {
112             LOGGER.error("Could not load linking stopwords", ex);
113         }
114         LINKING_STOP_WORDS = stopwords.toArray(new String[stopwords.size()]);
115         for (int i = 0; i < LINKING_STOP_WORDS.length; ++i) {
116             LINKING_STOP_WORDS[i] = LINKING_STOP_WORDS[i].toLowerCase();
117         }
118         Arrays.sort(LINKING_STOP_WORDS);
119 
120         MAPPING_PREDICATES = HashMultimap.create();
121         MAPPING_ARGUMENTS = HashMultimap.create();
122         try {
123             for (final String line : Resources.readLines(
124                     NAFFilter.class.getResource("mappings-frames.tsv"), Charsets.UTF_8)) {
125                 final List<String> tokens = Splitter.on("\t").trimResults().splitToList(line);
126                 final String prefix = tokens.get(0).substring(0, 2).toLowerCase();
127                 final String fromKey = prefix + ":" + tokens.get(1);
128                 final String toKey = "fn:" + Character.toUpperCase(tokens.get(2).charAt(0))
129                         + tokens.get(2).substring(1);
130                 MAPPING_PREDICATES.put(fromKey, toKey);
131             }
132             for (final String line : Resources.readLines(
133                     NAFFilter.class.getResource("mappings-roles.tsv"), Charsets.UTF_8)) {
134                 final List<String> tokens = Splitter.on("\t").trimResults().splitToList(line);
135                 final String prefix = tokens.get(0).substring(0, 2).toLowerCase();
136                 final String fromKey = prefix + ":" + tokens.get(1);
137                 final String fnRole = tokens.get(2);
138                 final int index = fnRole.indexOf('@');
139                 final String toKey = "fn:" + Character.toUpperCase(fnRole.charAt(0))
140                         + fnRole.substring(1, index + 1)
141                         + Character.toUpperCase(fnRole.charAt(index + 1))
142                         + fnRole.substring(index + 2);
143                 MAPPING_ARGUMENTS.put(fromKey, toKey);
144             }
145 
146         } catch (final Throwable ex) {
147             LOGGER.error("Could not load mappings", ex);
148         }
149     }
150 
151     private final boolean termSenseFiltering;
152 
153     private final boolean termSenseCompletion;
154 
155     private final boolean entityRemoveOverlaps;
156 
157     private final boolean entitySpanFixing;
158 
159     private final boolean entityAddition;
160 
161     private final boolean entityValueNormalization;
162 
163     private final boolean linkingCompletion;
164 
165     private final boolean linkingFixing;
166 
167     private final boolean corefForRoleDependencies;
168 
169     private final boolean corefSpanFixing;
170 
171     private final boolean srlPreprocess;
172 
173     private final boolean srlEnableMate;
174 
175     private final boolean srlEnableSemafor;
176 
177     private final boolean srlRemoveWrongRefs;
178 
179     private final boolean srlRemoveUnknownPredicates;
180 
181     private final boolean srlPredicateAddition;
182 
183     private final boolean srlSelfArgFixing;
184 
185     private final boolean srlSenseMapping;
186 
187     private final boolean srlSenseMappingPM;
188 
189     private final boolean srlFrameBaseMapping;
190 
191     private final boolean srlRoleLinking;
192 
193     private final boolean srlRoleLinkingUsingCoref;
194 
195     private final boolean srlPreMOnIRIs;
196 
197     private final boolean opinionLinking;
198 
199     private final boolean opinionLinkingUsingCoref;
200 
201     private NAFFilter(final Builder builder) {
202         this.termSenseFiltering = MoreObjects.firstNonNull(builder.termSenseFiltering, true);
203         this.termSenseCompletion = MoreObjects.firstNonNull(builder.termSenseCompletion, true);
204         this.entityRemoveOverlaps = MoreObjects.firstNonNull(builder.entityRemoveOverlaps, true);
205         this.entitySpanFixing = MoreObjects.firstNonNull(builder.entitySpanFixing, true);
206         this.entityAddition = MoreObjects.firstNonNull(builder.entityAddition, true);
207         this.entityValueNormalization = MoreObjects.firstNonNull(builder.entityValueNormalization,
208                 true);
209         this.linkingCompletion = MoreObjects.firstNonNull(builder.linkingCompletion, true);
210         this.linkingFixing = MoreObjects.firstNonNull(builder.linkingFixing, false);
211         this.corefForRoleDependencies = MoreObjects.firstNonNull(builder.corefForRoleDependencies,
212                 false);
213         this.corefSpanFixing = MoreObjects.firstNonNull(builder.corefSpanFixing, false);
214         this.srlPreprocess = MoreObjects.firstNonNull(builder.srlPreprocess, true);
215         this.srlEnableMate = MoreObjects.firstNonNull(builder.srlEnableMate, true);
216         this.srlEnableSemafor = MoreObjects.firstNonNull(builder.srlEnableSemafor, true);
217         this.srlRemoveWrongRefs = MoreObjects.firstNonNull(builder.srlRemoveWrongRefs, true);
218         this.srlRemoveUnknownPredicates = MoreObjects.firstNonNull(
219                 builder.srlRemoveUnknownPredicates, false);
220         this.srlPredicateAddition = MoreObjects.firstNonNull(builder.srlPredicateAddition, true);
221         this.srlSelfArgFixing = MoreObjects.firstNonNull(builder.srlSelfArgFixing, true);
222         this.srlSenseMapping = MoreObjects.firstNonNull(builder.srlSenseMapping, true);
223         this.srlSenseMappingPM = false; // TODO disabled
224         this.srlFrameBaseMapping = MoreObjects.firstNonNull(builder.srlFrameBaseMapping, true);
225         this.srlRoleLinking = MoreObjects.firstNonNull(builder.srlRoleLinking, true);
226         this.srlRoleLinkingUsingCoref = MoreObjects.firstNonNull(builder.srlRoleLinkingUsingCoref,
227                 true);
228 
229         this.srlPreMOnIRIs = MoreObjects.firstNonNull(builder.srlPreMOnIRIs,
230                 true);
231         this.opinionLinking = MoreObjects.firstNonNull(builder.opinionLinking, true);
232         this.opinionLinkingUsingCoref = MoreObjects.firstNonNull(builder.opinionLinkingUsingCoref,
233                 true);
234     }
235 
236     @Override
237     public void accept(final KAFDocument document) {
238         filter(document);
239     }
240 
241     /**
242      * Filters the NAF document specified (the document is modified in-place). Filtering is
243      * controlled by the flags specified when creating the {@code NAFFilter} object.
244      *
245      * @param document
246      *            the document to filter
247      */
248     public void filter(final KAFDocument document) {
249 
250         // Check arguments
251         Preconditions.checkNotNull(document);
252 
253         // Log beginning of operation
254         final long ts = System.currentTimeMillis();
255         LOGGER.debug("== Filtering {} ==", document.getPublic().uri);
256 
257         // Normalize the document
258         NAFUtils.normalize(document);
259 
260         // Term-level filtering
261         if (this.termSenseFiltering) {
262             applyTermSenseFiltering(document);
263         }
264         if (this.termSenseCompletion) {
265             applyTermSenseCompletion(document);
266         }
267 
268         // Entity-level / Linking filtering
269         if (this.entityRemoveOverlaps) {
270             applyEntityRemoveOverlaps(document);
271         }
272         if (this.entitySpanFixing) {
273             applyEntitySpanFixing(document);
274         }
275         if (this.linkingCompletion) {
276             applyLinkingCompletion(document);
277         }
278         if (this.linkingFixing) {
279             applyLinkingFixing(document);
280         }
281         if (this.entityAddition) {
282             applyEntityAddition(document);
283         }
284         if (this.entityValueNormalization) {
285             applyEntityValueNormalization(document);
286         }
287 
288         // SRL-level filtering
289         if (this.srlPreprocess) {
290             applySRLPreprocess(document);
291         }
292         if (this.srlRemoveWrongRefs) {
293             applySRLRemoveWrongRefs(document);
294         }
295         if (this.srlRemoveUnknownPredicates) {
296             applySRLRemoveUnknownPredicates(document);
297         }
298         if (this.srlPredicateAddition) {
299             applySRLPredicateAddition(document);
300         }
301         if (this.srlSelfArgFixing) {
302             applySRLSelfArgFixing(document);
303         }
304         if (this.srlSenseMapping) {
305             applySRLSenseMapping(document);
306         }
307         if (this.srlFrameBaseMapping) {
308             applySRLFrameBaseMapping(document);
309         }
310         if (this.srlRoleLinking) {
311             applySRLRoleLinking(document);
312         }
313 
314         //added for replacing with premon IRIs
315         if (this.srlPreMOnIRIs) {
316             applySRLPreMOnIRIs(document);
317         }
318 
319 
320         // Coref-level filtering
321         if (this.corefForRoleDependencies) {
322             applyCorefForRoleDependencies(document);
323         }
324         if (this.corefSpanFixing) {
325             applyCorefSpanFixing(document);
326         }
327 
328         // Opinion-level filtering
329         if (this.opinionLinking) {
330             applyOpinionLinking(document);
331         }
332 
333 
334 
335 
336         LOGGER.debug("Done in {} ms", System.currentTimeMillis() - ts);
337     }
338 
339 
340 //    private void applyEntityTypeFixing(final KAFDocument document) {
341 //
342 //        for (final Entity entity : ImmutableList.copyOf(document.getEntities())) {
343 //
344 //
345 //
346 //
347 //            // Remove initial determiners and prepositions, plus all the terms not containing at
348 //            // least a letter or a digit. Move to next entity if no change was applied
349 //            final List<Term> filteredTerms = NAFUtils.filterTerms(entity.getTerms());
350 //            if (filteredTerms.size() == entity.getTerms().size()) {
351 //                continue;
352 //            }
353 //
354 //            // Remove the old entity
355 //            document.removeAnnotation(entity);
356 //
357 //            // If some term remained, add the filtered entity, reusing old type, named flag and
358 //            // external references
359 //            Entity newEntity = null;
360 //            if (!filteredTerms.isEmpty()) {
361 //                newEntity = document.newEntity(ImmutableList.of(KAFDocument
362 //                        .newTermSpan(filteredTerms)));
363 //                newEntity.setType(entity.getType());
364 //                newEntity.setNamed(entity.isNamed());
365 //                for (final ExternalRef ref : entity.getExternalRefs()) {
366 //                    newEntity.addExternalRef(ref);
367 //                }
368 //            }
369 //
370 //            // Log the change
371 //            if (LOGGER.isDebugEnabled()) {
372 //                LOGGER.debug((newEntity == null ? "Removed" : "Replaced") + " invalid " //
373 //                        + NAFUtils.toString(entity) + (newEntity == null ? "" : " with filtered " //
374 //                        + NAFUtils.toString(newEntity)));
375 //            }
376 //        }
377 //
378 //    }
379 
380 
381     private void applyTermSenseFiltering(final KAFDocument document) {
382 
383         for (final Term term : document.getTerms()) {
384             if (term.getMorphofeat() != null && term.getMorphofeat().startsWith("NNP")) {
385                 NAFUtils.removeRefs(term, NAFUtils.RESOURCE_WN_SYNSET, null);
386                 NAFUtils.removeRefs(term, NAFUtils.RESOURCE_WN_SST, null);
387                 NAFUtils.removeRefs(term, NAFUtils.RESOURCE_BBN, null);
388                 NAFUtils.removeRefs(term, NAFUtils.RESOURCE_SUMO, null);
389                 NAFUtils.removeRefs(term, NAFUtils.RESOURCE_YAGO, null);
390             }
391         }
392     }
393 
394     private void applyTermSenseCompletion(final KAFDocument document) {
395 
396         for (final Term term : document.getTerms()) {
397 
398             // Retrieve existing refs
399             ExternalRef bbnRef = NAFUtils.getRef(term, NAFUtils.RESOURCE_BBN, null);
400             ExternalRef synsetRef = NAFUtils.getRef(term, NAFUtils.RESOURCE_WN_SYNSET, null);
401             ExternalRef sstRef = NAFUtils.getRef(term, NAFUtils.RESOURCE_WN_SST, null);
402             final List<ExternalRef> sumoRefs = NAFUtils
403                     .getRefs(term, NAFUtils.RESOURCE_SUMO, null);
404             final List<ExternalRef> yagoRefs = NAFUtils
405                     .getRefs(term, NAFUtils.RESOURCE_YAGO, null);
406 
407             // Retrieve a missing SST from the WN Synset (works always)
408             if (sstRef == null && synsetRef != null) {
409                 final String sst = WordNet.mapSynsetToSST(synsetRef.getReference());
410                 if (sstRef == null || !Objects.equal(sstRef.getReference(), sst)) {
411                     LOGGER.debug((sstRef == null ? "Added" : "Overridden") + " SST '" + sst
412                             + "' of " + NAFUtils.toString(term) + " based on Synset '"
413                             + synsetRef.getReference() + "'");
414                     sstRef = document.newExternalRef(NAFUtils.RESOURCE_WN_SST, sst);
415                     NAFUtils.addRef(term, sstRef);
416                 }
417             }
418 
419             // Apply noun-based mapping.
420             final boolean isNoun = Character.toUpperCase(term.getPos().charAt(0)) == 'N';
421             if (isNoun) {
422 
423                 // Retrieve a missing BBN from the WN Synset
424                 if (bbnRef == null && synsetRef != null) {
425                     final String bbn = WordNet.mapSynsetToBBN(synsetRef.getReference());
426                     if (bbn != null) {
427                         bbnRef = document.newExternalRef(NAFUtils.RESOURCE_BBN, bbn);
428                         NAFUtils.addRef(term, bbnRef);
429                         LOGGER.debug("Added BBN '" + bbn + "' of " + NAFUtils.toString(term)
430                                 + " based on Synset '" + synsetRef.getReference() + "'");
431                     }
432 
433                 }
434 
435                 // Retrieve a missing WN Synset from the BBN
436                 if (synsetRef == null && bbnRef != null) {
437                     final String synsetID = WordNet.mapBBNToSynset(bbnRef.getReference());
438                     if (synsetID != null) {
439                         synsetRef = document.newExternalRef(NAFUtils.RESOURCE_WN_SYNSET, synsetID);
440                         NAFUtils.addRef(term, synsetRef);
441                         LOGGER.debug("Added Synset '" + synsetID + "' of "
442                                 + NAFUtils.toString(term) + " based on BBN '"
443                                 + bbnRef.getReference() + "'");
444                     }
445                 }
446 
447                 // Retrieve a missing SST from the BBN
448                 if (sstRef == null && bbnRef != null) {
449                     final String sst = WordNet.mapBBNToSST(bbnRef.getReference());
450                     if (sst != null) {
451                         sstRef = document.newExternalRef(NAFUtils.RESOURCE_WN_SST, sst);
452                         NAFUtils.addRef(term, sstRef);
453                         LOGGER.debug("Added SST '" + sst + "' of " + NAFUtils.toString(term)
454                                 + " based on BBN '" + bbnRef.getReference() + "'");
455                     }
456                 }
457             }
458 
459 //            // Apply mapping to SUMO if synset is available
460 //            final String lemma = term.getLemma().toLowerCase();
461 //            if (sumoRefs.isEmpty() && synsetRef != null && !lemma.equals("be")) {
462 //                Set<String> synsetIDs = Sets.newHashSet(synsetRef.getReference());
463 //                Set<IRI> conceptIRIs = Sumo.synsetsToConcepts(synsetIDs);
464 //                while (conceptIRIs.isEmpty() && !synsetIDs.isEmpty()) {
465 //                    final Set<String> oldSynsetIDs = synsetIDs;
466 //                    synsetIDs = Sets.newHashSet();
467 //                    for (final String oldSynsetID : oldSynsetIDs) {
468 //                        synsetIDs.addAll(WordNet.getHypernyms(oldSynsetID));
469 //                    }
470 //                    conceptIRIs = Sumo.synsetsToConcepts(synsetIDs);
471 //                }
472 //                if (conceptIRIs.isEmpty()) {
473 //                    synsetIDs = WordNet.getHyponyms(synsetRef.getReference());
474 //                    conceptIRIs = Sumo.synsetsToConcepts(synsetIDs);
475 //                }
476 //                if (!conceptIRIs.isEmpty()) {
477 //                    for (final IRI conceptIRI : conceptIRIs) {
478 //                        final String sumoID = conceptIRI.getLocalName();
479 //                        final ExternalRef sumoRef = document.newExternalRef(
480 //                                NAFUtils.RESOURCE_SUMO, sumoID);
481 //                        NAFUtils.setRef(term, sumoRef);
482 //                        LOGGER.debug("Added SUMO mapping: " + NAFUtils.toString(term)
483 //                                + " -> sumo:" + conceptIRI.getLocalName());
484 //                    }
485 //                }
486 //            }
487 //
488 //            // Apply mapping to Yago if synset is available
489 //            if (yagoRefs.isEmpty() && synsetRef != null) {
490 //                for (final IRI uri : YagoTaxonomy.getDBpediaYagoIRIs(ImmutableList.of(synsetRef
491 //                        .getReference()))) {
492 //                    final String yagoID = uri.stringValue().substring(
493 //                            YagoTaxonomy.NAMESPACE.length());
494 //                    final ExternalRef yagoRef = document.newExternalRef(NAFUtils.RESOURCE_YAGO,
495 //                            yagoID);
496 //                    NAFUtils.setRef(term, yagoRef);
497 //                    LOGGER.debug("Added Yago mapping: " + NAFUtils.toString(term) + " -> yago:"
498 //                            + yagoID);
499 //                }
500 //            }
501         }
502     }
503 
504     private void applyEntitySpanFixing(final KAFDocument document) {
505 
506         // Filter or remove entities consisting of invalid terms
507         for (final Entity entity : ImmutableList.copyOf(document.getEntities())) {
508 
509             // Remove initial determiners and prepositions, plus all the terms not containing at
510             // least a letter or a digit. Move to next entity if no change was applied
511             final List<Term> filteredTerms = NAFUtils.filterTerms(entity.getTerms());
512             if (filteredTerms.size() == entity.getTerms().size()) {
513                 continue;
514             }
515 
516             // Remove the old entity
517             document.removeAnnotation(entity);
518 
519             // If some term remained, add the filtered entity, reusing old type, named flag and
520             // external references
521             Entity newEntity = null;
522             if (!filteredTerms.isEmpty()) {
523                 newEntity = document.newEntity(ImmutableList.of(KAFDocument
524                         .newTermSpan(filteredTerms)));
525                 newEntity.setType(entity.getType());
526                 newEntity.setNamed(entity.isNamed());
527                 for (final ExternalRef ref : entity.getExternalRefs()) {
528                     newEntity.addExternalRef(ref);
529                 }
530             }
531 
532             // Log the change
533             if (LOGGER.isDebugEnabled()) {
534                 LOGGER.debug((newEntity == null ? "Removed" : "Replaced") + " invalid " //
535                         + NAFUtils.toString(entity) + (newEntity == null ? "" : " with filtered " //
536                                 + NAFUtils.toString(newEntity)));
537             }
538         }
539     }
540 
541     private void applyEntityRemoveOverlaps(final KAFDocument document) {
542 
543         // Consider all the entities in the document
544         outer: for (final Entity entity : ImmutableList.copyOf(document.getEntities())) {
545             for (final Term term : entity.getTerms()) {
546 
547                 // Remove entities whose span is contained in the span of another entity
548                 for (final Entity entity2 : document.getEntitiesByTerm(term)) {
549                     if (entity2 != entity && entity2.getTerms().containsAll(entity.getTerms())) {
550                         document.removeAnnotation(entity);
551                         if (LOGGER.isDebugEnabled()) {
552                             LOGGER.debug("Removed " + NAFUtils.toString(entity)
553                                     + " overlapping with " + NAFUtils.toString(entity2));
554                         }
555                         continue outer;
556                     }
557                 }
558 
559                 // Remove entities whose span overlaps with the span of some timex
560                 for (final WF wf : term.getWFs()) {
561                     final List<Timex3> timex = document.getTimeExsByWF(wf);
562                     if (!timex.isEmpty()) {
563                         document.removeAnnotation(entity);
564                         if (LOGGER.isDebugEnabled()) {
565                             LOGGER.debug("Removed " + NAFUtils.toString(entity)
566                                     + " overlapping with TIMEX3 '" + NAFUtils.toString(timex));
567                         }
568                         continue outer;
569                     }
570                 }
571             }
572         }
573     }
574 
575     private void applyEntityAddition(final KAFDocument document) {
576 
577         for (final Term term : document.getTerms()) {
578 
579             // Select names, nouns and pronouns that are not part of NE or Timex
580             final char pos = Character.toUpperCase(term.getPos().charAt(0));
581             final Dep dep = document.getDepToTerm(term);
582             final boolean namePart = pos == 'R' && dep != null
583                     && dep.getRfunc().toLowerCase().contains("name")
584                     && Character.toUpperCase(dep.getFrom().getPos().charAt(0)) == 'R'
585                     && document.getEntitiesByTerm(dep.getFrom()).isEmpty();
586             if (pos != 'R' && pos != 'N' && pos != 'Q' || namePart
587                     || !document.getTimeExsByWF(term.getWFs().get(0)).isEmpty() //
588                     || !document.getEntitiesByTerm(term).isEmpty()) {
589                 continue;
590             }
591 
592             // Determine the entity type based on NER tag first, WN synset then and SST last
593             String type = null;
594             final ExternalRef bbnRef = NAFUtils.getRef(term, NAFUtils.RESOURCE_BBN, null);
595             if (bbnRef != null) {
596                 type = bbnRef.getReference();
597             } else {
598                 final ExternalRef synsetRef = NAFUtils.getRef(term, NAFUtils.RESOURCE_WN_SYNSET,
599                         null);
600                 if (synsetRef != null) {
601                     type = WordNet.mapSynsetToBBN(synsetRef.getReference());
602                 } else {
603                     final ExternalRef sstRef = NAFUtils.getRef(term, NAFUtils.RESOURCE_WN_SST,
604                             null);
605                     if (sstRef != null) {
606                         String sst = sstRef.getReference();
607                         sst = sst.substring(sst.lastIndexOf('.') + 1);
608                         type = ENTITY_SST_TO_TYPES.get(sst);
609                     }
610                 }
611             }
612 
613             // Determine the terms for the nominal node.
614             // TODO: consider multiwords
615             final Span<Term> span = NAFUtils.getNominalSpan(document, term, false, false);
616 
617             // Add the entity, setting its type and 'named' flag
618             final Entity entity = document.newEntity(ImmutableList.of(span));
619             if (type!= null) entity.setType(type.toUpperCase().replace("PERSON","PER").replace("ORGANIZATION","ORG").replace("LOCATION","LOC"));
620             entity.setNamed(pos == 'R');
621             if (LOGGER.isDebugEnabled()) {
622                 LOGGER.debug("Added " + (entity.isNamed() ? "named " : "")
623                         + NAFUtils.toString(entity) + " with type '" + type + "'");
624             }
625         }
626     }
627 
628     private void applyEntityValueNormalization(final KAFDocument document) {
629 
630         for (final Entity entity : document.getEntities()) {
631             String type = entity.getType();
632             type = type == null ? null : type.toLowerCase();
633             if ("cardinal".equals(type) || "ordinal".equals(type) || "percent".equals(type)
634                     || "money".equals(type)) {
635 
636                 ExternalRef ref = null;
637                 final String str = entity.getSpans().get(0).getStr().toLowerCase();
638                 Double value = null;
639                 try {
640                     value = NumberSpeller.parse(str);
641                 } catch (Throwable ex) {
642                     LOGGER.debug("Could not parse number '" + str + "'", ex);
643                 }
644                 if (value != null) {
645                     String prefix = "";
646                     if ("percent".equals(type)) {
647                         prefix = "%";
648                     } else if ("money".equals(type)) {
649                         prefix = "¤";
650                         if (str.contains("euro")) {
651                             prefix = "€";
652                         } else if (str.contains("dollar")) {
653                             prefix = "$";
654                         } else if (str.contains("yen")) {
655                             prefix = "¥";
656                         }
657                     }
658                     ref = document.newExternalRef(NAFUtils.RESOURCE_VALUE,
659                             prefix + Double.toString(value.doubleValue()));
660                 }
661 
662                 if (ref != null && NAFUtils.getRef(entity, ref.getResource(), null) == null) {
663                     NAFUtils.addRef(entity, ref);
664                     LOGGER.debug("Added ref '" + ref + "' to " + NAFUtils.toString(entity));
665                 }
666             }
667         }
668     }
669 
670     private void applyLinkingCompletion(final KAFDocument document) {
671 
672         for (final LinkedEntity le : document.getLinkedEntities()) {
673 
674             // Determine head for current linked entity
675             final List<Term> terms = document.getTermsByWFs(le.getWFs().getTargets());
676             final Term head = document.getTermsHead(terms);
677             if (head == null) {
678                 continue;
679             }
680 
681             // Apply the sense to entities with same head where it is missing
682             Entity entityToModify = null;
683             for (final Entity entity : document.getEntitiesByTerm(head)) {
684                 if (head.equals(document.getTermsHead(entity.getTerms()))) {
685                     entityToModify = entity;
686                 }
687             }
688             if (entityToModify == null) {
689                 final Span<Term> span = KAFDocument.newTermSpan(document.getTermsByWFs(le.getWFs()
690                         .getTargets()));
691                 boolean overlap = false;
692                 for (final Term term : span.getTargets()) {
693                     final List<Entity> overlappingEntities = document.getEntitiesByTerm(term);
694                     if (overlappingEntities != null && !overlappingEntities.isEmpty()) {
695                         overlap = true;
696                         break;
697                     }
698                 }
699                 if (!overlap) {
700                     final boolean named = head.getMorphofeat().startsWith("NNP");
701                     boolean accept = named;
702                     if (!accept) {
703                         final String textStr = span.getStr().toLowerCase().replaceAll("\\s+", "_");
704                         final String entityStr = Statements.VALUE_FACTORY.createIRI(le.getReference()).getLocalName()
705                                 .toLowerCase();
706                         accept = textStr.equals(entityStr);
707                     }
708                     if (accept) {
709                         entityToModify = document.newEntity(ImmutableList.of(span));
710                         entityToModify.setNamed(head.getMorphofeat().startsWith("NNP"));
711                         if (LOGGER.isDebugEnabled()) {
712                             LOGGER.debug("Added linked "
713                                     + (entityToModify.isNamed() ? "named " : "")
714                                     + NAFUtils.toString(entityToModify));
715                         }
716                     }
717                 }
718             }
719 
720             if (entityToModify != null){
721                 final ExternalRef existingRef = NAFUtils.getRef(entityToModify, le.getResource(), le.getReference());
722                 if (existingRef==null){
723                     final ExternalRef ref = document.newExternalRef(le.getResource(),
724                             le.getReference());
725                     ref.setConfidence((float) le.getConfidence());
726                     NAFUtils.addRef(entityToModify, ref);
727                     LOGGER.debug("Added ref '" + ref + "' to " + NAFUtils.toString(entityToModify));
728                 } else {
729                     float existingRefConfidence = existingRef.getConfidence();
730                     if (existingRefConfidence<le.getConfidence()) {
731                         existingRef.setConfidence((float) le.getConfidence());
732                         LOGGER.debug("Modified confidence of '" + existingRef + "' to " + le.getConfidence());
733                     }
734                 }
735             }
736 
737 
738             // Apply the sense to predicates with same head where it is missing
739             for (final Predicate predicate : document.getPredicatesByTerm(head)) {
740                 if (head.equals(document.getTermsHead(predicate.getTerms()))) {
741                     if (NAFUtils.getRef(predicate, le.getResource(), le.getReference()) == null) {
742                         final ExternalRef ref = document.newExternalRef(le.getResource(),
743                                 le.getReference());
744                         ref.setConfidence((float) le.getConfidence());
745                         NAFUtils.addRef(predicate, ref);
746                         LOGGER.debug("Added ref '" + ref + "' to " + NAFUtils.toString(predicate));
747                     }
748                 }
749             }
750         }
751     }
752 
753     private void applyLinkingFixing(final KAFDocument document) {
754 
755         // Check each linked entity, dropping the links if the span is in the stop word list
756         final List<ExternalRef> refs = Lists.newArrayList();
757         for (final Entity entity : document.getEntities()) {
758 
759             // Extract all the <ExternalRef> elements with links for the current entity
760             refs.clear();
761             for (final ExternalRef ref : entity.getExternalRefs()) {
762                 if (!NAFUtils.RESOURCE_VALUE.equals(ref.getResource())) {
763                     refs.add(ref);
764                 }
765             }
766 
767             // If the entity is linked, check its span is not in the stop word list
768             if (!refs.isEmpty()) {
769                 final String[] tokens = Util.hardTokenize(entity.getStr());
770                 final String normalized = Joiner.on(' ').join(tokens).toLowerCase();
771                 if (Arrays.binarySearch(LINKING_STOP_WORDS, normalized) >= 0) {
772                     for (final ExternalRef ref : refs) {
773                         NAFUtils.removeRefs(entity, ref.getResource(), ref.getReference());
774                         if (LOGGER.isDebugEnabled()) {
775                             LOGGER.debug("Removed stop-word ref '{}' from {}", ref,
776                                     NAFUtils.toString(entity));
777                         }
778                     }
779                 }
780             }
781         }
782     }
783 
784     @SuppressWarnings("deprecation")
785     private void applyCorefSpanFixing(final KAFDocument document) {
786 
787         // Process each <coref> element in the NAF document
788         for (final Coref coref : ImmutableList.copyOf(document.getCorefs())) {
789 
790             // Remove spans without valid head
791             for (final Span<Term> span : ImmutableList.copyOf(coref.getSpans())) {
792                 final Term head = NAFUtils.extractHead(document, span);
793                 if (head == null) {
794                     coref.getSpans().remove(span);
795                     if (LOGGER.isDebugEnabled()) {
796                         LOGGER.debug("Removed span with invalid head '{}' from {}", span.getStr(),
797                                 NAFUtils.toString(coref));
798                     }
799                 } else {
800                     span.setHead(head);
801                 }
802             }
803 
804             // Remove spans containing smaller spans + determine if there is span with NNP head
805             boolean hasProperNounHead = false;
806             boolean isEvent = false;
807             final List<Span<Term>> spans = ImmutableList.copyOf(coref.getSpans());
808             outer: for (final Span<Term> span1 : spans) {
809                 for (final Span<Term> span2 : spans) {
810                     if (span1.size() > span2.size()
811                             && span1.getTargets().containsAll(span2.getTargets())) {
812                         coref.getSpans().remove(span1);
813                         if (LOGGER.isDebugEnabled()) {
814                             LOGGER.debug("Removed span '{}' including smaller span '{}' from {}",
815                                     span1.getStr(), span2.getStr(), NAFUtils.toString(coref));
816                         }
817                         continue outer;
818                     }
819                 }
820                 hasProperNounHead |= span1.getHead().getMorphofeat().startsWith("NNP");
821                 if (!isEvent) {
822                     for (final ExternalRef ref : NAFUtils.getRefs(span1.getHead(),
823                             NAFUtils.RESOURCE_SUMO, null)) {
824                         final IRI sumoID = Statements.VALUE_FACTORY.createIRI(SUMO_NAMESPACE + ref.getReference());
825                         if (Sumo.isSubClassOf(sumoID, SUMO_PROCESS)) {
826                             isEvent = true;
827                         }
828                     }
829                 }
830             }
831 
832             // Shrink spans containing a proper name, if head of another span is proper name
833             if (hasProperNounHead) {
834 
835                 // Drop spans not corresponding to non-role predicates
836                 for (final Span<Term> span : ImmutableList.copyOf(coref.getSpans())) {
837                     final Term head = span.getHead();
838                     if (!head.getMorphofeat().startsWith("NNP") && !isEvent) {
839                         if (head.getMorphofeat().startsWith("VB")) {
840                             coref.getSpans().remove(span);
841                             LOGGER.debug("Removed span with VB head '{}' from {}", span.getStr(),
842                                     NAFUtils.toString(coref));
843                         } else {
844                             outer: for (final Predicate predicate : document
845                                     .getPredicatesByTerm(head)) {
846                                 for (final ExternalRef ref : NAFUtils.getRefs(predicate,
847                                         NAFUtils.RESOURCE_NOMBANK, null)) {
848                                     final NomBank.Roleset roleset = NomBank.getRoleset(ref
849                                             .getReference());
850                                     if (roleset != null
851                                             && roleset.getPredMandatoryArgNums().isEmpty()
852                                             && roleset.getPredOptionalArgNums().isEmpty()) {
853                                         // Not a role
854                                         coref.getSpans().remove(span);
855                                         LOGGER.debug("Removed span with non-role predicate "
856                                                 + "head '{}' from {}", span.getStr(),
857                                                 NAFUtils.toString(coref));
858                                         break outer;
859                                     }
860                                 }
861                             }
862                         }
863                     }
864                 }
865 
866             } else {
867 
868                 // Split the coreference set into multiple sets, one for each sentence
869                 final Multimap<Integer, Span<Term>> spansBySentence = HashMultimap.create();
870                 for (final Span<Term> span : coref.getSpans()) {
871                     final int sentID = span.getTargets().get(0).getSent();
872                     spansBySentence.put(sentID, span);
873                 }
874                 if (spansBySentence.keySet().size() > 1) {
875                     coref.getSpans().clear();
876                     for (final Collection<Span<Term>> sentSpans : spansBySentence.asMap().values()) {
877                         if (sentSpans.size() > 1) {
878                             document.newCoref(Lists.newArrayList(sentSpans));
879                         }
880                     }
881                 }
882 
883             }
884 
885             // Drop coref in case no span remains.
886             if (coref.getSpans().isEmpty()) {
887                 document.removeAnnotation(coref);
888                 LOGGER.debug("Removed empty coref set {}", NAFUtils.toString(coref));
889             }
890         }
891     }
892 
893     private void applyCorefForRoleDependencies(final KAFDocument document) {
894 
895         outer: for (final Dep dep : document.getDeps()) {
896             final String label = dep.getRfunc();
897             if ("APPO".equals(label) || "TITLE".equals(label) || "NMOD".equals(label)) {
898 
899                 // Identify the proper name term and the role name term
900                 Term nameTerm;
901                 Term roleTerm;
902                 final String posFrom = dep.getFrom().getMorphofeat();
903                 final String posTo = dep.getTo().getMorphofeat();
904                 if (posFrom.startsWith("NNP") && posTo.startsWith("NN")
905                         && !posTo.startsWith("NNP")) {
906                     nameTerm = dep.getFrom();
907                     roleTerm = dep.getTo();
908                 } else if (posTo.startsWith("NNP") && posFrom.startsWith("NN")
909                         && !posFrom.startsWith("NNP") && label.equals("APPO")) {
910                     nameTerm = dep.getTo();
911                     roleTerm = dep.getFrom();
912                 } else {
913                     continue outer;
914                 }
915 
916                 // Abort if the two terms are already marked as coreferential
917                 for (final Coref coref : document.getCorefsByTerm(nameTerm)) {
918                     if (NAFUtils.hasHead(document, coref, nameTerm)
919                             && NAFUtils.hasHead(document, coref, roleTerm)) {
920                         continue outer;
921                     }
922                 }
923 
924                 // Verify the role term actually corresponds to a nombank role
925                 boolean isActualRole = false;
926                 predLoop: for (final Predicate predicate : document.getPredicatesByTerm(roleTerm)) {
927                     for (final ExternalRef ref : predicate.getExternalRefs()) {
928                         if (NAFUtils.RESOURCE_NOMBANK.equals(ref.getResource())) {
929                             final NomBank.Roleset rs = NomBank.getRoleset(ref.getReference());
930                             if (rs != null && (!rs.getPredMandatoryArgNums().isEmpty() //
931                                     || !rs.getPredOptionalArgNums().isEmpty())) {
932                                 isActualRole = true;
933                                 break predLoop;
934                             }
935                         }
936                     }
937                 }
938                 if (!isActualRole) {
939                     continue outer;
940                 }
941 
942                 // Expand coordination
943                 final Set<Term> roleHeads = document.getTermsByDepAncestors(
944                         ImmutableSet.of(roleTerm), "(COORD CONJ?)*");
945                 final Set<Term> nameHeads = document.getTermsByDepAncestors(
946                         ImmutableSet.of(nameTerm), "(COORD CONJ?)*");
947 
948                 // Check that all name heads are proper names
949                 for (final Term nameHead : nameHeads) {
950                     if (!nameHead.getMorphofeat().startsWith("NNP")) {
951                         continue outer;
952                     }
953                 }
954 
955                 // Check role plural/singular form
956                 for (final Term roleHead : roleHeads) {
957                     final boolean plural = roleHead.getMorphofeat().endsWith("S");
958                     if (nameHeads.size() == 1 && plural || nameHeads.size() > 1 && !plural) {
959                         continue outer;
960                     }
961                 }
962 
963                 // Add a new coreference cluster
964                 final List<Span<Term>> spans = Lists.newArrayList();
965                 spans.add(NAFUtils.getNominalSpan(document, nameTerm, true, false));
966                 for (final Term roleHead : roleHeads) {
967                     spans.add(NAFUtils.getNominalSpan(document, roleHead, false, false));
968                 }
969                 final Coref coref = document.newCoref(spans);
970                 if (LOGGER.isDebugEnabled()) {
971                     final StringBuilder builder = new StringBuilder("Added coref ");
972                     builder.append(coref.getId()).append(":");
973                     for (final Span<Term> span : coref.getSpans()) {
974                         builder.append(" '").append(span.getStr()).append('\'');
975                     }
976                     LOGGER.debug(builder.toString());
977                 }
978             }
979         }
980     }
981 
982     private void applySRLPreprocess(final KAFDocument document) {
983 
984         // Allocate two maps to store term -> predicate pairs
985         final Map<Term, Predicate> matePredicates = Maps.newHashMap();
986         final Map<Term, Predicate> semaforPredicates = Maps.newHashMap();
987 
988         // Remove predicates with invalid head
989         for (final Predicate predicate : ImmutableList.copyOf(document.getPredicates())) {
990             if (NAFUtils.extractHead(document, predicate.getSpan()) == null) {
991                 document.removeAnnotation(predicate);
992                 LOGGER.debug("Removed {} without valid head term", predicate);
993             }
994         }
995 
996         // TODO: remove once fixed - normalize Semafor roles
997         //        if (this.srlEnableSemafor) {
998         //            for (final Predicate predicate : document.getPredicates()) {
999         //                if (predicate.getId().startsWith("f_pr")
1000         //                        || "semafor".equalsIgnoreCase(predicate.getSource())) {
1001         //                    for (final Role role : predicate.getRoles()) {
1002         //                        role.setSemRole("");
1003         //                        final Term head = NAFUtils.extractHead(document, role.getSpan());
1004         //                        if (head != null) {
1005         //                            final Span<Term> newSpan = KAFDocument.newTermSpan(Ordering.from(
1006         //                                    Term.OFFSET_COMPARATOR).sortedCopy(
1007         //                                    document.getTermsByDepAncestors(ImmutableList.of(head))));
1008         //                            role.setSpan(newSpan);
1009         //                        }
1010         //                    }
1011         //                }
1012         //            }
1013         //        }
1014 
1015         // TODO: remove alignments from PM
1016         //        for (final Predicate predicate : document.getPredicates()) {
1017         //            if (!predicate.getId().startsWith("f_pr")
1018         //                    && !"semafor".equalsIgnoreCase(predicate.getSource())) {
1019         //                NAFUtils.removeRefs(predicate, "FrameNet", null);
1020         //                for (final Role role : predicate.getRoles()) {
1021         //                    NAFUtils.removeRefs(role, "FrameNet", null);
1022         //                }
1023         //            }
1024         //        }
1025 
1026         // Remove predicates from non-enabled tools (Mate, Semafor)
1027         for (final Predicate predicate : Lists.newArrayList(document.getPredicates())) {
1028             final boolean isSemafor = predicate.getId().startsWith("f_pr")
1029                     || "semafor".equalsIgnoreCase(predicate.getSource());
1030             if (isSemafor && !this.srlEnableSemafor || !isSemafor && !this.srlEnableMate) {
1031                 document.removeAnnotation(predicate);
1032                 if (LOGGER.isDebugEnabled()) {
1033                     LOGGER.debug("Removed " + NAFUtils.toString(predicate) + " (disabled)");
1034                 }
1035             } else {
1036                 final Term term = NAFUtils.extractHead(document, predicate.getSpan());
1037                 (isSemafor ? semaforPredicates : matePredicates).put(term, predicate);
1038             }
1039         }
1040 
1041         // For each Semafor predicate, merge a corresponding Mate predicate for the same term
1042         for (final Map.Entry<Term, Predicate> entry : semaforPredicates.entrySet()) {
1043             final Term term = entry.getKey();
1044             final Predicate semaforPredicate = entry.getValue();
1045             final Predicate matePredicate = matePredicates.get(term);
1046             if (matePredicate != null) {
1047 
1048                 // Determine whether FrameNet predicate corresponds (-> FN data can be merged)
1049                 final ExternalRef semaforRef = NAFUtils.getRef(semaforPredicate, "FrameNet", null);
1050                 final ExternalRef mateRef = NAFUtils.getRef(matePredicate, "FrameNet", null);
1051                 final boolean mergeFramenet = semaforRef != null && mateRef != null
1052                         && semaforRef.getReference().equalsIgnoreCase(mateRef.getReference());
1053 
1054                 // Merge predicate types
1055                 for (final ExternalRef ref : NAFUtils.getRefs(matePredicate, null, null)) {
1056                     if (!ref.getResource().equalsIgnoreCase("FrameNet")) {
1057                         NAFUtils.addRef(semaforPredicate, new ExternalRef(ref));
1058                     }
1059                 }
1060 
1061                 // Merge roles
1062                 for (final Role mateRole : matePredicate.getRoles()) {
1063                     boolean addRole = true;
1064                     final Set<Term> mateTerms = ImmutableSet.copyOf(mateRole.getSpan()
1065                             .getTargets());
1066                     for (final Role semaforRole : semaforPredicate.getRoles()) {
1067                         final Set<Term> semaforTerms = ImmutableSet.copyOf(semaforRole.getSpan()
1068                                 .getTargets());
1069                         if (mateTerms.equals(semaforTerms)) {
1070                             addRole = false;
1071                             semaforRole.setSemRole(mateRole.getSemRole());
1072                             final boolean addFramenetRef = mergeFramenet
1073                                     && NAFUtils.getRef(semaforRole, "FrameNet", null) != null;
1074                             for (final ExternalRef ref : mateRole.getExternalRefs()) {
1075                                 if (!ref.getResource().equalsIgnoreCase("FrameNet")
1076                                         || addFramenetRef) {
1077                                     semaforRole.addExternalRef(new ExternalRef(ref));
1078                                 }
1079                             }
1080                         }
1081                     }
1082                     if (addRole) {
1083                         final Role semaforRole = document.newRole(semaforPredicate,
1084                                 mateRole.getSemRole(), mateRole.getSpan());
1085                         semaforPredicate.addRole(semaforRole);
1086                         for (final ExternalRef ref : mateRole.getExternalRefs()) {
1087                             semaforRole.addExternalRef(new ExternalRef(ref));
1088                         }
1089                     }
1090                 }
1091 
1092                 // Delete original Mate predicate
1093                 document.removeAnnotation(matePredicate);
1094 
1095                 // Log operation
1096                 if (LOGGER.isDebugEnabled()) {
1097                     LOGGER.debug("Merged " + NAFUtils.toString(matePredicate) + " into "
1098                             + NAFUtils.toString(semaforPredicate)
1099                             + (mergeFramenet ? " (including FrameNet data)" : ""));
1100                 }
1101 
1102             }
1103         }
1104     }
1105 
1106     private void applySRLRemoveWrongRefs(final KAFDocument document) {
1107 
1108         // Scan all predicates in the SRL layer
1109         for (final Predicate predicate : Lists.newArrayList(document.getPredicates())) {
1110 
1111             // Extract correct lemma from predicate term
1112             final Term head = document.getTermsHead(predicate.getTerms());
1113             final String expectedLemma = head.getLemma();
1114 
1115             // Determine which resource to look for: PropBank vs NomBank
1116             final String resource = head.getPos().equalsIgnoreCase("V") ? "propbank" : "nombank";
1117 
1118             // Clean rolesets
1119             final List<ExternalRef> refs = NAFUtils.getRefs(predicate, resource, null);
1120             Integer expectedSense = null;
1121             for (final ExternalRef ref : refs) {
1122                 if (ref.getSource() != null) {
1123                     expectedSense = NAFUtils.extractSense(ref.getReference());
1124                     break;
1125                 }
1126             }
1127             for (final ExternalRef ref : refs) {
1128                 final String lemma = NAFUtils.extractLemma(ref.getReference());
1129                 final Integer sense = NAFUtils.extractSense(ref.getReference());
1130                 if (!expectedLemma.equalsIgnoreCase(lemma) || expectedSense != null
1131                         && !expectedSense.equals(sense)) {
1132                     NAFUtils.removeRefs(predicate, resource, ref.getReference());
1133                     if (LOGGER.isDebugEnabled()) {
1134                         LOGGER.debug("Removed wrong roleset '" + ref.getReference() + "' for "
1135                                 + NAFUtils.toString(predicate));
1136                     }
1137                 }
1138             }
1139 
1140             // Clean roles
1141             for (final Role role : predicate.getRoles()) {
1142                 final Integer expectedNum = NAFUtils.extractArgNum(role.getSemRole());
1143                 for (final ExternalRef ref : NAFUtils.getRefs(role, resource, null)) {
1144                     final String lemma = NAFUtils.extractLemma(ref.getReference());
1145                     final Integer sense = NAFUtils.extractSense(ref.getReference());
1146                     final Integer num = NAFUtils.extractArgNum(ref.getReference());
1147                     if (!Objects.equal(expectedNum, num) || !expectedLemma.equalsIgnoreCase(lemma)
1148                             || expectedSense != null && !expectedSense.equals(sense)) {
1149                         role.getExternalRefs().remove(ref);
1150                         if (LOGGER.isDebugEnabled()) {
1151                             LOGGER.debug("Removed wrong role '" + ref.getReference() + "' for "
1152                                     + NAFUtils.toString(predicate));
1153                         }
1154                     }
1155                 }
1156             }
1157         }
1158     }
1159 
1160     private void applySRLRemoveUnknownPredicates(final KAFDocument document) {
1161 
1162         // Scan all predicates in the SRL layer
1163         for (final Predicate predicate : Lists.newArrayList(document.getPredicates())) {
1164 
1165             // Determine whether the predicate is a verb and thus which resource to check for>
1166             final Term head = document.getTermsHead(predicate.getTerms());
1167             final boolean isVerb = head.getPos().equalsIgnoreCase("V");
1168             final String resource = isVerb ? "propbank" : "nombank";
1169 
1170             // Predicate is invalid if its roleset is unknown in NomBank / PropBank
1171             for (final ExternalRef ref : NAFUtils.getRefs(predicate, resource, null)) {
1172                 final String roleset = ref.getReference();
1173                 if (isVerb && PropBank.getRoleset(roleset) == null || !isVerb
1174                         && NomBank.getRoleset(roleset) == null) {
1175                     document.removeAnnotation(predicate);
1176                     if (LOGGER.isDebugEnabled()) {
1177                         LOGGER.debug("Removed " + NAFUtils.toString(predicate)
1178                                 + " with unknown sense '" + roleset + "' in resource " + resource);
1179                     }
1180                     break;
1181                 }
1182             }
1183         }
1184     }
1185 
1186     private void applySRLPredicateAddition(final KAFDocument document) {
1187 
1188         for (final Term term : document.getTerms()) {
1189 
1190             // Ignore terms already marked as predicates or timex or that are part of proper names
1191             final char pos = Character.toUpperCase(term.getPos().charAt(0));
1192             if (pos != 'V' && pos != 'N' && pos != 'G' && pos != 'A'
1193                     || !document.getPredicatesByTerm(term).isEmpty()
1194                     || !document.getTimeExsByWF(term.getWFs().get(0)).isEmpty()) {
1195                 continue;
1196             }
1197 
1198             // Identify the smallest entity the term belongs to, if any, in which case require
1199             // the term to be the head of the entity. This will discard other terms inside an
1200             // entity (even if nouns), thus enforcing a policy where entities are indivisible
1201             Entity entity = null;
1202             for (final Entity e : document.getEntitiesByTerm(term)) {
1203                 if (entity == null || e.getTerms().size() < entity.getTerms().size()) {
1204                     entity = e;
1205                     break;
1206                 }
1207             }
1208             if (entity != null && term != document.getTermsHead(entity.getTerms())) {
1209                 continue;
1210             }
1211 
1212             // Decide if a predicate can be added and, in case, which is its roleset,
1213             // distinguishing between verbs (-> PropBank) and other terms (-> NomBank)
1214             ExternalRef ref = null;
1215             final String lemma = term.getLemma();
1216             if (pos == 'V') {
1217                 final List<PropBank.Roleset> rolesets = PropBank.getRolesets(lemma);
1218                 if (rolesets.size() == 1) {
1219                     final String rolesetID = rolesets.get(0).getID();
1220                     ref = document.newExternalRef(NAFUtils.RESOURCE_PROPBANK, rolesetID);
1221                 }
1222             } else {
1223                 final List<NomBank.Roleset> rolesets = NomBank.getRolesetsForLemma(lemma);
1224                 if (rolesets.size() == 1) {
1225                     final String rolesetID = rolesets.get(0).getId();
1226                     ref = document.newExternalRef(NAFUtils.RESOURCE_NOMBANK, rolesetID);
1227                 }
1228             }
1229 
1230             // Create the predicate, if possible
1231             if (ref != null) {
1232                 final Predicate predicate = document.newPredicate(KAFDocument.newTermSpan(
1233                         Collections.singletonList(term), term));
1234                 predicate.addExternalRef(ref);
1235                 if (LOGGER.isDebugEnabled()) {
1236                     LOGGER.debug("Added " + NAFUtils.toString(predicate) + ", sense '"
1237                             + ref.getReference() + "'");
1238                 }
1239             }
1240         }
1241     }
1242 
1243     private void applySRLSelfArgFixing(final KAFDocument document) {
1244 
1245         for (final Predicate predicate : document.getPredicates()) {
1246 
1247             // Skip verbs
1248             final Term predTerm = predicate.getTerms().get(0);
1249             if (predTerm.getPos().equalsIgnoreCase("V")) {
1250                 continue;
1251             }
1252 
1253             // Retrieve the NomBank roleset for current predicate, if known. Skip otherwise
1254             final String rolesetID = NAFUtils.getRoleset(predicate);
1255             final NomBank.Roleset roleset = rolesetID == null ? null : NomBank
1256                     .getRoleset(rolesetID);
1257             if (roleset == null) {
1258                 continue;
1259             }
1260 
1261             // Retrieve mandatory and optional roles associated to NomBank roleset
1262             final List<Integer> mandatoryArgs = roleset.getPredMandatoryArgNums();
1263             final List<Integer> optionalArgs = roleset.getPredOptionalArgNums();
1264 
1265             // Check current role assignment to predicate term. Mark it as invalid if necessary
1266             int currentNum = -1;
1267             for (final Role role : ImmutableList.copyOf(predicate.getRoles())) {
1268                 final Term headTerm = document.getTermsHead(role.getTerms());
1269                 if (headTerm == predTerm && role.getSemRole() != null) {
1270                     boolean valid = false;
1271                     final Matcher matcher = SRL_ROLE_PATTERN.matcher(role.getSemRole());
1272                     if (matcher.matches()) {
1273                         currentNum = Integer.parseInt(matcher.group(1));
1274                         valid = roleset.getPredMandatoryArgNums().contains(currentNum)
1275                                 || roleset.getPredOptionalArgNums().contains(currentNum);
1276                     }
1277                     if (!valid) {
1278                         predicate.removeRole(role);
1279                         LOGGER.debug("Removed " + NAFUtils.toString(role) + " for "
1280                                 + NAFUtils.toString(predicate) + " (mandatory " + mandatoryArgs
1281                                 + ", optional " + optionalArgs + ")");
1282                     }
1283                 }
1284             }
1285 
1286             // Add missing role marking, if necessary
1287             if (!roleset.getPredMandatoryArgNums().isEmpty()) {
1288                 final List<Integer> args = Lists.newArrayList();
1289                 args.addAll(roleset.getPredMandatoryArgNums());
1290                 args.remove((Object) currentNum);
1291                 for (final Integer arg : args) {
1292                     final List<Term> terms = Ordering.from(Term.OFFSET_COMPARATOR).sortedCopy(
1293                             document.getTermsByDepAncestors(Collections.singleton(predTerm)));
1294                     final Span<Term> span = KAFDocument.newTermSpan(terms, predTerm);
1295                     final String semRole = "A" + arg;
1296                     final Role role = document.newRole(predicate, semRole, span);
1297                     predicate.addRole(role);
1298                     if (LOGGER.isDebugEnabled()) {
1299                         LOGGER.debug("Added " + NAFUtils.toString(role) + " to "
1300                                 + NAFUtils.toString(predicate));
1301                     }
1302                 }
1303             }
1304         }
1305     }
1306 
1307     private void applySRLSenseMapping(final KAFDocument document) {
1308 
1309         for (final Predicate predicate : document.getPredicates()) {
1310 
1311             // Apply specific mappings
1312             mapExternalRefs(predicate, MAPPING_PREDICATES);
1313 
1314             // Apply Predicate Matrix mappings, if enabled
1315             NomBank.Roleset nbRoleset = null;
1316             PropBank.Roleset pbRoleset = null;
1317             if (this.srlSenseMappingPM) {
1318                 // Obtain the PropBank roleset, either directly or mapping from NomBank
1319                 if (predicate.getTerms().get(0).getPos().equalsIgnoreCase("V")) {
1320                     final ExternalRef ref = predicate.getExternalRef(NAFUtils.RESOURCE_PROPBANK);
1321                     pbRoleset = ref == null ? null : PropBank.getRoleset(ref.getReference());
1322                 } else {
1323                     final ExternalRef ref = predicate.getExternalRef(NAFUtils.RESOURCE_NOMBANK);
1324                     nbRoleset = ref == null ? null : NomBank.getRoleset(ref.getReference());
1325                     final String pbSense = nbRoleset == null ? null : nbRoleset.getPBId();
1326                     pbRoleset = pbSense == null ? null : PropBank.getRoleset(pbSense);
1327                 }
1328 
1329                 // Skip the predicate if the PropBank roleset could not be obtained
1330                 if (pbRoleset != null) {
1331                     // Add an external ref for the PropBank roleset, if missing
1332                     if (NAFUtils.getRef(predicate, NAFUtils.RESOURCE_PROPBANK, pbRoleset.getID()) == null) {
1333                         NAFUtils.addRef(predicate, document.newExternalRef( //
1334                                 NAFUtils.RESOURCE_PROPBANK, pbRoleset.getID()));
1335                     }
1336 
1337                     // Apply mappings from the predicate matrix (indexed in PropBank.Roleset object)
1338                     for (final String vnFrame : pbRoleset.getVNFrames()) {
1339                         NAFUtils.setRef(predicate,
1340                                 document.newExternalRef(NAFUtils.RESOURCE_VERBNET, vnFrame));
1341                     }
1342                     for (final String fnFrame : pbRoleset.getFNFrames()) {
1343                         NAFUtils.setRef(predicate,
1344                                 document.newExternalRef(NAFUtils.RESOURCE_FRAMENET, fnFrame));
1345                     }
1346                 }
1347             }
1348 
1349             // Map predicate roles
1350             for (final Role role : predicate.getRoles()) {
1351 
1352                 // Add missing ref if necessary
1353                 if (role.getSemRole().startsWith("A")) {
1354                     final boolean verb = NAFUtils.extractHead(document, predicate.getSpan())
1355                             .getMorphofeat().startsWith("VB");
1356                     final String resource = verb ? "PropBank" : "NomBank";
1357                     final ExternalRef ref = NAFUtils.getRef(predicate, resource, null);
1358                     if (ref != null) {
1359                         final String r = role.getSemRole().startsWith("AM-") ? role.getSemRole()
1360                                 .substring(3) : role.getSemRole().substring(1);
1361                         role.addExternalRef(new ExternalRef(resource, ref.getReference() + "@"
1362                                 + r.toLowerCase()));
1363                     }
1364                 }
1365 
1366                 // Apply specific mappings
1367                 mapExternalRefs(role, MAPPING_ARGUMENTS);
1368 
1369                 // Apply Predicate Matrix mappings, if enabled
1370                 if (this.srlSenseMappingPM) {
1371                     final String semRole = role.getSemRole();
1372                     final char numChar = semRole.charAt(semRole.length() - 1);
1373                     if (semRole != null && Character.isDigit(numChar)) {
1374 
1375                         // Determine the PropBank arg num
1376                         final int num = Character.digit(numChar, 10);
1377                         final int pbNum = nbRoleset == null ? num : nbRoleset.getArgPBNum(num);
1378                         if (pbNum < 0) {
1379                             continue;
1380                         }
1381                         final String pbRole = pbRoleset.getID() + '@' + pbNum;
1382                         // final String pbRole = semRole.substring(0, semRole.length() - 2) + pbNum;
1383 
1384                         // Create an external ref for the PropBank role, if missing
1385                         if (NAFUtils.getRef(role, NAFUtils.RESOURCE_PROPBANK, pbRole) == null) {
1386                             NAFUtils.setRef(role,
1387                                     document.newExternalRef(NAFUtils.RESOURCE_PROPBANK, pbRole));
1388                         }
1389 
1390                         // Apply mappings from the predicate matrix
1391                         for (final String vnRole : pbRoleset.getArgVNRoles(pbNum)) {
1392                             NAFUtils.setRef(role,
1393                                     document.newExternalRef(NAFUtils.RESOURCE_VERBNET, vnRole));
1394                         }
1395                         for (final String fnRole : pbRoleset.getArgFNRoles(pbNum)) {
1396                             NAFUtils.setRef(role,
1397                                     document.newExternalRef(NAFUtils.RESOURCE_FRAMENET, fnRole));
1398                         }
1399                     }
1400                 }
1401             }
1402         }
1403     }
1404 
1405     private void applySRLFrameBaseMapping(final KAFDocument document) {
1406 
1407         // Process each predicate and role in the SRL layer
1408         for (final Predicate predicate : document.getPredicates()) {
1409 
1410             // Determine the POS necessary for FrameBase disambiguation (n/a/v/other)
1411             final Term head = NAFUtils.extractHead(document, predicate.getSpan());
1412             final FrameBase.POS pos = FrameBase.POS.forPennTag(head.getMorphofeat());
1413 
1414             // Determine the lemma, handling multiwords
1415             final StringBuilder builder = new StringBuilder();
1416             for (final Term term : predicate.getSpan().getTargets()) {
1417                 builder.append(builder.length() == 0 ? "" : "_");
1418                 builder.append(term.getLemma().toLowerCase());
1419             }
1420             final String lemma = builder.toString();
1421 
1422             // Convert FrameNet refs to FrameBase refs at the predicate level
1423             for (final ExternalRef ref : ImmutableList.copyOf(predicate.getExternalRefs())) {
1424                 if (ref.getResource().equalsIgnoreCase("framenet")) {
1425                     final String frame = ref.getReference();
1426                     final IRI fnClass = FrameBase.classFor(frame, lemma, pos);
1427                     if (fnClass != null) {
1428                         NAFUtils.setRef(predicate,
1429                                 new ExternalRef("FrameBase", fnClass.getLocalName()));
1430                     }
1431                 }
1432             }
1433 
1434             // Convert FrameNet refs to FrameBase refs at the role level
1435             for (final Role role : predicate.getRoles()) {
1436                 for (final ExternalRef ref : ImmutableList.copyOf(role.getExternalRefs())) {
1437                     if (ref.getResource().equalsIgnoreCase("framenet")) {
1438                         final String s = ref.getReference();
1439                         final int index = s.indexOf('@');
1440                         if (index > 0) {
1441                             final String frame = s.substring(0, index);
1442                             final String fe = s.substring(index + 1);
1443                             final IRI fnProperty = FrameBase.propertyFor(frame, fe);
1444                             if (fnProperty != null) {
1445                                 NAFUtils.setRef(role,
1446                                         new ExternalRef("FrameBase", fnProperty.getLocalName()));
1447                             }
1448                         }
1449                     }
1450                 }
1451             }
1452         }
1453     }
1454 
1455     private void applySRLRoleLinking(final KAFDocument document) {
1456 
1457         // Process all the roles in the SRL layer
1458         for (final Predicate predicate : Lists.newArrayList(document.getPredicates())) {
1459             for (final Role role : predicate.getRoles()) {
1460 
1461                 // Identify the role head. Skip if not found.
1462                 final Term head = NAFUtils.extractHead(document, role.getSpan());
1463                 if (head == null) {
1464                     continue;
1465                 }
1466 
1467                 // Identify the terms that can be linked
1468                 final Set<Term> argTerms = document.getTermsByDepAncestors(
1469                         Collections.singleton(head), PARTICIPATION_REGEX);
1470 
1471                 // Perform the linking, possible augmenting terms using coref info
1472                 linkEntitiesTimexPredicates(document, role, role.getSpan(), argTerms,
1473                         this.srlRoleLinkingUsingCoref);
1474             }
1475         }
1476     }
1477 
1478     private void applyOpinionLinking(final KAFDocument document) {
1479 
1480         // Process all the opinions in the NAF document
1481         for (final Opinion opinion : document.getOpinions()) {
1482 
1483             // Add links for the opinion expression, if any
1484             final OpinionExpression expression = opinion.getOpinionExpression();
1485             if (expression != null) {
1486                 linkEntitiesTimexPredicates(
1487                         document,
1488                         expression,
1489                         expression.getSpan(),
1490                         NAFUtils.extractHeads(document, null, expression.getTerms(),
1491                                 NAFUtils.matchExtendedPos(document, "NN", "VB", "JJ", "R")),
1492                         this.opinionLinkingUsingCoref);
1493             }
1494 
1495             // Add links for the opinion holder, if any
1496             final OpinionHolder holder = opinion.getOpinionHolder();
1497             if (holder != null) {
1498                 linkEntitiesTimexPredicates(document, holder, holder.getSpan(),
1499                         NAFUtils.extractHeads(document, null, holder.getTerms(), NAFUtils
1500                                 .matchExtendedPos(document, "NN", "PRP", "JJP", "DTP", "WP")),
1501                         this.opinionLinkingUsingCoref);
1502             }
1503 
1504             // Add links for the opinion target, if any
1505             final OpinionTarget target = opinion.getOpinionTarget();
1506             if (target != null) {
1507                 linkEntitiesTimexPredicates(
1508                         document,
1509                         target,
1510                         target.getSpan(),
1511                         NAFUtils.extractHeads(document, null, target.getTerms(), NAFUtils
1512                                 .matchExtendedPos(document, "NN", "PRP", "JJP", "DTP", "WP", "VB")),
1513                         this.opinionLinkingUsingCoref);
1514             }
1515         }
1516     }
1517 
1518     private static void linkEntitiesTimexPredicates(final KAFDocument document,
1519             final Object annotation, final Span<Term> spanToModify, final Set<Term> heads,
1520             final boolean useCoref) {
1521 
1522         // Add heads to span, if possible
1523         spanToModify.getHeads().clear();
1524         if (!heads.isEmpty()) {
1525             spanToModify.getHeads().addAll(heads);
1526         }
1527 
1528         // Apply coreference if specified
1529         Set<Term> linkableTerms = heads;
1530         if (useCoref) {
1531             linkableTerms = Sets.newHashSet(heads);
1532             for (final Term argTerm : heads) {
1533                 for (final Coref coref : document.getCorefsByTerm(argTerm)) {
1534                     final List<Term> spanHeads = Lists.newArrayList();
1535                     for (final Span<Term> span : coref.getSpans()) {
1536                         final Term spanHead = NAFUtils.extractHead(document, span);
1537                         if (spanHead != null) {
1538                             spanHeads.add(spanHead);
1539                         }
1540                     }
1541                     if (spanHeads.contains(argTerm)) {
1542                         for (final Term spanHead : spanHeads) {
1543                             linkableTerms.addAll(document.getTermsByDepAncestors(
1544                                     Collections.singleton(spanHead), "(COORD CONJ?)*"));
1545                         }
1546                     }
1547                 }
1548             }
1549         }
1550 
1551         // Add external refs for the entities, timex and predicates corresponding to sel. terms
1552         for (final Term term : linkableTerms) {
1553 
1554             // Determine whether the term was obtained via coreference
1555             final boolean isCoref = !heads.contains(term);
1556 
1557             // Add links for entities
1558             for (final Entity entity : document.getEntitiesByTerm(term)) {
1559                 for (final Span<Term> span : entity.getSpans()) {
1560                     final Term spanHead = NAFUtils.extractHead(document, span);
1561                     if (term.equals(spanHead)) {
1562                         final String res = isCoref ? NAFUtils.RESOURCE_ENTITY_COREF
1563                                 : NAFUtils.RESOURCE_ENTITY_REF;
1564                         NAFUtils.setRef(annotation, document.newExternalRef(res, entity.getId()));
1565                         if (LOGGER.isDebugEnabled()) {
1566                             LOGGER.debug("Linked {} to {} as {}", NAFUtils.toString(entity),
1567                                     NAFUtils.toString(annotation), res);
1568                         }
1569                     }
1570                 }
1571             }
1572 
1573             // Add links for timex
1574             for (final Timex3 timex : document.getTimeExsByWF(term.getWFs().get(0))) {
1575                 final Term timexHead = NAFUtils.extractHead(document, KAFDocument
1576                         .newTermSpan(document.getTermsByWFs(timex.getSpan().getTargets())));
1577                 if (term.equals(timexHead)) {
1578                     final String res = isCoref ? NAFUtils.RESOURCE_TIMEX_COREF
1579                             : NAFUtils.RESOURCE_TIMEX_REF;
1580                     NAFUtils.setRef(annotation, document.newExternalRef(res, timex.getId()));
1581                     if (LOGGER.isDebugEnabled()) {
1582                         LOGGER.debug("Linked {} to {} as {}", NAFUtils.toString(timex),
1583                                 NAFUtils.toString(annotation), res);
1584                     }
1585                 }
1586             }
1587 
1588             // Add links for predicates
1589             for (final Predicate pred : document.getPredicatesByTerm(term)) {
1590                 if (term.equals(NAFUtils.extractHead(document, pred.getSpan()))) {
1591                     final String res = isCoref ? NAFUtils.RESOURCE_PREDICATE_COREF
1592                             : NAFUtils.RESOURCE_PREDICATE_REF;
1593                     NAFUtils.setRef(annotation, document.newExternalRef(res, pred.getId()));
1594                     if (LOGGER.isDebugEnabled()) {
1595                         LOGGER.debug("Linked {} to {} as {}", NAFUtils.toString(pred),
1596                                 NAFUtils.toString(annotation), res);
1597                     }
1598                 }
1599             }
1600         }
1601     }
1602 
1603     private void mapExternalRefs(final Object annotation, final Multimap<String, String> mappings) {
1604 
1605         // Keep track of prefixes (NB, PB, VN, FN) of resources already available, as well as the
1606         // keys corresponding to their values
1607         final Set<String> prefixes = Sets.newHashSet();
1608         final Set<String> keys = Sets.newHashSet();
1609 
1610         // Extract prefixes and keys
1611         for (final ExternalRef ref : NAFUtils.getRefs(annotation, null, null)) {
1612             final String prefix = MAPPING_PREFIXES.get(ref.getResource().toLowerCase());
1613             if (prefix != null) {
1614                 prefixes.add(prefix);
1615                 keys.add(prefix + ":" + ref.getReference());
1616             }
1617         }
1618 
1619         // Apply mappings
1620         final List<String> queue = Lists.newLinkedList(keys);
1621         while (!queue.isEmpty()) {
1622             final String key = queue.remove(0);
1623             for (final String mappedKey : mappings.get(key)) {
1624                 final String mappedPrefix = mappedKey.substring(0, 2);
1625                 if (!prefixes.contains(mappedPrefix) && !keys.contains(mappedKey)) {
1626                     final String mappedResource = MAPPING_PREFIXES.inverse().get(mappedPrefix);
1627                     final String mappedReference = mappedKey.substring(3);
1628                     keys.add(mappedKey);
1629                     queue.add(mappedKey);
1630                     NAFUtils.addRef(annotation, new ExternalRef(mappedResource, mappedReference));
1631                     if (LOGGER.isDebugEnabled()) {
1632                         LOGGER.debug("Mapped {} : {} to {} for {}", mappedResource,
1633                                 mappedReference, mappedKey, NAFUtils.toString(annotation));
1634                     }
1635                 }
1636             }
1637         }
1638     }
1639 
1640 
1641     private void applySRLPreMOnIRIs(final KAFDocument document) {
1642         // Process each predicate and role in the SRL layer
1643 
1644         final List<String> models = Arrays.asList(NAFUtils.RESOURCE_FRAMENET, NAFUtils.RESOURCE_VERBNET, NAFUtils.RESOURCE_PROPBANK, NAFUtils.RESOURCE_NOMBANK);
1645 
1646 
1647         for (final Predicate predicate : document.getPredicates()) {
1648 
1649 
1650             List<ExternalRef> allPredicateExtRefs = predicate.getExternalRefs();
1651             List<ExternalRef> predicateExtRefToRemove =  Lists.newArrayList();
1652 
1653             for (final ExternalRef predRef : ImmutableList.copyOf(allPredicateExtRefs)) {
1654                 String refStr= predRef.getResource();
1655 
1656                 if (models.contains(refStr)) {
1657                     final String pred = predRef.getReference();
1658                     final String source = predRef.getSource();
1659 
1660                     final IRI premonIRI = NAFUtils.createPreMOnSemanticClassIRIfor(refStr,pred);
1661                     if (premonIRI != null) {
1662                         ExternalRef e = new ExternalRef("PreMOn+"+refStr, premonIRI.getLocalName());
1663                         if (source!=null) e.setSource(source);
1664                         NAFUtils.setRef(predicate, e);
1665 
1666 
1667                     }
1668 
1669                     predicateExtRefToRemove.add(predRef);
1670                 }
1671 
1672             }
1673 
1674             //remove old predicate ref
1675             for (ExternalRef toBeDropped:predicateExtRefToRemove
1676                  ) {
1677                 allPredicateExtRefs.remove(toBeDropped);
1678             }
1679 
1680 
1681             // Convert FrameNet refs to FrameBase refs at the role level
1682             for (final Role role : predicate.getRoles()) {
1683 
1684 
1685                 List<ExternalRef> allRoleExtRefs = role.getExternalRefs();
1686                 List<ExternalRef> roleExtRefToRemove =  Lists.newArrayList();
1687 
1688                 for (final ExternalRef roleRef : ImmutableList.copyOf(allRoleExtRefs)) {
1689 
1690 
1691                     String refStr= roleRef.getResource();
1692 
1693                     if (models.contains(refStr)) {
1694 
1695                         final String predicateAndRole = roleRef.getReference();
1696                         final String source = roleRef.getSource();
1697                         final int index = predicateAndRole.indexOf('@');
1698                         if (index > 0) {
1699                             final String pred = predicateAndRole.substring(0, index);
1700                             final String rol = predicateAndRole.substring(index + 1);
1701 
1702                             final IRI premonIRI = NAFUtils.createPreMOnSemanticRoleIRIfor(refStr,pred,rol);
1703                             if (premonIRI != null) {
1704                                 ExternalRef e = new ExternalRef("PreMOn+"+refStr, premonIRI.getLocalName());
1705                                 if (source!=null) e.setSource(source);
1706                                 NAFUtils.setRef(role, e);
1707                             }
1708                         }
1709                         roleExtRefToRemove.add(roleRef);
1710                     }
1711                 }
1712                 //remove old role
1713                 for (ExternalRef toBeRemoved:roleExtRefToRemove
1714                      ) {
1715                     allRoleExtRefs.remove(toBeRemoved);
1716                 }
1717             }
1718         }
1719     }
1720 
1721 
1722     /**
1723      * Returns a new configurable {@code Builder} for the instantiation of a {@code NAFFilter}.
1724      *
1725      * @return a new {@code Builder}
1726      */
1727     public static final Builder builder() {
1728         return new Builder();
1729     }
1730 
1731     /**
1732      * Returns a new configurable {@code Builder} with all {@code NAFFilter} features either
1733      * enabled or disabled, based on the supplied parameter.
1734      *
1735      * @param enableAll
1736      *            true, to enable all features; false, to disable all features; null, to maintain
1737      *            default settings.
1738      * @return a new {@code Builder}
1739      */
1740     public static final Builder builder(@Nullable final Boolean enableAll) {
1741         return new Builder() //
1742                 .withTermSenseCompletion(enableAll) //
1743                 .withEntityRemoveOverlaps(enableAll) //
1744                 .withEntitySpanFixing(enableAll) //
1745                 .withEntityAddition(enableAll) //
1746                 .withCorefSpanFixing(enableAll) //
1747                 .withCorefForRoleDependencies(enableAll) //
1748                 .withLinkingCompletion(enableAll) //
1749                 .withLinkingFixing(enableAll) //
1750                 .withSRLRemoveWrongRefs(enableAll) //
1751                 .withSRLRemoveUnknownPredicates(enableAll) //
1752                 .withSRLPredicateAddition(enableAll) //
1753                 .withSRLSelfArgFixing(enableAll) //
1754                 .withSRLSenseMapping(enableAll) //
1755                 .withSRLRoleLinking(enableAll, enableAll) //
1756                 .withOpinionLinking(enableAll, enableAll)
1757                 .withSRLPreMOnIRIs(enableAll);
1758     }
1759 
1760     /**
1761      * Configurable builder object for the creation of {@code NAFFilter}s.
1762      * <p>
1763      * Supported properties accepted by {@link #withProperties(Map, String)} and corresponding
1764      * setter methods:
1765      * </p>
1766      * <table border="1">
1767      * <thead>
1768      * <tr>
1769      * <th>Property</th>
1770      * <th>Values</th>
1771      * <th>Corresponding method</th>
1772      * <th>Default</th>
1773      * </tr>
1774      * </thead><tbody>
1775      * <tr>
1776      * <td>termSenseFiltering</td>
1777      * <td>true, false</td>
1778      * <td>{@link #withTermSenseFiltering(Boolean)}</td>
1779      * <td>true</td>
1780      * </tr>
1781      * <tr>
1782      * <td>termSenseCompletion</td>
1783      * <td>true, false</td>
1784      * <td>{@link #withTermSenseCompletion(Boolean)}</td>
1785      * <td>true</td>
1786      * </tr>
1787      * <tr>
1788      * <td>entityRemoveOverlaps</td>
1789      * <td>true, false</td>
1790      * <td>{@link #withEntityRemoveOverlaps(Boolean)}</td>
1791      * <td>true</td>
1792      * </tr>
1793      * <tr>
1794      * <td>entitySpanFixing</td>
1795      * <td>true, false</td>
1796      * <td>{@link #withEntitySpanFixing(Boolean)}</td>
1797      * <td>true</td>
1798      * </tr>
1799      * <tr>
1800      * <td>entityAddition</td>
1801      * <td>true, false</td>
1802      * <td>{@link #withEntityAddition(Boolean)}</td>
1803      * <td>true</td>
1804      * </tr>
1805      * <tr>
1806      * <td>entityValueNormalization</td>
1807      * <td>true, false</td>
1808      * <td>{@link #withEntityValueNormalization(Boolean)}</td>
1809      * <td>true</td>
1810      * </tr>
1811      * <tr>
1812      * <td>linkingCompletion</td>
1813      * <td>true, false</td>
1814      * <td>{@link #withLinkingCompletion(Boolean)}</td>
1815      * <td>true</td>
1816      * </tr>
1817      * <tr>
1818      * <td>linkingFixing</td>
1819      * <td>true, false</td>
1820      * <td>{@link #withLinkingFixing(Boolean)}</td>
1821      * <td>false</td>
1822      * </tr>
1823      * <tr>
1824      * <td>corefForRoleDependencies</td>
1825      * <td>true, false</td>
1826      * <td>{@link #withCorefForRoleDependencies(Boolean)}</td>
1827      * <td>false</td>
1828      * </tr>
1829      * <tr>
1830      * <td>corefSpanFixing</td>
1831      * <td>true, false</td>
1832      * <td>{@link #withCorefSpanFixing(Boolean)}</td>
1833      * <td>false</td>
1834      * </tr>
1835      * <tr>
1836      * <td>srlRemoveWrongRefs</td>
1837      * <td>true, false</td>
1838      * <td>{@link #withSRLRemoveWrongRefs(Boolean)}</td>
1839      * <td>true</td>
1840      * </tr>
1841      * <tr>
1842      * <td>srlRemoveUnknownPredicates</td>
1843      * <td>true, false</td>
1844      * <td>{@link #withSRLRemoveUnknownPredicates(Boolean)}</td>
1845      * <td>false</td>
1846      * </tr>
1847      * <tr>
1848      * <td>srlPredicateAddition</td>
1849      * <td>true, false</td>
1850      * <td>{@link #withSRLPredicateAddition(Boolean)}</td>
1851      * <td>true</td>
1852      * </tr>
1853      * <tr>
1854      * <td>srlSelfArgFixing</td>
1855      * <td>true, false</td>
1856      * <td>{@link #withSRLSelfArgFixing(Boolean)}</td>
1857      * <td>true</td>
1858      * </tr>
1859      * <tr>
1860      * <td>srlSenseMapping</td>
1861      * <td>true, false</td>
1862      * <td>{@link #withSRLSenseMapping(Boolean)}</td>
1863      * <td>false</td>
1864      * </tr>
1865      * <tr>
1866      * <td>srlFrameBaseMapping</td>
1867      * <td>true, false</td>
1868      * <td>{@link #withSRLFrameBaseMapping(Boolean)}</td>
1869      * <td>true</td>
1870      * </tr>
1871      * <tr>
1872      * <td>srlRoleLinking</td>
1873      * <td>none, exact, coref</td>
1874      * <td>{@link #withSRLRoleLinking(Boolean, Boolean)}</td>
1875      * <td>coref (= true, true)</td>
1876      * </tr>
1877      * <tr>
1878      * <td>opinionLinking</td>
1879      * <td>none, exact, coref</td>
1880      * <td>{@link #withOpinionLinking(Boolean, Boolean)}</td>
1881      * <td>coref (= true, true)</td>
1882      * </tr>
1883      * </tbody>
1884      * </table>
1885      */
1886     public static final class Builder {
1887 
1888         @Nullable
1889         private Boolean termSenseFiltering;
1890 
1891         @Nullable
1892         private Boolean termSenseCompletion;
1893 
1894         @Nullable
1895         private Boolean entityRemoveOverlaps;
1896 
1897         @Nullable
1898         private Boolean entitySpanFixing;
1899 
1900         @Nullable
1901         private Boolean entityAddition;
1902 
1903         @Nullable
1904         private Boolean entityValueNormalization;
1905 
1906         @Nullable
1907         private Boolean linkingCompletion;
1908 
1909         @Nullable
1910         private Boolean linkingFixing;
1911 
1912         @Nullable
1913         private Boolean corefSpanFixing;
1914 
1915         @Nullable
1916         private Boolean corefForRoleDependencies;
1917 
1918         @Nullable
1919         private Boolean srlPreprocess;
1920 
1921         @Nullable
1922         private Boolean srlEnableMate;
1923 
1924         @Nullable
1925         private Boolean srlEnableSemafor;
1926 
1927         @Nullable
1928         private Boolean srlRemoveWrongRefs;
1929 
1930         @Nullable
1931         private Boolean srlRemoveUnknownPredicates;
1932 
1933         @Nullable
1934         private Boolean srlPredicateAddition;
1935 
1936         @Nullable
1937         private Boolean srlSelfArgFixing;
1938 
1939         @Nullable
1940         private Boolean srlSenseMapping;
1941 
1942         @Nullable
1943         private Boolean srlFrameBaseMapping;
1944 
1945         @Nullable
1946         private Boolean srlRoleLinking;
1947 
1948         @Nullable
1949         private Boolean srlRoleLinkingUsingCoref;
1950 
1951         @Nullable
1952         private Boolean srlPreMOnIRIs;
1953 
1954         @Nullable
1955         private Boolean opinionLinking;
1956 
1957         @Nullable
1958         private Boolean opinionLinkingUsingCoref;
1959 
1960         Builder() {
1961         }
1962 
1963         /**
1964          * Sets all the properties in the map supplied, matching an optional prefix.
1965          *
1966          * @param properties
1967          *            the properties to configure, not null
1968          * @param prefix
1969          *            an optional prefix used to select the relevant properties in the map
1970          * @return this builder object, for call chaining
1971          */
1972         public Builder withProperties(final Map<?, ?> properties, @Nullable final String prefix) {
1973             final String p = prefix == null ? "" : prefix.endsWith(".") ? prefix : prefix + ".";
1974             for (final Map.Entry<?, ?> entry : properties.entrySet()) {
1975                 if (entry.getKey() != null && entry.getValue() != null
1976                         && entry.getKey().toString().startsWith(p)) {
1977                     final String name = entry.getKey().toString().substring(p.length());
1978                     final String value = Strings.emptyToNull(entry.getValue().toString());
1979                     if ("termSenseFiltering".equals(name)) {
1980                         withTermSenseFiltering(Boolean.valueOf(value));
1981                     } else if ("termSenseCompletion".equals(name)) {
1982                         withTermSenseCompletion(Boolean.valueOf(value));
1983                     } else if ("entityRemoveOverlaps".equals(name)) {
1984                         withEntityRemoveOverlaps(Boolean.valueOf(value));
1985                     } else if ("entitySpanFixing".equals(name)) {
1986                         withEntitySpanFixing(Boolean.valueOf(value));
1987                     } else if ("entityAddition".equals(name)) {
1988                         withEntityAddition(Boolean.valueOf(value));
1989                     } else if ("entityValueNormalization".equals(name)) {
1990                         withEntityValueNormalization(Boolean.valueOf(value));
1991                     } else if ("linkingCompletion".equals(name)) {
1992                         withLinkingCompletion(Boolean.valueOf(value));
1993                     } else if ("linkingFixing".equals(name)) {
1994                         withLinkingFixing(Boolean.valueOf(value));
1995                     } else if ("corefForRoleDependencies".equals(name)) {
1996                         withCorefForRoleDependencies(Boolean.valueOf(value));
1997                     } else if ("corefSpanFixing".equals(name)) {
1998                         withCorefSpanFixing(Boolean.valueOf(value));
1999                     } else if ("srlPreprocess".equals(name)) {
2000                         if ("none".equalsIgnoreCase(value)) {
2001                             withSRLPreprocess(false, false, false);
2002                         } else if ("basic".equalsIgnoreCase(value)) {
2003                             withSRLPreprocess(true, false, false);
2004                         } else if ("mate".equalsIgnoreCase(value)) {
2005                             withSRLPreprocess(true, true, false);
2006                         } else if ("semafor".equalsIgnoreCase(value)) {
2007                             withSRLPreprocess(true, false, true);
2008                         } else if ("mate+semafor".equalsIgnoreCase(value)) {
2009                             withSRLPreprocess(true, true, true);
2010                         }else {
2011                             throw new IllegalArgumentException("Invalid '" + value +"' srlPreprocess property. Supported: none basic mate semafor mate+semafor");
2012                         }
2013                     } else if ("srlRemoveWrongRefs".equals(name)) {
2014                         withSRLRemoveWrongRefs(Boolean.valueOf(value));
2015                     } else if ("srlRemoveUnknownPredicates".equals(name)) {
2016                         withSRLRemoveUnknownPredicates(Boolean.valueOf(value));
2017                     } else if ("srlPredicateAddition".equals(name)) {
2018                         withSRLPredicateAddition(Boolean.valueOf(value));
2019                     } else if ("srlSelfArgFixing".equals(name)) {
2020                         withSRLSelfArgFixing(Boolean.valueOf(value));
2021                     } else if ("srlSenseMapping".equals(name)) {
2022                         withSRLSenseMapping(Boolean.valueOf(value));
2023                     } else if ("srlFrameBaseMapping".equals(name)) {
2024                         withSRLFrameBaseMapping(Boolean.valueOf(value));
2025                     } else if ("srlRoleLinking".equals(name)) {
2026                         if ("none".equalsIgnoreCase(value)) {
2027                             withSRLRoleLinking(false, false);
2028                         } else if ("exact".equalsIgnoreCase(value)) {
2029                             withSRLRoleLinking(true, false);
2030                         } else if ("coref".equalsIgnoreCase(value)) {
2031                             withSRLRoleLinking(true, true);
2032                         } else {
2033                             throw new IllegalArgumentException("Invalid '" + value + "' srlRoleLinking property. Supported: none exact coref ");
2034                         }
2035                     } else if ("srlPreMOnIRIs".equals(name)){
2036                         withSRLPreMOnIRIs(Boolean.valueOf(value));
2037                     } else if ("opinionLinking".equals(name)) {
2038                         if ("none".equalsIgnoreCase(value)) {
2039                             withOpinionLinking(false, false);
2040                         } else if ("exact".equalsIgnoreCase(value)) {
2041                             withOpinionLinking(true, false);
2042                         } else if ("coref".equalsIgnoreCase(value)) {
2043                             withOpinionLinking(true, true);
2044                         } else {
2045                             throw new IllegalArgumentException("Invalid '" + value +"' opinionLinking property. Supported: none exact coref ");
2046                         }
2047                     }
2048                 }
2049             }
2050             return this;
2051         }
2052 
2053         /**
2054          * Specifies whether term senses (BBN, SST, WN Synset, SUMO mapping, YAGO) for proper
2055          * names should be removed.
2056          *
2057          * @param termSenseFiltering
2058          *            true to enable term sense filtering, null to use default value
2059          * @return this builder object, for call chaining
2060          */
2061         public Builder withTermSenseFiltering(@Nullable final Boolean termSenseFiltering) {
2062             this.termSenseFiltering = termSenseFiltering;
2063             return this;
2064         }
2065 
2066         /**
2067          * Specifies whether missing term senses (BBN, SST, WN Synset, SUMO mapping ) should be
2068          * completed by applying sense mappings.
2069          *
2070          * @param termSenseCompletion
2071          *            true to enable term sense completion, null to use default value
2072          * @return this builder object, for call chaining
2073          */
2074         public Builder withTermSenseCompletion(@Nullable final Boolean termSenseCompletion) {
2075             this.termSenseCompletion = termSenseCompletion;
2076             return this;
2077         }
2078 
2079         /**
2080          * Specifies whether entities overlapping with timex or (larger) entities should be
2081          * removed.
2082          *
2083          * @param entityRemoveOverlaps
2084          *            true, to enable removal of entities that overlap with other entities or
2085          *            timex; null to use the default setting
2086          * @return this builder object for call chaining
2087          */
2088         public Builder withEntityRemoveOverlaps(@Nullable final Boolean entityRemoveOverlaps) {
2089             this.entityRemoveOverlaps = entityRemoveOverlaps;
2090             return this;
2091         }
2092 
2093         /**
2094          * Specifies whether the spans of entities should be checked and possibly fixed, removing
2095          * determiners and non-alphanumeric terms. If enabled and no terms remain after fixing the
2096          * span of an entity, that entity is removed.
2097          *
2098          * @param entitySpanFixing
2099          *            true, to enable fixing of entity spans (and possible removal of invalid
2100          *            entities); null to use the default setting
2101          * @return this builder object, for call chaining
2102          */
2103         public Builder withEntitySpanFixing(@Nullable final Boolean entitySpanFixing) {
2104             this.entitySpanFixing = entitySpanFixing;
2105             return this;
2106         }
2107 
2108         /**
2109          * Specifies whether new entities should be added to the document for noun phrases not
2110          * already marked as entities.
2111          *
2112          * @param entityAddition
2113          *            true, to enable entity addition; null, to use the default setting
2114          * @return this builder object, for call chaining
2115          */
2116         public Builder withEntityAddition(@Nullable final Boolean entityAddition) {
2117             this.entityAddition = entityAddition;
2118             return this;
2119         }
2120 
2121         /**
2122          * Specifies whether normalization of numerical entity values (ordinal, cardinal, percent,
2123          * money) should take place.
2124          *
2125          * @param entityValueNormalization
2126          *            true, to enable entity value normalization; null, to use the default setting
2127          * @return this builder object, for call chaining
2128          */
2129         public Builder withEntityValueNormalization(
2130                 @Nullable final Boolean entityValueNormalization) {
2131             this.entityValueNormalization = entityValueNormalization;
2132             return this;
2133         }
2134 
2135         /**
2136          * Specifies whether entity links in the LinkedEntities layer should be applied to
2137          * entities and predicates where missing, thus performing a kind of linking completion.
2138          *
2139          * @param linkingCompletion
2140          *            true, to perform linking completion
2141          * @return this builder object, for call chaining
2142          */
2143         public Builder withLinkingCompletion(@Nullable final Boolean linkingCompletion) {
2144             this.linkingCompletion = linkingCompletion;
2145             return this;
2146         }
2147 
2148         /**
2149          * Specifies whether removal of inaccurate entity links to DBpedia should occur. If
2150          * enabled, links for entities whose span is part of a stop word list are removed. The
2151          * stop word list contains (multi-)words that are known to be ambiguous from an analysis
2152          * of Wikipedia data.
2153          *
2154          * @param linkingFixing
2155          *            true to enable linking fixing; null, to use the default setting
2156          * @return this builder object, for call chaining
2157          */
2158         public Builder withLinkingFixing(@Nullable final Boolean linkingFixing) {
2159             this.linkingFixing = linkingFixing;
2160             return this;
2161         }
2162 
2163         /**
2164          * Specifies whether new coreference relations should be added for APPO/NMOD/TITLE edges
2165          * in the dependency tree between proper nouns and role nouns.
2166          *
2167          * @param corefForRoleDependencies
2168          *            true to enable addition of coreference relations for APPO/NMOD/TITLE edges;
2169          *            null, to use the default setting
2170          * @return this builder object, for call chaining
2171          */
2172         public Builder withCorefForRoleDependencies(
2173                 @Nullable final Boolean corefForRoleDependencies) {
2174             this.corefForRoleDependencies = corefForRoleDependencies;
2175             return this;
2176         }
2177 
2178         /**
2179          * Specifies whether spans of existing coreference sets should be checked and possibly
2180          * shrinked or removed. The following rules are applied:
2181          * <ul>
2182          * <li>remove spans without a well-defined head in the dependency tree;</li>
2183          * <li>remove spans that enclose another span in the coreference set;</li>
2184          * <li>remove spans with non NNP head corresponding to a verb or to a NomBank predicate
2185          * that never admit itself as a role (e.g., 'war' but not 'president'), if no span with a
2186          * sumo:Process head (= event) is part of the coreference set;</li>
2187          * <li>shrink spans with non NNP head that contain some NNP token, if a span with NNP head
2188          * is part of the coreference set;</li>
2189          * </ul>
2190          * If a coreference set becomes empty as a result of the above filtering, it is removed
2191          * from the NAF document.
2192          *
2193          * @param corefSpanFixing
2194          *            true to enable coreference span fixing; null to use default setting
2195          * @return this builder object, for call chaining
2196          */
2197         public Builder withCorefSpanFixing(@Nullable final Boolean corefSpanFixing) {
2198             this.corefSpanFixing = corefSpanFixing;
2199             return this;
2200         }
2201 
2202         /**
2203          * Specifies whether to preprocess SRL layer, enabling Mate and/or Semafor outputs. If
2204          * both tools are enabled, they are combined in such a way that semafor takes precedence
2205          * in case two predicates refer to the same token.
2206          *
2207          * @param srlPreprocess
2208          *            true, to enable preprocessing of SRL layer
2209          * @param srlEnableMate
2210          *            true, to enable Mate output
2211          * @param srlEnableSemafor
2212          *            true, to enable Semafor output
2213          * @return this builder object, for call chaining
2214          */
2215         public Builder withSRLPreprocess(@Nullable final Boolean srlPreprocess,
2216                 @Nullable final Boolean srlEnableMate, @Nullable final Boolean srlEnableSemafor) {
2217             this.srlPreprocess = srlPreprocess;
2218             this.srlEnableMate = srlEnableMate;
2219             this.srlEnableSemafor = srlEnableSemafor;
2220             return this;
2221         }
2222 
2223         /**
2224          * Specifies whether ExternalRefs with wrong PropBank/NomBank rolesets/roles in the NAF
2225          * should be removed. A roleset/role is considered wrong if its lemma differs from the one
2226          * of the predicate in the text (errors can arise from 'excessive' mappings, e.g. in the
2227          * predicate matrix).
2228          *
2229          * @param srlRemoveWrongRefs
2230          *            true, if removal of ExternalRefs with wrong PB/NB rolesets/roles has to be
2231          *            enabled
2232          * @return this builder object, for call chaining
2233          */
2234         public Builder withSRLRemoveWrongRefs(@Nullable final Boolean srlRemoveWrongRefs) {
2235             this.srlRemoveWrongRefs = srlRemoveWrongRefs;
2236             return this;
2237         }
2238 
2239         /**
2240          * Specifies whether SRL predicates with unknown PropBank/NomBank rolesets/roles in the
2241          * NAF should be removed. A roleset/role is wrong if it does not appear in
2242          * PropBank/NomBank frame files (SRL tools such as Mate may detect predicates for unknown
2243          * rolesets, to increase recall).
2244          *
2245          * @param srlRemoveUnknownPredicates
2246          *            true, if removal of predicates with unknown PB/NB rolesets/roles has to be
2247          *            enabled
2248          * @return this builder object, for call chaining
2249          */
2250         public Builder withSRLRemoveUnknownPredicates(
2251                 @Nullable final Boolean srlRemoveUnknownPredicates) {
2252             this.srlRemoveUnknownPredicates = srlRemoveUnknownPredicates;
2253             return this;
2254         }
2255 
2256         /**
2257          * Specifies whether new predicates can be added for verbs, noun and adjectives having
2258          * exactly one sense in PropBank or NomBank but not marked in the text.
2259          *
2260          * @param srlPredicateAddition
2261          *            true, to enable predicate addition; null to use the default setting
2262          * @return this builder object, for call chaining
2263          */
2264         public Builder withSRLPredicateAddition(@Nullable final Boolean srlPredicateAddition) {
2265             this.srlPredicateAddition = srlPredicateAddition;
2266             return this;
2267         }
2268 
2269         /**
2270          * Specifies whether 'self-roles' can be added for predicates where missing or removed
2271          * where wrongly added. If set, for each recognized predicate the filter checks whether
2272          * the predicate term has also been marked as role. IF it is not marked in the NAF but it
2273          * is always marked in NomBank training set THEN the filter adds a new role for the
2274          * predicate term, using the semantic role in NomBank training set. If already marked
2275          * whereas no marking should happen based on previous criteria, then the role is removed.
2276          *
2277          * @param srlSelfArgFixing
2278          *            true if role addition is enabled
2279          * @return this builder object, for call chaining
2280          */
2281         public Builder withSRLSelfArgFixing(@Nullable final Boolean srlSelfArgFixing) {
2282             this.srlSelfArgFixing = srlSelfArgFixing;
2283             return this;
2284         }
2285 
2286         /**
2287          * Specifies whether mapping of roleset / roles in the SRL layer should take place. If
2288          * enabled, new external refs are added to map NomBank rolesets and roles to PropBank and
2289          * to map PropBank rolesets and roles to VerbNet and FrameNet, based on the predicate
2290          * matrix.
2291          *
2292          * @param srlSenseMapping
2293          *            true, to enable SRL sense mapping; null, to use the default setting
2294          * @return this builder object, for call chaining
2295          */
2296         public Builder withSRLSenseMapping(@Nullable final Boolean srlSenseMapping) {
2297             this.srlSenseMapping = srlSenseMapping;
2298             return this;
2299         }
2300 
2301         /**
2302          * Specifies whether mapping of rolesets / roles in the SRL layer to FrameBase classes /
2303          * properties should take place. If enabled, new external refs for FrameBase targets are
2304          * added where possible.
2305          *
2306          * @param srlFrameBaseMapping
2307          *            true, to enable SRL to FrameBase mapping; null, to use the default setting
2308          * @return this builder object, for call chaining
2309          */
2310         public Builder withSRLFrameBaseMapping(@Nullable final Boolean srlFrameBaseMapping) {
2311             this.srlFrameBaseMapping = srlFrameBaseMapping;
2312             return this;
2313         }
2314 
2315         /**
2316          * Specifies whether ExternalRef tags should be added to SRL roles to link them to the
2317          * entities, timex and predicates in the NAF the role corresponds to. The correspondence
2318          * between a role and entities/predicates is computed based on the evaluation of regular
2319          * expressions on the dependency tree that take properly into account coordination and
2320          * prepositions (e.g., in 'Tom speaks to Al, John and the friend of Jack', the A1 role 'to
2321          * Al, John and the friend of Jack' is linked to the entities 'Al' and 'John' but not
2322          * 'Jack'). If {@code useCoref} is specified, SRL roles are also linked to entities, timex
2323          * and predicates reachable via coreference chains.
2324          *
2325          * @param srlRoleLinking
2326          *            true, to enable this filtering; null, to use the default setting
2327          * @param useCoref
2328          *            true, to enable linking to coreferring entities/timex/predicates; null, to
2329          *            use the default setting
2330          * @return this builder object, for call chaining
2331          */
2332         public Builder withSRLRoleLinking(@Nullable final Boolean srlRoleLinking,
2333                 @Nullable final Boolean useCoref) {
2334             this.srlRoleLinking = srlRoleLinking;
2335             this.srlRoleLinkingUsingCoref = useCoref;
2336             return this;
2337         }
2338 
2339 
2340         /**
2341          * Specifies replace reference of predicate models in NAF with premon IRIs
2342          *
2343          * @param srlPreMOnIRIs
2344          *            true to enable IRI replacement, null to use default value
2345          * @return this builder object, for call chaining
2346          */
2347         public Builder withSRLPreMOnIRIs(@Nullable final Boolean srlPreMOnIRIs) {
2348             this.srlPreMOnIRIs = srlPreMOnIRIs;
2349             return this;
2350         }
2351 
2352         /**
2353          * Specifies whether ExternalRef tags should be added to opinion expressions, holder and
2354          * targets to lthe entities, timex and predicates their heads correspond to.
2355          *
2356          * @param opinionLinking
2357          *            true, to enable this linking; null, to use the default setting
2358          * @param opinionLinkingUsingCoref
2359          *            true, to enable linking to coreferring entities/timex/predicates; null, to
2360          *            use the default setting
2361          * @return this builder object, for call chaining
2362          */
2363         public Builder withOpinionLinking(@Nullable final Boolean opinionLinking,
2364                 @Nullable final Boolean opinionLinkingUsingCoref) {
2365             this.opinionLinking = opinionLinking;
2366             this.opinionLinkingUsingCoref = opinionLinkingUsingCoref;
2367             return this;
2368         }
2369 
2370         /**
2371          * Creates a {@code NAFFilter} based on the flags specified on this builder object.
2372          *
2373          * @return the constructed {@code NAFFilter}
2374          */
2375         public NAFFilter build() {
2376             return new NAFFilter(this);
2377         }
2378 
2379     }
2380 
2381 }