1   package eu.fbk.dkm.pikes.resources;
2   
3   import java.io.IOException;
4   import java.util.Arrays;
5   import java.util.Collection;
6   import java.util.Collections;
7   import java.util.List;
8   import java.util.Map;
9   import java.util.Set;
10  import java.util.function.Consumer;
11  import java.util.regex.Matcher;
12  import java.util.regex.Pattern;
13  
14  import javax.annotation.Nullable;
15  
16  import com.google.common.base.Charsets;
17  import com.google.common.base.Joiner;
18  import com.google.common.base.MoreObjects;
19  import com.google.common.base.Objects;
20  import com.google.common.base.Preconditions;
21  import com.google.common.base.Splitter;
22  import com.google.common.base.Strings;
23  import com.google.common.collect.BiMap;
24  import com.google.common.collect.HashMultimap;
25  import com.google.common.collect.ImmutableBiMap;
26  import com.google.common.collect.ImmutableList;
27  import com.google.common.collect.ImmutableMap;
28  import com.google.common.collect.ImmutableSet;
29  import com.google.common.collect.Lists;
30  import com.google.common.collect.Maps;
31  import com.google.common.collect.Multimap;
32  import com.google.common.collect.Ordering;
33  import com.google.common.collect.Sets;
34  import com.google.common.io.Resources;
35  
36  import eu.fbk.rdfpro.util.Statements;
37  import eu.fbk.utils.svm.Util;
38  import org.eclipse.rdf4j.model.IRI;
39  import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
40  import org.slf4j.Logger;
41  import org.slf4j.LoggerFactory;
42  
43  import ixa.kaflib.Coref;
44  import ixa.kaflib.Dep;
45  import ixa.kaflib.Entity;
46  import ixa.kaflib.ExternalRef;
47  import ixa.kaflib.KAFDocument;
48  import ixa.kaflib.LinkedEntity;
49  import ixa.kaflib.Opinion;
50  import ixa.kaflib.Opinion.OpinionExpression;
51  import ixa.kaflib.Opinion.OpinionHolder;
52  import ixa.kaflib.Opinion.OpinionTarget;
53  import ixa.kaflib.Predicate;
54  import ixa.kaflib.Predicate.Role;
55  import ixa.kaflib.Span;
56  import ixa.kaflib.Term;
57  import ixa.kaflib.Timex3;
58  import ixa.kaflib.WF;
59  
60  /**
61   * A filter for the post-processing of a NAF document.
62   * <p>
 * The filter, configured and created using the builder pattern (see {@link #builder()}), performs
 * several optional and configurable operations on a NAF document ({@code KAFDocument}) that is
 * modified in place. For the supported operations please refer to the javadoc of {@code Builder}.
66   * <p>
67   * This class is thread-safe.
68   * </p>
69   */
70  public final class NAFFilter implements Consumer<KAFDocument> {
71  
    /** Namespace prefix used to build SUMO concept IRIs. */
    public static final String SUMO_NAMESPACE = "http://www.ontologyportal.org/SUMO.owl#";

    /** IRI obtained by appending the local name {@code Process} to {@link #SUMO_NAMESPACE}. */
    public static final IRI SUMO_PROCESS = SimpleValueFactory.getInstance()
            .createIRI(SUMO_NAMESPACE, "Process");

    /** Class logger. */
    private static final Logger LOGGER = LoggerFactory.getLogger(NAFFilter.class);

    /**
     * Maps the last dot-separated component of a WordNet SST to an entity type. It is consulted
     * by {@code applyEntityAddition} when a term has neither a BBN nor a synset reference.
     */
    private static final Map<String, String> ENTITY_SST_TO_TYPES = ImmutableMap
            .<String, String>builder().put("person", "PER").put("group", "ORG")
            .put("location", "LOC").put("quantity", "QUANTITY").put("artifact", "PRODUCT")
            .put("act", "EVENT").put("event", "EVENT").put("phenomenon", "EVENT")
            .put("process", "EVENT").put("state", "EVENT").put("animal", "MISC")
            .put("plant", "MISC").put("body", "MISC").put("shape", "MISC").put("motive", "MISC")
            .put("object", "MISC").put("substance", "MISC").build();
    // The following mapping is currently disabled:
    // .put("cognition", "EVENT")

    /** Matches strings starting with 'A' followed by a digit; group 1 captures that digit. */
    private static final Pattern SRL_ROLE_PATTERN = Pattern.compile("A(\\d).*");

    // Regex over space-separated labels (SUB, COORD, CONJ, PMOD, VC, OPRD, IM); its use is not
    // visible in this part of the file -- presumably matched against dependency paths (TODO
    // confirm against the code that consumes it).
    private static final String PARTICIPATION_REGEX = ""
            + "SUB? (COORD CONJ?)* (PMOD (COORD CONJ?)*)? ((VC OPRD?)|(IM OPRD?))*";

    /** Lower-cased, sorted stop words read from the {@code linking_stopwords} resource. */
    private static final String[] LINKING_STOP_WORDS;

    /** Bidirectional map between resource names and their two-letter prefixes. */
    private static final BiMap<String, String> MAPPING_PREFIXES = ImmutableBiMap.of("propbank",
            "pb", "nombank", "nb", "verbnet", "vn", "framenet", "fn");

    /**
     * Predicate mappings read from {@code mappings-frames.tsv}: keys have the form
     * {@code <prefix>:<value>}, values the form {@code fn:<Frame>}.
     */
    private static final Multimap<String, String> MAPPING_PREDICATES;

    /**
     * Argument mappings read from {@code mappings-roles.tsv}: keys have the form
     * {@code <prefix>:<value>}, values the form {@code fn:<Frame>@<Role>}.
     */
    private static final Multimap<String, String> MAPPING_ARGUMENTS;

    /** Filter instance created from a builder on which no option has been set. */
    public static final NAFFilter DEFAULT = NAFFilter.builder().build();
105 
106     static {
107         List<String> stopwords = Collections.emptyList();
108         try {
109             stopwords = Resources.readLines(NAFFilter.class.getResource("linking_stopwords"),
110                     Charsets.UTF_8);
111             LOGGER.info("Loaded {} linking stopwords", stopwords.size());
112         } catch (final IOException ex) {
113             LOGGER.error("Could not load linking stopwords", ex);
114         }
115         LINKING_STOP_WORDS = stopwords.toArray(new String[stopwords.size()]);
116         for (int i = 0; i < LINKING_STOP_WORDS.length; ++i) {
117             LINKING_STOP_WORDS[i] = LINKING_STOP_WORDS[i].toLowerCase();
118         }
119         Arrays.sort(LINKING_STOP_WORDS);
120 
121         MAPPING_PREDICATES = HashMultimap.create();
122         MAPPING_ARGUMENTS = HashMultimap.create();
123         try {
124             for (final String line : Resources.readLines(
125                     NAFFilter.class.getResource("mappings-frames.tsv"), Charsets.UTF_8)) {
126                 final List<String> tokens = Splitter.on("\t").trimResults().splitToList(line);
127                 final String prefix = tokens.get(0).substring(0, 2).toLowerCase();
128                 final String fromKey = prefix + ":" + tokens.get(1);
129                 final String toKey = "fn:" + Character.toUpperCase(tokens.get(2).charAt(0))
130                         + tokens.get(2).substring(1);
131                 MAPPING_PREDICATES.put(fromKey, toKey);
132             }
133             for (final String line : Resources.readLines(
134                     NAFFilter.class.getResource("mappings-roles.tsv"), Charsets.UTF_8)) {
135                 final List<String> tokens = Splitter.on("\t").trimResults().splitToList(line);
136                 final String prefix = tokens.get(0).substring(0, 2).toLowerCase();
137                 final String fromKey = prefix + ":" + tokens.get(1);
138                 final String fnRole = tokens.get(2);
139                 final int index = fnRole.indexOf('@');
140                 final String toKey = "fn:" + Character.toUpperCase(fnRole.charAt(0))
141                         + fnRole.substring(1, index + 1)
142                         + Character.toUpperCase(fnRole.charAt(index + 1))
143                         + fnRole.substring(index + 2);
144                 MAPPING_ARGUMENTS.put(fromKey, toKey);
145             }
146 
147         } catch (final Throwable ex) {
148             LOGGER.error("Could not load mappings", ex);
149         }
150     }
151 
    // Configuration flags, all assigned once in the constructor from the Builder (a null builder
    // value falls back to the default hard-coded there). Most of them enable one step of
    // filter(KAFDocument). The ones not read there (srlEnableMate, srlEnableSemafor,
    // srlSenseMappingPM, srlRoleLinkingUsingCoref, opinionLinkingUsingCoref) are presumably
    // consulted inside the individual steps -- verify against the rest of the file.

    private final boolean termSenseFiltering;

    private final boolean termSenseCompletion;

    private final boolean entityRemoveOverlaps;

    private final boolean entitySpanFixing;

    private final boolean entityAddition;

    private final boolean entityValueNormalization;

    private final boolean linkingCompletion;

    private final boolean linkingFixing;

    private final boolean corefForRoleDependencies;

    private final boolean corefSpanFixing;

    private final boolean srlPreprocess;

    private final boolean srlEnableMate;

    private final boolean srlEnableSemafor;

    private final boolean srlRemoveWrongRefs;

    private final boolean srlRemoveUnknownPredicates;

    private final boolean srlPredicateAddition;

    private final boolean srlSelfArgFixing;

    private final boolean srlSenseMapping;

    // Always false: the constructor ignores the builder for this flag
    private final boolean srlSenseMappingPM;

    private final boolean srlFrameBaseMapping;

    private final boolean srlRoleLinking;

    private final boolean srlRoleLinkingUsingCoref;

    private final boolean srlPreMOnIRIs;

    private final boolean opinionLinking;

    private final boolean opinionLinkingUsingCoref;
201 
202     private NAFFilter(final Builder builder) {
203         this.termSenseFiltering = MoreObjects.firstNonNull(builder.termSenseFiltering, true);
204         this.termSenseCompletion = MoreObjects.firstNonNull(builder.termSenseCompletion, true);
205         this.entityRemoveOverlaps = MoreObjects.firstNonNull(builder.entityRemoveOverlaps, true);
206         this.entitySpanFixing = MoreObjects.firstNonNull(builder.entitySpanFixing, true);
207         this.entityAddition = MoreObjects.firstNonNull(builder.entityAddition, true);
208         this.entityValueNormalization = MoreObjects.firstNonNull(builder.entityValueNormalization,
209                 true);
210         this.linkingCompletion = MoreObjects.firstNonNull(builder.linkingCompletion, true);
211         this.linkingFixing = MoreObjects.firstNonNull(builder.linkingFixing, false);
212         this.corefForRoleDependencies = MoreObjects.firstNonNull(builder.corefForRoleDependencies,
213                 false);
214         this.corefSpanFixing = MoreObjects.firstNonNull(builder.corefSpanFixing, false);
215         this.srlPreprocess = MoreObjects.firstNonNull(builder.srlPreprocess, true);
216         this.srlEnableMate = MoreObjects.firstNonNull(builder.srlEnableMate, true);
217         this.srlEnableSemafor = MoreObjects.firstNonNull(builder.srlEnableSemafor, true);
218         this.srlRemoveWrongRefs = MoreObjects.firstNonNull(builder.srlRemoveWrongRefs, true);
219         this.srlRemoveUnknownPredicates = MoreObjects
220                 .firstNonNull(builder.srlRemoveUnknownPredicates, false);
221         this.srlPredicateAddition = MoreObjects.firstNonNull(builder.srlPredicateAddition, true);
222         this.srlSelfArgFixing = MoreObjects.firstNonNull(builder.srlSelfArgFixing, true);
223         this.srlSenseMapping = MoreObjects.firstNonNull(builder.srlSenseMapping, true);
224         this.srlSenseMappingPM = false; // TODO disabled
225         this.srlFrameBaseMapping = MoreObjects.firstNonNull(builder.srlFrameBaseMapping, true);
226         this.srlRoleLinking = MoreObjects.firstNonNull(builder.srlRoleLinking, true);
227         this.srlRoleLinkingUsingCoref = MoreObjects.firstNonNull(builder.srlRoleLinkingUsingCoref,
228                 true);
229 
230         this.srlPreMOnIRIs = MoreObjects.firstNonNull(builder.srlPreMOnIRIs, true);
231         this.opinionLinking = MoreObjects.firstNonNull(builder.opinionLinking, true);
232         this.opinionLinkingUsingCoref = MoreObjects.firstNonNull(builder.opinionLinkingUsingCoref,
233                 true);
234     }
235 
236     @Override
237     public void accept(final KAFDocument document) {
238         filter(document);
239     }
240 
241     /**
242      * Filters the NAF document specified (the document is modified in-place). Filtering is
243      * controlled by the flags specified when creating the {@code NAFFilter} object.
244      *
245      * @param document
246      *            the document to filter
247      */
248     public void filter(final KAFDocument document) {
249 
250         // Check arguments
251         Preconditions.checkNotNull(document);
252 
253         // Log beginning of operation
254         final long ts = System.currentTimeMillis();
255         LOGGER.debug("== Filtering {} ==", document.getPublic().uri);
256 
257         // Normalize the document
258         NAFUtils.normalize(document);
259 
260         // Term-level filtering
261         if (this.termSenseFiltering) {
262             applyTermSenseFiltering(document);
263         }
264         if (this.termSenseCompletion) {
265             applyTermSenseCompletion(document);
266         }
267 
268         // Entity-level / Linking filtering
269         if (this.entityRemoveOverlaps) {
270             applyEntityRemoveOverlaps(document);
271         }
272         if (this.entitySpanFixing) {
273             applyEntitySpanFixing(document);
274         }
275         if (this.linkingCompletion) {
276             applyLinkingCompletion(document);
277         }
278         if (this.linkingFixing) {
279             applyLinkingFixing(document);
280         }
281         if (this.entityAddition) {
282             applyEntityAddition(document);
283         }
284         if (this.entityValueNormalization) {
285             applyEntityValueNormalization(document);
286         }
287 
288         // SRL-level filtering
289         if (this.srlPreprocess) {
290             applySRLPreprocess(document);
291         }
292         if (this.srlRemoveWrongRefs) {
293             applySRLRemoveWrongRefs(document);
294         }
295         if (this.srlRemoveUnknownPredicates) {
296             applySRLRemoveUnknownPredicates(document);
297         }
298         if (this.srlPredicateAddition) {
299             applySRLPredicateAddition(document);
300         }
301         if (this.srlSelfArgFixing) {
302             applySRLSelfArgFixing(document);
303         }
304         if (this.srlSenseMapping) {
305             applySRLSenseMapping(document);
306         }
307         if (this.srlFrameBaseMapping) {
308             applySRLFrameBaseMapping(document);
309         }
310         if (this.srlRoleLinking) {
311             applySRLRoleLinking(document);
312         }
313 
314         // added for replacing with premon IRIs
315         if (this.srlPreMOnIRIs) {
316             applySRLPreMOnIRIs(document);
317         }
318 
319         // Coref-level filtering
320         if (this.corefForRoleDependencies) {
321             applyCorefForRoleDependencies(document);
322         }
323         if (this.corefSpanFixing) {
324             applyCorefSpanFixing(document);
325         }
326 
327         // Opinion-level filtering
328         if (this.opinionLinking) {
329             applyOpinionLinking(document);
330         }
331 
332         LOGGER.debug("Done in {} ms", System.currentTimeMillis() - ts);
333     }
334 
335     // private void applyEntityTypeFixing(final KAFDocument document) {
336     //
337     // for (final Entity entity : ImmutableList.copyOf(document.getEntities())) {
338     //
339     //
340     //
341     //
342     // // Remove initial determiners and prepositions, plus all the terms not containing at
343     // // least a letter or a digit. Move to next entity if no change was applied
344     // final List<Term> filteredTerms = NAFUtils.filterTerms(entity.getTerms());
345     // if (filteredTerms.size() == entity.getTerms().size()) {
346     // continue;
347     // }
348     //
349     // // Remove the old entity
350     // document.removeAnnotation(entity);
351     //
352     // // If some term remained, add the filtered entity, reusing old type, named flag and
353     // // external references
354     // Entity newEntity = null;
355     // if (!filteredTerms.isEmpty()) {
356     // newEntity = document.newEntity(ImmutableList.of(KAFDocument
357     // .newTermSpan(filteredTerms)));
358     // newEntity.setType(entity.getType());
359     // newEntity.setNamed(entity.isNamed());
360     // for (final ExternalRef ref : entity.getExternalRefs()) {
361     // newEntity.addExternalRef(ref);
362     // }
363     // }
364     //
365     // // Log the change
366     // if (LOGGER.isDebugEnabled()) {
367     // LOGGER.debug((newEntity == null ? "Removed" : "Replaced") + " invalid " //
368     // + NAFUtils.toString(entity) + (newEntity == null ? "" : " with filtered " //
369     // + NAFUtils.toString(newEntity)));
370     // }
371     // }
372     //
373     // }
374 
375     private void applyTermSenseFiltering(final KAFDocument document) {
376 
377         for (final Term term : document.getTerms()) {
378             if (term.getMorphofeat() != null && term.getMorphofeat().startsWith("NNP")) {
379                 NAFUtils.removeRefs(term, NAFUtils.RESOURCE_WN_SYNSET, null);
380                 NAFUtils.removeRefs(term, NAFUtils.RESOURCE_WN_SST, null);
381                 NAFUtils.removeRefs(term, NAFUtils.RESOURCE_BBN, null);
382                 NAFUtils.removeRefs(term, NAFUtils.RESOURCE_SUMO, null);
383                 NAFUtils.removeRefs(term, NAFUtils.RESOURCE_YAGO, null);
384             }
385         }
386     }
387 
388     private void applyTermSenseCompletion(final KAFDocument document) {
389 
390         for (final Term term : document.getTerms()) {
391 
392             // Retrieve existing refs
393             ExternalRef bbnRef = NAFUtils.getRef(term, NAFUtils.RESOURCE_BBN, null);
394             ExternalRef synsetRef = NAFUtils.getRef(term, NAFUtils.RESOURCE_WN_SYNSET, null);
395             ExternalRef sstRef = NAFUtils.getRef(term, NAFUtils.RESOURCE_WN_SST, null);
396             final List<ExternalRef> sumoRefs = NAFUtils.getRefs(term, NAFUtils.RESOURCE_SUMO,
397                     null);
398             final List<ExternalRef> yagoRefs = NAFUtils.getRefs(term, NAFUtils.RESOURCE_YAGO,
399                     null);
400 
401             // Retrieve a missing SST from the WN Synset (works always)
402             if (sstRef == null && synsetRef != null) {
403                 final String sst = WordNet.mapSynsetToSST(synsetRef.getReference());
404                 if (sstRef == null || !Objects.equal(sstRef.getReference(), sst)) {
405                     LOGGER.debug((sstRef == null ? "Added" : "Overridden") + " SST '" + sst
406                             + "' of " + NAFUtils.toString(term) + " based on Synset '"
407                             + synsetRef.getReference() + "'");
408                     sstRef = document.newExternalRef(NAFUtils.RESOURCE_WN_SST, sst);
409                     NAFUtils.addRef(term, sstRef);
410                 }
411             }
412 
413             // Apply noun-based mapping.
414             final boolean isNoun = Character.toUpperCase(term.getPos().charAt(0)) == 'N';
415             if (isNoun) {
416 
417                 // Retrieve a missing BBN from the WN Synset
418                 if (bbnRef == null && synsetRef != null) {
419                     final String bbn = WordNet.mapSynsetToBBN(synsetRef.getReference());
420                     if (bbn != null) {
421                         bbnRef = document.newExternalRef(NAFUtils.RESOURCE_BBN, bbn);
422                         NAFUtils.addRef(term, bbnRef);
423                         LOGGER.debug("Added BBN '" + bbn + "' of " + NAFUtils.toString(term)
424                                 + " based on Synset '" + synsetRef.getReference() + "'");
425                     }
426 
427                 }
428 
429                 // Retrieve a missing WN Synset from the BBN
430                 if (synsetRef == null && bbnRef != null) {
431                     final String synsetID = WordNet.mapBBNToSynset(bbnRef.getReference());
432                     if (synsetID != null) {
433                         synsetRef = document.newExternalRef(NAFUtils.RESOURCE_WN_SYNSET, synsetID);
434                         NAFUtils.addRef(term, synsetRef);
435                         LOGGER.debug(
436                                 "Added Synset '" + synsetID + "' of " + NAFUtils.toString(term)
437                                         + " based on BBN '" + bbnRef.getReference() + "'");
438                     }
439                 }
440 
441                 // Retrieve a missing SST from the BBN
442                 if (sstRef == null && bbnRef != null) {
443                     final String sst = WordNet.mapBBNToSST(bbnRef.getReference());
444                     if (sst != null) {
445                         sstRef = document.newExternalRef(NAFUtils.RESOURCE_WN_SST, sst);
446                         NAFUtils.addRef(term, sstRef);
447                         LOGGER.debug("Added SST '" + sst + "' of " + NAFUtils.toString(term)
448                                 + " based on BBN '" + bbnRef.getReference() + "'");
449                     }
450                 }
451             }
452 
453             // Apply mapping to SUMO if synset is available
454             final String lemma = term.getLemma().toLowerCase();
455             if (sumoRefs.isEmpty() && synsetRef != null && !lemma.equals("be")) {
456                 Set<String> synsetIDs = Sets.newHashSet(synsetRef.getReference());
457                 Set<IRI> conceptIRIs = Sumo.synsetsToConcepts(synsetIDs);
458                 while (conceptIRIs.isEmpty() && !synsetIDs.isEmpty()) {
459                     final Set<String> oldSynsetIDs = synsetIDs;
460                     synsetIDs = Sets.newHashSet();
461                     for (final String oldSynsetID : oldSynsetIDs) {
462                         synsetIDs.addAll(WordNet.getHypernyms(oldSynsetID));
463                     }
464                     conceptIRIs = Sumo.synsetsToConcepts(synsetIDs);
465                 }
466                 if (conceptIRIs.isEmpty()) {
467                     synsetIDs = WordNet.getHyponyms(synsetRef.getReference());
468                     conceptIRIs = Sumo.synsetsToConcepts(synsetIDs);
469                 }
470                 if (!conceptIRIs.isEmpty()) {
471                     for (final IRI conceptIRI : conceptIRIs) {
472                         final String sumoID = conceptIRI.getLocalName();
473                         final ExternalRef sumoRef = document.newExternalRef(NAFUtils.RESOURCE_SUMO,
474                                 sumoID);
475                         NAFUtils.setRef(term, sumoRef);
476                         LOGGER.debug("Added SUMO mapping: " + NAFUtils.toString(term) + " -> sumo:"
477                                 + conceptIRI.getLocalName());
478                     }
479                 }
480             }
481 
482             // Apply mapping to Yago if synset is available
483             if (yagoRefs.isEmpty() && synsetRef != null) {
484                 for (final IRI uri : YagoTaxonomy
485                         .getDBpediaYagoIRIs(ImmutableList.of(synsetRef.getReference()))) {
486                     final String yagoID = uri.stringValue()
487                             .substring(YagoTaxonomy.NAMESPACE.length());
488                     final ExternalRef yagoRef = document.newExternalRef(NAFUtils.RESOURCE_YAGO,
489                             yagoID);
490                     NAFUtils.setRef(term, yagoRef);
491                     LOGGER.debug("Added Yago mapping: " + NAFUtils.toString(term) + " -> yago:"
492                             + yagoID);
493                 }
494             }
495         }
496     }
497 
498     private void applyEntitySpanFixing(final KAFDocument document) {
499 
500         // Filter or remove entities consisting of invalid terms
501         for (final Entity entity : ImmutableList.copyOf(document.getEntities())) {
502 
503             // Remove initial determiners and prepositions, plus all the terms not containing at
504             // least a letter or a digit. Move to next entity if no change was applied
505             final List<Term> filteredTerms = NAFUtils.filterTerms(entity.getTerms());
506             if (filteredTerms.size() == entity.getTerms().size()) {
507                 continue;
508             }
509 
510             // Remove the old entity
511             document.removeAnnotation(entity);
512 
513             // If some term remained, add the filtered entity, reusing old type, named flag and
514             // external references
515             Entity newEntity = null;
516             if (!filteredTerms.isEmpty()) {
517                 newEntity = document
518                         .newEntity(ImmutableList.of(KAFDocument.newTermSpan(filteredTerms)));
519                 newEntity.setType(entity.getType());
520                 newEntity.setNamed(entity.isNamed());
521                 for (final ExternalRef ref : entity.getExternalRefs()) {
522                     newEntity.addExternalRef(ref);
523                 }
524             }
525 
526             // Log the change
527             if (LOGGER.isDebugEnabled()) {
528                 LOGGER.debug((newEntity == null ? "Removed" : "Replaced") + " invalid " //
529                         + NAFUtils.toString(entity) + (newEntity == null ? ""
530                                 : " with filtered " //
531                                         + NAFUtils.toString(newEntity)));
532             }
533         }
534     }
535 
536     private void applyEntityRemoveOverlaps(final KAFDocument document) {
537 
538         // Consider all the entities in the document
539         outer: for (final Entity entity : ImmutableList.copyOf(document.getEntities())) {
540             for (final Term term : entity.getTerms()) {
541 
542                 // Remove entities whose span is contained in the span of another entity
543                 for (final Entity entity2 : document.getEntitiesByTerm(term)) {
544                     if (entity2 != entity && entity2.getTerms().containsAll(entity.getTerms())) {
545                         document.removeAnnotation(entity);
546                         if (LOGGER.isDebugEnabled()) {
547                             LOGGER.debug("Removed " + NAFUtils.toString(entity)
548                                     + " overlapping with " + NAFUtils.toString(entity2));
549                         }
550                         continue outer;
551                     }
552                 }
553 
554                 // Remove entities whose span overlaps with the span of some timex
555                 for (final WF wf : term.getWFs()) {
556                     final List<Timex3> timex = document.getTimeExsByWF(wf);
557                     if (!timex.isEmpty()) {
558                         document.removeAnnotation(entity);
559                         if (LOGGER.isDebugEnabled()) {
560                             LOGGER.debug("Removed " + NAFUtils.toString(entity)
561                                     + " overlapping with TIMEX3 '" + NAFUtils.toString(timex));
562                         }
563                         continue outer;
564                     }
565                 }
566             }
567         }
568     }
569 
570     private void applyEntityAddition(final KAFDocument document) {
571 
572         for (final Term term : document.getTerms()) {
573 
574             // Select names, nouns and pronouns that are not part of NE or Timex
575             final char pos = Character.toUpperCase(term.getPos().charAt(0));
576             final Dep dep = document.getDepToTerm(term);
577             final boolean namePart = pos == 'R' && dep != null
578                     && dep.getRfunc().toLowerCase().contains("name")
579                     && Character.toUpperCase(dep.getFrom().getPos().charAt(0)) == 'R'
580                     && document.getEntitiesByTerm(dep.getFrom()).isEmpty();
581             if (pos != 'R' && pos != 'N' && pos != 'Q' || namePart
582                     || !document.getTimeExsByWF(term.getWFs().get(0)).isEmpty() //
583                     || !document.getEntitiesByTerm(term).isEmpty()) {
584                 continue;
585             }
586 
587             // Determine the entity type based on NER tag first, WN synset then and SST last
588             String type = null;
589             final ExternalRef bbnRef = NAFUtils.getRef(term, NAFUtils.RESOURCE_BBN, null);
590             if (bbnRef != null) {
591                 type = bbnRef.getReference();
592             } else {
593                 final ExternalRef synsetRef = NAFUtils.getRef(term, NAFUtils.RESOURCE_WN_SYNSET,
594                         null);
595                 if (synsetRef != null) {
596                     type = WordNet.mapSynsetToBBN(synsetRef.getReference());
597                 } else {
598                     final ExternalRef sstRef = NAFUtils.getRef(term, NAFUtils.RESOURCE_WN_SST,
599                             null);
600                     if (sstRef != null) {
601                         String sst = sstRef.getReference();
602                         sst = sst.substring(sst.lastIndexOf('.') + 1);
603                         type = ENTITY_SST_TO_TYPES.get(sst);
604                     }
605                 }
606             }
607 
608             // Determine the terms for the nominal node.
609             // TODO: consider multiwords
610             final Span<Term> span = NAFUtils.getNominalSpan(document, term, false, false);
611 
612             // Add the entity, setting its type and 'named' flag
613             final Entity entity = document.newEntity(ImmutableList.of(span));
614             if (type != null)
615                 entity.setType(type.toUpperCase().replace("PERSON", "PER")
616                         .replace("ORGANIZATION", "ORG").replace("LOCATION", "LOC"));
617             entity.setNamed(pos == 'R');
618             if (LOGGER.isDebugEnabled()) {
619                 LOGGER.debug("Added " + (entity.isNamed() ? "named " : "")
620                         + NAFUtils.toString(entity) + " with type '" + type + "'");
621             }
622         }
623     }
624 
625     private void applyEntityValueNormalization(final KAFDocument document) {
626 
627         for (final Entity entity : document.getEntities()) {
628             String type = entity.getType();
629             type = type == null ? null : type.toLowerCase();
630             if ("cardinal".equals(type) || "ordinal".equals(type) || "percent".equals(type)
631                     || "money".equals(type)) {
632 
633                 ExternalRef ref = null;
634                 final String str = entity.getSpans().get(0).getStr().toLowerCase();
635                 Double value = null;
636                 try {
637                     value = NumberSpeller.parse(str);
638                 } catch (Throwable ex) {
639                     LOGGER.debug("Could not parse number '" + str + "'", ex);
640                 }
641                 if (value != null) {
642                     String prefix = "";
643                     if ("percent".equals(type)) {
644                         prefix = "%";
645                     } else if ("money".equals(type)) {
646                         prefix = "¤";
647                         if (str.contains("euro")) {
648                             prefix = "€";
649                         } else if (str.contains("dollar")) {
650                             prefix = "$";
651                         } else if (str.contains("yen")) {
652                             prefix = "Â¥";
653                         }
654                     }
655                     ref = document.newExternalRef(NAFUtils.RESOURCE_VALUE,
656                             prefix + Double.toString(value.doubleValue()));
657                 }
658 
659                 if (ref != null && NAFUtils.getRef(entity, ref.getResource(), null) == null) {
660                     NAFUtils.addRef(entity, ref);
661                     LOGGER.debug("Added ref '" + ref + "' to " + NAFUtils.toString(entity));
662                 }
663             }
664         }
665     }
666 
667     private void applyLinkingCompletion(final KAFDocument document) {
668 
669         for (final LinkedEntity le : document.getLinkedEntities()) {
670 
671             // Determine head for current linked entity
672             final List<Term> terms = document.getTermsByWFs(le.getWFs().getTargets());
673             final Term head = document.getTermsHead(terms);
674             if (head == null) {
675                 continue;
676             }
677 
678             // Apply the sense to entities with same head where it is missing
679             Entity entityToModify = null;
680             for (final Entity entity : document.getEntitiesByTerm(head)) {
681                 if (head.equals(document.getTermsHead(entity.getTerms()))) {
682                     entityToModify = entity;
683                 }
684             }
685             if (entityToModify == null) {
686                 final Span<Term> span = KAFDocument
687                         .newTermSpan(document.getTermsByWFs(le.getWFs().getTargets()));
688                 boolean overlap = false;
689                 for (final Term term : span.getTargets()) {
690                     final List<Entity> overlappingEntities = document.getEntitiesByTerm(term);
691                     if (overlappingEntities != null && !overlappingEntities.isEmpty()) {
692                         overlap = true;
693                         break;
694                     }
695                 }
696                 if (!overlap) {
697                     final boolean named = head.getMorphofeat().startsWith("NNP");
698                     boolean accept = named;
699                     if (!accept) {
700                         final String textStr = span.getStr().toLowerCase().replaceAll("\\s+", "_");
701                         final String entityStr = Statements.VALUE_FACTORY
702                                 .createIRI(le.getReference()).getLocalName().toLowerCase();
703                         accept = textStr.equals(entityStr);
704                     }
705                     if (accept) {
706                         entityToModify = document.newEntity(ImmutableList.of(span));
707                         entityToModify.setNamed(head.getMorphofeat().startsWith("NNP"));
708                         if (LOGGER.isDebugEnabled()) {
709                             LOGGER.debug(
710                                     "Added linked " + (entityToModify.isNamed() ? "named " : "")
711                                             + NAFUtils.toString(entityToModify));
712                         }
713                     }
714                 }
715             }
716 
717             if (entityToModify != null) {
718                 final ExternalRef existingRef = NAFUtils.getRef(entityToModify, le.getResource(),
719                         le.getReference());
720                 if (existingRef == null) {
721                     final ExternalRef ref = document.newExternalRef(le.getResource(),
722                             le.getReference());
723                     ref.setConfidence((float) le.getConfidence());
724                     NAFUtils.addRef(entityToModify, ref);
725                     LOGGER.debug(
726                             "Added ref '" + ref + "' to " + NAFUtils.toString(entityToModify));
727                 } else {
728                     float existingRefConfidence = existingRef.getConfidence();
729                     if (existingRefConfidence < le.getConfidence()) {
730                         existingRef.setConfidence((float) le.getConfidence());
731                         LOGGER.debug("Modified confidence of '" + existingRef + "' to "
732                                 + le.getConfidence());
733                     }
734                 }
735             }
736 
737             // Apply the sense to predicates with same head where it is missing
738             for (final Predicate predicate : document.getPredicatesByTerm(head)) {
739                 if (head.equals(document.getTermsHead(predicate.getTerms()))) {
740                     if (NAFUtils.getRef(predicate, le.getResource(), le.getReference()) == null) {
741                         final ExternalRef ref = document.newExternalRef(le.getResource(),
742                                 le.getReference());
743                         ref.setConfidence((float) le.getConfidence());
744                         NAFUtils.addRef(predicate, ref);
745                         LOGGER.debug("Added ref '" + ref + "' to " + NAFUtils.toString(predicate));
746                     }
747                 }
748             }
749         }
750     }
751 
752     private void applyLinkingFixing(final KAFDocument document) {
753 
754         // Check each linked entity, dropping the links if the span is in the stop word list
755         final List<ExternalRef> refs = Lists.newArrayList();
756         for (final Entity entity : document.getEntities()) {
757 
758             // Extract all the <ExternalRef> elements with links for the current entity
759             refs.clear();
760             for (final ExternalRef ref : entity.getExternalRefs()) {
761                 if (!NAFUtils.RESOURCE_VALUE.equals(ref.getResource())) {
762                     refs.add(ref);
763                 }
764             }
765 
766             // If the entity is linked, check its span is not in the stop word list
767             if (!refs.isEmpty()) {
768                 final String[] tokens = Util.hardTokenize(entity.getStr());
769                 final String normalized = Joiner.on(' ').join(tokens).toLowerCase();
770                 if (Arrays.binarySearch(LINKING_STOP_WORDS, normalized) >= 0) {
771                     for (final ExternalRef ref : refs) {
772                         NAFUtils.removeRefs(entity, ref.getResource(), ref.getReference());
773                         if (LOGGER.isDebugEnabled()) {
774                             LOGGER.debug("Removed stop-word ref '{}' from {}", ref,
775                                     NAFUtils.toString(entity));
776                         }
777                     }
778                 }
779             }
780         }
781     }
782 
    /**
     * Cleans up the spans of each coreference set of the document. For every set, the method:
     * removes spans without a valid head (setting the head on the others); removes spans that
     * strictly contain a smaller span of the same set; then either filters the spans (when some
     * span has an NNP head) or splits the set by sentence (otherwise). Sets left without spans
     * are removed from the document.
     *
     * @param document
     *            the document to modify in place
     */
    @SuppressWarnings("deprecation")
    private void applyCorefSpanFixing(final KAFDocument document) {

        // Process each <coref> element in the NAF document (iterating over a snapshot, as
        // coref sets are added to / removed from the document inside the loop)
        for (final Coref coref : ImmutableList.copyOf(document.getCorefs())) {

            // Remove spans without valid head; store the extracted head on the remaining spans
            for (final Span<Term> span : ImmutableList.copyOf(coref.getSpans())) {
                final Term head = NAFUtils.extractHead(document, span);
                if (head == null) {
                    coref.getSpans().remove(span);
                    if (LOGGER.isDebugEnabled()) {
                        LOGGER.debug("Removed span with invalid head '{}' from {}", span.getStr(),
                                NAFUtils.toString(coref));
                    }
                } else {
                    span.setHead(head);
                }
            }

            // Remove spans containing smaller spans + determine if there is span with NNP head.
            // Both flags are computed only on the spans that are kept: a removed span jumps to
            // the next iteration through 'continue outer'
            boolean hasProperNounHead = false;
            boolean isEvent = false;
            final List<Span<Term>> spans = ImmutableList.copyOf(coref.getSpans());
            outer: for (final Span<Term> span1 : spans) {
                for (final Span<Term> span2 : spans) {
                    // span1 is dropped if strictly larger than span2 and covering all its terms
                    if (span1.size() > span2.size()
                            && span1.getTargets().containsAll(span2.getTargets())) {
                        coref.getSpans().remove(span1);
                        if (LOGGER.isDebugEnabled()) {
                            LOGGER.debug("Removed span '{}' including smaller span '{}' from {}",
                                    span1.getStr(), span2.getStr(), NAFUtils.toString(coref));
                        }
                        continue outer;
                    }
                }
                hasProperNounHead |= span1.getHead().getMorphofeat().startsWith("NNP");
                if (!isEvent) {
                    // The set is flagged as an event if some SUMO reference of the span head
                    // is a subclass of SUMO_PROCESS
                    for (final ExternalRef ref : NAFUtils.getRefs(span1.getHead(),
                            NAFUtils.RESOURCE_SUMO, null)) {
                        final IRI sumoID = Statements.VALUE_FACTORY
                                .createIRI(SUMO_NAMESPACE + ref.getReference());
                        if (Sumo.isSubClassOf(sumoID, SUMO_PROCESS)) {
                            isEvent = true;
                        }
                    }
                }
            }

            // If some span has a proper noun (NNP) head, filter the spans with a different head;
            // otherwise split the set by sentence
            if (hasProperNounHead) {

                // Unless the set is an event, drop non-NNP spans whose head is a verb or is a
                // predicate with a NomBank roleset having no mandatory / optional predicate args
                for (final Span<Term> span : ImmutableList.copyOf(coref.getSpans())) {
                    final Term head = span.getHead();
                    if (!head.getMorphofeat().startsWith("NNP") && !isEvent) {
                        if (head.getMorphofeat().startsWith("VB")) {
                            coref.getSpans().remove(span);
                            LOGGER.debug("Removed span with VB head '{}' from {}", span.getStr(),
                                    NAFUtils.toString(coref));
                        } else {
                            // A single matching roleset is enough: 'break outer' stops the
                            // scan of the predicates of the head once the span is removed
                            outer: for (final Predicate predicate : document
                                    .getPredicatesByTerm(head)) {
                                for (final ExternalRef ref : NAFUtils.getRefs(predicate,
                                        NAFUtils.RESOURCE_NOMBANK, null)) {
                                    final NomBank.Roleset roleset = NomBank
                                            .getRoleset(ref.getReference());
                                    if (roleset != null
                                            && roleset.getPredMandatoryArgNums().isEmpty()
                                            && roleset.getPredOptionalArgNums().isEmpty()) {
                                        // Not a role
                                        coref.getSpans().remove(span);
                                        LOGGER.debug(
                                                "Removed span with non-role predicate "
                                                        + "head '{}' from {}",
                                                span.getStr(), NAFUtils.toString(coref));
                                        break outer;
                                    }
                                }
                            }
                        }
                    }
                }

            } else {

                // Split the coreference set into multiple sets, one for each sentence (the
                // sentence of a span being the one of its first term)
                final Multimap<Integer, Span<Term>> spansBySentence = HashMultimap.create();
                for (final Span<Term> span : coref.getSpans()) {
                    final int sentID = span.getTargets().get(0).getSent();
                    spansBySentence.put(sentID, span);
                }
                if (spansBySentence.keySet().size() > 1) {
                    // Empty the original set (it is removed below) and create a new set for
                    // each sentence contributing at least two spans
                    coref.getSpans().clear();
                    for (final Collection<Span<Term>> sentSpans : spansBySentence.asMap()
                            .values()) {
                        if (sentSpans.size() > 1) {
                            document.newCoref(Lists.newArrayList(sentSpans));
                        }
                    }
                }

            }

            // Drop coref in case no span remains.
            if (coref.getSpans().isEmpty()) {
                document.removeAnnotation(coref);
                LOGGER.debug("Removed empty coref set {}", NAFUtils.toString(coref));
            }
        }
    }
894 
895     private void applyCorefForRoleDependencies(final KAFDocument document) {
896 
897         outer: for (final Dep dep : document.getDeps()) {
898             final String label = dep.getRfunc();
899             if ("APPO".equals(label) || "TITLE".equals(label) || "NMOD".equals(label)) {
900 
901                 // Identify the proper name term and the role name term
902                 Term nameTerm;
903                 Term roleTerm;
904                 final String posFrom = dep.getFrom().getMorphofeat();
905                 final String posTo = dep.getTo().getMorphofeat();
906                 if (posFrom.startsWith("NNP") && posTo.startsWith("NN")
907                         && !posTo.startsWith("NNP")) {
908                     nameTerm = dep.getFrom();
909                     roleTerm = dep.getTo();
910                 } else if (posTo.startsWith("NNP") && posFrom.startsWith("NN")
911                         && !posFrom.startsWith("NNP") && label.equals("APPO")) {
912                     nameTerm = dep.getTo();
913                     roleTerm = dep.getFrom();
914                 } else {
915                     continue outer;
916                 }
917 
918                 // Abort if the two terms are already marked as coreferential
919                 for (final Coref coref : document.getCorefsByTerm(nameTerm)) {
920                     if (NAFUtils.hasHead(document, coref, nameTerm)
921                             && NAFUtils.hasHead(document, coref, roleTerm)) {
922                         continue outer;
923                     }
924                 }
925 
926                 // Verify the role term actually corresponds to a nombank role
927                 boolean isActualRole = false;
928                 predLoop: for (final Predicate predicate : document
929                         .getPredicatesByTerm(roleTerm)) {
930                     for (final ExternalRef ref : predicate.getExternalRefs()) {
931                         if (NAFUtils.RESOURCE_NOMBANK.equals(ref.getResource())) {
932                             final NomBank.Roleset rs = NomBank.getRoleset(ref.getReference());
933                             if (rs != null && (!rs.getPredMandatoryArgNums().isEmpty() //
934                                     || !rs.getPredOptionalArgNums().isEmpty())) {
935                                 isActualRole = true;
936                                 break predLoop;
937                             }
938                         }
939                     }
940                 }
941                 if (!isActualRole) {
942                     continue outer;
943                 }
944 
945                 // Expand coordination
946                 final Set<Term> roleHeads = document
947                         .getTermsByDepAncestors(ImmutableSet.of(roleTerm), "(COORD CONJ?)*");
948                 final Set<Term> nameHeads = document
949                         .getTermsByDepAncestors(ImmutableSet.of(nameTerm), "(COORD CONJ?)*");
950 
951                 // Check that all name heads are proper names
952                 for (final Term nameHead : nameHeads) {
953                     if (!nameHead.getMorphofeat().startsWith("NNP")) {
954                         continue outer;
955                     }
956                 }
957 
958                 // Check role plural/singular form
959                 for (final Term roleHead : roleHeads) {
960                     final boolean plural = roleHead.getMorphofeat().endsWith("S");
961                     if (nameHeads.size() == 1 && plural || nameHeads.size() > 1 && !plural) {
962                         continue outer;
963                     }
964                 }
965 
966                 // Add a new coreference cluster
967                 final List<Span<Term>> spans = Lists.newArrayList();
968                 spans.add(NAFUtils.getNominalSpan(document, nameTerm, true, false));
969                 for (final Term roleHead : roleHeads) {
970                     spans.add(NAFUtils.getNominalSpan(document, roleHead, false, false));
971                 }
972                 final Coref coref = document.newCoref(spans);
973                 if (LOGGER.isDebugEnabled()) {
974                     final StringBuilder builder = new StringBuilder("Added coref ");
975                     builder.append(coref.getId()).append(":");
976                     for (final Span<Term> span : coref.getSpans()) {
977                         builder.append(" '").append(span.getStr()).append('\'');
978                     }
979                     LOGGER.debug(builder.toString());
980                 }
981             }
982         }
983     }
984 
985     private void applySRLPreprocess(final KAFDocument document) {
986 
987         // Allocate two maps to store term -> predicate pairs
988         final Map<Term, Predicate> matePredicates = Maps.newHashMap();
989         final Map<Term, Predicate> semaforPredicates = Maps.newHashMap();
990 
991         // Remove predicates with invalid head
992         for (final Predicate predicate : ImmutableList.copyOf(document.getPredicates())) {
993             if (NAFUtils.extractHead(document, predicate.getSpan()) == null) {
994                 document.removeAnnotation(predicate);
995                 LOGGER.debug("Removed {} without valid head term", predicate);
996             }
997         }
998 
999         // TODO: remove once fixed - normalize Semafor roles
1000         // if (this.srlEnableSemafor) {
1001         // for (final Predicate predicate : document.getPredicates()) {
1002         // if (predicate.getId().startsWith("f_pr")
1003         // || "semafor".equalsIgnoreCase(predicate.getSource())) {
1004         // for (final Role role : predicate.getRoles()) {
1005         // role.setSemRole("");
1006         // final Term head = NAFUtils.extractHead(document, role.getSpan());
1007         // if (head != null) {
1008         // final Span<Term> newSpan = KAFDocument.newTermSpan(Ordering.from(
1009         // Term.OFFSET_COMPARATOR).sortedCopy(
1010         // document.getTermsByDepAncestors(ImmutableList.of(head))));
1011         // role.setSpan(newSpan);
1012         // }
1013         // }
1014         // }
1015         // }
1016         // }
1017 
1018         // TODO: remove alignments from PM
1019         // for (final Predicate predicate : document.getPredicates()) {
1020         // if (!predicate.getId().startsWith("f_pr")
1021         // && !"semafor".equalsIgnoreCase(predicate.getSource())) {
1022         // NAFUtils.removeRefs(predicate, "FrameNet", null);
1023         // for (final Role role : predicate.getRoles()) {
1024         // NAFUtils.removeRefs(role, "FrameNet", null);
1025         // }
1026         // }
1027         // }
1028 
1029         // Remove predicates from non-enabled tools (Mate, Semafor)
1030         for (final Predicate predicate : Lists.newArrayList(document.getPredicates())) {
1031             final boolean isSemafor = predicate.getId().startsWith("f_pr")
1032                     || "semafor".equalsIgnoreCase(predicate.getSource());
1033             if (isSemafor && !this.srlEnableSemafor || !isSemafor && !this.srlEnableMate) {
1034                 document.removeAnnotation(predicate);
1035                 if (LOGGER.isDebugEnabled()) {
1036                     LOGGER.debug("Removed " + NAFUtils.toString(predicate) + " (disabled)");
1037                 }
1038             } else {
1039                 final Term term = NAFUtils.extractHead(document, predicate.getSpan());
1040                 (isSemafor ? semaforPredicates : matePredicates).put(term, predicate);
1041             }
1042         }
1043 
1044         // For each Semafor predicate, merge a corresponding Mate predicate for the same term
1045         for (final Map.Entry<Term, Predicate> entry : semaforPredicates.entrySet()) {
1046             final Term term = entry.getKey();
1047             final Predicate semaforPredicate = entry.getValue();
1048             final Predicate matePredicate = matePredicates.get(term);
1049             if (matePredicate != null) {
1050 
1051                 // Determine whether FrameNet predicate corresponds (-> FN data can be merged)
1052                 final ExternalRef semaforRef = NAFUtils.getRef(semaforPredicate, "FrameNet", null);
1053                 final ExternalRef mateRef = NAFUtils.getRef(matePredicate, "FrameNet", null);
1054                 final boolean mergeFramenet = semaforRef != null && mateRef != null
1055                         && semaforRef.getReference().equalsIgnoreCase(mateRef.getReference());
1056 
1057                 // Merge predicate types
1058                 for (final ExternalRef ref : NAFUtils.getRefs(matePredicate, null, null)) {
1059                     if (!ref.getResource().equalsIgnoreCase("FrameNet")) {
1060                         NAFUtils.addRef(semaforPredicate, new ExternalRef(ref));
1061                     }
1062                 }
1063 
1064                 // Merge roles
1065                 for (final Role mateRole : matePredicate.getRoles()) {
1066                     boolean addRole = true;
1067                     final Set<Term> mateTerms = ImmutableSet
1068                             .copyOf(mateRole.getSpan().getTargets());
1069                     for (final Role semaforRole : semaforPredicate.getRoles()) {
1070                         final Set<Term> semaforTerms = ImmutableSet
1071                                 .copyOf(semaforRole.getSpan().getTargets());
1072                         if (mateTerms.equals(semaforTerms)) {
1073                             addRole = false;
1074                             semaforRole.setSemRole(mateRole.getSemRole());
1075                             final boolean addFramenetRef = mergeFramenet
1076                                     && NAFUtils.getRef(semaforRole, "FrameNet", null) != null;
1077                             for (final ExternalRef ref : mateRole.getExternalRefs()) {
1078                                 if (!ref.getResource().equalsIgnoreCase("FrameNet")
1079                                         || addFramenetRef) {
1080                                     semaforRole.addExternalRef(new ExternalRef(ref));
1081                                 }
1082                             }
1083                         }
1084                     }
1085                     if (addRole) {
1086                         final Role semaforRole = document.newRole(semaforPredicate,
1087                                 mateRole.getSemRole(), mateRole.getSpan());
1088                         semaforPredicate.addRole(semaforRole);
1089                         for (final ExternalRef ref : mateRole.getExternalRefs()) {
1090                             semaforRole.addExternalRef(new ExternalRef(ref));
1091                         }
1092                     }
1093                 }
1094 
1095                 // Delete original Mate predicate
1096                 document.removeAnnotation(matePredicate);
1097 
1098                 // Log operation
1099                 if (LOGGER.isDebugEnabled()) {
1100                     LOGGER.debug("Merged " + NAFUtils.toString(matePredicate) + " into "
1101                             + NAFUtils.toString(semaforPredicate)
1102                             + (mergeFramenet ? " (including FrameNet data)" : ""));
1103                 }
1104 
1105             }
1106         }
1107     }
1108 
1109     private void applySRLRemoveWrongRefs(final KAFDocument document) {
1110 
1111         // Scan all predicates in the SRL layer
1112         for (final Predicate predicate : Lists.newArrayList(document.getPredicates())) {
1113 
1114             // Extract correct lemma from predicate term
1115             final Term head = document.getTermsHead(predicate.getTerms());
1116             final String expectedLemma = head.getLemma();
1117 
1118             // Determine which resource to look for: PropBank vs NomBank
1119             final String resource = head.getPos().equalsIgnoreCase("V") ? "propbank" : "nombank";
1120 
1121             // Clean rolesets
1122             final List<ExternalRef> refs = NAFUtils.getRefs(predicate, resource, null);
1123             Integer expectedSense = null;
1124             for (final ExternalRef ref : refs) {
1125                 if (ref.getSource() != null) {
1126                     expectedSense = NAFUtils.extractSense(ref.getReference());
1127                     break;
1128                 }
1129             }
1130             for (final ExternalRef ref : refs) {
1131                 final String lemma = NAFUtils.extractLemma(ref.getReference());
1132                 final Integer sense = NAFUtils.extractSense(ref.getReference());
1133                 if (!expectedLemma.equalsIgnoreCase(lemma)
1134                         || expectedSense != null && !expectedSense.equals(sense)) {
1135                     NAFUtils.removeRefs(predicate, resource, ref.getReference());
1136                     if (LOGGER.isDebugEnabled()) {
1137                         LOGGER.debug("Removed wrong roleset '" + ref.getReference() + "' for "
1138                                 + NAFUtils.toString(predicate));
1139                     }
1140                 }
1141             }
1142 
1143             // Clean roles
1144             for (final Role role : predicate.getRoles()) {
1145                 final Integer expectedNum = NAFUtils.extractArgNum(role.getSemRole());
1146                 for (final ExternalRef ref : NAFUtils.getRefs(role, resource, null)) {
1147                     final String lemma = NAFUtils.extractLemma(ref.getReference());
1148                     final Integer sense = NAFUtils.extractSense(ref.getReference());
1149                     final Integer num = NAFUtils.extractArgNum(ref.getReference());
1150                     if (!Objects.equal(expectedNum, num) || !expectedLemma.equalsIgnoreCase(lemma)
1151                             || expectedSense != null && !expectedSense.equals(sense)) {
1152                         role.getExternalRefs().remove(ref);
1153                         if (LOGGER.isDebugEnabled()) {
1154                             LOGGER.debug("Removed wrong role '" + ref.getReference() + "' for "
1155                                     + NAFUtils.toString(predicate));
1156                         }
1157                     }
1158                 }
1159             }
1160         }
1161     }
1162 
1163     private void applySRLRemoveUnknownPredicates(final KAFDocument document) {
1164 
1165         // Scan all predicates in the SRL layer
1166         for (final Predicate predicate : Lists.newArrayList(document.getPredicates())) {
1167 
1168             // Determine whether the predicate is a verb and thus which resource to check for>
1169             final Term head = document.getTermsHead(predicate.getTerms());
1170             final boolean isVerb = head.getPos().equalsIgnoreCase("V");
1171             final String resource = isVerb ? "propbank" : "nombank";
1172 
1173             // Predicate is invalid if its roleset is unknown in NomBank / PropBank
1174             for (final ExternalRef ref : NAFUtils.getRefs(predicate, resource, null)) {
1175                 final String roleset = ref.getReference();
1176                 if (isVerb && PropBank.getRoleset(roleset) == null
1177                         || !isVerb && NomBank.getRoleset(roleset) == null) {
1178                     document.removeAnnotation(predicate);
1179                     if (LOGGER.isDebugEnabled()) {
1180                         LOGGER.debug("Removed " + NAFUtils.toString(predicate)
1181                                 + " with unknown sense '" + roleset + "' in resource " + resource);
1182                     }
1183                     break;
1184                 }
1185             }
1186         }
1187     }
1188 
1189     private void applySRLPredicateAddition(final KAFDocument document) {
1190 
1191         for (final Term term : document.getTerms()) {
1192 
1193             // Ignore terms already marked as predicates or timex or that are part of proper names
1194             final char pos = Character.toUpperCase(term.getPos().charAt(0));
1195             if (pos != 'V' && pos != 'N' && pos != 'G' && pos != 'A'
1196                     || !document.getPredicatesByTerm(term).isEmpty()
1197                     || !document.getTimeExsByWF(term.getWFs().get(0)).isEmpty()) {
1198                 continue;
1199             }
1200 
1201             // Identify the smallest entity the term belongs to, if any, in which case require
1202             // the term to be the head of the entity. This will discard other terms inside an
1203             // entity (even if nouns), thus enforcing a policy where entities are indivisible
1204             Entity entity = null;
1205             for (final Entity e : document.getEntitiesByTerm(term)) {
1206                 if (entity == null || e.getTerms().size() < entity.getTerms().size()) {
1207                     entity = e;
1208                     break;
1209                 }
1210             }
1211             if (entity != null && term != document.getTermsHead(entity.getTerms())) {
1212                 continue;
1213             }
1214 
1215             // Decide if a predicate can be added and, in case, which is its roleset,
1216             // distinguishing between verbs (-> PropBank) and other terms (-> NomBank)
1217             ExternalRef ref = null;
1218             final String lemma = term.getLemma();
1219             if (pos == 'V') {
1220                 final List<PropBank.Roleset> rolesets = PropBank.getRolesets(lemma);
1221                 if (rolesets.size() == 1) {
1222                     final String rolesetID = rolesets.get(0).getID();
1223                     ref = document.newExternalRef(NAFUtils.RESOURCE_PROPBANK, rolesetID);
1224                 }
1225             } else {
1226                 final List<NomBank.Roleset> rolesets = NomBank.getRolesetsForLemma(lemma);
1227                 if (rolesets.size() == 1) {
1228                     final String rolesetID = rolesets.get(0).getId();
1229                     ref = document.newExternalRef(NAFUtils.RESOURCE_NOMBANK, rolesetID);
1230                 }
1231             }
1232 
1233             // Create the predicate, if possible
1234             if (ref != null) {
1235                 final Predicate predicate = document.newPredicate(
1236                         KAFDocument.newTermSpan(Collections.singletonList(term), term));
1237                 predicate.addExternalRef(ref);
1238                 if (LOGGER.isDebugEnabled()) {
1239                     LOGGER.debug("Added " + NAFUtils.toString(predicate) + ", sense '"
1240                             + ref.getReference() + "'");
1241                 }
1242             }
1243         }
1244     }
1245 
1246     private void applySRLSelfArgFixing(final KAFDocument document) {
1247 
1248         for (final Predicate predicate : document.getPredicates()) {
1249 
1250             // Skip verbs
1251             final Term predTerm = predicate.getTerms().get(0);
1252             if (predTerm.getPos().equalsIgnoreCase("V")) {
1253                 continue;
1254             }
1255 
1256             // Retrieve the NomBank roleset for current predicate, if known. Skip otherwise
1257             final String rolesetID = NAFUtils.getRoleset(predicate);
1258             final NomBank.Roleset roleset = rolesetID == null ? null
1259                     : NomBank.getRoleset(rolesetID);
1260             if (roleset == null) {
1261                 continue;
1262             }
1263 
1264             // Retrieve mandatory and optional roles associated to NomBank roleset
1265             final List<Integer> mandatoryArgs = roleset.getPredMandatoryArgNums();
1266             final List<Integer> optionalArgs = roleset.getPredOptionalArgNums();
1267 
1268             // Check current role assignment to predicate term. Mark it as invalid if necessary
1269             int currentNum = -1;
1270             for (final Role role : ImmutableList.copyOf(predicate.getRoles())) {
1271                 final Term headTerm = document.getTermsHead(role.getTerms());
1272                 if (headTerm == predTerm && role.getSemRole() != null) {
1273                     boolean valid = false;
1274                     final Matcher matcher = SRL_ROLE_PATTERN.matcher(role.getSemRole());
1275                     if (matcher.matches()) {
1276                         currentNum = Integer.parseInt(matcher.group(1));
1277                         valid = roleset.getPredMandatoryArgNums().contains(currentNum)
1278                                 || roleset.getPredOptionalArgNums().contains(currentNum);
1279                     }
1280                     if (!valid) {
1281                         predicate.removeRole(role);
1282                         LOGGER.debug("Removed " + NAFUtils.toString(role) + " for "
1283                                 + NAFUtils.toString(predicate) + " (mandatory " + mandatoryArgs
1284                                 + ", optional " + optionalArgs + ")");
1285                     }
1286                 }
1287             }
1288 
1289             // Add missing role marking, if necessary
1290             if (!roleset.getPredMandatoryArgNums().isEmpty()) {
1291                 final List<Integer> args = Lists.newArrayList();
1292                 args.addAll(roleset.getPredMandatoryArgNums());
1293                 args.remove((Object) currentNum);
1294                 for (final Integer arg : args) {
1295                     final List<Term> terms = Ordering.from(Term.OFFSET_COMPARATOR).sortedCopy(
1296                             document.getTermsByDepAncestors(Collections.singleton(predTerm)));
1297                     final Span<Term> span = KAFDocument.newTermSpan(terms, predTerm);
1298                     final String semRole = "A" + arg;
1299                     final Role role = document.newRole(predicate, semRole, span);
1300                     predicate.addRole(role);
1301                     if (LOGGER.isDebugEnabled()) {
1302                         LOGGER.debug("Added " + NAFUtils.toString(role) + " to "
1303                                 + NAFUtils.toString(predicate));
1304                     }
1305                 }
1306             }
1307         }
1308     }
1309 
1310     private void applySRLSenseMapping(final KAFDocument document) {
1311 
1312         for (final Predicate predicate : document.getPredicates()) {
1313 
1314             // Apply specific mappings
1315             mapExternalRefs(predicate, MAPPING_PREDICATES);
1316 
1317             // Apply Predicate Matrix mappings, if enabled
1318             NomBank.Roleset nbRoleset = null;
1319             PropBank.Roleset pbRoleset = null;
1320             if (this.srlSenseMappingPM) {
1321                 // Obtain the PropBank roleset, either directly or mapping from NomBank
1322                 if (predicate.getTerms().get(0).getPos().equalsIgnoreCase("V")) {
1323                     final ExternalRef ref = predicate.getExternalRef(NAFUtils.RESOURCE_PROPBANK);
1324                     pbRoleset = ref == null ? null : PropBank.getRoleset(ref.getReference());
1325                 } else {
1326                     final ExternalRef ref = predicate.getExternalRef(NAFUtils.RESOURCE_NOMBANK);
1327                     nbRoleset = ref == null ? null : NomBank.getRoleset(ref.getReference());
1328                     final String pbSense = nbRoleset == null ? null : nbRoleset.getPBId();
1329                     pbRoleset = pbSense == null ? null : PropBank.getRoleset(pbSense);
1330                 }
1331 
1332                 // Skip the predicate if the PropBank roleset could not be obtained
1333                 if (pbRoleset != null) {
1334                     // Add an external ref for the PropBank roleset, if missing
1335                     if (NAFUtils.getRef(predicate, NAFUtils.RESOURCE_PROPBANK,
1336                             pbRoleset.getID()) == null) {
1337                         NAFUtils.addRef(predicate, document.newExternalRef( //
1338                                 NAFUtils.RESOURCE_PROPBANK, pbRoleset.getID()));
1339                     }
1340 
1341                     // Apply mappings from the predicate matrix (indexed in PropBank.Roleset
1342                     // object)
1343                     for (final String vnFrame : pbRoleset.getVNFrames()) {
1344                         NAFUtils.setRef(predicate,
1345                                 document.newExternalRef(NAFUtils.RESOURCE_VERBNET, vnFrame));
1346                     }
1347                     for (final String fnFrame : pbRoleset.getFNFrames()) {
1348                         NAFUtils.setRef(predicate,
1349                                 document.newExternalRef(NAFUtils.RESOURCE_FRAMENET, fnFrame));
1350                     }
1351                 }
1352             }
1353 
1354             // Map predicate roles
1355             for (final Role role : predicate.getRoles()) {
1356 
1357                 // Add missing ref if necessary
1358                 if (role.getSemRole().startsWith("A")) {
1359                     final boolean verb = NAFUtils.extractHead(document, predicate.getSpan())
1360                             .getMorphofeat().startsWith("VB");
1361                     final String resource = verb ? "PropBank" : "NomBank";
1362                     final ExternalRef ref = NAFUtils.getRef(predicate, resource, null);
1363                     if (ref != null) {
1364                         final String r = role.getSemRole().startsWith("AM-")
1365                                 ? role.getSemRole().substring(3)
1366                                 : role.getSemRole().substring(1);
1367                         role.addExternalRef(new ExternalRef(resource,
1368                                 ref.getReference() + "@" + r.toLowerCase()));
1369                     }
1370                 }
1371 
1372                 // Apply specific mappings
1373                 mapExternalRefs(role, MAPPING_ARGUMENTS);
1374 
1375                 // Apply Predicate Matrix mappings, if enabled
1376                 if (this.srlSenseMappingPM) {
1377                     final String semRole = role.getSemRole();
1378                     final char numChar = semRole.charAt(semRole.length() - 1);
1379                     if (semRole != null && Character.isDigit(numChar)) {
1380 
1381                         // Determine the PropBank arg num
1382                         final int num = Character.digit(numChar, 10);
1383                         final int pbNum = nbRoleset == null ? num : nbRoleset.getArgPBNum(num);
1384                         if (pbNum < 0) {
1385                             continue;
1386                         }
1387                         final String pbRole = pbRoleset.getID() + '@' + pbNum;
1388                         // final String pbRole = semRole.substring(0, semRole.length() - 2) +
1389                         // pbNum;
1390 
1391                         // Create an external ref for the PropBank role, if missing
1392                         if (NAFUtils.getRef(role, NAFUtils.RESOURCE_PROPBANK, pbRole) == null) {
1393                             NAFUtils.setRef(role,
1394                                     document.newExternalRef(NAFUtils.RESOURCE_PROPBANK, pbRole));
1395                         }
1396 
1397                         // Apply mappings from the predicate matrix
1398                         for (final String vnRole : pbRoleset.getArgVNRoles(pbNum)) {
1399                             NAFUtils.setRef(role,
1400                                     document.newExternalRef(NAFUtils.RESOURCE_VERBNET, vnRole));
1401                         }
1402                         for (final String fnRole : pbRoleset.getArgFNRoles(pbNum)) {
1403                             NAFUtils.setRef(role,
1404                                     document.newExternalRef(NAFUtils.RESOURCE_FRAMENET, fnRole));
1405                         }
1406                     }
1407                 }
1408             }
1409         }
1410     }
1411 
1412     private void applySRLFrameBaseMapping(final KAFDocument document) {
1413 
1414         // Process each predicate and role in the SRL layer
1415         for (final Predicate predicate : document.getPredicates()) {
1416 
1417             // Determine the POS necessary for FrameBase disambiguation (n/a/v/other)
1418             final Term head = NAFUtils.extractHead(document, predicate.getSpan());
1419             final FrameBase.POS pos = FrameBase.POS.forPennTag(head.getMorphofeat());
1420 
1421             // Determine the lemma, handling multiwords
1422             final StringBuilder builder = new StringBuilder();
1423             for (final Term term : predicate.getSpan().getTargets()) {
1424                 builder.append(builder.length() == 0 ? "" : "_");
1425                 builder.append(term.getLemma().toLowerCase());
1426             }
1427             final String lemma = builder.toString();
1428 
1429             // Convert FrameNet refs to FrameBase refs at the predicate level
1430             for (final ExternalRef ref : ImmutableList.copyOf(predicate.getExternalRefs())) {
1431                 if (ref.getResource().equalsIgnoreCase("framenet")) {
1432                     final String frame = ref.getReference();
1433                     final IRI fnClass = FrameBase.classFor(frame, lemma, pos);
1434                     if (fnClass != null) {
1435                         NAFUtils.setRef(predicate,
1436                                 new ExternalRef("FrameBase", fnClass.getLocalName()));
1437                     }
1438                 }
1439             }
1440 
1441             // Convert FrameNet refs to FrameBase refs at the role level
1442             for (final Role role : predicate.getRoles()) {
1443                 for (final ExternalRef ref : ImmutableList.copyOf(role.getExternalRefs())) {
1444                     if (ref.getResource().equalsIgnoreCase("framenet")) {
1445                         final String s = ref.getReference();
1446                         final int index = s.indexOf('@');
1447                         if (index > 0) {
1448                             final String frame = s.substring(0, index);
1449                             final String fe = s.substring(index + 1);
1450                             final IRI fnProperty = FrameBase.propertyFor(frame, fe);
1451                             if (fnProperty != null) {
1452                                 NAFUtils.setRef(role,
1453                                         new ExternalRef("FrameBase", fnProperty.getLocalName()));
1454                             }
1455                         }
1456                     }
1457                 }
1458             }
1459         }
1460     }
1461 
1462     private void applySRLRoleLinking(final KAFDocument document) {
1463 
1464         // Process all the roles in the SRL layer
1465         for (final Predicate predicate : Lists.newArrayList(document.getPredicates())) {
1466             for (final Role role : predicate.getRoles()) {
1467 
1468                 // Identify the role head. Skip if not found.
1469                 final Term head = NAFUtils.extractHead(document, role.getSpan());
1470                 if (head == null) {
1471                     continue;
1472                 }
1473 
1474                 // Identify the terms that can be linked
1475                 final Set<Term> argTerms = document
1476                         .getTermsByDepAncestors(Collections.singleton(head), PARTICIPATION_REGEX);
1477 
1478                 // Perform the linking, possible augmenting terms using coref info
1479                 linkEntitiesTimexPredicates(document, role, role.getSpan(), argTerms,
1480                         this.srlRoleLinkingUsingCoref);
1481             }
1482         }
1483     }
1484 
1485     private void applyOpinionLinking(final KAFDocument document) {
1486 
1487         // Process all the opinions in the NAF document
1488         for (final Opinion opinion : document.getOpinions()) {
1489 
1490             // Add links for the opinion expression, if any
1491             final OpinionExpression expression = opinion.getOpinionExpression();
1492             if (expression != null) {
1493                 linkEntitiesTimexPredicates(document, expression, expression.getSpan(),
1494                         NAFUtils.extractHeads(document, null, expression.getTerms(),
1495                                 NAFUtils.matchExtendedPos(document, "NN", "VB", "JJ", "R")),
1496                         this.opinionLinkingUsingCoref);
1497             }
1498 
1499             // Add links for the opinion holder, if any
1500             final OpinionHolder holder = opinion.getOpinionHolder();
1501             if (holder != null) {
1502                 linkEntitiesTimexPredicates(document, holder, holder.getSpan(),
1503                         NAFUtils.extractHeads(document, null, holder.getTerms(), NAFUtils
1504                                 .matchExtendedPos(document, "NN", "PRP", "JJP", "DTP", "WP")),
1505                         this.opinionLinkingUsingCoref);
1506             }
1507 
1508             // Add links for the opinion target, if any
1509             final OpinionTarget target = opinion.getOpinionTarget();
1510             if (target != null) {
1511                 linkEntitiesTimexPredicates(
1512                         document, target, target.getSpan(), NAFUtils
1513                                 .extractHeads(document, null, target.getTerms(),
1514                                         NAFUtils.matchExtendedPos(document, "NN", "PRP", "JJP",
1515                                                 "DTP", "WP", "VB")),
1516                         this.opinionLinkingUsingCoref);
1517             }
1518         }
1519     }
1520 
1521     private static void linkEntitiesTimexPredicates(final KAFDocument document,
1522             final Object annotation, final Span<Term> spanToModify, final Set<Term> heads,
1523             final boolean useCoref) {
1524 
1525         // Add heads to span, if possible
1526         spanToModify.getHeads().clear();
1527         if (!heads.isEmpty()) {
1528             spanToModify.getHeads().addAll(heads);
1529         }
1530 
1531         // Apply coreference if specified
1532         Set<Term> linkableTerms = heads;
1533         if (useCoref) {
1534             linkableTerms = Sets.newHashSet(heads);
1535             for (final Term argTerm : heads) {
1536                 for (final Coref coref : document.getCorefsByTerm(argTerm)) {
1537                     final List<Term> spanHeads = Lists.newArrayList();
1538                     for (final Span<Term> span : coref.getSpans()) {
1539                         final Term spanHead = NAFUtils.extractHead(document, span);
1540                         if (spanHead != null) {
1541                             spanHeads.add(spanHead);
1542                         }
1543                     }
1544                     if (spanHeads.contains(argTerm)) {
1545                         for (final Term spanHead : spanHeads) {
1546                             linkableTerms.addAll(document.getTermsByDepAncestors(
1547                                     Collections.singleton(spanHead), "(COORD CONJ?)*"));
1548                         }
1549                     }
1550                 }
1551             }
1552         }
1553 
1554         // Add external refs for the entities, timex and predicates corresponding to sel. terms
1555         for (final Term term : linkableTerms) {
1556 
1557             // Determine whether the term was obtained via coreference
1558             final boolean isCoref = !heads.contains(term);
1559 
1560             // Add links for entities
1561             for (final Entity entity : document.getEntitiesByTerm(term)) {
1562                 for (final Span<Term> span : entity.getSpans()) {
1563                     final Term spanHead = NAFUtils.extractHead(document, span);
1564                     if (term.equals(spanHead)) {
1565                         final String res = isCoref ? NAFUtils.RESOURCE_ENTITY_COREF
1566                                 : NAFUtils.RESOURCE_ENTITY_REF;
1567                         NAFUtils.setRef(annotation, document.newExternalRef(res, entity.getId()));
1568                         if (LOGGER.isDebugEnabled()) {
1569                             LOGGER.debug("Linked {} to {} as {}", NAFUtils.toString(entity),
1570                                     NAFUtils.toString(annotation), res);
1571                         }
1572                     }
1573                 }
1574             }
1575 
1576             // Add links for timex
1577             for (final Timex3 timex : document.getTimeExsByWF(term.getWFs().get(0))) {
1578                 final Term timexHead = NAFUtils.extractHead(document, KAFDocument
1579                         .newTermSpan(document.getTermsByWFs(timex.getSpan().getTargets())));
1580                 if (term.equals(timexHead)) {
1581                     final String res = isCoref ? NAFUtils.RESOURCE_TIMEX_COREF
1582                             : NAFUtils.RESOURCE_TIMEX_REF;
1583                     NAFUtils.setRef(annotation, document.newExternalRef(res, timex.getId()));
1584                     if (LOGGER.isDebugEnabled()) {
1585                         LOGGER.debug("Linked {} to {} as {}", NAFUtils.toString(timex),
1586                                 NAFUtils.toString(annotation), res);
1587                     }
1588                 }
1589             }
1590 
1591             // Add links for predicates
1592             for (final Predicate pred : document.getPredicatesByTerm(term)) {
1593                 if (term.equals(NAFUtils.extractHead(document, pred.getSpan()))) {
1594                     final String res = isCoref ? NAFUtils.RESOURCE_PREDICATE_COREF
1595                             : NAFUtils.RESOURCE_PREDICATE_REF;
1596                     NAFUtils.setRef(annotation, document.newExternalRef(res, pred.getId()));
1597                     if (LOGGER.isDebugEnabled()) {
1598                         LOGGER.debug("Linked {} to {} as {}", NAFUtils.toString(pred),
1599                                 NAFUtils.toString(annotation), res);
1600                     }
1601                 }
1602             }
1603         }
1604     }
1605 
1606     private void mapExternalRefs(final Object annotation,
1607             final Multimap<String, String> mappings) {
1608 
1609         // Keep track of prefixes (NB, PB, VN, FN) of resources already available, as well as the
1610         // keys corresponding to their values
1611         final Set<String> prefixes = Sets.newHashSet();
1612         final Set<String> keys = Sets.newHashSet();
1613 
1614         // Extract prefixes and keys
1615         for (final ExternalRef ref : NAFUtils.getRefs(annotation, null, null)) {
1616             final String prefix = MAPPING_PREFIXES.get(ref.getResource().toLowerCase());
1617             if (prefix != null) {
1618                 prefixes.add(prefix);
1619                 keys.add(prefix + ":" + ref.getReference());
1620             }
1621         }
1622 
1623         // Apply mappings
1624         final List<String> queue = Lists.newLinkedList(keys);
1625         while (!queue.isEmpty()) {
1626             final String key = queue.remove(0);
1627             for (final String mappedKey : mappings.get(key)) {
1628                 final String mappedPrefix = mappedKey.substring(0, 2);
1629                 if (!prefixes.contains(mappedPrefix) && !keys.contains(mappedKey)) {
1630                     final String mappedResource = MAPPING_PREFIXES.inverse().get(mappedPrefix);
1631                     final String mappedReference = mappedKey.substring(3);
1632                     keys.add(mappedKey);
1633                     queue.add(mappedKey);
1634                     NAFUtils.addRef(annotation, new ExternalRef(mappedResource, mappedReference));
1635                     if (LOGGER.isDebugEnabled()) {
1636                         LOGGER.debug("Mapped {} : {} to {} for {}", mappedResource,
1637                                 mappedReference, mappedKey, NAFUtils.toString(annotation));
1638                     }
1639                 }
1640             }
1641         }
1642     }
1643 
1644     private void applySRLPreMOnIRIs(final KAFDocument document) {
1645         // Process each predicate and role in the SRL layer
1646 
1647         final List<String> models = Arrays.asList(NAFUtils.RESOURCE_FRAMENET,
1648                 NAFUtils.RESOURCE_VERBNET, NAFUtils.RESOURCE_PROPBANK, NAFUtils.RESOURCE_NOMBANK);
1649 
1650         for (final Predicate predicate : document.getPredicates()) {
1651 
1652             List<ExternalRef> allPredicateExtRefs = predicate.getExternalRefs();
1653             List<ExternalRef> predicateExtRefToRemove = Lists.newArrayList();
1654 
1655             for (final ExternalRef predRef : ImmutableList.copyOf(allPredicateExtRefs)) {
1656                 String refStr = predRef.getResource();
1657 
1658                 if (models.contains(refStr)) {
1659                     final String pred = predRef.getReference();
1660                     final String source = predRef.getSource();
1661 
1662                     final IRI premonIRI = NAFUtils.createPreMOnSemanticClassIRIfor(refStr, pred);
1663                     if (premonIRI != null) {
1664                         ExternalRef e = new ExternalRef("PreMOn+" + refStr,
1665                                 premonIRI.getLocalName());
1666                         if (source != null)
1667                             e.setSource(source);
1668                         NAFUtils.setRef(predicate, e);
1669 
1670                     }
1671 
1672                     predicateExtRefToRemove.add(predRef);
1673                 }
1674 
1675             }
1676 
1677             // remove old predicate ref
1678             for (ExternalRef toBeDropped : predicateExtRefToRemove) {
1679                 allPredicateExtRefs.remove(toBeDropped);
1680             }
1681 
1682             // Convert FrameNet refs to FrameBase refs at the role level
1683             for (final Role role : predicate.getRoles()) {
1684 
1685                 List<ExternalRef> allRoleExtRefs = role.getExternalRefs();
1686                 List<ExternalRef> roleExtRefToRemove = Lists.newArrayList();
1687 
1688                 for (final ExternalRef roleRef : ImmutableList.copyOf(allRoleExtRefs)) {
1689 
1690                     String refStr = roleRef.getResource();
1691 
1692                     if (models.contains(refStr)) {
1693 
1694                         final String predicateAndRole = roleRef.getReference();
1695                         final String source = roleRef.getSource();
1696                         final int index = predicateAndRole.indexOf('@');
1697                         if (index > 0) {
1698                             final String pred = predicateAndRole.substring(0, index);
1699                             final String rol = predicateAndRole.substring(index + 1);
1700 
1701                             final IRI premonIRI = NAFUtils.createPreMOnSemanticRoleIRIfor(refStr,
1702                                     pred, rol);
1703                             if (premonIRI != null) {
1704                                 ExternalRef e = new ExternalRef("PreMOn+" + refStr,
1705                                         premonIRI.getLocalName());
1706                                 if (source != null)
1707                                     e.setSource(source);
1708                                 NAFUtils.setRef(role, e);
1709                             }
1710                         }
1711                         roleExtRefToRemove.add(roleRef);
1712                     }
1713                 }
1714                 // remove old role
1715                 for (ExternalRef toBeRemoved : roleExtRefToRemove) {
1716                     allRoleExtRefs.remove(toBeRemoved);
1717                 }
1718             }
1719         }
1720     }
1721 
1722     /**
1723      * Returns a new configurable {@code Builder} for the instantiation of a {@code NAFFilter}.
1724      *
1725      * @return a new {@code Builder}
1726      */
1727     public static final Builder builder() {
1728         return new Builder();
1729     }
1730 
1731     /**
1732      * Returns a new configurable {@code Builder} with all {@code NAFFilter} features either
1733      * enabled or disabled, based on the supplied parameter.
1734      *
1735      * @param enableAll
1736      *            true, to enable all features; false, to disable all features; null, to maintain
1737      *            default settings.
1738      * @return a new {@code Builder}
1739      */
1740     public static final Builder builder(@Nullable final Boolean enableAll) {
1741         return new Builder() //
1742                 .withTermSenseCompletion(enableAll) //
1743                 .withEntityRemoveOverlaps(enableAll) //
1744                 .withEntitySpanFixing(enableAll) //
1745                 .withEntityAddition(enableAll) //
1746                 .withCorefSpanFixing(enableAll) //
1747                 .withCorefForRoleDependencies(enableAll) //
1748                 .withLinkingCompletion(enableAll) //
1749                 .withLinkingFixing(enableAll) //
1750                 .withSRLRemoveWrongRefs(enableAll) //
1751                 .withSRLRemoveUnknownPredicates(enableAll) //
1752                 .withSRLPredicateAddition(enableAll) //
1753                 .withSRLSelfArgFixing(enableAll) //
1754                 .withSRLSenseMapping(enableAll) //
1755                 .withSRLRoleLinking(enableAll, enableAll) //
1756                 .withOpinionLinking(enableAll, enableAll).withSRLPreMOnIRIs(enableAll);
1757     }
1758 
1759     /**
1760      * Configurable builder object for the creation of {@code NAFFilter}s.
1761      * <p>
1762      * Supported properties accepted by {@link #withProperties(Map, String)} and corresponding
1763      * setter methods:
1764      * </p>
1765      * <table border="1">
1766      * <thead>
1767      * <tr>
1768      * <th>Property</th>
1769      * <th>Values</th>
1770      * <th>Corresponding method</th>
1771      * <th>Default</th>
1772      * </tr>
1773      * </thead><tbody>
1774      * <tr>
1775      * <td>termSenseFiltering</td>
1776      * <td>true, false</td>
1777      * <td>{@link #withTermSenseFiltering(Boolean)}</td>
1778      * <td>true</td>
1779      * </tr>
1780      * <tr>
1781      * <td>termSenseCompletion</td>
1782      * <td>true, false</td>
1783      * <td>{@link #withTermSenseCompletion(Boolean)}</td>
1784      * <td>true</td>
1785      * </tr>
1786      * <tr>
1787      * <td>entityRemoveOverlaps</td>
1788      * <td>true, false</td>
1789      * <td>{@link #withEntityRemoveOverlaps(Boolean)}</td>
1790      * <td>true</td>
1791      * </tr>
1792      * <tr>
1793      * <td>entitySpanFixing</td>
1794      * <td>true, false</td>
1795      * <td>{@link #withEntitySpanFixing(Boolean)}</td>
1796      * <td>true</td>
1797      * </tr>
1798      * <tr>
1799      * <td>entityAddition</td>
1800      * <td>true, false</td>
1801      * <td>{@link #withEntityAddition(Boolean)}</td>
1802      * <td>true</td>
1803      * </tr>
1804      * <tr>
1805      * <td>entityValueNormalization</td>
1806      * <td>true, false</td>
1807      * <td>{@link #withEntityValueNormalization(Boolean)}</td>
1808      * <td>true</td>
1809      * </tr>
1810      * <tr>
1811      * <td>linkingCompletion</td>
1812      * <td>true, false</td>
1813      * <td>{@link #withLinkingCompletion(Boolean)}</td>
1814      * <td>true</td>
1815      * </tr>
1816      * <tr>
1817      * <td>linkingFixing</td>
1818      * <td>true, false</td>
1819      * <td>{@link #withLinkingFixing(Boolean)}</td>
1820      * <td>false</td>
1821      * </tr>
1822      * <tr>
1823      * <td>corefForRoleDependencies</td>
1824      * <td>true, false</td>
1825      * <td>{@link #withCorefForRoleDependencies(Boolean)}</td>
1826      * <td>false</td>
1827      * </tr>
1828      * <tr>
1829      * <td>corefSpanFixing</td>
1830      * <td>true, false</td>
1831      * <td>{@link #withCorefSpanFixing(Boolean)}</td>
1832      * <td>false</td>
1833      * </tr>
1834      * <tr>
1835      * <td>srlRemoveWrongRefs</td>
1836      * <td>true, false</td>
1837      * <td>{@link #withSRLRemoveWrongRefs(Boolean)}</td>
1838      * <td>true</td>
1839      * </tr>
1840      * <tr>
1841      * <td>srlRemoveUnknownPredicates</td>
1842      * <td>true, false</td>
1843      * <td>{@link #withSRLRemoveUnknownPredicates(Boolean)}</td>
1844      * <td>false</td>
1845      * </tr>
1846      * <tr>
1847      * <td>srlPredicateAddition</td>
1848      * <td>true, false</td>
1849      * <td>{@link #withSRLPredicateAddition(Boolean)}</td>
1850      * <td>true</td>
1851      * </tr>
1852      * <tr>
1853      * <td>srlSelfArgFixing</td>
1854      * <td>true, false</td>
1855      * <td>{@link #withSRLSelfArgFixing(Boolean)}</td>
1856      * <td>true</td>
1857      * </tr>
1858      * <tr>
1859      * <td>srlSenseMapping</td>
1860      * <td>true, false</td>
1861      * <td>{@link #withSRLSenseMapping(Boolean)}</td>
1862      * <td>false</td>
1863      * </tr>
1864      * <tr>
1865      * <td>srlFrameBaseMapping</td>
1866      * <td>true, false</td>
1867      * <td>{@link #withSRLFrameBaseMapping(Boolean)}</td>
1868      * <td>true</td>
1869      * </tr>
1870      * <tr>
1871      * <td>srlRoleLinking</td>
1872      * <td>none, exact, coref</td>
1873      * <td>{@link #withSRLRoleLinking(Boolean, Boolean)}</td>
1874      * <td>coref (= true, true)</td>
1875      * </tr>
1876      * <tr>
1877      * <td>opinionLinking</td>
1878      * <td>none, exact, coref</td>
1879      * <td>{@link #withOpinionLinking(Boolean, Boolean)}</td>
1880      * <td>coref (= true, true)</td>
1881      * </tr>
1882      * </tbody>
1883      * </table>
1884      */
1885     public static final class Builder {
1886 
1887         @Nullable
1888         private Boolean termSenseFiltering;
1889 
1890         @Nullable
1891         private Boolean termSenseCompletion;
1892 
1893         @Nullable
1894         private Boolean entityRemoveOverlaps;
1895 
1896         @Nullable
1897         private Boolean entitySpanFixing;
1898 
1899         @Nullable
1900         private Boolean entityAddition;
1901 
1902         @Nullable
1903         private Boolean entityValueNormalization;
1904 
1905         @Nullable
1906         private Boolean linkingCompletion;
1907 
1908         @Nullable
1909         private Boolean linkingFixing;
1910 
1911         @Nullable
1912         private Boolean corefSpanFixing;
1913 
1914         @Nullable
1915         private Boolean corefForRoleDependencies;
1916 
1917         @Nullable
1918         private Boolean srlPreprocess;
1919 
1920         @Nullable
1921         private Boolean srlEnableMate;
1922 
1923         @Nullable
1924         private Boolean srlEnableSemafor;
1925 
1926         @Nullable
1927         private Boolean srlRemoveWrongRefs;
1928 
1929         @Nullable
1930         private Boolean srlRemoveUnknownPredicates;
1931 
1932         @Nullable
1933         private Boolean srlPredicateAddition;
1934 
1935         @Nullable
1936         private Boolean srlSelfArgFixing;
1937 
1938         @Nullable
1939         private Boolean srlSenseMapping;
1940 
1941         @Nullable
1942         private Boolean srlFrameBaseMapping;
1943 
1944         @Nullable
1945         private Boolean srlRoleLinking;
1946 
1947         @Nullable
1948         private Boolean srlRoleLinkingUsingCoref;
1949 
1950         @Nullable
1951         private Boolean srlPreMOnIRIs;
1952 
1953         @Nullable
1954         private Boolean opinionLinking;
1955 
1956         @Nullable
1957         private Boolean opinionLinkingUsingCoref;
1958 
1959         Builder() {
1960         }
1961 
1962         /**
1963          * Sets all the properties in the map supplied, matching an optional prefix.
1964          *
1965          * @param properties
1966          *            the properties to configure, not null
1967          * @param prefix
1968          *            an optional prefix used to select the relevant properties in the map
1969          * @return this builder object, for call chaining
1970          */
1971         public Builder withProperties(final Map<?, ?> properties, @Nullable final String prefix) {
1972             final String p = prefix == null ? "" : prefix.endsWith(".") ? prefix : prefix + ".";
1973             for (final Map.Entry<?, ?> entry : properties.entrySet()) {
1974                 if (entry.getKey() != null && entry.getValue() != null
1975                         && entry.getKey().toString().startsWith(p)) {
1976                     final String name = entry.getKey().toString().substring(p.length());
1977                     final String value = Strings.emptyToNull(entry.getValue().toString());
1978                     if ("termSenseFiltering".equals(name)) {
1979                         withTermSenseFiltering(Boolean.valueOf(value));
1980                     } else if ("termSenseCompletion".equals(name)) {
1981                         withTermSenseCompletion(Boolean.valueOf(value));
1982                     } else if ("entityRemoveOverlaps".equals(name)) {
1983                         withEntityRemoveOverlaps(Boolean.valueOf(value));
1984                     } else if ("entitySpanFixing".equals(name)) {
1985                         withEntitySpanFixing(Boolean.valueOf(value));
1986                     } else if ("entityAddition".equals(name)) {
1987                         withEntityAddition(Boolean.valueOf(value));
1988                     } else if ("entityValueNormalization".equals(name)) {
1989                         withEntityValueNormalization(Boolean.valueOf(value));
1990                     } else if ("linkingCompletion".equals(name)) {
1991                         withLinkingCompletion(Boolean.valueOf(value));
1992                     } else if ("linkingFixing".equals(name)) {
1993                         withLinkingFixing(Boolean.valueOf(value));
1994                     } else if ("corefForRoleDependencies".equals(name)) {
1995                         withCorefForRoleDependencies(Boolean.valueOf(value));
1996                     } else if ("corefSpanFixing".equals(name)) {
1997                         withCorefSpanFixing(Boolean.valueOf(value));
1998                     } else if ("srlPreprocess".equals(name)) {
1999                         if ("none".equalsIgnoreCase(value)) {
2000                             withSRLPreprocess(false, false, false);
2001                         } else if ("basic".equalsIgnoreCase(value)) {
2002                             withSRLPreprocess(true, false, false);
2003                         } else if ("mate".equalsIgnoreCase(value)) {
2004                             withSRLPreprocess(true, true, false);
2005                         } else if ("semafor".equalsIgnoreCase(value)) {
2006                             withSRLPreprocess(true, false, true);
2007                         } else if ("mate+semafor".equalsIgnoreCase(value)) {
2008                             withSRLPreprocess(true, true, true);
2009                         } else {
2010                             throw new IllegalArgumentException("Invalid '" + value
2011                                     + "' srlPreprocess property. Supported: none basic mate semafor mate+semafor");
2012                         }
2013                     } else if ("srlRemoveWrongRefs".equals(name)) {
2014                         withSRLRemoveWrongRefs(Boolean.valueOf(value));
2015                     } else if ("srlRemoveUnknownPredicates".equals(name)) {
2016                         withSRLRemoveUnknownPredicates(Boolean.valueOf(value));
2017                     } else if ("srlPredicateAddition".equals(name)) {
2018                         withSRLPredicateAddition(Boolean.valueOf(value));
2019                     } else if ("srlSelfArgFixing".equals(name)) {
2020                         withSRLSelfArgFixing(Boolean.valueOf(value));
2021                     } else if ("srlSenseMapping".equals(name)) {
2022                         withSRLSenseMapping(Boolean.valueOf(value));
2023                     } else if ("srlFrameBaseMapping".equals(name)) {
2024                         withSRLFrameBaseMapping(Boolean.valueOf(value));
2025                     } else if ("srlRoleLinking".equals(name)) {
2026                         if ("none".equalsIgnoreCase(value)) {
2027                             withSRLRoleLinking(false, false);
2028                         } else if ("exact".equalsIgnoreCase(value)) {
2029                             withSRLRoleLinking(true, false);
2030                         } else if ("coref".equalsIgnoreCase(value)) {
2031                             withSRLRoleLinking(true, true);
2032                         } else {
2033                             throw new IllegalArgumentException("Invalid '" + value
2034                                     + "' srlRoleLinking property. Supported: none exact coref ");
2035                         }
2036                     } else if ("srlPreMOnIRIs".equals(name)) {
2037                         withSRLPreMOnIRIs(Boolean.valueOf(value));
2038                     } else if ("opinionLinking".equals(name)) {
2039                         if ("none".equalsIgnoreCase(value)) {
2040                             withOpinionLinking(false, false);
2041                         } else if ("exact".equalsIgnoreCase(value)) {
2042                             withOpinionLinking(true, false);
2043                         } else if ("coref".equalsIgnoreCase(value)) {
2044                             withOpinionLinking(true, true);
2045                         } else {
2046                             throw new IllegalArgumentException("Invalid '" + value
2047                                     + "' opinionLinking property. Supported: none exact coref ");
2048                         }
2049                     }
2050                 }
2051             }
2052             return this;
2053         }
2054 
2055         /**
2056          * Specifies whether term senses (BBN, SST, WN Synset, SUMO mapping, YAGO) for proper
2057          * names should be removed.
2058          *
2059          * @param termSenseFiltering
2060          *            true to enable term sense filtering, null to use default value
2061          * @return this builder object, for call chaining
2062          */
2063         public Builder withTermSenseFiltering(@Nullable final Boolean termSenseFiltering) {
2064             this.termSenseFiltering = termSenseFiltering;
2065             return this;
2066         }
2067 
2068         /**
2069          * Specifies whether missing term senses (BBN, SST, WN Synset, SUMO mapping ) should be
2070          * completed by applying sense mappings.
2071          *
2072          * @param termSenseCompletion
2073          *            true to enable term sense completion, null to use default value
2074          * @return this builder object, for call chaining
2075          */
2076         public Builder withTermSenseCompletion(@Nullable final Boolean termSenseCompletion) {
2077             this.termSenseCompletion = termSenseCompletion;
2078             return this;
2079         }
2080 
2081         /**
2082          * Specifies whether entities overlapping with timex or (larger) entities should be
2083          * removed.
2084          *
2085          * @param entityRemoveOverlaps
2086          *            true, to enable removal of entities that overlap with other entities or
2087          *            timex; null to use the default setting
2088          * @return this builder object for call chaining
2089          */
2090         public Builder withEntityRemoveOverlaps(@Nullable final Boolean entityRemoveOverlaps) {
2091             this.entityRemoveOverlaps = entityRemoveOverlaps;
2092             return this;
2093         }
2094 
2095         /**
2096          * Specifies whether the spans of entities should be checked and possibly fixed, removing
2097          * determiners and non-alphanumeric terms. If enabled and no terms remain after fixing the
2098          * span of an entity, that entity is removed.
2099          *
2100          * @param entitySpanFixing
2101          *            true, to enable fixing of entity spans (and possible removal of invalid
2102          *            entities); null to use the default setting
2103          * @return this builder object, for call chaining
2104          */
2105         public Builder withEntitySpanFixing(@Nullable final Boolean entitySpanFixing) {
2106             this.entitySpanFixing = entitySpanFixing;
2107             return this;
2108         }
2109 
2110         /**
2111          * Specifies whether new entities should be added to the document for noun phrases not
2112          * already marked as entities.
2113          *
2114          * @param entityAddition
2115          *            true, to enable entity addition; null, to use the default setting
2116          * @return this builder object, for call chaining
2117          */
2118         public Builder withEntityAddition(@Nullable final Boolean entityAddition) {
2119             this.entityAddition = entityAddition;
2120             return this;
2121         }
2122 
2123         /**
2124          * Specifies whether normalization of numerical entity values (ordinal, cardinal, percent,
2125          * money) should take place.
2126          *
2127          * @param entityValueNormalization
2128          *            true, to enable entity value normalization; null, to use the default setting
2129          * @return this builder object, for call chaining
2130          */
2131         public Builder withEntityValueNormalization(
2132                 @Nullable final Boolean entityValueNormalization) {
2133             this.entityValueNormalization = entityValueNormalization;
2134             return this;
2135         }
2136 
2137         /**
2138          * Specifies whether entity links in the LinkedEntities layer should be applied to
2139          * entities and predicates where missing, thus performing a kind of linking completion.
2140          *
2141          * @param linkingCompletion
2142          *            true, to perform linking completion
2143          * @return this builder object, for call chaining
2144          */
2145         public Builder withLinkingCompletion(@Nullable final Boolean linkingCompletion) {
2146             this.linkingCompletion = linkingCompletion;
2147             return this;
2148         }
2149 
2150         /**
2151          * Specifies whether removal of inaccurate entity links to DBpedia should occur. If
2152          * enabled, links for entities whose span is part of a stop word list are removed. The
2153          * stop word list contains (multi-)words that are known to be ambiguous from an analysis
2154          * of Wikipedia data.
2155          *
2156          * @param linkingFixing
2157          *            true to enable linking fixing; null, to use the default setting
2158          * @return this builder object, for call chaining
2159          */
2160         public Builder withLinkingFixing(@Nullable final Boolean linkingFixing) {
2161             this.linkingFixing = linkingFixing;
2162             return this;
2163         }
2164 
2165         /**
2166          * Specifies whether new coreference relations should be added for APPO/NMOD/TITLE edges
2167          * in the dependency tree between proper nouns and role nouns.
2168          *
2169          * @param corefForRoleDependencies
2170          *            true to enable addition of coreference relations for APPO/NMOD/TITLE edges;
2171          *            null, to use the default setting
2172          * @return this builder object, for call chaining
2173          */
2174         public Builder withCorefForRoleDependencies(
2175                 @Nullable final Boolean corefForRoleDependencies) {
2176             this.corefForRoleDependencies = corefForRoleDependencies;
2177             return this;
2178         }
2179 
2180         /**
2181          * Specifies whether spans of existing coreference sets should be checked and possibly
2182          * shrinked or removed. The following rules are applied:
2183          * <ul>
2184          * <li>remove spans without a well-defined head in the dependency tree;</li>
2185          * <li>remove spans that enclose another span in the coreference set;</li>
2186          * <li>remove spans with non NNP head corresponding to a verb or to a NomBank predicate
2187          * that never admit itself as a role (e.g., 'war' but not 'president'), if no span with a
2188          * sumo:Process head (= event) is part of the coreference set;</li>
2189          * <li>shrink spans with non NNP head that contain some NNP token, if a span with NNP head
2190          * is part of the coreference set;</li>
2191          * </ul>
2192          * If a coreference set becomes empty as a result of the above filtering, it is removed
2193          * from the NAF document.
2194          *
2195          * @param corefSpanFixing
2196          *            true to enable coreference span fixing; null to use default setting
2197          * @return this builder object, for call chaining
2198          */
2199         public Builder withCorefSpanFixing(@Nullable final Boolean corefSpanFixing) {
2200             this.corefSpanFixing = corefSpanFixing;
2201             return this;
2202         }
2203 
2204         /**
2205          * Specifies whether to preprocess SRL layer, enabling Mate and/or Semafor outputs. If
2206          * both tools are enabled, they are combined in such a way that semafor takes precedence
2207          * in case two predicates refer to the same token.
2208          *
2209          * @param srlPreprocess
2210          *            true, to enable preprocessing of SRL layer
2211          * @param srlEnableMate
2212          *            true, to enable Mate output
2213          * @param srlEnableSemafor
2214          *            true, to enable Semafor output
2215          * @return this builder object, for call chaining
2216          */
2217         public Builder withSRLPreprocess(@Nullable final Boolean srlPreprocess,
2218                 @Nullable final Boolean srlEnableMate, @Nullable final Boolean srlEnableSemafor) {
2219             this.srlPreprocess = srlPreprocess;
2220             this.srlEnableMate = srlEnableMate;
2221             this.srlEnableSemafor = srlEnableSemafor;
2222             return this;
2223         }
2224 
2225         /**
2226          * Specifies whether ExternalRefs with wrong PropBank/NomBank rolesets/roles in the NAF
2227          * should be removed. A roleset/role is considered wrong if its lemma differs from the one
2228          * of the predicate in the text (errors can arise from 'excessive' mappings, e.g. in the
2229          * predicate matrix).
2230          *
2231          * @param srlRemoveWrongRefs
2232          *            true, if removal of ExternalRefs with wrong PB/NB rolesets/roles has to be
2233          *            enabled
2234          * @return this builder object, for call chaining
2235          */
2236         public Builder withSRLRemoveWrongRefs(@Nullable final Boolean srlRemoveWrongRefs) {
2237             this.srlRemoveWrongRefs = srlRemoveWrongRefs;
2238             return this;
2239         }
2240 
2241         /**
2242          * Specifies whether SRL predicates with unknown PropBank/NomBank rolesets/roles in the
2243          * NAF should be removed. A roleset/role is wrong if it does not appear in
2244          * PropBank/NomBank frame files (SRL tools such as Mate may detect predicates for unknown
2245          * rolesets, to increase recall).
2246          *
2247          * @param srlRemoveUnknownPredicates
2248          *            true, if removal of predicates with unknown PB/NB rolesets/roles has to be
2249          *            enabled
2250          * @return this builder object, for call chaining
2251          */
2252         public Builder withSRLRemoveUnknownPredicates(
2253                 @Nullable final Boolean srlRemoveUnknownPredicates) {
2254             this.srlRemoveUnknownPredicates = srlRemoveUnknownPredicates;
2255             return this;
2256         }
2257 
2258         /**
2259          * Specifies whether new predicates can be added for verbs, noun and adjectives having
2260          * exactly one sense in PropBank or NomBank but not marked in the text.
2261          *
2262          * @param srlPredicateAddition
2263          *            true, to enable predicate addition; null to use the default setting
2264          * @return this builder object, for call chaining
2265          */
2266         public Builder withSRLPredicateAddition(@Nullable final Boolean srlPredicateAddition) {
2267             this.srlPredicateAddition = srlPredicateAddition;
2268             return this;
2269         }
2270 
2271         /**
2272          * Specifies whether 'self-roles' can be added for predicates where missing or removed
2273          * where wrongly added. If set, for each recognized predicate the filter checks whether
2274          * the predicate term has also been marked as role. IF it is not marked in the NAF but it
2275          * is always marked in NomBank training set THEN the filter adds a new role for the
2276          * predicate term, using the semantic role in NomBank training set. If already marked
2277          * whereas no marking should happen based on previous criteria, then the role is removed.
2278          *
2279          * @param srlSelfArgFixing
2280          *            true if role addition is enabled
2281          * @return this builder object, for call chaining
2282          */
2283         public Builder withSRLSelfArgFixing(@Nullable final Boolean srlSelfArgFixing) {
2284             this.srlSelfArgFixing = srlSelfArgFixing;
2285             return this;
2286         }
2287 
2288         /**
2289          * Specifies whether mapping of roleset / roles in the SRL layer should take place. If
2290          * enabled, new external refs are added to map NomBank rolesets and roles to PropBank and
2291          * to map PropBank rolesets and roles to VerbNet and FrameNet, based on the predicate
2292          * matrix.
2293          *
2294          * @param srlSenseMapping
2295          *            true, to enable SRL sense mapping; null, to use the default setting
2296          * @return this builder object, for call chaining
2297          */
2298         public Builder withSRLSenseMapping(@Nullable final Boolean srlSenseMapping) {
2299             this.srlSenseMapping = srlSenseMapping;
2300             return this;
2301         }
2302 
2303         /**
2304          * Specifies whether mapping of rolesets / roles in the SRL layer to FrameBase classes /
2305          * properties should take place. If enabled, new external refs for FrameBase targets are
2306          * added where possible.
2307          *
2308          * @param srlFrameBaseMapping
2309          *            true, to enable SRL to FrameBase mapping; null, to use the default setting
2310          * @return this builder object, for call chaining
2311          */
2312         public Builder withSRLFrameBaseMapping(@Nullable final Boolean srlFrameBaseMapping) {
2313             this.srlFrameBaseMapping = srlFrameBaseMapping;
2314             return this;
2315         }
2316 
2317         /**
2318          * Specifies whether ExternalRef tags should be added to SRL roles to link them to the
2319          * entities, timex and predicates in the NAF the role corresponds to. The correspondence
2320          * between a role and entities/predicates is computed based on the evaluation of regular
2321          * expressions on the dependency tree that take properly into account coordination and
2322          * prepositions (e.g., in 'Tom speaks to Al, John and the friend of Jack', the A1 role 'to
2323          * Al, John and the friend of Jack' is linked to the entities 'Al' and 'John' but not
2324          * 'Jack'). If {@code useCoref} is specified, SRL roles are also linked to entities, timex
2325          * and predicates reachable via coreference chains.
2326          *
2327          * @param srlRoleLinking
2328          *            true, to enable this filtering; null, to use the default setting
2329          * @param useCoref
2330          *            true, to enable linking to coreferring entities/timex/predicates; null, to
2331          *            use the default setting
2332          * @return this builder object, for call chaining
2333          */
2334         public Builder withSRLRoleLinking(@Nullable final Boolean srlRoleLinking,
2335                 @Nullable final Boolean useCoref) {
2336             this.srlRoleLinking = srlRoleLinking;
2337             this.srlRoleLinkingUsingCoref = useCoref;
2338             return this;
2339         }
2340 
2341         /**
2342          * Specifies replace reference of predicate models in NAF with premon IRIs
2343          *
2344          * @param srlPreMOnIRIs
2345          *            true to enable IRI replacement, null to use default value
2346          * @return this builder object, for call chaining
2347          */
2348         public Builder withSRLPreMOnIRIs(@Nullable final Boolean srlPreMOnIRIs) {
2349             this.srlPreMOnIRIs = srlPreMOnIRIs;
2350             return this;
2351         }
2352 
2353         /**
2354          * Specifies whether ExternalRef tags should be added to opinion expressions, holder and
2355          * targets to lthe entities, timex and predicates their heads correspond to.
2356          *
2357          * @param opinionLinking
2358          *            true, to enable this linking; null, to use the default setting
2359          * @param opinionLinkingUsingCoref
2360          *            true, to enable linking to coreferring entities/timex/predicates; null, to
2361          *            use the default setting
2362          * @return this builder object, for call chaining
2363          */
2364         public Builder withOpinionLinking(@Nullable final Boolean opinionLinking,
2365                 @Nullable final Boolean opinionLinkingUsingCoref) {
2366             this.opinionLinking = opinionLinking;
2367             this.opinionLinkingUsingCoref = opinionLinkingUsingCoref;
2368             return this;
2369         }
2370 
2371         /**
2372          * Creates a {@code NAFFilter} based on the flags specified on this builder object.
2373          *
2374          * @return the constructed {@code NAFFilter}
2375          */
2376         public NAFFilter build() {
2377             return new NAFFilter(this);
2378         }
2379 
2380     }
2381 
2382 }