1 package eu.fbk.dkm.pikes.resources;
2
3 import java.io.IOException;
4 import java.util.Arrays;
5 import java.util.Collection;
6 import java.util.Collections;
7 import java.util.List;
8 import java.util.Map;
9 import java.util.Set;
10 import java.util.function.Consumer;
11 import java.util.regex.Matcher;
12 import java.util.regex.Pattern;
13
14 import javax.annotation.Nullable;
15
16 import com.google.common.base.Charsets;
17 import com.google.common.base.Joiner;
18 import com.google.common.base.MoreObjects;
19 import com.google.common.base.Objects;
20 import com.google.common.base.Preconditions;
21 import com.google.common.base.Splitter;
22 import com.google.common.base.Strings;
23 import com.google.common.collect.BiMap;
24 import com.google.common.collect.HashMultimap;
25 import com.google.common.collect.ImmutableBiMap;
26 import com.google.common.collect.ImmutableList;
27 import com.google.common.collect.ImmutableMap;
28 import com.google.common.collect.ImmutableSet;
29 import com.google.common.collect.Lists;
30 import com.google.common.collect.Maps;
31 import com.google.common.collect.Multimap;
32 import com.google.common.collect.Ordering;
33 import com.google.common.collect.Sets;
34 import com.google.common.io.Resources;
35
36 import eu.fbk.rdfpro.util.Statements;
37 import eu.fbk.utils.svm.Util;
38 import org.eclipse.rdf4j.model.IRI;
39 import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
40 import org.slf4j.Logger;
41 import org.slf4j.LoggerFactory;
42
43 import ixa.kaflib.Coref;
44 import ixa.kaflib.Dep;
45 import ixa.kaflib.Entity;
46 import ixa.kaflib.ExternalRef;
47 import ixa.kaflib.KAFDocument;
48 import ixa.kaflib.LinkedEntity;
49 import ixa.kaflib.Opinion;
50 import ixa.kaflib.Opinion.OpinionExpression;
51 import ixa.kaflib.Opinion.OpinionHolder;
52 import ixa.kaflib.Opinion.OpinionTarget;
53 import ixa.kaflib.Predicate;
54 import ixa.kaflib.Predicate.Role;
55 import ixa.kaflib.Span;
56 import ixa.kaflib.Term;
57 import ixa.kaflib.Timex3;
58 import ixa.kaflib.WF;
59
60
61
62
63
64
65
66
67
68
69
70 public final class NAFFilter implements Consumer<KAFDocument> {
71
72 public static final String SUMO_NAMESPACE = "http://www.ontologyportal.org/SUMO.owl#";
73
74 public static final IRI SUMO_PROCESS = SimpleValueFactory.getInstance()
75 .createIRI(SUMO_NAMESPACE, "Process");
76
77
78
79 private static final Logger LOGGER = LoggerFactory.getLogger(NAFFilter.class);
80
81 private static final Map<String, String> ENTITY_SST_TO_TYPES = ImmutableMap
82 .<String, String>builder().put("person", "PER").put("group", "ORG")
83 .put("location", "LOC").put("quantity", "QUANTITY").put("artifact", "PRODUCT")
84 .put("act", "EVENT").put("event", "EVENT").put("phenomenon", "EVENT")
85 .put("process", "EVENT").put("state", "EVENT").put("animal", "MISC")
86 .put("plant", "MISC").put("body", "MISC").put("shape", "MISC").put("motive", "MISC")
87 .put("object", "MISC").put("substance", "MISC").build();
88
89
90 private static final Pattern SRL_ROLE_PATTERN = Pattern.compile("A(\\d).*");
91
92 private static final String PARTICIPATION_REGEX = ""
93 + "SUB? (COORD CONJ?)* (PMOD (COORD CONJ?)*)? ((VC OPRD?)|(IM OPRD?))*";
94
95 private static final String[] LINKING_STOP_WORDS;
96
97 private static final BiMap<String, String> MAPPING_PREFIXES = ImmutableBiMap.of("propbank",
98 "pb", "nombank", "nb", "verbnet", "vn", "framenet", "fn");
99
100 private static final Multimap<String, String> MAPPING_PREDICATES;
101
102 private static final Multimap<String, String> MAPPING_ARGUMENTS;
103
104 public static final NAFFilter DEFAULT = NAFFilter.builder().build();
105
/*
 * Class initialization: loads the linking stop words and the predicate / argument mappings
 * from classpath resources resolved relative to this class. Loading is best effort: any
 * failure is logged and the corresponding structure is left empty (or partially filled, for
 * the mappings), so that the class remains usable.
 */
static {
    // Stop words: fall back to an empty list if the resource cannot be read
    List<String> stopwords = Collections.emptyList();
    try {
        stopwords = Resources.readLines(NAFFilter.class.getResource("linking_stopwords"),
                Charsets.UTF_8);
        LOGGER.info("Loaded {} linking stopwords", stopwords.size());
    } catch (final IOException | RuntimeException ex) {
        // A missing resource makes getResource() return null, which is reported through an
        // unchecked exception rather than an IOException: catching it too prevents the
        // failure from aborting class initialization
        LOGGER.error("Could not load linking stopwords", ex);
    }

    // Stop words are kept lower-cased and sorted, so that they can be binary-searched
    LINKING_STOP_WORDS = stopwords.toArray(new String[stopwords.size()]);
    for (int i = 0; i < LINKING_STOP_WORDS.length; ++i) {
        LINKING_STOP_WORDS[i] = LINKING_STOP_WORDS[i].toLowerCase();
    }
    Arrays.sort(LINKING_STOP_WORDS);

    MAPPING_PREDICATES = HashMultimap.create();
    MAPPING_ARGUMENTS = HashMultimap.create();
    try {
        // Frame mappings, one per line: <resource> TAB <predicate id> TAB <frame>
        for (final String line : Resources.readLines(
                NAFFilter.class.getResource("mappings-frames.tsv"), Charsets.UTF_8)) {
            final List<String> tokens = Splitter.on("\t").trimResults().splitToList(line);
            final String prefix = tokens.get(0).substring(0, 2).toLowerCase();
            final String fromKey = prefix + ":" + tokens.get(1);
            // The frame name is stored with its first letter capitalized
            final String toKey = "fn:" + Character.toUpperCase(tokens.get(2).charAt(0))
                    + tokens.get(2).substring(1);
            MAPPING_PREDICATES.put(fromKey, toKey);
        }

        // Role mappings, one per line: <resource> TAB <argument id> TAB <frame>@<role>
        for (final String line : Resources.readLines(
                NAFFilter.class.getResource("mappings-roles.tsv"), Charsets.UTF_8)) {
            final List<String> tokens = Splitter.on("\t").trimResults().splitToList(line);
            final String prefix = tokens.get(0).substring(0, 2).toLowerCase();
            final String fromKey = prefix + ":" + tokens.get(1);
            final String fnRole = tokens.get(2);
            final int index = fnRole.indexOf('@');
            // Both the frame and the role name (after '@') get their first letter capitalized
            final String toKey = "fn:" + Character.toUpperCase(fnRole.charAt(0))
                    + fnRole.substring(1, index + 1)
                    + Character.toUpperCase(fnRole.charAt(index + 1))
                    + fnRole.substring(index + 2);
            MAPPING_ARGUMENTS.put(fromKey, toKey);
        }

    } catch (final IOException | RuntimeException ex) {
        // Covers unreadable / missing resources and malformed lines; JVM errors are not
        // swallowed
        LOGGER.error("Could not load mappings", ex);
    }
}
151
152 private final boolean termSenseFiltering;
153
154 private final boolean termSenseCompletion;
155
156 private final boolean entityRemoveOverlaps;
157
158 private final boolean entitySpanFixing;
159
160 private final boolean entityAddition;
161
162 private final boolean entityValueNormalization;
163
164 private final boolean linkingCompletion;
165
166 private final boolean linkingFixing;
167
168 private final boolean corefForRoleDependencies;
169
170 private final boolean corefSpanFixing;
171
172 private final boolean srlPreprocess;
173
174 private final boolean srlEnableMate;
175
176 private final boolean srlEnableSemafor;
177
178 private final boolean srlRemoveWrongRefs;
179
180 private final boolean srlRemoveUnknownPredicates;
181
182 private final boolean srlPredicateAddition;
183
184 private final boolean srlSelfArgFixing;
185
186 private final boolean srlSenseMapping;
187
188 private final boolean srlSenseMappingPM;
189
190 private final boolean srlFrameBaseMapping;
191
192 private final boolean srlRoleLinking;
193
194 private final boolean srlRoleLinkingUsingCoref;
195
196 private final boolean srlPreMOnIRIs;
197
198 private final boolean opinionLinking;
199
200 private final boolean opinionLinkingUsingCoref;
201
/**
 * Creates a filter from the options collected by the builder. Options left unset (null) in
 * the builder take the default value shown on the corresponding line below.
 *
 * @param builder
 *            the builder holding the (possibly null) option values
 */
private NAFFilter(final Builder builder) {

    // Term and entity options
    this.termSenseFiltering = builder.termSenseFiltering != null
            ? builder.termSenseFiltering : true;
    this.termSenseCompletion = builder.termSenseCompletion != null
            ? builder.termSenseCompletion : true;
    this.entityRemoveOverlaps = builder.entityRemoveOverlaps != null
            ? builder.entityRemoveOverlaps : true;
    this.entitySpanFixing = builder.entitySpanFixing != null
            ? builder.entitySpanFixing : true;
    this.entityAddition = builder.entityAddition != null
            ? builder.entityAddition : true;
    this.entityValueNormalization = builder.entityValueNormalization != null
            ? builder.entityValueNormalization : true;

    // Linking and coreference options
    this.linkingCompletion = builder.linkingCompletion != null
            ? builder.linkingCompletion : true;
    this.linkingFixing = builder.linkingFixing != null
            ? builder.linkingFixing : false;
    this.corefForRoleDependencies = builder.corefForRoleDependencies != null
            ? builder.corefForRoleDependencies : false;
    this.corefSpanFixing = builder.corefSpanFixing != null
            ? builder.corefSpanFixing : false;

    // SRL options
    this.srlPreprocess = builder.srlPreprocess != null
            ? builder.srlPreprocess : true;
    this.srlEnableMate = builder.srlEnableMate != null
            ? builder.srlEnableMate : true;
    this.srlEnableSemafor = builder.srlEnableSemafor != null
            ? builder.srlEnableSemafor : true;
    this.srlRemoveWrongRefs = builder.srlRemoveWrongRefs != null
            ? builder.srlRemoveWrongRefs : true;
    this.srlRemoveUnknownPredicates = builder.srlRemoveUnknownPredicates != null
            ? builder.srlRemoveUnknownPredicates : false;
    this.srlPredicateAddition = builder.srlPredicateAddition != null
            ? builder.srlPredicateAddition : true;
    this.srlSelfArgFixing = builder.srlSelfArgFixing != null
            ? builder.srlSelfArgFixing : true;
    this.srlSenseMapping = builder.srlSenseMapping != null
            ? builder.srlSenseMapping : true;
    // This option is not read from the builder: it is always disabled
    this.srlSenseMappingPM = false;
    this.srlFrameBaseMapping = builder.srlFrameBaseMapping != null
            ? builder.srlFrameBaseMapping : true;
    this.srlRoleLinking = builder.srlRoleLinking != null
            ? builder.srlRoleLinking : true;
    this.srlRoleLinkingUsingCoref = builder.srlRoleLinkingUsingCoref != null
            ? builder.srlRoleLinkingUsingCoref : true;
    this.srlPreMOnIRIs = builder.srlPreMOnIRIs != null
            ? builder.srlPreMOnIRIs : true;

    // Opinion options
    this.opinionLinking = builder.opinionLinking != null
            ? builder.opinionLinking : true;
    this.opinionLinkingUsingCoref = builder.opinionLinkingUsingCoref != null
            ? builder.opinionLinkingUsingCoref : true;
}
235
236 @Override
237 public void accept(final KAFDocument document) {
238 filter(document);
239 }
240
241
242
243
244
245
246
247
248 public void filter(final KAFDocument document) {
249
250
251 Preconditions.checkNotNull(document);
252
253
254 final long ts = System.currentTimeMillis();
255 LOGGER.debug("== Filtering {} ==", document.getPublic().uri);
256
257
258 NAFUtils.normalize(document);
259
260
261 if (this.termSenseFiltering) {
262 applyTermSenseFiltering(document);
263 }
264 if (this.termSenseCompletion) {
265 applyTermSenseCompletion(document);
266 }
267
268
269 if (this.entityRemoveOverlaps) {
270 applyEntityRemoveOverlaps(document);
271 }
272 if (this.entitySpanFixing) {
273 applyEntitySpanFixing(document);
274 }
275 if (this.linkingCompletion) {
276 applyLinkingCompletion(document);
277 }
278 if (this.linkingFixing) {
279 applyLinkingFixing(document);
280 }
281 if (this.entityAddition) {
282 applyEntityAddition(document);
283 }
284 if (this.entityValueNormalization) {
285 applyEntityValueNormalization(document);
286 }
287
288
289 if (this.srlPreprocess) {
290 applySRLPreprocess(document);
291 }
292 if (this.srlRemoveWrongRefs) {
293 applySRLRemoveWrongRefs(document);
294 }
295 if (this.srlRemoveUnknownPredicates) {
296 applySRLRemoveUnknownPredicates(document);
297 }
298 if (this.srlPredicateAddition) {
299 applySRLPredicateAddition(document);
300 }
301 if (this.srlSelfArgFixing) {
302 applySRLSelfArgFixing(document);
303 }
304 if (this.srlSenseMapping) {
305 applySRLSenseMapping(document);
306 }
307 if (this.srlFrameBaseMapping) {
308 applySRLFrameBaseMapping(document);
309 }
310 if (this.srlRoleLinking) {
311 applySRLRoleLinking(document);
312 }
313
314
315 if (this.srlPreMOnIRIs) {
316 applySRLPreMOnIRIs(document);
317 }
318
319
320 if (this.corefForRoleDependencies) {
321 applyCorefForRoleDependencies(document);
322 }
323 if (this.corefSpanFixing) {
324 applyCorefSpanFixing(document);
325 }
326
327
328 if (this.opinionLinking) {
329 applyOpinionLinking(document);
330 }
331
332 LOGGER.debug("Done in {} ms", System.currentTimeMillis() - ts);
333 }
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375 private void applyTermSenseFiltering(final KAFDocument document) {
376
377 for (final Term term : document.getTerms()) {
378 if (term.getMorphofeat() != null && term.getMorphofeat().startsWith("NNP")) {
379 NAFUtils.removeRefs(term, NAFUtils.RESOURCE_WN_SYNSET, null);
380 NAFUtils.removeRefs(term, NAFUtils.RESOURCE_WN_SST, null);
381 NAFUtils.removeRefs(term, NAFUtils.RESOURCE_BBN, null);
382 NAFUtils.removeRefs(term, NAFUtils.RESOURCE_SUMO, null);
383 NAFUtils.removeRefs(term, NAFUtils.RESOURCE_YAGO, null);
384 }
385 }
386 }
387
388 private void applyTermSenseCompletion(final KAFDocument document) {
389
390 for (final Term term : document.getTerms()) {
391
392
393 ExternalRef bbnRef = NAFUtils.getRef(term, NAFUtils.RESOURCE_BBN, null);
394 ExternalRef synsetRef = NAFUtils.getRef(term, NAFUtils.RESOURCE_WN_SYNSET, null);
395 ExternalRef sstRef = NAFUtils.getRef(term, NAFUtils.RESOURCE_WN_SST, null);
396 final List<ExternalRef> sumoRefs = NAFUtils.getRefs(term, NAFUtils.RESOURCE_SUMO,
397 null);
398 final List<ExternalRef> yagoRefs = NAFUtils.getRefs(term, NAFUtils.RESOURCE_YAGO,
399 null);
400
401
402 if (sstRef == null && synsetRef != null) {
403 final String sst = WordNet.mapSynsetToSST(synsetRef.getReference());
404 if (sstRef == null || !Objects.equal(sstRef.getReference(), sst)) {
405 LOGGER.debug((sstRef == null ? "Added" : "Overridden") + " SST '" + sst
406 + "' of " + NAFUtils.toString(term) + " based on Synset '"
407 + synsetRef.getReference() + "'");
408 sstRef = document.newExternalRef(NAFUtils.RESOURCE_WN_SST, sst);
409 NAFUtils.addRef(term, sstRef);
410 }
411 }
412
413
414 final boolean isNoun = Character.toUpperCase(term.getPos().charAt(0)) == 'N';
415 if (isNoun) {
416
417
418 if (bbnRef == null && synsetRef != null) {
419 final String bbn = WordNet.mapSynsetToBBN(synsetRef.getReference());
420 if (bbn != null) {
421 bbnRef = document.newExternalRef(NAFUtils.RESOURCE_BBN, bbn);
422 NAFUtils.addRef(term, bbnRef);
423 LOGGER.debug("Added BBN '" + bbn + "' of " + NAFUtils.toString(term)
424 + " based on Synset '" + synsetRef.getReference() + "'");
425 }
426
427 }
428
429
430 if (synsetRef == null && bbnRef != null) {
431 final String synsetID = WordNet.mapBBNToSynset(bbnRef.getReference());
432 if (synsetID != null) {
433 synsetRef = document.newExternalRef(NAFUtils.RESOURCE_WN_SYNSET, synsetID);
434 NAFUtils.addRef(term, synsetRef);
435 LOGGER.debug(
436 "Added Synset '" + synsetID + "' of " + NAFUtils.toString(term)
437 + " based on BBN '" + bbnRef.getReference() + "'");
438 }
439 }
440
441
442 if (sstRef == null && bbnRef != null) {
443 final String sst = WordNet.mapBBNToSST(bbnRef.getReference());
444 if (sst != null) {
445 sstRef = document.newExternalRef(NAFUtils.RESOURCE_WN_SST, sst);
446 NAFUtils.addRef(term, sstRef);
447 LOGGER.debug("Added SST '" + sst + "' of " + NAFUtils.toString(term)
448 + " based on BBN '" + bbnRef.getReference() + "'");
449 }
450 }
451 }
452
453
454 final String lemma = term.getLemma().toLowerCase();
455 if (sumoRefs.isEmpty() && synsetRef != null && !lemma.equals("be")) {
456 Set<String> synsetIDs = Sets.newHashSet(synsetRef.getReference());
457 Set<IRI> conceptIRIs = Sumo.synsetsToConcepts(synsetIDs);
458 while (conceptIRIs.isEmpty() && !synsetIDs.isEmpty()) {
459 final Set<String> oldSynsetIDs = synsetIDs;
460 synsetIDs = Sets.newHashSet();
461 for (final String oldSynsetID : oldSynsetIDs) {
462 synsetIDs.addAll(WordNet.getHypernyms(oldSynsetID));
463 }
464 conceptIRIs = Sumo.synsetsToConcepts(synsetIDs);
465 }
466 if (conceptIRIs.isEmpty()) {
467 synsetIDs = WordNet.getHyponyms(synsetRef.getReference());
468 conceptIRIs = Sumo.synsetsToConcepts(synsetIDs);
469 }
470 if (!conceptIRIs.isEmpty()) {
471 for (final IRI conceptIRI : conceptIRIs) {
472 final String sumoID = conceptIRI.getLocalName();
473 final ExternalRef sumoRef = document.newExternalRef(NAFUtils.RESOURCE_SUMO,
474 sumoID);
475 NAFUtils.setRef(term, sumoRef);
476 LOGGER.debug("Added SUMO mapping: " + NAFUtils.toString(term) + " -> sumo:"
477 + conceptIRI.getLocalName());
478 }
479 }
480 }
481
482
483 if (yagoRefs.isEmpty() && synsetRef != null) {
484 for (final IRI uri : YagoTaxonomy
485 .getDBpediaYagoIRIs(ImmutableList.of(synsetRef.getReference()))) {
486 final String yagoID = uri.stringValue()
487 .substring(YagoTaxonomy.NAMESPACE.length());
488 final ExternalRef yagoRef = document.newExternalRef(NAFUtils.RESOURCE_YAGO,
489 yagoID);
490 NAFUtils.setRef(term, yagoRef);
491 LOGGER.debug("Added Yago mapping: " + NAFUtils.toString(term) + " -> yago:"
492 + yagoID);
493 }
494 }
495 }
496 }
497
498 private void applyEntitySpanFixing(final KAFDocument document) {
499
500
501 for (final Entity entity : ImmutableList.copyOf(document.getEntities())) {
502
503
504
505 final List<Term> filteredTerms = NAFUtils.filterTerms(entity.getTerms());
506 if (filteredTerms.size() == entity.getTerms().size()) {
507 continue;
508 }
509
510
511 document.removeAnnotation(entity);
512
513
514
515 Entity newEntity = null;
516 if (!filteredTerms.isEmpty()) {
517 newEntity = document
518 .newEntity(ImmutableList.of(KAFDocument.newTermSpan(filteredTerms)));
519 newEntity.setType(entity.getType());
520 newEntity.setNamed(entity.isNamed());
521 for (final ExternalRef ref : entity.getExternalRefs()) {
522 newEntity.addExternalRef(ref);
523 }
524 }
525
526
527 if (LOGGER.isDebugEnabled()) {
528 LOGGER.debug((newEntity == null ? "Removed" : "Replaced") + " invalid "
529 + NAFUtils.toString(entity) + (newEntity == null ? ""
530 : " with filtered "
531 + NAFUtils.toString(newEntity)));
532 }
533 }
534 }
535
536 private void applyEntityRemoveOverlaps(final KAFDocument document) {
537
538
539 outer: for (final Entity entity : ImmutableList.copyOf(document.getEntities())) {
540 for (final Term term : entity.getTerms()) {
541
542
543 for (final Entity entity2 : document.getEntitiesByTerm(term)) {
544 if (entity2 != entity && entity2.getTerms().containsAll(entity.getTerms())) {
545 document.removeAnnotation(entity);
546 if (LOGGER.isDebugEnabled()) {
547 LOGGER.debug("Removed " + NAFUtils.toString(entity)
548 + " overlapping with " + NAFUtils.toString(entity2));
549 }
550 continue outer;
551 }
552 }
553
554
555 for (final WF wf : term.getWFs()) {
556 final List<Timex3> timex = document.getTimeExsByWF(wf);
557 if (!timex.isEmpty()) {
558 document.removeAnnotation(entity);
559 if (LOGGER.isDebugEnabled()) {
560 LOGGER.debug("Removed " + NAFUtils.toString(entity)
561 + " overlapping with TIMEX3 '" + NAFUtils.toString(timex));
562 }
563 continue outer;
564 }
565 }
566 }
567 }
568 }
569
570 private void applyEntityAddition(final KAFDocument document) {
571
572 for (final Term term : document.getTerms()) {
573
574
575 final char pos = Character.toUpperCase(term.getPos().charAt(0));
576 final Dep dep = document.getDepToTerm(term);
577 final boolean namePart = pos == 'R' && dep != null
578 && dep.getRfunc().toLowerCase().contains("name")
579 && Character.toUpperCase(dep.getFrom().getPos().charAt(0)) == 'R'
580 && document.getEntitiesByTerm(dep.getFrom()).isEmpty();
581 if (pos != 'R' && pos != 'N' && pos != 'Q' || namePart
582 || !document.getTimeExsByWF(term.getWFs().get(0)).isEmpty()
583 || !document.getEntitiesByTerm(term).isEmpty()) {
584 continue;
585 }
586
587
588 String type = null;
589 final ExternalRef bbnRef = NAFUtils.getRef(term, NAFUtils.RESOURCE_BBN, null);
590 if (bbnRef != null) {
591 type = bbnRef.getReference();
592 } else {
593 final ExternalRef synsetRef = NAFUtils.getRef(term, NAFUtils.RESOURCE_WN_SYNSET,
594 null);
595 if (synsetRef != null) {
596 type = WordNet.mapSynsetToBBN(synsetRef.getReference());
597 } else {
598 final ExternalRef sstRef = NAFUtils.getRef(term, NAFUtils.RESOURCE_WN_SST,
599 null);
600 if (sstRef != null) {
601 String sst = sstRef.getReference();
602 sst = sst.substring(sst.lastIndexOf('.') + 1);
603 type = ENTITY_SST_TO_TYPES.get(sst);
604 }
605 }
606 }
607
608
609
610 final Span<Term> span = NAFUtils.getNominalSpan(document, term, false, false);
611
612
613 final Entity entity = document.newEntity(ImmutableList.of(span));
614 if (type != null)
615 entity.setType(type.toUpperCase().replace("PERSON", "PER")
616 .replace("ORGANIZATION", "ORG").replace("LOCATION", "LOC"));
617 entity.setNamed(pos == 'R');
618 if (LOGGER.isDebugEnabled()) {
619 LOGGER.debug("Added " + (entity.isNamed() ? "named " : "")
620 + NAFUtils.toString(entity) + " with type '" + type + "'");
621 }
622 }
623 }
624
625 private void applyEntityValueNormalization(final KAFDocument document) {
626
627 for (final Entity entity : document.getEntities()) {
628 String type = entity.getType();
629 type = type == null ? null : type.toLowerCase();
630 if ("cardinal".equals(type) || "ordinal".equals(type) || "percent".equals(type)
631 || "money".equals(type)) {
632
633 ExternalRef ref = null;
634 final String str = entity.getSpans().get(0).getStr().toLowerCase();
635 Double value = null;
636 try {
637 value = NumberSpeller.parse(str);
638 } catch (Throwable ex) {
639 LOGGER.debug("Could not parse number '" + str + "'", ex);
640 }
641 if (value != null) {
642 String prefix = "";
643 if ("percent".equals(type)) {
644 prefix = "%";
645 } else if ("money".equals(type)) {
646 prefix = "¤";
647 if (str.contains("euro")) {
648 prefix = "€";
649 } else if (str.contains("dollar")) {
650 prefix = "$";
651 } else if (str.contains("yen")) {
652 prefix = "Â¥";
653 }
654 }
655 ref = document.newExternalRef(NAFUtils.RESOURCE_VALUE,
656 prefix + Double.toString(value.doubleValue()));
657 }
658
659 if (ref != null && NAFUtils.getRef(entity, ref.getResource(), null) == null) {
660 NAFUtils.addRef(entity, ref);
661 LOGGER.debug("Added ref '" + ref + "' to " + NAFUtils.toString(entity));
662 }
663 }
664 }
665 }
666
667 private void applyLinkingCompletion(final KAFDocument document) {
668
669 for (final LinkedEntity le : document.getLinkedEntities()) {
670
671
672 final List<Term> terms = document.getTermsByWFs(le.getWFs().getTargets());
673 final Term head = document.getTermsHead(terms);
674 if (head == null) {
675 continue;
676 }
677
678
679 Entity entityToModify = null;
680 for (final Entity entity : document.getEntitiesByTerm(head)) {
681 if (head.equals(document.getTermsHead(entity.getTerms()))) {
682 entityToModify = entity;
683 }
684 }
685 if (entityToModify == null) {
686 final Span<Term> span = KAFDocument
687 .newTermSpan(document.getTermsByWFs(le.getWFs().getTargets()));
688 boolean overlap = false;
689 for (final Term term : span.getTargets()) {
690 final List<Entity> overlappingEntities = document.getEntitiesByTerm(term);
691 if (overlappingEntities != null && !overlappingEntities.isEmpty()) {
692 overlap = true;
693 break;
694 }
695 }
696 if (!overlap) {
697 final boolean named = head.getMorphofeat().startsWith("NNP");
698 boolean accept = named;
699 if (!accept) {
700 final String textStr = span.getStr().toLowerCase().replaceAll("\\s+", "_");
701 final String entityStr = Statements.VALUE_FACTORY
702 .createIRI(le.getReference()).getLocalName().toLowerCase();
703 accept = textStr.equals(entityStr);
704 }
705 if (accept) {
706 entityToModify = document.newEntity(ImmutableList.of(span));
707 entityToModify.setNamed(head.getMorphofeat().startsWith("NNP"));
708 if (LOGGER.isDebugEnabled()) {
709 LOGGER.debug(
710 "Added linked " + (entityToModify.isNamed() ? "named " : "")
711 + NAFUtils.toString(entityToModify));
712 }
713 }
714 }
715 }
716
717 if (entityToModify != null) {
718 final ExternalRef existingRef = NAFUtils.getRef(entityToModify, le.getResource(),
719 le.getReference());
720 if (existingRef == null) {
721 final ExternalRef ref = document.newExternalRef(le.getResource(),
722 le.getReference());
723 ref.setConfidence((float) le.getConfidence());
724 NAFUtils.addRef(entityToModify, ref);
725 LOGGER.debug(
726 "Added ref '" + ref + "' to " + NAFUtils.toString(entityToModify));
727 } else {
728 float existingRefConfidence = existingRef.getConfidence();
729 if (existingRefConfidence < le.getConfidence()) {
730 existingRef.setConfidence((float) le.getConfidence());
731 LOGGER.debug("Modified confidence of '" + existingRef + "' to "
732 + le.getConfidence());
733 }
734 }
735 }
736
737
738 for (final Predicate predicate : document.getPredicatesByTerm(head)) {
739 if (head.equals(document.getTermsHead(predicate.getTerms()))) {
740 if (NAFUtils.getRef(predicate, le.getResource(), le.getReference()) == null) {
741 final ExternalRef ref = document.newExternalRef(le.getResource(),
742 le.getReference());
743 ref.setConfidence((float) le.getConfidence());
744 NAFUtils.addRef(predicate, ref);
745 LOGGER.debug("Added ref '" + ref + "' to " + NAFUtils.toString(predicate));
746 }
747 }
748 }
749 }
750 }
751
752 private void applyLinkingFixing(final KAFDocument document) {
753
754
755 final List<ExternalRef> refs = Lists.newArrayList();
756 for (final Entity entity : document.getEntities()) {
757
758
759 refs.clear();
760 for (final ExternalRef ref : entity.getExternalRefs()) {
761 if (!NAFUtils.RESOURCE_VALUE.equals(ref.getResource())) {
762 refs.add(ref);
763 }
764 }
765
766
767 if (!refs.isEmpty()) {
768 final String[] tokens = Util.hardTokenize(entity.getStr());
769 final String normalized = Joiner.on(' ').join(tokens).toLowerCase();
770 if (Arrays.binarySearch(LINKING_STOP_WORDS, normalized) >= 0) {
771 for (final ExternalRef ref : refs) {
772 NAFUtils.removeRefs(entity, ref.getResource(), ref.getReference());
773 if (LOGGER.isDebugEnabled()) {
774 LOGGER.debug("Removed stop-word ref '{}' from {}", ref,
775 NAFUtils.toString(entity));
776 }
777 }
778 }
779 }
780 }
781 }
782
783 @SuppressWarnings("deprecation")
784 private void applyCorefSpanFixing(final KAFDocument document) {
785
786
787 for (final Coref coref : ImmutableList.copyOf(document.getCorefs())) {
788
789
790 for (final Span<Term> span : ImmutableList.copyOf(coref.getSpans())) {
791 final Term head = NAFUtils.extractHead(document, span);
792 if (head == null) {
793 coref.getSpans().remove(span);
794 if (LOGGER.isDebugEnabled()) {
795 LOGGER.debug("Removed span with invalid head '{}' from {}", span.getStr(),
796 NAFUtils.toString(coref));
797 }
798 } else {
799 span.setHead(head);
800 }
801 }
802
803
804 boolean hasProperNounHead = false;
805 boolean isEvent = false;
806 final List<Span<Term>> spans = ImmutableList.copyOf(coref.getSpans());
807 outer: for (final Span<Term> span1 : spans) {
808 for (final Span<Term> span2 : spans) {
809 if (span1.size() > span2.size()
810 && span1.getTargets().containsAll(span2.getTargets())) {
811 coref.getSpans().remove(span1);
812 if (LOGGER.isDebugEnabled()) {
813 LOGGER.debug("Removed span '{}' including smaller span '{}' from {}",
814 span1.getStr(), span2.getStr(), NAFUtils.toString(coref));
815 }
816 continue outer;
817 }
818 }
819 hasProperNounHead |= span1.getHead().getMorphofeat().startsWith("NNP");
820 if (!isEvent) {
821 for (final ExternalRef ref : NAFUtils.getRefs(span1.getHead(),
822 NAFUtils.RESOURCE_SUMO, null)) {
823 final IRI sumoID = Statements.VALUE_FACTORY
824 .createIRI(SUMO_NAMESPACE + ref.getReference());
825 if (Sumo.isSubClassOf(sumoID, SUMO_PROCESS)) {
826 isEvent = true;
827 }
828 }
829 }
830 }
831
832
833 if (hasProperNounHead) {
834
835
836 for (final Span<Term> span : ImmutableList.copyOf(coref.getSpans())) {
837 final Term head = span.getHead();
838 if (!head.getMorphofeat().startsWith("NNP") && !isEvent) {
839 if (head.getMorphofeat().startsWith("VB")) {
840 coref.getSpans().remove(span);
841 LOGGER.debug("Removed span with VB head '{}' from {}", span.getStr(),
842 NAFUtils.toString(coref));
843 } else {
844 outer: for (final Predicate predicate : document
845 .getPredicatesByTerm(head)) {
846 for (final ExternalRef ref : NAFUtils.getRefs(predicate,
847 NAFUtils.RESOURCE_NOMBANK, null)) {
848 final NomBank.Roleset roleset = NomBank
849 .getRoleset(ref.getReference());
850 if (roleset != null
851 && roleset.getPredMandatoryArgNums().isEmpty()
852 && roleset.getPredOptionalArgNums().isEmpty()) {
853
854 coref.getSpans().remove(span);
855 LOGGER.debug(
856 "Removed span with non-role predicate "
857 + "head '{}' from {}",
858 span.getStr(), NAFUtils.toString(coref));
859 break outer;
860 }
861 }
862 }
863 }
864 }
865 }
866
867 } else {
868
869
870 final Multimap<Integer, Span<Term>> spansBySentence = HashMultimap.create();
871 for (final Span<Term> span : coref.getSpans()) {
872 final int sentID = span.getTargets().get(0).getSent();
873 spansBySentence.put(sentID, span);
874 }
875 if (spansBySentence.keySet().size() > 1) {
876 coref.getSpans().clear();
877 for (final Collection<Span<Term>> sentSpans : spansBySentence.asMap()
878 .values()) {
879 if (sentSpans.size() > 1) {
880 document.newCoref(Lists.newArrayList(sentSpans));
881 }
882 }
883 }
884
885 }
886
887
888 if (coref.getSpans().isEmpty()) {
889 document.removeAnnotation(coref);
890 LOGGER.debug("Removed empty coref set {}", NAFUtils.toString(coref));
891 }
892 }
893 }
894
895 private void applyCorefForRoleDependencies(final KAFDocument document) {
896
897 outer: for (final Dep dep : document.getDeps()) {
898 final String label = dep.getRfunc();
899 if ("APPO".equals(label) || "TITLE".equals(label) || "NMOD".equals(label)) {
900
901
902 Term nameTerm;
903 Term roleTerm;
904 final String posFrom = dep.getFrom().getMorphofeat();
905 final String posTo = dep.getTo().getMorphofeat();
906 if (posFrom.startsWith("NNP") && posTo.startsWith("NN")
907 && !posTo.startsWith("NNP")) {
908 nameTerm = dep.getFrom();
909 roleTerm = dep.getTo();
910 } else if (posTo.startsWith("NNP") && posFrom.startsWith("NN")
911 && !posFrom.startsWith("NNP") && label.equals("APPO")) {
912 nameTerm = dep.getTo();
913 roleTerm = dep.getFrom();
914 } else {
915 continue outer;
916 }
917
918
919 for (final Coref coref : document.getCorefsByTerm(nameTerm)) {
920 if (NAFUtils.hasHead(document, coref, nameTerm)
921 && NAFUtils.hasHead(document, coref, roleTerm)) {
922 continue outer;
923 }
924 }
925
926
927 boolean isActualRole = false;
928 predLoop: for (final Predicate predicate : document
929 .getPredicatesByTerm(roleTerm)) {
930 for (final ExternalRef ref : predicate.getExternalRefs()) {
931 if (NAFUtils.RESOURCE_NOMBANK.equals(ref.getResource())) {
932 final NomBank.Roleset rs = NomBank.getRoleset(ref.getReference());
933 if (rs != null && (!rs.getPredMandatoryArgNums().isEmpty()
934 || !rs.getPredOptionalArgNums().isEmpty())) {
935 isActualRole = true;
936 break predLoop;
937 }
938 }
939 }
940 }
941 if (!isActualRole) {
942 continue outer;
943 }
944
945
946 final Set<Term> roleHeads = document
947 .getTermsByDepAncestors(ImmutableSet.of(roleTerm), "(COORD CONJ?)*");
948 final Set<Term> nameHeads = document
949 .getTermsByDepAncestors(ImmutableSet.of(nameTerm), "(COORD CONJ?)*");
950
951
952 for (final Term nameHead : nameHeads) {
953 if (!nameHead.getMorphofeat().startsWith("NNP")) {
954 continue outer;
955 }
956 }
957
958
959 for (final Term roleHead : roleHeads) {
960 final boolean plural = roleHead.getMorphofeat().endsWith("S");
961 if (nameHeads.size() == 1 && plural || nameHeads.size() > 1 && !plural) {
962 continue outer;
963 }
964 }
965
966
967 final List<Span<Term>> spans = Lists.newArrayList();
968 spans.add(NAFUtils.getNominalSpan(document, nameTerm, true, false));
969 for (final Term roleHead : roleHeads) {
970 spans.add(NAFUtils.getNominalSpan(document, roleHead, false, false));
971 }
972 final Coref coref = document.newCoref(spans);
973 if (LOGGER.isDebugEnabled()) {
974 final StringBuilder builder = new StringBuilder("Added coref ");
975 builder.append(coref.getId()).append(":");
976 for (final Span<Term> span : coref.getSpans()) {
977 builder.append(" '").append(span.getStr()).append('\'');
978 }
979 LOGGER.debug(builder.toString());
980 }
981 }
982 }
983 }
984
985 private void applySRLPreprocess(final KAFDocument document) {
986
987
988 final Map<Term, Predicate> matePredicates = Maps.newHashMap();
989 final Map<Term, Predicate> semaforPredicates = Maps.newHashMap();
990
991
992 for (final Predicate predicate : ImmutableList.copyOf(document.getPredicates())) {
993 if (NAFUtils.extractHead(document, predicate.getSpan()) == null) {
994 document.removeAnnotation(predicate);
995 LOGGER.debug("Removed {} without valid head term", predicate);
996 }
997 }
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030 for (final Predicate predicate : Lists.newArrayList(document.getPredicates())) {
1031 final boolean isSemafor = predicate.getId().startsWith("f_pr")
1032 || "semafor".equalsIgnoreCase(predicate.getSource());
1033 if (isSemafor && !this.srlEnableSemafor || !isSemafor && !this.srlEnableMate) {
1034 document.removeAnnotation(predicate);
1035 if (LOGGER.isDebugEnabled()) {
1036 LOGGER.debug("Removed " + NAFUtils.toString(predicate) + " (disabled)");
1037 }
1038 } else {
1039 final Term term = NAFUtils.extractHead(document, predicate.getSpan());
1040 (isSemafor ? semaforPredicates : matePredicates).put(term, predicate);
1041 }
1042 }
1043
1044
1045 for (final Map.Entry<Term, Predicate> entry : semaforPredicates.entrySet()) {
1046 final Term term = entry.getKey();
1047 final Predicate semaforPredicate = entry.getValue();
1048 final Predicate matePredicate = matePredicates.get(term);
1049 if (matePredicate != null) {
1050
1051
1052 final ExternalRef semaforRef = NAFUtils.getRef(semaforPredicate, "FrameNet", null);
1053 final ExternalRef mateRef = NAFUtils.getRef(matePredicate, "FrameNet", null);
1054 final boolean mergeFramenet = semaforRef != null && mateRef != null
1055 && semaforRef.getReference().equalsIgnoreCase(mateRef.getReference());
1056
1057
1058 for (final ExternalRef ref : NAFUtils.getRefs(matePredicate, null, null)) {
1059 if (!ref.getResource().equalsIgnoreCase("FrameNet")) {
1060 NAFUtils.addRef(semaforPredicate, new ExternalRef(ref));
1061 }
1062 }
1063
1064
1065 for (final Role mateRole : matePredicate.getRoles()) {
1066 boolean addRole = true;
1067 final Set<Term> mateTerms = ImmutableSet
1068 .copyOf(mateRole.getSpan().getTargets());
1069 for (final Role semaforRole : semaforPredicate.getRoles()) {
1070 final Set<Term> semaforTerms = ImmutableSet
1071 .copyOf(semaforRole.getSpan().getTargets());
1072 if (mateTerms.equals(semaforTerms)) {
1073 addRole = false;
1074 semaforRole.setSemRole(mateRole.getSemRole());
1075 final boolean addFramenetRef = mergeFramenet
1076 && NAFUtils.getRef(semaforRole, "FrameNet", null) != null;
1077 for (final ExternalRef ref : mateRole.getExternalRefs()) {
1078 if (!ref.getResource().equalsIgnoreCase("FrameNet")
1079 || addFramenetRef) {
1080 semaforRole.addExternalRef(new ExternalRef(ref));
1081 }
1082 }
1083 }
1084 }
1085 if (addRole) {
1086 final Role semaforRole = document.newRole(semaforPredicate,
1087 mateRole.getSemRole(), mateRole.getSpan());
1088 semaforPredicate.addRole(semaforRole);
1089 for (final ExternalRef ref : mateRole.getExternalRefs()) {
1090 semaforRole.addExternalRef(new ExternalRef(ref));
1091 }
1092 }
1093 }
1094
1095
1096 document.removeAnnotation(matePredicate);
1097
1098
1099 if (LOGGER.isDebugEnabled()) {
1100 LOGGER.debug("Merged " + NAFUtils.toString(matePredicate) + " into "
1101 + NAFUtils.toString(semaforPredicate)
1102 + (mergeFramenet ? " (including FrameNet data)" : ""));
1103 }
1104
1105 }
1106 }
1107 }
1108
1109 private void applySRLRemoveWrongRefs(final KAFDocument document) {
1110
1111
1112 for (final Predicate predicate : Lists.newArrayList(document.getPredicates())) {
1113
1114
1115 final Term head = document.getTermsHead(predicate.getTerms());
1116 final String expectedLemma = head.getLemma();
1117
1118
1119 final String resource = head.getPos().equalsIgnoreCase("V") ? "propbank" : "nombank";
1120
1121
1122 final List<ExternalRef> refs = NAFUtils.getRefs(predicate, resource, null);
1123 Integer expectedSense = null;
1124 for (final ExternalRef ref : refs) {
1125 if (ref.getSource() != null) {
1126 expectedSense = NAFUtils.extractSense(ref.getReference());
1127 break;
1128 }
1129 }
1130 for (final ExternalRef ref : refs) {
1131 final String lemma = NAFUtils.extractLemma(ref.getReference());
1132 final Integer sense = NAFUtils.extractSense(ref.getReference());
1133 if (!expectedLemma.equalsIgnoreCase(lemma)
1134 || expectedSense != null && !expectedSense.equals(sense)) {
1135 NAFUtils.removeRefs(predicate, resource, ref.getReference());
1136 if (LOGGER.isDebugEnabled()) {
1137 LOGGER.debug("Removed wrong roleset '" + ref.getReference() + "' for "
1138 + NAFUtils.toString(predicate));
1139 }
1140 }
1141 }
1142
1143
1144 for (final Role role : predicate.getRoles()) {
1145 final Integer expectedNum = NAFUtils.extractArgNum(role.getSemRole());
1146 for (final ExternalRef ref : NAFUtils.getRefs(role, resource, null)) {
1147 final String lemma = NAFUtils.extractLemma(ref.getReference());
1148 final Integer sense = NAFUtils.extractSense(ref.getReference());
1149 final Integer num = NAFUtils.extractArgNum(ref.getReference());
1150 if (!Objects.equal(expectedNum, num) || !expectedLemma.equalsIgnoreCase(lemma)
1151 || expectedSense != null && !expectedSense.equals(sense)) {
1152 role.getExternalRefs().remove(ref);
1153 if (LOGGER.isDebugEnabled()) {
1154 LOGGER.debug("Removed wrong role '" + ref.getReference() + "' for "
1155 + NAFUtils.toString(predicate));
1156 }
1157 }
1158 }
1159 }
1160 }
1161 }
1162
1163 private void applySRLRemoveUnknownPredicates(final KAFDocument document) {
1164
1165
1166 for (final Predicate predicate : Lists.newArrayList(document.getPredicates())) {
1167
1168
1169 final Term head = document.getTermsHead(predicate.getTerms());
1170 final boolean isVerb = head.getPos().equalsIgnoreCase("V");
1171 final String resource = isVerb ? "propbank" : "nombank";
1172
1173
1174 for (final ExternalRef ref : NAFUtils.getRefs(predicate, resource, null)) {
1175 final String roleset = ref.getReference();
1176 if (isVerb && PropBank.getRoleset(roleset) == null
1177 || !isVerb && NomBank.getRoleset(roleset) == null) {
1178 document.removeAnnotation(predicate);
1179 if (LOGGER.isDebugEnabled()) {
1180 LOGGER.debug("Removed " + NAFUtils.toString(predicate)
1181 + " with unknown sense '" + roleset + "' in resource " + resource);
1182 }
1183 break;
1184 }
1185 }
1186 }
1187 }
1188
1189 private void applySRLPredicateAddition(final KAFDocument document) {
1190
1191 for (final Term term : document.getTerms()) {
1192
1193
1194 final char pos = Character.toUpperCase(term.getPos().charAt(0));
1195 if (pos != 'V' && pos != 'N' && pos != 'G' && pos != 'A'
1196 || !document.getPredicatesByTerm(term).isEmpty()
1197 || !document.getTimeExsByWF(term.getWFs().get(0)).isEmpty()) {
1198 continue;
1199 }
1200
1201
1202
1203
1204 Entity entity = null;
1205 for (final Entity e : document.getEntitiesByTerm(term)) {
1206 if (entity == null || e.getTerms().size() < entity.getTerms().size()) {
1207 entity = e;
1208 break;
1209 }
1210 }
1211 if (entity != null && term != document.getTermsHead(entity.getTerms())) {
1212 continue;
1213 }
1214
1215
1216
1217 ExternalRef ref = null;
1218 final String lemma = term.getLemma();
1219 if (pos == 'V') {
1220 final List<PropBank.Roleset> rolesets = PropBank.getRolesets(lemma);
1221 if (rolesets.size() == 1) {
1222 final String rolesetID = rolesets.get(0).getID();
1223 ref = document.newExternalRef(NAFUtils.RESOURCE_PROPBANK, rolesetID);
1224 }
1225 } else {
1226 final List<NomBank.Roleset> rolesets = NomBank.getRolesetsForLemma(lemma);
1227 if (rolesets.size() == 1) {
1228 final String rolesetID = rolesets.get(0).getId();
1229 ref = document.newExternalRef(NAFUtils.RESOURCE_NOMBANK, rolesetID);
1230 }
1231 }
1232
1233
1234 if (ref != null) {
1235 final Predicate predicate = document.newPredicate(
1236 KAFDocument.newTermSpan(Collections.singletonList(term), term));
1237 predicate.addExternalRef(ref);
1238 if (LOGGER.isDebugEnabled()) {
1239 LOGGER.debug("Added " + NAFUtils.toString(predicate) + ", sense '"
1240 + ref.getReference() + "'");
1241 }
1242 }
1243 }
1244 }
1245
1246 private void applySRLSelfArgFixing(final KAFDocument document) {
1247
1248 for (final Predicate predicate : document.getPredicates()) {
1249
1250
1251 final Term predTerm = predicate.getTerms().get(0);
1252 if (predTerm.getPos().equalsIgnoreCase("V")) {
1253 continue;
1254 }
1255
1256
1257 final String rolesetID = NAFUtils.getRoleset(predicate);
1258 final NomBank.Roleset roleset = rolesetID == null ? null
1259 : NomBank.getRoleset(rolesetID);
1260 if (roleset == null) {
1261 continue;
1262 }
1263
1264
1265 final List<Integer> mandatoryArgs = roleset.getPredMandatoryArgNums();
1266 final List<Integer> optionalArgs = roleset.getPredOptionalArgNums();
1267
1268
1269 int currentNum = -1;
1270 for (final Role role : ImmutableList.copyOf(predicate.getRoles())) {
1271 final Term headTerm = document.getTermsHead(role.getTerms());
1272 if (headTerm == predTerm && role.getSemRole() != null) {
1273 boolean valid = false;
1274 final Matcher matcher = SRL_ROLE_PATTERN.matcher(role.getSemRole());
1275 if (matcher.matches()) {
1276 currentNum = Integer.parseInt(matcher.group(1));
1277 valid = roleset.getPredMandatoryArgNums().contains(currentNum)
1278 || roleset.getPredOptionalArgNums().contains(currentNum);
1279 }
1280 if (!valid) {
1281 predicate.removeRole(role);
1282 LOGGER.debug("Removed " + NAFUtils.toString(role) + " for "
1283 + NAFUtils.toString(predicate) + " (mandatory " + mandatoryArgs
1284 + ", optional " + optionalArgs + ")");
1285 }
1286 }
1287 }
1288
1289
1290 if (!roleset.getPredMandatoryArgNums().isEmpty()) {
1291 final List<Integer> args = Lists.newArrayList();
1292 args.addAll(roleset.getPredMandatoryArgNums());
1293 args.remove((Object) currentNum);
1294 for (final Integer arg : args) {
1295 final List<Term> terms = Ordering.from(Term.OFFSET_COMPARATOR).sortedCopy(
1296 document.getTermsByDepAncestors(Collections.singleton(predTerm)));
1297 final Span<Term> span = KAFDocument.newTermSpan(terms, predTerm);
1298 final String semRole = "A" + arg;
1299 final Role role = document.newRole(predicate, semRole, span);
1300 predicate.addRole(role);
1301 if (LOGGER.isDebugEnabled()) {
1302 LOGGER.debug("Added " + NAFUtils.toString(role) + " to "
1303 + NAFUtils.toString(predicate));
1304 }
1305 }
1306 }
1307 }
1308 }
1309
1310 private void applySRLSenseMapping(final KAFDocument document) {
1311
1312 for (final Predicate predicate : document.getPredicates()) {
1313
1314
1315 mapExternalRefs(predicate, MAPPING_PREDICATES);
1316
1317
1318 NomBank.Roleset nbRoleset = null;
1319 PropBank.Roleset pbRoleset = null;
1320 if (this.srlSenseMappingPM) {
1321
1322 if (predicate.getTerms().get(0).getPos().equalsIgnoreCase("V")) {
1323 final ExternalRef ref = predicate.getExternalRef(NAFUtils.RESOURCE_PROPBANK);
1324 pbRoleset = ref == null ? null : PropBank.getRoleset(ref.getReference());
1325 } else {
1326 final ExternalRef ref = predicate.getExternalRef(NAFUtils.RESOURCE_NOMBANK);
1327 nbRoleset = ref == null ? null : NomBank.getRoleset(ref.getReference());
1328 final String pbSense = nbRoleset == null ? null : nbRoleset.getPBId();
1329 pbRoleset = pbSense == null ? null : PropBank.getRoleset(pbSense);
1330 }
1331
1332
1333 if (pbRoleset != null) {
1334
1335 if (NAFUtils.getRef(predicate, NAFUtils.RESOURCE_PROPBANK,
1336 pbRoleset.getID()) == null) {
1337 NAFUtils.addRef(predicate, document.newExternalRef(
1338 NAFUtils.RESOURCE_PROPBANK, pbRoleset.getID()));
1339 }
1340
1341
1342
1343 for (final String vnFrame : pbRoleset.getVNFrames()) {
1344 NAFUtils.setRef(predicate,
1345 document.newExternalRef(NAFUtils.RESOURCE_VERBNET, vnFrame));
1346 }
1347 for (final String fnFrame : pbRoleset.getFNFrames()) {
1348 NAFUtils.setRef(predicate,
1349 document.newExternalRef(NAFUtils.RESOURCE_FRAMENET, fnFrame));
1350 }
1351 }
1352 }
1353
1354
1355 for (final Role role : predicate.getRoles()) {
1356
1357
1358 if (role.getSemRole().startsWith("A")) {
1359 final boolean verb = NAFUtils.extractHead(document, predicate.getSpan())
1360 .getMorphofeat().startsWith("VB");
1361 final String resource = verb ? "PropBank" : "NomBank";
1362 final ExternalRef ref = NAFUtils.getRef(predicate, resource, null);
1363 if (ref != null) {
1364 final String r = role.getSemRole().startsWith("AM-")
1365 ? role.getSemRole().substring(3)
1366 : role.getSemRole().substring(1);
1367 role.addExternalRef(new ExternalRef(resource,
1368 ref.getReference() + "@" + r.toLowerCase()));
1369 }
1370 }
1371
1372
1373 mapExternalRefs(role, MAPPING_ARGUMENTS);
1374
1375
1376 if (this.srlSenseMappingPM) {
1377 final String semRole = role.getSemRole();
1378 final char numChar = semRole.charAt(semRole.length() - 1);
1379 if (semRole != null && Character.isDigit(numChar)) {
1380
1381
1382 final int num = Character.digit(numChar, 10);
1383 final int pbNum = nbRoleset == null ? num : nbRoleset.getArgPBNum(num);
1384 if (pbNum < 0) {
1385 continue;
1386 }
1387 final String pbRole = pbRoleset.getID() + '@' + pbNum;
1388
1389
1390
1391
1392 if (NAFUtils.getRef(role, NAFUtils.RESOURCE_PROPBANK, pbRole) == null) {
1393 NAFUtils.setRef(role,
1394 document.newExternalRef(NAFUtils.RESOURCE_PROPBANK, pbRole));
1395 }
1396
1397
1398 for (final String vnRole : pbRoleset.getArgVNRoles(pbNum)) {
1399 NAFUtils.setRef(role,
1400 document.newExternalRef(NAFUtils.RESOURCE_VERBNET, vnRole));
1401 }
1402 for (final String fnRole : pbRoleset.getArgFNRoles(pbNum)) {
1403 NAFUtils.setRef(role,
1404 document.newExternalRef(NAFUtils.RESOURCE_FRAMENET, fnRole));
1405 }
1406 }
1407 }
1408 }
1409 }
1410 }
1411
1412 private void applySRLFrameBaseMapping(final KAFDocument document) {
1413
1414
1415 for (final Predicate predicate : document.getPredicates()) {
1416
1417
1418 final Term head = NAFUtils.extractHead(document, predicate.getSpan());
1419 final FrameBase.POS pos = FrameBase.POS.forPennTag(head.getMorphofeat());
1420
1421
1422 final StringBuilder builder = new StringBuilder();
1423 for (final Term term : predicate.getSpan().getTargets()) {
1424 builder.append(builder.length() == 0 ? "" : "_");
1425 builder.append(term.getLemma().toLowerCase());
1426 }
1427 final String lemma = builder.toString();
1428
1429
1430 for (final ExternalRef ref : ImmutableList.copyOf(predicate.getExternalRefs())) {
1431 if (ref.getResource().equalsIgnoreCase("framenet")) {
1432 final String frame = ref.getReference();
1433 final IRI fnClass = FrameBase.classFor(frame, lemma, pos);
1434 if (fnClass != null) {
1435 NAFUtils.setRef(predicate,
1436 new ExternalRef("FrameBase", fnClass.getLocalName()));
1437 }
1438 }
1439 }
1440
1441
1442 for (final Role role : predicate.getRoles()) {
1443 for (final ExternalRef ref : ImmutableList.copyOf(role.getExternalRefs())) {
1444 if (ref.getResource().equalsIgnoreCase("framenet")) {
1445 final String s = ref.getReference();
1446 final int index = s.indexOf('@');
1447 if (index > 0) {
1448 final String frame = s.substring(0, index);
1449 final String fe = s.substring(index + 1);
1450 final IRI fnProperty = FrameBase.propertyFor(frame, fe);
1451 if (fnProperty != null) {
1452 NAFUtils.setRef(role,
1453 new ExternalRef("FrameBase", fnProperty.getLocalName()));
1454 }
1455 }
1456 }
1457 }
1458 }
1459 }
1460 }
1461
1462 private void applySRLRoleLinking(final KAFDocument document) {
1463
1464
1465 for (final Predicate predicate : Lists.newArrayList(document.getPredicates())) {
1466 for (final Role role : predicate.getRoles()) {
1467
1468
1469 final Term head = NAFUtils.extractHead(document, role.getSpan());
1470 if (head == null) {
1471 continue;
1472 }
1473
1474
1475 final Set<Term> argTerms = document
1476 .getTermsByDepAncestors(Collections.singleton(head), PARTICIPATION_REGEX);
1477
1478
1479 linkEntitiesTimexPredicates(document, role, role.getSpan(), argTerms,
1480 this.srlRoleLinkingUsingCoref);
1481 }
1482 }
1483 }
1484
1485 private void applyOpinionLinking(final KAFDocument document) {
1486
1487
1488 for (final Opinion opinion : document.getOpinions()) {
1489
1490
1491 final OpinionExpression expression = opinion.getOpinionExpression();
1492 if (expression != null) {
1493 linkEntitiesTimexPredicates(document, expression, expression.getSpan(),
1494 NAFUtils.extractHeads(document, null, expression.getTerms(),
1495 NAFUtils.matchExtendedPos(document, "NN", "VB", "JJ", "R")),
1496 this.opinionLinkingUsingCoref);
1497 }
1498
1499
1500 final OpinionHolder holder = opinion.getOpinionHolder();
1501 if (holder != null) {
1502 linkEntitiesTimexPredicates(document, holder, holder.getSpan(),
1503 NAFUtils.extractHeads(document, null, holder.getTerms(), NAFUtils
1504 .matchExtendedPos(document, "NN", "PRP", "JJP", "DTP", "WP")),
1505 this.opinionLinkingUsingCoref);
1506 }
1507
1508
1509 final OpinionTarget target = opinion.getOpinionTarget();
1510 if (target != null) {
1511 linkEntitiesTimexPredicates(
1512 document, target, target.getSpan(), NAFUtils
1513 .extractHeads(document, null, target.getTerms(),
1514 NAFUtils.matchExtendedPos(document, "NN", "PRP", "JJP",
1515 "DTP", "WP", "VB")),
1516 this.opinionLinkingUsingCoref);
1517 }
1518 }
1519 }
1520
1521 private static void linkEntitiesTimexPredicates(final KAFDocument document,
1522 final Object annotation, final Span<Term> spanToModify, final Set<Term> heads,
1523 final boolean useCoref) {
1524
1525
1526 spanToModify.getHeads().clear();
1527 if (!heads.isEmpty()) {
1528 spanToModify.getHeads().addAll(heads);
1529 }
1530
1531
1532 Set<Term> linkableTerms = heads;
1533 if (useCoref) {
1534 linkableTerms = Sets.newHashSet(heads);
1535 for (final Term argTerm : heads) {
1536 for (final Coref coref : document.getCorefsByTerm(argTerm)) {
1537 final List<Term> spanHeads = Lists.newArrayList();
1538 for (final Span<Term> span : coref.getSpans()) {
1539 final Term spanHead = NAFUtils.extractHead(document, span);
1540 if (spanHead != null) {
1541 spanHeads.add(spanHead);
1542 }
1543 }
1544 if (spanHeads.contains(argTerm)) {
1545 for (final Term spanHead : spanHeads) {
1546 linkableTerms.addAll(document.getTermsByDepAncestors(
1547 Collections.singleton(spanHead), "(COORD CONJ?)*"));
1548 }
1549 }
1550 }
1551 }
1552 }
1553
1554
1555 for (final Term term : linkableTerms) {
1556
1557
1558 final boolean isCoref = !heads.contains(term);
1559
1560
1561 for (final Entity entity : document.getEntitiesByTerm(term)) {
1562 for (final Span<Term> span : entity.getSpans()) {
1563 final Term spanHead = NAFUtils.extractHead(document, span);
1564 if (term.equals(spanHead)) {
1565 final String res = isCoref ? NAFUtils.RESOURCE_ENTITY_COREF
1566 : NAFUtils.RESOURCE_ENTITY_REF;
1567 NAFUtils.setRef(annotation, document.newExternalRef(res, entity.getId()));
1568 if (LOGGER.isDebugEnabled()) {
1569 LOGGER.debug("Linked {} to {} as {}", NAFUtils.toString(entity),
1570 NAFUtils.toString(annotation), res);
1571 }
1572 }
1573 }
1574 }
1575
1576
1577 for (final Timex3 timex : document.getTimeExsByWF(term.getWFs().get(0))) {
1578 final Term timexHead = NAFUtils.extractHead(document, KAFDocument
1579 .newTermSpan(document.getTermsByWFs(timex.getSpan().getTargets())));
1580 if (term.equals(timexHead)) {
1581 final String res = isCoref ? NAFUtils.RESOURCE_TIMEX_COREF
1582 : NAFUtils.RESOURCE_TIMEX_REF;
1583 NAFUtils.setRef(annotation, document.newExternalRef(res, timex.getId()));
1584 if (LOGGER.isDebugEnabled()) {
1585 LOGGER.debug("Linked {} to {} as {}", NAFUtils.toString(timex),
1586 NAFUtils.toString(annotation), res);
1587 }
1588 }
1589 }
1590
1591
1592 for (final Predicate pred : document.getPredicatesByTerm(term)) {
1593 if (term.equals(NAFUtils.extractHead(document, pred.getSpan()))) {
1594 final String res = isCoref ? NAFUtils.RESOURCE_PREDICATE_COREF
1595 : NAFUtils.RESOURCE_PREDICATE_REF;
1596 NAFUtils.setRef(annotation, document.newExternalRef(res, pred.getId()));
1597 if (LOGGER.isDebugEnabled()) {
1598 LOGGER.debug("Linked {} to {} as {}", NAFUtils.toString(pred),
1599 NAFUtils.toString(annotation), res);
1600 }
1601 }
1602 }
1603 }
1604 }
1605
1606 private void mapExternalRefs(final Object annotation,
1607 final Multimap<String, String> mappings) {
1608
1609
1610
1611 final Set<String> prefixes = Sets.newHashSet();
1612 final Set<String> keys = Sets.newHashSet();
1613
1614
1615 for (final ExternalRef ref : NAFUtils.getRefs(annotation, null, null)) {
1616 final String prefix = MAPPING_PREFIXES.get(ref.getResource().toLowerCase());
1617 if (prefix != null) {
1618 prefixes.add(prefix);
1619 keys.add(prefix + ":" + ref.getReference());
1620 }
1621 }
1622
1623
1624 final List<String> queue = Lists.newLinkedList(keys);
1625 while (!queue.isEmpty()) {
1626 final String key = queue.remove(0);
1627 for (final String mappedKey : mappings.get(key)) {
1628 final String mappedPrefix = mappedKey.substring(0, 2);
1629 if (!prefixes.contains(mappedPrefix) && !keys.contains(mappedKey)) {
1630 final String mappedResource = MAPPING_PREFIXES.inverse().get(mappedPrefix);
1631 final String mappedReference = mappedKey.substring(3);
1632 keys.add(mappedKey);
1633 queue.add(mappedKey);
1634 NAFUtils.addRef(annotation, new ExternalRef(mappedResource, mappedReference));
1635 if (LOGGER.isDebugEnabled()) {
1636 LOGGER.debug("Mapped {} : {} to {} for {}", mappedResource,
1637 mappedReference, mappedKey, NAFUtils.toString(annotation));
1638 }
1639 }
1640 }
1641 }
1642 }
1643
1644 private void applySRLPreMOnIRIs(final KAFDocument document) {
1645
1646
1647 final List<String> models = Arrays.asList(NAFUtils.RESOURCE_FRAMENET,
1648 NAFUtils.RESOURCE_VERBNET, NAFUtils.RESOURCE_PROPBANK, NAFUtils.RESOURCE_NOMBANK);
1649
1650 for (final Predicate predicate : document.getPredicates()) {
1651
1652 List<ExternalRef> allPredicateExtRefs = predicate.getExternalRefs();
1653 List<ExternalRef> predicateExtRefToRemove = Lists.newArrayList();
1654
1655 for (final ExternalRef predRef : ImmutableList.copyOf(allPredicateExtRefs)) {
1656 String refStr = predRef.getResource();
1657
1658 if (models.contains(refStr)) {
1659 final String pred = predRef.getReference();
1660 final String source = predRef.getSource();
1661
1662 final IRI premonIRI = NAFUtils.createPreMOnSemanticClassIRIfor(refStr, pred);
1663 if (premonIRI != null) {
1664 ExternalRef e = new ExternalRef("PreMOn+" + refStr,
1665 premonIRI.getLocalName());
1666 if (source != null)
1667 e.setSource(source);
1668 NAFUtils.setRef(predicate, e);
1669
1670 }
1671
1672 predicateExtRefToRemove.add(predRef);
1673 }
1674
1675 }
1676
1677
1678 for (ExternalRef toBeDropped : predicateExtRefToRemove) {
1679 allPredicateExtRefs.remove(toBeDropped);
1680 }
1681
1682
1683 for (final Role role : predicate.getRoles()) {
1684
1685 List<ExternalRef> allRoleExtRefs = role.getExternalRefs();
1686 List<ExternalRef> roleExtRefToRemove = Lists.newArrayList();
1687
1688 for (final ExternalRef roleRef : ImmutableList.copyOf(allRoleExtRefs)) {
1689
1690 String refStr = roleRef.getResource();
1691
1692 if (models.contains(refStr)) {
1693
1694 final String predicateAndRole = roleRef.getReference();
1695 final String source = roleRef.getSource();
1696 final int index = predicateAndRole.indexOf('@');
1697 if (index > 0) {
1698 final String pred = predicateAndRole.substring(0, index);
1699 final String rol = predicateAndRole.substring(index + 1);
1700
1701 final IRI premonIRI = NAFUtils.createPreMOnSemanticRoleIRIfor(refStr,
1702 pred, rol);
1703 if (premonIRI != null) {
1704 ExternalRef e = new ExternalRef("PreMOn+" + refStr,
1705 premonIRI.getLocalName());
1706 if (source != null)
1707 e.setSource(source);
1708 NAFUtils.setRef(role, e);
1709 }
1710 }
1711 roleExtRefToRemove.add(roleRef);
1712 }
1713 }
1714
1715 for (ExternalRef toBeRemoved : roleExtRefToRemove) {
1716 allRoleExtRefs.remove(toBeRemoved);
1717 }
1718 }
1719 }
1720 }
1721
1722
1723
1724
1725
1726
1727 public static final Builder builder() {
1728 return new Builder();
1729 }
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740 public static final Builder builder(@Nullable final Boolean enableAll) {
1741 return new Builder()
1742 .withTermSenseCompletion(enableAll)
1743 .withEntityRemoveOverlaps(enableAll)
1744 .withEntitySpanFixing(enableAll)
1745 .withEntityAddition(enableAll)
1746 .withCorefSpanFixing(enableAll)
1747 .withCorefForRoleDependencies(enableAll)
1748 .withLinkingCompletion(enableAll)
1749 .withLinkingFixing(enableAll)
1750 .withSRLRemoveWrongRefs(enableAll)
1751 .withSRLRemoveUnknownPredicates(enableAll)
1752 .withSRLPredicateAddition(enableAll)
1753 .withSRLSelfArgFixing(enableAll)
1754 .withSRLSenseMapping(enableAll)
1755 .withSRLRoleLinking(enableAll, enableAll)
1756 .withOpinionLinking(enableAll, enableAll).withSRLPreMOnIRIs(enableAll);
1757 }
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885 public static final class Builder {
1886
// Option values collected by this builder, one field per option. Every field is nullable
// and starts as null.
// NOTE(review): null presumably means "use the default of the built object" - confirm
// against the code consuming the builder, which is not visible here.

// Term options

@Nullable
private Boolean termSenseFiltering;

@Nullable
private Boolean termSenseCompletion;

// Entity options

@Nullable
private Boolean entityRemoveOverlaps;

@Nullable
private Boolean entitySpanFixing;

@Nullable
private Boolean entityAddition;

@Nullable
private Boolean entityValueNormalization;

// Linking options

@Nullable
private Boolean linkingCompletion;

@Nullable
private Boolean linkingFixing;

// Coreference options

@Nullable
private Boolean corefSpanFixing;

@Nullable
private Boolean corefForRoleDependencies;

// SRL options

@Nullable
private Boolean srlPreprocess;

@Nullable
private Boolean srlEnableMate;

@Nullable
private Boolean srlEnableSemafor;

@Nullable
private Boolean srlRemoveWrongRefs;

@Nullable
private Boolean srlRemoveUnknownPredicates;

@Nullable
private Boolean srlPredicateAddition;

@Nullable
private Boolean srlSelfArgFixing;

@Nullable
private Boolean srlSenseMapping;

@Nullable
private Boolean srlFrameBaseMapping;

@Nullable
private Boolean srlRoleLinking;

@Nullable
private Boolean srlRoleLinkingUsingCoref;

@Nullable
private Boolean srlPreMOnIRIs;

// Opinion options

@Nullable
private Boolean opinionLinking;

@Nullable
private Boolean opinionLinkingUsingCoref;

// Package-private constructor: instances are obtained through the static builder()
// factory methods of the enclosing class.
Builder() {
}
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971 public Builder withProperties(final Map<?, ?> properties, @Nullable final String prefix) {
1972 final String p = prefix == null ? "" : prefix.endsWith(".") ? prefix : prefix + ".";
1973 for (final Map.Entry<?, ?> entry : properties.entrySet()) {
1974 if (entry.getKey() != null && entry.getValue() != null
1975 && entry.getKey().toString().startsWith(p)) {
1976 final String name = entry.getKey().toString().substring(p.length());
1977 final String value = Strings.emptyToNull(entry.getValue().toString());
1978 if ("termSenseFiltering".equals(name)) {
1979 withTermSenseFiltering(Boolean.valueOf(value));
1980 } else if ("termSenseCompletion".equals(name)) {
1981 withTermSenseCompletion(Boolean.valueOf(value));
1982 } else if ("entityRemoveOverlaps".equals(name)) {
1983 withEntityRemoveOverlaps(Boolean.valueOf(value));
1984 } else if ("entitySpanFixing".equals(name)) {
1985 withEntitySpanFixing(Boolean.valueOf(value));
1986 } else if ("entityAddition".equals(name)) {
1987 withEntityAddition(Boolean.valueOf(value));
1988 } else if ("entityValueNormalization".equals(name)) {
1989 withEntityValueNormalization(Boolean.valueOf(value));
1990 } else if ("linkingCompletion".equals(name)) {
1991 withLinkingCompletion(Boolean.valueOf(value));
1992 } else if ("linkingFixing".equals(name)) {
1993 withLinkingFixing(Boolean.valueOf(value));
1994 } else if ("corefForRoleDependencies".equals(name)) {
1995 withCorefForRoleDependencies(Boolean.valueOf(value));
1996 } else if ("corefSpanFixing".equals(name)) {
1997 withCorefSpanFixing(Boolean.valueOf(value));
1998 } else if ("srlPreprocess".equals(name)) {
1999 if ("none".equalsIgnoreCase(value)) {
2000 withSRLPreprocess(false, false, false);
2001 } else if ("basic".equalsIgnoreCase(value)) {
2002 withSRLPreprocess(true, false, false);
2003 } else if ("mate".equalsIgnoreCase(value)) {
2004 withSRLPreprocess(true, true, false);
2005 } else if ("semafor".equalsIgnoreCase(value)) {
2006 withSRLPreprocess(true, false, true);
2007 } else if ("mate+semafor".equalsIgnoreCase(value)) {
2008 withSRLPreprocess(true, true, true);
2009 } else {
2010 throw new IllegalArgumentException("Invalid '" + value
2011 + "' srlPreprocess property. Supported: none basic mate semafor mate+semafor");
2012 }
2013 } else if ("srlRemoveWrongRefs".equals(name)) {
2014 withSRLRemoveWrongRefs(Boolean.valueOf(value));
2015 } else if ("srlRemoveUnknownPredicates".equals(name)) {
2016 withSRLRemoveUnknownPredicates(Boolean.valueOf(value));
2017 } else if ("srlPredicateAddition".equals(name)) {
2018 withSRLPredicateAddition(Boolean.valueOf(value));
2019 } else if ("srlSelfArgFixing".equals(name)) {
2020 withSRLSelfArgFixing(Boolean.valueOf(value));
2021 } else if ("srlSenseMapping".equals(name)) {
2022 withSRLSenseMapping(Boolean.valueOf(value));
2023 } else if ("srlFrameBaseMapping".equals(name)) {
2024 withSRLFrameBaseMapping(Boolean.valueOf(value));
2025 } else if ("srlRoleLinking".equals(name)) {
2026 if ("none".equalsIgnoreCase(value)) {
2027 withSRLRoleLinking(false, false);
2028 } else if ("exact".equalsIgnoreCase(value)) {
2029 withSRLRoleLinking(true, false);
2030 } else if ("coref".equalsIgnoreCase(value)) {
2031 withSRLRoleLinking(true, true);
2032 } else {
2033 throw new IllegalArgumentException("Invalid '" + value
2034 + "' srlRoleLinking property. Supported: none exact coref ");
2035 }
2036 } else if ("srlPreMOnIRIs".equals(name)) {
2037 withSRLPreMOnIRIs(Boolean.valueOf(value));
2038 } else if ("opinionLinking".equals(name)) {
2039 if ("none".equalsIgnoreCase(value)) {
2040 withOpinionLinking(false, false);
2041 } else if ("exact".equalsIgnoreCase(value)) {
2042 withOpinionLinking(true, false);
2043 } else if ("coref".equalsIgnoreCase(value)) {
2044 withOpinionLinking(true, true);
2045 } else {
2046 throw new IllegalArgumentException("Invalid '" + value
2047 + "' opinionLinking property. Supported: none exact coref ");
2048 }
2049 }
2050 }
2051 }
2052 return this;
2053 }
2054
2055
2056
2057
2058
2059
2060
2061
2062
/**
 * Sets the {@code termSenseFiltering} option of this builder.
 *
 * @param termSenseFiltering
 *            the value to store, possibly {@code null} (NOTE(review): null presumably
 *            selects the default behavior - confirm where the builder is consumed)
 * @return this builder, for call chaining
 */
public Builder withTermSenseFiltering(@Nullable final Boolean termSenseFiltering) {
    this.termSenseFiltering = termSenseFiltering;
    return this;
}
2067
2068
2069
2070
2071
2072
2073
2074
2075
/**
 * Sets the {@code termSenseCompletion} option of this builder.
 *
 * @param termSenseCompletion
 *            the value to store, possibly {@code null} (NOTE(review): null presumably
 *            selects the default behavior - confirm where the builder is consumed)
 * @return this builder, for call chaining
 */
public Builder withTermSenseCompletion(@Nullable final Boolean termSenseCompletion) {
    this.termSenseCompletion = termSenseCompletion;
    return this;
}
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
/**
 * Sets the {@code entityRemoveOverlaps} option of this builder.
 *
 * @param entityRemoveOverlaps
 *            the value to store, possibly {@code null} (NOTE(review): null presumably
 *            selects the default behavior - confirm where the builder is consumed)
 * @return this builder, for call chaining
 */
public Builder withEntityRemoveOverlaps(@Nullable final Boolean entityRemoveOverlaps) {
    this.entityRemoveOverlaps = entityRemoveOverlaps;
    return this;
}
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
/**
 * Sets the {@code entitySpanFixing} option of this builder.
 *
 * @param entitySpanFixing
 *            the value to store, possibly {@code null} (NOTE(review): null presumably
 *            selects the default behavior - confirm where the builder is consumed)
 * @return this builder, for call chaining
 */
public Builder withEntitySpanFixing(@Nullable final Boolean entitySpanFixing) {
    this.entitySpanFixing = entitySpanFixing;
    return this;
}
2109
2110
2111
2112
2113
2114
2115
2116
2117
/**
 * Sets the {@code entityAddition} option of this builder.
 *
 * @param entityAddition
 *            the value to store, possibly {@code null} (NOTE(review): null presumably
 *            selects the default behavior - confirm where the builder is consumed)
 * @return this builder, for call chaining
 */
public Builder withEntityAddition(@Nullable final Boolean entityAddition) {
    this.entityAddition = entityAddition;
    return this;
}
2122
2123
2124
2125
2126
2127
2128
2129
2130
/**
 * Sets the {@code entityValueNormalization} option of this builder.
 *
 * @param entityValueNormalization
 *            the value to store, possibly {@code null} (NOTE(review): null presumably
 *            selects the default behavior - confirm where the builder is consumed)
 * @return this builder, for call chaining
 */
public Builder withEntityValueNormalization(
        @Nullable final Boolean entityValueNormalization) {
    this.entityValueNormalization = entityValueNormalization;
    return this;
}
2136
2137
2138
2139
2140
2141
2142
2143
2144
/**
 * Sets the {@code linkingCompletion} option of this builder.
 *
 * @param linkingCompletion
 *            the value to store, possibly {@code null} (NOTE(review): null presumably
 *            selects the default behavior - confirm where the builder is consumed)
 * @return this builder, for call chaining
 */
public Builder withLinkingCompletion(@Nullable final Boolean linkingCompletion) {
    this.linkingCompletion = linkingCompletion;
    return this;
}
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
/**
 * Sets the {@code linkingFixing} option of this builder.
 *
 * @param linkingFixing
 *            the value to store, possibly {@code null} (NOTE(review): null presumably
 *            selects the default behavior - confirm where the builder is consumed)
 * @return this builder, for call chaining
 */
public Builder withLinkingFixing(@Nullable final Boolean linkingFixing) {
    this.linkingFixing = linkingFixing;
    return this;
}
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
/**
 * Sets the {@code corefForRoleDependencies} option of this builder.
 *
 * @param corefForRoleDependencies
 *            the value to store, possibly {@code null} (NOTE(review): null presumably
 *            selects the default behavior - confirm where the builder is consumed)
 * @return this builder, for call chaining
 */
public Builder withCorefForRoleDependencies(
        @Nullable final Boolean corefForRoleDependencies) {
    this.corefForRoleDependencies = corefForRoleDependencies;
    return this;
}
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199 public Builder withCorefSpanFixing(@Nullable final Boolean corefSpanFixing) {
2200 this.corefSpanFixing = corefSpanFixing;
2201 return this;
2202 }
2203
2204
2205
2206
2207
2208
2209
2210
2211
2212
2213
2214
2215
2216
2217 public Builder withSRLPreprocess(@Nullable final Boolean srlPreprocess,
2218 @Nullable final Boolean srlEnableMate, @Nullable final Boolean srlEnableSemafor) {
2219 this.srlPreprocess = srlPreprocess;
2220 this.srlEnableMate = srlEnableMate;
2221 this.srlEnableSemafor = srlEnableSemafor;
2222 return this;
2223 }
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235
2236 public Builder withSRLRemoveWrongRefs(@Nullable final Boolean srlRemoveWrongRefs) {
2237 this.srlRemoveWrongRefs = srlRemoveWrongRefs;
2238 return this;
2239 }
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252 public Builder withSRLRemoveUnknownPredicates(
2253 @Nullable final Boolean srlRemoveUnknownPredicates) {
2254 this.srlRemoveUnknownPredicates = srlRemoveUnknownPredicates;
2255 return this;
2256 }
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266 public Builder withSRLPredicateAddition(@Nullable final Boolean srlPredicateAddition) {
2267 this.srlPredicateAddition = srlPredicateAddition;
2268 return this;
2269 }
2270
2271
2272
2273
2274
2275
2276
2277
2278
2279
2280
2281
2282
2283 public Builder withSRLSelfArgFixing(@Nullable final Boolean srlSelfArgFixing) {
2284 this.srlSelfArgFixing = srlSelfArgFixing;
2285 return this;
2286 }
2287
2288
2289
2290
2291
2292
2293
2294
2295
2296
2297
2298 public Builder withSRLSenseMapping(@Nullable final Boolean srlSenseMapping) {
2299 this.srlSenseMapping = srlSenseMapping;
2300 return this;
2301 }
2302
2303
2304
2305
2306
2307
2308
2309
2310
2311
2312 public Builder withSRLFrameBaseMapping(@Nullable final Boolean srlFrameBaseMapping) {
2313 this.srlFrameBaseMapping = srlFrameBaseMapping;
2314 return this;
2315 }
2316
2317
2318
2319
2320
2321
2322
2323
2324
2325
2326
2327
2328
2329
2330
2331
2332
2333
2334 public Builder withSRLRoleLinking(@Nullable final Boolean srlRoleLinking,
2335 @Nullable final Boolean useCoref) {
2336 this.srlRoleLinking = srlRoleLinking;
2337 this.srlRoleLinkingUsingCoref = useCoref;
2338 return this;
2339 }
2340
2341
2342
2343
2344
2345
2346
2347
2348 public Builder withSRLPreMOnIRIs(@Nullable final Boolean srlPreMOnIRIs) {
2349 this.srlPreMOnIRIs = srlPreMOnIRIs;
2350 return this;
2351 }
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363
2364 public Builder withOpinionLinking(@Nullable final Boolean opinionLinking,
2365 @Nullable final Boolean opinionLinkingUsingCoref) {
2366 this.opinionLinking = opinionLinking;
2367 this.opinionLinkingUsingCoref = opinionLinkingUsingCoref;
2368 return this;
2369 }
2370
2371
2372
2373
2374
2375
2376 public NAFFilter build() {
2377 return new NAFFilter(this);
2378 }
2379
2380 }
2381
2382 }