package eu.fbk.dkm.pikes.rdf.naf;

import java.lang.reflect.Array;
import java.util.*;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import javax.annotation.Nullable;

import com.google.common.base.MoreObjects;
import com.google.common.base.Objects;
import com.google.common.base.Strings;
import com.google.common.collect.*;

import eu.fbk.dkm.pikes.rdf.util.OWLTime;
import eu.fbk.dkm.pikes.rdf.vocab.*;
import eu.fbk.dkm.pikes.resources.YagoTaxonomy;
import eu.fbk.utils.svm.Util;
import org.eclipse.rdf4j.model.*;
import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
import org.eclipse.rdf4j.model.vocabulary.*;
import org.eclipse.rdf4j.rio.RDFHandlerException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import ixa.kaflib.Coref;
import ixa.kaflib.Dep;
import ixa.kaflib.Entity;
import ixa.kaflib.ExternalRef;
import ixa.kaflib.KAFDocument;
import ixa.kaflib.KAFDocument.FileDesc;
import ixa.kaflib.LinguisticProcessor;
import ixa.kaflib.Predicate;
import ixa.kaflib.Predicate.Role;
import ixa.kaflib.Span;
import ixa.kaflib.Term;
import ixa.kaflib.Timex3;
import ixa.kaflib.WF;

import eu.fbk.dkm.pikes.rdf.api.Extractor;
import eu.fbk.dkm.pikes.rdf.util.ModelUtil;
import eu.fbk.dkm.pikes.resources.NAFUtils;
import eu.fbk.rdfpro.RDFHandlers;
import eu.fbk.rdfpro.util.Hash;
import eu.fbk.rdfpro.util.Statements;

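/**
 * Extracts KEM/RDF triples from a NAF document: mentions, entity, timex and predicate
 * annotations, coordinations, coreferences and participation links are emitted into the
 * supplied RDF4J {@link Model}.
 *
 * <p>Minimal usage sketch (assuming a NAF file readable with the standard kaflib loader and an
 * in-memory RDF4J model; file name and loader call are illustrative):</p>
 *
 * <pre>{@code
 * KAFDocument naf = KAFDocument.createFromFile(new File("document.naf"));
 * Model model = new LinkedHashModel();
 * NAFExtractor extractor = NAFExtractor.builder().build(); // or NAFExtractor.DEFAULT
 * extractor.generate(naf, model, null); // null = process all sentences
 * }</pre>
 */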
public class NAFExtractor implements Extractor {

    private static final Logger LOGGER = LoggerFactory.getLogger(NAFExtractor.class);

    public void generate(final Object document, final Model model,
            @Nullable final Iterable<Integer> sentenceIDs) throws Exception {
        final KAFDocument doc = (KAFDocument) document;
        final IRI docIRI = SimpleValueFactory.getInstance().createIRI(doc.getPublic().uri);

        // Sentence IDs in NAF are 1-based: mark the sentences to process (all if none given).
        final boolean[] ids = new boolean[doc.getNumSentences() + 1];
        if (sentenceIDs == null) {
            Arrays.fill(ids, true);
        } else {
            for (final Integer sentenceID : sentenceIDs) {
                ids[sentenceID] = true;
            }
        }

        new Extraction(docIRI, model, doc, ids).run();
    }

    @Override
    public void extract(final Object document, final Model model, final boolean[] sentenceIDs)
            throws Exception {
        final KAFDocument doc = (KAFDocument) document;
        final IRI docIRI = SimpleValueFactory.getInstance().createIRI(doc.getPublic().uri);
        new Extraction(docIRI, model, doc, sentenceIDs).run();
    }

    private static final String MODIFIER_REGEX = "(NMOD|AMOD|TMP|LOC|TITLE) PMOD? (COORD CONJ?)* PMOD?";

    private static final String PARTICIPATION_REGEX = ""
            + "SUB? ( (COORD CONJ?)* PMOD)? ((VC OPRD?)|(IM OPRD?))*";

    private static final String COORDINATION_REGEX = "(COORD CONJ?)*";

    private static final Multimap<String, IRI> DEFAULT_TYPE_MAP = ImmutableMultimap
            .<String, IRI>builder()
            .put("entity.person", NWR.PERSON)
            .put("entity.organization", NWR.ORGANIZATION)
            .put("entity.location", NWR.LOCATION)
            .put("entity.misc", NWR.MISC)
            .put("entity.money", GR.PRICE_SPECIFICATION)
            .put("entity.date", OWLTIME.DATE_TIME_INTERVAL)
            .put("entity.time", OWLTIME.DATE_TIME_INTERVAL)
            .put("timex.date", OWLTIME.DATE_TIME_INTERVAL)
            .put("timex.duration", OWLTIME.PROPER_INTERVAL)
            .build();

    private static final Map<String, String> DEFAULT_NAMESPACE_MAP = ImmutableMap
            .<String, String>builder()
            .put("propbank", "http://www.newsreader-project.eu/ontologies/propbank/")
            .put("nombank", "http://www.newsreader-project.eu/ontologies/nombank/")
            .put("framenet", "http://www.newsreader-project.eu/ontologies/framenet/")
            .put("verbnet", "http://www.newsreader-project.eu/ontologies/verbnet/")
            .put("premon+propbank", "http://premon.fbk.eu/resource/")
            .put("premon+nombank", "http://premon.fbk.eu/resource/")
            .put("premon+framenet", "http://premon.fbk.eu/resource/")
            .put("premon+verbnet", "http://premon.fbk.eu/resource/")
            .put("eso", "http://www.newsreader-project.eu/domain-ontology#")
            .put("framebase", "http://framebase.org/ns/")
            .put("wordnet", "http://sli.uvigo.gal/rdf_galnet/")
            .put("wn30-ukb", "http://wordnet-rdf.princeton.edu/wn30/")
            .put("wn30-sst", "http://pikes.fbk.eu/wn/sst/")
            .put("wn30", "http://wordnet-rdf.princeton.edu/wn30/")
            .put("bbn", "http://pikes.fbk.eu/bbn/")
            .put(KEM.PREFIX, KEM.NAMESPACE)
            .put(KEMT.PREFIX, KEMT.NAMESPACE)
            .put("attribute", "attr:")
            .put("syn", "http://wordnet-rdf.princeton.edu/wn30/")
            .put(SUMO.PREFIX, SUMO.NAMESPACE)
            .put("yago", YagoTaxonomy.NAMESPACE)
            .build();

    private static final String DEFAULT_OWLTIME_NAMESPACE = "http://pikes.fbk.eu/time/";
    private static final String DEFAULT_NER_NAMESPACE = "http://pikes.fbk.eu/ner/";
    private static final String DEFAULT_WN_SST_NAMESPACE = "http://pikes.fbk.eu/wn/sst/";
    private static final String DEFAULT_WN_SYN_NAMESPACE = "http://wordnet-rdf.princeton.edu/wn30/";
    private static final String DEFAULT_BBN_NAMESPACE = "http://pikes.fbk.eu/bbn/";

    private static final String DEFAULT_OLIA_UD_POS = "http://fginter.github.io/docs/u/pos/all.html#";
    private static final String DEFAULT_OLIA_PENN_POS = "http://purl.org/olia/penn.owl#";
    public static final NAFExtractor DEFAULT = NAFExtractor.builder().build();

    private final Multimap<String, IRI> typeMap;

    private final Map<String, String> namespaceMap;

    private final String owltimeNamespace;

    private final boolean merging;

    private final boolean normalization;

    public NAFExtractor(final Builder builder) {
        this.typeMap = ImmutableMultimap.copyOf(MoreObjects.firstNonNull(builder.typeMap,
                DEFAULT_TYPE_MAP));
        this.namespaceMap = ImmutableMap.copyOf(MoreObjects.firstNonNull(builder.namespaceMap,
                DEFAULT_NAMESPACE_MAP));
        this.owltimeNamespace = MoreObjects.firstNonNull(builder.owltimeNamespace,
                DEFAULT_OWLTIME_NAMESPACE);
        this.merging = MoreObjects.firstNonNull(builder.merging, Boolean.FALSE);
        this.normalization = MoreObjects.firstNonNull(builder.normalization, Boolean.FALSE);
    }

    private final class Extraction {

        private final Model model;

        private final KAFDocument document;

        private final ValueFactory vf;

        private final String documentText;

        private final IRI documentIRI;

        private final boolean[] sentenceIDs;

        private final BiMap<String, String> mintedIRIs;

        private final IRI contextIRI;

        // Mentions indexed by head term ID; annotations indexed by mention.
        private final Map<String, Set<Mention>> mentions;

        private final Map<Mention, Set<Annotation>> annotations;

        // Mentions indexed by the ID of the originating NAF element (timex, entity, predicate).
        private final Map<String, Mention> nafIdMentions;

        // Returns the mention with the given head term ID and exactly the given extent, if any.
        private Mention getMention(final String head, final List<Term> terms) {
            Mention mention = null;
            if (this.mentions.containsKey(head)) {
                for (final Mention m : this.mentions.get(head)) {
                    if (m.extent.equals(terms)) {
                        mention = m;
                    }
                }
            }
            return mention;
        }

        // Returns the mention with the given head term ID having the widest extent, if any.
        private Mention getBestMention(final String head) {
            Mention bestMention = null;
            if (this.mentions.containsKey(head)) {
                bestMention = this.mentions.get(head).iterator().next();
                for (final Mention m : this.mentions.get(head)) {
                    if (bestMention.extent.size() < m.extent.size()) {
                        bestMention = m;
                    }
                }
            }
            return bestMention;
        }

        private void safeMentionPutInMap(final String id, final Mention mention) {
            this.mentions.computeIfAbsent(id, key -> Sets.newHashSet()).add(mention);
        }

        private void safeAnnotationPutInMap(final Mention mention, final Annotation annotation) {
            this.annotations.computeIfAbsent(mention, key -> Sets.newHashSet()).add(annotation);
        }

        Extraction(final IRI documentIRI, final Model model, final KAFDocument document,
                final boolean[] sentenceIDs) {

            // Rebuild the document text from word forms, preserving original character offsets.
            final StringBuilder builder = new StringBuilder();
            for (final WF word : document.getWFs()) {
                final int offset = word.getOffset();
                if (builder.length() > offset) {
                    builder.setLength(offset);
                } else {
                    while (builder.length() < offset) {
                        builder.append(" ");
                    }
                }
                builder.append(word.getForm());
            }

            this.model = model;
            this.document = document;
            this.mintedIRIs = HashBiMap.create();
            this.vf = Statements.VALUE_FACTORY;
            this.documentText = builder.toString();
            this.documentIRI = documentIRI;
            this.sentenceIDs = sentenceIDs;
            this.contextIRI = this.vf.createIRI(this.documentIRI.stringValue() + "#ctx");

            this.model.add(this.contextIRI, NIF.SOURCE_URL, this.documentIRI);
            this.model.add(this.contextIRI, RDF.TYPE, NIF.CONTEXT);
            this.model.add(this.contextIRI, NIF.IS_STRING,
                    this.vf.createLiteral(this.documentText));

            this.mentions = Maps.newHashMap();
            this.annotations = Maps.newHashMap();
            this.nafIdMentions = Maps.newHashMap();
        }

        void run() {
            // Process NAF layers in order; later stages rely on mentions created earlier.
            processMetadata();
            processTimexes();
            processEntities();
            processPredicates();
            processCoordinations();
            processCoreferences();
            processRoles();
        }

        private void processMetadata() {

            // Obtain IRIs of the document and of the NAF resource annotating it.
            final IRI docIRI = this.documentIRI;
            final IRI nafIRI = this.vf.createIRI(docIRI.stringValue() + ".naf");

            // Emit document types.
            emitTriple(docIRI, RDF.TYPE, new IRI[] { KEMT.TEXT_RESOURCE, KS.RESOURCE, KS.TEXT });

            // Emit title, author, creation time and file metadata from the <fileDesc> element.
            if (this.document.getFileDesc() != null) {
                final FileDesc fd = this.document.getFileDesc();
                emitTriple(docIRI, DCTERMS.TITLE, fd.title);
                emitTriple(docIRI, DCTERMS.CREATOR, fd.author);
                emitTriple(docIRI, DCTERMS.CREATED, fd.creationtime);
                emitTriple(docIRI, KS.NAF_FILE_NAME, fd.filename);
                emitTriple(docIRI, KS.NAF_FILE_TYPE, fd.filetype);
                emitTriple(docIRI, KS.NAF_PAGES, fd.pages);
            }

            // Emit the document language, if known.
            if (this.document.getLang() != null) {
                emitTriple(docIRI, DCTERMS.LANGUAGE,
                        ModelUtil.languageCodeToIRI(this.document.getLang()));
            }

            // Emit a hash of the raw text, computed on its whitespace-normalized form.
            if (this.document.getRawText() != null) {
                final String rawText = this.document.getRawText();
                final StringBuilder builder = new StringBuilder();
                boolean addSpace = false;
                for (int i = 0; i < rawText.length(); ++i) {
                    final char c = rawText.charAt(i);
                    if (Character.isWhitespace(c)) {
                        addSpace = builder.length() > 0;
                    } else {
                        if (addSpace) {
                            builder.append(' ');
                            addSpace = false;
                        }
                        builder.append(c);
                    }
                }
                emitTriple(docIRI, KS.TEXT_HASH, Hash.murmur3(builder.toString()).toString());
            }

            // Link document and NAF resources.
            emitTriple(docIRI, KS.ANNOTATED_WITH, nafIRI);
            emitTriple(nafIRI, KS.ANNOTATION_OF, docIRI);

            // Emit types, version and identifier of the NAF resource.
            emitTriple(nafIRI, RDF.TYPE, new IRI[] { KEMT.TEXT_RESOURCE, KS.RESOURCE, KS.NAF });
            emitTriple(nafIRI, KS.VERSION, this.document.getVersion());
            emitTriple(nafIRI, DCTERMS.IDENTIFIER, this.document.getPublic().publicId);

            // Emit the layers and linguistic processors listed in the NAF header, using the first
            // available processor timestamp as the NAF creation time.
            String timestamp = null;
            for (final Map.Entry<String, List<LinguisticProcessor>> entry : this.document
                    .getLinguisticProcessors().entrySet()) {
                emitTriple(nafIRI, KS.LAYER,
                        this.vf.createIRI(KS.NAMESPACE, "layer_" + entry.getKey()));
                for (final LinguisticProcessor lp : entry.getValue()) {
                    if (timestamp == null) {
                        if (!Strings.isNullOrEmpty(lp.getBeginTimestamp())) {
                            timestamp = lp.getBeginTimestamp();
                        } else if (!Strings.isNullOrEmpty(lp.getEndTimestamp())) {
                            timestamp = lp.getEndTimestamp();
                        }
                    }
                    final IRI lpIRI = this.vf.createIRI(ModelUtil.cleanIRI(KS.NAMESPACE
                            + lp.getName() + '.' + lp.getVersion()));
                    emitTriple(nafIRI, DCTERMS.CREATOR, lpIRI);
                    emitTriple(lpIRI, DCTERMS.TITLE, lp.getName());
                    emitTriple(lpIRI, KS.VERSION, lp.getVersion());
                }
            }
            emitTriple(nafIRI, DCTERMS.CREATED, timestamp);
        }

        private void processTimexes() {
            for (final Timex3 timex : this.document.getTimeExs()) {
                if (timex.getSpan() == null
                        || this.sentenceIDs[timex.getSpan().getFirstTarget().getSent()]) {
                    try {
                        processTimex(timex);
                    } catch (final Throwable ex) {
                        LOGGER.error("Error processing " + NAFUtils.toString(timex) + ", type "
                                + timex.getType() + ", value " + timex.getValue(), ex);
                    }
                }
            }
        }

        private void processTimex(final Timex3 timex) {

            // Abort if the timex has no span.
            if (timex.getSpan() == null) {
                return;
            }

            // Extract terms, head and type of the timex expression.
            final List<Term> terms = this.document.getTermsByWFs(timex.getSpan().getTargets());
            final Term head = NAFUtils.extractHead(this.document, KAFDocument.newTermSpan(terms));
            final String type = timex.getType().trim().toLowerCase();

            // Reuse an existing mention with the same head and extent, or emit a new one.
            Mention mention = getMention(head.getId(), terms);
            final IRI mentionIRI;
            if (mention == null) {
                mentionIRI = emitMention(terms);
                mention = new Mention(head, terms, mentionIRI);
                safeMentionPutInMap(head.getId(), mention);
            } else {
                mentionIRI = mention.mentionIRI;
            }
            this.nafIdMentions.put(timex.getId(), mention);

            // Emit the timex annotation attached to the mention.
            final IRI semAnnoIRI = createSemanticAnnotationIRI(timex.getId(), mentionIRI,
                    KEMT.TIMEX);
            final Annotation ann = new Annotation(semAnnoIRI, KEMT.TIMEX);
            safeAnnotationPutInMap(mention, ann);

            // Map the timex value (if any) to an OWL Time description; reject unsupported types.
            IRI timexIRI = null;
            if (timex.getValue() != null) {
                if (type.equals("date") || type.equals("time")) {
                    emitTriple(semAnnoIRI, KEMT.TYPE_P,
                            type.equals("date") ? KEMT.TT_DATE : KEMT.TT_TIME);
                    final OWLTime.Interval interval = OWLTime.Interval
                            .parseTimex(timex.getValue());
                    if (interval != null) {
                        timexIRI = interval.toRDF(RDFHandlers.wrap(this.model),
                                NAFExtractor.this.owltimeNamespace, null);
                    } else {
                        LOGGER.debug("Could not represent date/time value '" + timex.getValue()
                                + "' of " + NAFUtils.toString(timex));
                    }

                } else if (type.equals("duration")) {
                    emitTriple(semAnnoIRI, KEMT.TYPE_P, KEMT.TT_DURATION);
                    final OWLTime.Duration duration = OWLTime.Duration
                            .parseTimex(timex.getValue());
                    if (duration != null) {
                        timexIRI = this.vf.createIRI(NAFExtractor.this.owltimeNamespace,
                                duration.toString());
                        final IRI durationIRI = duration.toRDF(RDFHandlers.wrap(this.model),
                                NAFExtractor.this.owltimeNamespace, null);
                        emitTriple(timexIRI, OWLTIME.HAS_DURATION_DESCRIPTION, durationIRI);
                    } else {
                        LOGGER.debug("Could not represent duration value '" + timex.getValue()
                                + "' of " + NAFUtils.toString(timex));
                    }

                } else {
                    throw new UnsupportedOperationException("Unsupported TIMEX3 type: " + type);
                }
            }

            // Fall back to a minted IRI if the value could not be represented in OWL Time.
            if (timexIRI == null) {
                timexIRI = mintIRI(timex.getId(),
                        MoreObjects.firstNonNull(timex.getValue(), timex.getSpan().getStr()));
            }

            // Emit the OWL Time value and the raw string of the timex.
            emitTriple(semAnnoIRI, KEMT.OBJECT_VALUE, timexIRI);
            emitTriple(semAnnoIRI, KEMT.RAW_STRING, emitFragment(terms));
        }

        private void processEntities() {
            for (final Entity entity : this.document.getEntities()) {
                for (final Span<Term> span : entity.getSpans()) {
                    if (this.sentenceIDs[span.getFirstTarget().getSent()]) {
                        try {
                            processEntity(entity);
                        } catch (final Throwable ex) {
                            LOGGER.error("Error processing " + NAFUtils.toString(entity)
                                    + ", type " + entity.getType(), ex);
                        }
                        break;
                    }
                }
            }
        }

        private void processEntity(final Entity entity) throws RDFHandlerException {

            // Retrieve terms, label and head of the entity; abort if the head cannot be found.
            final List<Term> terms = entity.getSpans().get(0).getTargets();
            final String label = NAFUtils.getText(NAFUtils.filterTerms(terms));
            final Term head = NAFUtils.extractHead(this.document, entity.getSpans().get(0));
            if (head == null) {
                return;
            }

            // Determine whether the entity denotes a value (property) rather than an individual.
            String type = entity.getType();
            type = type == null ? null : type.toLowerCase();
            final boolean isProperty = "money".equals(type) || "cardinal".equals(type)
                    || "ordinal".equals(type) || "percent".equals(type) || "language".equals(type)
                    || "norp".equals(type) || "quantity".equals(type);

            // Consider the entity named if flagged as such in NAF (two labels are special-cased).
            final boolean named = entity.isNamed() || "romanticism".equalsIgnoreCase(label)
                    || "operant conditioning chamber".equalsIgnoreCase(label);

            // Discard value entities that only modify another term (NMOD/AMOD dependencies), as
            // they are handled when processing the modified entity.
            final Dep dep = this.document.getDepToTerm(head);
            if (isProperty && dep != null) {
                final String depLabel = dep.getRfunc().toUpperCase();
                if (depLabel.contains("NMOD") || depLabel.contains("AMOD")) {
                    return;
                }
            }

            // Reuse an existing mention with the same head and extent, or emit a new one.
            Mention mention = getMention(head.getId(), terms);
            final IRI mentionIRI;
            if (mention == null) {
                mentionIRI = emitMention(terms);
                mention = new Mention(head, terms, mentionIRI);
                safeMentionPutInMap(head.getId(), mention);
            } else {
                mentionIRI = mention.mentionIRI;
            }
            this.nafIdMentions.put(entity.getId(), mention);

            // Emit one typing annotation per NERC probability-model reference, if available.
            boolean typeAnnotation = false;
            boolean hasOtherNercTypes = false;
            for (final ExternalRef ref : entity.getExternalRefs()) {
                final String resource = ref.getResource();
                if (resource.equals("value-confidence") || resource.equals("nerc-probmodel")) {
                    hasOtherNercTypes = true;

                    final String reference = ref.getReference();
                    final IRI semAnnoIRI = createSemanticAnnotationIRI(
                            entity.getId() + reference, mentionIRI, KEMT.ENTITY_ANNOTATION);
                    final Annotation ann = new Annotation(semAnnoIRI, KEMT.ENTITY_ANNOTATION);
                    safeAnnotationPutInMap(mention, ann);

                    emitTriple(semAnnoIRI, ITSRDF.TA_CLASS_REF,
                            this.vf.createIRI(DEFAULT_NER_NAMESPACE + reference));
                    typeAnnotation = true;

                    if (ref.hasConfidence()) {
                        emitTriple(semAnnoIRI, NIF.CONFIDENCE, ref.getConfidence());
                    }
                    if (named) {
                        emitTriple(semAnnoIRI, RDF.TYPE, KEMT.NAMED_ENTITY);
                        emitTriple(semAnnoIRI, KEMT.PROPER_NAME, label);
                    }

                    emitTriple(semAnnoIRI, KEMT.RAW_STRING, emitFragment(terms));
                }
            }

            // Otherwise, fall back to the plain NERC type declared on the entity.
            if (!hasOtherNercTypes && type != null) {
                final IRI semAnnoIRI = createSemanticAnnotationIRI(entity.getId() + type,
                        mentionIRI, KEMT.ENTITY_ANNOTATION);
                final Annotation ann = new Annotation(semAnnoIRI, KEMT.ENTITY_ANNOTATION);
                safeAnnotationPutInMap(mention, ann);
                emitTriple(semAnnoIRI, ITSRDF.TA_CLASS_REF,
                        this.vf.createIRI(DEFAULT_NER_NAMESPACE + type));
                typeAnnotation = true;
                if (isProperty) {
                    emitEntityAttributes(entity, semAnnoIRI);
                }
                if (named) {
                    emitTriple(semAnnoIRI, RDF.TYPE, KEMT.NAMED_ENTITY);
                    emitTriple(semAnnoIRI, KEMT.PROPER_NAME, label);
                }

                emitTriple(semAnnoIRI, KEMT.RAW_STRING, emitFragment(terms));
            }

            // Emit one linking annotation per DBpedia external reference.
            boolean linkingAnnotation = false;
            for (final ExternalRef ref : entity.getExternalRefs()) {
                final String resource = ref.getResource();
                if (resource.startsWith("dbpedia-")) {
                    final IRI refIRI = this.vf.createIRI(Util.cleanIRI(ref.getReference()));
                    final IRI semAnnoIRI = createSemanticAnnotationIRI(
                            entity.getId() + "_" + refIRI.getLocalName(), mentionIRI,
                            KEMT.ENTITY_ANNOTATION);
                    final Annotation ann = new Annotation(semAnnoIRI, KEMT.ENTITY_ANNOTATION);
                    safeAnnotationPutInMap(mention, ann);

                    emitTriple(semAnnoIRI, ITSRDF.TA_IDENT_REF, refIRI);
                    linkingAnnotation = true;

                    if (ref.hasConfidence()) {
                        emitTriple(semAnnoIRI, NIF.CONFIDENCE, ref.getConfidence());
                    }

                    emitTriple(semAnnoIRI, KEMT.RAW_STRING, emitFragment(terms));
                }
            }

            // Emit WordNet/BBN term-level information, forcing the creation of an annotation if
            // neither a typing nor a linking annotation was produced above.
            emitCommonAttributesAnnotation(entity.getId() + "_semann", mention, head, terms,
                    !linkingAnnotation && !typeAnnotation);
        }

        private void processPredicates() {
            for (final Predicate predicate : this.document.getPredicates()) {
                if (this.sentenceIDs[predicate.getSpan().getFirstTarget().getSent()]) {
                    try {
                        processPredicate(predicate);
                    } catch (final Throwable ex) {
                        LOGGER.error("Error processing " + NAFUtils.toString(predicate), ex);
                    }
                }
            }
        }

        private void processPredicate(final Predicate predicate) throws RDFHandlerException {

            // Retrieve terms and head of the predicate.
            final List<Term> terms = predicate.getSpan().getTargets();
            final Term head = NAFUtils.extractHead(this.document, predicate.getSpan());

            // Build the lemma of the predicate by joining the lemmas of its terms.
            final StringBuilder builder = new StringBuilder();
            for (final Term term : terms) {
                builder.append(builder.length() == 0 ? "" : "_");
                builder.append(term.getLemma().toLowerCase());
            }
            final String lemma = builder.toString();

            final String pos = head.getPos();

            // Reuse an existing mention with the same head and extent, or emit a new one.
            Mention mention = getMention(head.getId(), terms);
            final IRI mentionIRI;
            if (mention == null) {
                mentionIRI = emitMention(terms);
                mention = new Mention(head, terms, mentionIRI);
                safeMentionPutInMap(head.getId(), mention);
            } else {
                mentionIRI = mention.mentionIRI;
            }
            this.nafIdMentions.put(predicate.getId(), mention);

            // Attach lemma and POS (as an OLIA link) to the mention.
            emitTriple(mentionIRI, NIF.LEMMA, lemma);
            emitTriple(mentionIRI, NIF.OLIA_LINK, this.vf.createIRI(DEFAULT_OLIA_PENN_POS + pos));

            // Emit one predicate annotation per external reference (PropBank, NomBank, FrameNet,
            // VerbNet...), skipping DBpedia references and empty references.
            for (final ExternalRef ref : predicate.getExternalRefs()) {
                if (ref.getResource().startsWith("dbpedia")) {
                    continue;
                }
                if ("".equals(ref.getReference())) {
                    continue;
                }
                final IRI typeIRI = mintRefIRI(ref.getResource(), ref.getReference());
                if (typeIRI == null) {
                    continue; // resource not mapped to a namespace
                }

                final IRI semAnnoIRI = createSemanticAnnotationIRI(
                        predicate.getId() + "_" + typeIRI.getLocalName(), mentionIRI,
                        KEMT.PREDICATE_C);
                final Annotation ann = new Annotation(semAnnoIRI, KEMT.PREDICATE_C);
                safeAnnotationPutInMap(mention, ann);

                emitTriple(semAnnoIRI, ITSRDF.TA_CLASS_REF, typeIRI);
                emitTriple(semAnnoIRI, KEMT.RAW_STRING, emitFragment(terms));
            }

            // Emit WordNet/BBN term-level information for the predicate head.
            emitCommonAttributesAnnotation(predicate.getId() + "_semann", mention, head, terms,
                    false);
        }

        private void processCoordinations() {

            // Mentions grouped by sentence, and coordinated mentions reachable from each mention.
            final Map<Integer, Set<Mention>> sentenceMentions = Maps.newHashMap();
            final Map<Mention, Set<Mention>> coordinatedMentions = Maps.newHashMap();

            // Collect, for each mention head, the widest mention and the mentions coordinated to
            // it via COORD/CONJ dependency chains.
            for (final String headID : this.mentions.keySet()) {

                final Mention mention = getBestMention(headID);
                final Term head = mention.head;
                final Integer sentenceID = head.getSent();

                sentenceMentions.computeIfAbsent(sentenceID, key -> Sets.newHashSet())
                        .add(mention);

                final Set<Term> coordinatedTerms = this.document.getTermsByDepAncestors(
                        Collections.singleton(head), NAFExtractor.COORDINATION_REGEX);

                if (coordinatedTerms.size() > 1) {
                    for (final Term term : coordinatedTerms) {
                        final Mention depMen = getBestMention(term.getId());
                        if (depMen != null) {
                            coordinatedMentions.computeIfAbsent(mention, key -> Sets.newHashSet())
                                    .add(depMen);
                        }
                    }
                }
            }

            // For each sentence, keep only the top-level coordinations: a mention is dropped if it
            // has fewer than two conjuncts or appears among the conjuncts of another mention.
            for (final Integer sentenceID : sentenceMentions.keySet()) {

                final Set<Mention> sentMen = sentenceMentions.get(sentenceID);
                final Set<Mention> mentionsToKeep = Sets.newHashSet();

                for (final Mention candidate : sentMen) {
                    if (!coordinatedMentions.containsKey(candidate)) {
                        continue;
                    }
                    if (coordinatedMentions.get(candidate).size() == 1) {
                        continue;
                    }
                    boolean keep = true;
                    for (final Mention other : sentMen) {
                        if (candidate.equals(other)) {
                            continue;
                        }
                        if (!coordinatedMentions.containsKey(other)) {
                            continue;
                        }
                        if (coordinatedMentions.get(other).contains(candidate)) {
                            keep = false;
                            break;
                        }
                    }
                    if (keep) {
                        mentionsToKeep.add(candidate);
                    }
                }

                // Emit group entity, coordination annotation and conjunct annotations for each
                // surviving coordination.
                for (final Mention men : mentionsToKeep) {

                    final List<Term> terms = Lists.newArrayList();
                    final List<IRI> mentionsIRI = Lists.newArrayList();
                    final List<IRI> coordinatedIRI = Lists.newArrayList();

                    for (final Mention depMen : coordinatedMentions.get(men)) {
                        terms.addAll(depMen.extent);
                        mentionsIRI.add(depMen.mentionIRI);

                        // Entity annotation for the single coordinated item.
                        final IRI semAnnoIRI = createSemanticAnnotationIRI("coordItem",
                                depMen.mentionIRI, KEMT.ENTITY_ANNOTATION);
                        coordinatedIRI.add(semAnnoIRI);
                        final Annotation ann = new Annotation(semAnnoIRI, KEMT.ENTITY_ANNOTATION);
                        safeAnnotationPutInMap(depMen, ann);

                        emitTriple(semAnnoIRI, KEMT.RAW_STRING, depMen.mentionIRI);
                    }

                    // Mention and entity annotation for the group as a whole.
                    final IRI groupEntityMentionIRI = emitMention(terms);
                    final Mention groupEntityMention = new Mention(men.head, terms,
                            groupEntityMentionIRI);
                    safeMentionPutInMap(men.head.getId(), groupEntityMention);

                    final IRI groupEntityIRI = createSemanticAnnotationIRI("group",
                            groupEntityMentionIRI, KEMT.ENTITY_ANNOTATION);
                    final Annotation groupEntityAnn = new Annotation(groupEntityIRI,
                            KEMT.ENTITY_ANNOTATION);
                    safeAnnotationPutInMap(groupEntityMention, groupEntityAnn);

                    emitTriple(groupEntityIRI, KEMT.RAW_STRING, groupEntityMentionIRI);

                    // Mention and coordination annotation linking the group to its conjuncts.
                    final IRI coordinationMentionIRI = groupEntityMentionIRI;
                    final Mention coordinationMention = new Mention(men.head, terms,
                            coordinationMentionIRI);
                    safeMentionPutInMap(men.head.getId(), coordinationMention);

                    final IRI coordinationIRI = createSemanticAnnotationIRI("coord",
                            coordinationMentionIRI, KEMT.COORDINATION);
                    final Annotation coordinationAnn = new Annotation(coordinationIRI,
                            KEMT.COORDINATION);
                    safeAnnotationPutInMap(coordinationMention, coordinationAnn);

                    emitTriple(coordinationIRI, KEMT.RAW_STRING, coordinationMentionIRI);
                    emitTriple(coordinationIRI, KEMT.GROUP, groupEntityIRI);

                    for (final IRI conjunctIRI : coordinatedIRI) {
                        emitTriple(coordinationIRI, KEMT.CONJUNCT, conjunctIRI);
                    }
                    for (final IRI conjunctMentionIRI : mentionsIRI) {
                        emitTriple(coordinationIRI, KEMT.CONJUNCT_STRING, conjunctMentionIRI);
                    }
                }
            }
        }

        private void processCoreferences() {
            for (final Coref coref : this.document.getCorefs()) {
                if ("event".equalsIgnoreCase(coref.getType())) {
                    continue;
                }
                final List<Span<Term>> spans = Lists.newArrayList();
                for (final Span<Term> span : coref.getSpans()) {
                    if (this.sentenceIDs[span.getFirstTarget().getSent()]) {
                        spans.add(span);
                    }
                }
                if (!spans.isEmpty()) {
                    try {
                        processCoref(spans, coref.getId());
                    } catch (final Throwable ex) {
                        LOGGER.error("Error processing " + NAFUtils.toString(coref), ex);
                    }
                }
            }
        }

        @SuppressWarnings("Duplicates")
        private void processCoref(final List<Span<Term>> spans, final String corefID) {

            // Collect the spans with a resolvable head and an existing mention.
            final List<Span<Term>> corefSpans = Lists.newArrayList();
            final List<Term> corefRawTerms = Lists.newArrayList();
            final List<Mention> corefMentions = Lists.newArrayList();
            final List<Term> corefMentionTerms = Lists.newArrayList();

            for (final Span<Term> span : spans) {
                final Term head = NAFUtils.extractHead(this.document, span);
                if (head != null) {
                    final Mention correspondingMention = getBestMention(head.getId());
                    if (correspondingMention != null) {
                        corefMentions.add(correspondingMention);
                        corefSpans.add(span);
                        corefMentionTerms.addAll(correspondingMention.extent);
                        corefRawTerms.addAll(span.getTargets());
                    }
                }
            }

            // A coreference needs at least two coreferring mentions.
            if (corefSpans.size() <= 1) {
                return;
            }

            // Emit a composite mention covering all coreferring mentions, and the coreference
            // annotation attached to it.
            final IRI coreferenceMentionIRI = emitMention(corefMentionTerms);
            final IRI coreferenceIRI = createSemanticAnnotationIRI(corefID, coreferenceMentionIRI,
                    KEMT.COREFERENCE);

            // Emit one coreferent annotation per member mention, linked to the coreference.
            for (int i = 0; i < corefMentions.size(); i++) {
                final IRI coreferentIRI = createSemanticAnnotationIRI(corefID,
                        corefMentions.get(i).mentionIRI, KEMT.ENTITY_ANNOTATION);
                emitTriple(coreferenceIRI, KEMT.COREFERRING, coreferentIRI);
                emitTriple(coreferentIRI, KEMT.RAW_STRING,
                        emitFragment(corefSpans.get(i).getTargets()));
            }

            emitTriple(coreferenceIRI, KEMT.RAW_STRING, emitFragment(corefRawTerms));
        }

        private void processRoles() {
            for (final Predicate predicate : this.document.getPredicates()) {
                for (final Role role : predicate.getRoles()) {
                    final Term roleHead = NAFUtils.extractHead(this.document, role.getSpan());
                    if (roleHead != null) {

                        // Resolve the actual argument heads reachable from the role head via the
                        // participation dependency pattern.
                        final Set<Term> argHeads = this.document.getTermsByDepAncestors(
                                Collections.singleton(roleHead), PARTICIPATION_REGEX);

                        for (final Term argHead : argHeads) {
                            try {
                                processRole(predicate, role, argHead);
                            } catch (final Throwable ex) {
                                LOGGER.error("Error processing " + NAFUtils.toString(role)
                                        + " of " + NAFUtils.toString(predicate)
                                        + ", argument " + NAFUtils.toString(argHead), ex);
                            }
                        }
                    }
                }
            }
        }

        private void processRole(final Predicate predicate, final Role role, final Term argHead) {

            // Retrieve the mentions of the predicate and of the argument; abort if missing.
            final Mention predMention = this.nafIdMentions.get(predicate.getId());
            final Mention correspondingMention = getBestMention(argHead.getId());
            if (predMention == null || correspondingMention == null) {
                return;
            }

            // Predicate and argument annotations anchoring the participation link.
            final IRI fakePredIRI = createSemanticAnnotationIRI(predicate.getId(),
                    predMention.mentionIRI, KEMT.PREDICATE_C);
            final IRI fakeRoleIRI = createSemanticAnnotationIRI(
                    role.getId() + "_" + argHead.getId(), correspondingMention.mentionIRI,
                    KEMT.ARGUMENT_C);

            final IRI fakePredRawString = emitFragment(predicate.getSpan().getTargets());
            emitTriple(fakePredIRI, KEMT.RAW_STRING, fakePredRawString);
            final IRI fakeRoleRawString = emitFragment(role.getSpan().getTargets());
            emitTriple(fakeRoleIRI, KEMT.RAW_STRING, fakeRoleRawString);

            // Participation annotation on the mention covering both predicate and argument extents.
            final IRI partMentionIRI = emitMention(Stream
                    .concat(predMention.extent.stream(), correspondingMention.extent.stream())
                    .collect(Collectors.toList()));

            final IRI partRawIRI = emitMention(Stream
                    .concat(predicate.getSpan().getTargets().stream(),
                            role.getSpan().getTargets().stream())
                    .collect(Collectors.toList()));

            final IRI participationIRI = createSemanticAnnotationIRI(
                    predicate.getId() + "_" + role.getId() + "_" + argHead.getId(), partMentionIRI,
                    KEMT.PARTICIPATION);

            emitTriple(participationIRI, KEMT.PREDICATE_P, fakePredIRI);
            emitTriple(participationIRI, KEMT.ARGUMENT_P, fakeRoleIRI);
            emitTriple(participationIRI, KEMT.RAW_STRING, partRawIRI);

            // Emit one argument annotation per external reference (PropBank, NomBank, FrameNet...).
            for (final ExternalRef ref : role.getExternalRefs()) {
                if ("".equals(ref.getReference())) {
                    continue;
                }
                final IRI typeIRI = mintRefIRI(ref.getResource(), ref.getReference());
                if (typeIRI == null) {
                    continue; // resource not mapped to a namespace
                }
                final IRI roleIRI = createSemanticAnnotationIRI(
                        role.getId() + "_" + argHead.getId() + "_" + typeIRI.getLocalName(),
                        correspondingMention.mentionIRI, KEMT.ARGUMENT_C);
                final Annotation ann = new Annotation(roleIRI, KEMT.ARGUMENT_C);
                safeAnnotationPutInMap(correspondingMention, ann);
                emitTriple(roleIRI, ITSRDF.TA_PROP_REF, typeIRI);
                emitTriple(roleIRI, KEMT.RAW_STRING, fakeRoleRawString);
            }
        }

        @Nullable
        private IRI emitMention(final Iterable<Term> terms) {

            final List<Term> sortedTerms = Ordering.from(Term.OFFSET_COMPARATOR).sortedCopy(terms);
            if (sortedTerms.isEmpty()) {
                return null;
            }

            // A mention is a fragment additionally typed as KEM.MENTION.
            final IRI mentionID = emitFragment(sortedTerms);
            emitTriple(mentionID, RDF.TYPE, KEM.MENTION);
            return mentionID;
        }

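        // Illustrative sketch (hypothetical offsets): for document IRI http://example.org/doc.xml,
        // a fragment covering the character ranges [10,15) and [20,27) receives the IRI
        // <http://example.org/doc.xml#char=10,15;20,27>, is typed KEM.COMPOSITE_FRAGMENT, and is
        // linked via KEM.HAS_COMPONENT to the single-range fragments <...#char=10,15> and
        // <...#char=20,27>; a contiguous fragment is instead typed NIF.RFC5147_STRING.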
        @Nullable
        private IRI emitFragment(final Iterable<Term> terms) {

            final List<Term> sortedTerms = Ordering.from(Term.OFFSET_COMPARATOR).sortedCopy(terms);
            final int numTerms = sortedTerms.size();
            if (numTerms == 0) {
                return null;
            }

            // Walk the terms in offset order, splitting the fragment into components whenever a
            // non-whitespace gap separates two consecutive terms.
            final String text = this.documentText;
            final List<IRI> componentIRIs = Lists.newArrayList();
            final int begin = NAFUtils.getBegin(sortedTerms.get(0));
            int offset = begin;
            int startTermIdx = 0;

            final StringBuilder anchorBuilder = new StringBuilder();
            final StringBuilder uriBuilder = new StringBuilder(this.documentIRI.stringValue())
                    .append("#char=").append(begin).append(",");

            for (int i = 0; i < numTerms; ++i) {
                final Term term = sortedTerms.get(i);
                final int termOffset = NAFUtils.getBegin(term);
                if (termOffset > offset && !text.substring(offset, termOffset).trim().isEmpty()) {
                    final int start = NAFUtils.getBegin(sortedTerms.get(startTermIdx));
                    anchorBuilder.append(text.substring(start, offset)).append(" [...] ");
                    uriBuilder.append(offset).append(";").append(termOffset).append(',');
                    componentIRIs.add(emitFragment(sortedTerms.subList(startTermIdx, i)));
                    startTermIdx = i;
                }
                offset = NAFUtils.getEnd(term);
            }
            if (startTermIdx > 0) {
                componentIRIs.add(emitFragment(sortedTerms.subList(startTermIdx, numTerms)));
            }

            anchorBuilder.append(text.substring(NAFUtils.getBegin(sortedTerms.get(startTermIdx)),
                    offset));
            uriBuilder.append(offset);

            final String anchor = anchorBuilder.toString();
            final IRI fragmentID = this.vf.createIRI(uriBuilder.toString());
            emitTriple(fragmentID, KEM.FRAGMENT_OF, this.documentIRI);

            // Composite fragments link to their components; contiguous ones are plain NIF strings.
            if (!componentIRIs.isEmpty()) {
                emitTriple(fragmentID, RDF.TYPE, KEM.COMPOSITE_FRAGMENT);
                for (final IRI componentIRI : componentIRIs) {
                    emitTriple(fragmentID, KEM.HAS_COMPONENT, componentIRI);
                }
            } else {
                emitTriple(fragmentID, RDF.TYPE, NIF.RFC5147_STRING);
            }

            emitTriple(fragmentID, NIF.BEGIN_INDEX, this.vf.createLiteral(begin));
            emitTriple(fragmentID, NIF.END_INDEX, this.vf.createLiteral(offset));
            emitTriple(fragmentID, NIF.ANCHOR_OF, this.vf.createLiteral(anchor));

            return fragmentID;
        }

        // Mints the IRI of a semantic annotation of the given type anchored to the given mention,
        // emitting its type triple and the KEM.HAS_ANNOTATION link from the mention.
        private IRI createSemanticAnnotationIRI(final String id, final IRI mentionIRI,
                final IRI type) {
            final IRI semanticAnnotationIRI = this.vf.createIRI(mentionIRI.stringValue() + id);
            this.model.add(semanticAnnotationIRI, RDF.TYPE, type);
            this.model.add(mentionIRI, KEM.HAS_ANNOTATION, semanticAnnotationIRI);
            return semanticAnnotationIRI;
        }

        private void emitTriple(@Nullable final IRI subject, @Nullable final IRI property,
                @Nullable final Object objects) {
            if (subject != null && property != null) {
                for (final Value object : extract(Value.class, objects,
                        RDF.TYPE.equals(property) ? NAFExtractor.this.typeMap : null)) {
                    this.model.add(this.vf.createStatement(subject, property, object));
                }
            }
        }

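        // Illustrative sketch (hypothetical id and name): minting an IRI for id "t12" with
        // suggested local name "New York" under document IRI http://example.org/doc.xml yields
        // <http://example.org/doc.xml#New_York>; a later call with a different id but the same
        // suggested name yields <http://example.org/doc.xml#New_York_2>, and so on.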
        private IRI mintIRI(final String id, @Nullable final String suggestedLocalName) {
            String localName = this.mintedIRIs.get(id);
            if (localName == null) {
                final String name = MoreObjects.firstNonNull(suggestedLocalName, id);
                final StringBuilder builder = new StringBuilder();
                for (int i = 0; i < name.length(); ++i) {
                    final char c = name.charAt(i);
                    builder.append(Character.isWhitespace(c) ? '_' : c);
                }
                final String base = builder.toString();
                int counter = 1;
                while (true) {
                    localName = base + (counter == 1 ? "" : "_" + counter);
                    if (!this.mintedIRIs.inverse().containsKey(localName)) {
                        this.mintedIRIs.put(id, localName);
                        break;
                    }
                    ++counter;
                }
            }
            return this.vf.createIRI(Util.cleanIRI(this.documentIRI + "#" + localName));
        }

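        // Illustrative sketch: with the default namespace map, mintRefIRI("propbank", "say.01")
        // resolves to <http://www.newsreader-project.eu/ontologies/propbank/say.01>, while a
        // resource missing from the map yields null.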
        @Nullable
        private IRI mintRefIRI(@Nullable final String resource, @Nullable final String reference) {
            if (!Strings.isNullOrEmpty(resource) && !Strings.isNullOrEmpty(reference)) {
                final String normResource = resource.toLowerCase();
                final String namespace = NAFExtractor.this.namespaceMap.get(normResource);
                if (namespace != null) {
                    return this.vf
                            .createIRI(Util.cleanIRI(namespace + reference.replace('#', '.')));
                } else {
                    LOGGER.debug("No namespace mapped for resource {}", normResource);
                }
            }
            return null;
        }

        private void emitEntityAttributes(final Entity entity, final IRI subject)
                throws RDFHandlerException {

            // Retrieve the normalized value and NER tag associated to the entity.
            final ExternalRef valueRef = NAFUtils.getRef(entity, "value", null);
            String nerTag = entity.getType();
            nerTag = nerTag == null ? null : nerTag.toLowerCase();

            // Emit numeric values (and currency units for monetary amounts), ignoring values that
            // cannot be parsed as numbers.
            if (valueRef != null) {
                try {
                    final String s = valueRef.getReference().trim();
                    if (s.isEmpty()) {
                        return;
                    }
                    if (Objects.equal(nerTag, "cardinal") || Objects.equal(nerTag, "quantity")) {
                        emitTriple(subject, KEMT.OBJECT_VALUE, Double.parseDouble(s));

                    } else if (Objects.equal(nerTag, "ordinal")) {
                        emitTriple(subject, KEMT.OBJECT_VALUE, Double.parseDouble(s));

                    } else if (Objects.equal(nerTag, "percent")) {
                        final int index = s.indexOf('%');
                        emitTriple(subject, KEMT.OBJECT_VALUE,
                                Double.parseDouble(s.substring(index + 1)));

                    } else if (Objects.equal(nerTag, "money")) {
                        int index = 0;
                        while (index < s.length()) {
                            final char c = s.charAt(index);
                            if (c == '€') {
                                emitTriple(subject, KEMT.UNIT, "EUR");
                            } else if (c == '$') {
                                emitTriple(subject, KEMT.UNIT, "USD");
                            } else if (c == '¥') {
                                emitTriple(subject, KEMT.UNIT, "YEN");
                            } else if (Character.isDigit(c)) {
                                break;
                            }
                            ++index;
                        }
                        emitTriple(subject, KEMT.OBJECT_VALUE,
                                Double.parseDouble(s.substring(index)));
                    }
                } catch (final NumberFormatException ex) {
                    LOGGER.debug("Could not process normalized value: " + valueRef.getReference());
                }
            }
        }

        // Emits an annotation carrying WordNet super-sense, WordNet synset and BBN category
        // information attached to the head term, if any (or an empty annotation, when forced).
        private void emitCommonAttributesAnnotation(final String id, final Mention mention,
                final Term head, final List<Term> terms,
                final boolean forceSemanticAnnotationCreation) throws RDFHandlerException {

            final ExternalRef sstRef = NAFUtils.getRef(head, NAFUtils.RESOURCE_WN_SST, null);
            final ExternalRef synsetRef = NAFUtils.getRef(head, NAFUtils.RESOURCE_WN_SYNSET, null);
            final ExternalRef bbnRef = NAFUtils.getRef(head, NAFUtils.RESOURCE_BBN, null);

            if (forceSemanticAnnotationCreation || sstRef != null || synsetRef != null
                    || bbnRef != null) {

                final IRI semanticAnnotationIRI = createSemanticAnnotationIRI(id,
                        mention.mentionIRI, KEMT.ENTITY_ANNOTATION);
                final Annotation ann = new Annotation(semanticAnnotationIRI,
                        KEM.SEMANTIC_ANNOTATION);
                safeAnnotationPutInMap(mention, ann);

                // WordNet super-sense tag, keeping only the substring after the last dash.
                if (sstRef != null) {
                    final String sst = sstRef.getReference();
                    final IRI uri = this.vf.createIRI(DEFAULT_WN_SST_NAMESPACE,
                            sst.substring(sst.lastIndexOf('-') + 1));
                    emitTriple(semanticAnnotationIRI, ITSRDF.TERM_INFO_REF, uri);
                }

                // WordNet synset reference.
                if (synsetRef != null) {
                    final IRI uri = this.vf.createIRI(DEFAULT_WN_SYN_NAMESPACE,
                            synsetRef.getReference());
                    emitTriple(semanticAnnotationIRI, ITSRDF.TERM_INFO_REF, uri);
                }

                // BBN category reference.
                if (bbnRef != null) {
                    final IRI uri = this.vf.createIRI(DEFAULT_BBN_NAMESPACE,
                            bbnRef.getReference());
                    emitTriple(semanticAnnotationIRI, ITSRDF.TERM_INFO_REF, uri);
                }

                emitTriple(semanticAnnotationIRI, KEMT.RAW_STRING, emitFragment(terms));
            }
        }

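        // Illustrative sketch of emitTriple/extract object handling: an IRI array such as
        // new IRI[] { KS.RESOURCE, KS.TEXT } produces one triple per element, while a string key
        // such as "entity.person" passed with RDF.TYPE as property is resolved through the
        // configured type map (NWR.PERSON with the defaults).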
        @SuppressWarnings("unchecked")
        private <T extends Value> Collection<T> extract(final Class<T> clazz,
                @Nullable final Object object, @Nullable final Multimap<String, ? extends T> map) {
            if (object == null) {
                return ImmutableList.of();
            } else if (clazz.isInstance(object)) {
                return ImmutableList.of((T) object);
            } else if (object instanceof Iterable<?>) {
                final List<T> list = Lists.newArrayList();
                for (final Object element : (Iterable<?>) object) {
                    list.addAll(extract(clazz, element, map));
                }
                return list;
            } else if (object.getClass().isArray()) {
                final List<T> list = Lists.newArrayList();
                final int length = Array.getLength(object);
                for (int i = 0; i < length; ++i) {
                    list.addAll(extract(clazz, Array.get(object, i), map));
                }
                return list;
            } else if (map != null) {
                return (Collection<T>) map.get(object.toString());
            } else {
                return ImmutableList.of(Statements.convert(object, clazz));
            }
        }

    }

    public static Builder builder() {
        return new Builder();
    }

    public static final class Builder {

        @Nullable
        private Multimap<String, IRI> typeMap;

        @Nullable
        private Multimap<String, IRI> propertyMap;

        @Nullable
        private Map<String, String> namespaceMap;

        @Nullable
        private String owltimeNamespace;

        @Nullable
        private Boolean merging;

        @Nullable
        private Boolean normalization;

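        // Illustrative sketch (hypothetical property keys): given a Properties object containing
        // "extractor.fusion=true" and "extractor.normalization=false", calling
        // builder().withProperties(properties, "extractor") is equivalent to calling
        // builder().withMerging(true).withNormalization(false).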
        public Builder withProperties(final Map<?, ?> properties, @Nullable final String prefix) {
            final String p = prefix == null ? "" : prefix.endsWith(".") ? prefix : prefix + ".";
            for (final Map.Entry<?, ?> entry : properties.entrySet()) {
                if (entry.getKey() != null && entry.getValue() != null
                        && entry.getKey().toString().startsWith(p)) {
                    final String name = entry.getKey().toString().substring(p.length());
                    final String value = Strings.emptyToNull(entry.getValue().toString());
                    if ("fusion".equals(name)) {
                        withMerging(Boolean.valueOf(value));
                    } else if ("normalization".equals(name)) {
                        withNormalization(Boolean.valueOf(value));
                    }
                }
            }
            return this;
        }

        public Builder withTypeMap(@Nullable final Multimap<String, IRI> typeMap) {
            this.typeMap = typeMap;
            return this;
        }

        public Builder withPropertyMap(@Nullable final Multimap<String, IRI> propertyMap) {
            this.propertyMap = propertyMap;
            return this;
        }

        public Builder withNamespaceMap(@Nullable final Map<String, String> namespaceMap) {
            this.namespaceMap = namespaceMap;
            return this;
        }

        public Builder withOWLTimeNamespace(@Nullable final String owltimeNamespace) {
            this.owltimeNamespace = owltimeNamespace;
            return this;
        }

        public Builder withMerging(@Nullable final Boolean merging) {
            this.merging = merging;
            return this;
        }

        public Builder withNormalization(@Nullable final Boolean normalization) {
            this.normalization = normalization;
            return this;
        }

        public NAFExtractor build() {
            return new NAFExtractor(this);
        }

    }

    // A mention: a head term, the list of terms it covers, and the IRI of the emitted mention.
    private static final class Mention {

        final IRI mentionIRI;
        final Term head;
        final List<Term> extent;

        Mention(final Term head, final Iterable<Term> extent, final IRI mentionIRI) {
            this.head = head;
            this.extent = ImmutableList.copyOf(extent);
            this.mentionIRI = mentionIRI;
        }
    }

    // A semantic annotation attached to a mention, with its IRI and type.
    private static final class Annotation {

        final IRI annotationIRI;
        final IRI type;

        Annotation(final IRI annotationIRI, final IRI type) {
            this.annotationIRI = annotationIRI;
            this.type = type;
        }
    }
}