1   package eu.fbk.dkm.pikes.rdf.naf;
2   
3   import java.lang.reflect.Array;
4   import java.util.*;
5   import java.util.stream.Collectors;
6   import java.util.stream.Stream;
7   
8   import javax.annotation.Nullable;
9   
10  import com.google.common.base.MoreObjects;
11  import com.google.common.base.Objects;
12  import com.google.common.base.Strings;
13  import com.google.common.collect.*;
14  
15  import eu.fbk.dkm.pikes.rdf.util.OWLTime;
16  import eu.fbk.dkm.pikes.rdf.vocab.*;
17  import eu.fbk.dkm.pikes.resources.YagoTaxonomy;
18  import eu.fbk.utils.svm.Util;
19  import org.eclipse.rdf4j.model.*;
20  import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
21  import org.eclipse.rdf4j.model.vocabulary.*;
22  import org.eclipse.rdf4j.rio.RDFHandlerException;
23  import org.slf4j.Logger;
24  import org.slf4j.LoggerFactory;
25  
26  import ixa.kaflib.Coref;
27  import ixa.kaflib.Dep;
28  import ixa.kaflib.Entity;
29  import ixa.kaflib.ExternalRef;
30  import ixa.kaflib.KAFDocument;
31  import ixa.kaflib.KAFDocument.FileDesc;
32  import ixa.kaflib.LinguisticProcessor;
33  import ixa.kaflib.Predicate;
34  import ixa.kaflib.Predicate.Role;
35  import ixa.kaflib.Span;
36  import ixa.kaflib.Term;
37  import ixa.kaflib.Timex3;
38  import ixa.kaflib.WF;
39  
40  import eu.fbk.dkm.pikes.rdf.api.Extractor;
41  import eu.fbk.dkm.pikes.rdf.util.ModelUtil;
42  import eu.fbk.dkm.pikes.resources.NAFUtils;
43  import eu.fbk.rdfpro.RDFHandlers;
44  import eu.fbk.rdfpro.util.Hash;
45  import eu.fbk.rdfpro.util.Statements;
46  
47  public class NAFExtractor implements Extractor {
48  
    // Shared SLF4J logger used to report per-annotation extraction failures without aborting
    private static final Logger LOGGER = LoggerFactory.getLogger(NAFExtractor.class);
50  
51      public void generate(final Object document, final Model model, @Nullable final Iterable<Integer> sentenceIDs) throws Exception {
52          KAFDocument doc = (KAFDocument) document;
53          IRI IRI = SimpleValueFactory.getInstance().createIRI(doc.getPublic().uri);
54  
55          final boolean[] ids = new boolean[doc.getNumSentences() + 1];
56          if (sentenceIDs == null) {
57              Arrays.fill(ids, true);
58          } else {
59              for (final Integer sentenceID : sentenceIDs) {
60                  ids[sentenceID] = true;
61              }
62          }
63  
64          new Extraction(IRI, model,
65                  doc, ids).run();
66      }
67  
68      @Override
69      public void extract(final Object document, final Model model, final boolean[] sentenceIDs) throws Exception {
70          KAFDocument doc = (KAFDocument) document;
71          IRI IRI = SimpleValueFactory.getInstance().createIRI(doc.getPublic().uri);
72          new Extraction(IRI, model,
73                  doc, sentenceIDs).run();
74      }
75  
76  
    // NOTE(review): the three regexes below appear to match sequences of dependency-relation
    // labels (CoNLL/MaltParser style); their consumers are not visible in this chunk — confirm.

    //todo adapt for UD (not needed)
    private static final String MODIFIER_REGEX = "(NMOD|AMOD|TMP|LOC|TITLE) PMOD? (COORD CONJ?)* PMOD?";

    //todo adapt for UD
    private static final String PARTICIPATION_REGEX = ""
//            + "SUB? (COORD CONJ?)* (PMOD (COORD CONJ?)*)? ((VC OPRD?)|(IM OPRD?))*";
            + "SUB? ( (COORD CONJ?)* PMOD)? ((VC OPRD?)|(IM OPRD?))*";

    //todo adapt for UD
    private static final String COORDINATION_REGEX = "(COORD CONJ?)*";

    // Default mapping from NAF entity/timex categories to the RDF classes to emit
    private static final Multimap<String, IRI> DEFAULT_TYPE_MAP = ImmutableMultimap
            .<String, IRI>builder() //
            .put("entity.person", NWR.PERSON) //
            .put("entity.organization", NWR.ORGANIZATION) //
            .put("entity.location", NWR.LOCATION) //
            .put("entity.misc", NWR.MISC) //
            .put("entity.money", GR.PRICE_SPECIFICATION) //
            .put("entity.date", OWLTIME.DATE_TIME_INTERVAL) //
            .put("entity.time", OWLTIME.DATE_TIME_INTERVAL) //
            .put("timex.date", OWLTIME.DATE_TIME_INTERVAL) //
            .put("timex.duration", OWLTIME.PROPER_INTERVAL) //
            .build();

    // Default mapping from external-ref resource names (as found in NAF <externalRef> elements)
    // to the IRI namespaces used when minting reference IRIs
    private static final Map<String, String> DEFAULT_NAMESPACE_MAP = ImmutableMap
            .<String, String>builder()
            .put("propbank", "http://www.newsreader-project.eu/ontologies/propbank/")
            .put("nombank", "http://www.newsreader-project.eu/ontologies/nombank/")
            .put("framenet", "http://www.newsreader-project.eu/ontologies/framenet/")
            .put("verbnet", "http://www.newsreader-project.eu/ontologies/verbnet/")
            .put("premon+propbank", "http://premon.fbk.eu/resource/")
            .put("premon+nombank", "http://premon.fbk.eu/resource/")
            .put("premon+framenet", "http://premon.fbk.eu/resource/")
            .put("premon+verbnet", "http://premon.fbk.eu/resource/")
            .put("eso", "http://www.newsreader-project.eu/domain-ontology#")
            .put("framebase", "http://framebase.org/ns/") //
            .put("wordnet","http://sli.uvigo.gal/rdf_galnet/") //
            .put("wn30-ukb","http://wordnet-rdf.princeton.edu/wn30/")
            .put("wn30-sst","http://pikes.fbk.eu/wn/sst/")
            .put("wn30","http://wordnet-rdf.princeton.edu/wn30/")
            .put("bbn","http://pikes.fbk.eu/bbn/")
            .put(KEM.PREFIX, KEM.NAMESPACE) //
            .put(KEMT.PREFIX, KEMT.NAMESPACE) //
            .put("attribute", "attr:")
            // TODO: change this namespace
            .put("syn", "http://wordnet-rdf.princeton.edu/wn30/")
            .put(SUMO.PREFIX, SUMO.NAMESPACE)//
            .put("yago", YagoTaxonomy.NAMESPACE).build();

    // Fallback namespaces used when minting IRIs for specific annotation sources
    private static final String DEFAULT_OWLTIME_NAMESPACE = "http://pikes.fbk.eu/time/";
    private static final String DEFAULT_NER_NAMESPACE = "http://pikes.fbk.eu/ner/";
    private static final String DEFAULT_WN_SST_NAMESPACE = "http://pikes.fbk.eu/wn/sst/";
    private static final String DEFAULT_WN_SYN_NAMESPACE = "http://wordnet-rdf.princeton.edu/wn30/";
    private static final String DEFAULT_BBN_NAMESPACE = "http://pikes.fbk.eu/bbn/";

    // OLIA part-of-speech namespaces: UD (not yet used, see todos) and Penn tagset
    private static final String DEFAULT_OLIA_UD_POS = "http://fginter.github.io/docs/u/pos/all.html#";
    private static final String DEFAULT_OLIA_PENN_POS = "http://purl.org/olia/penn.owl#";

    // Shared instance configured with all default settings
    public static final NAFExtractor DEFAULT = NAFExtractor.builder().build();
137 
    // Mapping from NAF entity/timex categories to RDF classes (defaults to DEFAULT_TYPE_MAP)
    private final Multimap<String, IRI> typeMap;

    // Mapping from external-ref resource names to IRI namespaces (defaults to DEFAULT_NAMESPACE_MAP)
    private final Map<String, String> namespaceMap;

    // Namespace used when emitting OWL-Time IRIs for timex values
    private final String owltimeNamespace;

    // NOTE(review): merging/normalization are configured via the Builder but not referenced in
    // the code visible in this chunk — confirm their consumers elsewhere in the class.
    private final boolean merging;

    private final boolean normalization;
148 
149     public NAFExtractor(final Builder builder) {
150         this.typeMap = ImmutableMultimap.copyOf(MoreObjects.firstNonNull(builder.typeMap,
151                 DEFAULT_TYPE_MAP));
152         this.namespaceMap = ImmutableMap.copyOf(MoreObjects.firstNonNull(builder.namespaceMap,
153                 DEFAULT_NAMESPACE_MAP));
154         this.owltimeNamespace = MoreObjects.firstNonNull(builder.owltimeNamespace,
155                 DEFAULT_OWLTIME_NAMESPACE);
156         this.merging = MoreObjects.firstNonNull(builder.merging, Boolean.FALSE);
157         this.normalization = MoreObjects.firstNonNull(builder.normalization, Boolean.FALSE);
158     }
159 
160     private final class Extraction {
161 
        // Target RDF model receiving all emitted triples
        private final Model model;

        // Source NAF document being converted
        private final KAFDocument document;

        // Value factory used to mint IRIs and literals
        private final ValueFactory vf;

        // Document text reconstructed from word-form offsets (see constructor)
        private final String documentText;

        // IRI identifying the document (taken from <public uri="...">)
        private final IRI documentIRI;

        // Per-sentence enablement flags, indexed by 1-based sentence number
        private final boolean[] sentenceIDs;

        // Bidirectional map backing IRI minting (consumed by mintIRI, defined outside this chunk)
        private final BiMap<String, String> mintedIRIs;

        // nif:Context IRI for the whole document text (documentIRI + "#ctx")
        private final IRI contextIRI;

//        private final Map<Term, InstanceMention> mentions;

        // Mentions indexed by head-term id; several mentions may share the same head
        private final Map<String, Set<Mention>> mentions;
        // Semantic annotations grouped by the mention they annotate
        private final Map<Mention, Set<Annotation>> annotations;

        // Mention associated to each NAF annotation id (timex/entity/predicate id)
        private final Map<String, Mention> nafIdMentions;
//        private final Map<String, Set<Annotation>> nafIdAnnotations;
185 
186 
187         //check if there is already a mention with that head and span
188         private Mention getMention(final String head, List<Term> terms){
189 
190             Mention mention = null;
191             if (this.mentions.containsKey(head)) {
192                 Set<Mention> mentions = this.mentions.get(head);
193                 for (Mention m : mentions
194                      ) {
195                     if (m.extent.equals(terms))
196                         mention = m;
197                 }
198             }
199             return mention;
200         }
201 
202         //get the BEST mention for a given head (used in coordination, coreference, roles)
203         private Mention getBestMention(final String head){
204 
205             Mention BestMention = null;
206             if (this.mentions.containsKey(head)) {
207                 Set<Mention> mentions = this.mentions.get(head);
208                 BestMention =  this.mentions.get(head).iterator().next();
209                 for (Mention m : mentions
210                         ) {
211                     if (BestMention.extent.size()<m.extent.size())
212                         BestMention = m;
213                 }
214             }
215             return BestMention;
216         }
217 
218         private void safeMentionPutInMap(final String ID, final Mention mention) {
219             Set<Mention> mentions;
220 
221             if (this.mentions.containsKey(ID))
222                 mentions = this.mentions.get(ID);
223             else
224                 mentions = Sets.newHashSet();
225             mentions.add(mention);
226             this.mentions.put(ID, mentions);
227         }
228 
229         private void safeAnnotationPutInMap(final Mention mention, final Annotation annotation) {
230             Set<Annotation> annotations;
231 
232             if (this.annotations.containsKey(mention))
233                 annotations = this.annotations.get(mention);
234             else
235                 annotations = Sets.newHashSet();
236             annotations.add(annotation);
237             this.annotations.put(mention, annotations);
238         }
239 
240         Extraction(final IRI IRI, final Model model, final KAFDocument document, final boolean[] sentenceIDs) {
241 
242             // Reconstruct the document text using term offsets to avoid alignment issues
243             final StringBuilder builder = new StringBuilder();
244             for (final WF word : document.getWFs()) {
245                 final int offset = word.getOffset();
246                 if (builder.length() > offset) {
247                     builder.setLength(offset);
248                 } else {
249                     while (builder.length() < offset) {
250                         builder.append(" ");
251                     }
252                 }
253                 builder.append(word.getForm());
254             }
255 
256             // Initialize the object
257             this.model = model;
258             this.document = document;
259             this.mintedIRIs = HashBiMap.create();
260             this.vf = Statements.VALUE_FACTORY;
261             this.documentText = builder.toString();
262             this.documentIRI = IRI;
263             //contextIRI: nif:Context (from NIF) is the maximal fragment associated to a kemt:TextResource
264 
265             //used for processing only some sentences
266             this.sentenceIDs = sentenceIDs;
267 
268             this.contextIRI = Statements.VALUE_FACTORY.createIRI(this.documentIRI.stringValue() + "#ctx");
269 
270             this.model.add(this.contextIRI, NIF.SOURCE_URL, IRI);
271             this.model.add(this.contextIRI, RDF.TYPE, NIF.CONTEXT);
272             this.model.add(this.contextIRI, NIF.IS_STRING, Statements.VALUE_FACTORY.createLiteral(documentText));
273             this.mentions = Maps.newHashMap();
274             this.annotations = Maps.newHashMap();
275             this.nafIdMentions = Maps.newHashMap();
276 //            this.nafIdAnnotations = Maps.newHashMap();
277         }
278 
        // Runs the full extraction pipeline. Steps 0-3 are mutually independent; coordination
        // must follow them, and coreference/role processing must follow coordination, since the
        // later steps reuse the mentions registered by the earlier ones.
        void run() {

//            order in 0-3 doesn't matter
            processMetadata(); // 0. Process NAF metadata
            processTimexes(); // 1. Process all <timex3> annotations
            processEntities(); // 2. Process all <entity> annotations
            processPredicates(); // 3. Process <predicate> annotations

//            next one has to come after 0-3
            processCoordinations(); // 4. Process all <entity> annotations which are involved in a coordination

//            next ones have to come after coordination
            processCoreferences(); // 6. Process <coref> annotations
            processRoles(); // 7. Process head <term>s in <role> annotations
        }
294 
295 
296 
297 
298         private void processMetadata() {
299 
300             // Obtain IRIs of document and NAF resources
301             final IRI docIRI = this.documentIRI;
302             final IRI nafIRI = this.vf.createIRI(docIRI.stringValue() + ".naf");
303 
304             // Emit document types
305             emitTriple(docIRI, RDF.TYPE, new IRI[] { KEMT.TEXT_RESOURCE, KS.RESOURCE, KS.TEXT });
306 
307             // Emit title, author and DCT from the <fileDesc> element, if present
308             if (this.document.getFileDesc() != null) {
309                 final FileDesc fd = this.document.getFileDesc();
310                 emitTriple(docIRI, DCTERMS.TITLE, fd.title);
311                 emitTriple(docIRI, DCTERMS.CREATOR, fd.author);
312                 emitTriple(docIRI, DCTERMS.CREATED, fd.creationtime);
313                 emitTriple(docIRI, KS.NAF_FILE_NAME, fd.filename);
314                 emitTriple(docIRI, KS.NAF_FILE_TYPE, fd.filetype);
315                 emitTriple(docIRI, KS.NAF_PAGES, fd.pages);
316             }
317 
318             // Emit the document language, if available
319             if (this.document.getLang() != null) {
320                 emitTriple(docIRI, DCTERMS.LANGUAGE,
321                         ModelUtil.languageCodeToIRI(this.document.getLang()));
322             }
323 
324             // Emit an hash of the whitespace-normalized raw text, if available
325             if (this.document.getRawText() != null) {
326                 final String rawText = this.document.getRawText();
327                 final StringBuilder builder = new StringBuilder();
328                 boolean addSpace = false;
329                 for (int i = 0; i < rawText.length(); ++i) {
330                     final char c = rawText.charAt(i);
331                     if (Character.isWhitespace(c)) {
332                         addSpace = builder.length() > 0;
333                     } else {
334                         if (addSpace) {
335                             builder.append(' ');
336                             addSpace = false;
337                         }
338                         builder.append(c);
339                     }
340                 }
341                 emitTriple(docIRI, KS.TEXT_HASH, Hash.murmur3(builder.toString()).toString());
342             }
343 
344             // Link document to its NAF annotation
345             emitTriple(docIRI, KS.ANNOTATED_WITH, nafIRI);
346             emitTriple(nafIRI, KS.ANNOTATION_OF, docIRI);
347 
348             // Emit types, version and publicId of NAF resource
349             emitTriple(nafIRI, RDF.TYPE, new IRI[] { KEMT.TEXT_RESOURCE, KS.RESOURCE, KS.NAF });
350             emitTriple(nafIRI, KS.VERSION, this.document.getVersion());
351             emitTriple(nafIRI, DCTERMS.IDENTIFIER, this.document.getPublic().publicId);
352 
353             // Emit information about linguistic processors: dct:created, dct:creatro, ego:layer
354             String timestamp = null;
355             for (final Map.Entry<String, List<LinguisticProcessor>> entry : this.document
356                     .getLinguisticProcessors().entrySet()) {
357                 emitTriple(nafIRI, KS.LAYER,
358                         this.vf.createIRI(KS.NAMESPACE, "layer_" + entry.getKey()));
359                 for (final LinguisticProcessor lp : entry.getValue()) {
360                     if (timestamp == null) {
361                         if (!Strings.isNullOrEmpty(lp.getBeginTimestamp())) {
362                             timestamp = lp.getBeginTimestamp();
363                         } else if (!Strings.isNullOrEmpty(lp.getEndTimestamp())) {
364                             timestamp = lp.getEndTimestamp();
365                         }
366                     }
367                     final IRI lpIRI = this.vf.createIRI(ModelUtil.cleanIRI(KS.NAMESPACE
368                             + lp.getName() + '.' + lp.getVersion()));
369                     emitTriple(nafIRI, DCTERMS.CREATOR, lpIRI);
370                     emitTriple(lpIRI, DCTERMS.TITLE, lp.getName());
371                     emitTriple(lpIRI, KS.VERSION, lp.getVersion());
372                 }
373             }
374             emitTriple(nafIRI, DCTERMS.CREATED, timestamp);
375 
376         }
377 
378 
379 
380         private void processTimexes() {
381             for (final Timex3 timex : this.document.getTimeExs()) {
382 
383                 //filter only the annotations in the requested sentences
384                 if (timex.getSpan() == null
385                         || this.sentenceIDs[timex.getSpan().getFirstTarget().getSent()]) {
386                     try {
387                         processTimex(timex);
388                     } catch (final Throwable ex) {
389                         LOGGER.error("Error processing " + NAFUtils.toString(timex) + ", type "
390                                 + timex.getType() + ", value " + timex.getValue(), ex);
391                     }
392                 }
393             }
394         }
395 
396         private void processTimex(final Timex3 timex){
397 
398             // Abort if timex has no span (e.g., the DCT)
399             if (timex.getSpan() == null) {
400                 return;
401             }
402 
403             // Extract terms, head and label
404             final List<Term> terms = this.document.getTermsByWFs(timex.getSpan().getTargets());
405             final Term head = NAFUtils.extractHead(this.document, KAFDocument.newTermSpan(terms));
406             final String label = NAFUtils.getText(NAFUtils.filterTerms(terms));
407             final String type = timex.getType().trim().toLowerCase();
408 
409 
410             // create mention if not already existing
411             Mention mention = getMention(head.getId(),terms);
412             final IRI mentionIRI;
413             if (mention==null) {
414                 //emit mentions
415                 mentionIRI = emitMention(terms);
416                 mention = new Mention(head,terms,mentionIRI);
417                 safeMentionPutInMap(head.getId(),mention);
418             } else
419                 //reuse mention IRI
420                 mentionIRI = mention.mentionIRI;
421 
422             this.nafIdMentions.put(timex.getId(),mention);
423 
424             //emit semantic annotation of type timex and store in the map of annotation per mention
425             final IRI semAnnoIRI = createSemanticAnnotationIRI(timex.getId(),mentionIRI,KEMT.TIMEX);
426             Annotation ann = new Annotation(semAnnoIRI,KEMT.TIMEX);
427             safeAnnotationPutInMap(mention,ann);
428 
429             IRI timexIRI = null;
430             // Emit type specific statements
431             if (timex.getValue() != null) {
432                 if (type.equals("date") || type.equals("time")) {
433                     if (type.equals("date")) emitTriple(semAnnoIRI, KEMT.TYPE_P, KEMT.TT_DATE);
434                     else emitTriple(semAnnoIRI, KEMT.TYPE_P, KEMT.TT_TIME);
435 
436                     final OWLTime.Interval interval = OWLTime.Interval
437                             .parseTimex(timex.getValue());
438                     if (interval != null) {
439                         timexIRI = interval.toRDF(RDFHandlers.wrap(this.model),
440                                 NAFExtractor.this.owltimeNamespace, null);
441                     } else {
442                         LOGGER.debug("Could not represent date/time value '" + timex.getValue()
443                                 + "' of " + NAFUtils.toString(timex));
444                     }
445 
446                 } else if (type.equals("duration")) {
447                     emitTriple(semAnnoIRI, KEMT.TYPE_P, KEMT.TT_DURATION);
448                     final OWLTime.Duration duration = OWLTime.Duration
449                             .parseTimex(timex.getValue());
450                     if (duration != null) {
451                         timexIRI = this.vf.createIRI(NAFExtractor.this.owltimeNamespace,
452                                 duration.toString());
453                         final IRI durationIRI = duration.toRDF(RDFHandlers.wrap(this.model),
454                                 NAFExtractor.this.owltimeNamespace, null);
455                         emitTriple(timexIRI, OWLTIME.HAS_DURATION_DESCRIPTION, durationIRI);
456                     } else {
457                         LOGGER.debug("Could not represent duration value '" + timex.getValue()
458                                 + "' of " + NAFUtils.toString(timex));
459                     }
460                 } else {
461 
462                     // TODO: support SET?
463                     throw new UnsupportedOperationException("Unsupported TIMEX3 type: " + type);
464                 }
465             }
466 
467             // Generate a default timex IRI on failure
468             if (timexIRI == null) {
469                 timexIRI = mintIRI(timex.getId(),
470                         MoreObjects.firstNonNull(timex.getValue(), timex.getSpan().getStr()));
471             }
472 
473 //            attach timex to semantic annotation
474             emitTriple(semAnnoIRI, KEMT.OBJECT_VALUE, timexIRI);
475 
476             //attach raw string to timex annotation
477             emitTriple(semAnnoIRI, KEMT.RAW_STRING, emitFragment(terms));
478 
479         }
480 
481 
482 
483         private void processEntities() {
484             for (final Entity entity : this.document.getEntities()) {
485                 for (final Span<Term> span : entity.getSpans()) {
486                     //filter only the annotations in the requested sentences
487                     if (this.sentenceIDs[span.getFirstTarget().getSent()]) {
488                         try {
489                             processEntity(entity);
490                         } catch (final Throwable ex) {
491                             LOGGER.error("Error processing " + NAFUtils.toString(entity)
492                                     + ", type " + entity.getType(), ex);
493                         }
494                         break; // move to next entity
495                     }
496                 }
497             }
498         }
499 
        /**
         * Emits mention, NER, linking and term-level annotations for a single NAF
         * {@code <entity>}. Only the first span of the entity is considered. Value-like NER
         * categories (money, cardinal, ...) in modifier position are discarded; otherwise a
         * mention is created (or reused) and one entity annotation is emitted per NERC external
         * ref (falling back to the entity's own type attribute), plus one linking annotation per
         * dbpedia external ref.
         */
        private void processEntity(final Entity entity) throws RDFHandlerException {

            // Retrieve terms, head and label; abort if no head can be determined
            final List<Term> terms = entity.getSpans().get(0).getTargets();
            final String label = NAFUtils.getText(NAFUtils.filterTerms(terms));
            final Term head = NAFUtils.extractHead(this.document, entity.getSpans().get(0));
            if (head == null) {
                return;
            }

            // Extract type information (type IRI, whether timex or attribute) based on NER tag
            String type = entity.getType();
            type = type == null ? null : type.toLowerCase();
//            final boolean isLinked = !entity.getExternalRefs().isEmpty();
            // Value-like categories are treated as attributes rather than entities
            final boolean isProperty = "money".equals(type) || "cardinal".equals(type)
                    || "ordinal".equals(type) || "percent".equals(type) || "language".equals(type)
                    || "norp".equals(type) || "quantity".equals(type);

            //check if named entity
            final boolean named = entity.isNamed() || "romanticism".equalsIgnoreCase(label)
                    || "operant conditioning chamber".equalsIgnoreCase(label); // TODO

            // Discard attributes in modifier position (NMOD/AMOD dependency to their head)
            final Dep dep = this.document.getDepToTerm(head);
            if (isProperty && dep != null) {
                final String depLabel = dep.getRfunc().toUpperCase();
                if (depLabel.contains("NMOD") || depLabel.contains("AMOD")) {
                    return;
                }
            }

            // create mention if not already existing
            Mention mention = getMention(head.getId(),terms);
            final IRI mentionIRI;
            if (mention==null) {
                //emit mentions
                mentionIRI = emitMention(terms);
                mention = new Mention(head,terms,mentionIRI);
                safeMentionPutInMap(head.getId(),mention);
            } else
                //reuse mention IRI
                mentionIRI = mention.mentionIRI;

            this.nafIdMentions.put(entity.getId(),mention);

            //CREATE THE NER ANNOTATION(S)
            // Prefer NERC types carried by external refs; fall back to the entity's own type
            boolean typeAnnotation = false;
            boolean hasOtherNercTypes = false;
            for (final ExternalRef ref : entity.getExternalRefs()) {
                    final String resource = ref.getResource();
                    if ((resource.equals("value-confidence"))||(resource.equals("nerc-probmodel"))) {
                        hasOtherNercTypes=true;
                        //                        emit semantic annotation
                        String reference = ref.getReference();
                        //emit semantic annotation and store in the map of annotation per mention
                        final IRI semAnnoIRI = createSemanticAnnotationIRI(entity.getId()+reference,mentionIRI,KEMT.ENTITY_ANNOTATION);
                        Annotation ann = new Annotation(semAnnoIRI,KEMT.ENTITY_ANNOTATION);
                        safeAnnotationPutInMap(mention,ann);
                        //emit type
                        emitTriple(semAnnoIRI, ITSRDF.TA_CLASS_REF, this.vf.createIRI(DEFAULT_NER_NAMESPACE+reference));
                        typeAnnotation=true;
                        //emit confidence if available
                        if (ref.hasConfidence()) emitTriple(semAnnoIRI,NIF.CONFIDENCE , ref.getConfidence());
                        if (named) {
                            emitTriple(semAnnoIRI, RDF.TYPE, KEMT.NAMED_ENTITY);
                            emitTriple(semAnnoIRI, KEMT.PROPER_NAME, label);
                        }
                        //attach raw string to annotation
                        emitTriple(semAnnoIRI, KEMT.RAW_STRING, emitFragment(terms));
                    }
            }
            //there are no other nerc types in external ref, use the type attribute of the entity
            if ((!hasOtherNercTypes)&&(type!=null)) {
                //emit semantic annotation of the entity type and store in the map of annotation per mention
                final IRI semAnnoIRI = createSemanticAnnotationIRI(entity.getId()+type,mentionIRI,KEMT.ENTITY_ANNOTATION);
                Annotation ann = new Annotation(semAnnoIRI,KEMT.ENTITY_ANNOTATION);
                safeAnnotationPutInMap(mention,ann);
                emitTriple(semAnnoIRI, ITSRDF.TA_CLASS_REF, this.vf.createIRI(DEFAULT_NER_NAMESPACE+type));
                typeAnnotation=true;
                if (isProperty) {
                    emitEntityAttributes(entity, semAnnoIRI);
                }
                if (named) {
                    emitTriple(semAnnoIRI, RDF.TYPE, KEMT.NAMED_ENTITY);
                    emitTriple(semAnnoIRI, KEMT.PROPER_NAME, label);
                }
                //attach raw string to annotation
                emitTriple(semAnnoIRI, KEMT.RAW_STRING, emitFragment(terms));
            }

            boolean linkingAnnotation = false;
            //CREATE THE LINKING ANNOTATION(S)
            // One linking annotation per dbpedia-* external ref
            for (final ExternalRef ref : entity.getExternalRefs()) {
                final String resource = ref.getResource();
                if (resource.startsWith("dbpedia-")) {
                    final IRI refIRI = this.vf.createIRI(Util.cleanIRI(ref.getReference()));
                    final IRI semAnnoIRI = createSemanticAnnotationIRI(entity.getId()+"_"+refIRI.getLocalName(),mentionIRI,KEMT.ENTITY_ANNOTATION);
                    Annotation ann = new Annotation(semAnnoIRI,KEMT.ENTITY_ANNOTATION);
                    safeAnnotationPutInMap(mention,ann);
                    //emit linking
                    emitTriple(semAnnoIRI, ITSRDF.TA_IDENT_REF, refIRI);
                    linkingAnnotation = true;
                    //emit confidence if available
                    if (ref.hasConfidence()) emitTriple(semAnnoIRI,NIF.CONFIDENCE , ref.getConfidence());
                    //attach raw string to annotation
                    emitTriple(semAnnoIRI, KEMT.RAW_STRING, emitFragment(terms));
                }
            }


            //forceSemanticAnnotationCreation
            //CREATE TERM ANNOTATIONS (WSD, SST)
            // Force a semantic annotation only when neither a type nor a linking one was emitted
            emitCommonAttributesAnnotation(entity.getId()+"_semann",mention,head,terms, (!linkingAnnotation)&&(!typeAnnotation));

        }
616 
617 
618 
619         private void processPredicates(){
620             for (final Predicate predicate : this.document.getPredicates()) {
621                 //filter only the annotations in the requested sentences
622                 if (this.sentenceIDs[predicate.getSpan().getFirstTarget().getSent()]) {
623                     try {
624                         processPredicate(predicate);
625                     } catch (final Throwable ex) {
626                         LOGGER.error("Error processing " + NAFUtils.toString(predicate), ex);
627                     }
628                 }
629             }
630         }
631 
632 
633 
634         private void processPredicate(final Predicate predicate) throws RDFHandlerException {
635 
636             // Retrieve terms, head and label
637             final List<Term> terms = predicate.getSpan().getTargets();
638             final String label = NAFUtils.getText(NAFUtils.filterTerms(terms));
639             final Term head = NAFUtils.extractHead(this.document, predicate.getSpan());
640 
641             // Determine the lemma, handling multiwords
642             final StringBuilder builder = new StringBuilder();
643             for (final Term term : terms) {
644                 builder.append(builder.length() == 0 ? "" : "_");
645                 builder.append(term.getLemma().toLowerCase());
646             }
647             final String lemma = builder.toString();
648 //            todo next should become for UD --> final String POS = head.getUpos();
649             final String POS = head.getPos();
650 
651             // create mention if not already existing
652             Mention mention = getMention(head.getId(),terms);
653             final IRI mentionIRI;
654             if (mention==null) {
655                 //emit mentions
656                 mentionIRI = emitMention(terms);
657                 mention = new Mention(head,terms,mentionIRI);
658                 safeMentionPutInMap(head.getId(),mention);
659             } else
660                 //reuse mention IRI
661                 mentionIRI = mention.mentionIRI;
662 
663             this.nafIdMentions.put(predicate.getId(),mention);
664 
665             //add lemma and pos for framebase mappings
666             emitTriple(mentionIRI,NIF.LEMMA,lemma);
667             //            todo next should become for UD
668 //          emitTriple(mentionIRI,NIF.OLIA_LINK,this.vf.createIRI(DEFAULT_OLIA_UD_POS+POS));
669             emitTriple(mentionIRI,NIF.OLIA_LINK,this.vf.createIRI(DEFAULT_OLIA_PENN_POS+POS));
670 
671             // Process framenet/verbnet/etc external refs
672             for (final ExternalRef ref : predicate.getExternalRefs()) {
673 //                we don't wnat dbpedia on predicates'
674                 if (ref.getResource().startsWith("dbpedia")){
675                     continue;
676                 }
677                 if ("".equals(ref.getReference())) {
678                     continue;
679                 }
680                 final IRI typeIRI = mintRefIRI(ref.getResource(), ref.getReference());
681                 //emit semantic annotation of type timex and store in the map of annotation per mention
682                 final IRI semAnnoIRI = createSemanticAnnotationIRI(predicate.getId()+"_"+typeIRI.getLocalName(),mentionIRI,KEMT.PREDICATE_C);
683                 Annotation ann = new Annotation(semAnnoIRI,KEMT.PREDICATE_C);
684                 safeAnnotationPutInMap(mention,ann);
685 
686                 emitTriple(semAnnoIRI,ITSRDF.TA_CLASS_REF,typeIRI);
687 
688                 //attach raw string to annotation
689                 emitTriple(semAnnoIRI, KEMT.RAW_STRING, emitFragment(terms));
690 
691             }
692 
693             //CREATE TERM ANNOTATIONS (WSD, SST)
694             emitCommonAttributesAnnotation(predicate.getId()+"_semann",mention,head,terms,false);
695         }
696 
697 
698 
699         private void  processCoordinations (){
700 
701 //            hashmap (sentenceID, set of entity annotation in that sentence)
702             Map<Integer, Set<Mention>> sentenceMentions = Maps.newHashMap();
703 //            hashmap (entityA, set of entity depending from entity A via cordination)
704             Map<Mention, Set<Mention>> coordinatedMentions = Maps.newHashMap();
705 
706 
707             // iterate over all entities, and populate an
708             for (String headID: this.mentions.keySet()
709                     ) {
710 
711                 final Mention mention = getBestMention(headID);
712                 final Term head = mention.head;
713                 final Integer sentenceID = head.getSent();
714 
715 //                    store the mention in its sentence bucket
716                 Set<Mention> mentions;
717                 if (sentenceMentions.containsKey(sentenceID))
718                     mentions = sentenceMentions.get(sentenceID);
719                 else
720                     mentions = Sets.newHashSet();
721                 mentions.add(mention);
722                 sentenceMentions.put(mention.head.getSent(), mentions);
723 
724 //                    dependency pattern for coordination
725                 Set<Term> coordinatedTerms = this.document.getTermsByDepAncestors(
726                         Collections.singleton(head), NAFExtractor.COORDINATION_REGEX);
727 
728 //                    there have to be at least two coordinated term, otherwise nothing to do
729                 if (coordinatedTerms.size()>1) {
730                     for (final Term term : coordinatedTerms) {
731 
732 //                        term is an entry in the dependency that is linked to the head of the mention via (COORD CONJ?)*
733                         final Mention depMen = getBestMention(term.getId());
734                         if (depMen != null) {
735 
736 //                                store the dependent annotation in the head annotation bucket
737                             Set<Mention> depMentions;
738                             if (coordinatedMentions.containsKey(mention))
739                                 depMentions = coordinatedMentions.get(mention);
740                             else
741                                 depMentions = Sets.newHashSet();
742                             depMentions.add(depMen);
743                             coordinatedMentions.put(mention, depMentions);
744 
745                         }
746                     }
747 
748                 }
749 
750             }
751 
752 //            Now cycle over sentences and keep maximal coordinatedEntities
753             for (Integer sentenceID:sentenceMentions.keySet()
754                     ) {
755 
756 //                retrieve the mentions in that sentence
757                 Set<Mention> sentMen = sentenceMentions.get(sentenceID);
758                 Set<Mention> mentionsToKeep = Sets.newHashSet();
759 
760                 for (Mention A:sentMen) {
761 //                    check if it has coordinated terms
762                     if (!coordinatedMentions.containsKey(A)) continue;
763                     if (coordinatedMentions.get(A).size()==1) continue;
764                     boolean keep = true;
765                     for (Mention B : sentMen) {
766                         if (A.equals(B)) continue;
767 //                        check if it has coordinated terms
768                         if (!coordinatedMentions.containsKey(B)) continue;
769                         if (coordinatedMentions.get(B).contains(A)) {
770 //                            A is a coordinated term of B, drop A
771                             keep=false;
772                             break;
773                         }
774                     }
775                     if (keep) mentionsToKeep.add(A);
776                 }
777 
778 //                mentionsToKeep contains all the head of "independent" coordination dependency paths to keep
779                 Integer counter = 0;
780                 for (Mention men:mentionsToKeep
781                         ) {
782 
783                     counter++;
784 //                    collect extents and URI of coordinated mentions
785                     List<Term> terms = Lists.newArrayList();
786                     List<IRI> mentionsIRI = Lists.newArrayList();
787                     List<IRI> coordinatedIRI = Lists.newArrayList();
788 
789                     for (Mention depMen: coordinatedMentions.get(men)
790                             ) {
791                         terms.addAll(depMen.extent);
792                         mentionsIRI.add(depMen.mentionIRI);
793 
794                         //emit the entity annotation for each coordinated entity
795                         final IRI semAnnoIRI = createSemanticAnnotationIRI("coordItem",depMen.mentionIRI,KEMT.ENTITY_ANNOTATION);
796                         coordinatedIRI.add(semAnnoIRI);
797                         final Annotation ann = new Annotation(semAnnoIRI,KEMT.ENTITY_ANNOTATION);
798                         safeAnnotationPutInMap(depMen,ann);
799 
800                         //attach raw string to annotation (here is the same as the mention)
801                         emitTriple(semAnnoIRI, KEMT.RAW_STRING, depMen.mentionIRI);
802 
803 
804                     }
805 
806                     //emit group entity mention (it can't already exists)
807                     final IRI groupEntityMentionIRI = emitMention(terms);
808                     final Mention groupEntityMention = new Mention(men.head,terms,groupEntityMentionIRI);
809                     safeMentionPutInMap(men.head.getId(),groupEntityMention);
810 
811                     //emit group entity annotation
812                     final IRI groupEntityIRI = createSemanticAnnotationIRI("group",groupEntityMentionIRI,KEMT.ENTITY_ANNOTATION);
813                     final Annotation groupEntityAnn = new Annotation(groupEntityIRI,KEMT.ENTITY_ANNOTATION);
814                     safeAnnotationPutInMap(groupEntityMention,groupEntityAnn);
815 
816                     //attach raw string to annotation (here is the same as the mention)
817                     emitTriple(groupEntityIRI, KEMT.RAW_STRING, groupEntityMentionIRI);
818 
819                     //emit coordination mention (for the time being, we reuse the group entity one)
820                     final IRI coordinationMentionIRI = groupEntityMentionIRI;
821                     final Mention coordinationMention = new Mention(men.head,terms,coordinationMentionIRI);
822                     safeMentionPutInMap(men.head.getId(),coordinationMention);
823 
824                     //emit semantic annotation of type coordination
825                     final IRI coordinationIRI = createSemanticAnnotationIRI("coord",coordinationMentionIRI,KEMT.COORDINATION);
826                     final Annotation coordinationAnn = new Annotation(groupEntityIRI,KEMT.COORDINATION);
827                     safeAnnotationPutInMap(coordinationMention,coordinationAnn);
828 
829                     //attach raw string to annotation (here is the same as the mention)
830                     emitTriple(coordinationIRI, KEMT.RAW_STRING, coordinationMentionIRI);
831 
832                     emitTriple(coordinationIRI,KEMT.GROUP,groupEntityIRI);
833 
834                     for (IRI conjunctIRI:coordinatedIRI
835                             )
836                         emitTriple(coordinationIRI,KEMT.CONJUNCT,conjunctIRI);
837 
838                     for (IRI conjunctMentionIRI:mentionsIRI)
839                         emitTriple(coordinationIRI,KEMT.CONJUNCT_STRING,conjunctMentionIRI);
840                 }
841             }
842         }
843 
844 
845 
846         private void processCoreferences() {
847             for (final Coref coref : this.document.getCorefs()) {
848                 if ("event".equalsIgnoreCase(coref.getType())) {
849                     continue;
850                 }
851                 final List<Span<Term>> spans = Lists.newArrayList();
852                 for (final Span<Term> span : coref.getSpans()) {
853                     if (this.sentenceIDs[span.getFirstTarget().getSent()]) {
854                         spans.add(span);
855                     }
856                 }
857                 if (!spans.isEmpty()) {
858                     try {
859                         processCoref(spans,coref.getId());
860                     } catch (final Throwable ex) {
861                         LOGGER.error("Error processing " + NAFUtils.toString(coref), ex);
862                     }
863                 }
864             }
865         }
866 
867         @SuppressWarnings("Duplicates")
868         private void processCoref(final List<Span<Term>> spans, String corefID) {
869 
870             // Build three correlated lists containing, for each member of the coref cluster, its
871             // span, the head terms in the span and the associated IRIs
872             final List<Span<Term>> corefSpans = Lists.newArrayList();
873             final List<Term> corefRawTerms = Lists.newArrayList();
874             final List<Mention> corefMentions = Lists.newArrayList();
875             final List<Term> corefMentionTerms = Lists.newArrayList();
876 
877             //iterate over all spans of a coreference, keep only the spans for which there exists a mention
878             for (final Span<Term> span : spans) {
879                 final Term head = NAFUtils.extractHead(this.document, span);
880                 if (head != null) {
881                     Mention correspondingMention = getBestMention(head.getId());
882                     if (correspondingMention!=null) {
883                         corefMentions.add(correspondingMention);
884                         corefSpans.add(span);
885                         corefMentionTerms.addAll(correspondingMention.extent);
886                         corefRawTerms.addAll(span.getTargets());
887                     }
888                 }
889             }
890 
891             // Abort in case there is only one remaining member in the coref cluster
892             if (corefSpans.size() <= 1) {
893                 return;
894             }
895 
896             //there is no need to create a Mention object, as cooreferences are relations (i.e., we will not attach a role to a coreference mention...
897             //emit coreference mention (it can't already exists)
898             final IRI coreferenceMentionIRI = emitMention(corefMentionTerms);
899 
900             //emit coreference annotation annotation
901             final IRI coreferenceIRI = createSemanticAnnotationIRI(corefID,coreferenceMentionIRI,KEMT.COREFERENCE);
902 
903             for (int i = 0; i < corefMentions.size(); i++) {
904                 //emit coreferent
905                 final IRI coreferentIRI = createSemanticAnnotationIRI(corefID,corefMentions.get(i).mentionIRI,KEMT.ENTITY_ANNOTATION);
906                 emitTriple(coreferenceIRI,KEMT.COREFERRING,coreferentIRI);
907                 //emit coreferent raw string (i.e., its original span)
908                 emitTriple(coreferentIRI, KEMT.RAW_STRING, emitFragment(corefSpans.get(i).getTargets()));
909             }
910 
911             //emit coreference raw string (i.e., its original span)
912             emitTriple(coreferenceIRI, KEMT.RAW_STRING, emitFragment(corefRawTerms));
913         }
914 
915         private void processRoles() {
916             for (final Predicate predicate : this.document.getPredicates()) {
917                 for (final Role role : predicate.getRoles()) {
918                     final Term roleHead = NAFUtils.extractHead(this.document, role.getSpan());
919                     if (roleHead != null) {
920 
921 
922                         final Set<Term> argHeads = this.document.getTermsByDepAncestors(
923                                 Collections.singleton(roleHead), PARTICIPATION_REGEX);
924 
925                         for (final Term argHead : argHeads) {
926                             try {
927                                 processRole(predicate, role, argHead);
928                             } catch (final Throwable ex) {
929                                 LOGGER.error("Error processing " + NAFUtils.toString(role)
930                                         + " of " + NAFUtils.toString(predicate)
931                                         + ", argument " + NAFUtils.toString(argHead), ex);
932                             }
933                         }
934                     }
935                 }
936             }
937         }
938 
939 
940         private void processRole(final Predicate predicate, final Role role, final Term argHead) {
941 
942 
943             //get predicate mention
944             final Mention predMention = this.nafIdMentions.get(predicate.getId());
945 
946             //get the role mention
947             Mention correspondingMention = getBestMention(argHead.getId());
948             if (correspondingMention==null) return;
949 
950             //emit fake predicate and role for participation relation
951             final IRI fakePredIRI = createSemanticAnnotationIRI(predicate.getId(),predMention.mentionIRI,KEMT.PREDICATE_C);
952             final IRI fakeRoleIRI = createSemanticAnnotationIRI(role.getId()+"_"+argHead.getId(),correspondingMention.mentionIRI,KEMT.ARGUMENT_C);
953 
954             //emit fake predicate and role raw string
955             final IRI fakePredRawString = emitFragment(predicate.getSpan().getTargets());
956             emitTriple(fakePredIRI,KEMT.RAW_STRING,fakePredRawString);
957             final IRI fakeRoleRawString = emitFragment(role.getSpan().getTargets());
958             emitTriple(fakeRoleIRI,KEMT.RAW_STRING,fakeRoleRawString);
959 
960             //emit participation mention
961             final IRI partMentionIRI = emitMention(Stream.concat(predMention.extent.stream(), correspondingMention.extent.stream())
962                     .collect(Collectors.toList()));
963             //emit participation raw string
964             final IRI partRawIRI = emitMention(Stream.concat(predicate.getSpan().getTargets().stream(), role.getSpan().getTargets().stream())
965                     .collect(Collectors.toList()));
966             //emit participation annotation
967             final IRI participationIRI = createSemanticAnnotationIRI(predicate.getId()+"_"+role.getId()+"_"+argHead.getId(),partMentionIRI,KEMT.PARTICIPATION);
968 
969             emitTriple(participationIRI,KEMT.PREDICATE_P,fakePredIRI);
970             emitTriple(participationIRI,KEMT.ARGUMENT_P,fakeRoleIRI);
971             emitTriple(participationIRI,KEMT.RAW_STRING,partRawIRI);
972 
973             for (final ExternalRef ref : role.getExternalRefs()) {
974                 if ("".equals(ref.getReference())) {
975                     continue;
976                 }
977                 //emit role annotation
978                 final IRI typeIRI = mintRefIRI(ref.getResource(), ref.getReference());
979                 final IRI roleIRI = createSemanticAnnotationIRI(role.getId()+"_"+argHead.getId()+"_"+typeIRI.getLocalName(),correspondingMention.mentionIRI,KEMT.ARGUMENT_C);
980                 Annotation ann = new Annotation(roleIRI,KEMT.ARGUMENT_C);
981                 safeAnnotationPutInMap(correspondingMention,ann);
982                 emitTriple(roleIRI,ITSRDF.TA_PROP_REF,typeIRI);
983                 emitTriple(roleIRI,KEMT.RAW_STRING,fakeRoleRawString);
984             }
985         }
986 
987 
988         @Nullable
989         private IRI emitMention(final Iterable<Term> terms) {
990 
991             final List<Term> sortedTerms = Ordering.from(Term.OFFSET_COMPARATOR).sortedCopy(terms);
992             final int numTerms = sortedTerms.size();
993             if (numTerms == 0) {
994                 return null;
995             }
996 
997             final IRI mentionID = emitFragment(sortedTerms);
998             emitTriple(mentionID, RDF.TYPE, KEM.MENTION);
999             return mentionID;
1000         }
1001 
1002 
        /**
         * Emits an NIF fragment for the supplied terms, splitting discontiguous term
         * runs into component fragments (recursively emitted) grouped under a
         * kem:CompositeFragment. The fragment IRI encodes the covered character
         * ranges as {@code <docIRI>#char=b1,e1;b2,e2,...}.
         *
         * @param terms the terms to cover, possibly unsorted
         * @return the fragment IRI, or {@code null} when no terms were supplied
         */
        private IRI emitFragment(final Iterable<Term> terms) {

            final List<Term> sortedTerms = Ordering.from(Term.OFFSET_COMPARATOR).sortedCopy(terms);
            final int numTerms = sortedTerms.size();
            if (numTerms == 0) {
                return null;
            }

            final String text = this.documentText;
            final List<IRI> componentIRIs = Lists.newArrayList();
            final int begin = NAFUtils.getBegin(sortedTerms.get(0));
            // 'offset' tracks the end of the last term consumed; 'startTermIdx' marks
            // the first term of the current contiguous run
            int offset = begin;
            int startTermIdx = 0;

            final StringBuilder anchorBuilder = new StringBuilder();
            final StringBuilder uriBuilder = new StringBuilder(this.documentIRI.stringValue())
                    .append("#char=").append(begin).append(",");

            for (int i = 0; i < numTerms; ++i) {
                final Term term = sortedTerms.get(i);
                final int termOffset = NAFUtils.getBegin(term);
                // a gap containing non-whitespace text closes the current run: emit it
                // as a component fragment and start a new run at term i
                if (termOffset > offset && !text.substring(offset, termOffset).trim().isEmpty()) {
                    final int start = NAFUtils.getBegin(sortedTerms.get(startTermIdx));
                    anchorBuilder.append(text.substring(start, offset)).append(" [...] ");
                    uriBuilder.append(offset).append(";").append(termOffset).append(',');
                    componentIRIs.add(emitFragment(sortedTerms.subList(startTermIdx, i)));
                    startTermIdx = i;
                }
                offset = NAFUtils.getEnd(term);
            }
            // if at least one split occurred, the trailing run is also a component
            if (startTermIdx > 0) {
                componentIRIs.add(emitFragment(sortedTerms.subList(startTermIdx, numTerms)));
            }


            // append the text of the last (or only) run and close the char-range IRI
            anchorBuilder.append(text.substring(NAFUtils.getBegin(sortedTerms.get(startTermIdx)),
                    offset));
            uriBuilder.append(offset);

            final String anchor = anchorBuilder.toString();
            final IRI fragmentID = this.vf.createIRI(uriBuilder.toString());
            emitTriple(fragmentID, KEM.FRAGMENT_OF, this.documentIRI);


//            if not composite --> RFC5147
            if (!componentIRIs.isEmpty()) {
                emitTriple(fragmentID, RDF.TYPE, KEM.COMPOSITE_FRAGMENT);
                for (final IRI componentIRI : componentIRIs) {
                    emitTriple(fragmentID, KEM.HAS_COMPONENT, componentIRI);
                }
            } else emitTriple(fragmentID, RDF.TYPE, NIF.RFC5147_STRING);

            // begin/end indexes span the whole (possibly discontiguous) fragment
            emitTriple(fragmentID, NIF.BEGIN_INDEX, this.vf.createLiteral(begin));
            emitTriple(fragmentID, NIF.END_INDEX, this.vf.createLiteral(offset));
            emitTriple(fragmentID, NIF.ANCHOR_OF, this.vf.createLiteral(anchor));

            return fragmentID;
        }
1061 
1062 
1063         private IRI createSemanticAnnotationIRI(final String id, final IRI mentionIRI, final IRI type){
1064 
1065             final IRI semanticAnnotationIRI = this.vf.createIRI(mentionIRI.toString()+id);
1066             this.model.add(semanticAnnotationIRI,RDF.TYPE,type);
1067             this.model.add(mentionIRI,KEM.HAS_ANNOTATION,semanticAnnotationIRI);
1068 
1069             return semanticAnnotationIRI;
1070 
1071         }
1072 
1073         private void emitTriple(@Nullable final IRI subject, @Nullable final IRI property,
1074                               @Nullable final Object objects) {
1075             if (subject != null && property != null) {
1076                 for (final Value object : extract(Value.class, objects,
1077                         RDF.TYPE.equals(property) ? NAFExtractor.this.typeMap : null)) {
1078                     this.model.add(this.vf.createStatement(subject, property, object));
1079                 }
1080             }
1081         }
1082 
1083         private IRI mintIRI(final String id, @Nullable final String suggestedLocalName) {
1084             String localName = this.mintedIRIs.get(id);
1085             if (localName == null) {
1086                 final String name = MoreObjects.firstNonNull(suggestedLocalName, id);
1087                 final StringBuilder builder = new StringBuilder();
1088                 for (int i = 0; i < name.length(); ++i) {
1089                     final char c = name.charAt(i);
1090                     builder.append(Character.isWhitespace(c) ? '_' : c);
1091                 }
1092                 final String base = builder.toString();
1093                 int counter = 1;
1094                 while (true) {
1095                     localName = base + (counter == 1 ? "" : "_" + counter);
1096                     if (!this.mintedIRIs.inverse().containsKey(localName)) {
1097                         this.mintedIRIs.put(id, localName);
1098                         break;
1099                     }
1100                     ++counter;
1101                 }
1102             }
1103             return this.vf.createIRI(Util.cleanIRI(this.documentIRI + "#" + localName));
1104         }
1105 
1106 
1107         @Nullable
1108         private IRI mintRefIRI(@Nullable final String resource, @Nullable final String reference) {
1109             if (!Strings.isNullOrEmpty(resource) && !Strings.isNullOrEmpty(reference)) {
1110                 final String normResource = resource.toLowerCase();
1111                 final String namespace = NAFExtractor.this.namespaceMap.get(normResource);
1112                 if (namespace != null) {
1113                     return this.vf
1114                             .createIRI(Util.cleanIRI(namespace + reference.replace('#', '.')));
1115                 } else System.out.println(normResource);
1116             }
1117             return null;
1118         }
1119 
1120 
1121         private void emitEntityAttributes(final Entity entity, final IRI subject)
1122                 throws RDFHandlerException {
1123 
1124             // Retrieve normalized value and NER tag
1125             final ExternalRef valueRef = NAFUtils.getRef(entity, "value", null);
1126             String nerTag = entity.getType();
1127             nerTag = nerTag == null ? null : nerTag.toLowerCase();
1128 
1129                 if (valueRef != null) {
1130                 // Otherwise, we use the normalized value from Stanford
1131                 try {
1132                     final String s = valueRef.getReference().trim();
1133                     if (s.isEmpty()) {
1134                         return;
1135                     }
1136                     if (Objects.equal(nerTag, "cardinal") || Objects.equal(nerTag, "quantity")) {
1137                         emitTriple(subject, KEMT.OBJECT_VALUE, Double.parseDouble(s));
1138 
1139                     } else if (Objects.equal(nerTag, "ordinal")) {
1140                         emitTriple(subject, KEMT.OBJECT_VALUE, Double.parseDouble(s));
1141 
1142                     } else if (Objects.equal(nerTag, "percent")) {
1143                         final int index = s.indexOf('%');
1144                         emitTriple(subject, KEMT.OBJECT_VALUE, Double.parseDouble(s.substring(index + 1)));
1145                     } else if (Objects.equal(nerTag, "money")) {
1146                         int index = 0;
1147                         while (index < s.length()) {
1148                             final char c = s.charAt(index);
1149                             if (c == '€') {
1150                                 emitTriple(subject, KEMT.UNIT, "EUR");
1151                             } else if (c == '$') {
1152                                 emitTriple(subject, KEMT.UNIT, "USD");
1153                             } else if (c == '¥') {
1154                                 emitTriple(subject, KEMT.UNIT, "YEN");
1155                             } else if (Character.isDigit(c)) {
1156                                 break;
1157                             }
1158                             ++index;
1159                         }
1160                         emitTriple(subject, KEMT.OBJECT_VALUE, Double.parseDouble(s.substring(index)));
1161                     }
1162                 } catch (final NumberFormatException ex) {
1163                     LOGGER.debug("Could not process normalized value: " + valueRef.getReference());
1164                 }
1165             }
1166         }
1167 
1168 
1169 
1170         private void emitCommonAttributesAnnotation(final String id, final Mention mention, final Term head, final List<Term> terms, final boolean forceSemanticAnnotationCreation)
1171                 throws RDFHandlerException {
1172 
1173             //create semann only if
1174 
1175             final ExternalRef sstRef = NAFUtils.getRef(head, NAFUtils.RESOURCE_WN_SST, null);
1176             final ExternalRef synsetRef = NAFUtils.getRef(head, NAFUtils.RESOURCE_WN_SYNSET, null);
1177             final ExternalRef bbnRef = NAFUtils.getRef(head, NAFUtils.RESOURCE_BBN, null);
1178 
1179             if ((forceSemanticAnnotationCreation)||(sstRef != null)||(synsetRef != null)||(bbnRef != null)) {
1180 
1181                 final IRI semanticAnnotationIRI = createSemanticAnnotationIRI(id, mention.mentionIRI, KEMT.ENTITY_ANNOTATION);
1182                 Annotation ann = new Annotation(semanticAnnotationIRI, KEM.SEMANTIC_ANNOTATION);
1183                 safeAnnotationPutInMap(mention, ann);
1184 
1185                 //WN SST
1186                 if (sstRef != null) {
1187                     final String sst = sstRef.getReference();
1188                     final IRI uri = this.vf.createIRI(DEFAULT_WN_SST_NAMESPACE,
1189                             sst.substring(sst.lastIndexOf('-') + 1));
1190                     emitTriple(semanticAnnotationIRI, ITSRDF.TERM_INFO_REF, uri);
1191                 }
1192 
1193                 //WN SYNSET
1194 
1195                 if (synsetRef != null) {
1196                     final IRI uri = this.vf.createIRI(DEFAULT_WN_SYN_NAMESPACE,
1197                             synsetRef.getReference());
1198                     emitTriple(semanticAnnotationIRI, ITSRDF.TERM_INFO_REF, uri);
1199                 }
1200 
1201                 //BBN
1202 
1203                 if (bbnRef != null) {
1204                     final IRI uri = this.vf.createIRI(DEFAULT_BBN_NAMESPACE,
1205                             bbnRef.getReference());
1206                     emitTriple(semanticAnnotationIRI, ITSRDF.TERM_INFO_REF, uri);
1207                 }
1208 
1209                 //attach raw string to timex annotation
1210                 emitTriple(semanticAnnotationIRI, KEMT.RAW_STRING, emitFragment(terms));
1211             }
1212         }
1213 
1214 
1215 
1216 
1217 
1218         private <T extends Value> Collection<T> extract(final Class<T> clazz,
1219                                                         @Nullable final Object object, @Nullable final Multimap<String, ? extends T> map) {
1220             if (object == null) {
1221                 return ImmutableList.of();
1222             } else if (clazz.isInstance(object)) {
1223                 return ImmutableList.of((T) object);
1224             } else if (object instanceof Iterable<?>) {
1225                 final List<T> list = Lists.newArrayList();
1226                 for (final Object element : (Iterable<?>) object) {
1227                     list.addAll(extract(clazz, element, map));
1228                 }
1229                 return list;
1230             } else if (object.getClass().isArray()) {
1231                 final List<T> list = Lists.newArrayList();
1232                 final int length = Array.getLength(object);
1233                 for (int i = 0; i < length; ++i) {
1234                     list.addAll(extract(clazz, Array.get(object, i), map));
1235                 }
1236                 return list;
1237             } else if (map != null) {
1238                 return (Collection<T>) map.get(object.toString());
1239             } else {
1240                 return ImmutableList.of(Statements.convert(object, clazz));
1241             }
1242         }
1243 
1244     }
1245 
    /**
     * Returns a new {@link Builder} for configuring and creating a {@code NAFExtractor}.
     *
     * @return a fresh builder instance
     */
    public static Builder builder() {
        return new Builder();
    }
1249 
    /**
     * Fluent builder for {@link NAFExtractor} instances. All settings are optional;
     * each {@code withX} method returns this builder for call chaining.
     */
    public static final class Builder {

        // resource name -> rdf:type IRIs mapping
        @Nullable
        private Multimap<String, IRI> typeMap;

        // resource name -> property IRIs mapping
        @Nullable
        private Multimap<String, IRI> propertyMap;

        // external resource name -> IRI namespace mapping
        @Nullable
        private Map<String, String> namespaceMap;

        // namespace used for OWL Time IRIs
        @Nullable
        private String owltimeNamespace;

        // whether merging ("fusion") is enabled; null = default
        @Nullable
        private Boolean merging;

        // whether normalization is enabled; null = default
        @Nullable
        private Boolean normalization;

        /**
         * Sets all the properties in the map supplied, matching an optional prefix.
         *
         * @param properties
         *            the properties to configure, not null
         * @param prefix
         *            an optional prefix used to select the relevant properties in the map
         * @return this builder object, for call chaining
         */
        public Builder withProperties(final Map<?, ?> properties, @Nullable final String prefix) {
            // normalize the prefix so it always ends with '.' (or is empty)
            final String p = prefix == null ? "" : prefix.endsWith(".") ? prefix : prefix + ".";
            for (final Map.Entry<?, ?> entry : properties.entrySet()) {
                if (entry.getKey() != null && entry.getValue() != null
                        && entry.getKey().toString().startsWith(p)) {
                    final String name = entry.getKey().toString().substring(p.length());
                    final String value = Strings.emptyToNull(entry.getValue().toString());
                    // only "fusion" and "normalization" are recognized; others ignored
                    if ("fusion".equals(name)) {
                        withMerging(Boolean.valueOf(value));
                    } else if ("normalization".equals(name)) {
                        withNormalization(Boolean.valueOf(value));
                    }
                }
            }
            return this;
        }

        /**
         * Sets the mapping from resource names to rdf:type IRIs ({@code null} to unset).
         *
         * @param typeMap the type map, may be null
         * @return this builder object, for call chaining
         */
        public Builder withTypeMap(@Nullable final Multimap<String, IRI> typeMap) {
            this.typeMap = typeMap;
            return this;
        }

        /**
         * Sets the mapping from resource names to property IRIs ({@code null} to unset).
         *
         * @param propertyMap the property map, may be null
         * @return this builder object, for call chaining
         */
        public Builder withPropertyMap(@Nullable final Multimap<String, IRI> propertyMap) {
            this.propertyMap = propertyMap;
            return this;
        }

        /**
         * Sets the mapping from external resource names to IRI namespaces
         * ({@code null} to unset).
         *
         * @param namespaceMap the namespace map, may be null
         * @return this builder object, for call chaining
         */
        public Builder withNamespaceMap(@Nullable final Map<String, String> namespaceMap) {
            this.namespaceMap = namespaceMap;
            return this;
        }

        /**
         * Sets the namespace used when minting OWL Time IRIs ({@code null} to unset).
         *
         * @param owltimeNamespace the OWL Time namespace, may be null
         * @return this builder object, for call chaining
         */
        public Builder withOWLTimeNamespace(@Nullable final String owltimeNamespace) {
            this.owltimeNamespace = owltimeNamespace;
            return this;
        }

        /**
         * Enables or disables merging ({@code null} for the default behavior).
         *
         * @param merging the merging flag, may be null
         * @return this builder object, for call chaining
         */
        public Builder withMerging(@Nullable final Boolean merging) {
            this.merging = merging;
            return this;
        }

        /**
         * Enables or disables normalization ({@code null} for the default behavior).
         *
         * @param normalization the normalization flag, may be null
         * @return this builder object, for call chaining
         */
        public Builder withNormalization(@Nullable final Boolean normalization) {
            this.normalization = normalization;
            return this;
        }

        /**
         * Creates a {@link NAFExtractor} configured with this builder's settings.
         *
         * @return a new extractor instance
         */
        public NAFExtractor build() {
            return new NAFExtractor(this);
        }

    }
1331 
1332 
1333     private static final class Mention {
1334 
1335         IRI mentionIRI;
1336         final Term head;
1337         final List<Term> extent;
1338 
1339         Mention(final Term head, final Iterable<Term> extent, final IRI mentionIRI) {
1340             this.head = head;
1341             this.extent = ImmutableList.copyOf(extent);
1342             this.mentionIRI = mentionIRI;
1343         }
1344     }
1345 
1346 
1347     private static final class Annotation {
1348 
1349         IRI annotationIRI;
1350         IRI type;
1351 
1352         Annotation(final IRI annotationIRI, final IRI type) {
1353             this.annotationIRI = annotationIRI;
1354             this.type = type;
1355         }
1356     }
1357 }