1   package eu.fbk.dkm.pikes.rdf;
2   
3   import java.io.File;
4   import java.lang.reflect.Array;
5   import java.nio.file.Path;
6   import java.util.Arrays;
7   import java.util.Collection;
8   import java.util.Collections;
9   import java.util.Iterator;
10  import java.util.List;
11  import java.util.Map;
12  import java.util.Set;
13  import java.util.concurrent.CountDownLatch;
14  import java.util.concurrent.atomic.AtomicInteger;
15  
16  import javax.annotation.Nullable;
17  
18  import com.google.common.base.MoreObjects;
19  import com.google.common.base.Objects;
20  import com.google.common.base.Strings;
21  import com.google.common.collect.BiMap;
22  import com.google.common.collect.HashBiMap;
23  import com.google.common.collect.HashMultimap;
24  import com.google.common.collect.ImmutableList;
25  import com.google.common.collect.ImmutableMap;
26  import com.google.common.collect.ImmutableMultimap;
27  import com.google.common.collect.ImmutableSet;
28  import com.google.common.collect.Iterables;
29  import com.google.common.collect.Lists;
30  import com.google.common.collect.Maps;
31  import com.google.common.collect.Multimap;
32  import com.google.common.collect.Ordering;
33  import com.google.common.collect.Sets;
34  import com.google.common.io.Files;
35  
36  import eu.fbk.dkm.pikes.rdf.vocab.*;
37  import org.eclipse.rdf4j.model.BNode;
38  import org.eclipse.rdf4j.model.Literal;
39  import org.eclipse.rdf4j.model.Model;
40  import org.eclipse.rdf4j.model.Resource;
41  import org.eclipse.rdf4j.model.Statement;
42  import org.eclipse.rdf4j.model.IRI;
43  import org.eclipse.rdf4j.model.Value;
44  import org.eclipse.rdf4j.model.ValueFactory;
45  import org.eclipse.rdf4j.model.impl.LinkedHashModel;
46  import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
47  import org.eclipse.rdf4j.model.impl.ValueFactoryImpl;
48  import org.eclipse.rdf4j.model.vocabulary.*;
49  import org.eclipse.rdf4j.rio.RDFHandler;
50  import org.eclipse.rdf4j.rio.RDFHandlerException;
51  import org.slf4j.Logger;
52  import org.slf4j.LoggerFactory;
53  import org.slf4j.MDC;
54  
55  import ixa.kaflib.Coref;
56  import ixa.kaflib.Dep;
57  import ixa.kaflib.Entity;
58  import ixa.kaflib.ExternalRef;
59  import ixa.kaflib.Factuality;
60  import ixa.kaflib.KAFDocument;
61  import ixa.kaflib.KAFDocument.FileDesc;
62  import ixa.kaflib.LinguisticProcessor;
63  import ixa.kaflib.Opinion;
64  import ixa.kaflib.Opinion.OpinionHolder;
65  import ixa.kaflib.Opinion.OpinionTarget;
66  import ixa.kaflib.Opinion.Polarity;
67  import ixa.kaflib.Predicate;
68  import ixa.kaflib.Predicate.Role;
69  import ixa.kaflib.Span;
70  import ixa.kaflib.Term;
71  import ixa.kaflib.Timex3;
72  import ixa.kaflib.WF;
73  
74  import eu.fbk.dkm.pikes.naflib.Corpus;
75  import eu.fbk.dkm.pikes.rdf.util.ModelUtil;
76  import eu.fbk.dkm.pikes.rdf.util.OWLTime;
77  import eu.fbk.dkm.pikes.rdf.util.ProcessorASNorm;
78  import eu.fbk.dkm.pikes.resources.NAFFilter;
79  import eu.fbk.dkm.pikes.resources.NAFUtils;
80  import eu.fbk.dkm.pikes.resources.PropBank;
81  import eu.fbk.dkm.pikes.resources.Sumo;
82  import eu.fbk.dkm.pikes.resources.WordNet;
83  import eu.fbk.dkm.pikes.resources.YagoTaxonomy;
84  import eu.fbk.utils.svm.Util;
85  import eu.fbk.rdfpro.RDFHandlers;
86  import eu.fbk.rdfpro.RDFProcessors;
87  import eu.fbk.rdfpro.RDFSource;
88  import eu.fbk.rdfpro.RDFSources;
89  import eu.fbk.rdfpro.util.Environment;
90  import eu.fbk.rdfpro.util.Hash;
91  import eu.fbk.rdfpro.util.Options;
92  import eu.fbk.rdfpro.util.QuadModel;
93  import eu.fbk.rdfpro.util.Statements;
94  import eu.fbk.rdfpro.util.Tracker;
95  
96  // entity.type
97  // instance
98  
99  public final class RDFGenerator {
100 
101     private static final Logger LOGGER = LoggerFactory.getLogger(RDFGenerator.class);
102 
103     private static final ValueFactory FACTORY = SimpleValueFactory.getInstance();
104 
105     //todo adapta to UD
106     private static final String MODIFIER_REGEX = "(NMOD|AMOD|TMP|LOC|TITLE) PMOD? (COORD CONJ?)* PMOD?";
107 
108     //todo adapta to UD
109     private static final String PARTICIPATION_REGEX = ""
110             + "SUB? (COORD CONJ?)* (PMOD (COORD CONJ?)*)? ((VC OPRD?)|(IM OPRD?))*";
111 
112     private static final Multimap<String, IRI> DEFAULT_TYPE_MAP = ImmutableMultimap
113             .<String, IRI>builder() //
114             .put("entity.person", NWR.PERSON) //
115             .put("entity.organization", NWR.ORGANIZATION) //
116             .put("entity.location", NWR.LOCATION) //
117             .put("entity.misc", NWR.MISC) //
118             .put("entity.money", GR.PRICE_SPECIFICATION) //
119             .put("entity.date", OWLTIME.DATE_TIME_INTERVAL) //
120             .put("entity.time", OWLTIME.DATE_TIME_INTERVAL) //
121             .put("timex.date", OWLTIME.DATE_TIME_INTERVAL) //
122             .put("timex.duration", OWLTIME.PROPER_INTERVAL) //
123             .build();
124 
125     private static final Map<String, String> DEFAULT_NAMESPACE_MAP = ImmutableMap
126             .<String, String>builder()
127             .put("propbank", "http://www.newsreader-project.eu/ontologies/propbank/")
128             .put("nombank", "http://www.newsreader-project.eu/ontologies/nombank/")
129             .put("framenet", "http://www.newsreader-project.eu/ontologies/framenet/")
130             .put("verbnet", "http://www.newsreader-project.eu/ontologies/verbnet/")
131             .put("premon+propbank", "http://premon.fbk.eu/resource/")
132             .put("premon+nombank", "http://premon.fbk.eu/resource/")
133             .put("premon+framenet", "http://premon.fbk.eu/resource/")
134             .put("premon+verbnet", "http://premon.fbk.eu/resource/")
135             .put("eso", "http://www.newsreader-project.eu/domain-ontology#")
136             .put("framebase", "http://framebase.org/ns/") //
137             .put("attribute", "attr:")
138             // TODO: change this namespace
139             .put("syn", "http://wordnet-rdf.princeton.edu/wn30/")
140             // TODO .put("conn", "http://www.newsreader-project.eu/conn/")
141             .put("sumo", SUMO.NAMESPACE).put("yago", YagoTaxonomy.NAMESPACE).build();
142 
143     private static final String DEFAULT_OWLTIME_NAMESPACE = "http://www.newsreader-project.eu/time/";
144 
145     public static final RDFGenerator DEFAULT = RDFGenerator.builder().build();
146 
147     private final Multimap<String, IRI> typeMap;
148 
149     private final Map<String, String> namespaceMap;
150 
151     private final String owltimeNamespace;
152 
153     private final boolean merging;
154 
155     private final boolean normalization;
156 
157     private RDFGenerator(final Builder builder) {
158         this.typeMap = ImmutableMultimap.copyOf(MoreObjects.firstNonNull(builder.typeMap,
159                 DEFAULT_TYPE_MAP));
160         this.namespaceMap = ImmutableMap.copyOf(MoreObjects.firstNonNull(builder.namespaceMap,
161                 DEFAULT_NAMESPACE_MAP));
162         this.owltimeNamespace = MoreObjects.firstNonNull(builder.owltimeNamespace,
163                 DEFAULT_OWLTIME_NAMESPACE);
164         this.merging = MoreObjects.firstNonNull(builder.merging, Boolean.FALSE);
165         this.normalization = MoreObjects.firstNonNull(builder.normalization, Boolean.FALSE);
166     }
167 
168     public Model generate(final KAFDocument document, @Nullable final Iterable<Integer> sentenceIDs) {
169         final Model model = new LinkedHashModel();
170         generate(document, sentenceIDs, model);
171         return model;
172     }
173 
174     public void generate(final KAFDocument document,
175             @Nullable final Iterable<Integer> sentenceIDs,
176             final Collection<? super Statement> output) {
177         final RDFHandler handler = RDFHandlers.wrap(output);
178         try {
179             generate(document, sentenceIDs, handler);
180         } catch (final Throwable ex) {
181             throw new RuntimeException("Unexpected exception (!)", ex);
182         }
183     }
184 
185     public void generate(final KAFDocument document,
186             @Nullable final Iterable<Integer> sentenceIDs, final RDFHandler handler)
187             throws RDFHandlerException {
188 
189         final boolean[] ids = new boolean[document.getNumSentences() + 1];
190         if (sentenceIDs == null) {
191             Arrays.fill(ids, true);
192         } else {
193             for (final Integer sentenceID : sentenceIDs) {
194                 ids[sentenceID] = true;
195             }
196         }
197 
198         final String baseIRI = document.getPublic().uri;
199         new Extractor(baseIRI, handler, document, ids).run();
200     }
201 
202     public static Builder builder() {
203         return new Builder();
204     }
205 
206     public static final class Builder {
207 
208         @Nullable
209         private Multimap<String, IRI> typeMap;
210 
211         @Nullable
212         private Multimap<String, IRI> propertyMap;
213 
214         @Nullable
215         private Map<String, String> namespaceMap;
216 
217         @Nullable
218         private String owltimeNamespace;
219 
220         @Nullable
221         private Boolean merging;
222 
223         @Nullable
224         private Boolean normalization;
225 
226         /**
227          * Sets all the properties in the map supplied, matching an optional prefix.
228          *
229          * @param properties
230          *            the properties to configure, not null
231          * @param prefix
232          *            an optional prefix used to select the relevant properties in the map
233          * @return this builder object, for call chaining
234          */
235         public Builder withProperties(final Map<?, ?> properties, @Nullable final String prefix) {
236             final String p = prefix == null ? "" : prefix.endsWith(".") ? prefix : prefix + ".";
237             for (final Map.Entry<?, ?> entry : properties.entrySet()) {
238                 if (entry.getKey() != null && entry.getValue() != null
239                         && entry.getKey().toString().startsWith(p)) {
240                     final String name = entry.getKey().toString().substring(p.length());
241                     final String value = Strings.emptyToNull(entry.getValue().toString());
242                     if ("fusion".equals(name)) {
243                         withMerging(Boolean.valueOf(value));
244                     } else if ("normalization".equals(name)) {
245                         withNormalization(Boolean.valueOf(value));
246                     }
247                 }
248             }
249             return this;
250         }
251 
252         public Builder withTypeMap(@Nullable final Multimap<String, IRI> typeMap) {
253             this.typeMap = typeMap;
254             return this;
255         }
256 
257         public Builder withPropertyMap(@Nullable final Multimap<String, IRI> propertyMap) {
258             this.propertyMap = propertyMap;
259             return this;
260         }
261 
262         public Builder withNamespaceMap(@Nullable final Map<String, String> namespaceMap) {
263             this.namespaceMap = namespaceMap;
264             return this;
265         }
266 
267         public Builder withOWLTimeNamespace(@Nullable final String owltimeNamespace) {
268             this.owltimeNamespace = owltimeNamespace;
269             return this;
270         }
271 
272         public Builder withMerging(@Nullable final Boolean merging) {
273             this.merging = merging;
274             return this;
275         }
276 
277         public Builder withNormalization(@Nullable final Boolean normalization) {
278             this.normalization = normalization;
279             return this;
280         }
281 
282         public RDFGenerator build() {
283             return new RDFGenerator(this);
284         }
285 
286     }
287 
288     static final class Runner implements Runnable {
289 
290         private final Corpus corpus;
291 
292         private final RDFGenerator generator;
293 
294         private final File outputFile;
295 
296         private final boolean intermediate;
297 
298         private Runner(final Corpus corpus, final RDFGenerator generator, final File outputFile,
299                 final boolean split) {
300             this.corpus = corpus;
301             this.generator = generator;
302             this.outputFile = outputFile.getAbsoluteFile();
303             this.intermediate = split;
304         }
305 
306         static Runner create(final String name, final String... args) {
307             final Options options = Options.parse(
308                     "r,recursive|o,output!|m,merge|n,normalize|i,intermediate|+", args);
309             final File outputFile = options.getOptionArg("o", File.class);
310             final boolean recursive = options.hasOption("r");
311             final boolean merge = options.hasOption("m");
312             final boolean normalize = options.hasOption("n");
313             final boolean intermediate = options.hasOption("i");
314             final Corpus corpus = Corpus.create(recursive, options.getPositionalArgs(File.class));
315             final RDFGenerator generator = RDFGenerator.builder()
316                     .withProperties(Util.PROPERTIES, "eu.fbk.dkm.pikes.rdf.RDFGenerator")
317                     .withMerging(merge).withNormalization(normalize).build();
318             return new Runner(corpus, generator, outputFile, intermediate);
319         }
320 
321         @Override
322         public void run() {
323 
324             LOGGER.info("Converting {} NAF files to RDF", this.corpus.size());
325 
326             final NAFFilter filter = NAFFilter.builder()
327                     .withProperties(Util.PROPERTIES, "eu.fbk.dkm.pikes.rdf.NAFFilter")
328                     .withSRLPreprocess(true, true, true).build();
329 
330             final RDFHandler writer;
331             if (!this.intermediate) {
332                 try {
333                     Files.createParentDirs(this.outputFile);
334                     writer = RDFHandlers.write(null, 1, Runner.this.outputFile.getAbsolutePath());
335                     writer.startRDF();
336                 } catch (final Throwable ex) {
337                     throw new RuntimeException(ex);
338                 }
339             } else {
340                 writer = null;
341             }
342 
343             final Tracker tracker = new Tracker(LOGGER, null, //
344                     "Processed %d NAF files (%d NAF/s avg)", //
345                     "Processed %d NAF files (%d NAF/s, %d NAF/s avg)");
346 
347             final int numThreads = Environment.getCores();
348             final CountDownLatch latch = new CountDownLatch(numThreads);
349             final AtomicInteger counter = new AtomicInteger(0);
350             final AtomicInteger succeeded = new AtomicInteger(0);
351             tracker.start();
352             for (int i = 0; i < numThreads; ++i) {
353                 Environment.getPool().submit(new Runnable() {
354 
355                     @Override
356                     public void run() {
357                         try {
358                             final Path outBase = Runner.this.outputFile.toPath().getParent()
359                                     .toAbsolutePath().normalize();
360                             while (true) {
361                                 final int i = counter.getAndIncrement();
362                                 if (i >= Runner.this.corpus.size()) {
363                                     break;
364                                 }
365                                 String docName = null;
366 
367                                 final Path path = Runner.this.corpus.file(i);
368 
369                                 Path output = null;
370                                 if (Runner.this.intermediate) {
371                                     try {
372                                         final Path base = Runner.this.corpus.path();
373                                         final Path relative = base.toAbsolutePath().relativize(
374                                                 path.toAbsolutePath());
375                                         String name = relative.toString();
376                                         int index = name.indexOf(".naf");
377                                         if (index < 0) {
378                                             index = name.indexOf(".xml");
379                                         }
380                                         name = name.substring(0, index) + ".tql.gz";
381                                         output = outBase.resolve(name);
382                                         if (java.nio.file.Files.exists(output)) {
383                                             LOGGER.info("Skipping {}", path);
384                                             succeeded.incrementAndGet();
385                                             tracker.increment();
386                                             continue;
387                                         }
388                                     } catch (final Throwable ex) {
389                                         LOGGER.error("Could not compute output file name", ex);
390                                     }
391                                 }
392 
393                                 LOGGER.info("Processing {}", path);
394 
395                                 try {
396                                     final KAFDocument document = Runner.this.corpus.get(i);
397                                     docName = document.getPublic().publicId;
398                                     MDC.put("context", docName);
399                                     filter.filter(document);
400                                     final RDFSource source = RDFSources.wrap(Runner.this.generator
401                                             .generate(document, null));
402 
403                                     if (!Runner.this.intermediate) {
404                                         source.emit(RDFHandlers.ignoreMethods(writer,
405                                                 RDFHandlers.METHOD_START_RDF
406                                                         | RDFHandlers.METHOD_END_RDF
407                                                         | RDFHandlers.METHOD_CLOSE), 1);
408                                     } else {
409                                         java.nio.file.Files.createDirectories(output.getParent());
410                                         source.emit(RDFHandlers.write(null, 1, output
411                                                 .toAbsolutePath().toString()), 1);
412                                     }
413 
414                                     succeeded.incrementAndGet();
415 
416                                 } catch (final Throwable ex) {
417                                     LOGGER.error("Processing failed for " + docName, ex);
418                                 } finally {
419                                     MDC.remove("context");
420                                 }
421                                 tracker.increment();
422                             }
423                         } finally {
424                             latch.countDown();
425                         }
426                     }
427 
428                 });
429             }
430             try {
431                 latch.await();
432                 if (!this.intermediate) {
433                     writer.endRDF();
434                 }
435             } catch (final InterruptedException ex) {
436                 Thread.currentThread().interrupt();
437             } catch (final RDFHandlerException ex) {
438                 throw new RuntimeException(ex);
439             }
440             tracker.end();
441 
442             LOGGER.info("Successfully converted {}/{} files", succeeded, this.corpus.size());
443         }
444     }
445 
446     private final class Extractor {
447 
448         private final String baseIRI;
449 
450         private final RDFHandler handler;
451 
452         private final QuadModel statements;
453 
454         private final BiMap<String, String> mintedIRIs;
455 
456         private final KAFDocument document;
457 
458         private final IRI documentIRI;
459 
460         private final boolean[] sentenceIDs;
461 
462         private final String documentText;
463 
464         private final Map<String, Annotation> annotations;
465 
466         public Extractor(final String baseIRI, final RDFHandler handler,
467                 final KAFDocument document, final boolean[] sentenceIDs) {
468 
469             this.baseIRI = baseIRI;
470             this.handler = handler;
471             this.statements = QuadModel.create();
472             this.mintedIRIs = HashBiMap.create();
473             this.document = document;
474             this.documentIRI = FACTORY.createIRI(Util.cleanIRI(document.getPublic().uri));
475             this.sentenceIDs = sentenceIDs;
476 
477             final StringBuilder builder = new StringBuilder();
478             for (final WF word : document.getWFs()) {
479                 final int offset = word.getOffset();
480                 if (builder.length() > offset) {
481                     builder.setLength(offset);
482                 } else {
483                     while (builder.length() < offset) {
484                         builder.append(" ");
485                     }
486                 }
487                 builder.append(word.getForm());
488             }
489             this.documentText = builder.toString();
490 
491             this.annotations = Maps.newHashMap();
492         }
493 
494         public void run() throws RDFHandlerException {
495 
496             // 0. Process NAF metadata
497             processMetadata();
498 
499             // 1. Process <timex3> annotations
500             for (final Timex3 timex : this.document.getTimeExs()) {
501                 if (timex.getSpan() == null
502                         || this.sentenceIDs[timex.getSpan().getFirstTarget().getSent()]) {
503                     try {
504                         processTimex(timex);
505                     } catch (final Throwable ex) {
506                         LOGGER.error("Error processing " + NAFUtils.toString(timex) + ", type "
507                                 + timex.getType() + ", value " + timex.getValue(), ex);
508                     }
509                 }
510             }
511 
512             // 2. Process <entity> annotations
513             for (final Entity entity : this.document.getEntities()) {
514                 for (final Span<Term> span : entity.getSpans()) {
515                     if (this.sentenceIDs[span.getFirstTarget().getSent()]) {
516                         try {
517                             processEntity(entity);
518                         } catch (final Throwable ex) {
519                             LOGGER.error("Error processing " + NAFUtils.toString(entity)
520                                     + ", type " + entity.getType(), ex);
521                         }
522                         break; // move to next entity
523                     }
524                 }
525             }
526 
527             // 3. Process <predicate> annotations; must be done after 1, 2
528             outer: for (final Predicate predicate : this.document.getPredicates()) {
529                 if (this.sentenceIDs[predicate.getSpan().getFirstTarget().getSent()]) {
530                     // TODO: the code below is madness... :-(
531                     for (final ExternalRef ref : predicate.getExternalRefs()) {
532                         if (NAFUtils.RESOURCE_PROPBANK.equals(ref.getResource())
533                                 && ref.getReference().equals("be.01")) {
534                             Term a1Head = null;
535                             Term a2Head = null;
536                             for (final Role role : predicate.getRoles()) {
537                                 final Term head = NAFUtils.extractHead(this.document,
538                                         role.getSpan());
539                                 if (head != null) {
540                                     if ("A1".equals(role.getSemRole())) {
541                                         a1Head = head;
542                                     } else if ("A2".equals(role.getSemRole())) {
543                                         a2Head = head;
544                                     }
545                                 }
546                             }
547                             if (a1Head != null && a2Head != null) {
548                                 for (final Coref coref : this.document.getCorefsByTerm(a1Head)) {
549                                     final Set<Term> corefHeads = Sets.newHashSet();
550                                     for (final Span<Term> span : coref.getSpans()) {
551                                         final Term head = NAFUtils
552                                                 .extractHead(this.document, span);
553                                         if (head != null) {
554                                             corefHeads.add(head);
555                                         }
556                                     }
557                                     if (corefHeads.contains(a1Head) && corefHeads.contains(a2Head)) {
558                                         continue outer;
559                                     }
560                                 }
561                             }
562                         }
563                     }
564                     try {
565                         processPredicate(predicate);
566                     } catch (final Throwable ex) {
567                         LOGGER.error("Error processing " + NAFUtils.toString(predicate), ex);
568                     }
569                 }
570             }
571 
572 
573 
574 
575             // 4. Process <factvalue> annotations; must be done after 3
576             for (final Factuality factuality : this.document.getFactualities()) {
577                 if (this.sentenceIDs[factuality.getWord().getSent()]) {
578                     try {
579                         processFactuality(factuality);
580                     } catch (final Throwable ex) {
581                         LOGGER.error("Error processing " + NAFUtils.toString(factuality), ex);
582                     }
583                 }
584             }
585 
586             // 5. Process <term> acting as modifiers; must be done after 1, 2, 3
587             for (final Annotation ann : this.annotations.values()) {
588                 final IRI uri = ann.predicateIRI != null ? ann.predicateIRI : ann.objectIRI;
589                 if (uri != null) {
590                     final Set<Term> forbiddenTerms = Sets.newHashSet();
591                     final List<Coref> corefs = this.document.getCorefsByTerm(ann.head);
592                     for (final Coref coref : corefs) {
593                         final List<Term> heads = Lists.newArrayList();
594                         for (final Span<Term> span : coref.getSpans()) {
595                             final Term head = NAFUtils.extractHead(this.document, span);
596                             if (head != null) {
597                                 heads.add(head);
598                             }
599                         }
600                         if (heads.contains(ann.head)) {
601                             forbiddenTerms.addAll(heads);
602                         }
603                     }
604                     for (final Term term : this.document.getTermsByDepAncestors(
605                             Collections.singleton(ann.head), MODIFIER_REGEX)) {
606                         if (!forbiddenTerms.contains(term)) {
607                             try {
608                                 processModifier(term, ann.head, uri, ann.extent);
609                             } catch (final Throwable ex) {
610                                 LOGGER.error(
611                                         "Error processing MODIFIER " + NAFUtils.toString(term)
612                                                 + " of " + NAFUtils.toString(ann.head)
613                                                 + " (object IRI " + ann.objectIRI
614                                                 + "; predicate IRI " + ann.predicateIRI + ")", ex);
615                             }
616                         }
617                     }
618                 }
619             }
620 
621             // 6. Process <coref> annotations; must be done after 1, 2, 3
622             for (final Coref coref : this.document.getCorefs()) {
623                 if ("event".equalsIgnoreCase(coref.getType())) {
624                     continue;
625                 }
626                 final List<Span<Term>> spans = Lists.newArrayList();
627                 for (final Span<Term> span : coref.getSpans()) {
628                     if (this.sentenceIDs[span.getFirstTarget().getSent()]) {
629                         spans.add(span);
630                     }
631                 }
632                 if (!spans.isEmpty()) {
633                     try {
634                         processCoref(spans);
635                     } catch (final Throwable ex) {
636                         LOGGER.error("Error processing " + NAFUtils.toString(coref), ex);
637                     }
638                 }
639             }
640 
641             // 7. Process head <term>s in <role> annotations; must be done after 1, 2, 3
642             for (final Predicate predicate : this.document.getPredicates()) {
643                 if (this.sentenceIDs[predicate.getSpan().getFirstTarget().getSent()]) {
644                     final PropBank.Roleset rs = PropBank
645                             .getRoleset(NAFUtils.getRoleset(predicate));
646                     final String entitySuffix = rs == null ? "?" : Integer.toString(rs
647                             .getCoreferenceEntityArg());
648                     final String predicateSuffix = rs == null ? "?" : Integer.toString(rs
649                             .getCoreferencePredicateArg());
650                     Set<Term> corefEntityHeads = null;
651                     Set<Term> corefPredicateHeads = null;
652                     for (final Role role : predicate.getRoles()) {
653                         final Term roleHead = NAFUtils.extractHead(this.document, role.getSpan());
654                         if (roleHead != null) {
655                             final Set<Term> argHeads = this.document.getTermsByDepAncestors(
656                                     Collections.singleton(roleHead), PARTICIPATION_REGEX);
657                             boolean isCorefPredicateRole = false;
658                             if (role.getSemRole().endsWith(entitySuffix)) {
659                                 corefEntityHeads = argHeads;
660                             } else if (role.getSemRole().endsWith(predicateSuffix)) {
661                                 corefPredicateHeads = argHeads;
662                                 isCorefPredicateRole = true;
663                             }
664                             for (final Term argHead : argHeads) {
665                                 try {
666                                     processRole(predicate, role, argHead, isCorefPredicateRole);
667                                 } catch (final Throwable ex) {
668                                     LOGGER.error("Error processing " + NAFUtils.toString(role)
669                                             + " of " + NAFUtils.toString(predicate)
670                                             + ", argument " + NAFUtils.toString(argHead), ex);
671                                 }
672                             }
673                         }
674                     }
675                     if (corefEntityHeads != null && corefEntityHeads.size() == 1
676                             && corefPredicateHeads != null && corefPredicateHeads.size() == 1) {
677                         final Annotation entityAnn = this.annotations.get(corefEntityHeads
678                                 .iterator().next().getId());
679                         final Annotation predicateAnn = this.annotations.get(corefPredicateHeads
680                                 .iterator().next().getId());
681                         if (predicateAnn != null && entityAnn != null
682                                 && predicateAnn.predicateIRI != null
683                                 && predicateAnn.objectIRI != null && entityAnn.objectIRI != null) {
684                             final IRI mentionIRI = emitMention(Iterables.concat(
685                                     predicateAnn.extent, entityAnn.extent));
686                             emitFact(predicateAnn.objectIRI, OWL.SAMEAS, entityAnn.objectIRI,
687                                     mentionIRI, null);
688                         }
689                     }
690                 }
691             }
692 
693             // 8. Process <opinion>s; must be done after 1, 2, 3
694             for (final Opinion opinion : this.document.getOpinions()) {
695                 if (opinion.getOpinionExpression() == null
696                         || opinion.getLabel() != null
697                         && (opinion.getLabel().toLowerCase().contains("stanford") || opinion
698                                 .getLabel().toLowerCase().contains("gold"))) {
699                     continue;
700                 }
701                 for (final Term term : opinion.getOpinionExpression().getTerms()) {
702                     if (this.sentenceIDs[term.getSent()]) {
703                         processOpinion(opinion);
704                         break;
705                     }
706                 }
707             }
708 
709             // 9. Finalize
710             Iterable<Statement> statements = RDFGenerator.this.merging ? merge(this.statements)
711                     : this.statements;
712             if (RDFGenerator.this.normalization) {
713                 statements = new ProcessorASNorm("fact:").wrap(RDFSources.wrap(statements));
714             }
715             this.handler.startRDF();
716             for (final Statement statement : statements) {
717                 this.handler.handleStatement(statement);
718             }
719             this.handler.endRDF();
720         }
721 
722         private void processMetadata() throws RDFHandlerException {
723 
724             // Obtain IRIs of document and NAF resources
725             final IRI docIRI = this.documentIRI;
726             final IRI nafIRI = FACTORY.createIRI(docIRI.stringValue() + ".naf");
727 
728             // Emit document types
729             emitMeta(docIRI, RDF.TYPE, new IRI[] { KS_OLD.RESOURCE, KS_OLD.TEXT });
730 
731             // Emit title, author and DCT from the <fileDesc> element, if present
732             if (this.document.getFileDesc() != null) {
733                 final FileDesc fd = this.document.getFileDesc();
734                 emitMeta(docIRI, DCTERMS.TITLE, fd.title);
735                 emitMeta(docIRI, DCTERMS.CREATOR, fd.author);
736                 emitMeta(docIRI, DCTERMS.CREATED, fd.creationtime);
737                 emitMeta(docIRI, KS_OLD.NAF_FILE_NAME, fd.filename);
738                 emitMeta(docIRI, KS_OLD.NAF_FILE_TYPE, fd.filetype);
739                 emitMeta(docIRI, KS_OLD.NAF_PAGES, fd.pages);
740             }
741 
742             // Emit the document language, if available
743             if (this.document.getLang() != null) {
744                 emitMeta(docIRI, DCTERMS.LANGUAGE,
745                         ModelUtil.languageCodeToIRI(this.document.getLang()));
746             }
747 
748             // Emit an hash of the whitespace-normalized raw text, if available
749             if (this.document.getRawText() != null) {
750                 final String rawText = this.document.getRawText();
751                 final StringBuilder builder = new StringBuilder();
752                 boolean addSpace = false;
753                 for (int i = 0; i < rawText.length(); ++i) {
754                     final char c = rawText.charAt(i);
755                     if (Character.isWhitespace(c)) {
756                         addSpace = builder.length() > 0;
757                     } else {
758                         if (addSpace) {
759                             builder.append(' ');
760                             addSpace = false;
761                         }
762                         builder.append(c);
763                     }
764                 }
765                 emitMeta(docIRI, KS_OLD.TEXT_HASH, Hash.murmur3(builder.toString()).toString());
766             }
767 
768             // Link document to its NAF annotation
769             emitMeta(docIRI, KS_OLD.ANNOTATED_WITH, nafIRI);
770             emitMeta(nafIRI, KS_OLD.ANNOTATION_OF, docIRI);
771 
772             // Emit types, version and publicId of NAF resource
773             emitMeta(nafIRI, RDF.TYPE, new IRI[] { KS_OLD.RESOURCE, KS_OLD.NAF });
774             emitMeta(nafIRI, KS_OLD.VERSION, this.document.getVersion());
775             emitMeta(nafIRI, DCTERMS.IDENTIFIER, this.document.getPublic().publicId);
776 
777             // Emit information about linguistic processors: dct:created, dct:creatro, ego:layer
778             String timestamp = null;
779             for (final Map.Entry<String, List<LinguisticProcessor>> entry : this.document
780                     .getLinguisticProcessors().entrySet()) {
781                 emitMeta(nafIRI, KS_OLD.LAYER,
782                         FACTORY.createIRI(KS_OLD.NAMESPACE, "layer_" + entry.getKey()));
783                 for (final LinguisticProcessor lp : entry.getValue()) {
784                     if (timestamp == null) {
785                         if (!Strings.isNullOrEmpty(lp.getBeginTimestamp())) {
786                             timestamp = lp.getBeginTimestamp();
787                         } else if (!Strings.isNullOrEmpty(lp.getEndTimestamp())) {
788                             timestamp = lp.getEndTimestamp();
789                         }
790                     }
791                     final IRI lpIRI = FACTORY.createIRI(ModelUtil.cleanIRI(KS_OLD.NAMESPACE
792                             + lp.getName() + '.' + lp.getVersion()));
793                     emitMeta(nafIRI, DCTERMS.CREATOR, lpIRI);
794                     emitMeta(lpIRI, DCTERMS.TITLE, lp.getName());
795                     emitMeta(lpIRI, KS_OLD.VERSION, lp.getVersion());
796                 }
797             }
798             emitMeta(nafIRI, DCTERMS.CREATED, timestamp);
799         }
800 
801         private void processTimex(final Timex3 timex) throws RDFHandlerException {
802 
803             // Abort if timex has no span (e.g., the DCT)
804             if (timex.getSpan() == null) {
805                 return;
806             }
807 
808             // Extract terms, head and label
809             final List<Term> terms = this.document.getTermsByWFs(timex.getSpan().getTargets());
810             final Term head = NAFUtils.extractHead(this.document, KAFDocument.newTermSpan(terms));
811             final String label = NAFUtils.getText(NAFUtils.filterTerms(terms));
812             final String type = timex.getType().trim().toLowerCase();
813 
814             // Annotate the term (or pickup the existing annotation)
815             final Annotation ann = defineAnnotation(head, terms);
816 
817             // Abort if cannot annotate (wrong head) or if a IRI was already assigned to the term
818             if (ann == null || ann.objectIRI != null) {
819                 return;
820             }
821 
822             // Emit a mention and its triples for the current timex
823             final IRI mentionIRI = emitMention(terms);
824             emitMeta(mentionIRI, RDF.TYPE, KS_OLD.TIME_MENTION);
825 
826             // Emit type specific statements
827             IRI timexIRI = null;
828             if (timex.getValue() != null) {
829                 if (type.equals("date") || type.equals("time")) {
830                     final OWLTime.Interval interval = OWLTime.Interval
831                             .parseTimex(timex.getValue());
832                     if (interval != null) {
833                         timexIRI = interval.toRDF(this.handler,
834                                 RDFGenerator.this.owltimeNamespace, null);
835                     } else {
836                         LOGGER.debug("Could not represent date/time value '" + timex.getValue()
837                                 + "' of " + NAFUtils.toString(timex));
838                     }
839                 } else if (type.equals("duration")) {
840                     final OWLTime.Duration duration = OWLTime.Duration
841                             .parseTimex(timex.getValue());
842                     if (duration != null) {
843                         timexIRI = FACTORY.createIRI(RDFGenerator.this.owltimeNamespace,
844                                 duration.toString());
845                         final IRI durationIRI = duration.toRDF(this.handler,
846                                 RDFGenerator.this.owltimeNamespace, null);
847                         emitFact(timexIRI, OWLTIME.HAS_DURATION_DESCRIPTION, durationIRI,
848                                 mentionIRI, null);
849                     } else {
850                         LOGGER.debug("Could not represent duration value '" + timex.getValue()
851                                 + "' of " + NAFUtils.toString(timex));
852                     }
853                 } else {
854                     // TODO: support SET?
855                     throw new UnsupportedOperationException("Unsupported TIMEX3 type: " + type);
856                 }
857             }
858 
859             // Generate a default timex IRI on failure
860             if (timexIRI == null) {
861                 timexIRI = mintIRI(timex.getId(),
862                         MoreObjects.firstNonNull(timex.getValue(), timex.getSpan().getStr()));
863             }
864 
865             // Register the timex IRI it in the term annotation and link it to the mention
866             ann.objectIRI = timexIRI;
867             emitMeta(timexIRI, GAF.DENOTED_BY, mentionIRI);
868 
869             // Emit common attributes based on head and label
870             emitFact(timexIRI, RDF.TYPE, ImmutableList.of(KS_OLD.ENTITY, KS_OLD.TIME, "timex." + type),
871                     mentionIRI, null);
872             emitCommonAttributes(timexIRI, mentionIRI, head, label, true);
873         }
874 
        /**
         * Processes a NAF {@code <entity>}, minting (or reusing) an instance IRI for it and
         * emitting the corresponding mention and fact triples.
         *
         * @param entity the NAF entity to process (only its first span is considered)
         * @throws RDFHandlerException on error emitting RDF
         */
        private void processEntity(final Entity entity) throws RDFHandlerException {

            // Retrieve terms, head and label; abort if no head can be extracted
            final List<Term> terms = entity.getSpans().get(0).getTargets();
            final String label = NAFUtils.getText(NAFUtils.filterTerms(terms));
            final Term head = NAFUtils.extractHead(this.document, entity.getSpans().get(0));
            if (head == null) {
                return;
            }

            // Extract type information (type IRI, whether timex or attribute) based on NER tag.
            // "Property" NER tags (money, cardinal, ...) denote attribute values rather than
            // ontological instances; "linked" means the entity has external references
            String type = entity.getType();
            type = type == null ? null : type.toLowerCase();
            // final Collection<IRI> typeIRIs = RDFGenerator.this.typeMap.get("entity." + type);
            final boolean isLinked = !entity.getExternalRefs().isEmpty();
            final boolean isProperty = "money".equals(type) || "cardinal".equals(type)
                    || "ordinal".equals(type) || "percent".equals(type) || "language".equals(type)
                    || "norp".equals(type) || "quantity".equals(type);

            // Discard attributes in modifier position (NMOD/AMOD dependency), as they will be
            // considered later when the modified instance is processed
            final Dep dep = this.document.getDepToTerm(head);
            if (isProperty && dep != null) {
                final String depLabel = dep.getRfunc().toUpperCase();
                if (depLabel.contains("NMOD") || depLabel.contains("AMOD")) {
                    return;
                }
            }

            // Annotate the term (or pickup the existing annotation)
            final Annotation ann = defineAnnotation(head, terms);

            // Abort if cannot annotate (wrong head) or if a IRI was already assigned to the term
            if (ann == null || ann.objectIRI != null) {
                return;
            }

            // Mint a IRI for the entity and register it in the term annotation. Named but
            // unlinked entities get a deterministic "entity:" IRI derived from their surface
            // string, so the same name maps to the same IRI across documents
            final IRI entityIRI;
            if (!entity.isNamed() || isLinked) {
                entityIRI = mintIRI(entity.getId(), entity.isNamed() ? entity.getSpans().get(0)
                        .getStr() : head.getLemma());
            } else {
                entityIRI = Statements.VALUE_FACTORY.createIRI(Util.cleanIRI("entity:"
                        + entity.getStr().toLowerCase().replace(' ', '_')));
            }
            ann.objectIRI = entityIRI;

            // Emit a mention and its triples for the current entity
            final IRI mentionIRI = emitMention(terms);
            emitMeta(entityIRI, GAF.DENOTED_BY, mentionIRI);
            emitMeta(mentionIRI, RDF.TYPE, KS_OLD.ENTITY_MENTION);
            // if ("person".equals(type)) {
            // emitMeta(mentionIRI, RDF.TYPE, KS_OLD.PERSON_MENTION);
            // } else if ("organization".equals(type)) {
            // emitMeta(mentionIRI, RDF.TYPE, KS_OLD.ORGANIZATION_MENTION);
            // } else if ("location".equals(type)) {
            // emitMeta(mentionIRI, RDF.TYPE, KS_OLD.LOCATION_MENTION);
            // } else if (!isProperty) {
            // emitMeta(mentionIRI, RDF.TYPE, KS_OLD.MISC_MENTION);
            // }
            if (isProperty) {
                emitMeta(mentionIRI, RDF.TYPE, KS_OLD.ATTRIBUTE_MENTION);
            }

            // Emit common attributes based on head and label; skip lemma/synset attributes for
            // entities that are also predicates, as processPredicate() handles those
            emitFact(entityIRI, RDF.TYPE, new Object[] { KS_OLD.ENTITY, "entity",
                    type == null ? null : "entity." + type }, mentionIRI, null);
            if (this.document.getPredicatesByTerm(head).isEmpty()) {
                emitCommonAttributes(entityIRI, mentionIRI, head, label, true);
            }

            // Handle the case the <entity> is an attribute of some anonymous instance
            if (isProperty) {
                emitEntityAttributes(entity, entityIRI, mentionIRI);
            } else {

                // TODO: originally the following check was enforced
                //                if (!typeIRIs.isEmpty()) {
                //                }

                // Handle the case the <entity> is an ontological instance itself. Named
                // instances get a foaf:name and owl:sameAs links to external refs; unnamed
                // ones only get weaker rdfs:seeAlso links
                final boolean named = entity.isNamed() || "romanticism".equalsIgnoreCase(label)
                        || "operant conditioning chamber".equalsIgnoreCase(label); // TODO
                if (named) {
                    emitFact(entityIRI, FOAF.NAME, label, mentionIRI, null);
                    emitMeta(mentionIRI, RDF.TYPE, KS_OLD.NAME_MENTION);
                }
                final IRI property = named ? OWL.SAMEAS : RDFS.SEEALSO;
                for (final ExternalRef ref : entity.getExternalRefs()) {
                    try {
                        final IRI refIRI = FACTORY.createIRI(Util.cleanIRI(ref.getReference()));
                        emitFact(entityIRI, property, refIRI, mentionIRI,
                                (double) ref.getConfidence());
                    } catch (final Throwable ex) {
                        // ignore: not a IRI (best-effort linking; malformed refs are dropped)
                    }
                }
            }
        }
974 
975         private void processPredicate(final Predicate predicate) throws RDFHandlerException {
976 
977             // Retrieve terms, head and label
978             final List<Term> terms = predicate.getSpan().getTargets();
979             final String label = NAFUtils.getText(NAFUtils.filterTerms(terms));
980             final Term head = NAFUtils.extractHead(this.document, predicate.getSpan());
981 
982             // Abort if predicate overlaps with timex or named / ordinal entity
983             if (!this.document.getTimeExsByTerm(head).isEmpty()) {
984                 return;
985             }
986             for (final Entity entity : this.document.getEntitiesByTerm(head)) {
987                 if (entity.isNamed() || "ordinal".equalsIgnoreCase(entity.getType())) {
988                     return;
989                 }
990             }
991 
992             // Annotate the term (or pickup the existing annotation); abort if wrong head
993             final Annotation ann = defineAnnotation(head, terms);
994             if (ann == null) {
995                 return;
996             }
997 
998             // Validate the existing annotation based on expected previous processing
999             if (ann.predicateIRI != null) {
1000                 LOGGER.warn("Already processed: " + NAFUtils.toString(predicate) + "; head is "
1001                         + NAFUtils.toString(head));
1002                 return; // this is a problem of the NAF
1003             }
1004 
1005             // Determine whether the predicate admit its own span as an argument
1006             boolean selfArg = false;
1007             if (ann.objectIRI != null) {
1008                 for (final Role role : predicate.getRoles()) {
1009                     selfArg |= head.equals(NAFUtils.extractHead(this.document, role.getSpan()));
1010                 }
1011             }
1012 
1013             // Determine if the predicate is an event, based on SUMO mapping
1014             boolean isEvent = false;
1015             for (final ExternalRef ref : head.getExternalRefs()) {
1016                 if ("SUMO".equals(ref.getResource())) {
1017                     final IRI conceptIRI = SimpleValueFactory.getInstance().createIRI(
1018                             SUMO.NAMESPACE, ref.getReference());
1019                     if (Sumo.isSubClassOf(conceptIRI, SUMO.PROCESS)) {
1020                         isEvent = true;
1021                         break;
1022                     }
1023                 }
1024             }
1025 
1026             // Assign a IRI to the predicate, possibly reusing the IRI of an entity
1027             final IRI predicateIRI = ann.objectIRI != null && !selfArg ? ann.objectIRI : mintIRI(
1028                     predicate.getId(), head.getLemma());
1029             ann.predicateIRI = predicateIRI;
1030 
1031             // Emit a mention and its triples (reuse an entity span if possible)
1032             IRI mentionIRI = null;
1033             if (predicateIRI.equals(ann.objectIRI)) {
1034                 for (final Entity entity : this.document.getEntitiesByTerm(head)) {
1035                     mentionIRI = emitMention(entity.getSpans().get(0).getTargets());
1036                 }
1037             } else {
1038                 mentionIRI = emitMention(terms);
1039             }
1040             emitMeta(mentionIRI, RDF.TYPE, KS_OLD.PREDICATE_MENTION);
1041             emitMeta(predicateIRI, GAF.DENOTED_BY, mentionIRI);
1042 
1043             // Emit common attributes
1044             if (ann.objectIRI == null) {
1045                 emitCommonAttributes(ann.predicateIRI, mentionIRI, head, label, true);
1046             } else {
1047                 emitCommonAttributes(ann.objectIRI, mentionIRI, head, label, !selfArg);
1048             }
1049 
1050             // Process framenet/verbnet/etc external refs
1051             for (final ExternalRef ref : predicate.getExternalRefs()) {
1052                 if ("".equals(ref.getReference())) {
1053                     continue;
1054                 }
1055                 final IRI typeIRI = mintRefIRI(ref.getResource(), ref.getReference());
1056                 emitFact(predicateIRI, RDF.TYPE, typeIRI, mentionIRI, null);
1057                 //                if (ref.getResource().equals(NAFUtils.RESOURCE_FRAMENET)) {
1058                 //                    for (final String id : FrameNet.getRelatedFrames(true, ref.getReference(),
1059                 //                            FrameNet.Relation.INHERITS_FROM)) {
1060                 //                        final IRI uri = mintRefIRI(NAFUtils.RESOURCE_FRAMENET, id);
1061                 //                        emitFact(predicateIRI, RDF.TYPE, uri, mentionIRI, null);
1062                 //                    }
1063                 //                } else if (ref.getResource().equals(NAFUtils.RESOURCE_VERBNET)) {
1064                 //                    for (final String id : VerbNet.getSuperClasses(true, ref.getReference())) {
1065                 //                        final IRI uri = mintRefIRI(NAFUtils.RESOURCE_VERBNET, id);
1066                 //                        emitFact(predicateIRI, RDF.TYPE, uri, mentionIRI, null);
1067                 //                    }
1068                 //                }
1069             }
1070 
1071             // Mark the predicate as sem:Event and associate it the correct ego: type
1072             final List<Object> typeKeys = Lists.newArrayList(KS_OLD.ENTITY, KS_OLD.PREDICATE, SEM.EVENT);
1073             if (isEvent) {
1074                 typeKeys.add(SUMO.PROCESS);
1075             }
1076             emitFact(predicateIRI, RDF.TYPE, typeKeys, mentionIRI, null);
1077         }
1078 
1079         private void processFactuality(final Factuality factuality) throws RDFHandlerException {
1080 
1081             // TODO: factuality should be better handled
1082 
1083             // Retrieve term and corresponding annotation
1084             final Term term = factuality.getWord();
1085             final Annotation ann = this.annotations.get(term.getId());
1086 
1087             // Abort if the annotation is missing or does not refer to a predicate
1088             if (ann == null || ann.predicateIRI == null) {
1089                 return;
1090             }
1091 
1092             // Emit a mention for the predicate extent
1093             final IRI mentionIRI = emitMention(ann.extent);
1094 
1095             // Emit a triple associating the factuality value to the predicate
1096             final String value = factuality.getMaxPart().getPrediction();
1097             emitFact(ann.predicateIRI, KS_OLD.FACTUALITY, value, mentionIRI, null);
1098         }
1099 
1100         private void processModifier(final Term modifierTerm, final Term instanceTerm,
1101                 final IRI instanceIRI, final List<Term> instanceExtent) throws RDFHandlerException {
1102 
1103             // Retrieve POS and <entity> corresponding to the modifier term
1104             final char pos = Character.toUpperCase(modifierTerm.getPos().charAt(0));
1105             final List<Entity> entities = this.document.getEntitiesByTerm(modifierTerm);
1106             final Annotation ann = this.annotations.get(modifierTerm.getId());
1107 
1108             // Ignore modifiers marked as TIMEX
1109             if (!this.document.getTimeExsByTerm(modifierTerm).isEmpty()) {
1110                 return;
1111             }
1112 
1113             if (ann != null) {
1114                 // If modifier has been mapped to some other instance, link the two instances
1115                 final IRI otherIRI = ann.objectIRI != null ? ann.objectIRI : ann.predicateIRI;
1116                 if (otherIRI != null) {
1117                     final IRI mentionID = emitMention(Iterables.concat(instanceExtent, ann.extent));
1118                     emitFact(instanceIRI, KS_OLD.MOD, otherIRI, mentionID, null);
1119                 }
1120                 final String path = extractPath(instanceTerm, modifierTerm);
1121                 if (!Strings.isNullOrEmpty(path)) {
1122                     final IRI mentionID = emitMention(Iterables.concat(instanceExtent, ann.extent));
1123                     final IRI property = mintRefIRI("conn", path);
1124                     emitFact(instanceIRI, property, otherIRI, mentionID, null);
1125                 }
1126 
1127             } else if (!entities.isEmpty()) {
1128                 // If modifier is an <entity> for which we didn't create a node, then create
1129                 // an attribute and attach it to the modified entity
1130                 final Entity entity = entities.get(0);
1131                 final IRI mentionIRI = emitMention(entity.getSpans().get(0).getTargets());
1132                 emitMeta(mentionIRI, RDF.TYPE, KS_OLD.ATTRIBUTE_MENTION);
1133                 emitEntityAttributes(entity, instanceIRI, mentionIRI);
1134 
1135             } else if (pos == 'G' || pos == 'A' || pos == 'V') {
1136                 // WAS AT THE BEGINNING
1137                 // If modifier is an adjective, noun, pronoun or verb, then attach a
1138                 // 'quality' attribute to the modified node
1139                 final Set<Term> terms = this.document.getTermsByDepAncestors(
1140                         Collections.singleton(modifierTerm), "(AMOD|NMOD)*");
1141                 final IRI mentionIRI = emitMention(terms);
1142                 final IRI expressionIRI = emitTerm(modifierTerm);
1143                 emitFact(instanceIRI, KS_OLD.MOD, expressionIRI, mentionIRI, null);
1144             }
1145         }
1146 
        /**
         * Processes a coreference cluster, linking the IRIs of its members via owl:sameAs and
         * grouping coordinated instances into compound instances where needed.
         *
         * @param spans the spans of the coreference cluster members
         * @throws RDFHandlerException on error emitting RDF
         */
        private void processCoref(final List<Span<Term>> spans) throws RDFHandlerException {

            // Build three correlated lists containing, for each member of the coref cluster, its
            // span, the head terms of instances in the span and the associated IRIs
            final List<Span<Term>> corefSpans = Lists.newArrayList();
            final List<List<Term>> corefTerms = Lists.newArrayList();
            final List<List<Term>> corefExtents = Lists.newArrayList();
            final List<List<IRI>> corefIRIs = Lists.newArrayList();
            for (final Span<Term> span : spans) {
                final Term head = NAFUtils.extractHead(this.document, span);
                if (head != null) {
                    final List<Term> terms = Lists.newArrayList();
                    final List<IRI> uris = Lists.newArrayList();
                    final Set<Term> extent = Sets.newHashSet();
                    // Expand the head through coordination (COORD/CONJ) to catch all the
                    // coordinated instances inside the span
                    for (final Term term : this.document.getTermsByDepAncestors(
                            Collections.singleton(head), "(COORD CONJ?)*")) {
                        if (!span.getTargets().contains(term)) {
                            continue;
                        }
                        // Keep only terms previously mapped to an object or predicate IRI
                        final Annotation ann = this.annotations.get(term.getId());
                        final IRI uri = ann == null ? null : ann.objectIRI != null ? ann.objectIRI
                                : ann.predicateIRI;
                        if (uri != null) {
                            terms.add(term);
                            uris.add(uri);
                            extent.addAll(ann.extent);
                        }
                    }
                    if (!terms.isEmpty()) {
                        corefSpans.add(span);
                        corefTerms.add(terms);
                        corefExtents.add(Ordering.natural().immutableSortedCopy(extent));
                        corefIRIs.add(uris);
                    }
                }
            }

            // Abort in case there is only one member in the coref cluster
            if (corefTerms.size() <= 1) {
                return;
            }

            // Map each coref member to a term / IRI pair, possibly grouping coordinated instances
            // in a compound instance via a ego:Composition relation
            final Map<Term, IRI> members = Maps.newHashMap();
            final Map<Term, Span<Term>> memberSpans = Maps.newHashMap();
            for (int i = 0; i < corefTerms.size(); ++i) {
                final Span<Term> span = corefSpans.get(i);
                final List<Term> terms = corefTerms.get(i);
                final List<Term> extent = corefExtents.get(i);
                final List<IRI> uris = corefIRIs.get(i);
                memberSpans.put(terms.get(0), span);
                if (terms.size() == 1) {
                    // Single instance: the member maps directly to its IRI
                    members.put(terms.get(0), uris.get(0));
                } else {
                    // Multiple coordinated instances: mint a compound IRI whose local name
                    // concatenates the local names of all the components
                    final StringBuilder builder = new StringBuilder();
                    for (final IRI uri : uris) {
                        builder.append(builder.length() == 0 ? "" : "_");
                        builder.append(uri.getLocalName());
                    }
                    final IRI compIRI = mintIRI(builder.toString(), null);
                    final IRI mentionIRI = emitMention(extent);
                    // final String label =
                    // NAFUtils.getText(NAFUtils.filterTerms(span.getTargets()));

                    // final IRI predIRI =
                    // this.emitter.mintIRI(builder.append("_pred").toString(),
                    // null);
                    // this.emitter.emitFact(predIRI, RDF.TYPE, new Object[] { KS_OLD.THING,
                    // KS_OLD.PREDICATE, SUMO.ENTITY, SEM.EVENT, "predicate.relation",
                    // KS_OLD.COMPOSITION }, mentionIRI, null);
                    // this.emitter.emitFact(compIRI, EGO.PLURAL, true, mentionIRI, null);
                    // this.emitter.emitMeta(mentionIRI, RDF.TYPE, KS_OLD.PREDICATE_MENTION);

                    emitFact(compIRI, RDF.TYPE, new Object[] { KS_OLD.ENTITY }, mentionIRI, null);
                    // emitFact(compIRI, RDFS.LABEL, label, mentionIRI, null);
                    // emitMeta(mentionIRI, RDF.TYPE, KS_OLD.MISC_MENTION);

                    // emitMeta(compIRI, GAF.DENOTED_BY, mentionIRI);

                    // this.emitter.emitFact(predIRI, KS_OLD.COMPOSITE, compIRI, mentionIRI, null);
                    // Link the compound to each of its components via ks:include
                    for (int j = 0; j < uris.size(); ++j) {
                        // this.emitter
                        // .emitFact(predIRI, KS_OLD.COMPONENT, uris.get(j), mentionIRI, null);
                        emitFact(compIRI, KS_OLD.INCLUDE, uris.get(j), mentionIRI, null);
                    }
                    members.put(terms.get(0), compIRI);
                }
            }

            // Emit all possible coreference relations between cluster members; the term ID
            // comparison ensures each unordered pair is emitted exactly once
            for (final Map.Entry<Term, IRI> entry1 : members.entrySet()) {
                for (final Map.Entry<Term, IRI> entry2 : members.entrySet()) {
                    final Term term1 = entry1.getKey();
                    final Term term2 = entry2.getKey();
                    if (term1.getId().compareTo(term2.getId()) < 0) {
                        final Span<Term> span1 = memberSpans.get(term1);
                        final Span<Term> span2 = memberSpans.get(term2);
                        final IRI mentionIRI = emitMention(Iterables.concat(span1.getTargets(),
                                span2.getTargets()));
                        final IRI uri1 = entry1.getValue();
                        final IRI uri2 = entry2.getValue();
                        // final int distance = Math.abs(term1.getSent() - term2.getSent());
                        emitFact(uri1, OWL.SAMEAS, uri2, mentionIRI, null);
                    }
                }
            }
        }
1255 
        /**
         * Emits the participation facts linking the instance of the supplied predicate to the
         * instance (or attribute node) of one of its arguments.
         *
         * @param predicate the SRL predicate being processed
         * @param role the role connecting the predicate to the argument
         * @param argHead the head term of the argument span
         * @param isCorefPredicateRole presumably true when the argument corefers with the
         *            predicate, in which case its predicate IRI is preferred over its object IRI
         *            — TODO confirm against callers
         * @throws RDFHandlerException on RDF emission failure
         */
        private void processRole(final Predicate predicate, final Role role, final Term argHead,
                final boolean isCorefPredicateRole) throws RDFHandlerException {

            // Retrieve the IRI previously associated to the predicate; abort if not found
            final Term predHead = NAFUtils.extractHead(this.document, predicate.getSpan());
            final Annotation predAnn = this.annotations.get(predHead.getId());
            final IRI predIRI = predAnn == null ? null : predAnn.predicateIRI;
            if (predIRI == null) {
                return;
            }

            // Retrieve the IRI previously associated to the argument, if any (the predicate IRI
            // is preferred when the argument has no object IRI or isCorefPredicateRole is set)
            IRI argIRI = null;
            final Annotation argAnn = this.annotations.get(argHead.getId());
            if (argAnn != null) {
                if (argAnn.predicateIRI != null
                        && (argAnn.objectIRI == null || isCorefPredicateRole)) {
                    argIRI = argAnn.predicateIRI;
                } else {
                    argIRI = argAnn.objectIRI;
                }
            }

            // Discard invalid arguments (arg = pred, no arg IRI and arg not noun, adj, adv)
            final char pos = Character.toUpperCase(argHead.getPos().charAt(0));
            if (argIRI != null && argIRI.equals(predIRI) || argIRI == null && pos != 'N'
                    && pos != 'G' && pos != 'A') {
                return;
            }

            // Determine the participation properties, starting with ego:argument
            final Set<IRI> properties = Sets.newHashSet();

            // Add properties from the SEM ontology, mapping digit-terminated roles (e.g. "a0")
            // to sem:hasActor and "tmp"/"loc" roles to sem:hasTime/sem:hasPlace
            String semRole = role.getSemRole();
            if (semRole != null && !semRole.equals("")) {

                // TODO Drop R-AX
                if (semRole.startsWith("R-")) {
                    return;
                }

                // Normalize the role: lowercase, keep only the part after the last '-'
                semRole = semRole.toLowerCase();
                final int index = semRole.lastIndexOf('-');
                if (index >= 0) {
                    semRole = semRole.substring(index + 1);
                }
                if (Character.isDigit(semRole.charAt(semRole.length() - 1))) {
                    // Numbered argument: keep only the final digit for later property minting
                    semRole = semRole.substring(semRole.length() - 1);
                    properties.add(SEM.HAS_ACTOR);
                } else if (semRole.equals("tmp")) {
                    properties.add(SEM.HAS_TIME);
                } else if (semRole.equals("loc")) {
                    properties.add(SEM.HAS_PLACE);
                }
            }

            // Determine the resource (propbank/nombank) to use for interpreting the sem role
            final String semRoleResource = predHead.getPos().equalsIgnoreCase("V") ? "propbank"
                    : "nombank";

            // Add properties from PropBank, NomBank, VerbNet, FrameNet external references,
            // skipping the resource already handled via the sem role and empty references
            for (final ExternalRef ref : role.getExternalRefs()) {
                final String resource = ref.getResource().toLowerCase();
                final String name = ref.getReference().replace('#', '.');
                if (resource.equals(semRoleResource) || name.equals("")) {
                    continue;
                }
//                final int index = name.lastIndexOf('@');
//                final String arg = (index < 0 ? name : name.substring(index + 1)).toLowerCase();
//
//                if (resource.equalsIgnoreCase(NAFUtils.RESOURCE_FRAMENET)
//                        || resource.equalsIgnoreCase(NAFUtils.RESOURCE_VERBNET) || index < 0) {
//                    properties.add(mintRefIRI(resource, arg));
//                } else {
//                    if (Character.isDigit(arg.charAt(0))) {
//                        final String sense = name.substring(0, index);
//                        properties.add(mintRefIRI(resource, sense + "_" + arg));
//                    } else {
//                        properties.add(mintRefIRI(resource, arg));
//                    }
//                }
                properties.add(mintRefIRI(resource,name));
            }

            // The AX, AM-X information may not be encoded in external references, so
            // we derive it from predicate sense and role semRole property.
            if (!Strings.isNullOrEmpty(semRole)) {
                for (final ExternalRef ref : predicate.getExternalRefs()) {
                    final String resource = ref.getResource().toLowerCase();
                    if (resource.equals(semRoleResource)) {
                        if (Character.isDigit(semRole.charAt(0))) {
                            // Numbered role: mint e.g. <resource>:<sense>_<digit>
                            properties.add(mintRefIRI(resource, ref.getReference().toLowerCase()
                                    + "_" + semRole));
                        } else {
                            properties.add(mintRefIRI(resource, semRole));
                        }
                    }
                }
            }

            // Add path properties derived from the dependency path between pred and arg heads
            final String path = extractPath(predHead, argHead);
            if (path == null) {
                LOGGER.debug("Could not compute dependency path from " + predHead.getId() + " to "
                        + argHead.getId());
            }
            if (!Strings.isNullOrEmpty(path)) {
                properties.add(mintRefIRI("conn", path));
            }

            // Create either an edge (argument instance exists) or an attribute (emitted term)
            final List<Term> predTerms = predicate.getSpan().getTargets();
            if (argIRI != null) {
                final IRI mentionIRI = emitMention(Iterables.concat(predTerms, argAnn.extent));
                emitMeta(mentionIRI, RDF.TYPE, KS_OLD.PARTICIPATION_MENTION);
                for (final IRI property : properties) {
                    emitFact(predIRI, property, argIRI, mentionIRI, null);
                }
            } else {
                // No argument instance: emit an attribute node for the argument head and link
                // the predicate to it, using the AMOD/NMOD closure as the mention extent
                final Set<Term> argTerms = this.document.getTermsByDepAncestors(
                        Collections.singleton(argHead), "(AMOD|NMOD)*");
                final IRI mentionIRI = emitMention(Iterables.concat(predTerms, argTerms));
                emitMeta(mentionIRI, RDF.TYPE, KS_OLD.PARTICIPATION_MENTION);
                final IRI expressionIRI = emitTerm(argHead);
                for (final IRI property : properties) {
                    emitFact(predIRI, property, expressionIRI, mentionIRI, null);
                }
            }
        }
1386 
1387         private void processOpinion(final Opinion opinion) {
1388 
1389             // Identify the sentence where the opinion occurs (for normalization purposes)
1390             final int sentenceID = opinion.getOpinionExpression().getTerms().get(0).getSent();
1391 
1392             // Mint a IRI for the opinion and emit polarity and label facts
1393             final IRI opinionIRI = mintIRI(opinion.getId(), null);
1394             final Polarity polarity = Polarity.forExpression(opinion.getOpinionExpression());
1395             emitFact(opinionIRI, RDF.TYPE, SUMO.ENTITY, null, null);
1396             emitFact(opinionIRI, RDF.TYPE, KS_OLD.OPINION, null, null);
1397             emitFact(opinionIRI, RDF.TYPE, polarity == Polarity.POSITIVE ? KS_OLD.POSITIVE_OPINION
1398                     : polarity == Polarity.NEGATIVE ? KS_OLD.NEGATIVE_OPINION : KS_OLD.NEUTRAL_OPINION,
1399                     null, null);
1400             if (opinion.getLabel() != null) {
1401                 emitFact(opinionIRI, RDFS.LABEL, opinion.getLabel(), null, null);
1402             }
1403 
1404             // Emit links from opinion to its expression nodes
1405             final Span<Term> exprSpan = NAFUtils.trimSpan(
1406                     opinion.getOpinionExpression().getSpan(), sentenceID);
1407             final Set<Term> exprHeads = exprSpan == null ? ImmutableSet.<Term>of() : NAFUtils
1408                     .extractHeads(this.document, null, exprSpan.getTargets(),
1409                             NAFUtils.matchExtendedPos(this.document, "NN", "VB", "JJ", "R"));
1410             emitOpinionArgument(opinionIRI, null, KS_OLD.EXPRESSION, exprSpan, exprHeads);
1411 
1412             // Emit links from opinion to target nodes
1413             final OpinionTarget target = opinion.getOpinionTarget();
1414             final Span<Term> targetSpan = target == null ? null : NAFUtils.trimSpan(
1415                     target.getSpan(), sentenceID);
1416             final Set<Term> targetHeads = targetSpan == null ? ImmutableSet.<Term>of() : NAFUtils
1417                     .extractHeads(this.document, null, targetSpan.getTargets(),
1418                             NAFUtils.matchExtendedPos(this.document, "NN", "PRP", "JJP", "DTP",
1419                                     "WP", "VB"));
1420             emitOpinionArgument(opinionIRI, null, KS_OLD.TARGET, targetSpan, targetHeads);
1421 
1422             // Emit links from opinion to holder nodes
1423             final OpinionHolder holder = opinion.getOpinionHolder();
1424             final Span<Term> holderSpan = holder == null ? null : NAFUtils.trimSpan(
1425                     holder.getSpan(), sentenceID);
1426             final Set<Term> holderHeads = holderSpan == null ? ImmutableSet.<Term>of() : NAFUtils
1427                     .extractHeads(this.document, null, holderSpan.getTargets(), NAFUtils
1428                             .matchExtendedPos(this.document, "NN", "PRP", "JJP", "DTP", "WP"));
1429             emitOpinionArgument(opinionIRI, null, KS_OLD.HOLDER, holderSpan, holderHeads);
1430         }
1431 
1432         private void emitOpinionArgument(final IRI opinionID, @Nullable final IRI spanProperty,
1433                 @Nullable final IRI headProperty, @Nullable final Span<Term> span,
1434                 @Nullable final Set<Term> heads) {
1435 
1436             if (span != null) {
1437                 outer: for (final Term term : span.getTargets()) {
1438                     final Annotation ann = this.annotations.get(term.getId());
1439                     IRI uri = ann == null ? null : ann.objectIRI != null ? ann.objectIRI
1440                             : ann.predicateIRI;
1441                     if (uri == null && "AGV".contains(term.getPos())) {
1442                         for (final Dep dep : this.document.getDepsFromTerm(term)) {
1443                             if (dep.getRfunc().equals("VC")) {
1444                                 continue outer;
1445                             }
1446                         }
1447                         uri = emitTerm(term);
1448                     }
1449                     if (uri != null) {
1450                         if (spanProperty != null) {
1451                             emitFact(opinionID, spanProperty, uri, null, null);
1452                         }
1453                         if (headProperty != null && heads != null && heads.contains(term)) {
1454                             emitFact(opinionID, headProperty, uri, null, null);
1455                         }
1456                     }
1457                 }
1458             }
1459         }
1460 
1461         private void emitCommonAttributes(final IRI instanceID, final IRI mentionID,
1462                 final Term head, final String label, final boolean emitSumo)
1463                 throws RDFHandlerException {
1464 
1465             if ("QPD".indexOf(head.getPos()) < 0 && label != null && !label.isEmpty()) {
1466                 emitFact(instanceID, RDFS.LABEL, label, mentionID, null);
1467             }
1468 
1469             final char pos = Character.toUpperCase(head.getPos().charAt(0));
1470             if (pos == 'N' || pos == 'V') {
1471                 emitMeta(mentionID, KS_OLD.LEMMA, head.getLemma());
1472                 // this.emitter.emitFact(instanceID, EGO.LEMMA, head.getLemma(), mentionID, null);
1473             }
1474 
1475             final ExternalRef sstRef = NAFUtils.getRef(head, NAFUtils.RESOURCE_WN_SST, null);
1476             if (sstRef != null) {
1477                 final String sst = sstRef.getReference();
1478                 final IRI uri = FACTORY.createIRI("http://www.newsreader-project.eu/sst/",
1479                         sst.substring(sst.lastIndexOf('-') + 1));
1480                 emitMeta(mentionID, KS_OLD.SST, uri);
1481                 // this.emitter.emitFact(instanceID, EGO.SST, uri, mentionID, null);
1482             }
1483 
1484             final ExternalRef synsetRef = NAFUtils.getRef(head, NAFUtils.RESOURCE_WN_SYNSET, null);
1485             if (synsetRef != null) {
1486                 final IRI uri = FACTORY.createIRI("http://www.newsreader-project.eu/syn/",
1487                         synsetRef.getReference());
1488                 emitMeta(mentionID, KS_OLD.SYNSET, uri);
1489                 // this.emitter.emitFact(instanceID, EGO.SYNSET, uri, mentionID, null);
1490             }
1491 
1492             final String p = head.getMorphofeat().toUpperCase();
1493             if (p.equals("NNS") || p.equals("NNPS")) {
1494                 emitMeta(mentionID, KS_OLD.PLURAL, true);
1495                 // this.emitter.emitFact(instanceID, EGO.PLURAL, true, mentionID, null);
1496             }
1497 
1498             for (final ExternalRef ref : head.getExternalRefs()) {
1499                 final IRI typeIRI = mintRefIRI(ref.getResource(), ref.getReference());
1500                 if (ref.getResource().equals(NAFUtils.RESOURCE_SUMO)) {
1501                     if (emitSumo) {
1502                         emitFact(instanceID, RDF.TYPE, typeIRI, mentionID, ref.getConfidence());
1503                         emitFact(instanceID, RDF.TYPE, Sumo.getSuperClasses(typeIRI), mentionID,
1504                                 ref.getConfidence());
1505                     }
1506                 } else {
1507                     emitFact(instanceID, RDF.TYPE, typeIRI, mentionID, ref.getConfidence());
1508                 }
1509             }
1510         }
1511 
        /**
         * Emits attribute facts derived from an entity annotation: linked IRIs for NORP and
         * LANGUAGE entities, or numeric attributes (quantity, rank, percentage, currency) parsed
         * from the entity's normalized value.
         *
         * @param entity the NAF entity being processed
         * @param subject the IRI of the instance the attributes are attached to
         * @param mention the IRI of the mention supporting the emitted facts
         * @throws RDFHandlerException on RDF emission failure
         */
        private void emitEntityAttributes(final Entity entity, final IRI subject, final IRI mention)
                throws RDFHandlerException {

            // Retrieve normalized value and NER tag
            final ExternalRef valueRef = NAFUtils.getRef(entity, "value", null);
            String nerTag = entity.getType();
            nerTag = nerTag == null ? null : nerTag.toLowerCase();

            // For NORP and LANGUAGE entities we use the DBpedia IRIs from entity linking
            if (Objects.equal(nerTag, "norp") || Objects.equal(nerTag, "language")) {
                final IRI attribute = Objects.equal(nerTag, "norp") ? KS_OLD.PROVENANCE : KS_OLD.LANGUAGE;
                for (final ExternalRef ref : entity.getExternalRefs()) {
                    try {
                        final IRI refIRI = FACTORY.createIRI(Util.cleanIRI(ref.getReference()));
                        emitFact(subject, attribute, refIRI, mention, (double) ref.getConfidence());
                    } catch (final Throwable ex) {
                        // ignore: not a IRI
                    }
                }

            } else if (valueRef != null) {
                // Otherwise, we use the normalized value from Stanford
                try {
                    final String s = valueRef.getReference().trim();
                    if (s.isEmpty()) {
                        return;
                    }
                    if (Objects.equal(nerTag, "cardinal") || Objects.equal(nerTag, "quantity")) {
                        emitFact(subject, KS_OLD.QUANTITY, Double.parseDouble(s), mention, null);

                    } else if (Objects.equal(nerTag, "ordinal")) {
                        emitFact(subject, KS_OLD.RANK, Double.parseDouble(s), mention, null);

                    } else if (Objects.equal(nerTag, "percent")) {
                        // assumes the digits follow the '%' sign (e.g. "%25") — TODO confirm
                        final int index = s.indexOf('%');
                        emitFact(subject, KS_OLD.PERCENTAGE,
                                Double.parseDouble(s.substring(index + 1)), mention, null);

                    } else if (Objects.equal(nerTag, "money")) {
                        // Scan currency symbols preceding the first digit, then parse the amount
                        int index = 0;
                        while (index < s.length()) {
                            final char c = s.charAt(index);
                            if (c == '€') {
                                emitFact(subject, GR.HAS_CURRENCY, "EUR", mention, null);
                            } else if (c == '$') {
                                emitFact(subject, GR.HAS_CURRENCY, "USD", mention, null);
                            } else if (c == '¥') {
                                // NOTE(review): "YEN" is not the ISO 4217 code ("JPY") — confirm
                                // whether downstream consumers expect this literal
                                emitFact(subject, GR.HAS_CURRENCY, "YEN", mention, null);
                            } else if (Character.isDigit(c)) {
                                break;
                            }
                            ++index;
                        }
                        emitFact(subject, GR.HAS_CURRENCY_VALUE,
                                Double.parseDouble(s.substring(index)), mention, null);
                    }
                } catch (final NumberFormatException ex) {
                    // Normalized value not parseable as a number: log at debug level and skip
                    LOGGER.debug("Could not process normalized value: " + valueRef.getReference());
                }
            }
        }
1573 
        /**
         * Emits the mention node covering the supplied terms, returning its IRI or null if no
         * term is supplied.
         *
         * <p>The mention IRI is a fragment of the document IRI of the form
         * {@code #char=begin,end}, with additional {@code ;}-separated ranges when the span is
         * discontiguous (i.e., some gap between terms contains non-whitespace text). For
         * discontiguous spans, each contiguous run of terms is emitted recursively as a
         * component mention and linked via {@code KS_OLD.COMPONENT_SUB_STRING}.</p>
         */
        @Nullable
        private IRI emitMention(final Iterable<Term> terms) {

            // Sort terms by offset; return null on empty input
            final List<Term> sortedTerms = Ordering.from(Term.OFFSET_COMPARATOR).sortedCopy(terms);
            final int numTerms = sortedTerms.size();
            if (numTerms == 0) {
                return null;
            }

            final String text = this.documentText;
            final List<IRI> componentIRIs = Lists.newArrayList();
            final int begin = NAFUtils.getBegin(sortedTerms.get(0));
            int offset = begin;          // end offset of the last term processed so far
            int startTermIdx = 0;        // index of the first term of the current contiguous run

            final StringBuilder anchorBuilder = new StringBuilder();
            final StringBuilder uriBuilder = new StringBuilder(this.documentIRI.stringValue())
                    .append("#char=").append(begin).append(",");

            // Scan terms, closing the current run whenever the gap to the next term contains
            // non-whitespace text; each closed run becomes a component mention
            for (int i = 0; i < numTerms; ++i) {
                final Term term = sortedTerms.get(i);
                final int termOffset = NAFUtils.getBegin(term);
                if (termOffset > offset && !text.substring(offset, termOffset).trim().isEmpty()) {
                    final int start = NAFUtils.getBegin(sortedTerms.get(startTermIdx));
                    anchorBuilder.append(text.substring(start, offset)).append(" [...] ");
                    uriBuilder.append(offset).append(";").append(termOffset).append(',');
                    componentIRIs.add(emitMention(sortedTerms.subList(startTermIdx, i)));
                    startTermIdx = i;
                }
                offset = NAFUtils.getEnd(term);
            }
            // If at least one run was closed, the span is discontiguous: emit the last run too
            if (startTermIdx > 0) {
                componentIRIs.add(emitMention(sortedTerms.subList(startTermIdx, numTerms)));
            }
            anchorBuilder.append(text.substring(NAFUtils.getBegin(sortedTerms.get(startTermIdx)),
                    offset));
            uriBuilder.append(offset);

            // Emit mention metadata: document links, type, and compound-string components
            final String anchor = anchorBuilder.toString();
            final IRI mentionID = FACTORY.createIRI(uriBuilder.toString());
            emitMeta(mentionID, KS_OLD.MENTION_OF, this.documentIRI);
            emitMeta(this.documentIRI, KS_OLD.HAS_MENTION, mentionID);
            emitMeta(mentionID, RDF.TYPE, KS_OLD.MENTION);
            if (!componentIRIs.isEmpty()) {
                emitMeta(mentionID, RDF.TYPE, KS_OLD.COMPOUND_STRING);
                for (final IRI componentIRI : componentIRIs) {
                    emitMeta(mentionID, KS_OLD.COMPONENT_SUB_STRING, componentIRI);
                }
            }
            // NIF character offsets and anchor text of the mention
            emitMeta(mentionID, NIF.BEGIN_INDEX, FACTORY.createLiteral(begin));
            emitMeta(mentionID, NIF.END_INDEX, FACTORY.createLiteral(offset));
            emitMeta(mentionID, NIF.ANCHOR_OF, FACTORY.createLiteral(anchor));

            // Emit context of 3 sentences around the mention TODO
            // final int sentID = sortedTerms.get(0).getSent();
            // final List<Term> sentTerms = Lists.newArrayList();
            // for (int s = Math.max(1, sentID - 1); s <=
            // Math.min(this.document.getNumSentences(),
            // sentID + 1); ++s) {
            // sentTerms.addAll(this.document.getTermsBySent(s));
            // }
            // Collections.sort(sentTerms, Term.OFFSET_COMPARATOR);
            // final StringBuilder sentBuilder = new StringBuilder();
            // int sentOffset = -1;
            // boolean lastSelected = false;
            // for (final Term term : sentTerms) {
            // final boolean nextSelected = sortedTerms.contains(term);
            // if (!nextSelected && lastSelected) {
            // sentBuilder.append(" ]__ ");
            // }
            // if (sentOffset >= 0) {
            // for (int i = 0; i < term.getOffset() - sentOffset; ++i) {
            // sentBuilder.append(' ');
            // }
            // }
            // if (nextSelected && !lastSelected) {
            // sentBuilder.append("  __[ ");
            // }
            // sentBuilder.append(term.getStr());
            // sentOffset = term.getOffset() + term.getLength();
            // lastSelected = nextSelected;
            // }
            // emitMeta(mentionID, new IRIImpl(KS_OLD.NAMESPACE + "context"),
            // FACTORY.createLiteral(sentBuilder.toString()));

            return mentionID;
        }
1661 
1662         private IRI emitTerm(final Term head) {
1663 
1664             final ExternalRef synsetRef = NAFUtils.getRef(head, NAFUtils.RESOURCE_WN_SYNSET, null);
1665             final String headSynsetID = synsetRef == null ? null : synsetRef.getReference();
1666             final String readableHeadSynsetID = WordNet.getReadableSynsetID(headSynsetID);
1667             final String headID = MoreObjects.firstNonNull(readableHeadSynsetID, //
1668                     head.getLemma().toLowerCase());
1669 
1670             final List<IRI> modifierIRIs = Lists.newArrayList();
1671             final List<String> modifierIDs = Lists.newArrayList();
1672 
1673             for (final Term modifier : this.document.getTermsByDepAncestors(ImmutableSet.of(head),
1674                     "AMOD|NMOD")) {
1675                 if ("AGV".contains(modifier.getPos())) {
1676                     final IRI modifierIRI = emitTerm(modifier);
1677                     modifierIRIs.add(modifierIRI);
1678                     modifierIDs.add(modifierIRI.getLocalName());
1679                 }
1680             }
1681 
1682             final Set<Term> terms = this.document.getTermsByDepAncestors(ImmutableSet.of(head),
1683                     "(AMOD|NMOD)*");
1684             for (final Iterator<Term> i = terms.iterator(); i.hasNext();) {
1685                 if (!"AGV".contains(i.next().getPos())) {
1686                     i.remove();
1687                 }
1688             }
1689             final String label = NAFUtils.getText(NAFUtils.filterTerms(terms));
1690 
1691             final StringBuilder idBuilder = new StringBuilder();
1692             int level = 0;
1693             for (final String modifierID : modifierIDs) {
1694                 for (int i = 1; modifierID.contains(Strings.repeat("_", i)); ++i) {
1695                     level = Math.max(level, i);
1696                 }
1697             }
1698             final String separator = Strings.repeat("_", level + 1);
1699             for (final String modifierID : Ordering.natural().immutableSortedCopy(modifierIDs)) {
1700                 idBuilder.append(modifierID).append(separator);
1701             }
1702             final String id = idBuilder.append(headID).toString();
1703             final IRI uri = mintRefIRI("attribute", id);
1704             // final IRI uri = this.emitter.mintIRI(id + "-" + head.getId(), id);
1705 
1706             emitFact(uri, RDF.TYPE, KS_OLD.ATTRIBUTE, null, null);
1707             emitFact(uri, RDFS.LABEL, label, null, null);
1708             if (headSynsetID != null) {
1709                 emitFact(uri, KS_OLD.HEAD_SYNSET, mintRefIRI("syn", headSynsetID), null, null);
1710             }
1711             for (final IRI modifierIRI : modifierIRIs) {
1712                 emitFact(uri, KS_OLD.MOD, modifierIRI, null, null);
1713             }
1714 
1715             final IRI mentionIRI = emitMention(terms);
1716             emitMeta(mentionIRI, RDF.TYPE, KS_OLD.ATTRIBUTE_MENTION);
1717             emitMeta(uri, GAF.DENOTED_BY, mentionIRI);
1718 
1719             return uri;
1720         }
1721 
1722         @Nullable
1723         private String extractPath(final Term from, final Term to) {
1724 
1725             final Set<Term> fromTerms = this.document.getTermsByDepDescendants(
1726                     ImmutableSet.of(from), "(-VC|-IM|-OPRD)*");
1727             final Set<Term> toTerms = this.document.getTermsByDepDescendants(ImmutableSet.of(to),
1728                     "(-VC|-IM|-OPRD)*");
1729 
1730             if (!Sets.intersection(fromTerms, toTerms).isEmpty()) {
1731                 return null;
1732             }
1733 
1734             final List<Dep> path = this.document.getDepPath(from, to);
1735             if (path == null) {
1736                 return null;
1737             }
1738 
1739             for (final Iterator<Dep> i = path.iterator(); i.hasNext();) {
1740                 final Dep dep = i.next();
1741                 if (fromTerms.contains(dep.getFrom()) && fromTerms.contains(dep.getTo())
1742                         || toTerms.contains(dep.getFrom()) && toTerms.contains(dep.getTo())) {
1743                     i.remove();
1744                 }
1745             }
1746 
1747             if (fromTerms.contains(path.get(0).getTo())) {
1748                 return null; // moving towards tree root
1749             }
1750 
1751             final StringBuilder builder = new StringBuilder();
1752             for (int i = 1; i < path.size(); ++i) {
1753                 final Dep dep = path.get(i);
1754                 final String func = dep.getRfunc();
1755                 final Term term = dep.getFrom();
1756                 if (!func.equalsIgnoreCase("COORD") && !func.equals("CONJ")) {
1757                     builder.append(builder.length() > 0 ? "_" : "").append(
1758                             term.getLemma().toLowerCase().replace(' ', '_'));
1759                 }
1760             }
1761 
1762             return builder.toString();
1763         }
1764 
1765         @Nullable
1766         private Annotation defineAnnotation(final Term head, final Iterable<Term> terms) {
1767             if (head == null) {
1768                 return null;
1769             }
1770             Annotation ann = this.annotations.get(head.getId());
1771             if (ann == null) {
1772                 ann = new Annotation(head, terms);
1773                 this.annotations.put(head.getId(), ann);
1774             }
1775             return ann;
1776         }
1777 
1778         private IRI mintIRI(final String id, @Nullable final String suggestedLocalName) {
1779             String localName = this.mintedIRIs.get(id);
1780             if (localName == null) {
1781                 final String name = MoreObjects.firstNonNull(suggestedLocalName, id);
1782                 final StringBuilder builder = new StringBuilder();
1783                 for (int i = 0; i < name.length(); ++i) {
1784                     final char c = name.charAt(i);
1785                     builder.append(Character.isWhitespace(c) ? '_' : c);
1786                 }
1787                 final String base = builder.toString();
1788                 int counter = 1;
1789                 while (true) {
1790                     localName = base + (counter == 1 ? "" : "_" + counter);
1791                     if (!this.mintedIRIs.inverse().containsKey(localName)) {
1792                         this.mintedIRIs.put(id, localName);
1793                         break;
1794                     }
1795                     ++counter;
1796                 }
1797             }
1798             return FACTORY.createIRI(Util.cleanIRI(this.baseIRI + "#" + localName));
1799         }
1800 
1801         @Nullable
1802         private IRI mintRefIRI(@Nullable final String resource, @Nullable final String reference) {
1803             if (!Strings.isNullOrEmpty(resource) && !Strings.isNullOrEmpty(reference)) {
1804                 final String normResource = resource.toLowerCase();
1805                 final String namespace = RDFGenerator.this.namespaceMap.get(normResource);
1806                 if (namespace != null) {
1807                     return FACTORY
1808                             .createIRI(Util.cleanIRI(namespace + reference.replace('#', '.')));
1809                 }
1810             }
1811             return null;
1812         }
1813 
1814         private void emitMeta(@Nullable final IRI subject, @Nullable final IRI property,
1815                 @Nullable final Object objects) {
1816             if (subject != null && property != null) {
1817                 for (final Value object : extract(Value.class, objects,
1818                         RDF.TYPE.equals(property) ? RDFGenerator.this.typeMap : null)) {
1819                     this.statements.add(FACTORY.createStatement(subject, property, object));
1820                 }
1821             }
1822         }
1823 
        /**
         * Emits a fact statement for each Value extracted from the supplied object(s), placing
         * it in a context graph whose IRI is the hash of the (subject, property, object) triple
         * and optionally linking that graph to the expressing mention.
         *
         * @param subject the fact subject; no-op if null
         * @param property the fact property; no-op if null
         * @param objects one or more objects, converted to Values via extract()
         * @param mention the mention expressing the fact (linked via KS_OLD.EXPRESSED_BY), or null
         * @param confidence optional confidence (a Number); currently collected but NOT emitted
         *            — the statement-creation code below is commented out
         */
        private void emitFact(@Nullable final IRI subject, @Nullable final IRI property,
                @Nullable final Object objects, @Nullable final IRI mention,
                @Nullable final Object confidence) {
            if (subject != null && property != null) {
                // rdf:type objects are mapped through the generator's type map
                for (final Value object : extract(Value.class, objects,
                        RDF.TYPE.equals(property) ? RDFGenerator.this.typeMap : null)) {
                    final IRI factIRI = hash(subject, property, object);
                    this.statements.add(FACTORY
                            .createStatement(subject, property, object, factIRI));
                    if (mention != null) {
                        this.statements.add(FACTORY.createStatement(factIRI, KS_OLD.EXPRESSED_BY,
                                mention));
                    }
                    // Confidence emission disabled: the value is computed but never stored
                    if (confidence instanceof Number) {
                        final double confidenceValue = ((Number) confidence).doubleValue();
                        if (confidenceValue != 0.0) {
                            // this.statements.add(FACTORY.createStatement(factIRI, KS_OLD.CONFIDENCE,
                            // FACTORY.createLiteral(confidenceValue)));
                        }
                    }
                }
            }
        }
1847 
        /**
         * Post-processes the generated statements: smushes equivalent resources (preferring
         * DBpedia IRIs), then merges each composite / component structure introduced via
         * {@code KS_OLD.INCLUDE} — either keeping the composite (when it is "named") or
         * replacing it with its components in every statement that mentions it.
         *
         * @param stmts the statements to merge
         * @return the merged statements
         * @throws RDFHandlerException on failure of the smushing pipeline
         */
        private Iterable<Statement> merge(final Iterable<Statement> stmts)
                throws RDFHandlerException {

            final List<Statement> smushedStmts = Lists.newArrayList();

            ///???????
            // Smush owl:sameAs clusters, ranking "http://dbpedia.org/resource/" IRIs as
            // preferred canonical identifiers; results are collected into smushedStmts.
            RDFProcessors.smush(null, true, "http://dbpedia.org/resource/").wrap(RDFSources.wrap(stmts))
                    .emit(RDFHandlers.wrap(smushedStmts), 1);

            // First pass: collect composite -> component links (KS_OLD.INCLUDE) and remember
            // which resources carry a foaf:name (used below to decide if a composite is "named").
            final Set<Resource> named = Sets.newHashSet();
            final Multimap<Resource, Resource> groups = HashMultimap.create();
            for (final Statement stmt : smushedStmts) {
                if (stmt.getPredicate().equals(KS_OLD.INCLUDE)) {
                    groups.put(stmt.getSubject(), (Resource) stmt.getObject());
                } else if (stmt.getPredicate().equals(FOAF.NAME)) {
                    named.add(stmt.getSubject());
                }
            }

            // Second pass: partition statements into
            //  - output:     statements not touching any composite (emitted as-is),
            //  - groupRels:  statements linking two distinct composites (indexed under both),
            //  - groupProps: statements touching exactly one composite (indexed under it).
            // owl:sameAs links to blank nodes or to IRIs under this document's base are dropped.
            final List<Statement> output = Lists.newArrayList();
            final Multimap<Resource, Statement> groupProps = HashMultimap.create();
            final Multimap<Resource, Statement> groupRels = HashMultimap.create();
            for (final Statement stmt : smushedStmts) {
                final Resource subj = stmt.getSubject();
                final Value obj = stmt.getObject();
                final boolean subjIsGroup = groups.containsKey(subj);
                final boolean objIsGroup = groups.containsKey(obj);
                if (stmt.getPredicate().equals(OWL.SAMEAS)
                        && (obj instanceof BNode || obj.stringValue().startsWith(this.baseIRI))) {
                    // discard statement
                } else if (subjIsGroup && objIsGroup && !subj.equals(obj)) {
                    groupRels.put(subj, stmt);
                    groupRels.put((Resource) obj, stmt);
                } else if (subjIsGroup) {
                    groupProps.put(subj, stmt);
                } else if (objIsGroup) {
                    groupProps.put((Resource) obj, stmt);
                } else {
                    output.add(stmt);
                }
            }

            // Merge one composite / components structure at a time
            final ValueFactory vf = Statements.VALUE_FACTORY;
            for (final Resource composite : groups.keySet()) {
                final Collection<Resource> components = groups.get(composite);
                // A composite is "named" if it is a DBpedia resource or carries a foaf:name.
                final boolean isNamed = composite instanceof IRI
                        && ((IRI) composite).getNamespace().equals("http://dbpedia.org/resource/")
                        || named.contains(composite);
                if (isNamed) {
                    // Keep the named composite: its own properties go straight to the output, and
                    // each rel it participates in is demoted to a plain property of the OTHER
                    // composite (removeAll detaches the statements so they are handled once).
                    output.addAll(groupProps.get(composite));
                    for (final Statement stmt : groupRels.removeAll(composite)) {
                        if (stmt.getSubject().equals(composite)) {
                            groupRels.remove(stmt.getObject(), stmt);
                            groupProps.put((Resource) stmt.getObject(), stmt);
                        } else {
                            groupRels.remove(stmt.getSubject(), stmt);
                            groupProps.put(stmt.getSubject(), stmt);
                        }
                    }
                } else {
                    // Unnamed composite: dissolve it. Each rel is rewritten into one statement
                    // per component and re-indexed as a property of the other composite, so it
                    // will be expanded again if that composite is dissolved later.
                    for (final Statement stmt : groupRels.removeAll(composite)) {
                        final Resource subj = stmt.getSubject();
                        final IRI pred = stmt.getPredicate();
                        final Value obj = stmt.getObject();
                        final Resource ctx = stmt.getContext();
                        if (subj.equals(composite)) {
                            groupRels.remove(obj, stmt);
                            for (final Resource component : components) {
                                groupProps.put((Resource) obj,
                                        vf.createStatement(component, pred, obj, ctx));
                            }
                        } else {
                            groupRels.remove(subj, stmt);
                            for (final Resource component : components) {
                                groupProps.put(subj,
                                        vf.createStatement(subj, pred, component, ctx));
                            }
                        }
                    }
                    // Expand each property of the composite into one statement per component,
                    // skipping the structural KS_OLD.INCLUDE / rdfs:label statements of the
                    // composite itself.
                    for (final Statement stmt : groupProps.get(composite)) {
                        final IRI pred = stmt.getPredicate();
                        final Resource ctx = stmt.getContext();
                        Collection<Resource> subjs = ImmutableList.of(stmt.getSubject());
                        Collection<? extends Value> objs = ImmutableList.of(stmt.getObject());
                        if (composite.equals(stmt.getSubject())) {
                            subjs = components;
                            if (KS_OLD.INCLUDE.equals(pred) || RDFS.LABEL.equals(pred)) {
                                continue;
                            }
                        }
                        if (composite.equals(stmt.getObject())) {
                            objs = components;
                        }
                        for (final Resource subj : subjs) {
                            for (final Value obj : objs) {
                                output.add(Statements.VALUE_FACTORY.createStatement(subj, pred,
                                        obj, ctx));
                            }
                        }
                    }
                }
            }

            return output;
        }
1954 
1955         @SuppressWarnings("unchecked")
1956         private <T extends Value> Collection<T> extract(final Class<T> clazz,
1957                 @Nullable final Object object, @Nullable final Multimap<String, ? extends T> map) {
1958             if (object == null) {
1959                 return ImmutableList.of();
1960             } else if (clazz.isInstance(object)) {
1961                 return ImmutableList.of((T) object);
1962             } else if (object instanceof Iterable<?>) {
1963                 final List<T> list = Lists.newArrayList();
1964                 for (final Object element : (Iterable<?>) object) {
1965                     list.addAll(extract(clazz, element, map));
1966                 }
1967                 return list;
1968             } else if (object.getClass().isArray()) {
1969                 final List<T> list = Lists.newArrayList();
1970                 final int length = Array.getLength(object);
1971                 for (int i = 0; i < length; ++i) {
1972                     list.addAll(extract(clazz, Array.get(object, i), map));
1973                 }
1974                 return list;
1975             } else if (map != null) {
1976                 return (Collection<T>) map.get(object.toString());
1977             } else {
1978                 return ImmutableList.of(Statements.convert(object, clazz));
1979             }
1980         }
1981 
        /**
         * Computes a deterministic "fact:" IRI for a triple by murmur3-hashing a tagged
         * serialization of its components (tag bytes \u0001/\u0002/\u0003 distinguish IRI,
         * BNode and Literal so different value kinds with equal string forms cannot collide).
         * The scheme must stay stable: these IRIs name fact contexts across runs.
         *
         * @param subject the triple subject
         * @param predicate the triple predicate
         * @param object the triple object
         * @return the minted fact IRI
         */
        private IRI hash(final Resource subject, final IRI predicate, final Value object) {
            final List<String> list = Lists.newArrayList();
            for (final Value value : new Value[] { subject, predicate, object }) {
                if (value instanceof IRI) {
                    list.add("\u0001");
                    list.add(value.stringValue());
                } else if (value instanceof BNode) {
                    list.add("\u0002");
                    list.add(((BNode) value).getID());
                } else if (value instanceof Literal) {
                    final Literal l = (Literal) value;
                    list.add("\u0003");
                    list.add(l.getLabel());
                    // NOTE(review): in RDF4J, language-tagged literals report rdf:langString as
                    // datatype, so the language branch below looks unreachable — for such
                    // literals the datatype IRI is hashed instead of the language tag. Do NOT
                    // change without migrating existing fact IRIs; confirm intended semantics.
                    if (!l.getDatatype().equals(XMLSchema.STRING)) {
                        list.add(l.getDatatype().stringValue());
                    } else if (l.getLanguage().isPresent()) {
                        list.add(l.getLanguage().get());
                    }
                }
            }
            final String id = Hash.murmur3(list.toArray(new String[list.size()])).toString();
            return FACTORY.createIRI("fact:" + id);
        }
2005 
2006     }
2007 
2008     private static final class Annotation {
2009 
2010         final Term head;
2011 
2012         final List<Term> extent;
2013 
2014         IRI objectIRI;
2015 
2016         IRI predicateIRI;
2017 
2018         Annotation(final Term head, final Iterable<Term> extent) {
2019             this.head = head;
2020             this.extent = ImmutableList.copyOf(extent);
2021             this.objectIRI = null;
2022             this.predicateIRI = null;
2023         }
2024 
2025     }
2026 
2027 }