1   package eu.fbk.dkm.pikes.rdf;
3   import java.io.File;
4   import java.lang.reflect.Array;
5   import java.nio.file.Path;
6   import java.util.Arrays;
7   import java.util.Collection;
8   import java.util.Collections;
9   import java.util.Iterator;
10  import java.util.List;
11  import java.util.Map;
12  import java.util.Set;
13  import java.util.concurrent.CountDownLatch;
14  import java.util.concurrent.atomic.AtomicInteger;
16  import javax.annotation.Nullable;
18  import com.google.common.base.MoreObjects;
19  import com.google.common.base.Objects;
20  import com.google.common.base.Strings;
21  import com.google.common.collect.BiMap;
22  import com.google.common.collect.HashBiMap;
23  import com.google.common.collect.HashMultimap;
24  import com.google.common.collect.ImmutableList;
25  import com.google.common.collect.ImmutableMap;
26  import com.google.common.collect.ImmutableMultimap;
27  import com.google.common.collect.ImmutableSet;
28  import com.google.common.collect.Iterables;
29  import com.google.common.collect.Lists;
30  import com.google.common.collect.Maps;
31  import com.google.common.collect.Multimap;
32  import com.google.common.collect.Ordering;
33  import com.google.common.collect.Sets;
34  import com.google.common.io.Files;
36  import eu.fbk.dkm.pikes.rdf.vocab.*;
37  import org.eclipse.rdf4j.model.BNode;
38  import org.eclipse.rdf4j.model.Literal;
39  import org.eclipse.rdf4j.model.Model;
40  import org.eclipse.rdf4j.model.Resource;
41  import org.eclipse.rdf4j.model.Statement;
42  import org.eclipse.rdf4j.model.IRI;
43  import org.eclipse.rdf4j.model.Value;
44  import org.eclipse.rdf4j.model.ValueFactory;
45  import org.eclipse.rdf4j.model.impl.LinkedHashModel;
46  import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
47  import org.eclipse.rdf4j.model.impl.ValueFactoryImpl;
48  import org.eclipse.rdf4j.model.vocabulary.*;
49  import org.eclipse.rdf4j.rio.RDFHandler;
50  import org.eclipse.rdf4j.rio.RDFHandlerException;
51  import org.slf4j.Logger;
52  import org.slf4j.LoggerFactory;
53  import org.slf4j.MDC;
55  import ixa.kaflib.Coref;
56  import ixa.kaflib.Dep;
57  import ixa.kaflib.Entity;
58  import ixa.kaflib.ExternalRef;
59  import ixa.kaflib.Factuality;
60  import ixa.kaflib.KAFDocument;
61  import ixa.kaflib.KAFDocument.FileDesc;
62  import ixa.kaflib.LinguisticProcessor;
63  import ixa.kaflib.Opinion;
64  import ixa.kaflib.Opinion.OpinionHolder;
65  import ixa.kaflib.Opinion.OpinionTarget;
66  import ixa.kaflib.Opinion.Polarity;
67  import ixa.kaflib.Predicate;
68  import ixa.kaflib.Predicate.Role;
69  import ixa.kaflib.Span;
70  import ixa.kaflib.Term;
71  import ixa.kaflib.Timex3;
72  import ixa.kaflib.WF;
74  import eu.fbk.dkm.pikes.naflib.Corpus;
75  import eu.fbk.dkm.pikes.rdf.util.ModelUtil;
76  import eu.fbk.dkm.pikes.rdf.util.OWLTime;
77  import eu.fbk.dkm.pikes.rdf.util.ProcessorASNorm;
78  import eu.fbk.dkm.pikes.resources.NAFFilter;
79  import eu.fbk.dkm.pikes.resources.NAFUtils;
80  import eu.fbk.dkm.pikes.resources.PropBank;
81  import eu.fbk.dkm.pikes.resources.Sumo;
82  import eu.fbk.dkm.pikes.resources.WordNet;
83  import eu.fbk.dkm.pikes.resources.YagoTaxonomy;
84  import eu.fbk.utils.svm.Util;
85  import eu.fbk.rdfpro.RDFHandlers;
86  import eu.fbk.rdfpro.RDFProcessors;
87  import eu.fbk.rdfpro.RDFSource;
88  import eu.fbk.rdfpro.RDFSources;
89  import eu.fbk.rdfpro.util.Environment;
90  import eu.fbk.rdfpro.util.Hash;
91  import eu.fbk.rdfpro.util.Options;
92  import eu.fbk.rdfpro.util.QuadModel;
93  import eu.fbk.rdfpro.util.Statements;
94  import eu.fbk.rdfpro.util.Tracker;
96  // entity.type
97  // instance
99  public final class RDFGenerator {
    /** Logger shared by the generator and its nested helper classes. */
    private static final Logger LOGGER = LoggerFactory.getLogger(RDFGenerator.class);

    /** Value factory used to create RDF values such as IRIs. */
    private static final ValueFactory FACTORY = SimpleValueFactory.getInstance();

    // TODO: adapt to UD (presumably Universal Dependencies labels - confirm)
    /**
     * Dependency-path expression passed to {@code getTermsByDepAncestors} to find the terms that
     * act as modifiers of an annotation head.
     */
    private static final String MODIFIER_REGEX = "(NMOD|AMOD|TMP|LOC|TITLE) PMOD? (COORD CONJ?)* PMOD?";

    // TODO: adapt to UD (presumably Universal Dependencies labels - confirm)
    /**
     * Dependency-path expression passed to {@code getTermsByDepAncestors} to expand the head of
     * a role into the argument heads processed for that role.
     */
    private static final String PARTICIPATION_REGEX = ""
            + "SUB? (COORD CONJ?)* (PMOD (COORD CONJ?)*)? ((VC OPRD?)|(IM OPRD?))*";

    /**
     * Type map used when the builder does not supply one: associates keys such as
     * {@code entity.person} or {@code timex.date} with the corresponding IRIs.
     */
    private static final Multimap<String, IRI> DEFAULT_TYPE_MAP = ImmutableMultimap
            .<String, IRI>builder() //
            .put("entity.person", NWR.PERSON) //
            .put("entity.per", NWR.PERSON) //
            .put("entity.organization", NWR.ORGANIZATION) //
            .put("entity.org", NWR.ORGANIZATION) //
            .put("entity.location", NWR.LOCATION) //
            .put("entity.loc", NWR.LOCATION) //
            .put("entity.misc", NWR.MISC) //
            .put("entity.money", GR.PRICE_SPECIFICATION) //
            .put("entity.date", OWLTIME.DATE_TIME_INTERVAL) //
            .put("entity.time", OWLTIME.DATE_TIME_INTERVAL) //
            .put("timex.date", OWLTIME.DATE_TIME_INTERVAL) //
            .put("timex.duration", OWLTIME.PROPER_INTERVAL) //
            .build();

    /**
     * Namespace map used when the builder does not supply one: associates a resource name
     * (e.g. {@code propbank}, {@code framenet}) with a namespace string.
     */
    private static final Map<String, String> DEFAULT_NAMESPACE_MAP = ImmutableMap
            .<String, String>builder()
            .put("propbank", "http://www.newsreader-project.eu/ontologies/propbank/")
            .put("nombank", "http://www.newsreader-project.eu/ontologies/nombank/")
            .put("framenet", "http://www.newsreader-project.eu/ontologies/framenet/")
            .put("verbnet", "http://www.newsreader-project.eu/ontologies/verbnet/")
            .put("premon+propbank", "http://premon.fbk.eu/resource/")
            .put("premon+nombank", "http://premon.fbk.eu/resource/")
            .put("premon+framenet", "http://premon.fbk.eu/resource/")
            .put("premon+verbnet", "http://premon.fbk.eu/resource/")
            .put("eso", "http://www.newsreader-project.eu/domain-ontology#")
            .put("framebase", "http://framebase.org/ns/") //
            .put("attribute", "attr:")
            // TODO: change this namespace
            .put("syn", "http://wordnet-rdf.princeton.edu/wn30/")
            // TODO .put("conn", "http://www.newsreader-project.eu/conn/")
            .put("sumo", SUMO.NAMESPACE).put("yago", YagoTaxonomy.NAMESPACE).build();

    /** OWL Time namespace used when the builder does not supply one. */
    private static final String DEFAULT_OWLTIME_NAMESPACE = "http://www.newsreader-project.eu/time/";

    /** Generator instance built with every setting left at its default. */
    public static final RDFGenerator DEFAULT = RDFGenerator.builder().build();

    /** Immutable copy of the configured (or default) type map. */
    private final Multimap<String, IRI> typeMap;

    /** Immutable copy of the configured (or default) namespace map. */
    private final Map<String, String> namespaceMap;

    /** Configured (or default) OWL Time namespace. */
    private final String owltimeNamespace;

    /** Whether the extracted statements are passed through the merge step when finalizing. */
    private final boolean merging;

    /** Whether the normalization step is applied when finalizing. */
    private final boolean normalization;
160     private RDFGenerator(final Builder builder) {
161         this.typeMap = ImmutableMultimap
162                 .copyOf(MoreObjects.firstNonNull(builder.typeMap, DEFAULT_TYPE_MAP));
163         this.namespaceMap = ImmutableMap
164                 .copyOf(MoreObjects.firstNonNull(builder.namespaceMap, DEFAULT_NAMESPACE_MAP));
165         this.owltimeNamespace = MoreObjects.firstNonNull(builder.owltimeNamespace,
166                 DEFAULT_OWLTIME_NAMESPACE);
167         this.merging = MoreObjects.firstNonNull(builder.merging, Boolean.FALSE);
168         this.normalization = MoreObjects.firstNonNull(builder.normalization, Boolean.FALSE);
169     }
171     public Model generate(final KAFDocument document,
172             @Nullable final Iterable<Integer> sentenceIDs) {
173         final Model model = new LinkedHashModel();
174         generate(document, sentenceIDs, model);
175         return model;
176     }
178     public void generate(final KAFDocument document, @Nullable final Iterable<Integer> sentenceIDs,
179             final Collection<? super Statement> output) {
180         final RDFHandler handler = RDFHandlers.wrap(output);
181         try {
182             generate(document, sentenceIDs, handler);
183         } catch (final Throwable ex) {
184             throw new RuntimeException("Unexpected exception (!)", ex);
185         }
186     }
188     public void generate(final KAFDocument document, @Nullable final Iterable<Integer> sentenceIDs,
189             final RDFHandler handler) throws RDFHandlerException {
191         final boolean[] ids = new boolean[document.getNumSentences() + 1];
192         if (sentenceIDs == null) {
193             Arrays.fill(ids, true);
194         } else {
195             for (final Integer sentenceID : sentenceIDs) {
196                 ids[sentenceID] = true;
197             }
198         }
200         final String baseIRI = document.getPublic().uri;
201         new Extractor(baseIRI, handler, document, ids).run();
202     }
204     public static Builder builder() {
205         return new Builder();
206     }
208     public static final class Builder {
210         @Nullable
211         private Multimap<String, IRI> typeMap;
213         @Nullable
214         private Multimap<String, IRI> propertyMap;
216         @Nullable
217         private Map<String, String> namespaceMap;
219         @Nullable
220         private String owltimeNamespace;
222         @Nullable
223         private Boolean merging;
225         @Nullable
226         private Boolean normalization;
228         /**
229          * Sets all the properties in the map supplied, matching an optional prefix.
230          *
231          * @param properties
232          *            the properties to configure, not null
233          * @param prefix
234          *            an optional prefix used to select the relevant properties in the map
235          * @return this builder object, for call chaining
236          */
237         public Builder withProperties(final Map<?, ?> properties, @Nullable final String prefix) {
238             final String p = prefix == null ? "" : prefix.endsWith(".") ? prefix : prefix + ".";
239             for (final Map.Entry<?, ?> entry : properties.entrySet()) {
240                 if (entry.getKey() != null && entry.getValue() != null
241                         && entry.getKey().toString().startsWith(p)) {
242                     final String name = entry.getKey().toString().substring(p.length());
243                     final String value = Strings.emptyToNull(entry.getValue().toString());
244                     if ("fusion".equals(name)) {
245                         withMerging(Boolean.valueOf(value));
246                     } else if ("normalization".equals(name)) {
247                         withNormalization(Boolean.valueOf(value));
248                     }
249                 }
250             }
251             return this;
252         }
254         public Builder withTypeMap(@Nullable final Multimap<String, IRI> typeMap) {
255             this.typeMap = typeMap;
256             return this;
257         }
259         public Builder withPropertyMap(@Nullable final Multimap<String, IRI> propertyMap) {
260             this.propertyMap = propertyMap;
261             return this;
262         }
264         public Builder withNamespaceMap(@Nullable final Map<String, String> namespaceMap) {
265             this.namespaceMap = namespaceMap;
266             return this;
267         }
269         public Builder withOWLTimeNamespace(@Nullable final String owltimeNamespace) {
270             this.owltimeNamespace = owltimeNamespace;
271             return this;
272         }
274         public Builder withMerging(@Nullable final Boolean merging) {
275             this.merging = merging;
276             return this;
277         }
279         public Builder withNormalization(@Nullable final Boolean normalization) {
280             this.normalization = normalization;
281             return this;
282         }
284         public RDFGenerator build() {
285             return new RDFGenerator(this);
286         }
288     }
290     static final class Runner implements Runnable {
292         private final Corpus corpus;
294         private final RDFGenerator generator;
296         private final File outputFile;
298         private final boolean intermediate;
300         private Runner(final Corpus corpus, final RDFGenerator generator, final File outputFile,
301                 final boolean split) {
302             this.corpus = corpus;
303             this.generator = generator;
304             this.outputFile = outputFile.getAbsoluteFile();
305             this.intermediate = split;
306         }
308         static Runner create(final String name, final String... args) {
309             final Options options = Options
310                     .parse("r,recursive|o,output!|m,merge|n,normalize|i,intermediate|+", args);
311             final File outputFile = options.getOptionArg("o", File.class);
312             final boolean recursive = options.hasOption("r");
313             final boolean merge = options.hasOption("m");
314             final boolean normalize = options.hasOption("n");
315             final boolean intermediate = options.hasOption("i");
316             final Corpus corpus = Corpus.create(recursive, options.getPositionalArgs(File.class));
317             final RDFGenerator generator = RDFGenerator.builder()
318                     .withProperties(Util.PROPERTIES, "eu.fbk.dkm.pikes.rdf.RDFGenerator")
319                     .withMerging(merge).withNormalization(normalize).build();
320             return new Runner(corpus, generator, outputFile, intermediate);
321         }
323         @Override
324         public void run() {
326             LOGGER.info("Converting {} NAF files to RDF", this.corpus.size());
328             final NAFFilter filter = NAFFilter.builder()
329                     .withProperties(Util.PROPERTIES, "eu.fbk.dkm.pikes.rdf.NAFFilter")
330                     .withSRLPreprocess(true, true, true).build();
332             final RDFHandler writer;
333             if (!this.intermediate) {
334                 try {
335                     Files.createParentDirs(this.outputFile);
336                     writer = RDFHandlers.write(null, 1, Runner.this.outputFile.getAbsolutePath());
337                     writer.startRDF();
338                 } catch (final Throwable ex) {
339                     throw new RuntimeException(ex);
340                 }
341             } else {
342                 writer = null;
343             }
345             final Tracker tracker = new Tracker(LOGGER, null, //
346                     "Processed %d NAF files (%d NAF/s avg)", //
347                     "Processed %d NAF files (%d NAF/s, %d NAF/s avg)");
349             final int numThreads = Environment.getCores();
350             final CountDownLatch latch = new CountDownLatch(numThreads);
351             final AtomicInteger counter = new AtomicInteger(0);
352             final AtomicInteger succeeded = new AtomicInteger(0);
353             tracker.start();
354             for (int i = 0; i < numThreads; ++i) {
355                 Environment.getPool().submit(new Runnable() {
357                     @Override
358                     public void run() {
359                         try {
360                             final Path outBase = Runner.this.outputFile.toPath().getParent()
361                                     .toAbsolutePath().normalize();
362                             while (true) {
363                                 final int i = counter.getAndIncrement();
364                                 if (i >= Runner.this.corpus.size()) {
365                                     break;
366                                 }
367                                 String docName = null;
369                                 final Path path = Runner.this.corpus.file(i);
371                                 Path output = null;
372                                 if (Runner.this.intermediate) {
373                                     try {
374                                         final Path base = Runner.this.corpus.path();
375                                         final Path relative = base.toAbsolutePath()
376                                                 .relativize(path.toAbsolutePath());
377                                         String name = relative.toString();
378                                         int index = name.indexOf(".naf");
379                                         if (index < 0) {
380                                             index = name.indexOf(".xml");
381                                         }
382                                         name = name.substring(0, index) + ".tql.gz";
383                                         output = outBase.resolve(name);
384                                         if (java.nio.file.Files.exists(output)) {
385                                             LOGGER.info("Skipping {}", path);
386                                             succeeded.incrementAndGet();
387                                             tracker.increment();
388                                             continue;
389                                         }
390                                     } catch (final Throwable ex) {
391                                         LOGGER.error("Could not compute output file name", ex);
392                                     }
393                                 }
395                                 LOGGER.info("Processing {}", path);
397                                 try {
398                                     final KAFDocument document = Runner.this.corpus.get(i);
399                                     docName = document.getPublic().publicId;
400                                     MDC.put("context", docName);
401                                     filter.filter(document);
402                                     final RDFSource source = RDFSources
403                                             .wrap(Runner.this.generator.generate(document, null));
405                                     if (!Runner.this.intermediate) {
406                                         source.emit(RDFHandlers.ignoreMethods(writer,
407                                                 RDFHandlers.METHOD_START_RDF
408                                                         | RDFHandlers.METHOD_END_RDF
409                                                         | RDFHandlers.METHOD_CLOSE),
410                                                 1);
411                                     } else {
412                                         java.nio.file.Files.createDirectories(output.getParent());
413                                         source.emit(RDFHandlers.write(null, 1,
414                                                 output.toAbsolutePath().toString()), 1);
415                                     }
417                                     succeeded.incrementAndGet();
419                                 } catch (final Throwable ex) {
420                                     LOGGER.error("Processing failed for " + docName, ex);
421                                 } finally {
422                                     MDC.remove("context");
423                                 }
424                                 tracker.increment();
425                             }
426                         } finally {
427                             latch.countDown();
428                         }
429                     }
431                 });
432             }
433             try {
434                 latch.await();
435                 if (!this.intermediate) {
436                     writer.endRDF();
437                 }
438             } catch (final InterruptedException ex) {
439                 Thread.currentThread().interrupt();
440             } catch (final RDFHandlerException ex) {
441                 throw new RuntimeException(ex);
442             }
443             tracker.end();
445             LOGGER.info("Successfully converted {}/{} files", succeeded, this.corpus.size());
446         }
447     }
449     private final class Extractor {
        /** Base IRI supplied at construction time. */
        private final String baseIRI;

        /** Handler supplied at construction time (presumably the sink of the output - confirm). */
        private final RDFHandler handler;

        /** Statements accumulated by the extraction; merged on request when finalizing. */
        private final QuadModel statements;

        // NOTE(review): created empty in the constructor; its use is not visible here -
        // presumably it tracks the IRIs minted so far.
        private final BiMap<String, String> mintedIRIs;

        /** The document being processed. */
        private final KAFDocument document;

        /** IRI of the document, created from its cleaned public URI. */
        private final IRI documentIRI;

        /** Flags indexed by sentence ID: true for the sentences to process. */
        private final boolean[] sentenceIDs;

        /** Document text rebuilt from the word forms placed at their offsets. */
        private final String documentText;

        /** Annotations collected during the extraction; looked up by term ID. */
        private final Map<String, Annotation> annotations;
469         public Extractor(final String baseIRI, final RDFHandler handler,
470                 final KAFDocument document, final boolean[] sentenceIDs) {
472             this.baseIRI = baseIRI;
473             this.handler = handler;
474             this.statements = QuadModel.create();
475             this.mintedIRIs = HashBiMap.create();
476             this.document = document;
477             this.documentIRI = FACTORY.createIRI(Util.cleanIRI(document.getPublic().uri));
478             this.sentenceIDs = sentenceIDs;
480             final StringBuilder builder = new StringBuilder();
481             for (final WF word : document.getWFs()) {
482                 final int offset = word.getOffset();
483                 if (builder.length() > offset) {
484                     builder.setLength(offset);
485                 } else {
486                     while (builder.length() < offset) {
487                         builder.append(" ");
488                     }
489                 }
490                 builder.append(word.getForm());
491             }
492             this.documentText = builder.toString();
494             this.annotations = Maps.newHashMap();
495         }
497         public void run() throws RDFHandlerException {
499             // 0. Process NAF metadata
500             processMetadata();
502             // 1. Process <timex3> annotations
503             for (final Timex3 timex : this.document.getTimeExs()) {
504                 if (timex.getSpan() == null
505                         || this.sentenceIDs[timex.getSpan().getFirstTarget().getSent()]) {
506                     try {
507                         processTimex(timex);
508                     } catch (final Throwable ex) {
509                         LOGGER.error("Error processing " + NAFUtils.toString(timex) + ", type "
510                                 + timex.getType() + ", value " + timex.getValue(), ex);
511                     }
512                 }
513             }
515             // 2. Process <entity> annotations
516             for (final Entity entity : this.document.getEntities()) {
517                 for (final Span<Term> span : entity.getSpans()) {
518                     if (this.sentenceIDs[span.getFirstTarget().getSent()]) {
519                         try {
520                             processEntity(entity);
521                         } catch (final Throwable ex) {
522                             LOGGER.error("Error processing " + NAFUtils.toString(entity)
523                                     + ", type " + entity.getType(), ex);
524                         }
525                         break; // move to next entity
526                     }
527                 }
528             }
530             // 3. Process <predicate> annotations; must be done after 1, 2
531             outer: for (final Predicate predicate : this.document.getPredicates()) {
532                 if (this.sentenceIDs[predicate.getSpan().getFirstTarget().getSent()]) {
533                     // TODO: the code below is madness... :-(
534                     for (final ExternalRef ref : predicate.getExternalRefs()) {
535                         if (NAFUtils.RESOURCE_PROPBANK.equals(ref.getResource())
536                                 && ref.getReference().equals("be.01")) {
537                             Term a1Head = null;
538                             Term a2Head = null;
539                             for (final Role role : predicate.getRoles()) {
540                                 final Term head = NAFUtils.extractHead(this.document,
541                                         role.getSpan());
542                                 if (head != null) {
543                                     if ("A1".equals(role.getSemRole())) {
544                                         a1Head = head;
545                                     } else if ("A2".equals(role.getSemRole())) {
546                                         a2Head = head;
547                                     }
548                                 }
549                             }
550                             if (a1Head != null && a2Head != null) {
551                                 for (final Coref coref : this.document.getCorefsByTerm(a1Head)) {
552                                     final Set<Term> corefHeads = Sets.newHashSet();
553                                     for (final Span<Term> span : coref.getSpans()) {
554                                         final Term head = NAFUtils.extractHead(this.document,
555                                                 span);
556                                         if (head != null) {
557                                             corefHeads.add(head);
558                                         }
559                                     }
560                                     if (corefHeads.contains(a1Head)
561                                             && corefHeads.contains(a2Head)) {
562                                         continue outer;
563                                     }
564                                 }
565                             }
566                         }
567                     }
568                     try {
569                         processPredicate(predicate);
570                     } catch (final Throwable ex) {
571                         LOGGER.error("Error processing " + NAFUtils.toString(predicate), ex);
572                     }
573                 }
574             }
576             // 4. Process <factvalue> annotations; must be done after 3
577             for (final Factuality factuality : this.document.getFactualities()) {
578                 if (this.sentenceIDs[factuality.getWord().getSent()]) {
579                     try {
580                         processFactuality(factuality);
581                     } catch (final Throwable ex) {
582                         LOGGER.error("Error processing " + NAFUtils.toString(factuality), ex);
583                     }
584                 }
585             }
587             // 5. Process <term> acting as modifiers; must be done after 1, 2, 3
588             for (final Annotation ann : this.annotations.values()) {
589                 final IRI uri = ann.predicateIRI != null ? ann.predicateIRI : ann.objectIRI;
590                 if (uri != null) {
591                     final Set<Term> forbiddenTerms = Sets.newHashSet();
592                     final List<Coref> corefs = this.document.getCorefsByTerm(ann.head);
593                     for (final Coref coref : corefs) {
594                         final List<Term> heads = Lists.newArrayList();
595                         for (final Span<Term> span : coref.getSpans()) {
596                             final Term head = NAFUtils.extractHead(this.document, span);
597                             if (head != null) {
598                                 heads.add(head);
599                             }
600                         }
601                         if (heads.contains(ann.head)) {
602                             forbiddenTerms.addAll(heads);
603                         }
604                     }
605                     for (final Term term : this.document.getTermsByDepAncestors(
606                             Collections.singleton(ann.head), MODIFIER_REGEX)) {
607                         if (!forbiddenTerms.contains(term)) {
608                             try {
609                                 processModifier(term, ann.head, uri, ann.extent);
610                             } catch (final Throwable ex) {
611                                 LOGGER.error("Error processing MODIFIER " + NAFUtils.toString(term)
612                                         + " of " + NAFUtils.toString(ann.head) + " (object IRI "
613                                         + ann.objectIRI + "; predicate IRI " + ann.predicateIRI
614                                         + ")", ex);
615                             }
616                         }
617                     }
618                 }
619             }
621             // 6. Process <coref> annotations; must be done after 1, 2, 3
622             for (final Coref coref : this.document.getCorefs()) {
623                 if ("event".equalsIgnoreCase(coref.getType())) {
624                     continue;
625                 }
626                 final List<Span<Term>> spans = Lists.newArrayList();
627                 for (final Span<Term> span : coref.getSpans()) {
628                     if (this.sentenceIDs[span.getFirstTarget().getSent()]) {
629                         spans.add(span);
630                     }
631                 }
632                 if (!spans.isEmpty()) {
633                     try {
634                         processCoref(spans);
635                     } catch (final Throwable ex) {
636                         LOGGER.error("Error processing " + NAFUtils.toString(coref), ex);
637                     }
638                 }
639             }
641             // 7. Process head <term>s in <role> annotations; must be done after 1, 2, 3
642             for (final Predicate predicate : this.document.getPredicates()) {
643                 if (this.sentenceIDs[predicate.getSpan().getFirstTarget().getSent()]) {
644                     final PropBank.Roleset rs = PropBank
645                             .getRoleset(NAFUtils.getRoleset(predicate));
646                     final String entitySuffix = rs == null ? "?"
647                             : Integer.toString(rs.getCoreferenceEntityArg());
648                     final String predicateSuffix = rs == null ? "?"
649                             : Integer.toString(rs.getCoreferencePredicateArg());
650                     Set<Term> corefEntityHeads = null;
651                     Set<Term> corefPredicateHeads = null;
652                     for (final Role role : predicate.getRoles()) {
653                         final Term roleHead = NAFUtils.extractHead(this.document, role.getSpan());
654                         if (roleHead != null) {
655                             final Set<Term> argHeads = this.document.getTermsByDepAncestors(
656                                     Collections.singleton(roleHead), PARTICIPATION_REGEX);
657                             boolean isCorefPredicateRole = false;
658                             if (role.getSemRole().endsWith(entitySuffix)) {
659                                 corefEntityHeads = argHeads;
660                             } else if (role.getSemRole().endsWith(predicateSuffix)) {
661                                 corefPredicateHeads = argHeads;
662                                 isCorefPredicateRole = true;
663                             }
664                             for (final Term argHead : argHeads) {
665                                 try {
666                                     processRole(predicate, role, argHead, isCorefPredicateRole);
667                                 } catch (final Throwable ex) {
668                                     LOGGER.error("Error processing " + NAFUtils.toString(role)
669                                             + " of " + NAFUtils.toString(predicate) + ", argument "
670                                             + NAFUtils.toString(argHead), ex);
671                                 }
672                             }
673                         }
674                     }
675                     if (corefEntityHeads != null && corefEntityHeads.size() == 1
676                             && corefPredicateHeads != null && corefPredicateHeads.size() == 1) {
677                         final Annotation entityAnn = this.annotations
678                                 .get(corefEntityHeads.iterator().next().getId());
679                         final Annotation predicateAnn = this.annotations
680                                 .get(corefPredicateHeads.iterator().next().getId());
681                         if (predicateAnn != null && entityAnn != null
682                                 && predicateAnn.predicateIRI != null
683                                 && predicateAnn.objectIRI != null && entityAnn.objectIRI != null) {
684                             final IRI mentionIRI = emitMention(
685                                     Iterables.concat(predicateAnn.extent, entityAnn.extent));
686                             emitFact(predicateAnn.objectIRI, OWL.SAMEAS, entityAnn.objectIRI,
687                                     mentionIRI, null);
688                         }
689                     }
690                 }
691             }
693             // 8. Process <opinion>s; must be done after 1, 2, 3
694             for (final Opinion opinion : this.document.getOpinions()) {
695                 if (opinion.getOpinionExpression() == null || opinion.getLabel() != null
696                         && (opinion.getLabel().toLowerCase().contains("stanford")
697                                 || opinion.getLabel().toLowerCase().contains("gold"))) {
698                     continue;
699                 }
700                 for (final Term term : opinion.getOpinionExpression().getTerms()) {
701                     if (this.sentenceIDs[term.getSent()]) {
702                         processOpinion(opinion);
703                         break;
704                     }
705                 }
706             }
708             // 9. Finalize
709             Iterable<Statement> statements = RDFGenerator.this.merging ? merge(this.statements)
710                     : this.statements;
711             if (RDFGenerator.this.normalization) {
712                 statements = new ProcessorASNorm("fact:").wrap(RDFSources.wrap(statements));
713             }
714             this.handler.startRDF();
715             for (final Statement statement : statements) {
716                 this.handler.handleStatement(statement);
717             }
718             this.handler.endRDF();
719         }
721         private void processMetadata() throws RDFHandlerException {
723             // Obtain IRIs of document and NAF resources
724             final IRI docIRI = this.documentIRI;
725             final IRI nafIRI = FACTORY.createIRI(docIRI.stringValue() + ".naf");
727             // Emit document types
728             emitMeta(docIRI, RDF.TYPE, new IRI[] { KS_OLD.RESOURCE, KS_OLD.TEXT });
730             // Emit title, author and DCT from the <fileDesc> element, if present
731             if (this.document.getFileDesc() != null) {
732                 final FileDesc fd = this.document.getFileDesc();
733                 emitMeta(docIRI, DCTERMS.TITLE, fd.title);
734                 emitMeta(docIRI, DCTERMS.CREATOR, fd.author);
735                 emitMeta(docIRI, DCTERMS.CREATED, fd.creationtime);
736                 emitMeta(docIRI, KS_OLD.NAF_FILE_NAME, fd.filename);
737                 emitMeta(docIRI, KS_OLD.NAF_FILE_TYPE, fd.filetype);
738                 emitMeta(docIRI, KS_OLD.NAF_PAGES, fd.pages);
739             }
741             // Emit the document language, if available
742             if (this.document.getLang() != null) {
743                 emitMeta(docIRI, DCTERMS.LANGUAGE,
744                         ModelUtil.languageCodeToIRI(this.document.getLang()));
745             }
747             // Emit an hash of the whitespace-normalized raw text, if available
748             if (this.document.getRawText() != null) {
749                 final String rawText = this.document.getRawText();
750                 final StringBuilder builder = new StringBuilder();
751                 boolean addSpace = false;
752                 for (int i = 0; i < rawText.length(); ++i) {
753                     final char c = rawText.charAt(i);
754                     if (Character.isWhitespace(c)) {
755                         addSpace = builder.length() > 0;
756                     } else {
757                         if (addSpace) {
758                             builder.append(' ');
759                             addSpace = false;
760                         }
761                         builder.append(c);
762                     }
763                 }
764                 emitMeta(docIRI, KS_OLD.TEXT_HASH, Hash.murmur3(builder.toString()).toString());
765             }
767             // Link document to its NAF annotation
768             emitMeta(docIRI, KS_OLD.ANNOTATED_WITH, nafIRI);
769             emitMeta(nafIRI, KS_OLD.ANNOTATION_OF, docIRI);
771             // Emit types, version and publicId of NAF resource
772             emitMeta(nafIRI, RDF.TYPE, new IRI[] { KS_OLD.RESOURCE, KS_OLD.NAF });
773             emitMeta(nafIRI, KS_OLD.VERSION, this.document.getVersion());
774             emitMeta(nafIRI, DCTERMS.IDENTIFIER, this.document.getPublic().publicId);
776             // Emit information about linguistic processors: dct:created, dct:creatro, ego:layer
777             String timestamp = null;
778             for (final Map.Entry<String, List<LinguisticProcessor>> entry : this.document
779                     .getLinguisticProcessors().entrySet()) {
780                 emitMeta(nafIRI, KS_OLD.LAYER,
781                         FACTORY.createIRI(KS_OLD.NAMESPACE, "layer_" + entry.getKey()));
782                 for (final LinguisticProcessor lp : entry.getValue()) {
783                     if (timestamp == null) {
784                         if (!Strings.isNullOrEmpty(lp.getBeginTimestamp())) {
785                             timestamp = lp.getBeginTimestamp();
786                         } else if (!Strings.isNullOrEmpty(lp.getEndTimestamp())) {
787                             timestamp = lp.getEndTimestamp();
788                         }
789                     }
790                     final IRI lpIRI = FACTORY.createIRI(ModelUtil
791                             .cleanIRI(KS_OLD.NAMESPACE + lp.getName() + '.' + lp.getVersion()));
792                     emitMeta(nafIRI, DCTERMS.CREATOR, lpIRI);
793                     emitMeta(lpIRI, DCTERMS.TITLE, lp.getName());
794                     emitMeta(lpIRI, KS_OLD.VERSION, lp.getVersion());
795                 }
796             }
797             emitMeta(nafIRI, DCTERMS.CREATED, timestamp);
798         }
800         private void processTimex(final Timex3 timex) throws RDFHandlerException {
802             // Abort if timex has no span (e.g., the DCT)
803             if (timex.getSpan() == null) {
804                 return;
805             }
807             // Extract terms, head and label
808             final List<Term> terms = this.document.getTermsByWFs(timex.getSpan().getTargets());
809             final Term head = NAFUtils.extractHead(this.document, KAFDocument.newTermSpan(terms));
810             final String label = NAFUtils.getText(NAFUtils.filterTerms(terms));
811             final String type = timex.getType().trim().toLowerCase();
813             // Annotate the term (or pickup the existing annotation)
814             final Annotation ann = defineAnnotation(head, terms);
816             // Abort if cannot annotate (wrong head) or if a IRI was already assigned to the term
817             if (ann == null || ann.objectIRI != null) {
818                 return;
819             }
821             // Emit a mention and its triples for the current timex
822             final IRI mentionIRI = emitMention(terms);
823             emitMeta(mentionIRI, RDF.TYPE, KS_OLD.TIME_MENTION);
825             // Emit type specific statements
826             IRI timexIRI = null;
827             if (timex.getValue() != null) {
828                 if (type.equals("date") || type.equals("time")) {
829                     final OWLTime.Interval interval = OWLTime.Interval
830                             .parseTimex(timex.getValue());
831                     if (interval != null) {
832                         timexIRI = interval.toRDF(this.handler, RDFGenerator.this.owltimeNamespace,
833                                 null);
834                     } else {
835                         LOGGER.debug("Could not represent date/time value '" + timex.getValue()
836                                 + "' of " + NAFUtils.toString(timex));
837                     }
838                 } else if (type.equals("duration")) {
839                     final OWLTime.Duration duration = OWLTime.Duration
840                             .parseTimex(timex.getValue());
841                     if (duration != null) {
842                         timexIRI = FACTORY.createIRI(RDFGenerator.this.owltimeNamespace,
843                                 duration.toString());
844                         final IRI durationIRI = duration.toRDF(this.handler,
845                                 RDFGenerator.this.owltimeNamespace, null);
846                         emitFact(timexIRI, OWLTIME.HAS_DURATION_DESCRIPTION, durationIRI,
847                                 mentionIRI, null);
848                     } else {
849                         LOGGER.debug("Could not represent duration value '" + timex.getValue()
850                                 + "' of " + NAFUtils.toString(timex));
851                     }
852                 } else {
853                     // TODO: support SET?
854                     throw new UnsupportedOperationException("Unsupported TIMEX3 type: " + type);
855                 }
856             }
858             // Generate a default timex IRI on failure
859             if (timexIRI == null) {
860                 timexIRI = mintIRI(timex.getId(),
861                         MoreObjects.firstNonNull(timex.getValue(), timex.getSpan().getStr()));
862             }
864             // Register the timex IRI it in the term annotation and link it to the mention
865             ann.objectIRI = timexIRI;
866             emitMeta(timexIRI, GAF.DENOTED_BY, mentionIRI);
868             // Emit common attributes based on head and label
869             emitFact(timexIRI, RDF.TYPE,
870                     ImmutableList.of(KS_OLD.ENTITY, KS_OLD.TIME, "timex." + type), mentionIRI,
871                     null);
872             emitCommonAttributes(timexIRI, mentionIRI, head, label, true);
873         }
875         private void processEntity(final Entity entity) throws RDFHandlerException {
877             // Retrieve terms, head and label
878             final List<Term> terms = entity.getSpans().get(0).getTargets();
879             final String label = NAFUtils.getText(NAFUtils.filterTerms(terms));
880             final Term head = NAFUtils.extractHead(this.document, entity.getSpans().get(0));
881             if (head == null) {
882                 return;
883             }
885             // Extract type information (type IRI, whether timex or attribute) based on NER tag
886             String type = entity.getType();
887             type = type == null ? null : type.toLowerCase();
888             // final Collection<IRI> typeIRIs = RDFGenerator.this.typeMap.get("entity." + type);
889             final boolean isLinked = !entity.getExternalRefs().isEmpty();
890             final boolean isProperty = "money".equals(type) || "cardinal".equals(type)
891                     || "ordinal".equals(type) || "percent".equals(type) || "language".equals(type)
892                     || "norp".equals(type) || "quantity".equals(type);
894             // Discard attributes in modifier position, as they will be considered later
895             final Dep dep = this.document.getDepToTerm(head);
896             if (isProperty && dep != null) {
897                 final String depLabel = dep.getRfunc().toUpperCase();
898                 if (depLabel.contains("NMOD") || depLabel.contains("AMOD")) {
899                     return;
900                 }
901             }
903             // Annotate the term (or pickup the existing annotation)
904             final Annotation ann = defineAnnotation(head, terms);
906             // Abort if cannot annotate (wrong head) or if a IRI was already assigned to the term
907             if (ann == null || ann.objectIRI != null) {
908                 return;
909             }
911             // Mint a IRI for the entity and register it in the term annotation
912             final IRI entityIRI;
913             if (!entity.isNamed() || isLinked) {
914                 entityIRI = mintIRI(entity.getId(),
915                         entity.isNamed() ? entity.getSpans().get(0).getStr() : head.getLemma());
916             } else {
917                 entityIRI = Statements.VALUE_FACTORY.createIRI(Util
918                         .cleanIRI("entity:" + entity.getStr().toLowerCase().replace(' ', '_')));
919             }
920             ann.objectIRI = entityIRI;
922             // Emit a mention and its triples for the current entity
923             final IRI mentionIRI = emitMention(terms);
924             emitMeta(entityIRI, GAF.DENOTED_BY, mentionIRI);
925             emitMeta(mentionIRI, RDF.TYPE, KS_OLD.ENTITY_MENTION);
926             // if ("person".equals(type)) {
927             // emitMeta(mentionIRI, RDF.TYPE, KS_OLD.PERSON_MENTION);
928             // } else if ("organization".equals(type)) {
929             // emitMeta(mentionIRI, RDF.TYPE, KS_OLD.ORGANIZATION_MENTION);
930             // } else if ("location".equals(type)) {
931             // emitMeta(mentionIRI, RDF.TYPE, KS_OLD.LOCATION_MENTION);
932             // } else if (!isProperty) {
933             // emitMeta(mentionIRI, RDF.TYPE, KS_OLD.MISC_MENTION);
934             // }
935             if (isProperty) {
936                 emitMeta(mentionIRI, RDF.TYPE, KS_OLD.ATTRIBUTE_MENTION);
937             }
939             // Emit common attributes based on head and label
940             emitFact(entityIRI, RDF.TYPE, new Object[] { KS_OLD.ENTITY, "entity",
941                     type == null ? null : "entity." + type }, mentionIRI, null);
942             if (this.document.getPredicatesByTerm(head).isEmpty()) {
943                 emitCommonAttributes(entityIRI, mentionIRI, head, label, true);
944             }
946             // Handle the case the <entity> is an attribute of some anonymous instance
947             if (isProperty) {
948                 emitEntityAttributes(entity, entityIRI, mentionIRI);
949             } else {
951                 // TODO: originally the following check was enforced
952                 // if (!typeIRIs.isEmpty()) {
953                 // }
955                 // Handle the case the <entity> is an ontological instance itself
956                 final boolean named = entity.isNamed() || "romanticism".equalsIgnoreCase(label)
957                         || "operant conditioning chamber".equalsIgnoreCase(label); // TODO
958                 if (named) {
959                     emitFact(entityIRI, FOAF.NAME, label, mentionIRI, null);
960                     emitMeta(mentionIRI, RDF.TYPE, KS_OLD.NAME_MENTION);
961                 }
962                 final IRI property = named ? OWL.SAMEAS : RDFS.SEEALSO;
963                 for (final ExternalRef ref : entity.getExternalRefs()) {
964                     try {
965                         final IRI refIRI = FACTORY.createIRI(Util.cleanIRI(ref.getReference()));
966                         emitFact(entityIRI, property, refIRI, mentionIRI,
967                                 (double) ref.getConfidence());
968                     } catch (final Throwable ex) {
969                         // ignore: not a IRI
970                     }
971                 }
972             }
973         }
975         private void processPredicate(final Predicate predicate) throws RDFHandlerException {
977             // Retrieve terms, head and label
978             final List<Term> terms = predicate.getSpan().getTargets();
979             final String label = NAFUtils.getText(NAFUtils.filterTerms(terms));
980             final Term head = NAFUtils.extractHead(this.document, predicate.getSpan());
982             // Abort if predicate overlaps with timex or named / ordinal entity
983             if (!this.document.getTimeExsByTerm(head).isEmpty()) {
984                 return;
985             }
986             for (final Entity entity : this.document.getEntitiesByTerm(head)) {
987                 if (entity.isNamed() || "ordinal".equalsIgnoreCase(entity.getType())) {
988                     return;
989                 }
990             }
992             // Annotate the term (or pickup the existing annotation); abort if wrong head
993             final Annotation ann = defineAnnotation(head, terms);
994             if (ann == null) {
995                 return;
996             }
998             // Validate the existing annotation based on expected previous processing
999             if (ann.predicateIRI != null) {
1000                 LOGGER.warn("Already processed: " + NAFUtils.toString(predicate) + "; head is "
1001                         + NAFUtils.toString(head));
1002                 return; // this is a problem of the NAF
1003             }
1005             // Determine whether the predicate admit its own span as an argument
1006             boolean selfArg = false;
1007             if (ann.objectIRI != null) {
1008                 for (final Role role : predicate.getRoles()) {
1009                     selfArg |= head.equals(NAFUtils.extractHead(this.document, role.getSpan()));
1010                 }
1011             }
1013             // Determine if the predicate is an event, based on SUMO mapping
1014             boolean isEvent = false;
1015             for (final ExternalRef ref : head.getExternalRefs()) {
1016                 if ("SUMO".equals(ref.getResource())) {
1017                     final IRI conceptIRI = SimpleValueFactory.getInstance()
1018                             .createIRI(SUMO.NAMESPACE, ref.getReference());
1019                     if (Sumo.isSubClassOf(conceptIRI, SUMO.PROCESS)) {
1020                         isEvent = true;
1021                         break;
1022                     }
1023                 }
1024             }
1026             // Assign a IRI to the predicate, possibly reusing the IRI of an entity
1027             final IRI predicateIRI = ann.objectIRI != null && !selfArg ? ann.objectIRI
1028                     : mintIRI(predicate.getId(), head.getLemma());
1029             ann.predicateIRI = predicateIRI;
1031             // Emit a mention and its triples (reuse an entity span if possible)
1032             IRI mentionIRI = null;
1033             if (predicateIRI.equals(ann.objectIRI)) {
1034                 for (final Entity entity : this.document.getEntitiesByTerm(head)) {
1035                     mentionIRI = emitMention(entity.getSpans().get(0).getTargets());
1036                 }
1037             } else {
1038                 mentionIRI = emitMention(terms);
1039             }
1040             emitMeta(mentionIRI, RDF.TYPE, KS_OLD.PREDICATE_MENTION);
1041             emitMeta(predicateIRI, GAF.DENOTED_BY, mentionIRI);
1043             // Emit common attributes
1044             if (ann.objectIRI == null) {
1045                 emitCommonAttributes(ann.predicateIRI, mentionIRI, head, label, true);
1046             } else {
1047                 emitCommonAttributes(ann.objectIRI, mentionIRI, head, label, !selfArg);
1048             }
1050             // Process framenet/verbnet/etc external refs
1051             for (final ExternalRef ref : predicate.getExternalRefs()) {
1052                 if ("".equals(ref.getReference())) {
1053                     continue;
1054                 }
1055                 final IRI typeIRI = mintRefIRI(ref.getResource(), ref.getReference());
1056                 emitFact(predicateIRI, RDF.TYPE, typeIRI, mentionIRI, null);
1057                 // if (ref.getResource().equals(NAFUtils.RESOURCE_FRAMENET)) {
1058                 // for (final String id : FrameNet.getRelatedFrames(true, ref.getReference(),
1059                 // FrameNet.Relation.INHERITS_FROM)) {
1060                 // final IRI uri = mintRefIRI(NAFUtils.RESOURCE_FRAMENET, id);
1061                 // emitFact(predicateIRI, RDF.TYPE, uri, mentionIRI, null);
1062                 // }
1063                 // } else if (ref.getResource().equals(NAFUtils.RESOURCE_VERBNET)) {
1064                 // for (final String id : VerbNet.getSuperClasses(true, ref.getReference())) {
1065                 // final IRI uri = mintRefIRI(NAFUtils.RESOURCE_VERBNET, id);
1066                 // emitFact(predicateIRI, RDF.TYPE, uri, mentionIRI, null);
1067                 // }
1068                 // }
1069             }
1071             // Mark the predicate as sem:Event and associate it the correct ego: type
1072             final List<Object> typeKeys = Lists.newArrayList(KS_OLD.ENTITY, KS_OLD.PREDICATE,
1073                     SEM.EVENT);
1074             if (isEvent) {
1075                 typeKeys.add(SUMO.PROCESS);
1076             }
1077             emitFact(predicateIRI, RDF.TYPE, typeKeys, mentionIRI, null);
1078         }
1080         private void processFactuality(final Factuality factuality) throws RDFHandlerException {
1082             // TODO: factuality should be better handled
1084             // Retrieve term and corresponding annotation
1085             final Term term = factuality.getWord();
1086             final Annotation ann = this.annotations.get(term.getId());
1088             // Abort if the annotation is missing or does not refer to a predicate
1089             if (ann == null || ann.predicateIRI == null) {
1090                 return;
1091             }
1093             // Emit a mention for the predicate extent
1094             final IRI mentionIRI = emitMention(ann.extent);
1096             // Emit a triple associating the factuality value to the predicate
1097             final String value = factuality.getMaxPart().getPrediction();
1098             emitFact(ann.predicateIRI, KS_OLD.FACTUALITY, value, mentionIRI, null);
1099         }
1101         private void processModifier(final Term modifierTerm, final Term instanceTerm,
1102                 final IRI instanceIRI, final List<Term> instanceExtent)
1103                 throws RDFHandlerException {
1105             // Retrieve POS and <entity> corresponding to the modifier term
1106             final char pos = Character.toUpperCase(modifierTerm.getPos().charAt(0));
1107             final List<Entity> entities = this.document.getEntitiesByTerm(modifierTerm);
1108             final Annotation ann = this.annotations.get(modifierTerm.getId());
1110             // Ignore modifiers marked as TIMEX
1111             if (!this.document.getTimeExsByTerm(modifierTerm).isEmpty()) {
1112                 return;
1113             }
1115             if (ann != null) {
1116                 // If modifier has been mapped to some other instance, link the two instances
1117                 final IRI otherIRI = ann.objectIRI != null ? ann.objectIRI : ann.predicateIRI;
1118                 if (otherIRI != null) {
1119                     final IRI mentionID = emitMention(
1120                             Iterables.concat(instanceExtent, ann.extent));
1121                     emitFact(instanceIRI, KS_OLD.MOD, otherIRI, mentionID, null);
1122                 }
1123                 final String path = extractPath(instanceTerm, modifierTerm);
1124                 if (!Strings.isNullOrEmpty(path)) {
1125                     final IRI mentionID = emitMention(
1126                             Iterables.concat(instanceExtent, ann.extent));
1127                     final IRI property = mintRefIRI("conn", path);
1128                     emitFact(instanceIRI, property, otherIRI, mentionID, null);
1129                 }
1131             } else if (!entities.isEmpty()) {
1132                 // If modifier is an <entity> for which we didn't create a node, then create
1133                 // an attribute and attach it to the modified entity
1134                 final Entity entity = entities.get(0);
1135                 final IRI mentionIRI = emitMention(entity.getSpans().get(0).getTargets());
1136                 emitMeta(mentionIRI, RDF.TYPE, KS_OLD.ATTRIBUTE_MENTION);
1137                 emitEntityAttributes(entity, instanceIRI, mentionIRI);
1139             } else if (pos == 'G' || pos == 'A' || pos == 'V') {
1140                 // WAS AT THE BEGINNING
1141                 // If modifier is an adjective, noun, pronoun or verb, then attach a
1142                 // 'quality' attribute to the modified node
1143                 final Set<Term> terms = this.document.getTermsByDepAncestors(
1144                         Collections.singleton(modifierTerm), "(AMOD|NMOD)*");
1145                 final IRI mentionIRI = emitMention(terms);
1146                 final IRI expressionIRI = emitTerm(modifierTerm);
1147                 emitFact(instanceIRI, KS_OLD.MOD, expressionIRI, mentionIRI, null);
1148             }
1149         }
1151         private void processCoref(final List<Span<Term>> spans) throws RDFHandlerException {
1153             // Build three correlated lists containing, for each member of the coref cluster, its
1154             // span, the head terms of instances in the span and the associated IRIs
1155             final List<Span<Term>> corefSpans = Lists.newArrayList();
1156             final List<List<Term>> corefTerms = Lists.newArrayList();
1157             final List<List<Term>> corefExtents = Lists.newArrayList();
1158             final List<List<IRI>> corefIRIs = Lists.newArrayList();
1159             for (final Span<Term> span : spans) {
1160                 final Term head = NAFUtils.extractHead(this.document, span);
1161                 if (head != null) {
1162                     final List<Term> terms = Lists.newArrayList();
1163                     final List<IRI> uris = Lists.newArrayList();
1164                     final Set<Term> extent = Sets.newHashSet();
1165                     for (final Term term : this.document.getTermsByDepAncestors(
1166                             Collections.singleton(head), "(COORD CONJ?)*")) {
1167                         if (!span.getTargets().contains(term)) {
1168                             continue;
1169                         }
1170                         final Annotation ann = this.annotations.get(term.getId());
1171                         final IRI uri = ann == null ? null
1172                                 : ann.objectIRI != null ? ann.objectIRI : ann.predicateIRI;
1173                         if (uri != null) {
1174                             terms.add(term);
1175                             uris.add(uri);
1176                             extent.addAll(ann.extent);
1177                         }
1178                     }
1179                     if (!terms.isEmpty()) {
1180                         corefSpans.add(span);
1181                         corefTerms.add(terms);
1182                         corefExtents.add(Ordering.natural().immutableSortedCopy(extent));
1183                         corefIRIs.add(uris);
1184                     }
1185                 }
1186             }
1188             // Abort in case there is only one member in the coref cluster
1189             if (corefTerms.size() <= 1) {
1190                 return;
1191             }
1193             // Map each coref member to a term / IRI pair, possibly grouping coordinated instances
1194             // in a compound instance via a ego:Composition relation
1195             final Map<Term, IRI> members = Maps.newHashMap();
1196             final Map<Term, Span<Term>> memberSpans = Maps.newHashMap();
1197             for (int i = 0; i < corefTerms.size(); ++i) {
1198                 final Span<Term> span = corefSpans.get(i);
1199                 final List<Term> terms = corefTerms.get(i);
1200                 final List<Term> extent = corefExtents.get(i);
1201                 final List<IRI> uris = corefIRIs.get(i);
1202                 memberSpans.put(terms.get(0), span);
1203                 if (terms.size() == 1) {
1204                     members.put(terms.get(0), uris.get(0));
1205                 } else {
1206                     final StringBuilder builder = new StringBuilder();
1207                     for (final IRI uri : uris) {
1208                         builder.append(builder.length() == 0 ? "" : "_");
1209                         builder.append(uri.getLocalName());
1210                     }
1211                     final IRI compIRI = mintIRI(builder.toString(), null);
1212                     final IRI mentionIRI = emitMention(extent);
1213                     // final String label =
1214                     // NAFUtils.getText(NAFUtils.filterTerms(span.getTargets()));
1216                     // final IRI predIRI =
1217                     // this.emitter.mintIRI(builder.append("_pred").toString(),
1218                     // null);
1219                     // this.emitter.emitFact(predIRI, RDF.TYPE, new Object[] { KS_OLD.THING,
1220                     // KS_OLD.PREDICATE, SUMO.ENTITY, SEM.EVENT, "predicate.relation",
1221                     // KS_OLD.COMPOSITION }, mentionIRI, null);
1222                     // this.emitter.emitFact(compIRI, EGO.PLURAL, true, mentionIRI, null);
1223                     // this.emitter.emitMeta(mentionIRI, RDF.TYPE, KS_OLD.PREDICATE_MENTION);
1225                     emitFact(compIRI, RDF.TYPE, new Object[] { KS_OLD.ENTITY }, mentionIRI, null);
1226                     // emitFact(compIRI, RDFS.LABEL, label, mentionIRI, null);
1227                     // emitMeta(mentionIRI, RDF.TYPE, KS_OLD.MISC_MENTION);
1229                     // emitMeta(compIRI, GAF.DENOTED_BY, mentionIRI);
1231                     // this.emitter.emitFact(predIRI, KS_OLD.COMPOSITE, compIRI, mentionIRI,
1232                     // null);
1233                     for (int j = 0; j < uris.size(); ++j) {
1234                         // this.emitter
1235                         // .emitFact(predIRI, KS_OLD.COMPONENT, uris.get(j), mentionIRI, null);
1236                         emitFact(compIRI, KS_OLD.INCLUDE, uris.get(j), mentionIRI, null);
1237                     }
1238                     members.put(terms.get(0), compIRI);
1239                 }
1240             }
1242             // Emit all possible coreference relations between cluster members
1243             for (final Map.Entry<Term, IRI> entry1 : members.entrySet()) {
1244                 for (final Map.Entry<Term, IRI> entry2 : members.entrySet()) {
1245                     final Term term1 = entry1.getKey();
1246                     final Term term2 = entry2.getKey();
1247                     if (term1.getId().compareTo(term2.getId()) < 0) {
1248                         final Span<Term> span1 = memberSpans.get(term1);
1249                         final Span<Term> span2 = memberSpans.get(term2);
1250                         final IRI mentionIRI = emitMention(
1251                                 Iterables.concat(span1.getTargets(), span2.getTargets()));
1252                         final IRI uri1 = entry1.getValue();
1253                         final IRI uri2 = entry2.getValue();
1254                         // final int distance = Math.abs(term1.getSent() - term2.getSent());
1255                         emitFact(uri1, OWL.SAMEAS, uri2, mentionIRI, null);
1256                     }
1257                 }
1258             }
1259         }
1261         private void processRole(final Predicate predicate, final Role role, final Term argHead,
1262                 final boolean isCorefPredicateRole) throws RDFHandlerException {
1264             // Retrieve the IRI previously associated to the predicate; abort if not found
1265             final Term predHead = NAFUtils.extractHead(this.document, predicate.getSpan());
1266             final Annotation predAnn = this.annotations.get(predHead.getId());
1267             final IRI predIRI = predAnn == null ? null : predAnn.predicateIRI;
1268             if (predIRI == null) {
1269                 return;
1270             }
1272             // Retrieve the IRI previously associated to the argument, if any
1273             IRI argIRI = null;
1274             final Annotation argAnn = this.annotations.get(argHead.getId());
1275             if (argAnn != null) {
1276                 if (argAnn.predicateIRI != null
1277                         && (argAnn.objectIRI == null || isCorefPredicateRole)) {
1278                     argIRI = argAnn.predicateIRI;
1279                 } else {
1280                     argIRI = argAnn.objectIRI;
1281                 }
1282             }
1284             // Discard invalid arguments (arg = pred, no arg IRI and arg not noun, adj, adv)
1285             final char pos = Character.toUpperCase(argHead.getPos().charAt(0));
1286             if (argIRI != null && argIRI.equals(predIRI)
1287                     || argIRI == null && pos != 'N' && pos != 'G' && pos != 'A') {
1288                 return;
1289             }
1291             // Determine the participation properties, starting with ego:argument
1292             final Set<IRI> properties = Sets.newHashSet();
1294             // Add properties from the SEM ontology
1295             String semRole = role.getSemRole();
1296             if (semRole != null && !semRole.equals("")) {
1298                 // TODO Drop R-AX
1299                 if (semRole.startsWith("R-")) {
1300                     return;
1301                 }
1303                 semRole = semRole.toLowerCase();
1304                 final int index = semRole.lastIndexOf('-');
1305                 if (index >= 0) {
1306                     semRole = semRole.substring(index + 1);
1307                 }
1308                 if (Character.isDigit(semRole.charAt(semRole.length() - 1))) {
1309                     semRole = semRole.substring(semRole.length() - 1);
1310                     properties.add(SEM.HAS_ACTOR);
1311                 } else if (semRole.equals("tmp")) {
1312                     properties.add(SEM.HAS_TIME);
1313                 } else if (semRole.equals("loc")) {
1314                     properties.add(SEM.HAS_PLACE);
1315                 }
1316             }
1318             // Determine the resource (propbank/nombank) to use for interpreting the sem role
1319             final String semRoleResource = predHead.getPos().equalsIgnoreCase("V") ? "propbank"
1320                     : "nombank";
1322             // Add properties from ProbBank, NomBank, VerbNet, FrameNet
1323             for (final ExternalRef ref : role.getExternalRefs()) {
1324                 final String resource = ref.getResource().toLowerCase();
1325                 final String name = ref.getReference().replace('#', '.');
1326                 if (resource.equals(semRoleResource) || name.equals("")) {
1327                     continue;
1328                 }
1329                 // final int index = name.lastIndexOf('@');
1330                 // final String arg = (index < 0 ? name : name.substring(index +
1331                 // 1)).toLowerCase();
1332                 //
1333                 // if (resource.equalsIgnoreCase(NAFUtils.RESOURCE_FRAMENET)
1334                 // || resource.equalsIgnoreCase(NAFUtils.RESOURCE_VERBNET) || index < 0) {
1335                 // properties.add(mintRefIRI(resource, arg));
1336                 // } else {
1337                 // if (Character.isDigit(arg.charAt(0))) {
1338                 // final String sense = name.substring(0, index);
1339                 // properties.add(mintRefIRI(resource, sense + "_" + arg));
1340                 // } else {
1341                 // properties.add(mintRefIRI(resource, arg));
1342                 // }
1343                 // }
1344                 properties.add(mintRefIRI(resource, name));
1345             }
1347             // The AX, AM-X information may not be encoded in external references, so
1348             // we derive it from predicate sense and role semRole property.
1349             if (!Strings.isNullOrEmpty(semRole)) {
1350                 for (final ExternalRef ref : predicate.getExternalRefs()) {
1351                     final String resource = ref.getResource().toLowerCase();
1352                     if (resource.equals(semRoleResource)) {
1353                         if (Character.isDigit(semRole.charAt(0))) {
1354                             properties.add(mintRefIRI(resource,
1355                                     ref.getReference().toLowerCase() + "_" + semRole));
1356                         } else {
1357                             properties.add(mintRefIRI(resource, semRole));
1358                         }
1359                     }
1360                 }
1361             }
1363             // Add path properties
1364             final String path = extractPath(predHead, argHead);
1365             if (path == null) {
1366                 LOGGER.debug("Could not compute dependency path from " + predHead.getId() + " to "
1367                         + argHead.getId());
1368             }
1369             if (!Strings.isNullOrEmpty(path)) {
1370                 properties.add(mintRefIRI("conn", path));
1371             }
1373             // Create either an edge or an attribute
1374             final List<Term> predTerms = predicate.getSpan().getTargets();
1375             if (argIRI != null) {
1376                 final IRI mentionIRI = emitMention(Iterables.concat(predTerms, argAnn.extent));
1377                 emitMeta(mentionIRI, RDF.TYPE, KS_OLD.PARTICIPATION_MENTION);
1378                 for (final IRI property : properties) {
1379                     emitFact(predIRI, property, argIRI, mentionIRI, null);
1380                 }
1381             } else {
1382                 final Set<Term> argTerms = this.document
1383                         .getTermsByDepAncestors(Collections.singleton(argHead), "(AMOD|NMOD)*");
1384                 final IRI mentionIRI = emitMention(Iterables.concat(predTerms, argTerms));
1385                 emitMeta(mentionIRI, RDF.TYPE, KS_OLD.PARTICIPATION_MENTION);
1386                 final IRI expressionIRI = emitTerm(argHead);
1387                 for (final IRI property : properties) {
1388                     emitFact(predIRI, property, expressionIRI, mentionIRI, null);
1389                 }
1390             }
1391         }
1393         private void processOpinion(final Opinion opinion) {
1395             // Identify the sentence where the opinion occurs (for normalization purposes)
1396             final int sentenceID = opinion.getOpinionExpression().getTerms().get(0).getSent();
1398             // Mint a IRI for the opinion and emit polarity and label facts
1399             final IRI opinionIRI = mintIRI(opinion.getId(), null);
1400             final Polarity polarity = Polarity.forExpression(opinion.getOpinionExpression());
1401             emitFact(opinionIRI, RDF.TYPE, SUMO.ENTITY, null, null);
1402             emitFact(opinionIRI, RDF.TYPE, KS_OLD.OPINION, null, null);
1403             emitFact(opinionIRI, RDF.TYPE,
1404                     polarity == Polarity.POSITIVE ? KS_OLD.POSITIVE_OPINION
1405                             : polarity == Polarity.NEGATIVE ? KS_OLD.NEGATIVE_OPINION
1406                                     : KS_OLD.NEUTRAL_OPINION,
1407                     null, null);
1408             if (opinion.getLabel() != null) {
1409                 emitFact(opinionIRI, RDFS.LABEL, opinion.getLabel(), null, null);
1410             }
1412             // Emit links from opinion to its expression nodes
1413             final Span<Term> exprSpan = NAFUtils.trimSpan(opinion.getOpinionExpression().getSpan(),
1414                     sentenceID);
1415             final Set<Term> exprHeads = exprSpan == null ? ImmutableSet.<Term>of()
1416                     : NAFUtils.extractHeads(this.document, null, exprSpan.getTargets(),
1417                             NAFUtils.matchExtendedPos(this.document, "NN", "VB", "JJ", "R"));
1418             emitOpinionArgument(opinionIRI, null, KS_OLD.EXPRESSION, exprSpan, exprHeads);
1420             // Emit links from opinion to target nodes
1421             final OpinionTarget target = opinion.getOpinionTarget();
1422             final Span<Term> targetSpan = target == null ? null
1423                     : NAFUtils.trimSpan(target.getSpan(), sentenceID);
1424             final Set<Term> targetHeads = targetSpan == null ? ImmutableSet.<Term>of()
1425                     : NAFUtils.extractHeads(this.document, null, targetSpan.getTargets(),
1426                             NAFUtils.matchExtendedPos(this.document, "NN", "PRP", "JJP", "DTP",
1427                                     "WP", "VB"));
1428             emitOpinionArgument(opinionIRI, null, KS_OLD.TARGET, targetSpan, targetHeads);
1430             // Emit links from opinion to holder nodes
1431             final OpinionHolder holder = opinion.getOpinionHolder();
1432             final Span<Term> holderSpan = holder == null ? null
1433                     : NAFUtils.trimSpan(holder.getSpan(), sentenceID);
1434             final Set<Term> holderHeads = holderSpan == null ? ImmutableSet.<Term>of()
1435                     : NAFUtils.extractHeads(this.document, null, holderSpan.getTargets(), NAFUtils
1436                             .matchExtendedPos(this.document, "NN", "PRP", "JJP", "DTP", "WP"));
1437             emitOpinionArgument(opinionIRI, null, KS_OLD.HOLDER, holderSpan, holderHeads);
1438         }
1440         private void emitOpinionArgument(final IRI opinionID, @Nullable final IRI spanProperty,
1441                 @Nullable final IRI headProperty, @Nullable final Span<Term> span,
1442                 @Nullable final Set<Term> heads) {
1444             if (span != null) {
1445                 outer: for (final Term term : span.getTargets()) {
1446                     final Annotation ann = this.annotations.get(term.getId());
1447                     IRI uri = ann == null ? null
1448                             : ann.objectIRI != null ? ann.objectIRI : ann.predicateIRI;
1449                     if (uri == null && "AGV".contains(term.getPos())) {
1450                         for (final Dep dep : this.document.getDepsFromTerm(term)) {
1451                             if (dep.getRfunc().equals("VC")) {
1452                                 continue outer;
1453                             }
1454                         }
1455                         uri = emitTerm(term);
1456                     }
1457                     if (uri != null) {
1458                         if (spanProperty != null) {
1459                             emitFact(opinionID, spanProperty, uri, null, null);
1460                         }
1461                         if (headProperty != null && heads != null && heads.contains(term)) {
1462                             emitFact(opinionID, headProperty, uri, null, null);
1463                         }
1464                     }
1465                 }
1466             }
1467         }
1469         private void emitCommonAttributes(final IRI instanceID, final IRI mentionID,
1470                 final Term head, final String label, final boolean emitSumo)
1471                 throws RDFHandlerException {
1473             if ("QPD".indexOf(head.getPos()) < 0 && label != null && !label.isEmpty()) {
1474                 emitFact(instanceID, RDFS.LABEL, label, mentionID, null);
1475             }
1477             final char pos = Character.toUpperCase(head.getPos().charAt(0));
1478             if (pos == 'N' || pos == 'V') {
1479                 emitMeta(mentionID, KS_OLD.LEMMA, head.getLemma());
1480                 // this.emitter.emitFact(instanceID, EGO.LEMMA, head.getLemma(), mentionID, null);
1481             }
1483             final ExternalRef sstRef = NAFUtils.getRef(head, NAFUtils.RESOURCE_WN_SST, null);
1484             if (sstRef != null) {
1485                 final String sst = sstRef.getReference();
1486                 final IRI uri = FACTORY.createIRI("http://www.newsreader-project.eu/sst/",
1487                         sst.substring(sst.lastIndexOf('-') + 1));
1488                 emitMeta(mentionID, KS_OLD.SST, uri);
1489                 // this.emitter.emitFact(instanceID, EGO.SST, uri, mentionID, null);
1490             }
1492             final ExternalRef synsetRef = NAFUtils.getRef(head, NAFUtils.RESOURCE_WN_SYNSET, null);
1493             if (synsetRef != null) {
1494                 final IRI uri = FACTORY.createIRI("http://www.newsreader-project.eu/syn/",
1495                         synsetRef.getReference());
1496                 emitMeta(mentionID, KS_OLD.SYNSET, uri);
1497                 // this.emitter.emitFact(instanceID, EGO.SYNSET, uri, mentionID, null);
1498             }
1500             final String p = head.getMorphofeat().toUpperCase();
1501             if (p.equals("NNS") || p.equals("NNPS")) {
1502                 emitMeta(mentionID, KS_OLD.PLURAL, true);
1503                 // this.emitter.emitFact(instanceID, EGO.PLURAL, true, mentionID, null);
1504             }
1506             for (final ExternalRef ref : head.getExternalRefs()) {
1507                 final IRI typeIRI = mintRefIRI(ref.getResource(), ref.getReference());
1508                 if (ref.getResource().equals(NAFUtils.RESOURCE_SUMO)) {
1509                     if (emitSumo) {
1510                         emitFact(instanceID, RDF.TYPE, typeIRI, mentionID, ref.getConfidence());
1511                         emitFact(instanceID, RDF.TYPE, Sumo.getSuperClasses(typeIRI), mentionID,
1512                                 ref.getConfidence());
1513                     }
1514                 } else {
1515                     emitFact(instanceID, RDF.TYPE, typeIRI, mentionID, ref.getConfidence());
1516                 }
1517             }
1518         }
1520         private void emitEntityAttributes(final Entity entity, final IRI subject,
1521                 final IRI mention) throws RDFHandlerException {
1523             // Retrieve normalized value and NER tag
1524             final ExternalRef valueRef = NAFUtils.getRef(entity, "value", null);
1525             String nerTag = entity.getType();
1526             nerTag = nerTag == null ? null : nerTag.toLowerCase();
1528             // For NORP and LANGUAGE entities we use the DBpedia IRIs from entity linking
1529             if (Objects.equal(nerTag, "norp") || Objects.equal(nerTag, "language")) {
1530                 final IRI attribute = Objects.equal(nerTag, "norp") ? KS_OLD.PROVENANCE
1531                         : KS_OLD.LANGUAGE;
1532                 for (final ExternalRef ref : entity.getExternalRefs()) {
1533                     try {
1534                         final IRI refIRI = FACTORY.createIRI(Util.cleanIRI(ref.getReference()));
1535                         emitFact(subject, attribute, refIRI, mention,
1536                                 (double) ref.getConfidence());
1537                     } catch (final Throwable ex) {
1538                         // ignore: not a IRI
1539                     }
1540                 }
1542             } else if (valueRef != null) {
1543                 // Otherwise, we use the normalized value from Stanford
1544                 try {
1545                     final String s = valueRef.getReference().trim();
1546                     if (s.isEmpty()) {
1547                         return;
1548                     }
1549                     if (Objects.equal(nerTag, "cardinal") || Objects.equal(nerTag, "quantity")) {
1550                         emitFact(subject, KS_OLD.QUANTITY, Double.parseDouble(s), mention, null);
1552                     } else if (Objects.equal(nerTag, "ordinal")) {
1553                         emitFact(subject, KS_OLD.RANK, Double.parseDouble(s), mention, null);
1555                     } else if (Objects.equal(nerTag, "percent")) {
1556                         final int index = s.indexOf('%');
1557                         emitFact(subject, KS_OLD.PERCENTAGE,
1558                                 Double.parseDouble(s.substring(index + 1)), mention, null);
1560                     } else if (Objects.equal(nerTag, "money")) {
1561                         int index = 0;
1562                         while (index < s.length()) {
1563                             final char c = s.charAt(index);
1564                             if (c == '€') {
1565                                 emitFact(subject, GR.HAS_CURRENCY, "EUR", mention, null);
1566                             } else if (c == '$') {
1567                                 emitFact(subject, GR.HAS_CURRENCY, "USD", mention, null);
1568                             } else if (c == 'Â¥') {
1569                                 emitFact(subject, GR.HAS_CURRENCY, "YEN", mention, null);
1570                             } else if (Character.isDigit(c)) {
1571                                 break;
1572                             }
1573                             ++index;
1574                         }
1575                         emitFact(subject, GR.HAS_CURRENCY_VALUE,
1576                                 Double.parseDouble(s.substring(index)), mention, null);
1577                     }
1578                 } catch (final NumberFormatException ex) {
1579                     LOGGER.debug("Could not process normalized value: " + valueRef.getReference());
1580                 }
1581             }
1582         }
1584         @Nullable
1585         private IRI emitMention(final Iterable<Term> terms) {
1587             final List<Term> sortedTerms = Ordering.from(Term.OFFSET_COMPARATOR).sortedCopy(terms);
1588             final int numTerms = sortedTerms.size();
1589             if (numTerms == 0) {
1590                 return null;
1591             }
1593             final String text = this.documentText;
1594             final List<IRI> componentIRIs = Lists.newArrayList();
1595             final int begin = NAFUtils.getBegin(sortedTerms.get(0));
1596             int offset = begin;
1597             int startTermIdx = 0;
1599             final StringBuilder anchorBuilder = new StringBuilder();
1600             final StringBuilder uriBuilder = new StringBuilder(this.documentIRI.stringValue())
1601                     .append("#char=").append(begin).append(",");
1603             for (int i = 0; i < numTerms; ++i) {
1604                 final Term term = sortedTerms.get(i);
1605                 final int termOffset = NAFUtils.getBegin(term);
1606                 if (termOffset > offset && !text.substring(offset, termOffset).trim().isEmpty()) {
1607                     final int start = NAFUtils.getBegin(sortedTerms.get(startTermIdx));
1608                     anchorBuilder.append(text.substring(start, offset)).append(" [...] ");
1609                     uriBuilder.append(offset).append(";").append(termOffset).append(',');
1610                     componentIRIs.add(emitMention(sortedTerms.subList(startTermIdx, i)));
1611                     startTermIdx = i;
1612                 }
1613                 offset = NAFUtils.getEnd(term);
1614             }
1615             if (startTermIdx > 0) {
1616                 componentIRIs.add(emitMention(sortedTerms.subList(startTermIdx, numTerms)));
1617             }
1618             anchorBuilder.append(
1619                     text.substring(NAFUtils.getBegin(sortedTerms.get(startTermIdx)), offset));
1620             uriBuilder.append(offset);
1622             final String anchor = anchorBuilder.toString();
1623             final IRI mentionID = FACTORY.createIRI(uriBuilder.toString());
1624             emitMeta(mentionID, KS_OLD.MENTION_OF, this.documentIRI);
1625             emitMeta(this.documentIRI, KS_OLD.HAS_MENTION, mentionID);
1626             emitMeta(mentionID, RDF.TYPE, KS_OLD.MENTION);
1627             if (!componentIRIs.isEmpty()) {
1628                 emitMeta(mentionID, RDF.TYPE, KS_OLD.COMPOUND_STRING);
1629                 for (final IRI componentIRI : componentIRIs) {
1630                     emitMeta(mentionID, KS_OLD.COMPONENT_SUB_STRING, componentIRI);
1631                 }
1632             }
1633             emitMeta(mentionID, NIF.BEGIN_INDEX, FACTORY.createLiteral(begin));
1634             emitMeta(mentionID, NIF.END_INDEX, FACTORY.createLiteral(offset));
1635             emitMeta(mentionID, NIF.ANCHOR_OF, FACTORY.createLiteral(anchor));
1637             // Emit context of 3 sentences around the mention TODO
1638             // final int sentID = sortedTerms.get(0).getSent();
1639             // final List<Term> sentTerms = Lists.newArrayList();
1640             // for (int s = Math.max(1, sentID - 1); s <=
1641             // Math.min(this.document.getNumSentences(),
1642             // sentID + 1); ++s) {
1643             // sentTerms.addAll(this.document.getTermsBySent(s));
1644             // }
1645             // Collections.sort(sentTerms, Term.OFFSET_COMPARATOR);
1646             // final StringBuilder sentBuilder = new StringBuilder();
1647             // int sentOffset = -1;
1648             // boolean lastSelected = false;
1649             // for (final Term term : sentTerms) {
1650             // final boolean nextSelected = sortedTerms.contains(term);
1651             // if (!nextSelected && lastSelected) {
1652             // sentBuilder.append(" ]__ ");
1653             // }
1654             // if (sentOffset >= 0) {
1655             // for (int i = 0; i < term.getOffset() - sentOffset; ++i) {
1656             // sentBuilder.append(' ');
1657             // }
1658             // }
1659             // if (nextSelected && !lastSelected) {
1660             // sentBuilder.append(" __[ ");
1661             // }
1662             // sentBuilder.append(term.getStr());
1663             // sentOffset = term.getOffset() + term.getLength();
1664             // lastSelected = nextSelected;
1665             // }
1666             // emitMeta(mentionID, new IRIImpl(KS_OLD.NAMESPACE + "context"),
1667             // FACTORY.createLiteral(sentBuilder.toString()));
1669             return mentionID;
1670         }
1672         private IRI emitTerm(final Term head) {
1674             final ExternalRef synsetRef = NAFUtils.getRef(head, NAFUtils.RESOURCE_WN_SYNSET, null);
1675             final String headSynsetID = synsetRef == null ? null : synsetRef.getReference();
1676             final String readableHeadSynsetID = WordNet.getReadableSynsetID(headSynsetID);
1677             final String headID = MoreObjects.firstNonNull(readableHeadSynsetID, //
1678                     head.getLemma().toLowerCase());
1680             final List<IRI> modifierIRIs = Lists.newArrayList();
1681             final List<String> modifierIDs = Lists.newArrayList();
1683             for (final Term modifier : this.document.getTermsByDepAncestors(ImmutableSet.of(head),
1684                     "AMOD|NMOD")) {
1685                 if ("AGV".contains(modifier.getPos())) {
1686                     final IRI modifierIRI = emitTerm(modifier);
1687                     modifierIRIs.add(modifierIRI);
1688                     modifierIDs.add(modifierIRI.getLocalName());
1689                 }
1690             }
1692             final Set<Term> terms = this.document.getTermsByDepAncestors(ImmutableSet.of(head),
1693                     "(AMOD|NMOD)*");
1694             for (final Iterator<Term> i = terms.iterator(); i.hasNext();) {
1695                 if (!"AGV".contains(i.next().getPos())) {
1696                     i.remove();
1697                 }
1698             }
1699             final String label = NAFUtils.getText(NAFUtils.filterTerms(terms));
1701             final StringBuilder idBuilder = new StringBuilder();
1702             int level = 0;
1703             for (final String modifierID : modifierIDs) {
1704                 for (int i = 1; modifierID.contains(Strings.repeat("_", i)); ++i) {
1705                     level = Math.max(level, i);
1706                 }
1707             }
1708             final String separator = Strings.repeat("_", level + 1);
1709             for (final String modifierID : Ordering.natural().immutableSortedCopy(modifierIDs)) {
1710                 idBuilder.append(modifierID).append(separator);
1711             }
1712             final String id = idBuilder.append(headID).toString();
1713             final IRI uri = mintRefIRI("attribute", id);
1714             // final IRI uri = this.emitter.mintIRI(id + "-" + head.getId(), id);
1716             emitFact(uri, RDF.TYPE, KS_OLD.ATTRIBUTE, null, null);
1717             emitFact(uri, RDFS.LABEL, label, null, null);
1718             if (headSynsetID != null) {
1719                 emitFact(uri, KS_OLD.HEAD_SYNSET, mintRefIRI("syn", headSynsetID), null, null);
1720             }
1721             for (final IRI modifierIRI : modifierIRIs) {
1722                 emitFact(uri, KS_OLD.MOD, modifierIRI, null, null);
1723             }
1725             final IRI mentionIRI = emitMention(terms);
1726             emitMeta(mentionIRI, RDF.TYPE, KS_OLD.ATTRIBUTE_MENTION);
1727             emitMeta(uri, GAF.DENOTED_BY, mentionIRI);
1729             return uri;
1730         }
1732         @Nullable
1733         private String extractPath(final Term from, final Term to) {
1735             final Set<Term> fromTerms = this.document
1736                     .getTermsByDepDescendants(ImmutableSet.of(from), "(-VC|-IM|-OPRD)*");
1737             final Set<Term> toTerms = this.document.getTermsByDepDescendants(ImmutableSet.of(to),
1738                     "(-VC|-IM|-OPRD)*");
1740             if (!Sets.intersection(fromTerms, toTerms).isEmpty()) {
1741                 return null;
1742             }
1744             final List<Dep> path = this.document.getDepPath(from, to);
1745             if (path == null) {
1746                 return null;
1747             }
1749             for (final Iterator<Dep> i = path.iterator(); i.hasNext();) {
1750                 final Dep dep = i.next();
1751                 if (fromTerms.contains(dep.getFrom()) && fromTerms.contains(dep.getTo())
1752                         || toTerms.contains(dep.getFrom()) && toTerms.contains(dep.getTo())) {
1753                     i.remove();
1754                 }
1755             }
1757             if (fromTerms.contains(path.get(0).getTo())) {
1758                 return null; // moving towards tree root
1759             }
1761             final StringBuilder builder = new StringBuilder();
1762             for (int i = 1; i < path.size(); ++i) {
1763                 final Dep dep = path.get(i);
1764                 final String func = dep.getRfunc();
1765                 final Term term = dep.getFrom();
1766                 if (!func.equalsIgnoreCase("COORD") && !func.equals("CONJ")) {
1767                     builder.append(builder.length() > 0 ? "_" : "")
1768                             .append(term.getLemma().toLowerCase().replace(' ', '_'));
1769                 }
1770             }
1772             return builder.toString();
1773         }
1775         @Nullable
1776         private Annotation defineAnnotation(final Term head, final Iterable<Term> terms) {
1777             if (head == null) {
1778                 return null;
1779             }
1780             Annotation ann = this.annotations.get(head.getId());
1781             if (ann == null) {
1782                 ann = new Annotation(head, terms);
1783                 this.annotations.put(head.getId(), ann);
1784             }
1785             return ann;
1786         }
1788         private IRI mintIRI(final String id, @Nullable final String suggestedLocalName) {
1789             String localName = this.mintedIRIs.get(id);
1790             if (localName == null) {
1791                 final String name = MoreObjects.firstNonNull(suggestedLocalName, id);
1792                 final StringBuilder builder = new StringBuilder();
1793                 for (int i = 0; i < name.length(); ++i) {
1794                     final char c = name.charAt(i);
1795                     builder.append(Character.isWhitespace(c) ? '_' : c);
1796                 }
1797                 final String base = builder.toString();
1798                 int counter = 1;
1799                 while (true) {
1800                     localName = base + (counter == 1 ? "" : "_" + counter);
1801                     if (!this.mintedIRIs.inverse().containsKey(localName)) {
1802                         this.mintedIRIs.put(id, localName);
1803                         break;
1804                     }
1805                     ++counter;
1806                 }
1807             }
1808             return FACTORY.createIRI(Util.cleanIRI(this.baseIRI + "#" + localName));
1809         }
1811         @Nullable
1812         private IRI mintRefIRI(@Nullable final String resource, @Nullable final String reference) {
1813             if (!Strings.isNullOrEmpty(resource) && !Strings.isNullOrEmpty(reference)) {
1814                 final String normResource = resource.toLowerCase();
1815                 final String namespace = RDFGenerator.this.namespaceMap.get(normResource);
1816                 if (namespace != null) {
1817                     return FACTORY
1818                             .createIRI(Util.cleanIRI(namespace + reference.replace('#', '.')));
1819                 }
1820             }
1821             return null;
1822         }
1824         private void emitMeta(@Nullable final IRI subject, @Nullable final IRI property,
1825                 @Nullable final Object objects) {
1826             if (subject != null && property != null) {
1827                 for (final Value object : extract(Value.class, objects,
1828                         RDF.TYPE.equals(property) ? RDFGenerator.this.typeMap : null)) {
1829                     this.statements.add(FACTORY.createStatement(subject, property, object));
1830                 }
1831             }
1832         }
1834         private void emitFact(@Nullable final IRI subject, @Nullable final IRI property,
1835                 @Nullable final Object objects, @Nullable final IRI mention,
1836                 @Nullable final Object confidence) {
1837             if (subject != null && property != null) {
1838                 for (final Value object : extract(Value.class, objects,
1839                         RDF.TYPE.equals(property) ? RDFGenerator.this.typeMap : null)) {
1840                     final IRI factIRI = hash(subject, property, object);
1841                     this.statements
1842                             .add(FACTORY.createStatement(subject, property, object, factIRI));
1843                     if (mention != null) {
1844                         this.statements.add(
1845                                 FACTORY.createStatement(factIRI, KS_OLD.EXPRESSED_BY, mention));
1846                     }
1847                     if (confidence instanceof Number) {
1848                         final double confidenceValue = ((Number) confidence).doubleValue();
1849                         if (confidenceValue != 0.0) {
1850                             // this.statements.add(FACTORY.createStatement(factIRI,
1851                             // KS_OLD.CONFIDENCE,
1852                             // FACTORY.createLiteral(confidenceValue)));
1853                         }
1854                     }
1855                 }
1856             }
1857         }
1859         private Iterable<Statement> merge(final Iterable<Statement> stmts)
1860                 throws RDFHandlerException {
1862             final List<Statement> smushedStmts = Lists.newArrayList();
1864             /// ???????
1865             RDFProcessors.smush(null, true, "http://dbpedia.org/resource/")
1866                     .wrap(RDFSources.wrap(stmts)).emit(RDFHandlers.wrap(smushedStmts), 1);
1868             final Set<Resource> named = Sets.newHashSet();
1869             final Multimap<Resource, Resource> groups = HashMultimap.create();
1870             for (final Statement stmt : smushedStmts) {
1871                 if (stmt.getPredicate().equals(KS_OLD.INCLUDE)) {
1872                     groups.put(stmt.getSubject(), (Resource) stmt.getObject());
1873                 } else if (stmt.getPredicate().equals(FOAF.NAME)) {
1874                     named.add(stmt.getSubject());
1875                 }
1876             }
1878             final List<Statement> output = Lists.newArrayList();
1879             final Multimap<Resource, Statement> groupProps = HashMultimap.create();
1880             final Multimap<Resource, Statement> groupRels = HashMultimap.create();
1881             for (final Statement stmt : smushedStmts) {
1882                 final Resource subj = stmt.getSubject();
1883                 final Value obj = stmt.getObject();
1884                 final boolean subjIsGroup = groups.containsKey(subj);
1885                 final boolean objIsGroup = groups.containsKey(obj);
1886                 if (stmt.getPredicate().equals(OWL.SAMEAS)
1887                         && (obj instanceof BNode || obj.stringValue().startsWith(this.baseIRI))) {
1888                     // discard statement
1889                 } else if (subjIsGroup && objIsGroup && !subj.equals(obj)) {
1890                     groupRels.put(subj, stmt);
1891                     groupRels.put((Resource) obj, stmt);
1892                 } else if (subjIsGroup) {
1893                     groupProps.put(subj, stmt);
1894                 } else if (objIsGroup) {
1895                     groupProps.put((Resource) obj, stmt);
1896                 } else {
1897                     output.add(stmt);
1898                 }
1899             }
1901             // Merge one composite / components structure at a time
1902             final ValueFactory vf = Statements.VALUE_FACTORY;
1903             for (final Resource composite : groups.keySet()) {
1904                 final Collection<Resource> components = groups.get(composite);
1905                 final boolean isNamed = composite instanceof IRI
1906                         && ((IRI) composite).getNamespace().equals("http://dbpedia.org/resource/")
1907                         || named.contains(composite);
1908                 if (isNamed) {
1909                     output.addAll(groupProps.get(composite));
1910                     for (final Statement stmt : groupRels.removeAll(composite)) {
1911                         if (stmt.getSubject().equals(composite)) {
1912                             groupRels.remove(stmt.getObject(), stmt);
1913                             groupProps.put((Resource) stmt.getObject(), stmt);
1914                         } else {
1915                             groupRels.remove(stmt.getSubject(), stmt);
1916                             groupProps.put(stmt.getSubject(), stmt);
1917                         }
1918                     }
1919                 } else {
1920                     for (final Statement stmt : groupRels.removeAll(composite)) {
1921                         final Resource subj = stmt.getSubject();
1922                         final IRI pred = stmt.getPredicate();
1923                         final Value obj = stmt.getObject();
1924                         final Resource ctx = stmt.getContext();
1925                         if (subj.equals(composite)) {
1926                             groupRels.remove(obj, stmt);
1927                             for (final Resource component : components) {
1928                                 groupProps.put((Resource) obj,
1929                                         vf.createStatement(component, pred, obj, ctx));
1930                             }
1931                         } else {
1932                             groupRels.remove(subj, stmt);
1933                             for (final Resource component : components) {
1934                                 groupProps.put(subj,
1935                                         vf.createStatement(subj, pred, component, ctx));
1936                             }
1937                         }
1938                     }
1939                     for (final Statement stmt : groupProps.get(composite)) {
1940                         final IRI pred = stmt.getPredicate();
1941                         final Resource ctx = stmt.getContext();
1942                         Collection<Resource> subjs = ImmutableList.of(stmt.getSubject());
1943                         Collection<? extends Value> objs = ImmutableList.of(stmt.getObject());
1944                         if (composite.equals(stmt.getSubject())) {
1945                             subjs = components;
1946                             if (KS_OLD.INCLUDE.equals(pred) || RDFS.LABEL.equals(pred)) {
1947                                 continue;
1948                             }
1949                         }
1950                         if (composite.equals(stmt.getObject())) {
1951                             objs = components;
1952                         }
1953                         for (final Resource subj : subjs) {
1954                             for (final Value obj : objs) {
1955                                 output.add(Statements.VALUE_FACTORY.createStatement(subj, pred,
1956                                         obj, ctx));
1957                             }
1958                         }
1959                     }
1960                 }
1961             }
1963             return output;
1964         }
1966         @SuppressWarnings("unchecked")
1967         private <T extends Value> Collection<T> extract(final Class<T> clazz,
1968                 @Nullable final Object object, @Nullable final Multimap<String, ? extends T> map) {
1969             if (object == null) {
1970                 return ImmutableList.of();
1971             } else if (clazz.isInstance(object)) {
1972                 return ImmutableList.of((T) object);
1973             } else if (object instanceof Iterable<?>) {
1974                 final List<T> list = Lists.newArrayList();
1975                 for (final Object element : (Iterable<?>) object) {
1976                     list.addAll(extract(clazz, element, map));
1977                 }
1978                 return list;
1979             } else if (object.getClass().isArray()) {
1980                 final List<T> list = Lists.newArrayList();
1981                 final int length = Array.getLength(object);
1982                 for (int i = 0; i < length; ++i) {
1983                     list.addAll(extract(clazz, Array.get(object, i), map));
1984                 }
1985                 return list;
1986             } else if (map != null) {
1987                 return (Collection<T>) map.get(object.toString());
1988             } else {
1989                 return ImmutableList.of(Statements.convert(object, clazz));
1990             }
1991         }
1993         private IRI hash(final Resource subject, final IRI predicate, final Value object) {
1994             final List<String> list = Lists.newArrayList();
1995             for (final Value value : new Value[] { subject, predicate, object }) {
1996                 if (value instanceof IRI) {
1997                     list.add("\u0001");
1998                     list.add(value.stringValue());
1999                 } else if (value instanceof BNode) {
2000                     list.add("\u0002");
2001                     list.add(((BNode) value).getID());
2002                 } else if (value instanceof Literal) {
2003                     final Literal l = (Literal) value;
2004                     list.add("\u0003");
2005                     list.add(l.getLabel());
2006                     if (!l.getDatatype().equals(XMLSchema.STRING)) {
2007                         list.add(l.getDatatype().stringValue());
2008                     } else if (l.getLanguage().isPresent()) {
2009                         list.add(l.getLanguage().get());
2010                     }
2011                 }
2012             }
2013             final String id = Hash.murmur3(list.toArray(new String[list.size()])).toString();
2014             return FACTORY.createIRI("fact:" + id);
2015         }
2017     }
2019     private static final class Annotation {
2021         final Term head;
2023         final List<Term> extent;
2025         IRI objectIRI;
2027         IRI predicateIRI;
2029         Annotation(final Term head, final Iterable<Term> extent) {
2030             this.head = head;
2031             this.extent = ImmutableList.copyOf(extent);
2032             this.objectIRI = null;
2033             this.predicateIRI = null;
2034         }
2036     }
2038 }