1 package eu.fbk.dkm.pikes.rdf;
2
3 import java.io.File;
4 import java.lang.reflect.Array;
5 import java.nio.file.Path;
6 import java.util.Arrays;
7 import java.util.Collection;
8 import java.util.Collections;
9 import java.util.Iterator;
10 import java.util.List;
11 import java.util.Map;
12 import java.util.Set;
13 import java.util.concurrent.CountDownLatch;
14 import java.util.concurrent.atomic.AtomicInteger;
15
16 import javax.annotation.Nullable;
17
18 import com.google.common.base.MoreObjects;
19 import com.google.common.base.Objects;
20 import com.google.common.base.Strings;
21 import com.google.common.collect.BiMap;
22 import com.google.common.collect.HashBiMap;
23 import com.google.common.collect.HashMultimap;
24 import com.google.common.collect.ImmutableList;
25 import com.google.common.collect.ImmutableMap;
26 import com.google.common.collect.ImmutableMultimap;
27 import com.google.common.collect.ImmutableSet;
28 import com.google.common.collect.Iterables;
29 import com.google.common.collect.Lists;
30 import com.google.common.collect.Maps;
31 import com.google.common.collect.Multimap;
32 import com.google.common.collect.Ordering;
33 import com.google.common.collect.Sets;
34 import com.google.common.io.Files;
35
36 import eu.fbk.dkm.pikes.rdf.vocab.*;
37 import org.eclipse.rdf4j.model.BNode;
38 import org.eclipse.rdf4j.model.Literal;
39 import org.eclipse.rdf4j.model.Model;
40 import org.eclipse.rdf4j.model.Resource;
41 import org.eclipse.rdf4j.model.Statement;
42 import org.eclipse.rdf4j.model.IRI;
43 import org.eclipse.rdf4j.model.Value;
44 import org.eclipse.rdf4j.model.ValueFactory;
45 import org.eclipse.rdf4j.model.impl.LinkedHashModel;
46 import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
47 import org.eclipse.rdf4j.model.impl.ValueFactoryImpl;
48 import org.eclipse.rdf4j.model.vocabulary.*;
49 import org.eclipse.rdf4j.rio.RDFHandler;
50 import org.eclipse.rdf4j.rio.RDFHandlerException;
51 import org.slf4j.Logger;
52 import org.slf4j.LoggerFactory;
53 import org.slf4j.MDC;
54
55 import ixa.kaflib.Coref;
56 import ixa.kaflib.Dep;
57 import ixa.kaflib.Entity;
58 import ixa.kaflib.ExternalRef;
59 import ixa.kaflib.Factuality;
60 import ixa.kaflib.KAFDocument;
61 import ixa.kaflib.KAFDocument.FileDesc;
62 import ixa.kaflib.LinguisticProcessor;
63 import ixa.kaflib.Opinion;
64 import ixa.kaflib.Opinion.OpinionHolder;
65 import ixa.kaflib.Opinion.OpinionTarget;
66 import ixa.kaflib.Opinion.Polarity;
67 import ixa.kaflib.Predicate;
68 import ixa.kaflib.Predicate.Role;
69 import ixa.kaflib.Span;
70 import ixa.kaflib.Term;
71 import ixa.kaflib.Timex3;
72 import ixa.kaflib.WF;
73
74 import eu.fbk.dkm.pikes.naflib.Corpus;
75 import eu.fbk.dkm.pikes.rdf.util.ModelUtil;
76 import eu.fbk.dkm.pikes.rdf.util.OWLTime;
77 import eu.fbk.dkm.pikes.rdf.util.ProcessorASNorm;
78 import eu.fbk.dkm.pikes.resources.NAFFilter;
79 import eu.fbk.dkm.pikes.resources.NAFUtils;
80 import eu.fbk.dkm.pikes.resources.PropBank;
81 import eu.fbk.dkm.pikes.resources.Sumo;
82 import eu.fbk.dkm.pikes.resources.WordNet;
83 import eu.fbk.dkm.pikes.resources.YagoTaxonomy;
84 import eu.fbk.utils.svm.Util;
85 import eu.fbk.rdfpro.RDFHandlers;
86 import eu.fbk.rdfpro.RDFProcessors;
87 import eu.fbk.rdfpro.RDFSource;
88 import eu.fbk.rdfpro.RDFSources;
89 import eu.fbk.rdfpro.util.Environment;
90 import eu.fbk.rdfpro.util.Hash;
91 import eu.fbk.rdfpro.util.Options;
92 import eu.fbk.rdfpro.util.QuadModel;
93 import eu.fbk.rdfpro.util.Statements;
94 import eu.fbk.rdfpro.util.Tracker;
95
96
97
98
99 public final class RDFGenerator {
100
    private static final Logger LOGGER = LoggerFactory.getLogger(RDFGenerator.class);

    // Shared RDF4J value factory used to mint IRIs and literals throughout this class.
    private static final ValueFactory FACTORY = SimpleValueFactory.getInstance();

    // Dependency-path pattern matching modifier chains (nominal/adjectival/temporal/locative/
    // title modifiers, possibly through PMOD and coordination) below an annotated head term;
    // consumed via KAFDocument.getTermsByDepAncestors() in Extractor.run().
    private static final String MODIFIER_REGEX = "(NMOD|AMOD|TMP|LOC|TITLE) PMOD? (COORD CONJ?)* PMOD?";

    // Dependency-path pattern matching terms reachable from a role head through subject,
    // coordination, PMOD and verb-chain (VC/IM, optional OPRD) links; used when emitting
    // predicate participation in Extractor.run().
    private static final String PARTICIPATION_REGEX = ""
            + "SUB? (COORD CONJ?)* (PMOD (COORD CONJ?)*)? ((VC OPRD?)|(IM OPRD?))*";

    // Default mapping from NAF entity/timex type keys (lowercased, prefixed with "entity." or
    // "timex.") to the RDF classes asserted for the corresponding instances.
    private static final Multimap<String, IRI> DEFAULT_TYPE_MAP = ImmutableMultimap
            .<String, IRI>builder()
            .put("entity.person", NWR.PERSON)
            .put("entity.per", NWR.PERSON)
            .put("entity.organization", NWR.ORGANIZATION)
            .put("entity.org", NWR.ORGANIZATION)
            .put("entity.location", NWR.LOCATION)
            .put("entity.loc", NWR.LOCATION)
            .put("entity.misc", NWR.MISC)
            .put("entity.money", GR.PRICE_SPECIFICATION)
            .put("entity.date", OWLTIME.DATE_TIME_INTERVAL)
            .put("entity.time", OWLTIME.DATE_TIME_INTERVAL)
            .put("timex.date", OWLTIME.DATE_TIME_INTERVAL)
            .put("timex.duration", OWLTIME.PROPER_INTERVAL)
            .build();

    // Default mapping from external-reference resource names (e.g. NAF ExternalRef resources)
    // to the namespaces used when minting IRIs for those references.
    private static final Map<String, String> DEFAULT_NAMESPACE_MAP = ImmutableMap
            .<String, String>builder()
            .put("propbank", "http://www.newsreader-project.eu/ontologies/propbank/")
            .put("nombank", "http://www.newsreader-project.eu/ontologies/nombank/")
            .put("framenet", "http://www.newsreader-project.eu/ontologies/framenet/")
            .put("verbnet", "http://www.newsreader-project.eu/ontologies/verbnet/")
            .put("premon+propbank", "http://premon.fbk.eu/resource/")
            .put("premon+nombank", "http://premon.fbk.eu/resource/")
            .put("premon+framenet", "http://premon.fbk.eu/resource/")
            .put("premon+verbnet", "http://premon.fbk.eu/resource/")
            .put("eso", "http://www.newsreader-project.eu/domain-ontology#")
            .put("framebase", "http://framebase.org/ns/")
            .put("attribute", "attr:")
            // WordNet synset references
            .put("syn", "http://wordnet-rdf.princeton.edu/wn30/")
            .put("sumo", SUMO.NAMESPACE).put("yago", YagoTaxonomy.NAMESPACE).build();

    // Namespace under which OWL-Time instants/intervals/durations are minted by default.
    private static final String DEFAULT_OWLTIME_NAMESPACE = "http://www.newsreader-project.eu/time/";

    // Generator instance with all-default settings (no merging, no normalization).
    public static final RDFGenerator DEFAULT = RDFGenerator.builder().build();

    // Effective (immutable) configuration of this generator instance, populated from Builder.

    private final Multimap<String, IRI> typeMap;

    private final Map<String, String> namespaceMap;

    private final String owltimeNamespace;

    // When true, statements are merged via merge() before emission (see Extractor.run()).
    private final boolean merging;

    // When true, emitted statements are post-processed by ProcessorASNorm("fact:").
    private final boolean normalization;
159
160 private RDFGenerator(final Builder builder) {
161 this.typeMap = ImmutableMultimap
162 .copyOf(MoreObjects.firstNonNull(builder.typeMap, DEFAULT_TYPE_MAP));
163 this.namespaceMap = ImmutableMap
164 .copyOf(MoreObjects.firstNonNull(builder.namespaceMap, DEFAULT_NAMESPACE_MAP));
165 this.owltimeNamespace = MoreObjects.firstNonNull(builder.owltimeNamespace,
166 DEFAULT_OWLTIME_NAMESPACE);
167 this.merging = MoreObjects.firstNonNull(builder.merging, Boolean.FALSE);
168 this.normalization = MoreObjects.firstNonNull(builder.normalization, Boolean.FALSE);
169 }
170
171 public Model generate(final KAFDocument document,
172 @Nullable final Iterable<Integer> sentenceIDs) {
173 final Model model = new LinkedHashModel();
174 generate(document, sentenceIDs, model);
175 return model;
176 }
177
178 public void generate(final KAFDocument document, @Nullable final Iterable<Integer> sentenceIDs,
179 final Collection<? super Statement> output) {
180 final RDFHandler handler = RDFHandlers.wrap(output);
181 try {
182 generate(document, sentenceIDs, handler);
183 } catch (final Throwable ex) {
184 throw new RuntimeException("Unexpected exception (!)", ex);
185 }
186 }
187
188 public void generate(final KAFDocument document, @Nullable final Iterable<Integer> sentenceIDs,
189 final RDFHandler handler) throws RDFHandlerException {
190
191 final boolean[] ids = new boolean[document.getNumSentences() + 1];
192 if (sentenceIDs == null) {
193 Arrays.fill(ids, true);
194 } else {
195 for (final Integer sentenceID : sentenceIDs) {
196 ids[sentenceID] = true;
197 }
198 }
199
200 final String baseIRI = document.getPublic().uri;
201 new Extractor(baseIRI, handler, document, ids).run();
202 }
203
    /** Returns a new {@link Builder} for configuring and creating {@code RDFGenerator} instances. */
    public static Builder builder() {
        return new Builder();
    }
207
208 public static final class Builder {
209
210 @Nullable
211 private Multimap<String, IRI> typeMap;
212
213 @Nullable
214 private Multimap<String, IRI> propertyMap;
215
216 @Nullable
217 private Map<String, String> namespaceMap;
218
219 @Nullable
220 private String owltimeNamespace;
221
222 @Nullable
223 private Boolean merging;
224
225 @Nullable
226 private Boolean normalization;
227
228
229
230
231
232
233
234
235
236
237 public Builder withProperties(final Map<?, ?> properties, @Nullable final String prefix) {
238 final String p = prefix == null ? "" : prefix.endsWith(".") ? prefix : prefix + ".";
239 for (final Map.Entry<?, ?> entry : properties.entrySet()) {
240 if (entry.getKey() != null && entry.getValue() != null
241 && entry.getKey().toString().startsWith(p)) {
242 final String name = entry.getKey().toString().substring(p.length());
243 final String value = Strings.emptyToNull(entry.getValue().toString());
244 if ("fusion".equals(name)) {
245 withMerging(Boolean.valueOf(value));
246 } else if ("normalization".equals(name)) {
247 withNormalization(Boolean.valueOf(value));
248 }
249 }
250 }
251 return this;
252 }
253
254 public Builder withTypeMap(@Nullable final Multimap<String, IRI> typeMap) {
255 this.typeMap = typeMap;
256 return this;
257 }
258
259 public Builder withPropertyMap(@Nullable final Multimap<String, IRI> propertyMap) {
260 this.propertyMap = propertyMap;
261 return this;
262 }
263
264 public Builder withNamespaceMap(@Nullable final Map<String, String> namespaceMap) {
265 this.namespaceMap = namespaceMap;
266 return this;
267 }
268
269 public Builder withOWLTimeNamespace(@Nullable final String owltimeNamespace) {
270 this.owltimeNamespace = owltimeNamespace;
271 return this;
272 }
273
274 public Builder withMerging(@Nullable final Boolean merging) {
275 this.merging = merging;
276 return this;
277 }
278
279 public Builder withNormalization(@Nullable final Boolean normalization) {
280 this.normalization = normalization;
281 return this;
282 }
283
284 public RDFGenerator build() {
285 return new RDFGenerator(this);
286 }
287
288 }
289
    /**
     * Command-line batch driver that converts a corpus of NAF files to RDF, either into a single
     * output file (shared writer) or into one intermediate {@code .tql.gz} file per input
     * ("intermediate" mode). Work is distributed over {@link Environment#getCores()} worker
     * tasks pulling indices from a shared counter.
     */
    static final class Runner implements Runnable {

        // Input corpus of NAF documents.
        private final Corpus corpus;

        // Generator used to produce RDF for each document.
        private final RDFGenerator generator;

        // Output file (absolute); in intermediate mode only its parent directory is used as the
        // base for per-document outputs.
        private final File outputFile;

        // True: one output file per input document; false: single shared output file.
        private final boolean intermediate;

        private Runner(final Corpus corpus, final RDFGenerator generator, final File outputFile,
                final boolean split) {
            this.corpus = corpus;
            this.generator = generator;
            this.outputFile = outputFile.getAbsoluteFile();
            this.intermediate = split;
        }

        /**
         * Creates a Runner from command-line arguments: -r recursive corpus scan, -o output
         * file (required), -m merge, -n normalize, -i intermediate mode; positional args are
         * the input files/directories.
         */
        static Runner create(final String name, final String... args) {
            final Options options = Options
                    .parse("r,recursive|o,output!|m,merge|n,normalize|i,intermediate|+", args);
            final File outputFile = options.getOptionArg("o", File.class);
            final boolean recursive = options.hasOption("r");
            final boolean merge = options.hasOption("m");
            final boolean normalize = options.hasOption("n");
            final boolean intermediate = options.hasOption("i");
            final Corpus corpus = Corpus.create(recursive, options.getPositionalArgs(File.class));
            final RDFGenerator generator = RDFGenerator.builder()
                    .withProperties(Util.PROPERTIES, "eu.fbk.dkm.pikes.rdf.RDFGenerator")
                    .withMerging(merge).withNormalization(normalize).build();
            return new Runner(corpus, generator, outputFile, intermediate);
        }

        @Override
        public void run() {

            LOGGER.info("Converting {} NAF files to RDF", this.corpus.size());

            // Filter applied to each NAF document before RDF generation (SRL preprocessing on).
            final NAFFilter filter = NAFFilter.builder()
                    .withProperties(Util.PROPERTIES, "eu.fbk.dkm.pikes.rdf.NAFFilter")
                    .withSRLPreprocess(true, true, true).build();

            // Shared writer for single-file mode; startRDF() here, endRDF() after all workers
            // finish. Workers bypass start/end/close via ignoreMethods() below.
            final RDFHandler writer;
            if (!this.intermediate) {
                try {
                    Files.createParentDirs(this.outputFile);
                    writer = RDFHandlers.write(null, 1, Runner.this.outputFile.getAbsolutePath());
                    writer.startRDF();
                } catch (final Throwable ex) {
                    throw new RuntimeException(ex);
                }
            } else {
                writer = null;
            }

            final Tracker tracker = new Tracker(LOGGER, null,
                    "Processed %d NAF files (%d NAF/s avg)",
                    "Processed %d NAF files (%d NAF/s, %d NAF/s avg)");

            // One worker per core; each worker claims document indices from the shared counter
            // until the corpus is exhausted, then counts down the latch.
            final int numThreads = Environment.getCores();
            final CountDownLatch latch = new CountDownLatch(numThreads);
            final AtomicInteger counter = new AtomicInteger(0);
            final AtomicInteger succeeded = new AtomicInteger(0);
            tracker.start();
            for (int i = 0; i < numThreads; ++i) {
                Environment.getPool().submit(new Runnable() {

                    @Override
                    public void run() {
                        try {
                            final Path outBase = Runner.this.outputFile.toPath().getParent()
                                    .toAbsolutePath().normalize();
                            while (true) {
                                // Claim the next document index (shadows the submit-loop 'i').
                                final int i = counter.getAndIncrement();
                                if (i >= Runner.this.corpus.size()) {
                                    break;
                                }
                                String docName = null;

                                final Path path = Runner.this.corpus.file(i);

                                // Intermediate mode: derive the per-document output path by
                                // replacing the .naf/.xml suffix with .tql.gz; skip documents
                                // whose output already exists (resume support).
                                Path output = null;
                                if (Runner.this.intermediate) {
                                    try {
                                        final Path base = Runner.this.corpus.path();
                                        final Path relative = base.toAbsolutePath()
                                                .relativize(path.toAbsolutePath());
                                        String name = relative.toString();
                                        int index = name.indexOf(".naf");
                                        if (index < 0) {
                                            index = name.indexOf(".xml");
                                        }
                                        // NOTE(review): if the name contains neither ".naf" nor
                                        // ".xml", index is -1 and substring() throws; the error
                                        // is logged below, 'output' stays null and the later
                                        // createDirectories(output.getParent()) NPEs into the
                                        // per-document catch -- confirm inputs always match.
                                        name = name.substring(0, index) + ".tql.gz";
                                        output = outBase.resolve(name);
                                        if (java.nio.file.Files.exists(output)) {
                                            LOGGER.info("Skipping {}", path);
                                            succeeded.incrementAndGet();
                                            tracker.increment();
                                            continue;
                                        }
                                    } catch (final Throwable ex) {
                                        LOGGER.error("Could not compute output file name", ex);
                                    }
                                }

                                LOGGER.info("Processing {}", path);

                                try {
                                    // Load, filter and convert the document; the document public
                                    // ID is placed in the logging MDC for context.
                                    final KAFDocument document = Runner.this.corpus.get(i);
                                    docName = document.getPublic().publicId;
                                    MDC.put("context", docName);
                                    filter.filter(document);
                                    final RDFSource source = RDFSources
                                            .wrap(Runner.this.generator.generate(document, null));

                                    if (!Runner.this.intermediate) {
                                        // Shared writer: suppress per-document lifecycle calls.
                                        source.emit(RDFHandlers.ignoreMethods(writer,
                                                RDFHandlers.METHOD_START_RDF
                                                        | RDFHandlers.METHOD_END_RDF
                                                        | RDFHandlers.METHOD_CLOSE),
                                                1);
                                    } else {
                                        java.nio.file.Files.createDirectories(output.getParent());
                                        source.emit(RDFHandlers.write(null, 1,
                                                output.toAbsolutePath().toString()), 1);
                                    }

                                    succeeded.incrementAndGet();

                                } catch (final Throwable ex) {
                                    // Per-document failures are logged and do not stop the batch.
                                    LOGGER.error("Processing failed for " + docName, ex);
                                } finally {
                                    MDC.remove("context");
                                }
                                tracker.increment();
                            }
                        } finally {
                            latch.countDown();
                        }
                    }

                });
            }
            try {
                latch.await();
                if (!this.intermediate) {
                    writer.endRDF();
                }
            } catch (final InterruptedException ex) {
                Thread.currentThread().interrupt();
            } catch (final RDFHandlerException ex) {
                throw new RuntimeException(ex);
            }
            tracker.end();

            LOGGER.info("Successfully converted {}/{} files", succeeded, this.corpus.size());
        }
    }
448
449 private final class Extractor {
450
        // Base IRI of the document being processed (from NAF <public uri>).
        private final String baseIRI;

        // Sink receiving the generated statements at the end of run().
        private final RDFHandler handler;

        // Accumulator for generated statements, possibly merged/normalized before emission.
        private final QuadModel statements;

        // Bidirectional map of minted IRIs (presumably key <-> minted local name; populated by
        // mintIRI, defined outside this view -- TODO confirm).
        private final BiMap<String, String> mintedIRIs;

        // NAF document under conversion.
        private final KAFDocument document;

        // IRI identifying the document, derived from the cleaned <public uri>.
        private final IRI documentIRI;

        // Per-sentence enablement flags; indexed by Term/WF getSent() values.
        private final boolean[] sentenceIDs;

        // Raw document text reconstructed from word forms and their character offsets.
        private final String documentText;

        // Annotations built so far, keyed by term/annotation ID (see defineAnnotation usage).
        private final Map<String, Annotation> annotations;
468
469 public Extractor(final String baseIRI, final RDFHandler handler,
470 final KAFDocument document, final boolean[] sentenceIDs) {
471
472 this.baseIRI = baseIRI;
473 this.handler = handler;
474 this.statements = QuadModel.create();
475 this.mintedIRIs = HashBiMap.create();
476 this.document = document;
477 this.documentIRI = FACTORY.createIRI(Util.cleanIRI(document.getPublic().uri));
478 this.sentenceIDs = sentenceIDs;
479
480 final StringBuilder builder = new StringBuilder();
481 for (final WF word : document.getWFs()) {
482 final int offset = word.getOffset();
483 if (builder.length() > offset) {
484 builder.setLength(offset);
485 } else {
486 while (builder.length() < offset) {
487 builder.append(" ");
488 }
489 }
490 builder.append(word.getForm());
491 }
492 this.documentText = builder.toString();
493
494 this.annotations = Maps.newHashMap();
495 }
496
        /**
         * Drives the whole extraction: emits document metadata, then processes timexes,
         * entities, predicates, factualities, modifiers, coreference sets, predicate roles and
         * opinions (each restricted to enabled sentences), and finally streams the accumulated
         * statements -- optionally merged and/or normalized -- to the handler.
         *
         * @throws RDFHandlerException on failure of the underlying handler
         */
        public void run() throws RDFHandlerException {

            // 0. Document / NAF resource metadata.
            processMetadata();

            // 1. Temporal expressions; a timex with no span is processed unconditionally.
            // Individual failures are logged and do not abort the document.
            for (final Timex3 timex : this.document.getTimeExs()) {
                if (timex.getSpan() == null
                        || this.sentenceIDs[timex.getSpan().getFirstTarget().getSent()]) {
                    try {
                        processTimex(timex);
                    } catch (final Throwable ex) {
                        LOGGER.error("Error processing " + NAFUtils.toString(timex) + ", type "
                                + timex.getType() + ", value " + timex.getValue(), ex);
                    }
                }
            }

            // 2. Entities: processed once if at least one of their spans falls in an enabled
            // sentence.
            for (final Entity entity : this.document.getEntities()) {
                for (final Span<Term> span : entity.getSpans()) {
                    if (this.sentenceIDs[span.getFirstTarget().getSent()]) {
                        try {
                            processEntity(entity);
                        } catch (final Throwable ex) {
                            LOGGER.error("Error processing " + NAFUtils.toString(entity)
                                    + ", type " + entity.getType(), ex);
                        }
                        break;
                    }
                }
            }

            // 3. Predicates. Copular PropBank 'be.01' predicates whose A1 and A2 heads belong
            // to the same coreference set are skipped entirely (the coref layer already links
            // them), hence the labeled 'continue outer'.
            outer: for (final Predicate predicate : this.document.getPredicates()) {
                if (this.sentenceIDs[predicate.getSpan().getFirstTarget().getSent()]) {

                    for (final ExternalRef ref : predicate.getExternalRefs()) {
                        if (NAFUtils.RESOURCE_PROPBANK.equals(ref.getResource())
                                && ref.getReference().equals("be.01")) {
                            Term a1Head = null;
                            Term a2Head = null;
                            for (final Role role : predicate.getRoles()) {
                                final Term head = NAFUtils.extractHead(this.document,
                                        role.getSpan());
                                if (head != null) {
                                    if ("A1".equals(role.getSemRole())) {
                                        a1Head = head;
                                    } else if ("A2".equals(role.getSemRole())) {
                                        a2Head = head;
                                    }
                                }
                            }
                            if (a1Head != null && a2Head != null) {
                                for (final Coref coref : this.document.getCorefsByTerm(a1Head)) {
                                    final Set<Term> corefHeads = Sets.newHashSet();
                                    for (final Span<Term> span : coref.getSpans()) {
                                        final Term head = NAFUtils.extractHead(this.document,
                                                span);
                                        if (head != null) {
                                            corefHeads.add(head);
                                        }
                                    }
                                    if (corefHeads.contains(a1Head)
                                            && corefHeads.contains(a2Head)) {
                                        continue outer;
                                    }
                                }
                            }
                        }
                    }
                    try {
                        processPredicate(predicate);
                    } catch (final Throwable ex) {
                        LOGGER.error("Error processing " + NAFUtils.toString(predicate), ex);
                    }
                }
            }

            // 4. Factuality values attached to words in enabled sentences.
            for (final Factuality factuality : this.document.getFactualities()) {
                if (this.sentenceIDs[factuality.getWord().getSent()]) {
                    try {
                        processFactuality(factuality);
                    } catch (final Throwable ex) {
                        LOGGER.error("Error processing " + NAFUtils.toString(factuality), ex);
                    }
                }
            }

            // 5. Modifiers of each annotated head (matched via MODIFIER_REGEX), excluding terms
            // that are heads of coreference spans co-referring with the annotated head
            // (those are handled by the coreference layer instead).
            for (final Annotation ann : this.annotations.values()) {
                final IRI uri = ann.predicateIRI != null ? ann.predicateIRI : ann.objectIRI;
                if (uri != null) {
                    final Set<Term> forbiddenTerms = Sets.newHashSet();
                    final List<Coref> corefs = this.document.getCorefsByTerm(ann.head);
                    for (final Coref coref : corefs) {
                        final List<Term> heads = Lists.newArrayList();
                        for (final Span<Term> span : coref.getSpans()) {
                            final Term head = NAFUtils.extractHead(this.document, span);
                            if (head != null) {
                                heads.add(head);
                            }
                        }
                        if (heads.contains(ann.head)) {
                            forbiddenTerms.addAll(heads);
                        }
                    }
                    for (final Term term : this.document.getTermsByDepAncestors(
                            Collections.singleton(ann.head), MODIFIER_REGEX)) {
                        if (!forbiddenTerms.contains(term)) {
                            try {
                                processModifier(term, ann.head, uri, ann.extent);
                            } catch (final Throwable ex) {
                                LOGGER.error("Error processing MODIFIER " + NAFUtils.toString(term)
                                        + " of " + NAFUtils.toString(ann.head) + " (object IRI "
                                        + ann.objectIRI + "; predicate IRI " + ann.predicateIRI
                                        + ")", ex);
                            }
                        }
                    }
                }
            }

            // 6. Coreference sets, excluding event coreference; only spans in enabled
            // sentences are considered.
            for (final Coref coref : this.document.getCorefs()) {
                if ("event".equalsIgnoreCase(coref.getType())) {
                    continue;
                }
                final List<Span<Term>> spans = Lists.newArrayList();
                for (final Span<Term> span : coref.getSpans()) {
                    if (this.sentenceIDs[span.getFirstTarget().getSent()]) {
                        spans.add(span);
                    }
                }
                if (!spans.isEmpty()) {
                    try {
                        processCoref(spans);
                    } catch (final Throwable ex) {
                        LOGGER.error("Error processing " + NAFUtils.toString(coref), ex);
                    }
                }
            }

            // 7. Predicate roles: for each role head, all argument heads matched by
            // PARTICIPATION_REGEX are processed. The roleset's coreference entity/predicate
            // argument numbers (from PropBank) identify role pairs whose unique argument heads,
            // when both annotated, are linked with owl:sameAs on a mention covering both extents.
            for (final Predicate predicate : this.document.getPredicates()) {
                if (this.sentenceIDs[predicate.getSpan().getFirstTarget().getSent()]) {
                    final PropBank.Roleset rs = PropBank
                            .getRoleset(NAFUtils.getRoleset(predicate));
                    final String entitySuffix = rs == null ? "?"
                            : Integer.toString(rs.getCoreferenceEntityArg());
                    final String predicateSuffix = rs == null ? "?"
                            : Integer.toString(rs.getCoreferencePredicateArg());
                    Set<Term> corefEntityHeads = null;
                    Set<Term> corefPredicateHeads = null;
                    for (final Role role : predicate.getRoles()) {
                        final Term roleHead = NAFUtils.extractHead(this.document, role.getSpan());
                        if (roleHead != null) {
                            final Set<Term> argHeads = this.document.getTermsByDepAncestors(
                                    Collections.singleton(roleHead), PARTICIPATION_REGEX);
                            boolean isCorefPredicateRole = false;
                            if (role.getSemRole().endsWith(entitySuffix)) {
                                corefEntityHeads = argHeads;
                            } else if (role.getSemRole().endsWith(predicateSuffix)) {
                                corefPredicateHeads = argHeads;
                                isCorefPredicateRole = true;
                            }
                            for (final Term argHead : argHeads) {
                                try {
                                    processRole(predicate, role, argHead, isCorefPredicateRole);
                                } catch (final Throwable ex) {
                                    LOGGER.error("Error processing " + NAFUtils.toString(role)
                                            + " of " + NAFUtils.toString(predicate) + ", argument "
                                            + NAFUtils.toString(argHead), ex);
                                }
                            }
                        }
                    }
                    if (corefEntityHeads != null && corefEntityHeads.size() == 1
                            && corefPredicateHeads != null && corefPredicateHeads.size() == 1) {
                        final Annotation entityAnn = this.annotations
                                .get(corefEntityHeads.iterator().next().getId());
                        final Annotation predicateAnn = this.annotations
                                .get(corefPredicateHeads.iterator().next().getId());
                        if (predicateAnn != null && entityAnn != null
                                && predicateAnn.predicateIRI != null
                                && predicateAnn.objectIRI != null && entityAnn.objectIRI != null) {
                            final IRI mentionIRI = emitMention(
                                    Iterables.concat(predicateAnn.extent, entityAnn.extent));
                            emitFact(predicateAnn.objectIRI, OWL.SAMEAS, entityAnn.objectIRI,
                                    mentionIRI, null);
                        }
                    }
                }
            }

            // 8. Opinions, skipping those without an expression or labeled as coming from
            // 'stanford' or 'gold' annotations (note: || binds looser than &&, so a null label
            // alone does not skip).
            for (final Opinion opinion : this.document.getOpinions()) {
                if (opinion.getOpinionExpression() == null || opinion.getLabel() != null
                        && (opinion.getLabel().toLowerCase().contains("stanford")
                        || opinion.getLabel().toLowerCase().contains("gold"))) {
                    continue;
                }
                for (final Term term : opinion.getOpinionExpression().getTerms()) {
                    if (this.sentenceIDs[term.getSent()]) {
                        processOpinion(opinion);
                        break;
                    }
                }
            }

            // 9. Optional merging and normalization of the accumulated statements, then
            // emission to the handler within a startRDF/endRDF session.
            Iterable<Statement> statements = RDFGenerator.this.merging ? merge(this.statements)
                    : this.statements;
            if (RDFGenerator.this.normalization) {
                statements = new ProcessorASNorm("fact:").wrap(RDFSources.wrap(statements));
            }
            this.handler.startRDF();
            for (final Statement statement : statements) {
                this.handler.handleStatement(statement);
            }
            this.handler.endRDF();
        }
720
        /**
         * Emits metadata statements describing the document resource and its NAF annotation
         * resource: types, file descriptor fields, language, a hash of the whitespace-normalized
         * raw text, NAF layers and the linguistic processors that produced them.
         *
         * @throws RDFHandlerException on failure of the underlying handler
         */
        private void processMetadata() throws RDFHandlerException {

            // The NAF resource IRI is the document IRI with a ".naf" suffix.
            final IRI docIRI = this.documentIRI;
            final IRI nafIRI = FACTORY.createIRI(docIRI.stringValue() + ".naf");

            // Document resource types.
            emitMeta(docIRI, RDF.TYPE, new IRI[] { KS_OLD.RESOURCE, KS_OLD.TEXT });

            // File descriptor metadata, when available.
            if (this.document.getFileDesc() != null) {
                final FileDesc fd = this.document.getFileDesc();
                emitMeta(docIRI, DCTERMS.TITLE, fd.title);
                emitMeta(docIRI, DCTERMS.CREATOR, fd.author);
                emitMeta(docIRI, DCTERMS.CREATED, fd.creationtime);
                emitMeta(docIRI, KS_OLD.NAF_FILE_NAME, fd.filename);
                emitMeta(docIRI, KS_OLD.NAF_FILE_TYPE, fd.filetype);
                emitMeta(docIRI, KS_OLD.NAF_PAGES, fd.pages);
            }

            // Document language, when available.
            if (this.document.getLang() != null) {
                emitMeta(docIRI, DCTERMS.LANGUAGE,
                        ModelUtil.languageCodeToIRI(this.document.getLang()));
            }

            // Hash of the raw text with runs of whitespace collapsed to single spaces and
            // leading/trailing whitespace dropped, so the hash is layout-insensitive.
            if (this.document.getRawText() != null) {
                final String rawText = this.document.getRawText();
                final StringBuilder builder = new StringBuilder();
                boolean addSpace = false;
                for (int i = 0; i < rawText.length(); ++i) {
                    final char c = rawText.charAt(i);
                    if (Character.isWhitespace(c)) {
                        addSpace = builder.length() > 0;
                    } else {
                        if (addSpace) {
                            builder.append(' ');
                            addSpace = false;
                        }
                        builder.append(c);
                    }
                }
                emitMeta(docIRI, KS_OLD.TEXT_HASH, Hash.murmur3(builder.toString()).toString());
            }

            // Bidirectional document <-> NAF links.
            emitMeta(docIRI, KS_OLD.ANNOTATED_WITH, nafIRI);
            emitMeta(nafIRI, KS_OLD.ANNOTATION_OF, docIRI);

            // NAF resource types, version and public identifier.
            emitMeta(nafIRI, RDF.TYPE, new IRI[] { KS_OLD.RESOURCE, KS_OLD.NAF });
            emitMeta(nafIRI, KS_OLD.VERSION, this.document.getVersion());
            emitMeta(nafIRI, DCTERMS.IDENTIFIER, this.document.getPublic().publicId);

            // NAF layers and linguistic processors; the creation timestamp is taken from the
            // first processor that declares a begin (or else end) timestamp.
            String timestamp = null;
            for (final Map.Entry<String, List<LinguisticProcessor>> entry : this.document
                    .getLinguisticProcessors().entrySet()) {
                emitMeta(nafIRI, KS_OLD.LAYER,
                        FACTORY.createIRI(KS_OLD.NAMESPACE, "layer_" + entry.getKey()));
                for (final LinguisticProcessor lp : entry.getValue()) {
                    if (timestamp == null) {
                        if (!Strings.isNullOrEmpty(lp.getBeginTimestamp())) {
                            timestamp = lp.getBeginTimestamp();
                        } else if (!Strings.isNullOrEmpty(lp.getEndTimestamp())) {
                            timestamp = lp.getEndTimestamp();
                        }
                    }
                    final IRI lpIRI = FACTORY.createIRI(ModelUtil
                            .cleanIRI(KS_OLD.NAMESPACE + lp.getName() + '.' + lp.getVersion()));
                    emitMeta(nafIRI, DCTERMS.CREATOR, lpIRI);
                    emitMeta(lpIRI, DCTERMS.TITLE, lp.getName());
                    emitMeta(lpIRI, KS_OLD.VERSION, lp.getVersion());
                }
            }
            emitMeta(nafIRI, DCTERMS.CREATED, timestamp);
        }
799
        /**
         * Emits the representation of a TIMEX3 temporal expression: a time mention over its
         * terms and an object IRI that is either an OWL-Time interval/duration derived from
         * the timex value, or a minted IRI when the value cannot be parsed.
         *
         * @param timex the temporal expression to process, not null
         * @throws RDFHandlerException on failure of the underlying handler
         * @throws UnsupportedOperationException for TIMEX3 types other than date/time/duration
         *             when a value is present
         */
        private void processTimex(final Timex3 timex) throws RDFHandlerException {

            // Abort if the timex has no span (nothing to anchor the mention to).
            if (timex.getSpan() == null) {
                return;
            }

            // Retrieve terms, head and label of the timex.
            final List<Term> terms = this.document.getTermsByWFs(timex.getSpan().getTargets());
            final Term head = NAFUtils.extractHead(this.document, KAFDocument.newTermSpan(terms));
            final String label = NAFUtils.getText(NAFUtils.filterTerms(terms));
            final String type = timex.getType().trim().toLowerCase();

            // Annotate the head term (or complain about the last annotation being null).
            final Annotation ann = defineAnnotation(head, terms);

            // Abort if cannot annotate (wrong sentence) or if an object was already minted.
            if (ann == null || ann.objectIRI != null) {
                return;
            }

            // Emit a mention and its metadata for the current timex.
            final IRI mentionIRI = emitMention(terms);
            emitMeta(mentionIRI, RDF.TYPE, KS_OLD.TIME_MENTION);

            // Convert the timex value to an OWL-Time representation, when possible.
            IRI timexIRI = null;
            if (timex.getValue() != null) {
                if (type.equals("date") || type.equals("time")) {
                    final OWLTime.Interval interval = OWLTime.Interval
                            .parseTimex(timex.getValue());
                    if (interval != null) {
                        timexIRI = interval.toRDF(this.handler, RDFGenerator.this.owltimeNamespace,
                                null);
                    } else {
                        LOGGER.debug("Could not represent date/time value '" + timex.getValue()
                                + "' of " + NAFUtils.toString(timex));
                    }
                } else if (type.equals("duration")) {
                    final OWLTime.Duration duration = OWLTime.Duration
                            .parseTimex(timex.getValue());
                    if (duration != null) {
                        timexIRI = FACTORY.createIRI(RDFGenerator.this.owltimeNamespace,
                                duration.toString());
                        final IRI durationIRI = duration.toRDF(this.handler,
                                RDFGenerator.this.owltimeNamespace, null);
                        emitFact(timexIRI, OWLTIME.HAS_DURATION_DESCRIPTION, durationIRI,
                                mentionIRI, null);
                    } else {
                        LOGGER.debug("Could not represent duration value '" + timex.getValue()
                                + "' of " + NAFUtils.toString(timex));
                    }
                } else {
                    // Other TIMEX3 types (e.g. sets) are not supported.
                    throw new UnsupportedOperationException("Unsupported TIMEX3 type: " + type);
                }
            }

            // Fall back to a minted IRI when no OWL-Time representation could be derived.
            if (timexIRI == null) {
                timexIRI = mintIRI(timex.getId(),
                        MoreObjects.firstNonNull(timex.getValue(), timex.getSpan().getStr()));
            }

            // Link the timex IRI to its mention and emit its types and common attributes.
            ann.objectIRI = timexIRI;
            emitMeta(timexIRI, GAF.DENOTED_BY, mentionIRI);

            emitFact(timexIRI, RDF.TYPE,
                    ImmutableList.of(KS_OLD.ENTITY, KS_OLD.TIME, "timex." + type), mentionIRI,
                    null);
            emitCommonAttributes(timexIRI, mentionIRI, head, label, true);
        }
874
875 private void processEntity(final Entity entity) throws RDFHandlerException {
876
877
878 final List<Term> terms = entity.getSpans().get(0).getTargets();
879 final String label = NAFUtils.getText(NAFUtils.filterTerms(terms));
880 final Term head = NAFUtils.extractHead(this.document, entity.getSpans().get(0));
881 if (head == null) {
882 return;
883 }
884
885
886 String type = entity.getType();
887 type = type == null ? null : type.toLowerCase();
888
889 final boolean isLinked = !entity.getExternalRefs().isEmpty();
890 final boolean isProperty = "money".equals(type) || "cardinal".equals(type)
891 || "ordinal".equals(type) || "percent".equals(type) || "language".equals(type)
892 || "norp".equals(type) || "quantity".equals(type);
893
894
895 final Dep dep = this.document.getDepToTerm(head);
896 if (isProperty && dep != null) {
897 final String depLabel = dep.getRfunc().toUpperCase();
898 if (depLabel.contains("NMOD") || depLabel.contains("AMOD")) {
899 return;
900 }
901 }
902
903
904 final Annotation ann = defineAnnotation(head, terms);
905
906
907 if (ann == null || ann.objectIRI != null) {
908 return;
909 }
910
911
912 final IRI entityIRI;
913 if (!entity.isNamed() || isLinked) {
914 entityIRI = mintIRI(entity.getId(),
915 entity.isNamed() ? entity.getSpans().get(0).getStr() : head.getLemma());
916 } else {
917 entityIRI = Statements.VALUE_FACTORY.createIRI(Util
918 .cleanIRI("entity:" + entity.getStr().toLowerCase().replace(' ', '_')));
919 }
920 ann.objectIRI = entityIRI;
921
922
923 final IRI mentionIRI = emitMention(terms);
924 emitMeta(entityIRI, GAF.DENOTED_BY, mentionIRI);
925 emitMeta(mentionIRI, RDF.TYPE, KS_OLD.ENTITY_MENTION);
926
927
928
929
930
931
932
933
934
935 if (isProperty) {
936 emitMeta(mentionIRI, RDF.TYPE, KS_OLD.ATTRIBUTE_MENTION);
937 }
938
939
940 emitFact(entityIRI, RDF.TYPE, new Object[] { KS_OLD.ENTITY, "entity",
941 type == null ? null : "entity." + type }, mentionIRI, null);
942 if (this.document.getPredicatesByTerm(head).isEmpty()) {
943 emitCommonAttributes(entityIRI, mentionIRI, head, label, true);
944 }
945
946
947 if (isProperty) {
948 emitEntityAttributes(entity, entityIRI, mentionIRI);
949 } else {
950
951
952
953
954
955
956 final boolean named = entity.isNamed() || "romanticism".equalsIgnoreCase(label)
957 || "operant conditioning chamber".equalsIgnoreCase(label);
958 if (named) {
959 emitFact(entityIRI, FOAF.NAME, label, mentionIRI, null);
960 emitMeta(mentionIRI, RDF.TYPE, KS_OLD.NAME_MENTION);
961 }
962 final IRI property = named ? OWL.SAMEAS : RDFS.SEEALSO;
963 for (final ExternalRef ref : entity.getExternalRefs()) {
964 try {
965 final IRI refIRI = FACTORY.createIRI(Util.cleanIRI(ref.getReference()));
966 emitFact(entityIRI, property, refIRI, mentionIRI,
967 (double) ref.getConfidence());
968 } catch (final Throwable ex) {
969
970 }
971 }
972 }
973 }
974
    /**
     * Emits RDF for a NAF SRL predicate: mints (or reuses) an instance IRI, links it to a
     * mention, emits common lexical attributes and rdf:type triples.
     *
     * @param predicate the NAF predicate (SRL frame) to process
     * @throws RDFHandlerException on RDF emission failure
     */
    private void processPredicate(final Predicate predicate) throws RDFHandlerException {

        // Extract terms, surface label and syntactic head of the predicate span
        final List<Term> terms = predicate.getSpan().getTargets();
        final String label = NAFUtils.getText(NAFUtils.filterTerms(terms));
        final Term head = NAFUtils.extractHead(this.document, predicate.getSpan());

        // Skip predicates whose head overlaps a time expression or a named/ordinal entity;
        // those are handled by other processing paths
        if (!this.document.getTimeExsByTerm(head).isEmpty()) {
            return;
        }
        for (final Entity entity : this.document.getEntitiesByTerm(head)) {
            if (entity.isNamed() || "ordinal".equalsIgnoreCase(entity.getType())) {
                return;
            }
        }

        // Get or create the annotation for the head; abort if the head is missing
        final Annotation ann = defineAnnotation(head, terms);
        if (ann == null) {
            return;
        }

        // Guard against processing the same head twice as a predicate
        if (ann.predicateIRI != null) {
            LOGGER.warn("Already processed: " + NAFUtils.toString(predicate) + "; head is "
                    + NAFUtils.toString(head));
            return;
        }

        // Detect a "self argument": the predicate head is also the head of one of its roles.
        // Only relevant when the head already denotes an object instance.
        boolean selfArg = false;
        if (ann.objectIRI != null) {
            for (final Role role : predicate.getRoles()) {
                selfArg |= head.equals(NAFUtils.extractHead(this.document, role.getSpan()));
            }
        }

        // The predicate is an event if the head maps to a SUMO concept under sumo:Process
        boolean isEvent = false;
        for (final ExternalRef ref : head.getExternalRefs()) {
            if ("SUMO".equals(ref.getResource())) {
                final IRI conceptIRI = SimpleValueFactory.getInstance()
                        .createIRI(SUMO.NAMESPACE, ref.getReference());
                if (Sumo.isSubClassOf(conceptIRI, SUMO.PROCESS)) {
                    isEvent = true;
                    break;
                }
            }
        }

        // Reuse the object IRI when available and not a self-argument case; otherwise mint
        // a fresh IRI from the predicate ID and head lemma
        final IRI predicateIRI = ann.objectIRI != null && !selfArg ? ann.objectIRI
                : mintIRI(predicate.getId(), head.getLemma());
        ann.predicateIRI = predicateIRI;

        // Emit the mention: when reusing the object IRI, mention the entity span(s);
        // otherwise mention the predicate span itself
        IRI mentionIRI = null;
        if (predicateIRI.equals(ann.objectIRI)) {
            for (final Entity entity : this.document.getEntitiesByTerm(head)) {
                mentionIRI = emitMention(entity.getSpans().get(0).getTargets());
            }
        } else {
            mentionIRI = emitMention(terms);
        }
        emitMeta(mentionIRI, RDF.TYPE, KS_OLD.PREDICATE_MENTION);
        emitMeta(predicateIRI, GAF.DENOTED_BY, mentionIRI);

        // Emit common lexical attributes; suppress SUMO types on the object when self-arg
        if (ann.objectIRI == null) {
            emitCommonAttributes(ann.predicateIRI, mentionIRI, head, label, true);
        } else {
            emitCommonAttributes(ann.objectIRI, mentionIRI, head, label, !selfArg);
        }

        // Emit one rdf:type per non-empty external reference of the predicate
        for (final ExternalRef ref : predicate.getExternalRefs()) {
            if ("".equals(ref.getReference())) {
                continue;
            }
            final IRI typeIRI = mintRefIRI(ref.getResource(), ref.getReference());
            emitFact(predicateIRI, RDF.TYPE, typeIRI, mentionIRI, null);
        }

        // Emit the base types (entity, predicate, sem:Event), plus sumo:Process for events
        final List<Object> typeKeys = Lists.newArrayList(KS_OLD.ENTITY, KS_OLD.PREDICATE,
                SEM.EVENT);
        if (isEvent) {
            typeKeys.add(SUMO.PROCESS);
        }
        emitFact(predicateIRI, RDF.TYPE, typeKeys, mentionIRI, null);
    }
1079
1080 private void processFactuality(final Factuality factuality) throws RDFHandlerException {
1081
1082
1083
1084
1085 final Term term = factuality.getWord();
1086 final Annotation ann = this.annotations.get(term.getId());
1087
1088
1089 if (ann == null || ann.predicateIRI == null) {
1090 return;
1091 }
1092
1093
1094 final IRI mentionIRI = emitMention(ann.extent);
1095
1096
1097 final String value = factuality.getMaxPart().getPrediction();
1098 emitFact(ann.predicateIRI, KS_OLD.FACTUALITY, value, mentionIRI, null);
1099 }
1100
    /**
     * Emits modifier (attribute) facts linking an instance to the instance/predicate/entity
     * or synthetic attribute term denoted by a modifier term.
     *
     * @param modifierTerm the term modifying the instance
     * @param instanceTerm the head term of the modified instance
     * @param instanceIRI the IRI of the modified instance
     * @param instanceExtent the extent terms of the modified instance
     * @throws RDFHandlerException on RDF emission failure
     */
    private void processModifier(final Term modifierTerm, final Term instanceTerm,
            final IRI instanceIRI, final List<Term> instanceExtent)
            throws RDFHandlerException {

        // Gather modifier POS, co-located entities, and any existing annotation
        final char pos = Character.toUpperCase(modifierTerm.getPos().charAt(0));
        final List<Entity> entities = this.document.getEntitiesByTerm(modifierTerm);
        final Annotation ann = this.annotations.get(modifierTerm.getId());

        // Time expressions are handled elsewhere
        if (!this.document.getTimeExsByTerm(modifierTerm).isEmpty()) {
            return;
        }

        if (ann != null) {
            // Modifier already denotes an instance: link it via ks:mod
            final IRI otherIRI = ann.objectIRI != null ? ann.objectIRI : ann.predicateIRI;
            if (otherIRI != null) {
                final IRI mentionID = emitMention(
                        Iterables.concat(instanceExtent, ann.extent));
                emitFact(instanceIRI, KS_OLD.MOD, otherIRI, mentionID, null);
            }
            // Also link via a connective property derived from the dependency path.
            // NOTE(review): otherIRI may be null here (it is used outside its null-guard);
            // emitFact silently drops facts with a null object, but the mention is still
            // emitted — confirm this side effect is intended.
            final String path = extractPath(instanceTerm, modifierTerm);
            if (!Strings.isNullOrEmpty(path)) {
                final IRI mentionID = emitMention(
                        Iterables.concat(instanceExtent, ann.extent));
                final IRI property = mintRefIRI("conn", path);
                emitFact(instanceIRI, property, otherIRI, mentionID, null);
            }

        } else if (!entities.isEmpty()) {
            // Modifier is an entity: emit its attributes directly on the instance
            final Entity entity = entities.get(0);
            final IRI mentionIRI = emitMention(entity.getSpans().get(0).getTargets());
            emitMeta(mentionIRI, RDF.TYPE, KS_OLD.ATTRIBUTE_MENTION);
            emitEntityAttributes(entity, instanceIRI, mentionIRI);

        } else if (pos == 'G' || pos == 'A' || pos == 'V') {
            // Adjective/gerund/verb modifier: emit a synthetic attribute term and link it.
            // The extent spans the modifier plus its (A|N)MOD descendants.
            final Set<Term> terms = this.document.getTermsByDepAncestors(
                    Collections.singleton(modifierTerm), "(AMOD|NMOD)*");
            final IRI mentionIRI = emitMention(terms);
            final IRI expressionIRI = emitTerm(modifierTerm);
            emitFact(instanceIRI, KS_OLD.MOD, expressionIRI, mentionIRI, null);
        }
    }
1150
    /**
     * Emits owl:sameAs links (and composite "group" individuals for multi-member spans)
     * between the instances denoted by the spans of a coreference chain.
     *
     * @param spans the spans of the coreference cluster
     * @throws RDFHandlerException on RDF emission failure
     */
    private void processCoref(final List<Span<Term>> spans) throws RDFHandlerException {

        // Collect, for each span with a resolvable head, the coordinated terms inside the
        // span that map to known instances, together with their IRIs and extents
        final List<Span<Term>> corefSpans = Lists.newArrayList();
        final List<List<Term>> corefTerms = Lists.newArrayList();
        final List<List<Term>> corefExtents = Lists.newArrayList();
        final List<List<IRI>> corefIRIs = Lists.newArrayList();
        for (final Span<Term> span : spans) {
            final Term head = NAFUtils.extractHead(this.document, span);
            if (head != null) {
                final List<Term> terms = Lists.newArrayList();
                final List<IRI> uris = Lists.newArrayList();
                final Set<Term> extent = Sets.newHashSet();
                // Follow coordination (COORD/CONJ) chains from the head, staying in-span
                for (final Term term : this.document.getTermsByDepAncestors(
                        Collections.singleton(head), "(COORD CONJ?)*")) {
                    if (!span.getTargets().contains(term)) {
                        continue;
                    }
                    final Annotation ann = this.annotations.get(term.getId());
                    // Prefer the object IRI, falling back to the predicate IRI
                    final IRI uri = ann == null ? null
                            : ann.objectIRI != null ? ann.objectIRI : ann.predicateIRI;
                    if (uri != null) {
                        terms.add(term);
                        uris.add(uri);
                        extent.addAll(ann.extent);
                    }
                }
                if (!terms.isEmpty()) {
                    corefSpans.add(span);
                    corefTerms.add(terms);
                    corefExtents.add(Ordering.natural().immutableSortedCopy(extent));
                    corefIRIs.add(uris);
                }
            }
        }

        // A chain with fewer than two resolvable members produces no links
        if (corefTerms.size() <= 1) {
            return;
        }

        // Build one member instance per span: the single instance itself, or a freshly
        // minted composite individual ks:include-ing each coordinated instance
        final Map<Term, IRI> members = Maps.newHashMap();
        final Map<Term, Span<Term>> memberSpans = Maps.newHashMap();
        for (int i = 0; i < corefTerms.size(); ++i) {
            final Span<Term> span = corefSpans.get(i);
            final List<Term> terms = corefTerms.get(i);
            final List<Term> extent = corefExtents.get(i);
            final List<IRI> uris = corefIRIs.get(i);
            memberSpans.put(terms.get(0), span);
            if (terms.size() == 1) {
                members.put(terms.get(0), uris.get(0));
            } else {
                // Composite local name = concatenation of member local names
                final StringBuilder builder = new StringBuilder();
                for (final IRI uri : uris) {
                    builder.append(builder.length() == 0 ? "" : "_");
                    builder.append(uri.getLocalName());
                }
                final IRI compIRI = mintIRI(builder.toString(), null);
                final IRI mentionIRI = emitMention(extent);
                emitFact(compIRI, RDF.TYPE, new Object[] { KS_OLD.ENTITY }, mentionIRI, null);
                for (int j = 0; j < uris.size(); ++j) {
                    emitFact(compIRI, KS_OLD.INCLUDE, uris.get(j), mentionIRI, null);
                }
                members.put(terms.get(0), compIRI);
            }
        }

        // Emit pairwise owl:sameAs between members (each unordered pair emitted once,
        // using term-ID ordering to avoid duplicates and self-links)
        for (final Map.Entry<Term, IRI> entry1 : members.entrySet()) {
            for (final Map.Entry<Term, IRI> entry2 : members.entrySet()) {
                final Term term1 = entry1.getKey();
                final Term term2 = entry2.getKey();
                if (term1.getId().compareTo(term2.getId()) < 0) {
                    final Span<Term> span1 = memberSpans.get(term1);
                    final Span<Term> span2 = memberSpans.get(term2);
                    final IRI mentionIRI = emitMention(
                            Iterables.concat(span1.getTargets(), span2.getTargets()));
                    final IRI uri1 = entry1.getValue();
                    final IRI uri2 = entry2.getValue();
                    emitFact(uri1, OWL.SAMEAS, uri2, mentionIRI, null);
                }
            }
        }
    }
1260
    /**
     * Emits participation facts linking a predicate instance to the instance (or synthetic
     * attribute term) filling one of its semantic roles.
     *
     * @param predicate the NAF predicate owning the role
     * @param role the semantic role to process
     * @param argHead the head term of the role argument
     * @param isCorefPredicateRole whether the argument participates via coreference as a
     *            predicate (forces use of its predicate IRI)
     * @throws RDFHandlerException on RDF emission failure
     */
    private void processRole(final Predicate predicate, final Role role, final Term argHead,
            final boolean isCorefPredicateRole) throws RDFHandlerException {

        // Resolve the predicate instance; abort if the predicate was not processed
        final Term predHead = NAFUtils.extractHead(this.document, predicate.getSpan());
        final Annotation predAnn = this.annotations.get(predHead.getId());
        final IRI predIRI = predAnn == null ? null : predAnn.predicateIRI;
        if (predIRI == null) {
            return;
        }

        // Resolve the argument instance, preferring the predicate IRI when required
        IRI argIRI = null;
        final Annotation argAnn = this.annotations.get(argHead.getId());
        if (argAnn != null) {
            if (argAnn.predicateIRI != null
                    && (argAnn.objectIRI == null || isCorefPredicateRole)) {
                argIRI = argAnn.predicateIRI;
            } else {
                argIRI = argAnn.objectIRI;
            }
        }

        // Skip self-links and unannotated arguments with unusable POS
        final char pos = Character.toUpperCase(argHead.getPos().charAt(0));
        if (argIRI != null && argIRI.equals(predIRI)
                || argIRI == null && pos != 'N' && pos != 'G' && pos != 'A') {
            return;
        }

        // Properties that will link predicate to argument
        final Set<IRI> properties = Sets.newHashSet();

        // Map the semantic role tag to a SEM property (actor/time/place); normalize the
        // tag to its last dash-separated component first
        String semRole = role.getSemRole();
        if (semRole != null && !semRole.equals("")) {
            // Ignore reference roles (R-*)
            if (semRole.startsWith("R-")) {
                return;
            }
            semRole = semRole.toLowerCase();
            final int index = semRole.lastIndexOf('-');
            if (index >= 0) {
                semRole = semRole.substring(index + 1);
            }
            if (Character.isDigit(semRole.charAt(semRole.length() - 1))) {
                // Numbered core argument (e.g. "a0"): keep only the digit as role suffix
                semRole = semRole.substring(semRole.length() - 1);
                properties.add(SEM.HAS_ACTOR);
            } else if (semRole.equals("tmp")) {
                properties.add(SEM.HAS_TIME);
            } else if (semRole.equals("loc")) {
                properties.add(SEM.HAS_PLACE);
            }
        }

        // Verbal predicates take PropBank rolesets, others NomBank
        final String semRoleResource = predHead.getPos().equalsIgnoreCase("V") ? "propbank"
                : "nombank";

        // Mint a property per external role reference outside the base roleset resource
        for (final ExternalRef ref : role.getExternalRefs()) {
            final String resource = ref.getResource().toLowerCase();
            final String name = ref.getReference().replace('#', '.');
            if (resource.equals(semRoleResource) || name.equals("")) {
                continue;
            }
            properties.add(mintRefIRI(resource, name));
        }

        // Mint the roleset-specific property (e.g. propbank:eat.01_0 or propbank:tmp)
        if (!Strings.isNullOrEmpty(semRole)) {
            for (final ExternalRef ref : predicate.getExternalRefs()) {
                final String resource = ref.getResource().toLowerCase();
                if (resource.equals(semRoleResource)) {
                    if (Character.isDigit(semRole.charAt(0))) {
                        properties.add(mintRefIRI(resource,
                                ref.getReference().toLowerCase() + "_" + semRole));
                    } else {
                        properties.add(mintRefIRI(resource, semRole));
                    }
                }
            }
        }

        // Add a connective property derived from the dependency path, when computable
        final String path = extractPath(predHead, argHead);
        if (path == null) {
            LOGGER.debug("Could not compute dependency path from " + predHead.getId() + " to "
                    + argHead.getId());
        }
        if (!Strings.isNullOrEmpty(path)) {
            properties.add(mintRefIRI("conn", path));
        }

        // Emit one fact per property: toward the argument instance if available, otherwise
        // toward a synthetic attribute term built from the argument head
        final List<Term> predTerms = predicate.getSpan().getTargets();
        if (argIRI != null) {
            final IRI mentionIRI = emitMention(Iterables.concat(predTerms, argAnn.extent));
            emitMeta(mentionIRI, RDF.TYPE, KS_OLD.PARTICIPATION_MENTION);
            for (final IRI property : properties) {
                emitFact(predIRI, property, argIRI, mentionIRI, null);
            }
        } else {
            final Set<Term> argTerms = this.document
                    .getTermsByDepAncestors(Collections.singleton(argHead), "(AMOD|NMOD)*");
            final IRI mentionIRI = emitMention(Iterables.concat(predTerms, argTerms));
            emitMeta(mentionIRI, RDF.TYPE, KS_OLD.PARTICIPATION_MENTION);
            final IRI expressionIRI = emitTerm(argHead);
            for (final IRI property : properties) {
                emitFact(predIRI, property, expressionIRI, mentionIRI, null);
            }
        }
    }
1392
    /**
     * Emits RDF for a NAF opinion: an opinion individual typed by polarity, plus links to
     * the instances heading its expression, target and holder spans.
     *
     * @param opinion the NAF opinion to process
     */
    private void processOpinion(final Opinion opinion) {

        // Opinions are anchored to the sentence of the first expression term
        final int sentenceID = opinion.getOpinionExpression().getTerms().get(0).getSent();

        // Mint the opinion IRI and emit its types (polarity-specific) and label
        final IRI opinionIRI = mintIRI(opinion.getId(), null);
        final Polarity polarity = Polarity.forExpression(opinion.getOpinionExpression());
        emitFact(opinionIRI, RDF.TYPE, SUMO.ENTITY, null, null);
        emitFact(opinionIRI, RDF.TYPE, KS_OLD.OPINION, null, null);
        emitFact(opinionIRI, RDF.TYPE,
                polarity == Polarity.POSITIVE ? KS_OLD.POSITIVE_OPINION
                        : polarity == Polarity.NEGATIVE ? KS_OLD.NEGATIVE_OPINION
                                : KS_OLD.NEUTRAL_OPINION,
                null, null);
        if (opinion.getLabel() != null) {
            emitFact(opinionIRI, RDFS.LABEL, opinion.getLabel(), null, null);
        }

        // Expression span: trim to the sentence and link heads via ks:expression
        final Span<Term> exprSpan = NAFUtils.trimSpan(opinion.getOpinionExpression().getSpan(),
                sentenceID);
        final Set<Term> exprHeads = exprSpan == null ? ImmutableSet.<Term>of()
                : NAFUtils.extractHeads(this.document, null, exprSpan.getTargets(),
                        NAFUtils.matchExtendedPos(this.document, "NN", "VB", "JJ", "R"));
        emitOpinionArgument(opinionIRI, null, KS_OLD.EXPRESSION, exprSpan, exprHeads);

        // Target span (optional): link heads via ks:target
        final OpinionTarget target = opinion.getOpinionTarget();
        final Span<Term> targetSpan = target == null ? null
                : NAFUtils.trimSpan(target.getSpan(), sentenceID);
        final Set<Term> targetHeads = targetSpan == null ? ImmutableSet.<Term>of()
                : NAFUtils.extractHeads(this.document, null, targetSpan.getTargets(),
                        NAFUtils.matchExtendedPos(this.document, "NN", "PRP", "JJP", "DTP",
                                "WP", "VB"));
        emitOpinionArgument(opinionIRI, null, KS_OLD.TARGET, targetSpan, targetHeads);

        // Holder span (optional): link heads via ks:holder
        final OpinionHolder holder = opinion.getOpinionHolder();
        final Span<Term> holderSpan = holder == null ? null
                : NAFUtils.trimSpan(holder.getSpan(), sentenceID);
        final Set<Term> holderHeads = holderSpan == null ? ImmutableSet.<Term>of()
                : NAFUtils.extractHeads(this.document, null, holderSpan.getTargets(), NAFUtils
                        .matchExtendedPos(this.document, "NN", "PRP", "JJP", "DTP", "WP"));
        emitOpinionArgument(opinionIRI, null, KS_OLD.HOLDER, holderSpan, holderHeads);
    }
1439
    /**
     * Links an opinion to the instances denoted by the terms of one of its argument spans.
     *
     * @param opinionID the opinion IRI facts are attached to
     * @param spanProperty property used for every resolvable term in the span, or null
     * @param headProperty property used only for head terms, or null
     * @param span the argument span, or null (no-op)
     * @param heads the head terms of the span, or null
     */
    private void emitOpinionArgument(final IRI opinionID, @Nullable final IRI spanProperty,
            @Nullable final IRI headProperty, @Nullable final Span<Term> span,
            @Nullable final Set<Term> heads) {

        if (span != null) {
            outer: for (final Term term : span.getTargets()) {
                final Annotation ann = this.annotations.get(term.getId());
                // Prefer the object IRI, falling back to the predicate IRI
                IRI uri = ann == null ? null
                        : ann.objectIRI != null ? ann.objectIRI : ann.predicateIRI;
                // Unannotated adjectives/gerunds/verbs get a synthetic attribute term,
                // unless the term heads a verb chain (VC dependency) — then skip it
                if (uri == null && "AGV".contains(term.getPos())) {
                    for (final Dep dep : this.document.getDepsFromTerm(term)) {
                        if (dep.getRfunc().equals("VC")) {
                            continue outer;
                        }
                    }
                    uri = emitTerm(term);
                }
                if (uri != null) {
                    if (spanProperty != null) {
                        emitFact(opinionID, spanProperty, uri, null, null);
                    }
                    if (headProperty != null && heads != null && heads.contains(term)) {
                        emitFact(opinionID, headProperty, uri, null, null);
                    }
                }
            }
        }
    }
1468
    /**
     * Emits lexical attributes shared by entities and predicates: label, lemma, WordNet
     * super-sense and synset mention metadata, plurality, and rdf:type facts derived from
     * the head term's external references.
     *
     * @param instanceID the instance the facts are about
     * @param mentionID the mention grounding the facts
     * @param head the syntactic head term
     * @param label the surface label (may be null/empty)
     * @param emitSumo whether SUMO types (and their superclasses) should be emitted
     * @throws RDFHandlerException on RDF emission failure
     */
    private void emitCommonAttributes(final IRI instanceID, final IRI mentionID,
            final Term head, final String label, final boolean emitSumo)
            throws RDFHandlerException {

        // Emit the label, except for quantifier/pronoun/determiner heads (POS in "QPD")
        if ("QPD".indexOf(head.getPos()) < 0 && label != null && !label.isEmpty()) {
            emitFact(instanceID, RDFS.LABEL, label, mentionID, null);
        }

        // Record the lemma on the mention for nouns and verbs
        final char pos = Character.toUpperCase(head.getPos().charAt(0));
        if (pos == 'N' || pos == 'V') {
            emitMeta(mentionID, KS_OLD.LEMMA, head.getLemma());
        }

        // WordNet super-sense tag (e.g. "noun-person" -> .../sst/person)
        final ExternalRef sstRef = NAFUtils.getRef(head, NAFUtils.RESOURCE_WN_SST, null);
        if (sstRef != null) {
            final String sst = sstRef.getReference();
            final IRI uri = FACTORY.createIRI("http://www.newsreader-project.eu/sst/",
                    sst.substring(sst.lastIndexOf('-') + 1));
            emitMeta(mentionID, KS_OLD.SST, uri);
        }

        // WordNet synset
        final ExternalRef synsetRef = NAFUtils.getRef(head, NAFUtils.RESOURCE_WN_SYNSET, null);
        if (synsetRef != null) {
            final IRI uri = FACTORY.createIRI("http://www.newsreader-project.eu/syn/",
                    synsetRef.getReference());
            emitMeta(mentionID, KS_OLD.SYNSET, uri);
        }

        // Plural nouns (NNS / NNPS) are flagged on the mention
        final String p = head.getMorphofeat().toUpperCase();
        if (p.equals("NNS") || p.equals("NNPS")) {
            emitMeta(mentionID, KS_OLD.PLURAL, true);
        }

        // Emit a type per external reference; SUMO types are gated by emitSumo and also
        // expanded to their superclasses
        for (final ExternalRef ref : head.getExternalRefs()) {
            final IRI typeIRI = mintRefIRI(ref.getResource(), ref.getReference());
            if (ref.getResource().equals(NAFUtils.RESOURCE_SUMO)) {
                if (emitSumo) {
                    emitFact(instanceID, RDF.TYPE, typeIRI, mentionID, ref.getConfidence());
                    emitFact(instanceID, RDF.TYPE, Sumo.getSuperClasses(typeIRI), mentionID,
                            ref.getConfidence());
                }
            } else {
                emitFact(instanceID, RDF.TYPE, typeIRI, mentionID, ref.getConfidence());
            }
        }
    }
1519
1520 private void emitEntityAttributes(final Entity entity, final IRI subject,
1521 final IRI mention) throws RDFHandlerException {
1522
1523
1524 final ExternalRef valueRef = NAFUtils.getRef(entity, "value", null);
1525 String nerTag = entity.getType();
1526 nerTag = nerTag == null ? null : nerTag.toLowerCase();
1527
1528
1529 if (Objects.equal(nerTag, "norp") || Objects.equal(nerTag, "language")) {
1530 final IRI attribute = Objects.equal(nerTag, "norp") ? KS_OLD.PROVENANCE
1531 : KS_OLD.LANGUAGE;
1532 for (final ExternalRef ref : entity.getExternalRefs()) {
1533 try {
1534 final IRI refIRI = FACTORY.createIRI(Util.cleanIRI(ref.getReference()));
1535 emitFact(subject, attribute, refIRI, mention,
1536 (double) ref.getConfidence());
1537 } catch (final Throwable ex) {
1538
1539 }
1540 }
1541
1542 } else if (valueRef != null) {
1543
1544 try {
1545 final String s = valueRef.getReference().trim();
1546 if (s.isEmpty()) {
1547 return;
1548 }
1549 if (Objects.equal(nerTag, "cardinal") || Objects.equal(nerTag, "quantity")) {
1550 emitFact(subject, KS_OLD.QUANTITY, Double.parseDouble(s), mention, null);
1551
1552 } else if (Objects.equal(nerTag, "ordinal")) {
1553 emitFact(subject, KS_OLD.RANK, Double.parseDouble(s), mention, null);
1554
1555 } else if (Objects.equal(nerTag, "percent")) {
1556 final int index = s.indexOf('%');
1557 emitFact(subject, KS_OLD.PERCENTAGE,
1558 Double.parseDouble(s.substring(index + 1)), mention, null);
1559
1560 } else if (Objects.equal(nerTag, "money")) {
1561 int index = 0;
1562 while (index < s.length()) {
1563 final char c = s.charAt(index);
1564 if (c == '€') {
1565 emitFact(subject, GR.HAS_CURRENCY, "EUR", mention, null);
1566 } else if (c == '$') {
1567 emitFact(subject, GR.HAS_CURRENCY, "USD", mention, null);
1568 } else if (c == 'Â¥') {
1569 emitFact(subject, GR.HAS_CURRENCY, "YEN", mention, null);
1570 } else if (Character.isDigit(c)) {
1571 break;
1572 }
1573 ++index;
1574 }
1575 emitFact(subject, GR.HAS_CURRENCY_VALUE,
1576 Double.parseDouble(s.substring(index)), mention, null);
1577 }
1578 } catch (final NumberFormatException ex) {
1579 LOGGER.debug("Could not process normalized value: " + valueRef.getReference());
1580 }
1581 }
1582 }
1583
    /**
     * Emits a NIF mention for the given terms, computing its char-offset IRI and anchor
     * text. Discontiguous spans become a compound string whose contiguous sub-spans are
     * emitted recursively as component mentions.
     *
     * @param terms the terms covered by the mention (unsorted, possibly discontiguous)
     * @return the mention IRI, or null when the term collection is empty
     */
    @Nullable
    private IRI emitMention(final Iterable<Term> terms) {

        // Sort terms by offset; an empty collection yields no mention
        final List<Term> sortedTerms = Ordering.from(Term.OFFSET_COMPARATOR).sortedCopy(terms);
        final int numTerms = sortedTerms.size();
        if (numTerms == 0) {
            return null;
        }

        final String text = this.documentText;
        final List<IRI> componentIRIs = Lists.newArrayList();
        final int begin = NAFUtils.getBegin(sortedTerms.get(0));
        int offset = begin;          // end offset of the last term consumed so far
        int startTermIdx = 0;        // first term of the current contiguous run

        // The IRI encodes the covered ranges: <doc>#char=begin,[end;start,]...end
        final StringBuilder anchorBuilder = new StringBuilder();
        final StringBuilder uriBuilder = new StringBuilder(this.documentIRI.stringValue())
                .append("#char=").append(begin).append(",");

        // Split the span wherever non-whitespace text occurs between consecutive terms;
        // each completed run is emitted as a component mention
        for (int i = 0; i < numTerms; ++i) {
            final Term term = sortedTerms.get(i);
            final int termOffset = NAFUtils.getBegin(term);
            if (termOffset > offset && !text.substring(offset, termOffset).trim().isEmpty()) {
                final int start = NAFUtils.getBegin(sortedTerms.get(startTermIdx));
                anchorBuilder.append(text.substring(start, offset)).append(" [...] ");
                uriBuilder.append(offset).append(";").append(termOffset).append(',');
                componentIRIs.add(emitMention(sortedTerms.subList(startTermIdx, i)));
                startTermIdx = i;
            }
            offset = NAFUtils.getEnd(term);
        }
        // Emit the trailing run as a component only if the span actually split
        if (startTermIdx > 0) {
            componentIRIs.add(emitMention(sortedTerms.subList(startTermIdx, numTerms)));
        }
        anchorBuilder.append(
                text.substring(NAFUtils.getBegin(sortedTerms.get(startTermIdx)), offset));
        uriBuilder.append(offset);

        // Emit mention metadata: document linkage, type, compound components, NIF offsets
        final String anchor = anchorBuilder.toString();
        final IRI mentionID = FACTORY.createIRI(uriBuilder.toString());
        emitMeta(mentionID, KS_OLD.MENTION_OF, this.documentIRI);
        emitMeta(this.documentIRI, KS_OLD.HAS_MENTION, mentionID);
        emitMeta(mentionID, RDF.TYPE, KS_OLD.MENTION);
        if (!componentIRIs.isEmpty()) {
            emitMeta(mentionID, RDF.TYPE, KS_OLD.COMPOUND_STRING);
            for (final IRI componentIRI : componentIRIs) {
                emitMeta(mentionID, KS_OLD.COMPONENT_SUB_STRING, componentIRI);
            }
        }
        emitMeta(mentionID, NIF.BEGIN_INDEX, FACTORY.createLiteral(begin));
        emitMeta(mentionID, NIF.END_INDEX, FACTORY.createLiteral(offset));
        emitMeta(mentionID, NIF.ANCHOR_OF, FACTORY.createLiteral(anchor));

        return mentionID;
    }
1671
    /**
     * Emits a synthetic "attribute" individual for a term (typically a modifier), built
     * from its WordNet synset or lemma plus its recursively-emitted AMOD/NMOD modifiers,
     * and returns its IRI. The attribute ID concatenates sorted modifier IDs and the head
     * ID with a separator longer than any separator used inside the modifier IDs.
     *
     * @param head the head term of the attribute
     * @return the minted attribute IRI
     */
    private IRI emitTerm(final Term head) {

        // Head ID: readable synset ID when available, otherwise the lowercase lemma
        final ExternalRef synsetRef = NAFUtils.getRef(head, NAFUtils.RESOURCE_WN_SYNSET, null);
        final String headSynsetID = synsetRef == null ? null : synsetRef.getReference();
        final String readableHeadSynsetID = WordNet.getReadableSynsetID(headSynsetID);
        final String headID = MoreObjects.firstNonNull(readableHeadSynsetID,
                head.getLemma().toLowerCase());

        final List<IRI> modifierIRIs = Lists.newArrayList();
        final List<String> modifierIDs = Lists.newArrayList();

        // Recursively emit direct adjective/gerund/verb modifiers (single AMOD|NMOD hop)
        for (final Term modifier : this.document.getTermsByDepAncestors(ImmutableSet.of(head),
                "AMOD|NMOD")) {
            if ("AGV".contains(modifier.getPos())) {
                final IRI modifierIRI = emitTerm(modifier);
                modifierIRIs.add(modifierIRI);
                modifierIDs.add(modifierIRI.getLocalName());
            }
        }

        // The label covers the head and all transitive A/G/V modifiers
        final Set<Term> terms = this.document.getTermsByDepAncestors(ImmutableSet.of(head),
                "(AMOD|NMOD)*");
        for (final Iterator<Term> i = terms.iterator(); i.hasNext();) {
            if (!"AGV".contains(i.next().getPos())) {
                i.remove();
            }
        }
        final String label = NAFUtils.getText(NAFUtils.filterTerms(terms));

        // Choose a separator of level+1 underscores, where level is the longest run of
        // underscores appearing inside any modifier ID (keeps IDs unambiguous)
        final StringBuilder idBuilder = new StringBuilder();
        int level = 0;
        for (final String modifierID : modifierIDs) {
            for (int i = 1; modifierID.contains(Strings.repeat("_", i)); ++i) {
                level = Math.max(level, i);
            }
        }
        final String separator = Strings.repeat("_", level + 1);
        for (final String modifierID : Ordering.natural().immutableSortedCopy(modifierIDs)) {
            idBuilder.append(modifierID).append(separator);
        }
        final String id = idBuilder.append(headID).toString();
        final IRI uri = mintRefIRI("attribute", id);

        // Emit the attribute individual: type, label, head synset, modifier links
        emitFact(uri, RDF.TYPE, KS_OLD.ATTRIBUTE, null, null);
        emitFact(uri, RDFS.LABEL, label, null, null);
        if (headSynsetID != null) {
            emitFact(uri, KS_OLD.HEAD_SYNSET, mintRefIRI("syn", headSynsetID), null, null);
        }
        for (final IRI modifierIRI : modifierIRIs) {
            emitFact(uri, KS_OLD.MOD, modifierIRI, null, null);
        }

        // Ground the attribute on a mention over the filtered extent
        final IRI mentionIRI = emitMention(terms);
        emitMeta(mentionIRI, RDF.TYPE, KS_OLD.ATTRIBUTE_MENTION);
        emitMeta(uri, GAF.DENOTED_BY, mentionIRI);

        return uri;
    }
1731
    /**
     * Builds a lexical connective label from the dependency path between two terms, joining
     * the lowercase lemmas of intermediate nodes with underscores. Returns null when the
     * terms belong to the same verb-chain cluster, no path exists, or the path leaves the
     * source cluster in an unexpected direction.
     *
     * @param from the source term
     * @param to the target term
     * @return the connective label (possibly empty), or null when no path is usable
     */
    @Nullable
    private String extractPath(final Term from, final Term to) {

        // Expand each endpoint to its verb-chain cluster (VC/IM/OPRD descendants)
        final Set<Term> fromTerms = this.document
                .getTermsByDepDescendants(ImmutableSet.of(from), "(-VC|-IM|-OPRD)*");
        final Set<Term> toTerms = this.document.getTermsByDepDescendants(ImmutableSet.of(to),
                "(-VC|-IM|-OPRD)*");

        // Same cluster: no meaningful connective
        if (!Sets.intersection(fromTerms, toTerms).isEmpty()) {
            return null;
        }

        final List<Dep> path = this.document.getDepPath(from, to);
        if (path == null) {
            return null;
        }

        // Drop dependencies internal to either endpoint cluster
        for (final Iterator<Dep> i = path.iterator(); i.hasNext();) {
            final Dep dep = i.next();
            if (fromTerms.contains(dep.getFrom()) && fromTerms.contains(dep.getTo())
                    || toTerms.contains(dep.getFrom()) && toTerms.contains(dep.getTo())) {
                i.remove();
            }
        }

        // NOTE(review): assumes the filtered path is non-empty (a crossing dep must remain
        // since the clusters are disjoint) — path.get(0) would otherwise throw; confirm.
        if (fromTerms.contains(path.get(0).getTo())) {
            return null;
        }

        // Join lemmas of intermediate dep sources, skipping coordination links.
        // NOTE(review): the loop starts at i = 1, deliberately excluding the first dep —
        // presumably because its source is the endpoint itself; verify.
        final StringBuilder builder = new StringBuilder();
        for (int i = 1; i < path.size(); ++i) {
            final Dep dep = path.get(i);
            final String func = dep.getRfunc();
            final Term term = dep.getFrom();
            if (!func.equalsIgnoreCase("COORD") && !func.equals("CONJ")) {
                builder.append(builder.length() > 0 ? "_" : "")
                        .append(term.getLemma().toLowerCase().replace(' ', '_'));
            }
        }

        return builder.toString();
    }
1774
1775 @Nullable
1776 private Annotation defineAnnotation(final Term head, final Iterable<Term> terms) {
1777 if (head == null) {
1778 return null;
1779 }
1780 Annotation ann = this.annotations.get(head.getId());
1781 if (ann == null) {
1782 ann = new Annotation(head, terms);
1783 this.annotations.put(head.getId(), ann);
1784 }
1785 return ann;
1786 }
1787
1788 private IRI mintIRI(final String id, @Nullable final String suggestedLocalName) {
1789 String localName = this.mintedIRIs.get(id);
1790 if (localName == null) {
1791 final String name = MoreObjects.firstNonNull(suggestedLocalName, id);
1792 final StringBuilder builder = new StringBuilder();
1793 for (int i = 0; i < name.length(); ++i) {
1794 final char c = name.charAt(i);
1795 builder.append(Character.isWhitespace(c) ? '_' : c);
1796 }
1797 final String base = builder.toString();
1798 int counter = 1;
1799 while (true) {
1800 localName = base + (counter == 1 ? "" : "_" + counter);
1801 if (!this.mintedIRIs.inverse().containsKey(localName)) {
1802 this.mintedIRIs.put(id, localName);
1803 break;
1804 }
1805 ++counter;
1806 }
1807 }
1808 return FACTORY.createIRI(Util.cleanIRI(this.baseIRI + "#" + localName));
1809 }
1810
1811 @Nullable
1812 private IRI mintRefIRI(@Nullable final String resource, @Nullable final String reference) {
1813 if (!Strings.isNullOrEmpty(resource) && !Strings.isNullOrEmpty(reference)) {
1814 final String normResource = resource.toLowerCase();
1815 final String namespace = RDFGenerator.this.namespaceMap.get(normResource);
1816 if (namespace != null) {
1817 return FACTORY
1818 .createIRI(Util.cleanIRI(namespace + reference.replace('#', '.')));
1819 }
1820 }
1821 return null;
1822 }
1823
1824 private void emitMeta(@Nullable final IRI subject, @Nullable final IRI property,
1825 @Nullable final Object objects) {
1826 if (subject != null && property != null) {
1827 for (final Value object : extract(Value.class, objects,
1828 RDF.TYPE.equals(property) ? RDFGenerator.this.typeMap : null)) {
1829 this.statements.add(FACTORY.createStatement(subject, property, object));
1830 }
1831 }
1832 }
1833
    /**
     * Emits reified fact statements: each extracted object value produces a quad in a
     * fact-specific named graph (derived by hashing subject/predicate/object), optionally
     * linked to the expressing mention. Null subject or property makes the call a no-op.
     *
     * @param subject the fact subject, possibly null
     * @param property the fact predicate, possibly null
     * @param objects a value, an iterable/array of values, or null
     * @param mention the mention expressing the fact, or null
     * @param confidence an optional numeric confidence (currently unused, see below)
     */
    private void emitFact(@Nullable final IRI subject, @Nullable final IRI property,
            @Nullable final Object objects, @Nullable final IRI mention,
            @Nullable final Object confidence) {
        if (subject != null && property != null) {
            for (final Value object : extract(Value.class, objects,
                    RDF.TYPE.equals(property) ? RDFGenerator.this.typeMap : null)) {
                // Each fact lives in its own named graph identified by a content hash
                final IRI factIRI = hash(subject, property, object);
                this.statements
                        .add(FACTORY.createStatement(subject, property, object, factIRI));
                if (mention != null) {
                    this.statements.add(
                            FACTORY.createStatement(factIRI, KS_OLD.EXPRESSED_BY, mention));
                }
                if (confidence instanceof Number) {
                    final double confidenceValue = ((Number) confidence).doubleValue();
                    if (confidenceValue != 0.0) {
                        // NOTE(review): intentionally empty — confidence emission appears
                        // to have been disabled; the confidence value is currently dropped.
                    }
                }
            }
        }
    }
1858
    /**
     * Post-processes the emitted statements: smushes owl:sameAs equivalences (preferring
     * DBpedia IRIs), then rewrites composite "group" individuals (subjects of ks:include).
     * Named groups (DBpedia IRIs or foaf:name'd) are kept as-is; anonymous groups are
     * dissolved by distributing their statements over their component members.
     *
     * @param stmts the raw statements to merge
     * @return the merged statements
     * @throws RDFHandlerException on RDF processing failure
     */
    private Iterable<Statement> merge(final Iterable<Statement> stmts)
            throws RDFHandlerException {

        final List<Statement> smushedStmts = Lists.newArrayList();

        // Smush owl:sameAs clusters, canonicalizing on DBpedia resource IRIs
        RDFProcessors.smush(null, true, "http://dbpedia.org/resource/")
                .wrap(RDFSources.wrap(stmts)).emit(RDFHandlers.wrap(smushedStmts), 1);

        // Index group membership (ks:include) and which resources carry a foaf:name
        final Set<Resource> named = Sets.newHashSet();
        final Multimap<Resource, Resource> groups = HashMultimap.create();
        for (final Statement stmt : smushedStmts) {
            if (stmt.getPredicate().equals(KS_OLD.INCLUDE)) {
                groups.put(stmt.getSubject(), (Resource) stmt.getObject());
            } else if (stmt.getPredicate().equals(FOAF.NAME)) {
                named.add(stmt.getSubject());
            }
        }

        // Partition statements: local owl:sameAs links are dropped; statements touching a
        // group go to groupProps (one group endpoint) or groupRels (both endpoints groups);
        // everything else passes through
        final List<Statement> output = Lists.newArrayList();
        final Multimap<Resource, Statement> groupProps = HashMultimap.create();
        final Multimap<Resource, Statement> groupRels = HashMultimap.create();
        for (final Statement stmt : smushedStmts) {
            final Resource subj = stmt.getSubject();
            final Value obj = stmt.getObject();
            final boolean subjIsGroup = groups.containsKey(subj);
            final boolean objIsGroup = groups.containsKey(obj);
            if (stmt.getPredicate().equals(OWL.SAMEAS)
                    && (obj instanceof BNode || obj.stringValue().startsWith(this.baseIRI))) {
                // drop: residual sameAs between local/bnode instances
            } else if (subjIsGroup && objIsGroup && !subj.equals(obj)) {
                groupRels.put(subj, stmt);
                groupRels.put((Resource) obj, stmt);
            } else if (subjIsGroup) {
                groupProps.put(subj, stmt);
            } else if (objIsGroup) {
                groupProps.put((Resource) obj, stmt);
            } else {
                output.add(stmt);
            }
        }

        // Rewrite each group
        final ValueFactory vf = Statements.VALUE_FACTORY;
        for (final Resource composite : groups.keySet()) {
            final Collection<Resource> components = groups.get(composite);
            final boolean isNamed = composite instanceof IRI
                    && ((IRI) composite).getNamespace().equals("http://dbpedia.org/resource/")
                    || named.contains(composite);
            if (isNamed) {
                // Named groups survive: keep their properties; group-to-group relations
                // are demoted to plain properties of the other endpoint
                output.addAll(groupProps.get(composite));
                for (final Statement stmt : groupRels.removeAll(composite)) {
                    if (stmt.getSubject().equals(composite)) {
                        groupRels.remove(stmt.getObject(), stmt);
                        groupProps.put((Resource) stmt.getObject(), stmt);
                    } else {
                        groupRels.remove(stmt.getSubject(), stmt);
                        groupProps.put(stmt.getSubject(), stmt);
                    }
                }
            } else {
                // Anonymous groups dissolve: relations and properties are replicated once
                // per component, replacing the composite endpoint with the component
                for (final Statement stmt : groupRels.removeAll(composite)) {
                    final Resource subj = stmt.getSubject();
                    final IRI pred = stmt.getPredicate();
                    final Value obj = stmt.getObject();
                    final Resource ctx = stmt.getContext();
                    if (subj.equals(composite)) {
                        groupRels.remove(obj, stmt);
                        for (final Resource component : components) {
                            groupProps.put((Resource) obj,
                                    vf.createStatement(component, pred, obj, ctx));
                        }
                    } else {
                        groupRels.remove(subj, stmt);
                        for (final Resource component : components) {
                            groupProps.put(subj,
                                    vf.createStatement(subj, pred, component, ctx));
                        }
                    }
                }
                for (final Statement stmt : groupProps.get(composite)) {
                    final IRI pred = stmt.getPredicate();
                    final Resource ctx = stmt.getContext();
                    Collection<Resource> subjs = ImmutableList.of(stmt.getSubject());
                    Collection<? extends Value> objs = ImmutableList.of(stmt.getObject());
                    if (composite.equals(stmt.getSubject())) {
                        subjs = components;
                        // membership and labels of the dissolved group are not replicated
                        if (KS_OLD.INCLUDE.equals(pred) || RDFS.LABEL.equals(pred)) {
                            continue;
                        }
                    }
                    if (composite.equals(stmt.getObject())) {
                        objs = components;
                    }
                    for (final Resource subj : subjs) {
                        for (final Value obj : objs) {
                            output.add(Statements.VALUE_FACTORY.createStatement(subj, pred,
                                    obj, ctx));
                        }
                    }
                }
            }
        }

        return output;
    }
1965
1966 @SuppressWarnings("unchecked")
1967 private <T extends Value> Collection<T> extract(final Class<T> clazz,
1968 @Nullable final Object object, @Nullable final Multimap<String, ? extends T> map) {
1969 if (object == null) {
1970 return ImmutableList.of();
1971 } else if (clazz.isInstance(object)) {
1972 return ImmutableList.of((T) object);
1973 } else if (object instanceof Iterable<?>) {
1974 final List<T> list = Lists.newArrayList();
1975 for (final Object element : (Iterable<?>) object) {
1976 list.addAll(extract(clazz, element, map));
1977 }
1978 return list;
1979 } else if (object.getClass().isArray()) {
1980 final List<T> list = Lists.newArrayList();
1981 final int length = Array.getLength(object);
1982 for (int i = 0; i < length; ++i) {
1983 list.addAll(extract(clazz, Array.get(object, i), map));
1984 }
1985 return list;
1986 } else if (map != null) {
1987 return (Collection<T>) map.get(object.toString());
1988 } else {
1989 return ImmutableList.of(Statements.convert(object, clazz));
1990 }
1991 }
1992
1993 private IRI hash(final Resource subject, final IRI predicate, final Value object) {
1994 final List<String> list = Lists.newArrayList();
1995 for (final Value value : new Value[] { subject, predicate, object }) {
1996 if (value instanceof IRI) {
1997 list.add("\u0001");
1998 list.add(value.stringValue());
1999 } else if (value instanceof BNode) {
2000 list.add("\u0002");
2001 list.add(((BNode) value).getID());
2002 } else if (value instanceof Literal) {
2003 final Literal l = (Literal) value;
2004 list.add("\u0003");
2005 list.add(l.getLabel());
2006 if (!l.getDatatype().equals(XMLSchema.STRING)) {
2007 list.add(l.getDatatype().stringValue());
2008 } else if (l.getLanguage().isPresent()) {
2009 list.add(l.getLanguage().get());
2010 }
2011 }
2012 }
2013 final String id = Hash.murmur3(list.toArray(new String[list.size()])).toString();
2014 return FACTORY.createIRI("fact:" + id);
2015 }
2016
2017 }
2018
2019 private static final class Annotation {
2020
2021 final Term head;
2022
2023 final List<Term> extent;
2024
2025 IRI objectIRI;
2026
2027 IRI predicateIRI;
2028
2029 Annotation(final Term head, final Iterable<Term> extent) {
2030 this.head = head;
2031 this.extent = ImmutableList.copyOf(extent);
2032 this.objectIRI = null;
2033 this.predicateIRI = null;
2034 }
2035
2036 }
2037
2038 }