1 package ixa.kaflib;
2
3 import org.jdom2.Element;
4
5 import java.io.Serializable;
6 import java.util.*;
7
8
9
10
11 class AnnotationContainer implements Serializable {
12
13 private String rawText;
14
15
16
17
18 private List<WF> text;
19
20
21
22
23 private int nextOffset;
24
25
26
27
28 private List<Term> terms;
29
30 private Map<String, List<Mark>> marks;
31
32
33
34
35 private List<Dep> deps;
36
37
38
39
40 private List<Chunk> chunks;
41
42
43
44
45 private List<Entity> entities;
46
47
48
49
50 private List<Feature> properties;
51
52
53
54
55 private List<Feature> categories;
56
57
58
59
60 private List<Coref> coreferences;
61
62
63
64
65 private List<Timex3> timeExpressions;
66
67
68 private List<TLink> tLinks;
69
70
71 private List<CLink> cLinks;
72
73
74
75
76 private List<Topic> topics;
77
78
79
80
81 private List<Factuality> factualities;
82
83
84
85
86 private List<LinkedEntity> linkedEntities;
87
88
89
90
91 private List<SSTspan> sstSpans;
92
93
94
95
96 private List<Opinion> opinions;
97
98
99
100
101 private List<Relation> relations;
102
103
104
105
106 private List<Predicate> predicates;
107
108
109
110
111 private List<Tree> trees;
112 private HashMap<Integer, String> constituencyStrings;
113
114
115
116
117 private List<Element> unknownLayers;
118
119
120
121
122 private HashMap<String, List<Term>> termsIndexedByWF;
123 private HashMap<String, Map<String, List<Mark>>> marksIndexedByTerm;
124 private HashMap<String, List<Dep>> depsIndexedByTerm;
125 private HashMap<String, List<Chunk>> chunksIndexedByTerm;
126 private HashMap<String, List<Entity>> entitiesIndexedByTerm;
127 private HashMap<String, List<Coref>> corefsIndexedByTerm;
128 private HashMap<String, List<Timex3>> timeExsIndexedByWF;
129 private HashMap<String, List<Factuality>> factsIndexedByWF;
130 private HashMap<String, List<LinkedEntity>> linkedEntitiesIndexedByWF;
131 private HashMap<String, List<SSTspan>> sstSpansIndexedByTerm;
132 private HashMap<String, List<Feature>> propertiesIndexedByTerm;
133 private HashMap<String, List<Feature>> categoriesIndexedByTerm;
134 private HashMap<String, List<Opinion>> opinionsIndexedByTerm;
135 private HashMap<String, List<Relation>> relationsIndexedByRelational;
136 private HashMap<String, List<Predicate>> predicatesIndexedByTerm;
137
138 HashMap<Integer, List<WF>> textIndexedBySent;
139 HashMap<Integer, List<Term>> termsIndexedBySent;
140 HashMap<Integer, Map<String, List<Mark>>> marksIndexedBySent;
141 HashMap<Integer, List<Entity>> entitiesIndexedBySent;
142 HashMap<Integer, List<Dep>> depsIndexedBySent;
143 HashMap<Integer, List<Chunk>> chunksIndexedBySent;
144 HashMap<Integer, List<Coref>> corefsIndexedBySent;
145 HashMap<Integer, List<Timex3>> timeExsIndexedBySent;
146 HashMap<Integer, List<Factuality>> factsIndexedBySent;
147 HashMap<Integer, List<LinkedEntity>> linkedEntitiesIndexedBySent;
148 HashMap<Integer, List<SSTspan>> sstSpansIndexedBySent;
149 HashMap<Integer, List<Feature>> propertiesIndexedBySent;
150 HashMap<Integer, List<Feature>> categoriesIndexedBySent;
151 HashMap<Integer, List<Opinion>> opinionsIndexedBySent;
152 HashMap<Integer, List<Relation>> relationsIndexedBySent;
153 HashMap<Integer, List<Predicate>> predicatesIndexedBySent;
154 HashMap<Integer, List<Tree>> treesIndexedBySent;
155
156 HashMap<Integer, LinkedHashSet<Integer>> sentsIndexedByParagraphs;
157
158
159
160
/**
 * Creates an empty container: every layer and every lookup index starts out
 * empty, the raw text is the empty string and the next offset is 0.
 */
AnnotationContainer() {
    rawText = "";
    nextOffset = 0;

    // Annotation layers (diamond operator instead of raw types, so the
    // compiler checks the element types).
    text = new ArrayList<>();
    terms = new ArrayList<>();
    marks = new HashMap<>();
    deps = new ArrayList<>();
    chunks = new ArrayList<>();
    entities = new ArrayList<>();
    properties = new ArrayList<>();
    sstSpans = new ArrayList<>();
    categories = new ArrayList<>();
    coreferences = new ArrayList<>();
    timeExpressions = new ArrayList<>();
    factualities = new ArrayList<>();
    linkedEntities = new ArrayList<>();
    opinions = new ArrayList<>();
    relations = new ArrayList<>();
    predicates = new ArrayList<>();
    trees = new ArrayList<>();
    unknownLayers = new ArrayList<>();
    topics = new ArrayList<>();
    tLinks = new ArrayList<>();
    cLinks = new ArrayList<>();
    constituencyStrings = new HashMap<>();

    // Indexes keyed by word-form / term / relational id.
    termsIndexedByWF = new HashMap<>();
    marksIndexedByTerm = new HashMap<>();
    depsIndexedByTerm = new HashMap<>();
    chunksIndexedByTerm = new HashMap<>();
    entitiesIndexedByTerm = new HashMap<>();
    corefsIndexedByTerm = new HashMap<>();
    timeExsIndexedByWF = new HashMap<>();
    linkedEntitiesIndexedByWF = new HashMap<>();
    sstSpansIndexedByTerm = new HashMap<>();
    factsIndexedByWF = new HashMap<>();
    propertiesIndexedByTerm = new HashMap<>();
    categoriesIndexedByTerm = new HashMap<>();
    opinionsIndexedByTerm = new HashMap<>();
    relationsIndexedByRelational = new HashMap<>();
    predicatesIndexedByTerm = new HashMap<>();

    // Indexes keyed by sentence number.
    textIndexedBySent = new HashMap<>();
    termsIndexedBySent = new HashMap<>();
    marksIndexedBySent = new HashMap<>();
    entitiesIndexedBySent = new HashMap<>();
    depsIndexedBySent = new HashMap<>();
    chunksIndexedBySent = new HashMap<>();
    corefsIndexedBySent = new HashMap<>();
    timeExsIndexedBySent = new HashMap<>();
    linkedEntitiesIndexedBySent = new HashMap<>();
    sstSpansIndexedBySent = new HashMap<>();
    factsIndexedBySent = new HashMap<>();
    propertiesIndexedBySent = new HashMap<>();
    categoriesIndexedBySent = new HashMap<>();
    opinionsIndexedBySent = new HashMap<>();
    relationsIndexedBySent = new HashMap<>();
    predicatesIndexedBySent = new HashMap<>();
    treesIndexedBySent = new HashMap<>();

    // Paragraph -> sentence numbers.
    sentsIndexedByParagraphs = new HashMap<>();
}
224
225 private <T> void indexBySent(T annotation, Integer sent, HashMap<Integer, List<T>> index) {
226 if (sent > 0) {
227 if (index.get(sent) == null) {
228 index.put(sent, new ArrayList<T>());
229 }
230 index.get(sent).add(annotation);
231 }
232 }
233
234 private void indexMarkBySent(Mark mark, String source, Integer sent) {
235 if (sent > 0) {
236 if (marksIndexedBySent.get(sent) == null) {
237 marksIndexedBySent.put(sent, new HashMap<String, List<Mark>>());
238 }
239 if (marksIndexedBySent.get(sent).get(source) == null) {
240 marksIndexedBySent.get(sent).put(source, new ArrayList<Mark>());
241 }
242 marksIndexedBySent.get(sent).get(source).add(mark);
243 }
244 }
245
246 void indexSentByPara(Integer sent, Integer para) {
247 if ((sent > 0) && (para > 0)) {
248 if (this.sentsIndexedByParagraphs.get(para) == null) {
249 this.sentsIndexedByParagraphs.put(para, new LinkedHashSet<Integer>());
250 }
251 this.sentsIndexedByParagraphs.get(para).add(sent);
252 }
253 }
254
255 public List<Integer> getSentsByParagraph(Integer para) {
256 return new ArrayList<Integer>(this.sentsIndexedByParagraphs.get(para));
257 }
258
259 <T> List<T> getLayerByPara(Integer para, HashMap<Integer, List<T>> index) {
260 List<T> layer = new ArrayList<T>();
261 for (Integer sent : this.getSentsByParagraph(para)) {
262 layer.addAll(index.get(sent));
263 }
264 return layer;
265 }
266
267 String getRawText() {
268 return rawText;
269 }
270
271
272
273
274 List<WF> getText() {
275 return text;
276 }
277
278
279
280
281 List<Term> getTerms() {
282 return terms;
283 }
284
285 List<SSTspan> getSstSpans() {
286 return sstSpans;
287 }
288
289 List<String> getMarkSources() {
290 return new ArrayList<String>(marks.keySet());
291 }
292
293 List<Mark> getMarks(String source) {
294 return (marks.get(source) == null) ? new ArrayList<Mark>() : marks.get(source);
295 }
296
297
298
299
300 List<Dep> getDeps() {
301 return deps;
302 }
303
304
305
306
307 List<Chunk> getChunks() {
308 return chunks;
309 }
310
311
312
313
314 List<Entity> getEntities() {
315 return entities;
316 }
317
318
319
320
321 List<Feature> getProperties() {
322 return properties;
323 }
324
325
326
327
328 List<Feature> getCategories() {
329 return categories;
330 }
331
332
333
334
335 HashMap<Integer, String> getConstituencyStrings() {
336 return constituencyStrings;
337 }
338
339
340
341
342 List<Coref> getCorefs() {
343 return coreferences;
344 }
345
346
347
348
349 List<Timex3> getTimeExs() {
350 return timeExpressions;
351 }
352
353
354 List<TLink> getTLinks() {
355 return this.tLinks;
356 }
357
358
359 List<CLink> getCLinks() {
360 return this.cLinks;
361 }
362
363 List<Factuality> getFactualities() {
364 return factualities;
365 }
366
367 List<LinkedEntity> getLinkedEntities() {
368 return linkedEntities;
369 }
370
371
372
373
374 List<Opinion> getOpinions() {
375 return opinions;
376 }
377
378
379
380
381 List<Topic> getTopics() {
382 return topics;
383 }
384
385
386
387
388 List<Relation> getRelations() {
389 return relations;
390 }
391
392
393
394
395 List<Predicate> getPredicates() {
396 return predicates;
397 }
398
399
400
401
402 List<Tree> getConstituents() {
403 return trees;
404 }
405
406
407
408
409 List<Element> getUnknownLayers() {
410 return unknownLayers;
411 }
412
413 void setRawText(String str) {
414 rawText = str;
415 }
416
417
418
419
420 void add(WF wf) {
421 text.add(wf);
422
423 this.indexBySent(wf, wf.getSent(), this.textIndexedBySent);
424 }
425
426 private <T> void indexAnnotation(T annotation, String hashId, HashMap<String, List<T>> index) {
427 if (index.get(hashId) == null) {
428 index.put(hashId, new ArrayList<T>());
429 }
430 index.get(hashId).add(annotation);
431 }
432
433 private void indexMarkByTerm(Mark mark, String source, String tid) {
434 if (marksIndexedByTerm.get(tid) == null) {
435 marksIndexedByTerm.put(tid, new HashMap<String, List<Mark>>());
436 }
437 if (marksIndexedByTerm.get(tid).get(source) == null) {
438 marksIndexedByTerm.get(tid).put(source, new ArrayList<Mark>());
439 }
440 marksIndexedByTerm.get(tid).get(source).add(mark);
441 }
442
443
444
445
446 void add(Term term) {
447 this.add(term, this.terms.size());
448 }
449
450 void add(Term term, int index) {
451 terms.add(index, term);
452 for (WF wf : term.getWFs()) {
453 indexAnnotation(term, wf.getId(), termsIndexedByWF);
454 }
455 if (!term.isComponent()) {
456 this.indexBySent(term, term.getSent(), this.termsIndexedBySent);
457 }
458 }
459
460 void remove(Term term) {
461 this.terms.remove(term);
462 }
463
464 void add(Mark mark, String source) {
465 List<Mark> sourceMarks = marks.get(source);
466 if (sourceMarks == null) {
467 sourceMarks = new ArrayList<Mark>();
468 }
469 sourceMarks.add(mark);
470 marks.put(source, sourceMarks);
471 for (Term term : mark.getSpan().getTargets()) {
472 indexMarkByTerm(mark, source, term.getId());
473 }
474 this.indexMarkBySent(mark, source, mark.getSpan().getTargets().get(0).getSent());
475 }
476
477
478
479
480 void add(Dep dep) {
481 deps.add(dep);
482
483 if (dep.getFrom() != null) {
484 String tId = dep.getFrom().getId();
485 indexAnnotation(dep, tId, depsIndexedByTerm);
486 }
487 if (dep.getTo() != null) {
488 String tId = dep.getTo().getId();
489 indexAnnotation(dep, tId, depsIndexedByTerm);
490 }
491 this.indexBySent(dep, dep.getFrom().getSent(), this.depsIndexedBySent);
492 }
493
494
495
496
497 void add(Chunk chunk) {
498 chunks.add(chunk);
499
500 for (Term term : chunk.getTerms()) {
501 indexAnnotation(chunk, term.getId(), chunksIndexedByTerm);
502 }
503 this.indexBySent(chunk, chunk.getSpan().getTargets().get(0).getSent(), this.chunksIndexedBySent);
504 }
505
506
507
508
509 void add(Entity entity) {
510 entities.add(entity);
511
512 for (Term term : entity.getTerms()) {
513 indexAnnotation(entity, term.getId(), entitiesIndexedByTerm);
514 }
515 this.indexBySent(entity, entity.getSpans().get(0).getTargets().get(0).getSent(), this.entitiesIndexedBySent);
516 }
517
518
519
520
521 void add(Feature feature) {
522 if (feature.isAProperty()) {
523 properties.add(feature);
524
525 for (Term term : feature.getTerms()) {
526 indexAnnotation(feature, term.getId(), propertiesIndexedByTerm);
527 }
528
529 }
530 else {
531 categories.add(feature);
532
533 for (Term term : feature.getTerms()) {
534 indexAnnotation(feature, term.getId(), categoriesIndexedByTerm);
535 }
536
537 }
538 }
539
540
541
542
543 void add(Coref coref) {
544 coreferences.add(coref);
545
546 for (Span<Term> span : coref.getSpans()) {
547 for (Term term : span.getTargets()) {
548 indexAnnotation(coref, term.getId(), corefsIndexedByTerm);
549 }
550 }
551
552 }
553
554
555 void add(Timex3 timex3) {
556 timeExpressions.add(timex3);
557
558 if(timex3.hasSpan()){
559 for (WF wf : timex3.getSpan().getTargets()) {
560 indexAnnotation(timex3, wf.getId(), timeExsIndexedByWF);
561 }
562 }
563 }
564
565
566 void add(TLink tLink) {
567 tLinks.add(tLink);
568
569 }
570
571
572 void add(CLink cLink) {
573 cLinks.add(cLink);
574
575 }
576
577
578
579
580 void add(Factuality factuality) {
581 factualities.add(factuality);
582
583 if (factuality.getWFs() != null) {
584 for (WF wf : factuality.getWFs()) {
585 indexAnnotation(factuality, wf.getId(), factsIndexedByWF);
586 }
587 }
588 }
589
590
591
592
593 void add(LinkedEntity linkedEntity) {
594 linkedEntities.add(linkedEntity);
595
596 if (linkedEntity.getWFs() != null) {
597 for (WF wf : linkedEntity.getWFs().getTargets()) {
598 indexAnnotation(linkedEntity, wf.getId(), linkedEntitiesIndexedByWF);
599 }
600 }
601 }
602
603
604
605
606 void add(SSTspan sst) {
607 sstSpans.add(sst);
608
609 if (sst.getTerms() != null) {
610 for (Term t : sst.getTerms().getTargets()) {
611 indexAnnotation(sst, t.getId(), sstSpansIndexedByTerm);
612 }
613 }
614 }
615
616
617
618
619 void add(Topic t) {
620 topics.add(t);
621 }
622
623
624
625
626 void add(Opinion opinion) {
627 opinions.add(opinion);
628
629
630
631
632
633
634
635
636
637
638
639 }
640
641
642
643
644 void add(Relation relation) {
645 relations.add(relation);
646
647 if (relation.getFrom() != null) {
648 String rId = relation.getFrom().getId();
649 indexAnnotation(relation, rId, relationsIndexedByRelational);
650 }
651 if (relation.getTo() != null) {
652 String rId = relation.getTo().getId();
653 indexAnnotation(relation, rId, relationsIndexedByRelational);
654 }
655 }
656
657
658
659
660 void add(Predicate predicate) {
661 predicates.add(predicate);
662
663 for (Term term : predicate.getTerms()) {
664 indexAnnotation(predicate, term.getId(), predicatesIndexedByTerm);
665 }
666 this.indexBySent(predicate, predicate.getSpan().getTargets().get(0).getSent(), this.predicatesIndexedBySent);
667 }
668
669
670
671
672 void add(Tree tree) {
673 add(tree, null);
674 }
675
676 void add(Tree tree, Integer sent) {
677 trees.add(tree);
678
679 if (sent == null) {
680 TreeNode currentNode = tree.getRoot();
681 while (!currentNode.isTerminal()) {
682 currentNode = ((NonTerminal) currentNode).getChildren().get(0);
683 }
684 sent = ((Terminal) currentNode).getSpan().getTargets().get(0).getSent();
685 }
686
687 if (sent != null) {
688 List<Tree> sentTrees = treesIndexedBySent.get(sent);
689 if (sentTrees == null) {
690 sentTrees = new ArrayList<Tree>();
691 treesIndexedBySent.put(sent, sentTrees);
692 }
693 sentTrees.add(tree);
694 }
695 }
696
697
698
699
700 void add(String constituencyString, Integer sent) {
701 constituencyStrings.put(sent, constituencyString);
702 }
703
704
705
706
707 void add(Element layer) {
708 unknownLayers.add(layer);
709 }
710
711
712
713
714 void indexTermBySent(Term term, Integer sent) {
715 if (sent == -1) {
716 throw new IllegalStateException("You can't call indexTermBySent not having defined the sentence for its WFs");
717 }
718 List<Term> sentTerms = termsIndexedBySent.get(sent);
719 if (sentTerms == null) {
720 sentTerms = new ArrayList<Term>();
721 termsIndexedBySent.put(sent, sentTerms);
722 }
723 sentTerms.add(term);
724 }
725
726
727
728
729 List<List<WF>> getSentences() {
730 List<List<WF>> sentences = new ArrayList<List<WF>>();
731 Set<Integer> sentNumsSet = this.textIndexedBySent.keySet();
732 List<Integer> sentNumsList = new ArrayList<Integer>(sentNumsSet);
733 Collections.sort(sentNumsList);
734 for (int i : sentNumsList) {
735 List<WF> wfs = this.textIndexedBySent.get(i);
736 sentences.add(wfs);
737 }
738 return sentences;
739 }
740
741 Integer termPosition(Term term) {
742 return this.terms.indexOf(term);
743 }
744
745
746
747
748 List<WF> getSentenceWFs(int sent) {
749 return this.textIndexedBySent.get(sent);
750 }
751
752
753
754
755 List<Term> getSentenceTerms(int sent) {
756 return this.termsIndexedBySent.get(sent);
757 }
758
759 Term getTermByWF(WF wf) {
760 List<Term> terms = this.termsIndexedByWF.get(wf.getId());
761 if (terms == null) {
762 return null;
763 }
764 return terms.get(0);
765 }
766
767 List<Term> getTermsByWF(WF wf) {
768 List<Term> terms = this.termsIndexedByWF.get(wf.getId());
769 return (terms == null) ? new ArrayList<Term>() : terms;
770 }
771
772
773
774
775
776
777
778 List<Term> getTermsByWFs(List<WF> wfs) {
779 LinkedHashSet<Term> terms = new LinkedHashSet<Term>();
780 for (WF wf : wfs) {
781 terms.addAll(getTermsByWF(wf));
782 }
783 return new ArrayList<Term>(terms);
784 }
785
786 List<Mark> getMarksByTerm(Term term, String source) {
787 Map<String, List<Mark>> marks = this.marksIndexedByTerm.get(term.getId());
788 if (marks == null) {
789 return new ArrayList<Mark>();
790 }
791 List<Mark> sourceMarks = marks.get(source);
792 return (sourceMarks == null) ? new ArrayList<Mark>() : sourceMarks;
793 }
794
795 List<Dep> getDepsByTerm(Term term) {
796 List<Dep> deps = this.depsIndexedByTerm.get(term.getId());
797 return (deps == null) ? new ArrayList<Dep>() : deps;
798 }
799
800 List<Chunk> getChunksByTerm(Term term) {
801 List<Chunk> chunks = this.chunksIndexedByTerm.get(term.getId());
802 return (chunks == null) ? new ArrayList<Chunk>() : chunks;
803 }
804
805 List<Entity> getEntitiesByTerm(Term term) {
806 List<Entity> entities = null;
807 try {
808 entities = this.entitiesIndexedByTerm.get(term.getId());
809 } catch (Exception e) {
810 }
811 return (entities == null) ? new ArrayList<Entity>() : entities;
812 }
813
814 List<Coref> getCorefsByTerm(Term term) {
815 List<Coref> corefs = this.corefsIndexedByTerm.get(term.getId());
816 return (corefs == null) ? new ArrayList<Coref>() : corefs;
817 }
818
819 List<Timex3> getTimeExsByWF(WF wf) {
820 List<Timex3> timeExs = this.timeExsIndexedByWF.get(wf.getId());
821 return (timeExs == null) ? new ArrayList<Timex3>() : timeExs;
822 }
823
824 List<Feature> getPropertiesByTerm(Term term) {
825 List<Feature> properties = this.propertiesIndexedByTerm.get(term.getId());
826 return (properties == null) ? new ArrayList<Feature>() : properties;
827 }
828
829 List<Feature> getCategoriesByTerm(Term term) {
830 List<Feature> categories = this.categoriesIndexedByTerm.get(term.getId());
831 return (categories == null) ? new ArrayList<Feature>() : categories;
832 }
833
834 List<Opinion> getOpinionsByTerm(Term term) {
835 List<Opinion> opinions = this.opinionsIndexedByTerm.get(term.getId());
836 return (opinions == null) ? new ArrayList<Opinion>() : opinions;
837 }
838
839 List<Relation> getRelationsByRelational(Relational relational) {
840 List<Relation> relations = this.relationsIndexedByRelational.get(relational.getId());
841 return (relations == null) ? new ArrayList<Relation>() : relations;
842 }
843
844 List<Predicate> getPredicatesByTerm(Term term) {
845 List<Predicate> predicates = this.predicatesIndexedByTerm.get(term.getId());
846 return (predicates == null) ? new ArrayList<Predicate>() : predicates;
847 }
848
849 List<Dep> getDepsByTerms(List<Term> terms) {
850 LinkedHashSet<Dep> deps = new LinkedHashSet<Dep>();
851 for (Term term : terms) {
852 deps.addAll(getDepsByTerm(term));
853 }
854 return new ArrayList<Dep>(deps);
855 }
856
857 List<Chunk> getChunksByTerms(List<Term> terms) {
858 LinkedHashSet<Chunk> chunks = new LinkedHashSet<Chunk>();
859 for (Term term : terms) {
860 chunks.addAll(getChunksByTerm(term));
861 }
862 return new ArrayList<Chunk>(chunks);
863 }
864
865 List<Entity> getEntitiesByTerms(List<Term> terms) {
866 LinkedHashSet<Entity> entities = new LinkedHashSet<Entity>();
867 for (Term term : terms) {
868 entities.addAll(getEntitiesByTerm(term));
869 }
870 return new ArrayList<Entity>(entities);
871 }
872
873 List<Coref> getCorefsByTerms(List<Term> terms) {
874 LinkedHashSet<Coref> corefs = new LinkedHashSet<Coref>();
875 for (Term term : terms) {
876 corefs.addAll(getCorefsByTerm(term));
877 }
878 return new ArrayList<Coref>(corefs);
879 }
880
881 List<Timex3> getTimeExsByWFs(List<WF> wfs) {
882 LinkedHashSet<Timex3> timeExs = new LinkedHashSet<Timex3>();
883 for (WF wf : wfs) {
884 timeExs.addAll(getTimeExsByWF(wf));
885 }
886 return new ArrayList<Timex3>(timeExs);
887 }
888
889 List<Feature> getPropertiesByTerms(List<Term> terms) {
890 LinkedHashSet<Feature> properties = new LinkedHashSet<Feature>();
891 for (Term term : terms) {
892 properties.addAll(getPropertiesByTerm(term));
893 }
894 return new ArrayList<Feature>(properties);
895 }
896
897 List<Feature> getCategoriesByTerms(List<Term> terms) {
898 LinkedHashSet<Feature> categories = new LinkedHashSet<Feature>();
899 for (Term term : terms) {
900 categories.addAll(getCategoriesByTerm(term));
901 }
902 return new ArrayList<Feature>(categories);
903 }
904
905 List<Opinion> getOpinionsByTerms(List<Term> terms) {
906 LinkedHashSet<Opinion> opinions = new LinkedHashSet<Opinion>();
907 for (Term term : terms) {
908 opinions.addAll(getOpinionsByTerm(term));
909 }
910 return new ArrayList<Opinion>(opinions);
911 }
912
913 List<Relation> getRelationsByRelationals(List<Relational> relationals) {
914 LinkedHashSet<Relation> relations = new LinkedHashSet<Relation>();
915 for (Relational relational : relationals) {
916 relations.addAll(getRelationsByRelational(relational));
917 }
918 return new ArrayList<Relation>(relations);
919 }
920
921 List<Predicate> getPredicatesByTerms(List<Term> terms) {
922 LinkedHashSet<Predicate> predicates = new LinkedHashSet<Predicate>();
923 for (Term term : terms) {
924 predicates.addAll(getPredicatesByTerm(term));
925 }
926 return new ArrayList<Predicate>(predicates);
927 }
928
929
930
931
932 int getNextOffset() {
933 return nextOffset;
934 }
935
936
937
938
939
940
941
942
943 List<Term> getTermsByWFIds(List<String> wfIds) {
944 LinkedHashSet<Term> terms = new LinkedHashSet<Term>();
945 for (String wfId : wfIds) {
946 terms.addAll(this.termsIndexedByWF.get(wfId));
947 }
948 return new ArrayList<Term>(terms);
949 }
950
951 void removeLayer(KAFDocument.Layer layer) {
952 switch (layer) {
953 case text:
954 this.text.clear();
955 break;
956 case terms:
957 this.terms.clear();
958 break;
959 case deps:
960 this.deps.clear();
961 break;
962 case chunks:
963 this.chunks.clear();
964 break;
965 case entities:
966 this.entities.clear();
967 break;
968 case properties:
969 this.properties.clear();
970 break;
971 case categories:
972 this.categories.clear();
973 break;
974 case coreferences:
975 this.coreferences.clear();
976 break;
977 case opinions:
978 this.opinions.clear();
979 break;
980 case relations:
981 this.relations.clear();
982 break;
983 case srl:
984 this.predicates.clear();
985 break;
986 case constituency:
987 this.trees.clear();
988 break;
989 case timeExpressions:
990 this.timeExpressions.clear();
991 break;
992 case linkedEntities:
993 this.linkedEntities.clear();
994 break;
995 case constituencyStrings:
996
997 this.unknownLayers.clear();
998 break;
999 default:
1000 throw new IllegalArgumentException("Wrong layer");
1001 }
1002 }
1003
1004
1005 void removeAnnotation(Object annotation) {
1006 if (annotation instanceof Term) {
1007 Term term = (Term) annotation;
1008 terms.remove(term);
1009 for (WF wf : term.getWFs()) {
1010 unindexAnnotation(term, wf.getId(), termsIndexedByWF);
1011 }
1012 if (!term.isComponent()) {
1013 unindexBySent(term, term.getSent(), this.termsIndexedBySent);
1014 }
1015 } else if (annotation instanceof Mark) {
1016 throw new UnsupportedOperationException();
1017
1018 } else if (annotation instanceof Dep) {
1019 Dep dep = (Dep) annotation;
1020 deps.remove(dep);
1021 if (dep.getFrom() != null) {
1022 unindexAnnotation(dep, dep.getFrom().getId(), depsIndexedByTerm);
1023 }
1024 if (dep.getTo() != null) {
1025 unindexAnnotation(dep, dep.getTo().getId(), depsIndexedByTerm);
1026 }
1027 unindexBySent(dep, dep.getFrom().getSent(), this.depsIndexedBySent);
1028
1029 } else if (annotation instanceof Chunk) {
1030 throw new UnsupportedOperationException();
1031
1032 } else if (annotation instanceof Entity) {
1033 Entity entity = (Entity) annotation;
1034 entities.remove(entity);
1035 for (Term term : entity.getTerms()) {
1036 unindexAnnotation(entity, term.getId(), entitiesIndexedByTerm);
1037 }
1038 this.unindexBySent(entity, entity.getSpans().get(0).getTargets().get(0).getSent(),
1039 this.entitiesIndexedBySent);
1040
1041 } else if (annotation instanceof Feature) {
1042 throw new UnsupportedOperationException();
1043
1044 } else if (annotation instanceof Timex3) {
1045 Timex3 timex3 = (Timex3) annotation;
1046 timeExpressions.remove(timex3);
1047 if (timex3.getSpan().getTargets() != null) {
1048 for (WF wf : timex3.getSpan().getTargets()) {
1049 unindexAnnotation(timex3, wf.getId(), timeExsIndexedByWF);
1050 }
1051 }
1052 unindexBySent(timex3, timex3.getSpan().getTargets().get(0).getSent(), timeExsIndexedBySent);
1053
1054 } else if (annotation instanceof Coref) {
1055 Coref coref = (Coref) annotation;
1056 coreferences.remove(coref);
1057 for (Span<Term> span : coref.getSpans()) {
1058 for (Term term : span.getTargets()) {
1059 unindexAnnotation(coref, term.getId(), corefsIndexedByTerm);
1060 }
1061 }
1062
1063 } else if (annotation instanceof Factuality) {
1064 Factuality factuality = (Factuality) annotation;
1065 factualities.remove(factuality);
1066 if (factuality.getWFs() != null) {
1067 for (WF wf : factuality.getWFs()) {
1068 unindexAnnotation(factuality, wf.getId(), factsIndexedByWF);
1069 }
1070 }
1071
1072 } else if (annotation instanceof LinkedEntity) {
1073 LinkedEntity entity = (LinkedEntity) annotation;
1074 linkedEntities.remove(entity);
1075 if (entity.getWFs() != null) {
1076 for (WF wf : entity.getWFs().getTargets()) {
1077 unindexAnnotation(entity, wf.getId(), linkedEntitiesIndexedByWF);
1078 }
1079 }
1080
1081 } else if (annotation instanceof SSTspan) {
1082 SSTspan sst = (SSTspan) annotation;
1083 sstSpans.remove(sst);
1084 if (sst.getTerms() != null) {
1085 for (Term t : sst.getTerms().getTargets()) {
1086 unindexAnnotation(sst, t.getId(), sstSpansIndexedByTerm);
1087 }
1088 }
1089
1090 } else if (annotation instanceof Topic) {
1091 throw new UnsupportedOperationException();
1092
1093 } else if (annotation instanceof Opinion) {
1094 opinions.remove(annotation);
1095
1096 } else if (annotation instanceof Relation) {
1097 throw new UnsupportedOperationException();
1098
1099 } else if (annotation instanceof Predicate) {
1100 Predicate predicate = (Predicate) annotation;
1101 predicates.remove(predicate);
1102 for (Term term : predicate.getTerms()) {
1103 unindexAnnotation(predicate, term.getId(), predicatesIndexedByTerm);
1104 }
1105 unindexBySent(predicate, predicate.getSpan().getTargets().get(0).getSent(),
1106 this.predicatesIndexedBySent);
1107
1108 } else if (annotation instanceof Tree) {
1109 throw new UnsupportedOperationException();
1110 } else if (annotation instanceof Element) {
1111 throw new UnsupportedOperationException();
1112 }
1113 }
1114
1115 private <T> void unindexAnnotation(T annotation, String hashId, HashMap<String, List<T>> index) {
1116 List<T> list = index.get(hashId);
1117 if (list != null) {
1118 list.remove(annotation);
1119 if (list.isEmpty()) {
1120 index.remove(hashId);
1121 }
1122 }
1123 }
1124
1125 private <T> void unindexBySent(T annotation, Integer sent, HashMap<Integer, List<T>> index) {
1126 if (sent > 0) {
1127 List<T> list = index.get(sent);
1128 if (list != null) {
1129 list.remove(annotation);
1130 if (list.isEmpty()) {
1131 index.remove(sent);
1132 }
1133 }
1134 }
1135 }
1136
1137 }