1 package ixa.kaflib;
2
3 import com.google.gson.Gson;
4 import com.google.gson.GsonBuilder;
5 import org.jdom2.Element;
6 import org.jdom2.JDOMException;
7
8 import java.io.File;
9 import java.io.IOException;
10 import java.io.Reader;
11 import java.io.Serializable;
12 import java.util.*;
13 import java.util.concurrent.ConcurrentHashMap;
14 import java.util.regex.Pattern;
15
16
17
18
19
20
21 public class KAFDocument implements Serializable {
22
/** Names of the annotation layers known to this document class. */
public enum Layer {
    text,
    terms,
    marks,
    deps,
    chunks,
    entities,
    properties,
    categories,
    coreferences,
    opinions,
    relations,
    srl,
    constituency,
    timeExpressions,
    linkedEntities,
    constituencyStrings
}
26
/**
 * Plain holder for file-description values (author, title, file name and type,
 * page count, creation time). The constructor is private, so instances are only
 * created from inside the enclosing class.
 */
public class FileDesc implements Serializable {
    public String author;
    public String title;
    public String filename;
    public String filetype;
    public Integer pages;
    public String creationtime;

    private FileDesc() {
    }

    /** Renders all fields in a {@code FileDesc{...}} form; unset fields print as {@code null}. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("FileDesc{");
        text.append("author='").append(author).append('\'');
        text.append(", title='").append(title).append('\'');
        text.append(", filename='").append(filename).append('\'');
        text.append(", filetype='").append(filetype).append('\'');
        text.append(", pages=").append(pages);
        text.append(", creationtime='").append(creationtime).append('\'');
        text.append('}');
        return text.toString();
    }
}
50
/**
 * Plain holder for a public id and a URI. The constructor is private, so
 * instances are only created from inside the enclosing class.
 */
public class Public implements Serializable {
    /** Public identifier value. */
    public String publicId;
    /** URI value. */
    public String uri;

    private Public() {
        // no initialisation: both fields start out null
    }
}
58
59
60
61
62 private String lang;
63
64
65
66
67 private String version;
68
69
70
71
72 private Map<String, List<LinguisticProcessor>> lps;
73
74 private FileDesc fileDesc;
75
76 private Public _public;
77
78
79
80
81 private IdManager idManager;
82
83
84
85
86 private AnnotationContainer annotationContainer;
87
88
89
90
91 public KAFDocument(String lang, String version) {
92 this.lang = lang;
93 this.version = version;
94 lps = new LinkedHashMap<String, List<LinguisticProcessor>>();
95 idManager = new IdManager();
96 annotationContainer = new AnnotationContainer();
97 }
98
99
100
101
102
103
104 public static KAFDocument createFromFile(File file) throws IOException, JDOMException {
105 KAFDocument kaf = null;
106 kaf = ReadWriteManager.load(file);
107 return kaf;
108 }
109
110
111
112
113
114
115 public static KAFDocument createFromStream(Reader stream) throws IOException {
116 KAFDocument kaf = null;
117 try {
118 kaf = ReadWriteManager.load(stream);
119 } catch (JDOMException e) {
120 throw new IOException(e);
121 }
122 return kaf;
123 }
124
125
126
127
128 public void setLang(String lang) {
129 this.lang = lang;
130 }
131
132
133
134
135 public String getLang() {
136 return lang;
137 }
138
139
140
141
142 public void setVersion(String version) {
143 this.version = version;
144 }
145
146
147
148
149 public String getVersion() {
150 return version;
151 }
152
153
154
155
156 public LinguisticProcessor addLinguisticProcessor(String layer, String name) {
157 LinguisticProcessor lp = new LinguisticProcessor(layer, name);
158 List<LinguisticProcessor> layerLps = lps.get(layer);
159 if (layerLps == null) {
160 layerLps = new ArrayList<LinguisticProcessor>();
161 lps.put(layer, layerLps);
162 }
163 layerLps.add(lp);
164 return lp;
165 }
166
167 public LinguisticProcessor addLinguisticProcessor(String layer, LinguisticProcessor linguisticProcessor) {
168 List<LinguisticProcessor> layerLps = lps.get(layer);
169 if (layerLps == null) {
170 layerLps = new ArrayList<LinguisticProcessor>();
171 lps.put(layer, layerLps);
172 }
173 layerLps.add(linguisticProcessor);
174 return linguisticProcessor;
175 }
176
177 public void addLinguisticProcessors(Map<String, List<LinguisticProcessor>> lps) {
178 for (Map.Entry<String, List<LinguisticProcessor>> entry : lps.entrySet()) {
179 List<LinguisticProcessor> layerLps = entry.getValue();
180 for (LinguisticProcessor lp : layerLps) {
181 LinguisticProcessor newLp = this.addLinguisticProcessor(entry.getKey(), lp.name);
182 if (lp.hasTimestamp()) {
183 newLp.setTimestamp(lp.getTimestamp());
184 }
185 if (lp.hasBeginTimestamp()) {
186 newLp.setBeginTimestamp(lp.getBeginTimestamp());
187 }
188 if (lp.hasEndTimestamp()) {
189 newLp.setEndTimestamp(lp.getEndTimestamp());
190 }
191 if (lp.hasVersion()) {
192 newLp.setVersion(lp.getVersion());
193 }
194 }
195 }
196 }
197
198
199
200
201
202 public Map<String, List<LinguisticProcessor>> getLinguisticProcessors() {
203 return lps;
204 }
205
206
207
208
209 public boolean linguisticProcessorExists(String layer, String name, String version) {
210 List<LinguisticProcessor> layerLPs = lps.get(layer);
211 if (layerLPs == null) {
212 return false;
213 }
214 for (LinguisticProcessor lp : layerLPs) {
215 if (lp.version == null) {
216 return false;
217 }
218 else if (lp.name.equals(name) && lp.version.equals(version)) {
219 return true;
220 }
221 }
222 return false;
223 }
224
225
226
227
228 public boolean linguisticProcessorExists(String layer, String name) {
229 List<LinguisticProcessor> layerLPs = lps.get(layer);
230 if (layerLPs == null) {
231 return false;
232 }
233 for (LinguisticProcessor lp : layerLPs) {
234 if (lp.version != null) {
235 return false;
236 }
237 else if (lp.name.equals(name)) {
238 return true;
239 }
240 }
241 return false;
242 }
243
244 public FileDesc createFileDesc() {
245 this.fileDesc = new FileDesc();
246 return this.fileDesc;
247 }
248
249 public FileDesc getFileDesc() {
250 return this.fileDesc;
251 }
252
253 public Public createPublic() {
254 this._public = new Public();
255 return this._public;
256 }
257
258 public Public getPublic() {
259 return this._public;
260 }
261
262
263
264
265 AnnotationContainer getAnnotationContainer() {
266 return annotationContainer;
267 }
268
269
270
271
272 public void setRawText(String rawText) {
273 annotationContainer.setRawText(rawText);
274 }
275
276
277
278
279
280
281
282
283 public WF newWF(String id, String form, int sent) {
284 idManager.wfs.update(id);
285 WF newWF = new WF(this.annotationContainer, id, form, sent);
286 annotationContainer.add(newWF);
287 return newWF;
288 }
289
290
291
292
293
294
295
296
297 public WF newWF(String form, int offset) {
298 String newId = idManager.wfs.getNext();
299 int offsetVal = offset;
300 WF newWF = new WF(this.annotationContainer, newId, form, 0);
301 newWF.setOffset(offsetVal);
302 newWF.setLength(form.length());
303 annotationContainer.add(newWF);
304 return newWF;
305 }
306
307
308
309
310
311
312
313 public WF newWF(String form, int offset, int sent) {
314 String newId = idManager.wfs.getNext();
315 WF newWF = new WF(this.annotationContainer, newId, form, sent);
316 newWF.setOffset(offset);
317 newWF.setLength(form.length());
318 annotationContainer.add(newWF);
319 return newWF;
320 }
321
322
323
324
325
326
327
328
329
330
331
332 public Term newTerm(String id, Span<WF> span) {
333 idManager.terms.update(id);
334 Term newTerm = new Term(id, span, false);
335 annotationContainer.add(newTerm);
336 return newTerm;
337 }
338
339 public Term newTerm(String id, Span<WF> span, boolean isComponent) {
340 idManager.terms.update(id);
341 Term newTerm = new Term(id, span, isComponent);
342 if (!isComponent) {
343 annotationContainer.add(newTerm);
344 }
345 return newTerm;
346 }
347
348 public Term newTerm(String id, Span<WF> span, Integer position) {
349 idManager.terms.update(id);
350 Term newTerm = new Term(id, span, false);
351 annotationContainer.add(newTerm, position);
352 return newTerm;
353 }
354
355
356
357
358
359
360
361
362
363
364 public Term newTerm(Span<WF> span) {
365 String newId = idManager.terms.getNext();
366 Term newTerm = new Term(newId, span, false);
367 annotationContainer.add(newTerm);
368 return newTerm;
369 }
370
371
372
373
374
375
376
377
378
379
380 public Term newTermOptions(String morphofeat, Span<WF> span) {
381 String newId = idManager.terms.getNext();
382 Term newTerm = new Term(newId, span, false);
383 newTerm.setMorphofeat(morphofeat);
384 annotationContainer.add(newTerm);
385 return newTerm;
386 }
387
388 public Term newCompound(List<Term> terms, String lemma) {
389 Span<WF> span = new Span<WF>();
390 for (Term term : terms) {
391 span.addTargets(term.getSpan().getTargets());
392 }
393 String newId = idManager.mws.getNext();
394 Term compound = newTerm(newId, span, annotationContainer.termPosition(terms.get(0)));
395 compound.setLemma(lemma);
396 for (Term term : terms) {
397 compound.addComponent(term);
398 term.setCompound(compound);
399 this.annotationContainer.remove(term);
400 }
401 return compound;
402 }
403
404
405
406
407
408
409 public Term.Sentiment newSentiment() {
410 Term.Sentiment newSentiment = new Term.Sentiment();
411 return newSentiment;
412 }
413
414 public Mark newMark(String id, String source, Span<Term> span) {
415 idManager.marks.update(id);
416 Mark newMark = new Mark(id, span);
417 annotationContainer.add(newMark, source);
418 return newMark;
419 }
420
421 public Mark newMark(String source, Span<Term> span) {
422 String newId = idManager.marks.getNext();
423 Mark newMark = new Mark(newId, span);
424 annotationContainer.add(newMark, source);
425 return newMark;
426 }
427
428
429
430
431
432
433
434
435
436 public Dep newDep(Term from, Term to, String rfunc) {
437 Dep newDep = new Dep(from, to, rfunc);
438 annotationContainer.add(newDep);
439 return newDep;
440 }
441
442
443
444
445
446
447
448
449
450
451 public Chunk newChunk(String id, String phrase, Span<Term> span) {
452 idManager.chunks.update(id);
453 Chunk newChunk = new Chunk(id, span);
454 newChunk.setPhrase(phrase);
455 annotationContainer.add(newChunk);
456 return newChunk;
457 }
458
459
460
461
462
463
464
465
466
467 public Chunk newChunk(String phrase, Span<Term> span) {
468 String newId = idManager.chunks.getNext();
469 Chunk newChunk = new Chunk(newId, span);
470 newChunk.setPhrase(phrase);
471 annotationContainer.add(newChunk);
472 return newChunk;
473 }
474
475
476
477
478
479
480
481
482
483 public Entity newEntity(String id, List<Span<Term>> references) {
484 idManager.entities.update(id);
485 Entity newEntity = new Entity(id, references);
486 annotationContainer.add(newEntity);
487 return newEntity;
488 }
489
490
491
492
493
494
495
496
497 public Entity newEntity(List<Span<Term>> references) {
498 String newId = idManager.entities.getNext();
499 Entity newEntity = new Entity(newId, references);
500 annotationContainer.add(newEntity);
501 return newEntity;
502 }
503
504
505
506
507
508
509
510
511 public Coref newCoref(String id, List<Span<Term>> mentions) {
512 idManager.corefs.update(id);
513 Coref newCoref = new Coref(id, mentions);
514 annotationContainer.add(newCoref);
515 return newCoref;
516 }
517
518
519
520
521
522
523
524 public Coref newCoref(List<Span<Term>> mentions) {
525 String newId = idManager.corefs.getNext();
526 Coref newCoref = new Coref(newId, mentions);
527 annotationContainer.add(newCoref);
528 return newCoref;
529 }
530
531
532
533
534
535
536
537
538 public Timex3 newTimex3(String id, Span<WF> mentions, String type) {
539 idManager.timex3s.update(id);
540 Timex3 newTimex3 = new Timex3(id, type);
541 newTimex3.setSpan(mentions);
542 annotationContainer.add(newTimex3);
543 return newTimex3;
544 }
545
546
547
548
549
550
551
552 public Timex3 newTimex3(Span<WF> mentions, String type) {
553 String newId = idManager.timex3s.getNext();
554 Timex3 newTimex3 = new Timex3(newId, type);
555 newTimex3.setSpan(mentions);
556 annotationContainer.add(newTimex3);
557 return newTimex3;
558 }
559
560
561
562
563
564
565 public Timex3 newTimex3(String id, String type) {
566 idManager.timex3s.update(id);
567 Timex3 newTimex3 = new Timex3(id, type);
568 annotationContainer.add(newTimex3);
569 return newTimex3;
570 }
571
572
573
574
575
576 public Timex3 newTimex3(String type) {
577 String newId = idManager.timex3s.getNext();
578 Timex3 newTimex3 = new Timex3(newId, type);
579 annotationContainer.add(newTimex3);
580 return newTimex3;
581 }
582
583 public TLink newTLink(String id, TLinkReferable from, TLinkReferable to, String relType) {
584 idManager.tlinks.update(id);
585 TLink newTLink = new TLink(id, from, to, relType);
586 annotationContainer.add(newTLink);
587 return newTLink;
588 }
589
590 public TLink newTLink(TLinkReferable from, TLinkReferable to, String relType) {
591 String newId = idManager.tlinks.getNext();
592 TLink newTLink = new TLink(newId, from, to, relType);
593 annotationContainer.add(newTLink);
594 return newTLink;
595 }
596
597 public CLink newCLink(String id, Predicate from, Predicate to) {
598 idManager.clinks.update(id);
599 CLink newCLink = new CLink(id, from, to);
600 annotationContainer.add(newCLink);
601 return newCLink;
602 }
603
604 public CLink newCLink(Predicate from, Predicate to) {
605 String newId = idManager.clinks.getNext();
606 CLink newCLink = new CLink(newId, from, to);
607 annotationContainer.add(newCLink);
608 return newCLink;
609 }
610
611
612
613
614
615
616
617
618 public Factuality newFactuality(Term term) {
619 Factuality factuality = new Factuality(term);
620 annotationContainer.add(factuality);
621 return factuality;
622 }
623
624
625
626
627
628
629
630
631 public LinkedEntity newLinkedEntity(String id, Span<WF> span) {
632 LinkedEntity linkedEntity = new LinkedEntity(id, span);
633 annotationContainer.add(linkedEntity);
634 return linkedEntity;
635 }
636
637
638
639
640
641
642
643 public LinkedEntity newLinkedEntity(Span<WF> span) {
644 String newId = idManager.linkedentities.getNext();
645 LinkedEntity linkedEntity = new LinkedEntity(newId, span);
646 annotationContainer.add(linkedEntity);
647 return linkedEntity;
648 }
649
650
651
652
653
654
655
656 public SSTspan newSST(Span<Term> span) {
657 String newId = idManager.ssts.getNext();
658 SSTspan sst = new SSTspan(newId, span);
659 annotationContainer.add(sst);
660 return sst;
661 }
662
663 public SSTspan newSST(Span<Term> span, String type, String label) {
664 String newId = idManager.ssts.getNext();
665 SSTspan sst = new SSTspan(newId, span);
666 sst.setLabel(label);
667 sst.setType(type);
668 annotationContainer.add(sst);
669 return sst;
670 }
671
672
673
674
675
676
677
678 public Topic newTopic() {
679 String newId = idManager.topics.getNext();
680 Topic t = new Topic(newId);
681 annotationContainer.add(t);
682 return t;
683 }
684
685 public Topic newTopic(String label, float probability) {
686 String newId = idManager.topics.getNext();
687 Topic t = new Topic(newId);
688 t.setLabel(label);
689 t.setProbability(probability);
690 annotationContainer.add(t);
691 return t;
692 }
693
694
695
696
697
698
699
700
701
702 public Feature newProperty(String id, String lemma, List<Span<Term>> references) {
703 idManager.properties.update(id);
704 Feature newProperty = new Feature(id, lemma, references);
705 annotationContainer.add(newProperty);
706 return newProperty;
707 }
708
709
710
711
712
713
714
715
716 public Feature newProperty(String lemma, List<Span<Term>> references) {
717 String newId = idManager.properties.getNext();
718 Feature newProperty = new Feature(newId, lemma, references);
719 annotationContainer.add(newProperty);
720 return newProperty;
721 }
722
723
724
725
726
727
728
729
730
731 public Feature newCategory(String id, String lemma, List<Span<Term>> references) {
732 idManager.categories.update(id);
733 Feature newCategory = new Feature(id, lemma, references);
734 annotationContainer.add(newCategory);
735 return newCategory;
736 }
737
738
739
740
741
742
743
744
745 public Feature newCategory(String lemma, List<Span<Term>> references) {
746 String newId = idManager.categories.getNext();
747 Feature newCategory = new Feature(newId, lemma, references);
748 annotationContainer.add(newCategory);
749 return newCategory;
750 }
751
752
753
754
755
756
757 public Opinion newOpinion() {
758 String newId = idManager.opinions.getNext();
759 Opinion newOpinion = new Opinion(newId);
760 annotationContainer.add(newOpinion);
761 return newOpinion;
762 }
763
764
765
766
767
768
769 public Opinion newOpinion(String id) {
770 idManager.opinions.update(id);
771 Opinion newOpinion = new Opinion(id);
772 annotationContainer.add(newOpinion);
773 return newOpinion;
774 }
775
776
777
778
779
780
781
782
783 public Relation newRelation(Relational from, Relational to) {
784 String newId = idManager.relations.getNext();
785 Relation newRelation = new Relation(newId, from, to);
786 annotationContainer.add(newRelation);
787 return newRelation;
788 }
789
790
791
792
793
794
795
796
797
798 public Relation newRelation(String id, Relational from, Relational to) {
799 idManager.relations.update(id);
800 Relation newRelation = new Relation(id, from, to);
801 annotationContainer.add(newRelation);
802 return newRelation;
803 }
804
805
806
807
808
809
810
811
812 public Predicate newPredicate(String id, Span<Term> span) {
813 idManager.predicates.update(id);
814 Predicate newPredicate = new Predicate(id, span);
815 annotationContainer.add(newPredicate);
816 return newPredicate;
817 }
818
819
820
821
822
823
824
825 public Predicate newPredicate(Span<Term> span) {
826 String newId = idManager.predicates.getNext();
827 Predicate newPredicate = new Predicate(newId, span);
828 annotationContainer.add(newPredicate);
829 return newPredicate;
830 }
831
832
833
834
835
836
837
838
839
840
841 public Predicate.Role newRole(String id, Predicate predicate, String semRole, Span<Term> span) {
842 idManager.roles.update(id);
843 Predicate.Role newRole = new Predicate.Role(id, semRole, span);
844 return newRole;
845 }
846
847
848
849
850
851
852
853
854
855 public Predicate.Role newRole(Predicate predicate, String semRole, Span<Term> span) {
856 String newId = idManager.roles.getNext();
857 Predicate.Role newRole = new Predicate.Role(newId, semRole, span);
858 return newRole;
859 }
860
861
862
863
864
865
866
867
868 public ExternalRef newExternalRef(String resource, String reference) {
869 return new ExternalRef(resource, reference);
870 }
871
872 public Tree newConstituent(TreeNode root) {
873 return newConstituent(root, null);
874 }
875
876 public Tree newConstituent(TreeNode root, Integer sentence) {
877 Tree tree = new Tree(root, sentence);
878 annotationContainer.add(tree, sentence);
879 return tree;
880 }
881
882 public void addConstituencyString(String constituencyString, Integer sent) {
883 annotationContainer.add(constituencyString, sent);
884 }
885
886 public void addConstituencyFromParentheses(String parseOut) throws Exception {
887 addConstituencyFromParentheses(parseOut, null);
888 }
889
890 public void addConstituencyFromParentheses(String parseOut, Integer sentence) throws Exception {
891 Tree.parenthesesToKaf(parseOut, this, sentence);
892 }
893
894 public NonTerminal newNonTerminal(String id, String label) {
895 NonTerminal tn = new NonTerminal(id, label);
896 String newEdgeId = idManager.edges.getNext();
897 tn.setEdgeId(newEdgeId);
898 return tn;
899 }
900
901 public NonTerminal newNonTerminal(String label) {
902 String newId = idManager.nonterminals.getNext();
903 String newEdgeId = idManager.edges.getNext();
904 NonTerminal newNonterminal = new NonTerminal(newId, label);
905 newNonterminal.setEdgeId(newEdgeId);
906 return newNonterminal;
907 }
908
909 public Terminal newTerminal(String id, Span<Term> span) {
910 Terminal tn = new Terminal(id, span);
911 String newEdgeId = idManager.edges.getNext();
912 tn.setEdgeId(newEdgeId);
913 return tn;
914 }
915
916 public Terminal newTerminal(Span<Term> span) {
917 String newId = idManager.terminals.getNext();
918 String newEdgeId = idManager.edges.getNext();
919 Terminal tn = new Terminal(newId, span);
920 tn.setEdgeId(newEdgeId);
921 return tn;
922 }
923
924 public static Span<WF> newWFSpan() {
925 return new Span<WF>();
926 }
927
928 public static Span<WF> newWFSpan(List<WF> targets) {
929 return new Span<WF>(targets);
930 }
931
932 public static Span<WF> newWFSpan(List<WF> targets, WF head) {
933 return new Span<WF>(targets, head);
934 }
935
936 public static Span<Term> newTermSpan() {
937 return new Span<Term>();
938 }
939
940 public static Span<Term> newTermSpan(List<Term> targets) {
941 return new Span<Term>(targets);
942 }
943
944 public static Span<Term> newTermSpan(List<Term> targets, Term head) {
945 return new Span<Term>(targets, head);
946 }
947
948 void addUnknownLayer(Element layer) {
949 annotationContainer.add(layer);
950 }
951
952
953
954
955 public String getRawText() {
956 return annotationContainer.getRawText();
957 }
958
959
960
961
962 public List<WF> getWFs() {
963 return annotationContainer.getText();
964 }
965
966
967
968
969 public List<List<WF>> getSentences() {
970 return annotationContainer.getSentences();
971 }
972
973 public Integer getFirstSentence() {
974 return annotationContainer.getText().get(0).getSent();
975 }
976
977 public Integer getNumSentences() {
978 List<WF> wfs = annotationContainer.getText();
979 Integer firstSentence = wfs.get(0).getSent();
980 Integer lastSentence = wfs.get(wfs.size() - 1).getSent();
981 return lastSentence - firstSentence + 1;
982 }
983
984 public List<Integer> getSentsByParagraph(Integer para) {
985 if (this.annotationContainer.sentsIndexedByParagraphs.get(para) == null) {
986 System.out.println(para + ": 0");
987 }
988 return new ArrayList<Integer>(this.annotationContainer.sentsIndexedByParagraphs.get(para));
989 }
990
991 public Integer getFirstParagraph() {
992 return this.annotationContainer.getText().get(0).getPara();
993 }
994
995 public Integer getNumParagraphs() {
996 return this.annotationContainer.sentsIndexedByParagraphs.keySet().size();
997 }
998
999
1000
1001
1002 public List<Term> getTerms() {
1003 return annotationContainer.getTerms();
1004 }
1005
1006
1007
1008
1009
1010
1011
1012 public List<Term> getTermsByWFs(List<WF> wfs) {
1013 return annotationContainer.getTermsByWFs(wfs);
1014 }
1015
1016 public List<Term> getSentenceTerms(int sent) {
1017 return annotationContainer.getSentenceTerms(sent);
1018 }
1019
1020 public List<String> getMarkSources() {
1021 return annotationContainer.getMarkSources();
1022 }
1023
1024 public List<Mark> getMarks(String source) {
1025 return annotationContainer.getMarks(source);
1026 }
1027
1028 public List<Dep> getDeps() {
1029 return annotationContainer.getDeps();
1030 }
1031
1032 public List<Chunk> getChunks() {
1033 return annotationContainer.getChunks();
1034 }
1035
1036 public List<LinkedEntity> getLinkedEntities() {
1037 return annotationContainer.getLinkedEntities();
1038 }
1039
1040
1041
1042
1043 public List<Entity> getEntities() {
1044 return annotationContainer.getEntities();
1045 }
1046
1047 public List<Coref> getCorefs() {
1048 return annotationContainer.getCorefs();
1049 }
1050
1051 public List<Timex3> getTimeExs() {
1052 return annotationContainer.getTimeExs();
1053 }
1054
1055 public List<TLink> getTLinks() {
1056 return annotationContainer.getTLinks();
1057 }
1058
1059 public List<CLink> getCLinks() {
1060 return annotationContainer.getCLinks();
1061 }
1062
1063
1064
1065
1066 public List<Feature> getProperties() {
1067 return annotationContainer.getProperties();
1068 }
1069
1070
1071
1072
1073 public List<Feature> getCategories() {
1074 return annotationContainer.getCategories();
1075 }
1076
1077 public List<Opinion> getOpinions() {
1078 return annotationContainer.getOpinions();
1079 }
1080
1081 public List<Opinion> getOpinions(String label) {
1082 final List<Opinion> opinions = new ArrayList<Opinion>();
1083 for (final Opinion opinion : annotationContainer.getOpinions()) {
1084 if (Objects.equals(opinion.getLabel(), label)) {
1085 opinions.add(opinion);
1086 }
1087 }
1088 return opinions;
1089 }
1090
1091
1092
1093
1094 public List<Relation> getRelations() {
1095 return annotationContainer.getRelations();
1096 }
1097
1098 public List<Tree> getConstituents() {
1099 return annotationContainer.getConstituents();
1100 }
1101
1102 public List<Element> getUnknownLayers() {
1103 return annotationContainer.getUnknownLayers();
1104 }
1105
1106 public List<WF> getWFsBySent(Integer sent) {
1107 List<WF> wfs = this.annotationContainer.textIndexedBySent.get(sent);
1108 return (wfs == null) ? new ArrayList<WF>() : wfs;
1109 }
1110
1111 public List<WF> getWFsByPara(Integer para) {
1112 return this.annotationContainer.getLayerByPara(para, this.annotationContainer.textIndexedBySent);
1113 }
1114
1115 public List<Term> getTermsBySent(Integer sent) {
1116 List<Term> terms = this.annotationContainer.termsIndexedBySent.get(sent);
1117 return (terms == null) ? new ArrayList<Term>() : terms;
1118 }
1119
1120 public List<Term> getTermsByPara(Integer para) {
1121 return this.annotationContainer.getLayerByPara(para, this.annotationContainer.termsIndexedBySent);
1122 }
1123
1124 public List<Entity> getEntitiesBySent(Integer sent) {
1125 List<Entity> entities = this.annotationContainer.entitiesIndexedBySent.get(sent);
1126 return (entities == null) ? new ArrayList<Entity>() : entities;
1127 }
1128
1129 public List<Entity> getEntitiesByPara(Integer para) {
1130 return this.annotationContainer.getLayerByPara(para, this.annotationContainer.entitiesIndexedBySent);
1131 }
1132
1133 public List<Dep> getDepsBySent(Integer sent) {
1134 return this.annotationContainer.depsIndexedBySent.get(sent);
1135 }
1136
1137 public List<Dep> getDepsByPara(Integer para) {
1138 return this.annotationContainer.getLayerByPara(para, this.annotationContainer.depsIndexedBySent);
1139 }
1140
1141 public List<Chunk> getChunksBySent(Integer sent) {
1142 return this.annotationContainer.chunksIndexedBySent.get(sent);
1143 }
1144
1145 public List<Chunk> getChunksByPara(Integer para) {
1146 return this.annotationContainer.getLayerByPara(para, this.annotationContainer.chunksIndexedBySent);
1147 }
1148
1149 public List<Predicate> getPredicatesBySent(Integer sent) {
1150 List<Predicate> result = this.annotationContainer.predicatesIndexedBySent.get(sent);
1151 return result != null ? result : Collections.<Predicate>emptyList();
1152 }
1153
1154 public List<Predicate> getPredicatesByPara(Integer para) {
1155 return this.annotationContainer.getLayerByPara(para, this.annotationContainer.predicatesIndexedBySent);
1156 }
1157
1158 public List<Tree> getConstituentsBySent(Integer sent) {
1159 Map<Integer, List<Tree>> typeTreeIndex = this.annotationContainer.treesIndexedBySent;
1160 if (typeTreeIndex == null) {
1161 return new ArrayList<Tree>();
1162 }
1163 List<Tree> typeTrees = typeTreeIndex.get(sent);
1164 return (typeTrees == null) ? new ArrayList<Tree>() : typeTrees;
1165 }
1166
1167
1168
1169
1170
1171 private void copyAnnotationsToKAF(KAFDocument kaf,
1172 List<WF> wfs,
1173 List<Term> terms,
1174 List<Dep> deps,
1175 List<Chunk> chunks,
1176 List<Entity> entities,
1177 List<Coref> corefs,
1178 List<Timex3> timeExs,
1179 List<Feature> properties,
1180 List<Feature> categories,
1181 List<Opinion> opinions,
1182 List<Relation> relations,
1183 List<Predicate> predicates
1184 ) {
1185 HashMap<String, WF> copiedWFs = new HashMap<String, WF>();
1186 HashMap<String, Term> copiedTerms = new HashMap<String, Term>();
1187 HashMap<String, Relational> copiedRelationals = new HashMap<String, Relational>();
1188
1189
1190 for (WF wf : wfs) {
1191 WF wfCopy = new WF(wf, kaf.getAnnotationContainer());
1192 kaf.insertWF(wfCopy);
1193 copiedWFs.put(wf.getId(), wfCopy);
1194 }
1195
1196 for (Term term : terms) {
1197 Term termCopy = new Term(term, copiedWFs);
1198 kaf.insertTerm(termCopy);
1199 copiedTerms.put(term.getId(), termCopy);
1200 }
1201
1202 for (Dep dep : deps) {
1203 Dep depCopy = new Dep(dep, copiedTerms);
1204 kaf.insertDep(depCopy);
1205 }
1206
1207 for (Chunk chunk : chunks) {
1208 Chunk chunkCopy = new Chunk(chunk, copiedTerms);
1209 kaf.insertChunk(chunkCopy);
1210 }
1211
1212 for (Entity entity : entities) {
1213 Entity entityCopy = new Entity(entity, copiedTerms);
1214 kaf.insertEntity(entityCopy);
1215 copiedRelationals.put(entity.getId(), entityCopy);
1216 }
1217
1218 for (Coref coref : corefs) {
1219 Coref corefCopy = new Coref(coref, copiedTerms);
1220 kaf.insertCoref(corefCopy);
1221 }
1222
1223
1224
1225
1226
1227
1228 for (Feature property : properties) {
1229 Feature propertyCopy = new Feature(property, copiedTerms);
1230 kaf.insertProperty(propertyCopy);
1231 copiedRelationals.put(property.getId(), propertyCopy);
1232 }
1233
1234 for (Feature category : categories) {
1235 Feature categoryCopy = new Feature(category, copiedTerms);
1236 kaf.insertCategory(categoryCopy);
1237 copiedRelationals.put(category.getId(), categoryCopy);
1238 }
1239
1240 for (Opinion opinion : opinions) {
1241 Opinion opinionCopy = new Opinion(opinion, copiedTerms);
1242 kaf.insertOpinion(opinionCopy);
1243 }
1244
1245 for (Relation relation : relations) {
1246 Relation relationCopy = new Relation(relation, copiedRelationals);
1247 kaf.insertRelation(relationCopy);
1248 }
1249
1250
1251
1252
1253
1254
1255
1256 }
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266 public KAFDocument split(List<WF> wfs) {
1267 List<Term> terms = this.annotationContainer.getTermsByWFs(wfs);
1268 List<Dep> deps = this.annotationContainer.getDepsByTerms(terms);
1269 List<Chunk> chunks = this.annotationContainer.getChunksByTerms(terms);
1270 List<Entity> entities = this.annotationContainer.getEntitiesByTerms(terms);
1271 List<Coref> corefs = this.annotationContainer.getCorefsByTerms(terms);
1272 List<Timex3> timeExs = this.annotationContainer.getTimeExsByWFs(wfs);
1273 List<Feature> properties = this.annotationContainer.getPropertiesByTerms(terms);
1274 List<Feature> categories = this.annotationContainer.getCategoriesByTerms(terms);
1275
1276 List<Predicate> predicates = this.annotationContainer.getPredicatesByTerms(terms);
1277 List<Relational> relationals = new ArrayList<Relational>();
1278 relationals.addAll(properties);
1279 relationals.addAll(categories);
1280 relationals.addAll(entities);
1281 List<Relation> relations = this.annotationContainer.getRelationsByRelationals(relationals);
1282
1283 KAFDocument newKaf = new KAFDocument(this.getLang(), this.getVersion());
1284 newKaf.addLinguisticProcessors(this.getLinguisticProcessors());
1285 this.copyAnnotationsToKAF(newKaf, wfs, terms, deps, chunks, entities, corefs, timeExs, properties, categories, new ArrayList<Opinion>(), relations, predicates);
1286
1287 return newKaf;
1288 }
1289
1290
1291
1292
1293 public void join(KAFDocument doc) {
1294 HashMap<String, WF> copiedWFs = new HashMap<String, WF>();
1295 HashMap<String, Term> copiedTerms = new HashMap<String, Term>();
1296 HashMap<String, Relational> copiedRelationals = new HashMap<String, Relational>();
1297
1298 Map<String, List<LinguisticProcessor>> lps = doc.getLinguisticProcessors();
1299 for (Map.Entry<String, List<LinguisticProcessor>> entry : lps.entrySet()) {
1300 String layer = entry.getKey();
1301 List<LinguisticProcessor> lpList = entry.getValue();
1302 for (LinguisticProcessor lp : lpList) {
1303 if (!this.linguisticProcessorExists(layer, lp.name, lp.version)) {
1304
1305 this.addLinguisticProcessor(layer, lp.name, lp.timestamp, lp.version);
1306 }
1307 }
1308 }
1309
1310 for (WF wf : doc.getWFs()) {
1311 WF wfCopy = new WF(wf, this.annotationContainer);
1312 this.insertWF(wfCopy);
1313 copiedWFs.put(wf.getId(), wfCopy);
1314 }
1315
1316 for (Term term : doc.getTerms()) {
1317 Term termCopy = new Term(term, copiedWFs);
1318 this.insertTerm(termCopy);
1319 copiedTerms.put(term.getId(), termCopy);
1320 }
1321
1322 for (Dep dep : doc.getDeps()) {
1323 Dep depCopy = new Dep(dep, copiedTerms);
1324 this.insertDep(depCopy);
1325 }
1326
1327 for (Chunk chunk : doc.getChunks()) {
1328 Chunk chunkCopy = new Chunk(chunk, copiedTerms);
1329 this.insertChunk(chunkCopy);
1330 }
1331
1332 for (Entity entity : doc.getEntities()) {
1333 Entity entityCopy = new Entity(entity, copiedTerms);
1334 this.insertEntity(entityCopy);
1335 copiedRelationals.put(entity.getId(), entityCopy);
1336 }
1337
1338 for (Coref coref : doc.getCorefs()) {
1339 Coref corefCopy = new Coref(coref, copiedTerms);
1340 this.insertCoref(corefCopy);
1341 }
1342
1343
1344
1345
1346
1347
1348 for (Feature property : doc.getProperties()) {
1349 Feature propertyCopy = new Feature(property, copiedTerms);
1350 this.insertProperty(propertyCopy);
1351 copiedRelationals.put(property.getId(), propertyCopy);
1352 }
1353
1354 for (Feature category : doc.getCategories()) {
1355 Feature categoryCopy = new Feature(category, copiedTerms);
1356 this.insertCategory(categoryCopy);
1357 copiedRelationals.put(category.getId(), categoryCopy);
1358 }
1359
1360 for (Opinion opinion : doc.getOpinions()) {
1361 Opinion opinionCopy = new Opinion(opinion, copiedTerms);
1362 this.insertOpinion(opinionCopy);
1363 }
1364
1365 for (Relation relation : doc.getRelations()) {
1366 Relation relationCopy = new Relation(relation, copiedRelationals);
1367 this.insertRelation(relationCopy);
1368 }
1369 }
1370
1371 public String insertWF(WF wf) {
1372 String newId = idManager.wfs.getNext();
1373 wf.setId(newId);
1374 annotationContainer.add(wf);
1375 return newId;
1376 }
1377
1378 public String insertTerm(Term term) {
1379 String newId = idManager.terms.getNext();
1380 term.setId(newId);
1381 annotationContainer.add(term);
1382 return newId;
1383 }
1384
1385 public void insertDep(Dep dep) {
1386 annotationContainer.add(dep);
1387 }
1388
1389 public String insertChunk(Chunk chunk) {
1390 String newId = idManager.chunks.getNext();
1391 chunk.setId(newId);
1392 annotationContainer.add(chunk);
1393 return newId;
1394 }
1395
1396 public String insertEntity(Entity entity) {
1397 String newId = idManager.entities.getNext();
1398 entity.setId(newId);
1399 annotationContainer.add(entity);
1400 return newId;
1401 }
1402
1403 public String insertCoref(Coref coref) {
1404 String newId = idManager.corefs.getNext();
1405 coref.setId(newId);
1406 annotationContainer.add(coref);
1407 return newId;
1408 }
1409
1410 public String insertTimex3(Timex3 timex3) {
1411 String newId = idManager.timex3s.getNext();
1412 timex3.setId(newId);
1413 annotationContainer.add(timex3);
1414 return newId;
1415 }
1416
1417 public String insertProperty(Feature property) {
1418 String newId = idManager.properties.getNext();
1419 property.setId(newId);
1420 annotationContainer.add(property);
1421 return newId;
1422 }
1423
1424 public String insertCategory(Feature category) {
1425 String newId = idManager.categories.getNext();
1426 category.setId(newId);
1427 annotationContainer.add(category);
1428 return newId;
1429 }
1430
1431 public String insertOpinion(Opinion opinion) {
1432 String newId = idManager.opinions.getNext();
1433 opinion.setId(newId);
1434 annotationContainer.add(opinion);
1435 return newId;
1436 }
1437
1438 public String insertRelation(Relation relation) {
1439 String newId = idManager.relations.getNext();
1440 relation.setId(newId);
1441 annotationContainer.add(relation);
1442 return newId;
1443 }
1444
1445
1446
1447
1448
1449
1450 public void save(String filename) {
1451 ReadWriteManager.save(this, filename);
1452 }
1453
1454
1455
1456
1457
1458
1459 public void save(File file) {
1460 ReadWriteManager.save(this, file);
1461 }
1462
1463 public String toString() {
1464 return ReadWriteManager.kafToStr(this);
1465 }
1466 public String toJsonString() {
1467 Gson gson = new GsonBuilder().setPrettyPrinting().create();
1468 return gson.toJson(ReadWriteManager.KAFToJSON(this));
1469 }
1470
1471
1472
1473
1474 public void print() {
1475 ReadWriteManager.print(this);
1476 }
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486 public LinguisticProcessor addLinguisticProcessor(String layer, String name, String version) {
1487 LinguisticProcessor lp = this.addLinguisticProcessor(layer, name);
1488 lp.setVersion(version);
1489 return lp;
1490 }
1491
1492
1493
1494
1495 public LinguisticProcessor addLinguisticProcessor(String layer, String name, String timestamp, String version) {
1496 LinguisticProcessor lp = this.addLinguisticProcessor(layer, name);
1497 lp.setTimestamp(timestamp);
1498 lp.setVersion(version);
1499 return lp;
1500 }
1501
1502
1503
1504
1505 public WF newWF(String id, String form) {
1506 return this.newWF(id, form, 0);
1507 }
1508
1509
1510
1511
1512 public WF newWF(String form) {
1513 return this.newWF(form, 0);
1514 }
1515
1516
1517
1518
1519 public WF createWF(String id, String form) {
1520 return this.newWF(id, form, 0);
1521 }
1522
1523
1524
1525
1526 public WF createWF(String form) {
1527 return this.newWF(form, 0);
1528 }
1529
1530
1531
1532
1533 public WF createWF(String form, int offset) {
1534 return this.newWF(form, offset);
1535 }
1536
1537
1538
1539
1540 public Term newTerm(String id, String type, String lemma, String pos, Span<WF> span) {
1541 Term term = newTerm(id, span);
1542 term.setType(type);
1543 term.setLemma(lemma);
1544 term.setPos(pos);
1545 return term;
1546 }
1547
1548
1549
1550
1551 public Term newTerm(String type, String lemma, String pos, Span<WF> span) {
1552 Term term = newTerm(span);
1553 term.setType(type);
1554 term.setLemma(lemma);
1555 term.setPos(pos);
1556 return term;
1557 }
1558
1559
1560
1561
1562 public Term newTermOptions(String type, String lemma, String pos, String morphofeat, Span<WF> span) {
1563 Term newTerm = newTermOptions(morphofeat, span);
1564 newTerm.setType(type);
1565 newTerm.setLemma(lemma);
1566 newTerm.setPos(pos);
1567 return newTerm;
1568 }
1569
1570
1571
1572
1573 public Term createTerm(String id, String type, String lemma, String pos, List<WF> wfs) {
1574 return this.newTerm(id, type, lemma, pos, this.<WF>list2Span(wfs));
1575 }
1576
1577
1578
1579
1580 public Term createTerm(String type, String lemma, String pos, List<WF> wfs) {
1581 return this.newTerm(type, lemma, pos, this.<WF>list2Span(wfs));
1582 }
1583
1584
1585
1586
1587 public Term createTermOptions(String type, String lemma, String pos, String morphofeat, List<WF> wfs) {
1588 return this.newTermOptions(type, lemma, pos, morphofeat, this.<WF>list2Span(wfs));
1589 }
1590
1591
1592
1593
1594 public Term.Sentiment createSentiment() {
1595 return this.newSentiment();
1596 }
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636 public Dep createDep(Term from, Term to, String rfunc) {
1637 return this.createDep(from, to, rfunc);
1638 }
1639
1640
1641
1642
1643 public Chunk createChunk(String id, Term head, String phrase, List<Term> terms) {
1644 return this.newChunk(id, phrase, this.<Term>list2Span(terms, head));
1645 }
1646
1647
1648
1649
1650 public Chunk createChunk(Term head, String phrase, List<Term> terms) {
1651 return this.newChunk(phrase, this.<Term>list2Span(terms, head));
1652 }
1653
1654
1655
1656
1657 public Entity createEntity(String id, String type, List<List<Term>> references) {
1658 List<Span<Term>> spanReferences = new ArrayList<Span<Term>>();
1659 for (List<Term> list : references) {
1660 spanReferences.add(this.list2Span(list));
1661 }
1662 Entity entity = this.newEntity(id, spanReferences);
1663 entity.setType(type);
1664 return entity;
1665 }
1666
1667
1668
1669
1670 public Entity createEntity(String type, List<List<Term>> references) {
1671 List<Span<Term>> spanReferences = new ArrayList<Span<Term>>();
1672 for (List<Term> list : references) {
1673 spanReferences.add(this.list2Span(list));
1674 }
1675 Entity entity = this.newEntity(spanReferences);
1676 entity.setType(type);
1677 return entity;
1678 }
1679
1680
1681
1682
1683 public Coref createCoref(String id, List<List<Target>> references) {
1684 List<Span<Term>> spanReferences = new ArrayList<Span<Term>>();
1685 for (List<Target> list : references) {
1686 spanReferences.add(this.targetList2Span(list));
1687 }
1688 return this.newCoref(id, spanReferences);
1689 }
1690
1691
1692
1693
1694 public Coref createCoref(List<List<Target>> references) {
1695 List<Span<Term>> spanReferences = new ArrayList<Span<Term>>();
1696 for (List<Target> list : references) {
1697 spanReferences.add(this.targetList2Span(list));
1698 }
1699 return this.newCoref(spanReferences);
1700 }
1701
1702
1703
1704
1705 public Feature createProperty(String id, String lemma, List<List<Term>> references) {
1706 List<Span<Term>> spanReferences = new ArrayList<Span<Term>>();
1707 for (List<Term> list : references) {
1708 spanReferences.add(this.list2Span(list));
1709 }
1710 return this.newProperty(id, lemma, spanReferences);
1711 }
1712
1713
1714
1715
1716 public Feature createProperty(String lemma, List<List<Term>> references) {
1717 List<Span<Term>> spanReferences = new ArrayList<Span<Term>>();
1718 for (List<Term> list : references) {
1719 spanReferences.add(this.list2Span(list));
1720 }
1721 return this.newProperty(lemma, spanReferences);
1722 }
1723
1724
1725
1726
1727 public Feature createCategory(String id, String lemma, List<List<Term>> references) {
1728 List<Span<Term>> spanReferences = new ArrayList<Span<Term>>();
1729 for (List<Term> list : references) {
1730 spanReferences.add(this.list2Span(list));
1731 }
1732 return this.newCategory(id, lemma, spanReferences);
1733 }
1734
1735
1736
1737
1738 public Feature createCategory(String lemma, List<List<Term>> references) {
1739 List<Span<Term>> spanReferences = new ArrayList<Span<Term>>();
1740 for (List<Term> list : references) {
1741 spanReferences.add(this.list2Span(list));
1742 }
1743 return this.newCategory(lemma, spanReferences);
1744 }
1745
1746
1747
1748
1749 public Opinion createOpinion() {
1750 return this.newOpinion();
1751 }
1752
1753
1754
1755
1756 public Opinion createOpinion(String id) {
1757 return this.newOpinion(id);
1758 }
1759
1760
1761
1762
1763 public Relation createRelation(Relational from, Relational to) {
1764 return this.newRelation(from, to);
1765 }
1766
1767
1768
1769
1770 public Relation createRelation(String id, Relational from, Relational to) {
1771 return this.newRelation(id, from, to);
1772 }
1773
1774
1775
1776
1777 public ExternalRef createExternalRef(String resource, String reference) {
1778 return this.newExternalRef(resource, reference);
1779 }
1780
1781
1782
1783
1784
1785
1786
1787 public static Target createTarget(Term term) {
1788 return new Target(term, false);
1789 }
1790
1791
1792
1793
1794
1795
1796
1797
1798 public static Target createTarget(Term term, boolean isHead) {
1799 return new Target(term, isHead);
1800 }
1801
1802 public void removeLayer(Layer layer) {
1803 this.annotationContainer.removeLayer(layer);
1804 }
1805
1806 public void removeAnnotations(Iterable<?> annotations) {
1807 for (Object annotation : annotations) {
1808 this.annotationContainer.removeAnnotation(annotation);
1809 }
1810 }
1811
1812 public void removeAnnotation(Object annotation) {
1813 this.annotationContainer.removeAnnotation(annotation);
1814 }
1815
1816
1817
1818
1819 static <T> Span<T> list2Span(List<T> list) {
1820 Span<T> span = new Span<T>();
1821 for (T elem : list) {
1822 span.addTarget(elem);
1823 }
1824 return span;
1825 }
1826
1827
1828
1829
1830 static <T> Span<T> list2Span(List<T> list, T head) {
1831 Span<T> span = new Span<T>();
1832 for (T elem : list) {
1833 if (head == elem) {
1834 span.addTarget(elem, true);
1835 }
1836 else {
1837 span.addTarget(elem);
1838 }
1839 }
1840 return span;
1841 }
1842
1843
1844
1845
1846 static Span<Term> targetList2Span(List<Target> list) {
1847 Span<Term> span = new Span<Term>();
1848 for (Target target : list) {
1849 if (target.isHead()) {
1850 span.addTarget(target.getTerm(), true);
1851 }
1852 else {
1853 span.addTarget(target.getTerm());
1854 }
1855 }
1856 return span;
1857 }
1858
1859
1860
1861
1862 static List<Target> span2TargetList(Span<Term> span) {
1863 List<Target> list = new ArrayList<Target>();
1864 for (Term t : span.getTargets()) {
1865 list.add(KAFDocument.createTarget(t, (t == span.getHead())));
1866 }
1867 return list;
1868 }
1869
1870
1871
1872
1873
1874
1875
1876 public List<Term> getTermsFromWFs(List<String> wfIds) {
1877 return annotationContainer.getTermsByWFIds(wfIds);
1878 }
1879
1880
1881
1882 private static final Map<String, Character> DEP_PATH_CHARS = new ConcurrentHashMap<String, Character>();
1883
1884 private static final Map<String, Pattern> DEP_PATH_REGEXS = new ConcurrentHashMap<String, Pattern>();
1885
1886 private static char getDepPathChar(final String label) {
1887 final String key = label.toLowerCase();
1888 Character letter = DEP_PATH_CHARS.get(key);
1889 if (letter == null) {
1890 synchronized (DEP_PATH_CHARS) {
1891 letter = DEP_PATH_CHARS.get(key);
1892 if (letter == null) {
1893 letter = 'a';
1894 for (final Character ch : DEP_PATH_CHARS.values()) {
1895 if (ch >= letter) {
1896 letter = (char) (ch + 1);
1897 }
1898 }
1899 DEP_PATH_CHARS.put(key, letter);
1900 }
1901 }
1902 }
1903 return letter;
1904 }
1905
1906 private static String getDepPathString(final Term from, final Iterable<Dep> path) {
1907 final StringBuilder builder = new StringBuilder("_");
1908 Term term = from;
1909 for (final Dep dep : path) {
1910 char prefix;
1911 if (dep.getFrom() == term) {
1912 prefix = '+';
1913 term = dep.getTo();
1914 } else {
1915 prefix = '-';
1916 term = dep.getFrom();
1917 }
1918 for (final String label : dep.getRfunc().split("-")) {
1919 final Character letter = getDepPathChar(label);
1920 builder.append(prefix).append(letter);
1921 }
1922 builder.append("_");
1923 }
1924 return builder.toString();
1925 }
1926
1927 private static Pattern getDepPathRegex(String pattern) {
1928 Pattern regex = DEP_PATH_REGEXS.get(pattern);
1929 if (regex == null) {
1930 synchronized (DEP_PATH_REGEXS) {
1931 regex = DEP_PATH_REGEXS.get(pattern);
1932 if (regex == null) {
1933 final StringBuilder builder = new StringBuilder();
1934 builder.append('_');
1935 int start = -1;
1936 String pattern2 = pattern + " ";
1937 for (int i = 0; i < pattern2.length(); ++i) {
1938 final char ch = pattern2.charAt(i);
1939 if (Character.isLetter(ch) || ch == '-') {
1940 if (start < 0) {
1941 start = i;
1942 }
1943 } else {
1944 if (start >= 0) {
1945 final boolean inverse = pattern2.charAt(start) == '-';
1946 final String label = pattern2.substring(
1947 inverse ? start + 1 : start, i);
1948 final char letter = getDepPathChar(label);
1949 builder.append("([^_]*")
1950 .append(Pattern.quote((inverse ? "-" : "+") + letter))
1951 .append("[^_]*_)");
1952 start = -1;
1953 }
1954 if (!Character.isWhitespace(ch)) {
1955 builder.append(ch);
1956 }
1957 }
1958 }
1959 regex = Pattern.compile(builder.toString());
1960 DEP_PATH_REGEXS.put(pattern, regex);
1961 }
1962 }
1963 }
1964 return regex;
1965 }
1966
1967 public boolean matchDepPath(final Term from, final Iterable<Dep> path, final String pattern) {
1968 final String pathString = getDepPathString(from, path);
1969 final Pattern pathRegex = getDepPathRegex(pattern);
1970 return pathRegex.matcher(pathString).matches();
1971 }
1972
1973 public List<Dep> getDepPath(final Term from, final Term to) {
1974 if (from == to) {
1975 return Collections.emptyList();
1976 }
1977 final List<Dep> toPath = new ArrayList<Dep>();
1978 for (Dep dep = getDepToTerm(to); dep != null; dep = getDepToTerm(dep.getFrom())) {
1979 toPath.add(dep);
1980 if (dep.getFrom() == from) {
1981 Collections.reverse(toPath);
1982 return toPath;
1983 }
1984 }
1985 final List<Dep> fromPath = new ArrayList<Dep>();
1986 for (Dep dep = getDepToTerm(from); dep != null; dep = getDepToTerm(dep.getFrom())) {
1987 fromPath.add(dep);
1988 if (dep.getFrom() == to) {
1989 return fromPath;
1990 }
1991 for (int i = 0; i < toPath.size(); ++i) {
1992 if (dep.getFrom() == toPath.get(i).getFrom()) {
1993 for (int j = i; j >= 0; --j) {
1994 fromPath.add(toPath.get(j));
1995 }
1996 return fromPath;
1997 }
1998 }
1999 }
2000 return null;
2001 }
2002
2003 public Dep getDepToTerm(final Term term) {
2004 for (final Dep dep : getDepsByTerm(term)) {
2005 if (dep.getTo() == term) {
2006 return dep;
2007 }
2008 }
2009 return null;
2010 }
2011
2012 public List<Dep> getDepsFromTerm(final Term term) {
2013 final List<Dep> result = new ArrayList<Dep>();
2014 for (final Dep dep : getDepsByTerm(term)) {
2015 if (dep.getFrom() == term) {
2016 result.add(dep);
2017 }
2018 }
2019 return result;
2020 }
2021
2022 public List<Dep> getDepsByTerm(final Term term) {
2023 return this.annotationContainer.getDepsByTerm(term);
2024 }
2025
2026
2027 public Term getTermsHead(final Iterable<Term> descendents) {
2028 final Set<Term> termSet = new HashSet<Term>();
2029 for (final Term term : descendents) {
2030 termSet.add(term);
2031 }
2032 Term root = null;
2033 for (final Term term : termSet) {
2034 final Dep dep = getDepToTerm(term);
2035 if (dep == null || !termSet.contains(dep.getFrom())) {
2036 if (root == null) {
2037 root = term;
2038 } else if (root != term) {
2039 return null;
2040 }
2041 }
2042 }
2043 return root;
2044 }
2045
2046 public Set<Term> getTermsByDepAncestors(final Iterable<Term> ancestors) {
2047 final Set<Term> terms = new HashSet<Term>();
2048 final List<Term> queue = new LinkedList<Term>();
2049 for (final Term term : ancestors) {
2050 terms.add(term);
2051 queue.add(term);
2052 }
2053 while (!queue.isEmpty()) {
2054 final Term term = queue.remove(0);
2055 final List<Dep> deps = getDepsByTerm(term);
2056 for (final Dep dep : deps) {
2057 if (dep.getFrom() == term) {
2058 if (terms.add(dep.getTo())) {
2059 queue.add(dep.getTo());
2060 }
2061 }
2062 }
2063 }
2064 return terms;
2065 }
2066
2067 public Set<Term> getTermsByDepAncestors(final Iterable<Term> ancestors, final String pattern) {
2068 final Set<Term> result = new HashSet<Term>();
2069 for (final Term term : ancestors) {
2070 for (final Term descendent : getTermsByDepAncestors(Collections.singleton(term))) {
2071 final List<Dep> path = getDepPath(term, descendent);
2072 if (matchDepPath(term, path, pattern)) {
2073 result.add(descendent);
2074 }
2075 }
2076 }
2077 return result;
2078 }
2079
2080 public Set<Term> getTermsByDepDescendants(Iterable<Term> descendents) {
2081 final Set<Term> terms = new HashSet<Term>();
2082 final List<Term> queue = new LinkedList<Term>();
2083 for (final Term term : descendents) {
2084 terms.add(term);
2085 queue.add(term);
2086 }
2087 while (!queue.isEmpty()) {
2088 final Term term = queue.remove(0);
2089 final List<Dep> deps = getDepsByTerm(term);
2090 for (final Dep dep : deps) {
2091 if (dep.getTo() == term) {
2092 if (terms.add(dep.getFrom())) {
2093 queue.add(dep.getFrom());
2094 }
2095 }
2096 }
2097 }
2098 return terms;
2099 }
2100
2101 public Set<Term> getTermsByDepDescendants(Iterable<Term> descendents, String pattern) {
2102 Set<Term> result = new HashSet<Term>();
2103 for (final Term term : descendents) {
2104 for (final Term ancestor : getTermsByDepDescendants(Collections.singleton(term))) {
2105 final List<Dep> path = getDepPath(term, ancestor);
2106 if (matchDepPath(term, path, pattern)) {
2107 result.add(ancestor);
2108 }
2109 }
2110 }
2111 return result;
2112 }
2113
2114 public List<Entity> getEntitiesByTerm(Term term) {
2115 return this.annotationContainer.getEntitiesByTerm(term);
2116 }
2117
2118
2119 public List<Predicate> getPredicates() {
2120 return this.annotationContainer.getPredicates();
2121 }
2122
2123 public List<Predicate> getPredicatesByTerm(Term term) {
2124 return this.annotationContainer.getPredicatesByTerm(term);
2125 }
2126
2127 public List<Coref> getCorefsByTerm(Term term) {
2128 return this.annotationContainer.getCorefsByTerm(term);
2129 }
2130
2131 public List<Timex3> getTimeExsBySent(Integer sent) {
2132 List<Timex3> timexs = this.annotationContainer.timeExsIndexedBySent.get(sent);
2133 return (timexs == null) ? new ArrayList<Timex3>() : timexs;
2134 }
2135
2136 public List<Timex3> getTimeExsByWF(final WF wf) {
2137 return this.annotationContainer.getTimeExsByWF(wf);
2138 }
2139
2140 public List<Timex3> getTimeExsByTerm(final Term term) {
2141 final List<Timex3> result = new ArrayList<>();
2142 outer: for (final Timex3 timex : getTimeExs()) {
2143 if (timex.getSpan() != null) {
2144 for (final WF wf : timex.getSpan().getTargets()) {
2145 if (term.getWFs().contains(wf)) {
2146 result.add(timex);
2147 continue outer;
2148 }
2149 }
2150 }
2151 }
2152 return result;
2153 }
2154
2155 public List<Factuality> getFactualities() {
2156 return annotationContainer.getFactualities();
2157 }
2158
2159 public static void main(String[] args) {
2160 File file = new File(args[0]);
2161
2162 try {
2163 KAFDocument document = KAFDocument.createFromFile(file);
2164 } catch (Exception e) {
2165 System.err.println(e.getMessage());
2166 }
2167 }
2168 }