1   package ixa.kaflib;
2   
3   import org.jdom2.Element;
4   
5   import java.io.Serializable;
6   import java.util.*;
7   
8   /**
9    * A container that keeps all annotations of a document (word forms, terms, dependencies, chunks, entities and coreferences). Several hash maps index the annotations by different properties (ID, sentence, ...), so that annotations can be retrieved efficiently by those properties. Performance is very important.
10   */
11  class AnnotationContainer implements Serializable {
12  
13  	private String rawText;
14  
15  	/**
16  	 * List to keep all word forms
17  	 */
18  	private List<WF> text;
19  
20  	/**
21  	 * Next offset: sum of all words' length plus one char per word
22  	 */
23  	private int nextOffset;
24  
25  	/**
26  	 * List to keep all terms
27  	 */
28  	private List<Term> terms;
29  
30  	private Map<String, List<Mark>> marks;
31  
32  	/**
33  	 * List to keep all dependencies
34  	 */
35  	private List<Dep> deps;
36  
37  	/**
38  	 * List to keep all chunks
39  	 */
40  	private List<Chunk> chunks;
41  
42  	/**
43  	 * List to keep all named entities
44  	 */
45  	private List<Entity> entities;
46  
47  	/**
48  	 * List to keep all properties
49  	 */
50  	private List<Feature> properties;
51  
52  	/**
53  	 * List to keep all categories
54  	 */
55  	private List<Feature> categories;
56  
57  	/**
58  	 * List to keep all coreferences
59  	 */
60  	private List<Coref> coreferences;
61  
62  	/**
63  	 * List to keep all timeExpressions
64  	 */
65  	private List<Timex3> timeExpressions;
66  
67  	/** List to keep all tLinks */
68  	private List<TLink> tLinks;
69  
70  	/** List to keep all cLinks */
71  	private List<CLink> cLinks;
72  
73  	/**
74  	 * List to keep all topics
75  	 */
76  	private List<Topic> topics;
77  
78  	/**
79  	 * List to keep all factualities
80  	 */
81  	private List<Factuality> factualities;
82  
83  	/**
84  	 * List to keep all linked entities
85  	 */
86  	private List<LinkedEntity> linkedEntities;
87  
88  	/**
89  	 * List to keep all SST spans
90  	 */
91  	private List<SSTspan> sstSpans;
92  
93  	/**
94  	 * List to keep all opinions
95  	 */
96  	private List<Opinion> opinions;
97  
98  	/**
99  	 * List to keep all relations
100 	 */
101 	private List<Relation> relations;
102 
103 	/**
104 	 * List to keep all predicates
105 	 */
106 	private List<Predicate> predicates;
107 
108 	/**
109 	 * List to keep all trees
110 	 */
111 	private List<Tree> trees;
112 	private HashMap<Integer, String> constituencyStrings;
113 
114 	/**
115 	 * UNKNOWN annotation layers in plain DOM format
116 	 */
117 	private List<Element> unknownLayers;
118 
119 	/**
120 	 * Hash map for mapping word forms to terms.
121 	 */
122 	private HashMap<String, List<Term>> termsIndexedByWF;
123 	private HashMap<String, Map<String, List<Mark>>> marksIndexedByTerm;
124 	private HashMap<String, List<Dep>> depsIndexedByTerm;
125 	private HashMap<String, List<Chunk>> chunksIndexedByTerm;
126 	private HashMap<String, List<Entity>> entitiesIndexedByTerm;
127 	private HashMap<String, List<Coref>> corefsIndexedByTerm;
128 	private HashMap<String, List<Timex3>> timeExsIndexedByWF;
129 	private HashMap<String, List<Factuality>> factsIndexedByWF;
130 	private HashMap<String, List<LinkedEntity>> linkedEntitiesIndexedByWF;
131 	private HashMap<String, List<SSTspan>> sstSpansIndexedByTerm;
132 	private HashMap<String, List<Feature>> propertiesIndexedByTerm;
133 	private HashMap<String, List<Feature>> categoriesIndexedByTerm;
134 	private HashMap<String, List<Opinion>> opinionsIndexedByTerm;
135 	private HashMap<String, List<Relation>> relationsIndexedByRelational;
136 	private HashMap<String, List<Predicate>> predicatesIndexedByTerm;
137 
138 	HashMap<Integer, List<WF>> textIndexedBySent;
139 	HashMap<Integer, List<Term>> termsIndexedBySent;
140 	HashMap<Integer, Map<String, List<Mark>>> marksIndexedBySent;
141 	HashMap<Integer, List<Entity>> entitiesIndexedBySent;
142 	HashMap<Integer, List<Dep>> depsIndexedBySent;
143 	HashMap<Integer, List<Chunk>> chunksIndexedBySent;
144 	HashMap<Integer, List<Coref>> corefsIndexedBySent;
145 	HashMap<Integer, List<Timex3>> timeExsIndexedBySent;
146 	HashMap<Integer, List<Factuality>> factsIndexedBySent;
147 	HashMap<Integer, List<LinkedEntity>> linkedEntitiesIndexedBySent;
148 	HashMap<Integer, List<SSTspan>> sstSpansIndexedBySent;
149 	HashMap<Integer, List<Feature>> propertiesIndexedBySent;
150 	HashMap<Integer, List<Feature>> categoriesIndexedBySent;
151 	HashMap<Integer, List<Opinion>> opinionsIndexedBySent;
152 	HashMap<Integer, List<Relation>> relationsIndexedBySent;
153 	HashMap<Integer, List<Predicate>> predicatesIndexedBySent;
154 	HashMap<Integer, List<Tree>> treesIndexedBySent;
155 
156 	HashMap<Integer, LinkedHashSet<Integer>> sentsIndexedByParagraphs;
157 
/**
 * Creates an empty AnnotationContainer: the raw text is the empty string,
 * the next offset is 0, and every annotation layer and every lookup index
 * starts out empty.
 */
AnnotationContainer() {
	rawText = "";
	nextOffset = 0;

	// Annotation layers.
	text = new ArrayList<>();
	terms = new ArrayList<>();
	marks = new HashMap<>();
	deps = new ArrayList<>();
	chunks = new ArrayList<>();
	entities = new ArrayList<>();
	properties = new ArrayList<>();
	sstSpans = new ArrayList<>();
	categories = new ArrayList<>();
	coreferences = new ArrayList<>();
	timeExpressions = new ArrayList<>();
	factualities = new ArrayList<>();
	linkedEntities = new ArrayList<>();
	opinions = new ArrayList<>();
	relations = new ArrayList<>();
	predicates = new ArrayList<>();
	trees = new ArrayList<>();
	unknownLayers = new ArrayList<>();
	topics = new ArrayList<>();
	tLinks = new ArrayList<>();
	cLinks = new ArrayList<>();

	// Indexes keyed by the id of a word form, term or relational.
	termsIndexedByWF = new HashMap<>();
	marksIndexedByTerm = new HashMap<>();
	depsIndexedByTerm = new HashMap<>();
	chunksIndexedByTerm = new HashMap<>();
	entitiesIndexedByTerm = new HashMap<>();
	corefsIndexedByTerm = new HashMap<>();
	timeExsIndexedByWF = new HashMap<>();
	linkedEntitiesIndexedByWF = new HashMap<>();
	sstSpansIndexedByTerm = new HashMap<>();
	factsIndexedByWF = new HashMap<>();
	propertiesIndexedByTerm = new HashMap<>();
	categoriesIndexedByTerm = new HashMap<>();
	opinionsIndexedByTerm = new HashMap<>();
	relationsIndexedByRelational = new HashMap<>();
	predicatesIndexedByTerm = new HashMap<>();

	// Indexes keyed by sentence number.
	textIndexedBySent = new HashMap<>();
	termsIndexedBySent = new HashMap<>();
	marksIndexedBySent = new HashMap<>();
	entitiesIndexedBySent = new HashMap<>();
	depsIndexedBySent = new HashMap<>();
	chunksIndexedBySent = new HashMap<>();
	corefsIndexedBySent = new HashMap<>();
	timeExsIndexedBySent = new HashMap<>();
	linkedEntitiesIndexedBySent = new HashMap<>();
	sstSpansIndexedBySent = new HashMap<>();
	factsIndexedBySent = new HashMap<>();
	propertiesIndexedBySent = new HashMap<>();
	categoriesIndexedBySent = new HashMap<>();
	opinionsIndexedBySent = new HashMap<>();
	relationsIndexedBySent = new HashMap<>();
	predicatesIndexedBySent = new HashMap<>();
	treesIndexedBySent = new HashMap<>();

	// Paragraph -> sentences, and sentence -> constituency string.
	sentsIndexedByParagraphs = new HashMap<>();
	constituencyStrings = new HashMap<>();
}
224 
225 	private <T> void indexBySent(T annotation, Integer sent, HashMap<Integer, List<T>> index) {
226 		if (sent > 0) {
227 			if (index.get(sent) == null) {
228 				index.put(sent, new ArrayList<T>());
229 			}
230 			index.get(sent).add(annotation);
231 		}
232 	}
233 
234 	private void indexMarkBySent(Mark mark, String source, Integer sent) {
235 		if (sent > 0) {
236 			if (marksIndexedBySent.get(sent) == null) {
237 				marksIndexedBySent.put(sent, new HashMap<String, List<Mark>>());
238 			}
239 			if (marksIndexedBySent.get(sent).get(source) == null) {
240 				marksIndexedBySent.get(sent).put(source, new ArrayList<Mark>());
241 			}
242 			marksIndexedBySent.get(sent).get(source).add(mark);
243 		}
244 	}
245 
246 	void indexSentByPara(Integer sent, Integer para) {
247 		if ((sent > 0) && (para > 0)) {
248 			if (this.sentsIndexedByParagraphs.get(para) == null) {
249 				this.sentsIndexedByParagraphs.put(para, new LinkedHashSet<Integer>());
250 			}
251 			this.sentsIndexedByParagraphs.get(para).add(sent);
252 		}
253 	}
254 
255 	public List<Integer> getSentsByParagraph(Integer para) {
256 		return new ArrayList<Integer>(this.sentsIndexedByParagraphs.get(para));
257 	}
258 
259 	<T> List<T> getLayerByPara(Integer para, HashMap<Integer, List<T>> index) {
260 		List<T> layer = new ArrayList<T>();
261 		for (Integer sent : this.getSentsByParagraph(para)) {
262 			layer.addAll(index.get(sent));
263 		}
264 		return layer;
265 	}
266 
267 	String getRawText() {
268 		return rawText;
269 	}
270 
271 	/**
272 	 * Returns all word forms.
273 	 */
274 	List<WF> getText() {
275 		return text;
276 	}
277 
278 	/**
279 	 * Returns all terms
280 	 */
281 	List<Term> getTerms() {
282 		return terms;
283 	}
284 
285 	List<SSTspan> getSstSpans() {
286 		return sstSpans;
287 	}
288 
289 	List<String> getMarkSources() {
290 		return new ArrayList<String>(marks.keySet());
291 	}
292 
293 	List<Mark> getMarks(String source) {
294 		return (marks.get(source) == null) ? new ArrayList<Mark>() : marks.get(source);
295 	}
296 
297 	/**
298 	 * Returns all dependencies
299 	 */
300 	List<Dep> getDeps() {
301 		return deps;
302 	}
303 
304 	/**
305 	 * Returns all chunks
306 	 */
307 	List<Chunk> getChunks() {
308 		return chunks;
309 	}
310 
311 	/**
312 	 * Returns all named entities
313 	 */
314 	List<Entity> getEntities() {
315 		return entities;
316 	}
317 
318 	/**
319 	 * Returns all properties
320 	 */
321 	List<Feature> getProperties() {
322 		return properties;
323 	}
324 
325 	/**
326 	 * Returns all categories
327 	 */
328 	List<Feature> getCategories() {
329 		return categories;
330 	}
331 
332 	/**
333 	 * Returns all constituency strings
334 	 */
335 	HashMap<Integer, String> getConstituencyStrings() {
336 		return constituencyStrings;
337 	}
338 
339 	/**
340 	 * Returns all coreferences
341 	 */
342 	List<Coref> getCorefs() {
343 		return coreferences;
344 	}
345 
346 	/**
347 	 * Returns all timeExpressions
348 	 */
349 	List<Timex3> getTimeExs() {
350 		return timeExpressions;
351 	}
352 
353 	/** Returns all tlinks */
354 	List<TLink> getTLinks() {
355 		return this.tLinks;
356 	}
357 
358 	/** Returns all clinks */
359 	List<CLink> getCLinks() {
360 		return this.cLinks;
361 	}
362 
363 	List<Factuality> getFactualities() {
364 		return factualities;
365 	}
366 
367 	List<LinkedEntity> getLinkedEntities() {
368 		return linkedEntities;
369 	}
370 
371 	/**
372 	 * Returns all opinions
373 	 */
374 	List<Opinion> getOpinions() {
375 		return opinions;
376 	}
377 
378 	/**
379 	 * Returns all topics
380 	 */
381 	List<Topic> getTopics() {
382 		return topics;
383 	}
384 
385 	/**
386 	 * Returns all relations
387 	 */
388 	List<Relation> getRelations() {
389 		return relations;
390 	}
391 
392 	/**
393 	 * Returns all predicates
394 	 */
395 	List<Predicate> getPredicates() {
396 		return predicates;
397 	}
398 
399 	/**
400 	 * Returns all trees
401 	 */
402 	List<Tree> getConstituents() {
403 		return trees;
404 	}
405 
406 	/**
407 	 * Returns all unknown layers as a DOM Element list
408 	 */
409 	List<Element> getUnknownLayers() {
410 		return unknownLayers;
411 	}
412 
413 	void setRawText(String str) {
414 		rawText = str;
415 	}
416 
417 	/**
418 	 * Adds a word form to the container
419 	 */
420 	void add(WF wf) {
421 		text.add(wf);
422 		//nextOffset += wf.getLength() + 1;
423 		this.indexBySent(wf, wf.getSent(), this.textIndexedBySent);
424 	}
425 
426 	private <T> void indexAnnotation(T annotation, String hashId, HashMap<String, List<T>> index) {
427 		if (index.get(hashId) == null) {
428 			index.put(hashId, new ArrayList<T>());
429 		}
430 		index.get(hashId).add(annotation);
431 	}
432 
433 	private void indexMarkByTerm(Mark mark, String source, String tid) {
434 		if (marksIndexedByTerm.get(tid) == null) {
435 			marksIndexedByTerm.put(tid, new HashMap<String, List<Mark>>());
436 		}
437 		if (marksIndexedByTerm.get(tid).get(source) == null) {
438 			marksIndexedByTerm.get(tid).put(source, new ArrayList<Mark>());
439 		}
440 		marksIndexedByTerm.get(tid).get(source).add(mark);
441 	}
442 
443 	/**
444 	 * Adds a term to the container
445 	 */
446 	void add(Term term) {
447 		this.add(term, this.terms.size());
448 	}
449 
450 	void add(Term term, int index) {
451 		terms.add(index, term);
452 		for (WF wf : term.getWFs()) {
453 			indexAnnotation(term, wf.getId(), termsIndexedByWF);
454 		}
455 		if (!term.isComponent()) {
456 			this.indexBySent(term, term.getSent(), this.termsIndexedBySent);
457 		}
458 	}
459 
460 	void remove(Term term) {
461 		this.terms.remove(term);
462 	}
463 
464 	void add(Mark mark, String source) {
465 		List<Mark> sourceMarks = marks.get(source);
466 		if (sourceMarks == null) {
467 			sourceMarks = new ArrayList<Mark>();
468 		}
469 		sourceMarks.add(mark);
470 		marks.put(source, sourceMarks);
471 		for (Term term : mark.getSpan().getTargets()) {
472 			indexMarkByTerm(mark, source, term.getId());
473 		}
474 		this.indexMarkBySent(mark, source, mark.getSpan().getTargets().get(0).getSent());
475 	}
476 
477 	/**
478 	 * Adds a dependency to the container
479 	 */
480 	void add(Dep dep) {
481 		deps.add(dep);
482 	/* Index by 'from' and 'to' terms */
483 		if (dep.getFrom() != null) {
484 			String tId = dep.getFrom().getId();
485 			indexAnnotation(dep, tId, depsIndexedByTerm);
486 		}
487 		if (dep.getTo() != null) {
488 			String tId = dep.getTo().getId();
489 			indexAnnotation(dep, tId, depsIndexedByTerm);
490 		}
491 		this.indexBySent(dep, dep.getFrom().getSent(), this.depsIndexedBySent);
492 	}
493 
494 	/**
495 	 * Adds a chunk to the container
496 	 */
497 	void add(Chunk chunk) {
498 		chunks.add(chunk);
499 	/* Index by terms */
500 		for (Term term : chunk.getTerms()) {
501 			indexAnnotation(chunk, term.getId(), chunksIndexedByTerm);
502 		}
503 		this.indexBySent(chunk, chunk.getSpan().getTargets().get(0).getSent(), this.chunksIndexedBySent);
504 	}
505 
506 	/**
507 	 * Adds a named entity to the container
508 	 */
509 	void add(Entity entity) {
510 		entities.add(entity);
511 	/* Index by terms */
512 		for (Term term : entity.getTerms()) {
513 			indexAnnotation(entity, term.getId(), entitiesIndexedByTerm);
514 		}
515 		this.indexBySent(entity, entity.getSpans().get(0).getTargets().get(0).getSent(), this.entitiesIndexedBySent);
516 	}
517 
518 	/**
519 	 * Adds a feature to the container. It checks if it is a property or a category.
520 	 */
521 	void add(Feature feature) {
522 		if (feature.isAProperty()) {
523 			properties.add(feature);
524 		/* Index by terms */
525 			for (Term term : feature.getTerms()) {
526 				indexAnnotation(feature, term.getId(), propertiesIndexedByTerm);
527 			}
528 			//this.indexBySent(feature, feature.getSpans().get(0).getTargets().get(0).getSent(), this.propertiesIndexedBySent);
529 		}
530 		else {
531 			categories.add(feature);
532 	    /* Index by terms */
533 			for (Term term : feature.getTerms()) {
534 				indexAnnotation(feature, term.getId(), categoriesIndexedByTerm);
535 			}
536 			//this.indexBySent(feature, feature.getSpans().get(0).getTargets().get(0).getSent(), this.categoriesIndexedBySent);
537 		}
538 	}
539 
540 	/**
541 	 * Adds a coreference to the container
542 	 */
543 	void add(Coref coref) {
544 		coreferences.add(coref);
545 	/* Index by terms */
546 		for (Span<Term> span : coref.getSpans()) {
547 			for (Term term : span.getTargets()) {
548 				indexAnnotation(coref, term.getId(), corefsIndexedByTerm);
549 			}
550 		}
551 		//this.indexBySent(coref, coref.getSpans().get(0).getTargets().get(0).getSent(), this.corefsIndexedBySent);
552 	}
553 
554 	/** Adds a timeExpression to the container */
555 	void add(Timex3 timex3) {
556 		timeExpressions.add(timex3);
557 	/* Index by terms */
558 		if(timex3.hasSpan()){
559 			for (WF wf : timex3.getSpan().getTargets()) {
560 				indexAnnotation(timex3, wf.getId(), timeExsIndexedByWF);
561 			}
562 		}
563 	}
564 
565 	/** Adds a tlink to the container */
566 	void add(TLink tLink) {
567 		tLinks.add(tLink);
568 	/* Index by from/to (???) */
569 	}
570 
571 	/** Adds a clink to the container */
572 	void add(CLink cLink) {
573 		cLinks.add(cLink);
574 	/* Index by from/to (???) */
575 	}
576 
577 	/**
578 	 * Adds a factuality to the container
579 	 */
580 	void add(Factuality factuality) {
581 		factualities.add(factuality);
582 	/* Index by terms */
583 		if (factuality.getWFs() != null) {
584 			for (WF wf : factuality.getWFs()) {
585 				indexAnnotation(factuality, wf.getId(), factsIndexedByWF);
586 			}
587 		}
588 	}
589 
590 	/**
591 	 * Adds a linked entity to the container
592 	 */
593 	void add(LinkedEntity linkedEntity) {
594 		linkedEntities.add(linkedEntity);
595 	/* Index by terms */
596 		if (linkedEntity.getWFs() != null) {
597 			for (WF wf : linkedEntity.getWFs().getTargets()) {
598 				indexAnnotation(linkedEntity, wf.getId(), linkedEntitiesIndexedByWF);
599 			}
600 		}
601 	}
602 
603 	/**
604 	 * Adds a SST to the container
605 	 */
606 	void add(SSTspan sst) {
607 		sstSpans.add(sst);
608 	/* Index by terms */
609 		if (sst.getTerms() != null) {
610 			for (Term t : sst.getTerms().getTargets()) {
611 				indexAnnotation(sst, t.getId(), sstSpansIndexedByTerm);
612 			}
613 		}
614 	}
615 
616 	/**
617 	 * Adds a topic to the container
618 	 */
619 	void add(Topic t) {
620 		topics.add(t);
621 	}
622 
623 	/**
624 	 * Adds an opinion to the container
625 	 */
626 	void add(Opinion opinion) {
627 		opinions.add(opinion);
628 	/* Index by terms */
629 	/* Ezin hemen indexatu, terminoak oraindik ez baitira gehitu!!!
630 	LinkedHashSet<Term> terms = new LinkedHashSet<Term>();
631 	terms.addAll(opinion.getOpinionHolder().getTerms());
632 	terms.addAll(opinion.getOpinionTarget().getTerms());
633 	terms.addAll(opinion.getOpinionExpression().getTerms());	
634 	for (Term term : terms) {
635 	    indexAnnotation(opinion, term.getId(), opinionsIndexedByTerm);
636 	}
637 	*/
638 
639 	}
640 
641 	/**
642 	 * Adds a relation to the container
643 	 */
644 	void add(Relation relation) {
645 		relations.add(relation);
646 	/* Index by 'from' and 'to' terms */
647 		if (relation.getFrom() != null) {
648 			String rId = relation.getFrom().getId();
649 			indexAnnotation(relation, rId, relationsIndexedByRelational);
650 		}
651 		if (relation.getTo() != null) {
652 			String rId = relation.getTo().getId();
653 			indexAnnotation(relation, rId, relationsIndexedByRelational);
654 		}
655 	}
656 
657 	/**
658 	 * Adds a predicate to the container
659 	 */
660 	void add(Predicate predicate) {
661 		predicates.add(predicate);
662 	/* Index by terms */
663 		for (Term term : predicate.getTerms()) {
664 			indexAnnotation(predicate, term.getId(), predicatesIndexedByTerm);
665 		}
666 		this.indexBySent(predicate, predicate.getSpan().getTargets().get(0).getSent(), this.predicatesIndexedBySent);
667 	}
668 
669 	/**
670 	 * Adds a tree to the container
671 	 */
672 	void add(Tree tree) {
673 		add(tree, null);
674 	}
675 	
676 	void add(Tree tree, Integer sent) {
677 		trees.add(tree);
678 
679 		if (sent == null) {
680 			TreeNode currentNode = tree.getRoot();
681 			while (!currentNode.isTerminal()) {
682 				currentNode = ((NonTerminal) currentNode).getChildren().get(0);
683 			}
684 			sent = ((Terminal) currentNode).getSpan().getTargets().get(0).getSent();
685 		}
686 
687 		if (sent != null) {
688 			List<Tree> sentTrees = treesIndexedBySent.get(sent);
689 			if (sentTrees == null) {
690 				sentTrees = new ArrayList<Tree>();
691 				treesIndexedBySent.put(sent, sentTrees);
692 			}
693 			sentTrees.add(tree);
694 		}
695 	}
696 	
697 	/**
698 	 * Add constituencyString
699 	 */
700 	void add(String constituencyString, Integer sent) {
701 		constituencyStrings.put(sent, constituencyString);
702 	}
703 
704 	/**
705 	 * Adds an unknown layer to the container in DOM format
706 	 */
707 	void add(Element layer) {
708 		unknownLayers.add(layer);
709 	}
710 
711 	/**
712 	 * Index a Term by its sentence number
713 	 */
714 	void indexTermBySent(Term term, Integer sent) {
715 		if (sent == -1) {
716 			throw new IllegalStateException("You can't call indexTermBySent not having defined the sentence for its WFs");
717 		}
718 		List<Term> sentTerms = termsIndexedBySent.get(sent);
719 		if (sentTerms == null) {
720 			sentTerms = new ArrayList<Term>();
721 			termsIndexedBySent.put(sent, sentTerms);
722 		}
723 		sentTerms.add(term);
724 	}
725 
726 	/**
727 	 * Returns all tokens classified by sentences
728 	 */
729 	List<List<WF>> getSentences() {
730 		List<List<WF>> sentences = new ArrayList<List<WF>>();
731 		Set<Integer> sentNumsSet = this.textIndexedBySent.keySet();
732 		List<Integer> sentNumsList = new ArrayList<Integer>(sentNumsSet);
733 		Collections.sort(sentNumsList);
734 		for (int i : sentNumsList) {
735 			List<WF> wfs = this.textIndexedBySent.get(i);
736 			sentences.add(wfs);
737 		}
738 		return sentences;
739 	}
740 
741 	Integer termPosition(Term term) {
742 		return this.terms.indexOf(term);
743 	}
744 
745 	/**
746 	 * Returns WFs from a sentence
747 	 */
748 	List<WF> getSentenceWFs(int sent) {
749 		return this.textIndexedBySent.get(sent);
750 	}
751 
752 	/**
753 	 * Returns terms from a sentence
754 	 */
755 	List<Term> getSentenceTerms(int sent) {
756 		return this.termsIndexedBySent.get(sent);
757 	}
758 
759 	Term getTermByWF(WF wf) {
760 		List<Term> terms = this.termsIndexedByWF.get(wf.getId());
761 		if (terms == null) {
762 			return null;
763 		}
764 		return terms.get(0);
765 	}
766 
767 	List<Term> getTermsByWF(WF wf) {
768 		List<Term> terms = this.termsIndexedByWF.get(wf.getId());
769 		return (terms == null) ? new ArrayList<Term>() : terms;
770 	}
771 
772 	/**
773 	 * Returns a list of terms containing the word forms given on argument.
774 	 *
775 	 * @param wfIds a list of word form IDs whose terms will be found.
776 	 * @return a list of terms containing the given word forms.
777 	 */
778 	List<Term> getTermsByWFs(List<WF> wfs) {
779 		LinkedHashSet<Term> terms = new LinkedHashSet<Term>();
780 		for (WF wf : wfs) {
781 			terms.addAll(getTermsByWF(wf));
782 		}
783 		return new ArrayList<Term>(terms);
784 	}
785 
786 	List<Mark> getMarksByTerm(Term term, String source) {
787 		Map<String, List<Mark>> marks = this.marksIndexedByTerm.get(term.getId());
788 		if (marks == null) {
789 			return new ArrayList<Mark>();
790 		}
791 		List<Mark> sourceMarks = marks.get(source);
792 		return (sourceMarks == null) ? new ArrayList<Mark>() : sourceMarks;
793 	}
794 
795 	List<Dep> getDepsByTerm(Term term) {
796 		List<Dep> deps = this.depsIndexedByTerm.get(term.getId());
797 		return (deps == null) ? new ArrayList<Dep>() : deps;
798 	}
799 
800 	List<Chunk> getChunksByTerm(Term term) {
801 		List<Chunk> chunks = this.chunksIndexedByTerm.get(term.getId());
802 		return (chunks == null) ? new ArrayList<Chunk>() : chunks;
803 	}
804 
805 	List<Entity> getEntitiesByTerm(Term term) {
806 		List<Entity> entities = null;
807 		try {
808 			entities = this.entitiesIndexedByTerm.get(term.getId());
809 		} catch (Exception e) {
810 		}
811 		return (entities == null) ? new ArrayList<Entity>() : entities;
812 	}
813 
814 	List<Coref> getCorefsByTerm(Term term) {
815 		List<Coref> corefs = this.corefsIndexedByTerm.get(term.getId());
816 		return (corefs == null) ? new ArrayList<Coref>() : corefs;
817 	}
818 
819 	List<Timex3> getTimeExsByWF(WF wf) {
820 		List<Timex3> timeExs = this.timeExsIndexedByWF.get(wf.getId());
821 		return (timeExs == null) ? new ArrayList<Timex3>() : timeExs;
822 	}
823 
824 	List<Feature> getPropertiesByTerm(Term term) {
825 		List<Feature> properties = this.propertiesIndexedByTerm.get(term.getId());
826 		return (properties == null) ? new ArrayList<Feature>() : properties;
827 	}
828 
829 	List<Feature> getCategoriesByTerm(Term term) {
830 		List<Feature> categories = this.categoriesIndexedByTerm.get(term.getId());
831 		return (categories == null) ? new ArrayList<Feature>() : categories;
832 	}
833 
834 	List<Opinion> getOpinionsByTerm(Term term) {
835 		List<Opinion> opinions = this.opinionsIndexedByTerm.get(term.getId());
836 		return (opinions == null) ? new ArrayList<Opinion>() : opinions;
837 	}
838 
839 	List<Relation> getRelationsByRelational(Relational relational) {
840 		List<Relation> relations = this.relationsIndexedByRelational.get(relational.getId());
841 		return (relations == null) ? new ArrayList<Relation>() : relations;
842 	}
843 
844 	List<Predicate> getPredicatesByTerm(Term term) {
845 		List<Predicate> predicates = this.predicatesIndexedByTerm.get(term.getId());
846 		return (predicates == null) ? new ArrayList<Predicate>() : predicates;
847 	}
848 
849 	List<Dep> getDepsByTerms(List<Term> terms) {
850 		LinkedHashSet<Dep> deps = new LinkedHashSet<Dep>();
851 		for (Term term : terms) {
852 			deps.addAll(getDepsByTerm(term));
853 		}
854 		return new ArrayList<Dep>(deps);
855 	}
856 
857 	List<Chunk> getChunksByTerms(List<Term> terms) {
858 		LinkedHashSet<Chunk> chunks = new LinkedHashSet<Chunk>();
859 		for (Term term : terms) {
860 			chunks.addAll(getChunksByTerm(term));
861 		}
862 		return new ArrayList<Chunk>(chunks);
863 	}
864 
865 	List<Entity> getEntitiesByTerms(List<Term> terms) {
866 		LinkedHashSet<Entity> entities = new LinkedHashSet<Entity>();
867 		for (Term term : terms) {
868 			entities.addAll(getEntitiesByTerm(term));
869 		}
870 		return new ArrayList<Entity>(entities);
871 	}
872 
873 	List<Coref> getCorefsByTerms(List<Term> terms) {
874 		LinkedHashSet<Coref> corefs = new LinkedHashSet<Coref>();
875 		for (Term term : terms) {
876 			corefs.addAll(getCorefsByTerm(term));
877 		}
878 		return new ArrayList<Coref>(corefs);
879 	}
880 
881 	List<Timex3> getTimeExsByWFs(List<WF> wfs) {
882 		LinkedHashSet<Timex3> timeExs = new LinkedHashSet<Timex3>();
883 		for (WF wf : wfs) {
884 			timeExs.addAll(getTimeExsByWF(wf));
885 		}
886 		return new ArrayList<Timex3>(timeExs);
887 	}
888 
889 	List<Feature> getPropertiesByTerms(List<Term> terms) {
890 		LinkedHashSet<Feature> properties = new LinkedHashSet<Feature>();
891 		for (Term term : terms) {
892 			properties.addAll(getPropertiesByTerm(term));
893 		}
894 		return new ArrayList<Feature>(properties);
895 	}
896 
897 	List<Feature> getCategoriesByTerms(List<Term> terms) {
898 		LinkedHashSet<Feature> categories = new LinkedHashSet<Feature>();
899 		for (Term term : terms) {
900 			categories.addAll(getCategoriesByTerm(term));
901 		}
902 		return new ArrayList<Feature>(categories);
903 	}
904 
905 	List<Opinion> getOpinionsByTerms(List<Term> terms) {
906 		LinkedHashSet<Opinion> opinions = new LinkedHashSet<Opinion>();
907 		for (Term term : terms) {
908 			opinions.addAll(getOpinionsByTerm(term));
909 		}
910 		return new ArrayList<Opinion>(opinions);
911 	}
912 
913 	List<Relation> getRelationsByRelationals(List<Relational> relationals) {
914 		LinkedHashSet<Relation> relations = new LinkedHashSet<Relation>();
915 		for (Relational relational : relationals) {
916 			relations.addAll(getRelationsByRelational(relational));
917 		}
918 		return new ArrayList<Relation>(relations);
919 	}
920 
921 	List<Predicate> getPredicatesByTerms(List<Term> terms) {
922 		LinkedHashSet<Predicate> predicates = new LinkedHashSet<Predicate>();
923 		for (Term term : terms) {
924 			predicates.addAll(getPredicatesByTerm(term));
925 		}
926 		return new ArrayList<Predicate>(predicates);
927 	}
928 
929 	/**
930 	 * Returns next WF's offset.
931 	 */
932 	int getNextOffset() {
933 		return nextOffset;
934 	}
935 
936 
937 	/**
938 	 * Deprecated. Returns a list of terms containing the word forms given on argument.
939 	 *
940 	 * @param wfIds a list of word form IDs whose terms will be found.
941 	 * @return a list of terms containing the given word forms.
942 	 */
943 	List<Term> getTermsByWFIds(List<String> wfIds) {
944 		LinkedHashSet<Term> terms = new LinkedHashSet<Term>();
945 		for (String wfId : wfIds) {
946 			terms.addAll(this.termsIndexedByWF.get(wfId));
947 		}
948 		return new ArrayList<Term>(terms);
949 	}
950 
951 	void removeLayer(KAFDocument.Layer layer) {
952 		switch (layer) {
953 			case text:
954 				this.text.clear();
955 				break;
956 			case terms:
957 				this.terms.clear();
958 				break;
959 			case deps:
960 				this.deps.clear();
961 				break;
962 			case chunks:
963 				this.chunks.clear();
964 				break;
965 			case entities:
966 				this.entities.clear();
967 				break;
968 			case properties:
969 				this.properties.clear();
970 				break;
971 			case categories:
972 				this.categories.clear();
973 				break;
974 			case coreferences:
975 				this.coreferences.clear();
976 				break;
977 			case opinions:
978 				this.opinions.clear();
979 				break;
980 			case relations:
981 				this.relations.clear();
982 				break;
983 			case srl:
984 				this.predicates.clear();
985 				break;
986 			case constituency:
987 				this.trees.clear();
988 				break;
989 			case timeExpressions:
990 				this.timeExpressions.clear();
991 				break;
992 			case linkedEntities:
993 				this.linkedEntities.clear();
994 				break;
995 			case constituencyStrings:
996 				//this.constituencyStrings.clear();
997 				this.unknownLayers.clear();
998 				break;
999 			default:
1000 				throw new IllegalArgumentException("Wrong layer");
1001 		}
1002 	}
1003 
1004 
1005     void removeAnnotation(Object annotation) {
1006         if (annotation instanceof Term) {
1007             Term term = (Term) annotation;
1008             terms.remove(term);
1009             for (WF wf : term.getWFs()) {
1010                 unindexAnnotation(term, wf.getId(), termsIndexedByWF);
1011             }
1012             if (!term.isComponent()) {
1013                 unindexBySent(term, term.getSent(), this.termsIndexedBySent);
1014             }
1015         } else if (annotation instanceof Mark) {
1016             throw new UnsupportedOperationException();
1017 
1018         } else if (annotation instanceof Dep) {
1019             Dep dep = (Dep) annotation;
1020             deps.remove(dep);
1021             if (dep.getFrom() != null) {
1022                 unindexAnnotation(dep, dep.getFrom().getId(), depsIndexedByTerm);
1023             }
1024             if (dep.getTo() != null) {
1025                 unindexAnnotation(dep, dep.getTo().getId(), depsIndexedByTerm);
1026             }
1027             unindexBySent(dep, dep.getFrom().getSent(), this.depsIndexedBySent);
1028 
1029         } else if (annotation instanceof Chunk) {
1030             throw new UnsupportedOperationException();
1031 
1032         } else if (annotation instanceof Entity) {
1033             Entity entity = (Entity) annotation;
1034             entities.remove(entity);
1035             for (Term term : entity.getTerms()) {
1036                 unindexAnnotation(entity, term.getId(), entitiesIndexedByTerm);
1037             }
1038             this.unindexBySent(entity, entity.getSpans().get(0).getTargets().get(0).getSent(),
1039                     this.entitiesIndexedBySent);
1040 
1041         } else if (annotation instanceof Feature) {
1042             throw new UnsupportedOperationException();
1043 
1044         } else if (annotation instanceof Timex3) {
1045             Timex3 timex3 = (Timex3) annotation;
1046             timeExpressions.remove(timex3);
1047             if (timex3.getSpan().getTargets() != null) {
1048                 for (WF wf : timex3.getSpan().getTargets()) {
1049                     unindexAnnotation(timex3, wf.getId(), timeExsIndexedByWF);
1050                 }
1051             }
1052             unindexBySent(timex3, timex3.getSpan().getTargets().get(0).getSent(), timeExsIndexedBySent);
1053 
1054         } else if (annotation instanceof Coref) {
1055             Coref coref = (Coref) annotation;
1056             coreferences.remove(coref);
1057             for (Span<Term> span : coref.getSpans()) {
1058                 for (Term term : span.getTargets()) {
1059                     unindexAnnotation(coref, term.getId(), corefsIndexedByTerm);
1060                 }
1061             }
1062 
1063         } else if (annotation instanceof Factuality) {
1064             Factuality factuality = (Factuality) annotation;
1065             factualities.remove(factuality);
1066             if (factuality.getWFs() != null) {
1067                 for (WF wf : factuality.getWFs()) {
1068                     unindexAnnotation(factuality, wf.getId(), factsIndexedByWF);
1069                 }
1070             }
1071 
1072         } else if (annotation instanceof LinkedEntity) {
1073             LinkedEntity entity = (LinkedEntity) annotation;
1074             linkedEntities.remove(entity);
1075 			if (entity.getWFs() != null) {
1076                 for (WF wf : entity.getWFs().getTargets()) {
1077                     unindexAnnotation(entity, wf.getId(), linkedEntitiesIndexedByWF);
1078                 }
1079             }
1080 
1081         } else if (annotation instanceof SSTspan) {
1082             SSTspan sst = (SSTspan) annotation;
1083             sstSpans.remove(sst);
1084             if (sst.getTerms() != null) {
1085                 for (Term t : sst.getTerms().getTargets()) {
1086                     unindexAnnotation(sst, t.getId(), sstSpansIndexedByTerm);
1087                 }
1088             }
1089 
1090         } else if (annotation instanceof Topic) {
1091             throw new UnsupportedOperationException();
1092 
1093         } else if (annotation instanceof Opinion) {
1094             opinions.remove(annotation);
1095 
1096         } else if (annotation instanceof Relation) {
1097             throw new UnsupportedOperationException();
1098 
1099         } else if (annotation instanceof Predicate) {
1100             Predicate predicate = (Predicate) annotation;
1101             predicates.remove(predicate);
1102             for (Term term : predicate.getTerms()) {
1103                 unindexAnnotation(predicate, term.getId(), predicatesIndexedByTerm);
1104             }
1105             unindexBySent(predicate, predicate.getSpan().getTargets().get(0).getSent(),
1106                     this.predicatesIndexedBySent);
1107 
1108         } else if (annotation instanceof Tree) {
1109 			throw new UnsupportedOperationException();
1110         } else if (annotation instanceof Element) {
1111             throw new UnsupportedOperationException();
1112         }
1113     }
1114 
1115     private <T> void unindexAnnotation(T annotation, String hashId, HashMap<String, List<T>> index) {
1116         List<T> list = index.get(hashId);
1117         if (list != null) {
1118             list.remove(annotation);
1119             if (list.isEmpty()) {
1120                 index.remove(hashId);
1121             }
1122         }
1123     }
1124 
1125     private <T> void unindexBySent(T annotation, Integer sent, HashMap<Integer, List<T>> index) {
1126         if (sent > 0) {
1127             List<T> list = index.get(sent);
1128             if (list != null) {
1129                 list.remove(annotation);
1130                 if (list.isEmpty()) {
1131                     index.remove(sent);
1132                 }
1133             }
1134         }
1135     }
1136     
1137 }