1 package ixa.kaflib;
2
3 import com.google.gson.Gson;
4 import com.google.gson.JsonArray;
5 import com.google.gson.JsonElement;
6 import com.google.gson.JsonObject;
7 import org.jdom2.*;
8 import org.jdom2.input.SAXBuilder;
9 import org.jdom2.output.Format;
10 import org.jdom2.output.LineSeparator;
11 import org.jdom2.output.XMLOutputter;
12
13 import java.io.*;
14 import java.util.*;
15 import java.util.regex.Matcher;
16 import java.util.regex.Pattern;
17 import java.util.stream.Collectors;
18
19
20
21
22
23
24
25 class ReadWriteManager {
26
27
28
29
30 static KAFDocument load(File file) throws IOException, JDOMException, KAFNotValidException {
31 SAXBuilder builder = new SAXBuilder();
32 Document document = (Document) builder.build(file);
33 Element rootElem = document.getRootElement();
34 return DOMToKAF(document);
35 }
36
37
38
39
40 static KAFDocument load(Reader stream) throws IOException, JDOMException, KAFNotValidException {
41 SAXBuilder builder = new SAXBuilder();
42 Document document = (Document) builder.build(stream);
43 Element rootElem = document.getRootElement();
44 return DOMToKAF(document);
45 }
46
47
48
49
50 static void save(KAFDocument kaf, String filename) {
51 File file = new File(filename);
52 save(kaf, file);
53 }
54
55
56
57
58 static void save(KAFDocument kaf, File file) {
59 try {
60 Writer out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file), "UTF8"));
61 out.write(kafToStr(kaf));
62 out.flush();
63 out.close();
64 } catch (Exception e) {
65 System.err.println(String.format("Error in writing file %s: %s", file.getAbsolutePath(), e.getMessage()));
66 }
67 }
68
69
70
71
72 static void print(KAFDocument kaf) {
73 try {
74 Writer out = new BufferedWriter(new OutputStreamWriter(System.out, "UTF8"));
75 out.write(kafToStr(kaf));
76 out.flush();
77 } catch (Exception e) {
78 e.printStackTrace();
79 }
80 }
81
82
83
84
85 static String kafToStr(KAFDocument kaf) {
86 XMLOutputter out = new XMLOutputter(Format.getPrettyFormat().setLineSeparator(LineSeparator.UNIX));
87
88 Document jdom = KAFToDOM(kaf);
89 KAFToJSON(kaf);
90
91 return out.outputString(jdom);
92 }
93
94
95
96
97
98 private static KAFDocument DOMToKAF(Document dom) throws KAFNotValidException {
99 HashMap<String, WF> wfIndex = new HashMap<String, WF>();
100 HashMap<String, Term> termIndex = new HashMap<String, Term>();
101 HashMap<String, Relational> relationalIndex = new HashMap<String, Relational>();
102 HashMap<String, Timex3> timexIndex = new HashMap<String, Timex3>();
103 HashMap<String, Predicate> predicateIndex = new HashMap<String, Predicate>();
104
105 Element rootElem = dom.getRootElement();
106 String lang = getAttribute("lang", rootElem, Namespace.XML_NAMESPACE);
107 String kafVersion = getAttribute("version", rootElem);
108 KAFDocument kaf = new KAFDocument(lang, kafVersion);
109
110 List<Element> rootChildrenElems = rootElem.getChildren();
111 for (Element elem : rootChildrenElems) {
112 if (elem.getName().equals("nafHeader") || elem.getName().equals("kafHeader")) {
113 List<Element> lpsElems = elem.getChildren("linguisticProcessors");
114 for (Element lpsElem : lpsElems) {
115 String layer = getAttribute("layer", lpsElem);
116 List<Element> lpElems = lpsElem.getChildren();
117 for (Element lpElem : lpElems) {
118 String name = getAttribute("name", lpElem);
119 LinguisticProcessor newLp = kaf.addLinguisticProcessor(layer, name);
120 String timestamp = getOptAttribute("timestamp", lpElem);
121 if (timestamp != null) {
122 newLp.setTimestamp(timestamp);
123 }
124 String beginTimestamp = getOptAttribute("beginTimestamp", lpElem);
125 if (beginTimestamp != null) {
126 newLp.setBeginTimestamp(beginTimestamp);
127 }
128 String endTimestamp = getOptAttribute("endTimestamp", lpElem);
129 if (endTimestamp != null) {
130 newLp.setEndTimestamp(endTimestamp);
131 }
132 String version = getOptAttribute("version", lpElem);
133 if (version != null) {
134 newLp.setVersion(version);
135 }
136 }
137 }
138 Element fileDescElem = elem.getChild("fileDesc");
139 if (fileDescElem != null) {
140 KAFDocument.FileDesc fd = kaf.createFileDesc();
141 String author = getOptAttribute("author", fileDescElem);
142 if (author != null) {
143 fd.author = author;
144 }
145 String title = getOptAttribute("title", fileDescElem);
146 if (title != null) {
147 fd.title = title;
148 }
149 String filename = getOptAttribute("filename", fileDescElem);
150 if (filename != null) {
151 fd.filename = filename;
152 }
153 String filetype = getOptAttribute("filetype", fileDescElem);
154 if (filetype != null) {
155 fd.filetype = filetype;
156 }
157 String pages = getOptAttribute("pages", fileDescElem);
158 if (pages != null) {
159 fd.pages = Integer.parseInt(pages);
160 }
161 String creationtime = getOptAttribute("creationtime", fileDescElem);
162 if (creationtime != null) {
163 fd.creationtime = creationtime;
164 }
165 }
166 Element publicElem = elem.getChild("public");
167 if (publicElem != null) {
168 KAFDocument.Public pub = kaf.createPublic();
169 String publicId = getOptAttribute("publicId", publicElem);
170 if (publicId != null) {
171 pub.publicId = publicId;
172 }
173 String uri = getOptAttribute("uri", publicElem);
174 if (uri != null) {
175 pub.uri = uri;
176 }
177 }
178 } else if (elem.getName().equals("raw")) {
179 kaf.setRawText(elem.getText());
180 } else if (elem.getName().equals("text")) {
181 List<Element> wfElems = elem.getChildren();
182 for (Element wfElem : wfElems) {
183 String wid;
184 try {
185 wid = getAttribute("id", wfElem);
186 } catch (Exception e) {
187 wid = getAttribute("wid", wfElem);
188 }
189 String wForm = wfElem.getText();
190 String wSent = getAttribute("sent", wfElem);
191 WF newWf = kaf.newWF(wid, wForm, Integer.valueOf(wSent));
192 String wPara = getOptAttribute("para", wfElem);
193 if (wPara != null) {
194 newWf.setPara(Integer.valueOf(wPara));
195 }
196 String wPage = getOptAttribute("page", wfElem);
197 if (wPage != null) {
198 newWf.setPage(Integer.valueOf(wPage));
199 }
200 String wOffset = getOptAttribute("offset", wfElem);
201 if (wOffset != null) {
202 newWf.setOffset(Integer.valueOf(wOffset));
203 }
204 String wLength = getOptAttribute("length", wfElem);
205 if (wLength != null) {
206 newWf.setLength(Integer.valueOf(wLength));
207 }
208 String wXpath = getOptAttribute("xpath", wfElem);
209 if (wXpath != null) {
210 newWf.setXpath(wXpath);
211 }
212 wfIndex.put(newWf.getId(), newWf);
213 }
214 } else if (elem.getName().equals("terms")) {
215 List<Element> termElems = elem.getChildren();
216 for (Element termElem : termElems) {
217 DOMToTerm(termElem, kaf, false, wfIndex, termIndex);
218 }
219 } else if (elem.getName().equals("markables")) {
220 String source = getAttribute("source", elem);
221 List<Element> markElems = elem.getChildren();
222 for (Element markElem : markElems) {
223 String sid = getAttribute("id", markElem);
224 Element spanElem = markElem.getChild("span");
225 if (spanElem == null) {
226 throw new IllegalStateException("Every mark must contain a span element");
227 }
228 List<Element> marksTermElems = spanElem.getChildren("target");
229 Span<Term> span = kaf.newTermSpan();
230 for (Element marksTermElem : marksTermElems) {
231 String termId = getAttribute("id", marksTermElem);
232 boolean isHead = isHead(marksTermElem);
233 Term term = termIndex.get(termId);
234 if (term == null) {
235 throw new KAFNotValidException("Term " + termId + " not found when loading mark " + sid);
236 }
237 span.addTarget(term, isHead);
238 }
239 Mark newMark = kaf.newMark(sid, source, span);
240 String type = getOptAttribute("type", markElem);
241 if (type != null) {
242 newMark.setType(type);
243 }
244 String lemma = getOptAttribute("lemma", markElem);
245 if (lemma != null) {
246 newMark.setLemma(lemma);
247 }
248 String pos = getOptAttribute("pos", markElem);
249 if (pos != null) {
250 newMark.setPos(pos);
251 }
252 String tMorphofeat = getOptAttribute("morphofeat", markElem);
253 if (tMorphofeat != null) {
254 newMark.setMorphofeat(tMorphofeat);
255 }
256 String markcase = getOptAttribute("case", markElem);
257 if (markcase != null) {
258 newMark.setCase(markcase);
259 }
260 List<Element> externalReferencesElems = markElem.getChildren("externalReferences");
261 if (externalReferencesElems.size() > 0) {
262 List<ExternalRef> externalRefs = getExternalReferences(externalReferencesElems.get(0), kaf);
263 newMark.addExternalRefs(externalRefs);
264 }
265
266 }
267 } else if (elem.getName().equals("deps")) {
268 List<Element> depElems = elem.getChildren();
269 for (Element depElem : depElems) {
270 String fromId = getAttribute("from", depElem);
271 String toId = getAttribute("to", depElem);
272 Term from = termIndex.get(fromId);
273 if (from == null) {
274 throw new KAFNotValidException(
275 "Term " + fromId + " not found when loading Dep (" + fromId + ", " + toId + ")");
276 }
277 Term to = termIndex.get(toId);
278 if (to == null) {
279 throw new KAFNotValidException(
280 "Term " + toId + " not found when loading Dep (" + fromId + ", " + toId + ")");
281 }
282 String rfunc = getAttribute("rfunc", depElem);
283 Dep newDep = kaf.newDep(from, to, rfunc);
284 String depcase = getOptAttribute("case", depElem);
285 if (depcase != null) {
286 newDep.setCase(depcase);
287 }
288 }
289 } else if (elem.getName().equals("chunks")) {
290
291 List<Element> chunkElems = elem.getChildren();
292 for (Element chunkElem : chunkElems) {
293 String chunkId = getAttribute("id", chunkElem);
294 String headId = getAttribute("head", chunkElem);
295 Term chunkHead = termIndex.get(headId);
296 if (chunkHead == null) {
297 throw new KAFNotValidException(
298 "Term " + headId + " not found when loading chunk " + chunkId + " (head not found)");
299 }
300 Element spanElem = chunkElem.getChild("span");
301 if (spanElem == null) {
302 throw new IllegalStateException("Every chunk must contain a span element");
303 }
304 List<Element> chunksTermElems = spanElem.getChildren("target");
305 Span<Term> span = kaf.newTermSpan();
306 for (Element chunksTermElem : chunksTermElems) {
307 String termId = getAttribute("id", chunksTermElem);
308 boolean isHead = isHead(chunksTermElem);
309 Term targetTerm = termIndex.get(termId);
310 if (targetTerm == null) {
311 throw new KAFNotValidException("Term " + termId + " not found when loading chunk " + chunkId
312 + " (target term not found)");
313 }
314 span.addTarget(targetTerm, ((targetTerm == chunkHead) || isHead));
315 }
316 if (!span.hasTarget(chunkHead)) {
317 throw new KAFNotValidException("The head of the chunk is not in it's span.");
318 }
319 Chunk newChunk = kaf.newChunk(chunkId, span);
320 String chunkPhrase = getOptAttribute("phrase", chunkElem);
321 if (chunkPhrase != null) {
322 newChunk.setPhrase(chunkPhrase);
323 }
324 String chunkCase = getOptAttribute("case", chunkElem);
325 if (chunkCase != null) {
326 newChunk.setCase(chunkCase);
327 }
328 }
329 } else if (elem.getName().equals("entities")) {
330 List<Element> entityElems = elem.getChildren();
331 for (Element entityElem : entityElems) {
332 String entId = getAttribute("id", entityElem);
333 List<Element> referencesElem = entityElem.getChildren("references");
334 if (referencesElem.size() < 1) {
335 throw new IllegalStateException("Every entity must contain a 'references' element");
336 }
337 List<Element> spanElems = referencesElem.get(0).getChildren();
338 if (spanElems.size() < 1) {
339 throw new IllegalStateException(
340 "Every entity must contain a 'span' element inside 'references'");
341 }
342 List<Span<Term>> references = new ArrayList<Span<Term>>();
343 for (Element spanElem : spanElems) {
344 Span<Term> span = kaf.newTermSpan();
345 List<Element> targetElems = spanElem.getChildren();
346 if (targetElems.size() < 1) {
347 throw new IllegalStateException(
348 "Every span in an entity must contain at least one target inside");
349 }
350 for (Element targetElem : targetElems) {
351 String targetTermId = getAttribute("id", targetElem);
352 Term targetTerm = termIndex.get(targetTermId);
353 if (targetTerm == null) {
354 throw new KAFNotValidException(
355 "Term " + targetTermId + " not found when loading entity " + entId);
356 }
357 boolean isHead = isHead(targetElem);
358 span.addTarget(targetTerm, isHead);
359 }
360 references.add(span);
361 }
362 Entity newEntity = kaf.newEntity(entId, references);
363 String entType = getOptAttribute("type", entityElem);
364 if (entType != null) {
365 newEntity.setType(entType);
366 }
367 if ("yes".equals(getOptAttribute("unnamed", entityElem))) {
368 newEntity.setNamed(false);
369 }
370 List<Element> externalReferencesElems = entityElem.getChildren("externalReferences");
371 if (externalReferencesElems.size() > 0) {
372 List<ExternalRef> externalRefs = getExternalReferences(externalReferencesElems.get(0), kaf);
373 newEntity.addExternalRefs(externalRefs);
374 }
375 relationalIndex.put(newEntity.getId(), newEntity);
376 }
377 } else if (elem.getName().equals("coreferences")) {
378 List<Element> corefElems = elem.getChildren();
379 for (Element corefElem : corefElems) {
380 String coId = getAttribute("id", corefElem);
381 String clusterId = getOptAttribute("cluster", corefElem);
382 List<Element> spanElems = corefElem.getChildren("span");
383 if (spanElems.size() < 1) {
384 throw new IllegalStateException(
385 "Every coref must contain a 'span' element inside 'references'");
386 }
387 List<Span<Term>> mentions = new ArrayList<Span<Term>>();
388 for (Element spanElem : spanElems) {
389 Span<Term> span = kaf.newTermSpan();
390 List<Element> targetElems = spanElem.getChildren();
391 if (targetElems.size() < 1) {
392 throw new IllegalStateException(
393 "Every span in an entity must contain at least one target inside");
394 }
395 for (Element targetElem : targetElems) {
396 String targetTermId = getAttribute("id", targetElem);
397 Term targetTerm = termIndex.get(targetTermId);
398 if (targetTerm == null) {
399 throw new KAFNotValidException(
400 "Term " + targetTermId + " not found when loading coref " + coId);
401 }
402 boolean isHead = isHead(targetElem);
403 span.addTarget(targetTerm, isHead);
404 }
405 mentions.add(span);
406 }
407 Coref newCoref = kaf.newCoref(coId, mentions);
408 String corefType = getOptAttribute("type", corefElem);
409 if (corefType != null) {
410 newCoref.setType(corefType);
411 }
412 if (clusterId != null) {
413 newCoref.setCluster(clusterId);
414 }
415 List<Element> externalReferencesElems = corefElem.getChildren("externalReferences");
416 if (externalReferencesElems.size() > 0) {
417 List<ExternalRef> externalRefs = getExternalReferences(externalReferencesElems.get(0), kaf);
418 newCoref.addExternalRefs(externalRefs);
419 }
420 }
421 } else if (elem.getName().equals("timeExpressions")) {
422 List<Element> timex3Elems = elem.getChildren();
423 for (Element timex3Elem : timex3Elems) {
424 String timex3Id = getAttribute("id", timex3Elem);
425 String timex3Type = getAttribute("type", timex3Elem);
426 Timex3 timex3 = kaf.newTimex3(timex3Id, timex3Type);
427 String timex3BeginPointId = getOptAttribute("beginPoint", timex3Elem);
428 if (timex3BeginPointId != null) {
429 Timex3 beginPoint = timexIndex.get(timex3BeginPointId);
430 if (beginPoint == null) {
431
432 } else {
433 timex3.setBeginPoint(beginPoint);
434 }
435 }
436 String timex3EndPointId = getOptAttribute("endPoint", timex3Elem);
437 if (timex3EndPointId != null) {
438 Timex3 endPoint = timexIndex.get(timex3EndPointId);
439 if (endPoint == null) {
440
441 } else {
442 timex3.setEndPoint(endPoint);
443 }
444 }
445 String timex3Quant = getOptAttribute("quant", timex3Elem);
446 if (timex3Quant != null) {
447 timex3.setQuant(timex3Quant);
448 }
449 String timex3Freq = getOptAttribute("freq", timex3Elem);
450 if (timex3Freq != null) {
451 timex3.setFreq(timex3Freq);
452 }
453 String timex3FuncInDoc = getOptAttribute("functionInDocument", timex3Elem);
454 if (timex3FuncInDoc != null) {
455 timex3.setFunctionInDocument(timex3FuncInDoc);
456 }
457 String timex3TempFunc = getOptAttribute("temporalFunction", timex3Elem);
458 if (timex3TempFunc != null) {
459 Boolean tempFunc = timex3TempFunc.equals("true");
460 timex3.setTemporalFunction(tempFunc);
461 }
462 String timex3Value = getOptAttribute("value", timex3Elem);
463 if (timex3Value != null) {
464 timex3.setValue(timex3Value);
465 }
466 String timex3ValueFromFunction = getOptAttribute("valueFromFunction", timex3Elem);
467 if (timex3ValueFromFunction != null) {
468 timex3.setValueFromFunction(timex3ValueFromFunction);
469 }
470 String timex3Mod = getOptAttribute("mod", timex3Elem);
471 if (timex3Mod != null) {
472 timex3.setMod(timex3Mod);
473 }
474 String timex3AnchorTimeId = getOptAttribute("anchorTimeId", timex3Elem);
475 if (timex3AnchorTimeId != null) {
476 timex3.setAnchorTimeId(timex3AnchorTimeId);
477 }
478 String timex3Comment = getOptAttribute("comment", timex3Elem);
479 if (timex3Comment != null) {
480 timex3.setComment(timex3Comment);
481 }
482 Element spanElem = timex3Elem.getChild("span");
483 if (spanElem != null) {
484 Span<WF> timex3Span = kaf.newWFSpan();
485 for (Element targetElem : spanElem.getChildren("target")) {
486 String targetId = getAttribute("id", targetElem);
487 WF wf = wfIndex.get(targetId);
488 if (wf == null) {
489 throw new KAFNotValidException(
490 "Word form " + targetId + " not found when loading timex3 " + timex3Id);
491 }
492 boolean isHead = isHead(targetElem);
493 timex3Span.addTarget(wf, isHead);
494 }
495 timex3.setSpan(timex3Span);
496 }
497 timexIndex.put(timex3.getId(), timex3);
498 }
499 } else if (elem.getName().equals("temporalRelations")) {
500 List<Element> tLinkElems = elem.getChildren("tlink");
501 for (Element tLinkElem : tLinkElems) {
502 String tlid = getAttribute("id", tLinkElem);
503 String fromId = getAttribute("from", tLinkElem);
504 String toId = getAttribute("to", tLinkElem);
505 String fromType = getAttribute("fromType", tLinkElem);
506 String toType = getAttribute("toType", tLinkElem);
507 String relType = getAttribute("relType", tLinkElem);
508 TLinkReferable from = fromType.equals("event")
509 ? predicateIndex.get(fromId) : timexIndex.get(fromId);
510 TLinkReferable to = toType.equals("event")
511 ? predicateIndex.get(toId) : timexIndex.get(toId);
512 TLink tLink = kaf.newTLink(tlid, from, to, relType);
513 }
514 } else if (elem.getName().equals("causalRelations")) {
515 List<Element> clinkElems = elem.getChildren("clink");
516 for (Element clinkElem : clinkElems) {
517 String clid = getAttribute("id", clinkElem);
518 String fromId = getAttribute("from", clinkElem);
519 String toId = getAttribute("to", clinkElem);
520 String relType = getOptAttribute("relType", clinkElem);
521 Predicate from = predicateIndex.get(fromId);
522 Predicate to = predicateIndex.get(toId);
523 CLink clink = kaf.newCLink(clid, from, to);
524 if (relType != null) {
525 clink.setRelType(relType);
526 }
527 }
528 } else if (elem.getName().equals("features")) {
529 Element propertiesElem = elem.getChild("properties");
530 Element categoriesElem = elem.getChild("categories");
531 if (propertiesElem != null) {
532 List<Element> propertyElems = propertiesElem.getChildren("property");
533 for (Element propertyElem : propertyElems) {
534 String pid = getAttribute("id", propertyElem);
535 String lemma = getAttribute("lemma", propertyElem);
536 Element referencesElem = propertyElem.getChild("references");
537 if (referencesElem == null) {
538 throw new IllegalStateException("Every property must contain a 'references' element");
539 }
540 List<Element> spanElems = referencesElem.getChildren("span");
541 if (spanElems.size() < 1) {
542 throw new IllegalStateException(
543 "Every property must contain a 'span' element inside 'references'");
544 }
545 List<Span<Term>> references = new ArrayList<Span<Term>>();
546 for (Element spanElem : spanElems) {
547 Span<Term> span = kaf.newTermSpan();
548 List<Element> targetElems = spanElem.getChildren();
549 if (targetElems.size() < 1) {
550 throw new IllegalStateException(
551 "Every span in a property must contain at least one target inside");
552 }
553 for (Element targetElem : targetElems) {
554 String targetTermId = getAttribute("id", targetElem);
555 Term targetTerm = termIndex.get(targetTermId);
556 if (targetTerm == null) {
557 throw new KAFNotValidException(
558 "Term " + targetTermId + " not found when loading property " + pid);
559 }
560 boolean isHead = isHead(targetElem);
561 span.addTarget(targetTerm, isHead);
562 }
563 references.add(span);
564 }
565 Feature newProperty = kaf.newProperty(pid, lemma, references);
566 List<Element> externalReferencesElems = propertyElem.getChildren("externalReferences");
567 if (externalReferencesElems.size() > 0) {
568 List<ExternalRef> externalRefs = getExternalReferences(externalReferencesElems.get(0), kaf);
569 newProperty.addExternalRefs(externalRefs);
570 }
571 relationalIndex.put(newProperty.getId(), newProperty);
572 }
573 }
574 if (categoriesElem != null) {
575 List<Element> categoryElems = categoriesElem.getChildren("category");
576 for (Element categoryElem : categoryElems) {
577 String cid = getAttribute("id", categoryElem);
578 String lemma = getAttribute("lemma", categoryElem);
579 Element referencesElem = categoryElem.getChild("references");
580 if (referencesElem == null) {
581 throw new IllegalStateException("Every category must contain a 'references' element");
582 }
583 List<Element> spanElems = referencesElem.getChildren("span");
584 if (spanElems.size() < 1) {
585 throw new IllegalStateException(
586 "Every category must contain a 'span' element inside 'references'");
587 }
588 List<Span<Term>> references = new ArrayList<Span<Term>>();
589 for (Element spanElem : spanElems) {
590 Span<Term> span = kaf.newTermSpan();
591 List<Element> targetElems = spanElem.getChildren();
592 if (targetElems.size() < 1) {
593 throw new IllegalStateException(
594 "Every span in a property must contain at least one target inside");
595 }
596 for (Element targetElem : targetElems) {
597 String targetTermId = getAttribute("id", targetElem);
598 Term targetTerm = termIndex.get(targetTermId);
599 if (targetTerm == null) {
600 throw new KAFNotValidException(
601 "Term " + targetTermId + " not found when loading category " + cid);
602 }
603 boolean isHead = isHead(targetElem);
604 span.addTarget(targetTerm, isHead);
605 }
606 references.add(span);
607 }
608 Feature newCategory = kaf.newCategory(cid, lemma, references);
609 List<Element> externalReferencesElems = categoryElem.getChildren("externalReferences");
610 if (externalReferencesElems.size() > 0) {
611 List<ExternalRef> externalRefs = getExternalReferences(externalReferencesElems.get(0), kaf);
612 newCategory.addExternalRefs(externalRefs);
613 }
614 relationalIndex.put(newCategory.getId(), newCategory);
615 }
616 }
617 } else if (elem.getName().equals("opinions")) {
618 List<Element> opinionElems = elem.getChildren("opinion");
619 for (Element opinionElem : opinionElems) {
620 String opinionId;
621 try {
622 opinionId = getAttribute("id", opinionElem);
623 } catch (Exception e) {
624 opinionId = getAttribute("oid", opinionElem);
625 }
626 Opinion opinion = kaf.newOpinion(opinionId);
627 try {
628 String label = getAttribute("label", opinionElem);
629 opinion.setLabel(label);
630 } catch (Exception e) {
631
632 }
633 List<Element> opinionExternalRefs = opinionElem.getChildren("externalReferences");
634 if (opinionExternalRefs.size() > 0) {
635 opinion.addExternalRefs(getExternalReferences(opinionExternalRefs.get(0), kaf));
636 }
637 Element opinionHolderElem = opinionElem.getChild("opinion_holder");
638 if (opinionHolderElem != null) {
639 Span<Term> span = kaf.newTermSpan();
640 Opinion.OpinionHolder opinionHolder = opinion.createOpinionHolder(span);
641 String ohType = getOptAttribute("type", opinionHolderElem);
642 if (ohType != null) {
643 opinionHolder.setType(ohType);
644 }
645 Element spanElem = opinionHolderElem.getChild("span");
646 if (spanElem != null) {
647 List<Element> targetElems = spanElem.getChildren("target");
648 for (Element targetElem : targetElems) {
649 String refId = getOptAttribute("id", targetElem);
650 boolean isHead = isHead(targetElem);
651 Term targetTerm = termIndex.get(refId);
652 if (targetTerm == null) {
653 throw new KAFNotValidException(
654 "Term " + refId + " not found when loading opinion " + opinionId);
655 }
656 span.addTarget(targetTerm, isHead);
657 }
658 }
659 List<Element> holderExternalRefs = opinionHolderElem.getChildren("externalReferences");
660 if (holderExternalRefs.size() > 0) {
661 opinionHolder.addExternalRefs(getExternalReferences(holderExternalRefs.get(0), kaf));
662 }
663 }
664 Element opinionTargetElem = opinionElem.getChild("opinion_target");
665 if (opinionTargetElem != null) {
666 Span<Term> span = kaf.newTermSpan();
667 Opinion.OpinionTarget opinionTarget = opinion.createOpinionTarget(span);
668 String otType = getOptAttribute("type", opinionTargetElem);
669 if (otType != null) {
670 opinionTarget.setType(otType);
671 }
672 Element spanElem = opinionTargetElem.getChild("span");
673 if (spanElem != null) {
674 List<Element> targetElems = spanElem.getChildren("target");
675 for (Element targetElem : targetElems) {
676 String refId = getOptAttribute("id", targetElem);
677 boolean isHead = isHead(targetElem);
678 Term targetTerm = termIndex.get(refId);
679 if (targetTerm == null) {
680 throw new KAFNotValidException(
681 "Term " + refId + " not found when loading opinion " + opinionId);
682 }
683 span.addTarget(targetTerm, isHead);
684 }
685 }
686 List<Element> targetExternalRefs = opinionTargetElem.getChildren("externalReferences");
687 if (targetExternalRefs.size() > 0) {
688 opinionTarget.addExternalRefs(getExternalReferences(targetExternalRefs.get(0), kaf));
689 }
690 }
691 Element opinionExpressionElem = opinionElem.getChild("opinion_expression");
692 if (opinionExpressionElem != null) {
693 Span<Term> span = kaf.newTermSpan();
694 String polarity = getOptAttribute("polarity", opinionExpressionElem);
695 String strength = getOptAttribute("strength", opinionExpressionElem);
696 String subjectivity = getOptAttribute("subjectivity", opinionExpressionElem);
697 String sentimentSemanticType = getOptAttribute("sentiment_semantic_type",
698 opinionExpressionElem);
699 String sentimentProductFeature = getOptAttribute("sentiment_product_feature",
700 opinionExpressionElem);
701 Opinion.OpinionExpression opinionExpression = opinion.createOpinionExpression(span);
702 if (polarity != null) {
703 opinionExpression.setPolarity(polarity);
704 }
705 if (strength != null) {
706 opinionExpression.setStrength(strength);
707 }
708 if (subjectivity != null) {
709 opinionExpression.setSubjectivity(subjectivity);
710 }
711 if (sentimentSemanticType != null) {
712 opinionExpression.setSentimentSemanticType(sentimentSemanticType);
713 }
714 if (sentimentProductFeature != null) {
715 opinionExpression.setSentimentProductFeature(sentimentProductFeature);
716 }
717
718 Element spanElem = opinionExpressionElem.getChild("span");
719 if (spanElem != null) {
720 List<Element> targetElems = spanElem.getChildren("target");
721 for (Element targetElem : targetElems) {
722 String refId = getOptAttribute("id", targetElem);
723 boolean isHead = isHead(targetElem);
724 Term targetTerm = termIndex.get(refId);
725 if (targetTerm == null) {
726 throw new KAFNotValidException(
727 "Term " + refId + " not found when loading opinion " + opinionId);
728 }
729 span.addTarget(targetTerm, isHead);
730 }
731 }
732
733 List<Element> expressionExternalRefs = opinionExpressionElem.getChildren("externalReferences");
734 if (expressionExternalRefs.size() > 0) {
735 opinionExpression
736 .addExternalRefs(getExternalReferences(expressionExternalRefs.get(0), kaf));
737 }
738 }
739 }
740 } else if (elem.getName().equals("relations")) {
741 List<Element> relationElems = elem.getChildren("relation");
742 for (Element relationElem : relationElems) {
743 String id = getAttribute("id", relationElem);
744 String fromId = getAttribute("from", relationElem);
745 String toId = getAttribute("to", relationElem);
746 String confidenceStr = getOptAttribute("confidence", relationElem);
747 float confidence = -1.0f;
748 if (confidenceStr != null) {
749 confidence = Float.parseFloat(confidenceStr);
750 }
751 Relational from = relationalIndex.get(fromId);
752 if (from == null) {
753 throw new KAFNotValidException(
754 "Entity/feature object " + fromId + " not found when loading relation " + id);
755 }
756 Relational to = relationalIndex.get(toId);
757 if (to == null) {
758 throw new KAFNotValidException(
759 "Entity/feature object " + toId + " not found when loading relation " + id);
760 }
761 Relation newRelation = kaf.newRelation(id, from, to);
762 if (confidence >= 0) {
763 newRelation.setConfidence(confidence);
764 }
765 }
766 } else if (elem.getName().equals("srl")) {
767 List<Element> predicateElems = elem.getChildren("predicate");
768 for (Element predicateElem : predicateElems) {
769 String id = getAttribute("id", predicateElem);
770 Span<Term> span = kaf.newTermSpan();
771 Element spanElem = predicateElem.getChild("span");
772 if (spanElem != null) {
773 List<Element> targetElems = spanElem.getChildren("target");
774 for (Element targetElem : targetElems) {
775 String targetId = getAttribute("id", targetElem);
776 boolean isHead = isHead(targetElem);
777 Term targetTerm = termIndex.get(targetId);
778 if (targetTerm == null) {
779 throw new KAFNotValidException(
780 "Term object " + targetId + " not found when loading predicate " + id);
781 }
782 span.addTarget(targetTerm, isHead);
783 }
784 }
785 List<String> predTypes = new ArrayList<String>();
786 List<Element> predTypeElems = predicateElem.getChildren("predType");
787 for (Element predTypeElem : predTypeElems) {
788 String ptUri = getAttribute("uri", predTypeElem);
789 predTypes.add(ptUri);
790 }
791 Predicate newPredicate = kaf.newPredicate(id, span);
792 String source = getOptAttribute("source", predicateElem);
793 if (source != null) {
794 newPredicate.setSource(source);
795 }
796 String uri = getOptAttribute("uri", predicateElem);
797 if (uri != null) {
798 newPredicate.setUri(uri);
799 }
800 List<Element> externalReferencesElems = predicateElem.getChildren("externalReferences");
801 if (externalReferencesElems.size() > 0) {
802 List<ExternalRef> externalRefs = getExternalReferences(externalReferencesElems.get(0), kaf);
803 newPredicate.addExternalRefs(externalRefs);
804 }
805 String confidence = getOptAttribute("confidence", predicateElem);
806 if (confidence != null) {
807 newPredicate.setConfidence(Float.valueOf(confidence));
808 }
809 List<Element> roleElems = predicateElem.getChildren("role");
810 for (Element roleElem : roleElems) {
811 String rid = getAttribute("id", roleElem);
812 String semRole = getAttribute("semRole", roleElem);
813 Span<Term> roleSpan = kaf.newTermSpan();
814 Element roleSpanElem = roleElem.getChild("span");
815 if (roleSpanElem != null) {
816 List<Element> targetElems = roleSpanElem.getChildren("target");
817 for (Element targetElem : targetElems) {
818 String targetId = getAttribute("id", targetElem);
819 boolean isHead = isHead(targetElem);
820 Term targetTerm = termIndex.get(targetId);
821 if (targetTerm == null) {
822 throw new KAFNotValidException(
823 "Term object " + targetId + " not found when loading role " + rid);
824 }
825 roleSpan.addTarget(targetTerm, isHead);
826 }
827 }
828 Predicate.Role newRole = kaf.newRole(rid, newPredicate, semRole, roleSpan);
829 List<Element> rExternalReferencesElems = roleElem.getChildren("externalReferences");
830 if (rExternalReferencesElems.size() > 0) {
831 List<ExternalRef> externalRefs = getExternalReferences(rExternalReferencesElems.get(0),
832 kaf);
833 newRole.addExternalRefs(externalRefs);
834 }
835 newPredicate.addRole(newRole);
836 }
837 predicateIndex.put(newPredicate.getId(), newPredicate);
838 }
839 } else if (elem.getName().equals("constituency")) {
840 try {
841 List<Element> treeElems = elem.getChildren("tree");
842 for (Element treeElem : treeElems) {
843 HashMap<String, TreeNode> treeNodes = new HashMap<String, TreeNode>();
844 HashMap<String, Boolean> rootNodes = new HashMap<String, Boolean>();
845 Integer sentence = null;
846 if (treeElem.getAttribute("sentence") != null) {
847 sentence = Integer.parseInt(treeElem.getAttribute("sentence").getValue());
848 }
849
850
851 List<Element> terminalElems = treeElem.getChildren("t");
852 for (Element terminalElem : terminalElems) {
853 String id = getAttribute("id", terminalElem);
854 Element spanElem = terminalElem.getChild("span");
855 if (spanElem == null) {
856 throw new KAFNotValidException("Constituent non terminal nodes need a span");
857 }
858 Span<Term> span = loadTermSpan(spanElem, termIndex, id);
859 treeNodes.put(id, kaf.newTerminal(id, span));
860 rootNodes.put(id, true);
861 }
862
863 List<Element> nonTerminalElems = treeElem.getChildren("nt");
864 for (Element nonTerminalElem : nonTerminalElems) {
865 String id = getAttribute("id", nonTerminalElem);
866 String label = getAttribute("label", nonTerminalElem);
867 treeNodes.put(id, kaf.newNonTerminal(id, label));
868 rootNodes.put(id, true);
869 }
870
871 List<Element> edgeElems = treeElem.getChildren("edge");
872 for (Element edgeElem : edgeElems) {
873 String fromId = getAttribute("from", edgeElem);
874 String toId = getAttribute("to", edgeElem);
875 String edgeId = getOptAttribute("id", edgeElem);
876 String head = getOptAttribute("head", edgeElem);
877 boolean isHead = (head != null && head.equals("yes")) ? true : false;
878 TreeNode parentNode = treeNodes.get(toId);
879 TreeNode childNode = treeNodes.get(fromId);
880 if ((parentNode == null) || (childNode == null)) {
881 throw new KAFNotValidException(
882 "There is a problem with the edge(" + fromId + ", " + toId
883 + "). One of its targets doesn't exist.");
884 }
885 try {
886 ((NonTerminal) parentNode).addChild(childNode);
887 } catch (Exception e) {
888 }
889 rootNodes.put(fromId, false);
890 if (edgeId != null) {
891 childNode.setEdgeId(edgeId);
892 }
893 if (isHead) {
894 ((NonTerminal) childNode).setHead(isHead);
895 }
896 }
897
898 for (Map.Entry<String, Boolean> areRoot : rootNodes.entrySet()) {
899 if (areRoot.getValue()) {
900 TreeNode rootNode = treeNodes.get(areRoot.getKey());
901 kaf.newConstituent(rootNode, sentence);
902 }
903 }
904 }
905 } catch (Exception e) {
906
907 }
908 } else if (elem.getName().equals("factualitylayer")) {
909 for (Element factElem : elem.getChildren("factvalue")) {
910 String id = getAttribute("id", factElem);
911 WF wf = wfIndex.get(id);
912 List<Term> terms = kaf.getTermsByWFs(Collections.singletonList(wf));
913 if (terms.isEmpty()) {
914 System.err.println("Cannot detect term for factvalue ID " + id);
915 } else {
916 Factuality factuality = kaf.newFactuality(terms.get(0));
917 for (Element partElem : factElem.getChildren("factuality")) {
918 String prediction = getAttribute("prediction", partElem);
919 double confidence = Double.parseDouble(getAttribute("confidence", partElem));
920 factuality.addFactualityPart(prediction, confidence);
921 }
922 }
923 }
924 } else if (elem.getName().equals("linkedEntities")) {
925 for (Element entityElem : elem.getChildren("linkedEntity")) {
926 String id = getAttribute("id", entityElem);
927 Span<WF> span = KAFDocument.newWFSpan();
928 List<Element> targetElems = entityElem.getChild("span").getChildren();
929 if (targetElems.size() < 1) {
930 throw new IllegalStateException(
931 "Every span in an entity must contain at least one target inside");
932 }
933 for (Element targetElem : targetElems) {
934 String targetWfId = getAttribute("id", targetElem);
935 WF wf = wfIndex.get(targetWfId);
936 if (wf == null) {
937 throw new KAFNotValidException(
938 "WF " + targetWfId + " not found when loading linked entity " + id);
939 }
940 span.addTarget(wf);
941 }
942 LinkedEntity e = kaf.newLinkedEntity(id, span);
943 e.setResource(getOptAttribute("resource", entityElem));
944 e.setReference(getOptAttribute("reference", entityElem));
945
946 String spotted = getOptAttribute("spotted", entityElem);
947 e.setSpotted(spotted != null && spotted.equals("true"));
948
949 String confidence = getOptAttribute("confidence", entityElem);
950 if (confidence != null) {
951 e.setConfidence(Double.parseDouble(confidence));
952 }
953 Element typesElem = entityElem.getChild("types");
954 if (typesElem != null) {
955 for (Element topicElem : typesElem.getChildren("type")) {
956 String category = getAttribute("source", topicElem);
957 String label = getAttribute("label", topicElem);
958 e.addType(category, label);
959 }
960 }
961 }
962 } else if (elem.getName().equals("SSTspans")) {
963 for (Element sstElem : elem.getChildren("sst")) {
964 String id = getAttribute("id", sstElem);
965 String type = getAttribute("type", sstElem);
966 String label = getAttribute("label", sstElem);
967 Span<Term> span = KAFDocument.newTermSpan();
968 List<Element> targetElems = sstElem.getChild("span").getChildren();
969 if (targetElems.size() < 1) {
970 throw new IllegalStateException(
971 "Every span in an entity must contain at least one target inside");
972 }
973 for (Element targetElem : targetElems) {
974 String targetTermId = getAttribute("id", targetElem);
975 Term term = termIndex.get(targetTermId);
976 if (term == null) {
977 throw new KAFNotValidException(
978 "Term " + targetTermId + " not found when loading sst " + id);
979 }
980 span.addTarget(term);
981 }
982 kaf.newSST(span, type, label);
983 }
984 } else if (elem.getName().equals("topics")) {
985 for (Element topicElem : elem.getChildren("topic")) {
986 String label = getAttribute("label", topicElem);
987 float probability = Float.parseFloat(getAttribute("probability", topicElem));
988 kaf.newTopic(label, probability);
989 }
990 } else {
991
992 kaf.addUnknownLayer(elem);
993 }
994 }
995
996 return kaf;
997 }
998
999 private static void DOMToTerm(Element termElem, KAFDocument kaf, boolean isComponent, Map<String, WF> wfIndex,
1000 Map<String, Term> termIndex) throws KAFNotValidException {
1001 String tid;
1002 try {
1003 tid = getAttribute("id", termElem);
1004 } catch (Exception e) {
1005 tid = getAttribute("tid", termElem);
1006 }
1007 Element spanElem = termElem.getChild("span");
1008 if (spanElem == null) {
1009 throw new IllegalStateException("Every term must contain a span element");
1010 }
1011 List<Element> termsWfElems = spanElem.getChildren("target");
1012 Span<WF> span = kaf.newWFSpan();
1013 for (Element termsWfElem : termsWfElems) {
1014 String wfId = getAttribute("id", termsWfElem);
1015 boolean isHead = isHead(termsWfElem);
1016 WF wf = wfIndex.get(wfId);
1017 if (wf == null) {
1018 throw new KAFNotValidException("Wf " + wfId + " not found when loading term " + tid);
1019 }
1020 span.addTarget(wf, isHead);
1021 }
1022 Term newTerm = kaf.newTerm(tid, span, isComponent);
1023 String type = getOptAttribute("type", termElem);
1024 if (type != null) {
1025 newTerm.setType(type);
1026 }
1027 String lemma = getOptAttribute("lemma", termElem);
1028 if (lemma != null) {
1029 newTerm.setLemma(lemma);
1030 }
1031 String supersenseTag = getOptAttribute("supersense", termElem);
1032 if (supersenseTag != null) {
1033 newTerm.setSupersenseTag(supersenseTag);
1034 }
1035 String wordnetSense = getOptAttribute("wordnet", termElem);
1036 if (wordnetSense != null) {
1037 newTerm.setWordnetSense(wordnetSense);
1038 }
1039 String bbnTag = getOptAttribute("bbn", termElem);
1040 if (bbnTag != null) {
1041 newTerm.setBBNTag(bbnTag);
1042 }
1043 String pos = getOptAttribute("pos", termElem);
1044 if (pos != null) {
1045 newTerm.setPos(pos);
1046 }
1047 String tMorphofeat = getOptAttribute("morphofeat", termElem);
1048 if (tMorphofeat != null) {
1049 newTerm.setMorphofeat(tMorphofeat);
1050 }
1051 String tHead = getOptAttribute("head", termElem);
1052 String termcase = getOptAttribute("case", termElem);
1053 if (termcase != null) {
1054 newTerm.setCase(termcase);
1055 }
1056 List<Element> sentimentElems = termElem.getChildren("sentiment");
1057 if (sentimentElems.size() > 0) {
1058 Element sentimentElem = sentimentElems.get(0);
1059 Term.Sentiment newSentiment = kaf.newSentiment();
1060 String sentResource = getOptAttribute("resource", sentimentElem);
1061 if (sentResource != null) {
1062 newSentiment.setResource(sentResource);
1063 }
1064 String sentPolarity = getOptAttribute("polarity", sentimentElem);
1065 if (sentPolarity != null) {
1066 newSentiment.setPolarity(sentPolarity);
1067 }
1068 String sentStrength = getOptAttribute("strength", sentimentElem);
1069 if (sentStrength != null) {
1070 newSentiment.setStrength(sentStrength);
1071 }
1072 String sentSubjectivity = getOptAttribute("subjectivity", sentimentElem);
1073 if (sentSubjectivity != null) {
1074 newSentiment.setSubjectivity(sentSubjectivity);
1075 }
1076 String sentSentimentSemanticType = getOptAttribute("sentiment_semantic_type", sentimentElem);
1077 if (sentSentimentSemanticType != null) {
1078 newSentiment.setSentimentSemanticType(sentSentimentSemanticType);
1079 }
1080 String sentSentimentModifier = getOptAttribute("sentiment_modifier", sentimentElem);
1081 if (sentSentimentModifier != null) {
1082 newSentiment.setSentimentModifier(sentSentimentModifier);
1083 }
1084 String sentSentimentMarker = getOptAttribute("sentiment_marker", sentimentElem);
1085 if (sentSentimentMarker != null) {
1086 newSentiment.setSentimentMarker(sentSentimentMarker);
1087 }
1088 String sentSentimentProductFeature = getOptAttribute("sentiment_product_feature", sentimentElem);
1089 if (sentSentimentProductFeature != null) {
1090 newSentiment.setSentimentProductFeature(sentSentimentProductFeature);
1091 }
1092 newTerm.setSentiment(newSentiment);
1093 }
1094 if (!isComponent) {
1095 List<Element> termsComponentElems = termElem.getChildren("component");
1096 for (Element termsComponentElem : termsComponentElems) {
1097 DOMToTerm(termsComponentElem, kaf, true, wfIndex, termIndex);
1098 }
1099 }
1100 List<Element> externalReferencesElems = termElem.getChildren("externalReferences");
1101 if (externalReferencesElems.size() > 0) {
1102 List<ExternalRef> externalRefs = getExternalReferences(externalReferencesElems.get(0), kaf);
1103 newTerm.addExternalRefs(externalRefs);
1104 }
1105 termIndex.put(newTerm.getId(), newTerm);
1106 }
1107
1108 private static Span<Term> loadTermSpan(Element spanElem, HashMap<String, Term> terms, String objId)
1109 throws KAFNotValidException {
1110 List<Element> targetElems = spanElem.getChildren("target");
1111 if (targetElems.size() < 1) {
1112 throw new KAFNotValidException("A span element can not be empty");
1113 }
1114 Span<Term> span = KAFDocument.newTermSpan();
1115 for (Element targetElem : targetElems) {
1116 String targetId = getAttribute("id", targetElem);
1117 boolean isHead = isHead(targetElem);
1118 Term targetTerm = terms.get(targetId);
1119 if (targetTerm == null) {
1120 throw new KAFNotValidException("Term object " + targetId + " not found when loading object " + objId);
1121 }
1122 span.addTarget(targetTerm, isHead);
1123 }
1124 return span;
1125 }
1126
1127 private static Element createTermSpanElem(Span<Term> span) {
1128 Element spanElem = new Element("span");
1129 for (Term term : span.getTargets()) {
1130 Element targetElem = new Element("target");
1131 String targetId = term.getId();
1132 targetElem.setAttribute("id", targetId);
1133 if (span.isHead(term)) {
1134 targetElem.setAttribute("head", "yes");
1135 }
1136 spanElem.addContent(targetElem);
1137 }
1138 return spanElem;
1139 }
1140
1141 private static JsonArray createTermSpanElemJson(Span<Term> span) {
1142 JsonArray spanElem = new JsonArray();
1143 for (Term term : span.getTargets()) {
1144 JsonObject targetElem = new JsonObject();
1145 String targetId = term.getId();
1146 targetElem.addProperty("id", targetId);
1147 if (span.isHead(term)) {
1148 targetElem.addProperty("head", "yes");
1149 }
1150 spanElem.add(targetElem);
1151 }
1152 return spanElem;
1153 }
1154
1155
1156 private static List<ExternalRef> getExternalReferences(Element externalReferencesElem, KAFDocument kaf) {
1157 List<ExternalRef> externalRefs = new ArrayList<ExternalRef>();
1158 List<Element> externalRefElems = externalReferencesElem.getChildren();
1159 for (Element externalRefElem : externalRefElems) {
1160 ExternalRef externalRef = getExternalRef(externalRefElem, kaf);
1161 externalRefs.add(externalRef);
1162 }
1163 return externalRefs;
1164 }
1165
1166 private static ExternalRef getExternalRef(Element externalRefElem, KAFDocument kaf) {
1167 String resource = getAttribute("resource", externalRefElem);
1168 String references = getAttribute("reference", externalRefElem);
1169 ExternalRef newExternalRef = kaf.newExternalRef(resource, references);
1170
1171 try {
1172 String confidence = getOptAttribute("confidence", externalRefElem);
1173 newExternalRef.setConfidence(Float.valueOf(confidence));
1174 } catch (Exception e) {
1175 }
1176
1177 try {
1178 String source = getAttribute("source", externalRefElem);
1179 newExternalRef.setSource(source);
1180 } catch (Exception e) {
1181 }
1182
1183 List<Element> subRefElems = externalRefElem.getChildren("externalRef");
1184 if (subRefElems.size() > 0) {
1185 Element subRefElem = subRefElems.get(0);
1186 ExternalRef subRef = getExternalRef(subRefElem, kaf);
1187 newExternalRef.setExternalRef(subRef);
1188 }
1189 return newExternalRef;
1190 }
1191
1192 private static String getAttribute(String attName, Element elem) {
1193 String value = elem.getAttributeValue(attName);
1194 if (value == null) {
1195 throw new IllegalStateException(attName + " attribute must be defined for element " + elem.getName());
1196 }
1197 return value;
1198 }
1199
1200 private static String getAttribute(String attName, Element elem, Namespace nmspace) {
1201 String value = elem.getAttributeValue(attName, nmspace);
1202 if (value == null) {
1203 throw new IllegalStateException(attName + " attribute must be defined for element " + elem.getName());
1204 }
1205 return value;
1206 }
1207
1208 private static String getOptAttribute(String attName, Element elem) {
1209 String value = elem.getAttributeValue(attName);
1210 if (value == null) {
1211 return null;
1212 }
1213 return value;
1214 }
1215
1216 private static boolean isHead(Element elem) {
1217 String value = elem.getAttributeValue("head");
1218 if (value == null) {
1219 return false;
1220 }
1221 if (value.equals("yes")) {
1222 return true;
1223 }
1224 return false;
1225 }
1226
1227 private static class Edge {
1228
1229 String id;
1230 String from;
1231 String to;
1232 boolean head;
1233
1234 Edge(TreeNode from, TreeNode to) {
1235 if (from.hasEdgeId()) {
1236 this.id = from.getEdgeId();
1237 }
1238 this.from = from.getId();
1239 this.to = to.getId();
1240 this.head = from.getHead();
1241 }
1242 }
1243
1244
1245
1246
1247
1248
1249
1250
1251 public static JsonObject KAFToJSON(KAFDocument kaf) {
1252 AnnotationContainer annotationContainer = kaf.getAnnotationContainer();
1253
1254 Gson gson = new Gson();
1255
1256 JsonObject jo = new JsonObject();
1257
1258 jo.addProperty("lang", kaf.getLang());
1259 jo.addProperty("version", kaf.getVersion());
1260
1261 JsonObject header = new JsonObject();
1262
1263
1264 KAFDocument.FileDesc fd = kaf.getFileDesc();
1265 if (fd != null) {
1266 JsonObject fdElem = new JsonObject();
1267 if (fd.author != null) {
1268 fdElem.addProperty("author", fd.author);
1269 }
1270 if (fd.creationtime != null) {
1271 fdElem.addProperty("creationtime", fd.creationtime);
1272 }
1273 if (fd.title != null) {
1274 fdElem.addProperty("title", fd.title);
1275 }
1276 if (fd.filename != null) {
1277 fdElem.addProperty("filename", fd.filename);
1278 }
1279 if (fd.filetype != null) {
1280 fdElem.addProperty("filetype", fd.filetype);
1281 }
1282 if (fd.pages != null) {
1283 fdElem.addProperty("pages", Integer.toString(fd.pages));
1284 }
1285 header.add("fileDesc", fdElem);
1286 }
1287
1288 KAFDocument.Public pub = kaf.getPublic();
1289 if (pub != null) {
1290 JsonObject pubElem = new JsonObject();
1291 if (pub.publicId != null) {
1292 pubElem.addProperty("publicId", pub.publicId);
1293 }
1294 if (pub.uri != null) {
1295 pubElem.addProperty("uri", pub.uri);
1296 }
1297 header.add("public", pubElem);
1298 }
1299
1300
1301 Map<String, List<LinguisticProcessor>> lps = kaf.getLinguisticProcessors();
1302 for (Map.Entry entry : lps.entrySet()) {
1303 JsonObject lpsElem = new JsonObject();
1304 lpsElem.addProperty("layer", (String) entry.getKey());
1305 for (LinguisticProcessor lp : (List<LinguisticProcessor>) entry.getValue()) {
1306 JsonObject lpElem = new JsonObject();
1307
1308 lpElem.addProperty("name", lp.name);
1309 if (lp.hasTimestamp()) {
1310 lpElem.addProperty("timestamp", lp.timestamp);
1311 }
1312 if (lp.hasBeginTimestamp()) {
1313 lpElem.addProperty("beginTimestamp", lp.beginTimestamp);
1314 }
1315 if (lp.hasEndTimestamp()) {
1316 lpElem.addProperty("endTimestamp", lp.endTimestamp);
1317 }
1318 if (lp.hasVersion()) {
1319 lpElem.addProperty("version", lp.version);
1320 }
1321 lpsElem.add("lp", lpElem);
1322
1323 }
1324
1325 header.add("linguisticProcessors", lpsElem);
1326 }
1327
1328
1329 jo.add("header", header);
1330
1331 String rawText = annotationContainer.getRawText();
1332 if (rawText.length() > 0) {
1333 JsonObject rawElem = new JsonObject();
1334 rawElem.addProperty("text", rawText);
1335 jo.add("raw", rawElem);
1336 }
1337
1338 List<WF> text = annotationContainer.getText();
1339 if (text.size() > 0) {
1340 JsonArray textElem = new JsonArray();
1341
1342 for (WF wf : text) {
1343
1344 JsonObject wfElem = new JsonObject();
1345
1346
1347 wfElem.addProperty("id", wf.getId());
1348 wfElem.addProperty("sent", Integer.toString(wf.getSent()));
1349 if (wf.hasPara()) {
1350 wfElem.addProperty("para", Integer.toString(wf.getPara()));
1351 }
1352 if (wf.hasPage()) {
1353 wfElem.addProperty("page", Integer.toString(wf.getPage()));
1354 }
1355 if (wf.hasOffset()) {
1356 wfElem.addProperty("offset", Integer.toString(wf.getOffset()));
1357 }
1358 if (wf.hasLength()) {
1359 wfElem.addProperty("length", Integer.toString(wf.getLength()));
1360 }
1361 if (wf.hasXpath()) {
1362 wfElem.addProperty("xpath", wf.getXpath());
1363 }
1364 wfElem.addProperty("text", wf.getForm());
1365
1366
1367 textElem.add(gson.fromJson(wfElem.toString(), JsonElement.class));
1368 }
1369 jo.add("text", textElem);
1370 }
1371
1372 List<Term> terms = annotationContainer.getTerms();
1373 if (terms.size() > 0) {
1374 JsonArray termsElem = new JsonArray();
1375 for (Term term : terms) {
1376 termToJson(term, false, termsElem);
1377
1378 }
1379
1380 jo.add("terms", termsElem);
1381
1382 }
1383
1384
1385 List<String> markSources = annotationContainer.getMarkSources();
1386 for (String source : markSources) {
1387 List<Mark> marks = annotationContainer.getMarks(source);
1388 if (marks.size() > 0) {
1389 JsonObject marksElem = new JsonObject();
1390
1391 marksElem.addProperty("source", source);
1392 for (Mark mark : marks) {
1393 JsonObject markElem = new JsonObject();
1394
1395 marksElem.addProperty("id", mark.getId());
1396 if (mark.hasType()) {
1397 markElem.addProperty("type", mark.getType());
1398 }
1399 if (mark.hasLemma()) {
1400 markElem.addProperty("lemma", mark.getLemma());
1401 }
1402 if (mark.hasPos()) {
1403 markElem.addProperty("pos", mark.getPos());
1404 }
1405 if (mark.hasMorphofeat()) {
1406 markElem.addProperty("morphofeat", mark.getMorphofeat());
1407 }
1408 if (mark.hasCase()) {
1409 markElem.addProperty("case", mark.getCase());
1410 }
1411
1412 JsonObject spanElem = new JsonObject();
1413
1414 Span<Term> span = mark.getSpan();
1415 for (Term target : span.getTargets()) {
1416 JsonObject targetElem = new JsonObject();
1417 targetElem.addProperty("id", target.getId());
1418 if (target == span.getHead()) {
1419 targetElem.addProperty("head", "yes");
1420 }
1421 spanElem.add("target", targetElem);
1422 }
1423
1424 markElem.add("span", spanElem);
1425
1426 List<ExternalRef> externalReferences = mark.getExternalRefs();
1427 if (externalReferences.size() > 0) {
1428
1429
1430 }
1431
1432 marksElem.add("mark", marksElem);
1433
1434 }
1435 jo.add("markables", marksElem);
1436 }
1437 }
1438
1439 List<Dep> deps = annotationContainer.getDeps();
1440 if (deps.size() > 0) {
1441
1442 JsonArray depsArr = new JsonArray();
1443
1444 for (Dep dep : deps) {
1445
1446
1447 JsonObject depElem = new JsonObject();
1448
1449 depElem.addProperty("from", dep.getFrom().getId());
1450 depElem.addProperty("to", dep.getTo().getId());
1451 depElem.addProperty("rfunc", dep.getRfunc());
1452 if (dep.hasCase()) {
1453 depElem.addProperty("case", dep.getCase());
1454 }
1455 depsArr.add(depElem);
1456 }
1457
1458 jo.add("deps",depsArr);
1459
1460 }
1461
1462
1463
1464
1465
1466
1467
1468 List<Entity> entities = annotationContainer.getEntities();
1469 if (entities.size() > 0) {
1470
1471 JsonArray entitiesElem = new JsonArray();
1472
1473 for (Entity entity : entities) {
1474
1475 JsonObject entityElem = new JsonObject();
1476 entityElem.addProperty("id", entity.getId());
1477 if (entity.hasType()) {
1478 entityElem.addProperty("type", entity.getType());
1479 }
1480 if (!entity.isNamed()) {
1481 entityElem.addProperty("unnamed", "yes");
1482 }
1483
1484
1485 JsonArray referencesElem = new JsonArray();
1486 for (Span<Term> span : entity.getSpans()) {
1487 JsonArray spanElem = new JsonArray();
1488
1489
1490 for (Term term : span.getTargets()) {
1491 JsonObject targetElem = new JsonObject();
1492 targetElem.addProperty("id", term.getId());
1493 if (term == span.getHead()) {
1494 targetElem.addProperty("head", "yes");
1495 }
1496 spanElem.add( targetElem);
1497 }
1498 JsonObject spanWrapper = new JsonObject();
1499 spanWrapper.add("span",spanElem);
1500
1501 referencesElem.add(spanWrapper);
1502 }
1503
1504
1505 entityElem.add("references",referencesElem);
1506
1507
1508 List<ExternalRef> externalReferences = entity.getExternalRefs();
1509 if (externalReferences.size() > 0) {
1510 JsonArray externalReferencesElem = externalReferencesToJSON(externalReferences);
1511 entityElem.add("extRef",externalReferencesElem);
1512 }
1513 entitiesElem.add(entityElem);
1514 }
1515 jo.add("entities",entitiesElem);
1516 }
1517
1518
1519 List<Coref> corefs = annotationContainer.getCorefs();
1520 if (corefs.size() > 0) {
1521
1522 JsonArray corefsElem = new JsonArray();
1523 for (Coref coref : corefs) {
1524 JsonObject corefElem = new JsonObject();
1525
1526
1527
1528 corefElem.addProperty("id", coref.getId());
1529 if (coref.hasType()) {
1530 corefElem.addProperty("type", coref.getType());
1531 }
1532 if (coref.hasCluster()) {
1533 corefElem.addProperty("cluster", coref.getCluster());
1534 }
1535
1536 JsonArray spans = new JsonArray();
1537
1538
1539
1540 for (Span<Term> span : coref.getSpans()) {
1541 JsonArray spanElem = new JsonArray();
1542 for (Term target : span.getTargets()) {
1543 JsonObject targetElem = new JsonObject();
1544 targetElem.addProperty("id", target.getId());
1545 if (target == span.getHead()) {
1546 targetElem.addProperty("head", "yes");
1547 }
1548 spanElem.add(targetElem);
1549 }
1550 spans.add(spanElem);
1551 }
1552 corefElem.add("span",spans);
1553
1554
1555
1556
1557 List<ExternalRef> externalReferences = coref.getExternalRefs();
1558 if (externalReferences.size() > 0) {
1559 JsonArray externalReferencesElem = externalReferencesToJSON(externalReferences);
1560 corefElem.add("extRef",externalReferencesElem);
1561 }
1562 corefsElem.add(corefElem);
1563 }
1564
1565 jo.add("coreferences",corefsElem);
1566 }
1567
1568
1569
1570
1571
1572
1573 List<Timex3> timeExs = annotationContainer.getTimeExs();
1574 if (timeExs.size() > 0) {
1575
1576
1577 JsonArray timeExsElem = new JsonArray();
1578
1579 for (Timex3 timex3 : timeExs) {
1580 JsonObject timex3Elem = new JsonObject();
1581
1582 timex3Elem.addProperty("id", timex3.getId());
1583 timex3Elem.addProperty("type", timex3.getType());
1584 if (timex3.hasBeginPoint()) {
1585 timex3Elem.addProperty("beginPoint", timex3.getBeginPoint().getId());
1586 }
1587 if (timex3.hasEndPoint()) {
1588 timex3Elem.addProperty("endPoint", timex3.getEndPoint().getId());
1589 }
1590 if (timex3.hasQuant()) {
1591 timex3Elem.addProperty("quant", timex3.getQuant());
1592 }
1593 if (timex3.hasFreq()) {
1594 timex3Elem.addProperty("freq", timex3.getFreq());
1595 }
1596 if (timex3.hasFunctionInDocument()) {
1597 timex3Elem.addProperty("functionInDocument", timex3.getFunctionInDocument());
1598 }
1599 if (timex3.hasTemporalFunction()) {
1600 String tempFun = timex3.getTemporalFunction() ? "true" : "false";
1601 timex3Elem.addProperty("temporalFunction", tempFun);
1602 }
1603 if (timex3.hasValue()) {
1604 timex3Elem.addProperty("value", timex3.getValue());
1605 }
1606 if (timex3.hasValueFromFunction()) {
1607 timex3Elem.addProperty("valueFromFunction", timex3.getValueFromFunction());
1608 }
1609 if (timex3.hasMod()) {
1610 timex3Elem.addProperty("mod", timex3.getMod());
1611 }
1612 if (timex3.hasAnchorTimeId()) {
1613 timex3Elem.addProperty("anchorTimeId", timex3.getAnchorTimeId());
1614 }
1615 if (timex3.hasComment()) {
1616 timex3Elem.addProperty("comment", timex3.getComment());
1617 }
1618
1619
1620 if (timex3.hasSpan()) {
1621 Span<WF> span = timex3.getSpan();
1622
1623 JsonArray spanElem = new JsonArray();
1624
1625 for (WF target : span.getTargets()) {
1626 JsonObject targetElem = new JsonObject();
1627 targetElem.addProperty("id", target.getId());
1628 if (target == span.getHead()) {
1629 targetElem.addProperty("head", "yes");
1630 }
1631 spanElem.add(targetElem);
1632 }
1633 timex3Elem.add("spans",spanElem);
1634 }
1635
1636
1637 timeExsElem.add(timex3Elem);
1638 }
1639 jo.add("timeExpressions",timeExsElem);
1640 }
1641
1642
1643
1644
1645 List<LinkedEntity> linkedEntities = annotationContainer.getLinkedEntities();
1646 if (linkedEntities.size() > 0) {
1647 JsonArray linkedEntityElement = new JsonArray();
1648 for (LinkedEntity e : linkedEntities) {
1649 JsonObject lEnt = new JsonObject();
1650 lEnt.addProperty("id", e.getId());
1651 lEnt.addProperty("resource", e.getResource());
1652 lEnt.addProperty("reference", e.getReference());
1653 lEnt.addProperty("confidence", Double.toString(e.getConfidence()));
1654 lEnt.addProperty("spotted", e.isSpotted().toString());
1655
1656
1657 JsonArray spanElem = new JsonArray();
1658 for (WF target : e.getWFs().getTargets()) {
1659 JsonObject targetElem = new JsonObject();
1660 targetElem.addProperty("id", target.getId());
1661 spanElem.add(targetElem);
1662 }
1663 lEnt.add("span",spanElem);
1664
1665 if (e.getTypes().size() > 0) {
1666
1667
1668 JsonArray typesElement = new JsonArray();
1669 for (String category : e.getTypes().keySet()) {
1670 for (String type : e.getTypes().get(category)) {
1671 JsonObject typeElement = new JsonObject();
1672 typeElement.addProperty("source", category);
1673 typeElement.addProperty("label", type);
1674 typesElement.add(typeElement);
1675 }
1676 }
1677 lEnt.add("types",typesElement);
1678 }
1679 linkedEntityElement.add(lEnt);
1680 }
1681 jo.add("linkedEntities",linkedEntityElement);
1682 }
1683
1684
1685
1686
1687
1688 List<Predicate> predicates = annotationContainer.getPredicates();
1689 if (predicates.size() > 0) {
1690 JsonObject predicatesObj = new JsonObject();
1691 JsonArray predicatesElem = new JsonArray();
1692
1693 for (Predicate predicate : predicates) {
1694
1695
1696 JsonObject predicateElem = new JsonObject();
1697
1698 predicateElem.addProperty("id", predicate.getId());
1699 if (predicate.hasSource()) {
1700 predicateElem.addProperty("source", predicate.getSource());
1701 }
1702 if (predicate.hasUri()) {
1703 predicateElem.addProperty("uri", predicate.getUri());
1704 }
1705 if (predicate.hasConfidence()) {
1706 predicateElem.addProperty("confidence", Double.toString(predicate.getConfidence()));
1707 }
1708
1709
1710 if (!predicate.getFlags().isEmpty()) {
1711 StringBuilder builder = new StringBuilder();
1712 String separator = "";
1713 for (String flag : predicate.getFlags()) {
1714 builder.append(separator).append(flag);
1715 separator = ",";
1716 }
1717 predicateElem.addProperty("flags", builder.toString());
1718 }
1719
1720
1721 Span<Term> span = predicate.getSpan();
1722 if (span.getTargets().size() > 0) {
1723 JsonArray spans = new JsonArray();
1724 for (Term target : span.getTargets())
1725 {
1726 JsonObject targetElem = new JsonObject();
1727 targetElem.addProperty("id", target.getId());
1728 if (target == span.getHead()) {
1729 targetElem.addProperty("head", "yes");
1730 }
1731 spans.add(targetElem);
1732 }
1733 predicateElem.add("span",spans);
1734
1735 }
1736 List<ExternalRef> externalReferences = predicate.getExternalRefs();
1737 if (externalReferences.size() > 0) {
1738 JsonArray externalReferencesElem = externalReferencesToJSON(externalReferences);
1739 predicateElem.add("extRef",externalReferencesElem);
1740 }
1741
1742 JsonArray roles = new JsonArray();
1743
1744 for (Predicate.Role role : predicate.getRoles()) {
1745
1746 JsonObject roleElem = new JsonObject();
1747
1748 roleElem.addProperty("id", role.getId());
1749 roleElem.addProperty("semRole", role.getSemRole());
1750 if (!role.getFlags().isEmpty()) {
1751 StringBuilder builder = new StringBuilder();
1752 String separator = "";
1753 for (String flag : role.getFlags()) {
1754 builder.append(separator).append(flag);
1755 separator = ",";
1756 }
1757 roleElem.addProperty("flags", builder.toString());
1758 }
1759
1760
1761 Span<Term> roleSpan = role.getSpan();
1762 if (roleSpan.getTargets().size() > 0) {
1763 JsonArray spanElem = new JsonArray();
1764 for (Term target : roleSpan.getTargets()) {
1765 JsonObject targetElem = new JsonObject();
1766 targetElem.addProperty("id", target.getId());
1767 if (target == roleSpan.getHead()) {
1768 targetElem.addProperty("head", "yes");
1769 }
1770 spanElem.add(targetElem);
1771 }
1772 roleElem.add("span",spanElem);
1773 }
1774
1775
1776 List<ExternalRef> rExternalReferences = role.getExternalRefs();
1777 if (rExternalReferences.size() > 0) {
1778 JsonArray externalReferencesElem = externalReferencesToJSON(rExternalReferences);
1779 roleElem.add("extRef",externalReferencesElem);
1780 }
1781 roles.add(roleElem);
1782 }
1783
1784 predicateElem.add("roles",roles);
1785
1786 predicatesElem.add(predicateElem);
1787 }
1788 predicatesObj.add("predicates",predicatesElem);
1789 jo.add("srl",predicatesObj);
1790 }
1791
1792 HashMap<Integer, String> conStrings = annotationContainer.getConstituencyStrings();
1793 if (conStrings.size() > 0) {
1794 JsonArray constituentsElem = new JsonArray();
1795 for (Integer sent : conStrings.keySet()) {
1796 String constituencyString = conStrings.get(sent);
1797
1798 JsonObject treeElem = new JsonObject();
1799
1800 treeElem.addProperty("sentence", sent.toString());
1801 treeElem.addProperty("text", constituencyString);
1802 constituentsElem.add(treeElem);
1803 }
1804 jo.add("constituencyStrings",constituentsElem);
1805 }
1806
1807
1808
1809 List<Tree> constituents = annotationContainer.getConstituents();
1810 if (constituents.size() > 0) {
1811
1812 JsonObject constituentsElem = new JsonObject();
1813
1814 JsonArray trees = new JsonArray();
1815
1816
1817 for (Tree tree : constituents) {
1818
1819
1820 JsonObject treeElem = new JsonObject();
1821 try {
1822 treeElem.addProperty("sentence", tree.getSentence().toString());
1823 } catch (Exception e) {
1824
1825 }
1826
1827
1828
1829 List<NonTerminal> nonTerminals = new LinkedList<NonTerminal>();
1830 List<Terminal> terminals = new LinkedList<Terminal>();
1831 List<Edge> edges = new ArrayList<Edge>();
1832 TreeNode rootNode = tree.getRoot();
1833 extractTreeNodes(rootNode, nonTerminals, terminals, edges);
1834
1835
1836
1837
1838 Collections.sort(nonTerminals, new Comparator<NonTerminal>() {
1839
1840 public int compare(NonTerminal nt1, NonTerminal nt2) {
1841 if (cmpId(nt1.getId(), nt2.getId()) < 0) {
1842 return -1;
1843 } else if (nt1.getId().equals(nt2.getId())) {
1844 return 0;
1845 } else {
1846 return 1;
1847 }
1848 }
1849 });
1850 Collections.sort(terminals, new Comparator<Terminal>() {
1851
1852 public int compare(Terminal t1, Terminal t2) {
1853 if (cmpId(t1.getId(), t2.getId()) < 0) {
1854 return -1;
1855 } else if (t1.getId().equals(t2.getId())) {
1856 return 0;
1857 } else {
1858 return 1;
1859 }
1860 }
1861 });
1862
1863
1864 JsonArray nts = new JsonArray();
1865 for (NonTerminal node : nonTerminals) {
1866 JsonObject nodeElem = new JsonObject();
1867 nodeElem.addProperty("id", node.getId());
1868 nodeElem.addProperty("label", node.getLabel());
1869 nts.add(nodeElem);
1870 }
1871 treeElem.add("nt",nts);
1872
1873 JsonArray ts = new JsonArray();
1874 for (Terminal node : terminals) {
1875 JsonObject nodeElem = new JsonObject();
1876 nodeElem.addProperty("id", node.getId());
1877 nodeElem.add("span",createTermSpanElemJson(node.getSpan()));
1878 ts.add(nodeElem);
1879 }
1880 treeElem.add("t",ts);
1881
1882 JsonArray edgesArray = new JsonArray();
1883 for (Edge edge : edges) {
1884 JsonObject edgeElem = new JsonObject();
1885 if (edge.id != null) {
1886 edgeElem.addProperty("id", edge.id);
1887 }
1888 edgeElem.addProperty("from", edge.from);
1889 edgeElem.addProperty("to", edge.to);
1890 if (edge.head) {
1891 edgeElem.addProperty("head", "yes");
1892 }
1893 edgesArray.add(edgeElem);
1894 }
1895
1896 treeElem.add("edges",edgesArray);
1897
1898 trees.add(treeElem);
1899 }
1900
1901 constituentsElem.add("trees",trees);
1902
1903 jo.add("constituency",constituentsElem);
1904 }
1905
1906
1907 return jo;
1908
1909
1910 }
1911
1912
1913
1914
1915
1916 private static Document KAFToDOM(KAFDocument kaf) {
1917 AnnotationContainer annotationContainer = kaf.getAnnotationContainer();
1918 Element root = new Element("NAF");
1919 root.setAttribute("lang", kaf.getLang(), Namespace.XML_NAMESPACE);
1920 root.setAttribute("version", kaf.getVersion());
1921
1922 Document doc = new Document(root);
1923
1924 Element kafHeaderElem = new Element("nafHeader");
1925 root.addContent(kafHeaderElem);
1926
1927 KAFDocument.FileDesc fd = kaf.getFileDesc();
1928 if (fd != null) {
1929 Element fdElem = new Element("fileDesc");
1930 if (fd.author != null) {
1931 fdElem.setAttribute("author", fd.author);
1932 }
1933 if (fd.creationtime != null) {
1934 fdElem.setAttribute("creationtime", fd.creationtime);
1935 }
1936 if (fd.title != null) {
1937 fdElem.setAttribute("title", fd.title);
1938 }
1939 if (fd.filename != null) {
1940 fdElem.setAttribute("filename", fd.filename);
1941 }
1942 if (fd.filetype != null) {
1943 fdElem.setAttribute("filetype", fd.filetype);
1944 }
1945 if (fd.pages != null) {
1946 fdElem.setAttribute("pages", Integer.toString(fd.pages));
1947 }
1948 kafHeaderElem.addContent(fdElem);
1949 }
1950
1951 KAFDocument.Public pub = kaf.getPublic();
1952 if (pub != null) {
1953 Element pubElem = new Element("public");
1954 if (pub.publicId != null) {
1955 pubElem.setAttribute("publicId", pub.publicId);
1956 }
1957 if (pub.uri != null) {
1958 pubElem.setAttribute("uri", pub.uri);
1959 }
1960 kafHeaderElem.addContent(pubElem);
1961 }
1962
1963 Map<String, List<LinguisticProcessor>> lps = kaf.getLinguisticProcessors();
1964 for (Map.Entry entry : lps.entrySet()) {
1965 Element lpsElem = new Element("linguisticProcessors");
1966 lpsElem.setAttribute("layer", (String) entry.getKey());
1967 for (LinguisticProcessor lp : (List<LinguisticProcessor>) entry.getValue()) {
1968 Element lpElem = new Element("lp");
1969 lpElem.setAttribute("name", lp.name);
1970 if (lp.hasTimestamp()) {
1971 lpElem.setAttribute("timestamp", lp.timestamp);
1972 }
1973 if (lp.hasBeginTimestamp()) {
1974 lpElem.setAttribute("beginTimestamp", lp.beginTimestamp);
1975 }
1976 if (lp.hasEndTimestamp()) {
1977 lpElem.setAttribute("endTimestamp", lp.endTimestamp);
1978 }
1979 if (lp.hasVersion()) {
1980 lpElem.setAttribute("version", lp.version);
1981 }
1982 lpsElem.addContent(lpElem);
1983 }
1984 kafHeaderElem.addContent(lpsElem);
1985 }
1986
1987 String rawText = annotationContainer.getRawText();
1988 if (rawText.length() > 0) {
1989 Element rawElem = new Element("raw");
1990 CDATA cdataElem = new CDATA(rawText);
1991 rawElem.addContent(cdataElem);
1992 root.addContent(rawElem);
1993 }
1994
1995 List<WF> text = annotationContainer.getText();
1996 if (text.size() > 0) {
1997 Element textElem = new Element("text");
1998 for (WF wf : text) {
1999 Element wfElem = new Element("wf");
2000 wfElem.setAttribute("id", wf.getId());
2001 wfElem.setAttribute("sent", Integer.toString(wf.getSent()));
2002 if (wf.hasPara()) {
2003 wfElem.setAttribute("para", Integer.toString(wf.getPara()));
2004 }
2005 if (wf.hasPage()) {
2006 wfElem.setAttribute("page", Integer.toString(wf.getPage()));
2007 }
2008 if (wf.hasOffset()) {
2009 wfElem.setAttribute("offset", Integer.toString(wf.getOffset()));
2010 }
2011 if (wf.hasLength()) {
2012 wfElem.setAttribute("length", Integer.toString(wf.getLength()));
2013 }
2014 if (wf.hasXpath()) {
2015 wfElem.setAttribute("xpath", wf.getXpath());
2016 }
2017 wfElem.setText(wf.getForm());
2018 textElem.addContent(wfElem);
2019 }
2020 root.addContent(textElem);
2021 }
2022
2023 List<Term> terms = annotationContainer.getTerms();
2024 if (terms.size() > 0) {
2025 Element termsElem = new Element("terms");
2026 for (Term term : terms) {
2027 termToDOM(term, false, termsElem);
2028 }
2029 root.addContent(termsElem);
2030 }
2031
2032 List<String> markSources = annotationContainer.getMarkSources();
2033 for (String source : markSources) {
2034 List<Mark> marks = annotationContainer.getMarks(source);
2035 if (marks.size() > 0) {
2036 Element marksElem = new Element("markables");
2037 marksElem.setAttribute("source", source);
2038 for (Mark mark : marks) {
2039 Comment markComment = new Comment(mark.getStr());
2040 marksElem.addContent(markComment);
2041 Element markElem = new Element("mark");
2042 markElem.setAttribute("id", mark.getId());
2043 if (mark.hasType()) {
2044 markElem.setAttribute("type", mark.getType());
2045 }
2046 if (mark.hasLemma()) {
2047 markElem.setAttribute("lemma", mark.getLemma());
2048 }
2049 if (mark.hasPos()) {
2050 markElem.setAttribute("pos", mark.getPos());
2051 }
2052 if (mark.hasMorphofeat()) {
2053 markElem.setAttribute("morphofeat", mark.getMorphofeat());
2054 }
2055 if (mark.hasCase()) {
2056 markElem.setAttribute("case", mark.getCase());
2057 }
2058 Element spanElem = new Element("span");
2059 Span<Term> span = mark.getSpan();
2060 for (Term target : span.getTargets()) {
2061 Element targetElem = new Element("target");
2062 targetElem.setAttribute("id", target.getId());
2063 if (target == span.getHead()) {
2064 targetElem.setAttribute("head", "yes");
2065 }
2066 spanElem.addContent(targetElem);
2067 }
2068 markElem.addContent(spanElem);
2069 List<ExternalRef> externalReferences = mark.getExternalRefs();
2070 if (externalReferences.size() > 0) {
2071 Element externalReferencesElem = externalReferencesToDOM(externalReferences);
2072 markElem.addContent(externalReferencesElem);
2073 }
2074 marksElem.addContent(markElem);
2075 }
2076 root.addContent(marksElem);
2077 }
2078 }
2079
2080 List<Dep> deps = annotationContainer.getDeps();
2081 if (deps.size() > 0) {
2082 Element depsElem = new Element("deps");
2083 for (Dep dep : deps) {
2084 Comment depComment = new Comment(dep.getStr());
2085 depsElem.addContent(depComment);
2086 Element depElem = new Element("dep");
2087 depElem.setAttribute("from", dep.getFrom().getId());
2088 depElem.setAttribute("to", dep.getTo().getId());
2089 depElem.setAttribute("rfunc", dep.getRfunc());
2090 if (dep.hasCase()) {
2091 depElem.setAttribute("case", dep.getCase());
2092 }
2093 depsElem.addContent(depElem);
2094 }
2095 root.addContent(depsElem);
2096 }
2097
2098 List<Chunk> chunks = annotationContainer.getChunks();
2099 if (chunks.size() > 0) {
2100 Element chunksElem = new Element("chunks");
2101 for (Chunk chunk : chunks) {
2102 Comment chunkComment = new Comment(chunk.getStr());
2103 chunksElem.addContent(chunkComment);
2104 Element chunkElem = new Element("chunk");
2105 chunkElem.setAttribute("id", chunk.getId());
2106 chunkElem.setAttribute("head", chunk.getHead().getId());
2107 if (chunk.hasPhrase()) {
2108 chunkElem.setAttribute("phrase", chunk.getPhrase());
2109 }
2110 if (chunk.hasCase()) {
2111 chunkElem.setAttribute("case", chunk.getCase());
2112 }
2113 Element spanElem = new Element("span");
2114 for (Term target : chunk.getTerms()) {
2115 Element targetElem = new Element("target");
2116 targetElem.setAttribute("id", target.getId());
2117 spanElem.addContent(targetElem);
2118 }
2119 chunkElem.addContent(spanElem);
2120 chunksElem.addContent(chunkElem);
2121 }
2122 root.addContent(chunksElem);
2123 }
2124
2125 List<Entity> entities = annotationContainer.getEntities();
2126 if (entities.size() > 0) {
2127 Element entitiesElem = new Element("entities");
2128 for (Entity entity : entities) {
2129 Element entityElem = new Element("entity");
2130 entityElem.setAttribute("id", entity.getId());
2131 if (entity.hasType()) {
2132 entityElem.setAttribute("type", entity.getType());
2133 }
2134 if (!entity.isNamed()) {
2135 entityElem.setAttribute("unnamed", "yes");
2136 }
2137 Element referencesElem = new Element("references");
2138 for (Span<Term> span : entity.getSpans()) {
2139 Comment spanComment = new Comment(entity.getSpanStr(span));
2140 referencesElem.addContent(spanComment);
2141 Element spanElem = new Element("span");
2142 for (Term term : span.getTargets()) {
2143 Element targetElem = new Element("target");
2144 targetElem.setAttribute("id", term.getId());
2145 if (term == span.getHead()) {
2146 targetElem.setAttribute("head", "yes");
2147 }
2148 spanElem.addContent(targetElem);
2149 }
2150 referencesElem.addContent(spanElem);
2151 }
2152 entityElem.addContent(referencesElem);
2153 List<ExternalRef> externalReferences = entity.getExternalRefs();
2154 if (externalReferences.size() > 0) {
2155 Element externalReferencesElem = externalReferencesToDOM(externalReferences);
2156 entityElem.addContent(externalReferencesElem);
2157 }
2158 entitiesElem.addContent(entityElem);
2159 }
2160 root.addContent(entitiesElem);
2161 }
2162
2163 List<Coref> corefs = annotationContainer.getCorefs();
2164 if (corefs.size() > 0) {
2165 Element corefsElem = new Element("coreferences");
2166 for (Coref coref : corefs) {
2167 Element corefElem = new Element("coref");
2168 corefElem.setAttribute("id", coref.getId());
2169 if (coref.hasType()) {
2170 corefElem.setAttribute("type", coref.getType());
2171 }
2172 if (coref.hasCluster()) {
2173 corefElem.setAttribute("cluster", coref.getCluster());
2174 }
2175 for (Span<Term> span : coref.getSpans()) {
2176 Comment spanComment = new Comment(coref.getSpanStr(span));
2177 corefElem.addContent(spanComment);
2178 Element spanElem = new Element("span");
2179 for (Term target : span.getTargets()) {
2180 Element targetElem = new Element("target");
2181 targetElem.setAttribute("id", target.getId());
2182 if (target == span.getHead()) {
2183 targetElem.setAttribute("head", "yes");
2184 }
2185 spanElem.addContent(targetElem);
2186 }
2187 corefElem.addContent(spanElem);
2188 }
2189 List<ExternalRef> externalReferences = coref.getExternalRefs();
2190 if (externalReferences.size() > 0) {
2191 Element externalReferencesElem = externalReferencesToDOM(externalReferences);
2192 corefElem.addContent(externalReferencesElem);
2193 }
2194 corefsElem.addContent(corefElem);
2195 }
2196 root.addContent(corefsElem);
2197 }
2198
2199 List<Timex3> timeExs = annotationContainer.getTimeExs();
2200 if (timeExs.size() > 0) {
2201 Element timeExsElem = new Element("timeExpressions");
2202 for (Timex3 timex3 : timeExs) {
2203 Element timex3Elem = new Element("timex3");
2204 timex3Elem.setAttribute("id", timex3.getId());
2205 timex3Elem.setAttribute("type", timex3.getType());
2206 if (timex3.hasBeginPoint()) {
2207 timex3Elem.setAttribute("beginPoint", timex3.getBeginPoint().getId());
2208 }
2209 if (timex3.hasEndPoint()) {
2210 timex3Elem.setAttribute("endPoint", timex3.getEndPoint().getId());
2211 }
2212 if (timex3.hasQuant()) {
2213 timex3Elem.setAttribute("quant", timex3.getQuant());
2214 }
2215 if (timex3.hasFreq()) {
2216 timex3Elem.setAttribute("freq", timex3.getFreq());
2217 }
2218 if (timex3.hasFunctionInDocument()) {
2219 timex3Elem.setAttribute("functionInDocument", timex3.getFunctionInDocument());
2220 }
2221 if (timex3.hasTemporalFunction()) {
2222 String tempFun = timex3.getTemporalFunction() ? "true" : "false";
2223 timex3Elem.setAttribute("temporalFunction", tempFun);
2224 }
2225 if (timex3.hasValue()) {
2226 timex3Elem.setAttribute("value", timex3.getValue());
2227 }
2228 if (timex3.hasValueFromFunction()) {
2229 timex3Elem.setAttribute("valueFromFunction", timex3.getValueFromFunction());
2230 }
2231 if (timex3.hasMod()) {
2232 timex3Elem.setAttribute("mod", timex3.getMod());
2233 }
2234 if (timex3.hasAnchorTimeId()) {
2235 timex3Elem.setAttribute("anchorTimeId", timex3.getAnchorTimeId());
2236 }
2237 if (timex3.hasComment()) {
2238 timex3Elem.setAttribute("comment", timex3.getComment());
2239 }
2240 if (timex3.hasSpan()) {
2241 Span<WF> span = timex3.getSpan();
2242 Comment spanComment = new Comment(timex3.getSpanStr(span));
2243 timex3Elem.addContent(spanComment);
2244 Element spanElem = new Element("span");
2245 for (WF target : span.getTargets()) {
2246 Element targetElem = new Element("target");
2247 targetElem.setAttribute("id", target.getId());
2248 if (target == span.getHead()) {
2249 targetElem.setAttribute("head", "yes");
2250 }
2251 spanElem.addContent(targetElem);
2252 }
2253 timex3Elem.addContent(spanElem);
2254 }
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271 timeExsElem.addContent(timex3Elem);
2272 }
2273 root.addContent(timeExsElem);
2274 }
2275
2276 List<Factuality> factualities = annotationContainer.getFactualities();
2277 if (factualities.size() > 0) {
2278 Element factsElement = new Element("factualitylayer");
2279 for (Factuality f : factualities) {
2280 try {
2281 Element fact = new Element("factvalue");
2282 fact.setAttribute("id", f.getId());
2283 fact.setAttribute("prediction", f.getMaxPart().getPrediction());
2284 fact.setAttribute("confidence", Double.toString(f.getMaxPart().getConfidence()));
2285
2286 for (Factuality.FactualityPart p : f.getFactualityParts()) {
2287 Element factPartial = new Element("factuality");
2288 factPartial.setAttribute("prediction", p.getPrediction());
2289 factPartial.setAttribute("confidence", Double.toString(p.getConfidence()));
2290 fact.addContent(factPartial);
2291 }
2292
2293 factsElement.addContent(fact);
2294 } catch (Exception e) {
2295
2296 }
2297 }
2298 root.addContent(factsElement);
2299 }
2300
2301 List<LinkedEntity> linkedEntities = annotationContainer.getLinkedEntities();
2302 if (linkedEntities.size() > 0) {
2303 Element linkedEntityElement = new Element("linkedEntities");
2304 for (LinkedEntity e : linkedEntities) {
2305 Element lEnt = new Element("linkedEntity");
2306 lEnt.setAttribute("id", e.getId());
2307 lEnt.setAttribute("resource", e.getResource());
2308 lEnt.setAttribute("reference", e.getReference());
2309 lEnt.setAttribute("confidence", Double.toString(e.getConfidence()));
2310 lEnt.setAttribute("spotted", e.isSpotted().toString());
2311
2312 Comment spanComment = new Comment(e.getSpanStr());
2313 lEnt.addContent(spanComment);
2314 Element spanElem = new Element("span");
2315 for (WF target : e.getWFs().getTargets()) {
2316 Element targetElem = new Element("target");
2317 targetElem.setAttribute("id", target.getId());
2318 spanElem.addContent(targetElem);
2319 }
2320 lEnt.addContent(spanElem);
2321
2322 if (e.getTypes().size() > 0) {
2323 Element typesElement = new Element("types");
2324 for (String category : e.getTypes().keySet()) {
2325 for (String type : e.getTypes().get(category)) {
2326 Element typeElement = new Element("type");
2327 typeElement.setAttribute("source", category);
2328 typeElement.setAttribute("label", type);
2329 typesElement.addContent(typeElement);
2330 }
2331 }
2332 lEnt.addContent(typesElement);
2333 }
2334
2335 linkedEntityElement.addContent(lEnt);
2336 }
2337 root.addContent(linkedEntityElement);
2338 }
2339
2340 List<SSTspan> ssts = annotationContainer.getSstSpans();
2341 if (ssts.size() > 0) {
2342 Element linkedEntityElement = new Element("SSTspans");
2343 for (SSTspan s : ssts) {
2344 Element lEnt = new Element("sst");
2345 lEnt.setAttribute("id", s.getId());
2346 lEnt.setAttribute("type", s.getType());
2347 lEnt.setAttribute("label", s.getLabel());
2348
2349 Comment spanComment = new Comment(s.getSpanStr());
2350 lEnt.addContent(spanComment);
2351 Element spanElem = new Element("span");
2352 for (Term target : s.getTerms().getTargets()) {
2353 Element targetElem = new Element("target");
2354 targetElem.setAttribute("id", target.getId());
2355 spanElem.addContent(targetElem);
2356 }
2357 lEnt.addContent(spanElem);
2358
2359 linkedEntityElement.addContent(lEnt);
2360 }
2361 root.addContent(linkedEntityElement);
2362 }
2363
2364 List<Topic> topics = annotationContainer.getTopics();
2365 if (topics.size() > 0) {
2366 Element topicLayer = new Element("topics");
2367 for (Topic t : topics) {
2368 Element topicElement = new Element("topic");
2369 topicElement.setAttribute("label", t.getLabel());
2370 topicElement.setAttribute("probability", Float.toString(t.getProbability()));
2371 topicLayer.addContent(topicElement);
2372 }
2373 root.addContent(topicLayer);
2374 }
2375
2376 Element featuresElem = new Element("features");
2377 List<Feature> properties = annotationContainer.getProperties();
2378 if (properties.size() > 0) {
2379 Element propertiesElem = new Element("properties");
2380 for (Feature property : properties) {
2381 Element propertyElem = new Element("property");
2382 propertyElem.setAttribute("id", property.getId());
2383 propertyElem.setAttribute("lemma", property.getLemma());
2384 List<Span<Term>> references = property.getSpans();
2385 Element referencesElem = new Element("references");
2386 for (Span<Term> span : references) {
2387 Comment spanComment = new Comment(property.getSpanStr(span));
2388 referencesElem.addContent(spanComment);
2389 Element spanElem = new Element("span");
2390 for (Term term : span.getTargets()) {
2391 Element targetElem = new Element("target");
2392 targetElem.setAttribute("id", term.getId());
2393 if (term == span.getHead()) {
2394 targetElem.setAttribute("head", "yes");
2395 }
2396 spanElem.addContent(targetElem);
2397 }
2398 referencesElem.addContent(spanElem);
2399 }
2400 propertyElem.addContent(referencesElem);
2401 propertiesElem.addContent(propertyElem);
2402 }
2403 featuresElem.addContent(propertiesElem);
2404 }
2405 List<Feature> categories = annotationContainer.getCategories();
2406 if (categories.size() > 0) {
2407 Element categoriesElem = new Element("categories");
2408 for (Feature category : categories) {
2409 Element categoryElem = new Element("category");
2410 categoryElem.setAttribute("id", category.getId());
2411 categoryElem.setAttribute("lemma", category.getLemma());
2412 List<Span<Term>> references = category.getSpans();
2413 Element referencesElem = new Element("references");
2414 for (Span<Term> span : references) {
2415 Comment spanComment = new Comment(category.getSpanStr(span));
2416 referencesElem.addContent(spanComment);
2417 Element spanElem = new Element("span");
2418 for (Term term : span.getTargets()) {
2419 Element targetElem = new Element("target");
2420 targetElem.setAttribute("id", term.getId());
2421 if (term == span.getHead()) {
2422 targetElem.setAttribute("head", "yes");
2423 }
2424 spanElem.addContent(targetElem);
2425 }
2426 referencesElem.addContent(spanElem);
2427 }
2428 categoryElem.addContent(referencesElem);
2429 categoriesElem.addContent(categoryElem);
2430 }
2431 featuresElem.addContent(categoriesElem);
2432 }
2433 if (featuresElem.getChildren().size() > 0) {
2434 root.addContent(featuresElem);
2435 }
2436
2437 List<Opinion> opinions = annotationContainer.getOpinions();
2438 if (opinions.size() > 0) {
2439 Element opinionsElem = new Element("opinions");
2440 for (Opinion opinion : opinions) {
2441 Element opinionElem = new Element("opinion");
2442 opinionElem.setAttribute("id", opinion.getId());
2443 if (opinion.getLabel() != null) {
2444 opinionElem.setAttribute("label", opinion.getLabel());
2445 }
2446 if (!opinion.getExternalRefs().isEmpty()) {
2447 opinionElem.addContent(externalReferencesToDOM(opinion.getExternalRefs()));
2448 }
2449 Opinion.OpinionHolder holder = opinion.getOpinionHolder();
2450 if (holder != null) {
2451 Element opinionHolderElem = new Element("opinion_holder");
2452 if (holder.hasType()) {
2453 opinionHolderElem.setAttribute("type", holder.getType());
2454 }
2455 Comment comment = new Comment(opinion.getSpanStr(opinion.getOpinionHolder().getSpan()));
2456 opinionHolderElem.addContent(comment);
2457 List<Term> targets = holder.getTerms();
2458 Span<Term> span = holder.getSpan();
2459 if (targets.size() > 0) {
2460 Element spanElem = new Element("span");
2461 opinionHolderElem.addContent(spanElem);
2462 for (Term target : targets) {
2463 Element targetElem = new Element("target");
2464 targetElem.setAttribute("id", target.getId());
2465 if (target == span.getHead()) {
2466 targetElem.setAttribute("head", "yes");
2467 }
2468 spanElem.addContent(targetElem);
2469 }
2470 }
2471 if (!holder.getExternalRefs().isEmpty()) {
2472 opinionHolderElem.addContent(externalReferencesToDOM(holder.getExternalRefs()));
2473 }
2474 opinionElem.addContent(opinionHolderElem);
2475 }
2476 Opinion.OpinionTarget opTarget = opinion.getOpinionTarget();
2477 if (opTarget != null) {
2478 Element opinionTargetElem = new Element("opinion_target");
2479 if (opTarget.hasType()) {
2480 opinionTargetElem.setAttribute("type", opTarget.getType());
2481 }
2482 Comment comment = new Comment(opinion.getSpanStr(opinion.getOpinionTarget().getSpan()));
2483 opinionTargetElem.addContent(comment);
2484 List<Term> targets = opTarget.getTerms();
2485 Span<Term> span = opTarget.getSpan();
2486 if (targets.size() > 0) {
2487 Element spanElem = new Element("span");
2488 opinionTargetElem.addContent(spanElem);
2489 for (Term target : targets) {
2490 Element targetElem = new Element("target");
2491 targetElem.setAttribute("id", target.getId());
2492 if (target == span.getHead()) {
2493 targetElem.setAttribute("head", "yes");
2494 }
2495 spanElem.addContent(targetElem);
2496 }
2497 }
2498 if (!opTarget.getExternalRefs().isEmpty()) {
2499 opinionTargetElem.addContent(externalReferencesToDOM(opTarget.getExternalRefs()));
2500 }
2501 opinionElem.addContent(opinionTargetElem);
2502 }
2503 Opinion.OpinionExpression expression = opinion.getOpinionExpression();
2504 if (expression != null) {
2505 Element opinionExpressionElem = new Element("opinion_expression");
2506 Comment comment = new Comment(opinion.getSpanStr(opinion.getOpinionExpression().getSpan()));
2507 opinionExpressionElem.addContent(comment);
2508 if (expression.hasPolarity()) {
2509 opinionExpressionElem.setAttribute("polarity", expression.getPolarity());
2510 }
2511 if (expression.hasStrength()) {
2512 opinionExpressionElem.setAttribute("strength", expression.getStrength());
2513 }
2514 if (expression.hasSubjectivity()) {
2515 opinionExpressionElem.setAttribute("subjectivity", expression.getSubjectivity());
2516 }
2517 if (expression.hasSentimentSemanticType()) {
2518 opinionExpressionElem
2519 .setAttribute("sentiment_semantic_type", expression.getSentimentSemanticType());
2520 }
2521 if (expression.hasSentimentProductFeature()) {
2522 opinionExpressionElem
2523 .setAttribute("sentiment_product_feature", expression.getSentimentProductFeature());
2524 }
2525 List<Term> targets = expression.getTerms();
2526 Span<Term> span = expression.getSpan();
2527 if (targets.size() > 0) {
2528 Element spanElem = new Element("span");
2529 opinionExpressionElem.addContent(spanElem);
2530 for (Term target : targets) {
2531 Element targetElem = new Element("target");
2532 targetElem.setAttribute("id", target.getId());
2533 if (target == span.getHead()) {
2534 targetElem.setAttribute("head", "yes");
2535 }
2536 spanElem.addContent(targetElem);
2537 }
2538 }
2539 if (!expression.getExternalRefs().isEmpty()) {
2540 opinionExpressionElem.addContent(externalReferencesToDOM(expression.getExternalRefs()));
2541 }
2542 opinionElem.addContent(opinionExpressionElem);
2543 }
2544
2545 opinionsElem.addContent(opinionElem);
2546 }
2547 root.addContent(opinionsElem);
2548 }
2549
2550 List<Relation> relations = annotationContainer.getRelations();
2551 if (relations.size() > 0) {
2552 Element relationsElem = new Element("relations");
2553 for (Relation relation : relations) {
2554 Comment comment = new Comment(relation.getStr());
2555 relationsElem.addContent(comment);
2556 Element relationElem = new Element("relation");
2557 relationElem.setAttribute("id", relation.getId());
2558 relationElem.setAttribute("from", relation.getFrom().getId());
2559 relationElem.setAttribute("to", relation.getTo().getId());
2560 if (relation.hasConfidence()) {
2561 relationElem.setAttribute("confidence", String.valueOf(relation.getConfidence()));
2562 }
2563 relationsElem.addContent(relationElem);
2564 }
2565 root.addContent(relationsElem);
2566 }
2567
2568 List<Predicate> predicates = annotationContainer.getPredicates();
2569 if (predicates.size() > 0) {
2570 Element predicatesElem = new Element("srl");
2571 for (Predicate predicate : predicates) {
2572 Comment predicateComment = new Comment(predicate.getStr());
2573 predicatesElem.addContent(predicateComment);
2574 Element predicateElem = new Element("predicate");
2575 predicateElem.setAttribute("id", predicate.getId());
2576 if (predicate.hasSource()) {
2577 predicateElem.setAttribute("source", predicate.getSource());
2578 }
2579 if (predicate.hasUri()) {
2580 predicateElem.setAttribute("uri", predicate.getUri());
2581 }
2582 if (predicate.hasConfidence()) {
2583 predicateElem.setAttribute("confidence", Double.toString(predicate.getConfidence()));
2584 }
2585 if (!predicate.getFlags().isEmpty()) {
2586 StringBuilder builder = new StringBuilder();
2587 String separator = "";
2588 for (String flag : predicate.getFlags()) {
2589 builder.append(separator).append(flag);
2590 separator = ",";
2591 }
2592 predicateElem.setAttribute("flags", builder.toString());
2593 }
2594 Span<Term> span = predicate.getSpan();
2595 if (span.getTargets().size() > 0) {
2596 Comment spanComment = new Comment(predicate.getSpanStr());
2597 Element spanElem = new Element("span");
2598 predicateElem.addContent(spanComment);
2599 predicateElem.addContent(spanElem);
2600 for (Term target : span.getTargets()) {
2601 Element targetElem = new Element("target");
2602 targetElem.setAttribute("id", target.getId());
2603 if (target == span.getHead()) {
2604 targetElem.setAttribute("head", "yes");
2605 }
2606 spanElem.addContent(targetElem);
2607 }
2608 }
2609 List<ExternalRef> externalReferences = predicate.getExternalRefs();
2610 if (externalReferences.size() > 0) {
2611 Element externalReferencesElem = externalReferencesToDOM(externalReferences);
2612 predicateElem.addContent(externalReferencesElem);
2613 }
2614 for (Predicate.Role role : predicate.getRoles()) {
2615 Element roleElem = new Element("role");
2616 roleElem.setAttribute("id", role.getId());
2617 roleElem.setAttribute("semRole", role.getSemRole());
2618 if (!role.getFlags().isEmpty()) {
2619 StringBuilder builder = new StringBuilder();
2620 String separator = "";
2621 for (String flag : role.getFlags()) {
2622 builder.append(separator).append(flag);
2623 separator = ",";
2624 }
2625 roleElem.setAttribute("flags", builder.toString());
2626 }
2627 Span<Term> roleSpan = role.getSpan();
2628 if (roleSpan.getTargets().size() > 0) {
2629 Comment spanComment = new Comment(role.getStr());
2630 Element spanElem = new Element("span");
2631 roleElem.addContent(spanComment);
2632 roleElem.addContent(spanElem);
2633 for (Term target : roleSpan.getTargets()) {
2634 Element targetElem = new Element("target");
2635 targetElem.setAttribute("id", target.getId());
2636 if (target == roleSpan.getHead()) {
2637 targetElem.setAttribute("head", "yes");
2638 }
2639 spanElem.addContent(targetElem);
2640 }
2641 }
2642 List<ExternalRef> rExternalReferences = role.getExternalRefs();
2643 if (rExternalReferences.size() > 0) {
2644 Element externalReferencesElem = externalReferencesToDOM(rExternalReferences);
2645 roleElem.addContent(externalReferencesElem);
2646 }
2647 predicateElem.addContent(roleElem);
2648 }
2649 predicatesElem.addContent(predicateElem);
2650 }
2651 root.addContent(predicatesElem);
2652 }
2653
2654 HashMap<Integer, String> conStrings = annotationContainer.getConstituencyStrings();
2655 if (conStrings.size() > 0) {
2656 Element constituentsElem = new Element("constituencyStrings");
2657 for (Integer sent : conStrings.keySet()) {
2658 String constituencyString = conStrings.get(sent);
2659 Element treeElem = new Element("tree");
2660 treeElem.setAttribute("sentence", sent.toString());
2661 treeElem.addContent(constituencyString);
2662 constituentsElem.addContent(treeElem);
2663 }
2664 root.addContent(constituentsElem);
2665 }
2666
2667 List<Tree> constituents = annotationContainer.getConstituents();
2668 if (constituents.size() > 0) {
2669 Element constituentsElem = new Element("constituency");
2670 for (Tree tree : constituents) {
2671 Element treeElem = new Element("tree");
2672 try {
2673 treeElem.setAttribute("sentence", tree.getSentence().toString());
2674 } catch (Exception e) {
2675
2676 }
2677 constituentsElem.addContent(treeElem);
2678 List<NonTerminal> nonTerminals = new LinkedList<NonTerminal>();
2679 List<Terminal> terminals = new LinkedList<Terminal>();
2680 List<Edge> edges = new ArrayList<Edge>();
2681 TreeNode rootNode = tree.getRoot();
2682 extractTreeNodes(rootNode, nonTerminals, terminals, edges);
2683 Collections.sort(nonTerminals, new Comparator<NonTerminal>() {
2684
2685 public int compare(NonTerminal nt1, NonTerminal nt2) {
2686 if (cmpId(nt1.getId(), nt2.getId()) < 0) {
2687 return -1;
2688 } else if (nt1.getId().equals(nt2.getId())) {
2689 return 0;
2690 } else {
2691 return 1;
2692 }
2693 }
2694 });
2695 Collections.sort(terminals, new Comparator<Terminal>() {
2696
2697 public int compare(Terminal t1, Terminal t2) {
2698 if (cmpId(t1.getId(), t2.getId()) < 0) {
2699 return -1;
2700 } else if (t1.getId().equals(t2.getId())) {
2701 return 0;
2702 } else {
2703 return 1;
2704 }
2705 }
2706 });
2707 Comment ntCom = new Comment("Non-terminals");
2708 treeElem.addContent(ntCom);
2709 for (NonTerminal node : nonTerminals) {
2710 Element nodeElem = new Element("nt");
2711 nodeElem.setAttribute("id", node.getId());
2712 nodeElem.setAttribute("label", node.getLabel());
2713 treeElem.addContent(nodeElem);
2714 }
2715 Comment tCom = new Comment("Terminals");
2716 treeElem.addContent(tCom);
2717 for (Terminal node : terminals) {
2718 Element nodeElem = new Element("t");
2719 nodeElem.setAttribute("id", node.getId());
2720 nodeElem.addContent(createTermSpanElem(node.getSpan()));
2721
2722 Comment tStrCom = new Comment(node.getStr());
2723 treeElem.addContent(tStrCom);
2724 treeElem.addContent(nodeElem);
2725 }
2726 Comment edgeCom = new Comment("Tree edges");
2727 treeElem.addContent(edgeCom);
2728 for (Edge edge : edges) {
2729 Element edgeElem = new Element("edge");
2730 if (edge.id != null) {
2731 edgeElem.setAttribute("id", edge.id);
2732 }
2733 edgeElem.setAttribute("from", edge.from);
2734 edgeElem.setAttribute("to", edge.to);
2735 if (edge.head) {
2736 edgeElem.setAttribute("head", "yes");
2737 }
2738 treeElem.addContent(edgeElem);
2739 }
2740 }
2741 root.addContent(constituentsElem);
2742 }
2743
2744 List<TLink> tLinks = annotationContainer.getTLinks();
2745 if (tLinks.size() > 0) {
2746 Element tLinksElem = new Element("temporalRelations");
2747 for (TLink tLink : tLinks) {
2748 Comment tLinkComment = new Comment
2749 (tLink.getRelType() + "(" + tLink.getFrom().getId() + ", " + tLink.getTo().getId() + ")");
2750 tLinksElem.addContent(tLinkComment);
2751 Element tLinkElem = new Element("tlink");
2752 tLinkElem.setAttribute("id", tLink.getId());
2753 tLinkElem.setAttribute("from", tLink.getFrom().getId());
2754 tLinkElem.setAttribute("to", tLink.getTo().getId());
2755 tLinkElem.setAttribute("fromType", tLink.getFromType());
2756 tLinkElem.setAttribute("toType", tLink.getToType());
2757 tLinkElem.setAttribute("relType", tLink.getRelType());
2758 tLinksElem.addContent(tLinkElem);
2759 }
2760 root.addContent(tLinksElem);
2761 }
2762
2763 List<CLink> cLinks = annotationContainer.getCLinks();
2764 if (cLinks.size() > 0) {
2765 Element cLinksElem = new Element("causalRelations");
2766 for (CLink cLink : cLinks) {
2767 String commentStr = "";
2768 if (cLink.hasRelType()) {
2769 commentStr += cLink.getRelType();
2770 }
2771 commentStr += "(" + cLink.getFrom().getId() + ", " + cLink.getTo().getId() + ")";
2772 Comment cLinkComment = new Comment(commentStr);
2773 cLinksElem.addContent(cLinkComment);
2774 Element cLinkElem = new Element("clink");
2775 cLinkElem.setAttribute("id", cLink.getId());
2776 cLinkElem.setAttribute("from", cLink.getFrom().getId());
2777 cLinkElem.setAttribute("to", cLink.getTo().getId());
2778 if (cLink.hasRelType()) {
2779 cLinkElem.setAttribute("relType", cLink.getRelType());
2780 }
2781 cLinksElem.addContent(cLinkElem);
2782 }
2783 root.addContent(cLinksElem);
2784 }
2785
2786 List<Element> unknownLayers = annotationContainer.getUnknownLayers();
2787 for (Element layer : unknownLayers) {
2788 layer.detach();
2789 root.addContent(layer);
2790 }
2791
2792 return doc;
2793 }
2794
2795
2796 private static void termToJson(Term term, boolean isComponent, JsonArray termsElem) {
2797 String morphofeat;
2798 Term head;
2799 String termcase;
2800
2801 String tag = (isComponent) ? "component" : "term";
2802
2803 JsonObject termElem = new JsonObject();
2804
2805
2806 termElem.addProperty("id", term.getId());
2807 if (term.hasType()) {
2808 termElem.addProperty("type", term.getType());
2809 }
2810 if (term.hasLemma()) {
2811 termElem.addProperty("lemma", term.getLemma());
2812 }
2813 if (term.hasSupersenseTag()) {
2814 termElem.addProperty("supersense", term.getSupersenseTag());
2815 }
2816 if (term.hasWordnetSense()) {
2817 termElem.addProperty("wordnet", term.getWordnetSense());
2818 }
2819 if (term.hasBBNTag()) {
2820 termElem.addProperty("bbn", term.getBBNTag());
2821 }
2822 if (term.hasPos()) {
2823 termElem.addProperty("pos", term.getPos());
2824 }
2825 if (term.hasMorphofeat()) {
2826 termElem.addProperty("morphofeat", term.getMorphofeat());
2827 }
2828 if (term.hasHead()) {
2829 termElem.addProperty("head", term.getHead().getId());
2830 }
2831 if (term.hasCase()) {
2832 termElem.addProperty("case", term.getCase());
2833 }
2834
2835
2836 if (term.hasSentiment()) {
2837 Term.Sentiment sentiment = term.getSentiment();
2838
2839 JsonObject sentimentElem = new JsonObject();
2840
2841 if (sentiment.hasResource()) {
2842 sentimentElem.addProperty("resource", sentiment.getResource());
2843 }
2844 if (sentiment.hasPolarity()) {
2845 sentimentElem.addProperty("polarity", sentiment.getPolarity());
2846 }
2847 if (sentiment.hasStrength()) {
2848 sentimentElem.addProperty("strength", sentiment.getStrength());
2849 }
2850 if (sentiment.hasSubjectivity()) {
2851 sentimentElem.addProperty("subjectivity", sentiment.getSubjectivity());
2852 }
2853 if (sentiment.hasSentimentSemanticType()) {
2854 sentimentElem.addProperty("sentiment_semantic_type", sentiment.getSentimentSemanticType());
2855 }
2856 if (sentiment.hasSentimentModifier()) {
2857 sentimentElem.addProperty("sentiment_modifier", sentiment.getSentimentModifier());
2858 }
2859 if (sentiment.hasSentimentMarker()) {
2860 sentimentElem.addProperty("sentiment_marker", sentiment.getSentimentMarker());
2861 }
2862 if (sentiment.hasSentimentProductFeature()) {
2863 sentimentElem.addProperty("sentiment_product_feature", sentiment.getSentimentProductFeature());
2864 }
2865
2866
2867 termElem.add("sentiment", sentimentElem);
2868 }
2869
2870
2871 JsonObject spanElem = new JsonObject();
2872
2873
2874 Span<WF> span = term.getSpan();
2875 for (WF target : term.getWFs()) {
2876 JsonObject targetElem = new JsonObject();
2877 targetElem.addProperty("id", target.getId());
2878 if (target == span.getHead()) {
2879 targetElem.addProperty("head", "yes");
2880 }
2881
2882 spanElem.add("target", targetElem);
2883 }
2884 termElem.add("span", spanElem);
2885
2886
2887 JsonArray ja = new JsonArray();
2888
2889
2890 if (!isComponent) {
2891 List<Term> components = term.getComponents();
2892 if (components.size() > 0) {
2893 for (Term component : components) {
2894 termToJson(component, true, ja);
2895 }
2896 }
2897 if (ja.size() > 0) {
2898 JsonObject ta = new JsonObject();
2899 ta.add("terms", ja);
2900 termsElem.add(ta);
2901 }
2902 }
2903
2904
2905 List<ExternalRef> externalReferences = term.getExternalRefs();
2906 if (externalReferences.size() > 0) {
2907 JsonArray externalReferencesElem = externalReferencesToJSON(externalReferences);
2908 termElem.add("externalReferences", externalReferencesElem);
2909 }
2910
2911 JsonObject t = new JsonObject();
2912 t.add(tag, termElem);
2913
2914 termsElem.add(t);
2915 }
2916
2917
2918 private static void termToDOM(Term term, boolean isComponent, Element termsElem) {
2919 String morphofeat;
2920 Term head;
2921 String termcase;
2922 Comment termComment = new Comment(term.getStr());
2923 termsElem.addContent(termComment);
2924 String tag = (isComponent) ? "component" : "term";
2925 Element termElem = new Element(tag);
2926 termElem.setAttribute("id", term.getId());
2927 if (term.hasType()) {
2928 termElem.setAttribute("type", term.getType());
2929 }
2930 if (term.hasLemma()) {
2931 termElem.setAttribute("lemma", term.getLemma());
2932 }
2933 if (term.hasSupersenseTag()) {
2934 termElem.setAttribute("supersense", term.getSupersenseTag());
2935 }
2936 if (term.hasWordnetSense()) {
2937 termElem.setAttribute("wordnet", term.getWordnetSense());
2938 }
2939 if (term.hasBBNTag()) {
2940 termElem.setAttribute("bbn", term.getBBNTag());
2941 }
2942 if (term.hasPos()) {
2943 termElem.setAttribute("pos", term.getPos());
2944 }
2945 if (term.hasMorphofeat()) {
2946 termElem.setAttribute("morphofeat", term.getMorphofeat());
2947 }
2948 if (term.hasUpos()) {
2949 termElem.setAttribute("upos", term.getUpos());
2950 }
2951 if (term.hasHead()) {
2952 termElem.setAttribute("head", term.getHead().getId());
2953 }
2954 if (term.hasCase()) {
2955 termElem.setAttribute("case", term.getCase());
2956 }
2957 if (term.hasSentiment()) {
2958 Term.Sentiment sentiment = term.getSentiment();
2959 Element sentimentElem = new Element("sentiment");
2960 if (sentiment.hasResource()) {
2961 sentimentElem.setAttribute("resource", sentiment.getResource());
2962 }
2963 if (sentiment.hasPolarity()) {
2964 sentimentElem.setAttribute("polarity", sentiment.getPolarity());
2965 }
2966 if (sentiment.hasStrength()) {
2967 sentimentElem.setAttribute("strength", sentiment.getStrength());
2968 }
2969 if (sentiment.hasSubjectivity()) {
2970 sentimentElem.setAttribute("subjectivity", sentiment.getSubjectivity());
2971 }
2972 if (sentiment.hasSentimentSemanticType()) {
2973 sentimentElem.setAttribute("sentiment_semantic_type", sentiment.getSentimentSemanticType());
2974 }
2975 if (sentiment.hasSentimentModifier()) {
2976 sentimentElem.setAttribute("sentiment_modifier", sentiment.getSentimentModifier());
2977 }
2978 if (sentiment.hasSentimentMarker()) {
2979 sentimentElem.setAttribute("sentiment_marker", sentiment.getSentimentMarker());
2980 }
2981 if (sentiment.hasSentimentProductFeature()) {
2982 sentimentElem.setAttribute("sentiment_product_feature", sentiment.getSentimentProductFeature());
2983 }
2984 termElem.addContent(sentimentElem);
2985 }
2986
2987 if (term.hasFeatures()) {
2988 Element featsElem = new Element("features");
2989 Map<String, Collection<String>> features = term.getFeatures();
2990 for (String key : features.keySet()) {
2991 Collection<String> value = features.get(key);
2992 String allValues = value.stream()
2993 .collect(Collectors.joining(", "));
2994 Element featElem = new Element("feature");
2995 featElem.setAttribute("key", key);
2996 featElem.setAttribute("value", allValues);
2997 featsElem.addContent(featElem);
2998 }
2999 termElem.addContent(featsElem);
3000 }
3001
3002 Element spanElem = new Element("span");
3003 Span<WF> span = term.getSpan();
3004 for (WF target : term.getWFs()) {
3005 Element targetElem = new Element("target");
3006 targetElem.setAttribute("id", target.getId());
3007 if (target == span.getHead()) {
3008 targetElem.setAttribute("head", "yes");
3009 }
3010 spanElem.addContent(targetElem);
3011 }
3012 termElem.addContent(spanElem);
3013 if (!isComponent) {
3014 List<Term> components = term.getComponents();
3015 if (components.size() > 0) {
3016 for (Term component : components) {
3017 termToDOM(component, true, termElem);
3018 }
3019 }
3020 }
3021 List<ExternalRef> externalReferences = term.getExternalRefs();
3022 if (externalReferences.size() > 0) {
3023 Element externalReferencesElem = externalReferencesToDOM(externalReferences);
3024 termElem.addContent(externalReferencesElem);
3025 }
3026 termsElem.addContent(termElem);
3027 }
3028
3029 private static void extractTreeNodes(TreeNode node, List<NonTerminal> nonTerminals, List<Terminal> terminals,
3030 List<Edge> edges) {
3031 if (node instanceof NonTerminal) {
3032 nonTerminals.add((NonTerminal) node);
3033 List<TreeNode> treeNodes = ((NonTerminal) node).getChildren();
3034 for (TreeNode child : treeNodes) {
3035 edges.add(new Edge(child, node));
3036 extractTreeNodes(child, nonTerminals, terminals, edges);
3037 }
3038 } else {
3039 terminals.add((Terminal) node);
3040 }
3041 }
3042
3043 private static Element externalReferencesToDOM(List<ExternalRef> externalRefs) {
3044 Element externalReferencesElem = new Element("externalReferences");
3045 for (ExternalRef externalRef : externalRefs) {
3046 Element externalRefElem = externalRefToDOM(externalRef);
3047 externalReferencesElem.addContent(externalRefElem);
3048 }
3049 return externalReferencesElem;
3050 }
3051
3052 private static JsonArray externalReferencesToJSON(List<ExternalRef> externalRefs) {
3053 Gson gson = new Gson();
3054
3055 JsonArray externalReferencesElem = new JsonArray();
3056
3057
3058 for (ExternalRef externalRef : externalRefs) {
3059 JsonObject externalRefElem = externalRefToJSON(externalRef);
3060
3061
3062 externalReferencesElem.add(externalRefElem);
3063 }
3064 return externalReferencesElem;
3065 }
3066
3067
3068 private static Element externalRefToDOM(ExternalRef externalRef) {
3069 Element externalRefElem = new Element("externalRef");
3070 externalRefElem.setAttribute("resource", externalRef.getResource());
3071 externalRefElem.setAttribute("reference", externalRef.getReference());
3072 if (externalRef.hasConfidence()) {
3073 externalRefElem.setAttribute("confidence", Float.toString(externalRef.getConfidence()));
3074 }
3075 if (externalRef.getSource() != null) {
3076 externalRefElem.setAttribute("source", externalRef.getSource());
3077 }
3078 if (externalRef.hasExternalRef()) {
3079 Element subExternalRefElem = externalRefToDOM(externalRef.getExternalRef());
3080 externalRefElem.addContent(subExternalRefElem);
3081 }
3082 return externalRefElem;
3083 }
3084
3085
3086 private static JsonObject externalRefToJSON(ExternalRef externalRef) {
3087 JsonObject externalRefElem = new JsonObject();
3088 externalRefElem.addProperty("resource", externalRef.getResource());
3089 externalRefElem.addProperty("reference", externalRef.getReference());
3090 if (externalRef.hasConfidence()) {
3091 externalRefElem.addProperty("confidence", Float.toString(externalRef.getConfidence()));
3092 }
3093 if (externalRef.getSource() != null) {
3094 externalRefElem.addProperty("source", externalRef.getSource());
3095 }
3096 if (externalRef.hasExternalRef()) {
3097 JsonObject subExternalRefElem = externalRefToJSON(externalRef.getExternalRef());
3098 externalRefElem.add("subref", subExternalRefElem);
3099 }
3100 return externalRefElem;
3101 }
3102
3103
3104 private static int cmpId(String id1, String id2) {
3105 int nbr1 = extractNumberFromId(id1);
3106 int nbr2 = extractNumberFromId(id2);
3107 if (nbr1 < nbr2) {
3108 return -1;
3109 } else if (nbr1 == nbr2) {
3110 return 0;
3111 } else {
3112 return 1;
3113 }
3114 }
3115
3116 private static int extractNumberFromId(String id) {
3117 Matcher matcher = Pattern.compile("^[a-z]*_?(\\d+)$").matcher(id);
3118 if (!matcher.find()) {
3119 throw new IllegalStateException(
3120 "IdManager doesn't recognise the given id's (" + id + ") format. Should be [a-z]*_?[0-9]+");
3121 }
3122 return Integer.valueOf(matcher.group(1));
3123 }
3124 }