1 package eu.fbk.dkm.pikes.naflib;
2
3 import java.io.IOException;
4 import java.util.Collections;
5 import java.util.List;
6 import java.util.Map;
7 import java.util.Set;
8
9 import com.google.common.collect.ImmutableList;
10 import com.google.common.collect.ImmutableSet;
11 import com.google.common.collect.Lists;
12 import com.google.common.collect.Maps;
13 import com.google.common.collect.Ordering;
14 import com.google.common.collect.Sets;
15
16 import ixa.kaflib.Coref;
17 import ixa.kaflib.Dep;
18 import ixa.kaflib.Entity;
19 import ixa.kaflib.ExternalRef;
20 import ixa.kaflib.KAFDocument;
21 import ixa.kaflib.Predicate;
22 import ixa.kaflib.Predicate.Role;
23 import ixa.kaflib.Span;
24 import ixa.kaflib.Term;
25 import ixa.kaflib.WF;
26
27 public class NafRenderUtils {
28
29 public static void renderText(final Appendable out, final KAFDocument document,
30 final Iterable<Term> terms, final Iterable<Markable> markables) throws IOException {
31
32 final List<Term> termList = Ordering.from(Term.OFFSET_COMPARATOR).sortedCopy(terms);
33 final Set<Term> termSet = ImmutableSet.copyOf(termList);
34 if (termList.isEmpty()) {
35 return;
36 }
37
38 final Markable[] markableIndex = indexMarkables(termList, markables);
39
40 final Map<Term, Set<Coref>> corefs = Maps.newHashMap();
41 for (final Coref coref : document.getCorefs()) {
42 for (final Span<Term> span : coref.getSpans()) {
43 for (final Term term : span.getTargets()) {
44 if (termSet.contains(term)) {
45 Set<Coref> set = corefs.get(term);
46 if (set == null) {
47 set = Sets.newHashSet();
48 corefs.put(term, set);
49 }
50 set.add(coref);
51 }
52 }
53 }
54 }
55
56 Markable markable = null;
57
58 int index = termList.get(0).getOffset();
59 final int end = Integer.MAX_VALUE;
60
61 List<Coref> lastCorefs = ImmutableList.of();
62 for (int i = 0; i < termList.size(); ++i) {
63
64 final Term term = termList.get(i);
65 final int termOffset = term.getOffset();
66 final int termLength = endOf(term) - termOffset;
67 final int termBegin = Math.max(termOffset, index);
68 final int termEnd = Math.min(termOffset + termLength, end);
69 final List<Coref> termCorefs = document.getCorefsByTerm(term);
70
71 if (termBegin > index) {
72 final List<Coref> sameCorefs = Lists.newArrayList(lastCorefs);
73 sameCorefs.retainAll(termCorefs);
74 out.append(sameCorefs.isEmpty() ? " " : "<span class=\"txt_coref\"> </span>");
75 }
76
77 if (markable == null) {
78 markable = markableIndex[i];
79 if (markable != null) {
80 out.append("<span style=\"background-color: ").append(markable.color)
81 .append("\">");
82 }
83 }
84
85 out.append("<span class=\"txt_term_tip");
86 for (final Coref coref : termCorefs) {
87 if (coref.getSpans().size() > 1) {
88 out.append(" txt_coref");
89 break;
90 }
91 }
92 out.append("\" title=\"");
93 emitTermTooltip(out, document, term);
94 out.append("\">");
95 out.append(term.getForm());
96 out.append("</span>");
97
98 if (markable != null && term == markable.terms.get(markable.terms.size() - 1)) {
99 out.append("</span>");
100 markable = null;
101 }
102
103 index = termEnd;
104 lastCorefs = termCorefs;
105 }
106
107 if (markable != null) {
108 out.append("</span>");
109 }
110 }
111
112 public static void renderParsing(final Appendable out, final KAFDocument document,
113 final int sentence, final boolean emitDependencies, final boolean emitSRL,
114 final Iterable<Markable> markables) throws IOException {
115 new ParsingRenderer(out, document, sentence).render(emitDependencies, emitSRL, markables);
116 }
117
118 private static void emitTermTooltip(final Appendable out, final KAFDocument document,
119 final Term term) throws IOException {
120
121
122 out.append("<strong>Term ").append(term.getId()).append("</strong>");
123 if (term.getPos() != null && term.getMorphofeat() != null) {
124 out.append(": pos ").append(term.getPos()).append('/').append(term.getMorphofeat());
125 }
126
127
128 if (term.getLemma() != null) {
129 out.append(", lemma '").append(term.getLemma().replace("\"", """)).append("'");
130 }
131
132 final Dep dep = document.getDepToTerm(term);
133 if (dep != null) {
134 out.append(", ").append(dep.getRfunc()).append(" of '")
135 .append(dep.getFrom().getForm().replace("\"", """)).append("' (")
136 .append(dep.getFrom().getId()).append(")");
137 }
138 for (final ExternalRef ref : term.getExternalRefs()) {
139 out.append(", ").append(ref.getResource()).append(' ').append(ref.getReference());
140 }
141
142
143 final List<Predicate> predicates = document.getPredicatesByTerm(term);
144 if (!predicates.isEmpty()) {
145 final Predicate predicate = predicates.get(0);
146 out.append("<br/><b>Predicate ").append(predicate.getId()).append("</b>: sense ");
147 final boolean isNoun = term.getPos().toUpperCase().equals("N");
148 for (final ExternalRef ref : predicate.getExternalRefs()) {
149 final String resource = ref.getResource().toLowerCase();
150 if ("propbank".equals(resource) && !isNoun || "nombank".equals(resource) && isNoun) {
151 out.append(ref.getReference());
152 break;
153 }
154 }
155 }
156
157
158 final List<Entity> entities = document.getEntitiesByTerm(term);
159 if (!entities.isEmpty()) {
160 final Entity entity = entities.get(0);
161 out.append("<br/><b>Entity ").append(entity.getId()).append("</b>: type ")
162 .append(entity.getType());
163 String separator = ", sense ";
164 for (final ExternalRef ref : entity.getExternalRefs()) {
165 out.append(separator);
166 try {
167 String s = ref.getReference();
168 if (s.startsWith("http://dbpedia.org/resource/")) {
169 s = "dbpedia:" + ref.getReference().substring(28);
170 }
171 out.append(s);
172 } catch (final Throwable ex) {
173 out.append(ref.getReference());
174 }
175 separator = " ";
176 }
177 }
178
179
180 for (final Coref coref : document.getCorefsByTerm(term)) {
181 if (coref.getSpans().size() > 1) {
182 out.append("<br/><b>Coref ").append(coref.getId()).append("</b>: ");
183 String separator = "";
184 for (final Span<Term> span : coref.getSpans()) {
185 out.append(separator);
186 out.append(span.getTargets().get(0).getId());
187 out.append(" '").append(span.getStr()).append("'");
188 separator = ", ";
189 }
190 }
191 }
192 }
193
194 private static Markable[] indexMarkables(final List<Term> terms,
195 final Iterable<Markable> markables) {
196
197 final Map<Term, Integer> termIndex = Maps.newHashMap();
198 for (int i = 0; i < terms.size(); ++i) {
199 termIndex.put(terms.get(i), i);
200 }
201
202 final Markable[] markableIndex = new Markable[terms.size()];
203 for (final Markable markable : markables) {
204 for (final Term term : markable.getTerms()) {
205 final Integer index = termIndex.get(term);
206 if (index != null) {
207 markableIndex[index] = markable;
208 }
209 }
210 }
211
212 return markableIndex;
213 }
214
215 private static int endOf(final Term term) {
216 final List<WF> wfs = term.getWFs();
217 final WF wf = wfs.get(wfs.size() - 1);
218 final String str = wf.getForm();
219 if (str.equals("-LSB-") || str.equals("-RSB-") || str.equals("''")) {
220 return wf.getOffset() + 1;
221 }
222 return wf.getOffset() + wf.getLength();
223 }
224
225 private static final class ParsingRenderer {
226
/** The sink receiving the generated HTML. */
private final Appendable out;

/** The document queried for terms, dependencies and predicates. */
private final KAFDocument document;

/** The sentence number passed to the document's per-sentence lookups. */
private final int sentence;

/** The terms of the sentence, as returned by {@code document.getTermsBySent(sentence)}. */
private final List<Term> terms;

/** For each position in {@code terms}, the dependency pointing to that term (may be null). */
private final List<Dep> deps;

/** Identity-based map from each term to its position in {@code terms}. */
private final Map<Term, Integer> indexes;
238
/**
 * Creates a renderer for one sentence, caching its terms, the dependency entering each
 * term and the position of each term.
 *
 * @param out
 *            the sink receiving the HTML
 * @param document
 *            the document containing the sentence
 * @param sentence
 *            the sentence number
 */
ParsingRenderer(final Appendable out, final KAFDocument document, final int sentence) {
    this.out = out;
    this.document = document;
    this.sentence = sentence;
    this.terms = document.getTermsBySent(sentence);

    final int count = this.terms.size();
    this.deps = Lists.newArrayListWithCapacity(count);
    this.indexes = Maps.newIdentityHashMap();

    int position = 0;
    for (final Term term : this.terms) {
        this.deps.add(document.getDepToTerm(term));
        this.indexes.put(term, position);
        ++position;
    }
}
252
253 void render(final boolean emitDependencies, final boolean emitSRL,
254 final Iterable<Markable> markables) throws IOException {
255
256 this.out.append("<table class=\"txt\" cellspacing=\"0\" cellpadding=\"0\">\n");
257
258 if (emitDependencies) {
259 renderDependencies();
260 }
261
262 renderTerms(markables);
263
264 if (emitSRL) {
265 renderSRL();
266 }
267
268 this.out.append("</table>\n");
269 }
270
/**
 * Emits the table rows drawing the dependency arcs above the terms.
 *
 * <p>
 * Each term occupies four table columns. Three flags per term record whether a vertical
 * line is drawn after its first column (left), after its second column (center) or after
 * its third column (right). The flags are never reset between rows, so a line that appears
 * in one row is also drawn in every following row. One {@code txt_dep} row is emitted per
 * list returned by {@code computeDependencyRows()}, followed by a final row of empty cells
 * whose border classes depend on the left and right flags.
 * </p>
 *
 * @throws IOException
 *             if writing to the sink fails
 */
private void renderDependencies() throws IOException {

    // Vertical line flags; terms without an incoming dependency start with a center line
    final boolean[] leftVerticalLines = new boolean[this.terms.size()];
    final boolean[] centerVerticalLines = new boolean[this.terms.size()];
    final boolean[] rightVerticalLines = new boolean[this.terms.size()];
    for (int i = 0; i < this.terms.size(); ++i) {
        if (this.deps.get(i) == null) {
            centerVerticalLines[this.indexes.get(this.terms.get(i))] = true;
        }
    }

    // Emit one table row per row of dependencies
    final List<List<Term>> rows = computeDependencyRows();
    for (int j = 0; j < rows.size(); ++j) {
        final List<Term> row = rows.get(j);

        // Open the row
        this.out.append("<tr class=\"txt_dep\">\n");

        // Labels of the arcs introduced by this row, stored at the position of the leftmost
        // end of each arc
        final String[] labels = new String[this.terms.size()];

        // Register the arcs of this row: the dependent gets a center line, the parent gets a
        // left line if the dependent precedes it, a right line if the dependent follows it
        for (final Term term : row) {
            final int termIndex = this.indexes.get(term);
            final Dep termDep = this.deps.get(termIndex);
            final Term parent = termDep == null ? term : termDep.getFrom();
            final int parentIndex = this.indexes.get(parent);
            final String label = termDep == null ? "" : termDep.getRfunc().toLowerCase();
            centerVerticalLines[termIndex] = true;
            if (termIndex < parentIndex) {
                leftVerticalLines[parentIndex] = true;
                labels[termIndex] = label;
            } else if (termIndex > parentIndex) {
                rightVerticalLines[parentIndex] = true;
                labels[parentIndex] = label;
            }
        }

        // Scan the columns left to right (end advances four times per term), closing a
        // cell at every vertical line; label and arrow describe the cell being accumulated
        String label = null;
        boolean arrow = false;
        int start = 0;
        int end = 0;
        for (int i = 0; i < this.terms.size(); ++i) {
            ++end;
            if (leftVerticalLines[i]) {
                renderDependencyCell(start, end, label, arrow);
                start = end;
                label = null;
                arrow = false;
            }
            ++end;
            if (centerVerticalLines[i]) {
                renderDependencyCell(start, end, label, arrow);
                start = end;
                // If the term also has a right line, its label goes to the cell after it
                label = rightVerticalLines[i] ? null : labels[i];
                // Arrows are requested only in the last row
                arrow = j == rows.size() - 1;
            }
            ++end;
            if (rightVerticalLines[i]) {
                renderDependencyCell(start, end, label, arrow);
                start = end;
                label = labels[i];
                arrow = false;
            }
            ++end;
        }
        // Close the trailing cell, which never carries a label
        renderDependencyCell(start, end, null, arrow);

        // Close the row
        this.out.append("</tr>\n");
    }

    // Emit a final row of four empty cells per term, with borders next to the left and
    // right vertical lines
    this.out.append("<tr>\n");
    for (int i = 0; i < this.terms.size(); ++i) {
        final boolean left = leftVerticalLines[i];
        final boolean right = rightVerticalLines[i];
        this.out.append("<td class=\"txt_dep_co").append(left ? " rb" : "")
                .append("\"></td>");
        this.out.append("<td class=\"txt_dep_ci").append(left ? " lb" : "")
                .append("\"></td>");
        this.out.append("<td class=\"txt_dep_ci").append(right ? " rb" : "")
                .append("\"></td>");
        this.out.append("<td class=\"txt_dep_co").append(right ? " lb" : "")
                .append("\"></td>\n");
    }
    this.out.append("</tr>\n");
}
369
370 private void renderDependencyCell(final int from, final int to, final String label,
371 final boolean arrow) throws IOException {
372
373
374 this.out.append("<td class=\"");
375
376
377 String separator = "";
378 if (from != 0) {
379 this.out.append(separator).append("txt_lb");
380 separator = " ";
381 }
382 if (to != 4 * this.terms.size()) {
383 this.out.append(separator).append("txt_rb");
384 separator = " ";
385 }
386 if (label != null) {
387 this.out.append(separator).append("txt_tb");
388 }
389 this.out.append("\"");
390
391
392 if (to - from > 1) {
393 this.out.append(" colspan=\"").append(Integer.toString(to - from)).append("\"");
394 }
395
396
397 this.out.append("><div><span>")
398 .append(label != null ? label : " ")
399 .append("</span></div>");
400
401
402 if (arrow) {
403 this.out.append("<div class=\"txt_ab\"></div>");
404 }
405
406
407 this.out.append("</td>\n");
408 }
409
410 private List<List<Term>> computeDependencyRows() {
411
412
413 final List<List<Term>> rows = Lists.newArrayList();
414
415
416
417 final Set<Term> remaining = Sets.newHashSet(this.terms);
418 while (!remaining.isEmpty()) {
419 final List<Term> candidates = ImmutableList.copyOf(remaining);
420 final List<Term> row = Lists.newArrayList();
421 rows.add(row);
422
423
424 for (final Term t1 : candidates) {
425
426
427 final int s1 = this.indexes.get(t1);
428 final Dep dep1 = this.deps.get(s1);
429 final Term p1 = dep1 == null ? t1 : dep1.getFrom();
430 final int e1 = this.indexes.get(p1);
431
432
433
434 boolean canEmit = true;
435 for (final Term t2 : candidates) {
436 if (t2 != t1) {
437
438
439 final int s2 = this.indexes.get(t2);
440 final Dep dep2 = this.deps.get(s2);
441 final Term p2 = dep2 == null ? t2 : dep2.getFrom();
442 final int e2 = this.indexes.get(p2);
443
444
445 if (Math.min(s1, e1) <= Math.min(s2, e2)
446 && Math.max(s1, e1) >= Math.max(s2, e2)) {
447 canEmit = false;
448 break;
449 }
450 }
451 }
452
453
454 if (canEmit) {
455 row.add(t1);
456 remaining.remove(t1);
457 }
458 }
459 }
460
461
462
463 rows.add(ImmutableList.<Term>of());
464
465
466 Collections.reverse(rows);
467 return rows;
468 }
469
470 private void renderTerms(final Iterable<Markable> markables) throws IOException {
471
472 final Markable[] markableIndex = indexMarkables(this.terms, markables);
473
474
475 this.out.append("<tr class=\"txt_terms\">\n");
476
477
478 for (int i = 0; i < this.terms.size(); ++i) {
479 final Term term = this.terms.get(i);
480 this.out.append("<td colspan=\"4\"><div class=\"");
481 final Markable markable = markableIndex[i];
482 if (markable == null) {
483 this.out.append("txt_term_c\">");
484 } else {
485 final boolean start = i == 0 || markable != markableIndex[i - 1];
486 final boolean end = i == this.terms.size() - 1
487 || markable != markableIndex[i + 1];
488 this.out.append(start ? end ? "txt_term_lcr" : "txt_term_lc"
489 : end ? "txt_term_cr" : "txt_term_c");
490 this.out.append("\" style=\"background-color: ").append(markable.color)
491 .append("\">");
492 }
493 this.out.append("<span class=\"txt_term_tip\" title=\"");
494 emitTermTooltip(this.out, this.document, term);
495 this.out.append("\">").append(term.getForm().replace(' ', '_')).append("</span>");
496 this.out.append("</div></td>\n");
497 }
498
499
500 this.out.append("</tr>\n");
501 }
502
503 private void renderSRL() throws IOException {
504
505
506
507
508
509
510 final List<SRLElement> propositions = Lists.newArrayList();
511 for (final Predicate predicate : this.document.getPredicatesBySent(this.sentence)) {
512 propositions.add(new SRLElement(null, predicate, true));
513 }
514
515
516
517 for (final List<SRLElement> propositionRow : computeSRLRows(propositions)) {
518
519
520 this.out.append("<tr class=\"txt_empty\"><td").append(" colspan=\"")
521 .append(Integer.toString(4 * this.terms.size())).append("\"")
522 .append("></td></tr>\n");
523
524
525 final List<SRLElement> markables = Lists.newArrayList();
526 for (final SRLElement proposition : propositionRow) {
527 final Predicate predicate = (Predicate) proposition.element;
528 markables.add(new SRLElement(proposition, predicate, false));
529 for (final Role role : predicate.getRoles()) {
530 markables.add(new SRLElement(proposition, role, false));
531 }
532 }
533
534
535
536 for (final List<SRLElement> markableRow : computeSRLRows(markables)) {
537
538
539 this.out.append("<tr class=\"txt_srl\">\n");
540
541
542
543 final boolean[] leftBorders = new boolean[this.terms.size()];
544 final boolean[] rightBorders = new boolean[this.terms.size()];
545 for (final SRLElement markable : markableRow) {
546 final SRLElement proposition = markable.parent;
547 final int s = this.indexes.get(proposition.terms.get(0));
548 final int e = this.indexes.get(proposition.terms.get(proposition.terms
549 .size() - 1));
550 leftBorders[s] = true;
551 rightBorders[e] = true;
552 if (s > 0) {
553 rightBorders[s - 1] = true;
554 }
555 if (e < this.terms.size() - 1) {
556 leftBorders[e + 1] = true;
557 }
558 }
559
560
561 final SRLElement[] cells = new SRLElement[this.terms.size()];
562 for (final SRLElement markable : markableRow) {
563 for (final Term term : markable.terms) {
564 cells[this.indexes.get(term)] = markable;
565 }
566 }
567
568
569
570 int start = 0;
571 int end = start + 1;
572 while (start < this.terms.size()) {
573
574
575 final SRLElement markable = cells[start];
576 while (end < this.terms.size() && cells[end] == markable
577 && !leftBorders[end]) {
578 ++end;
579 }
580
581
582 final boolean lb = leftBorders[start];
583 final boolean rb = rightBorders[end - 1];
584 this.out.append("<td colspan=\"")
585 .append(Integer.toString(4 * (end - start)))
586 .append("\"")
587 .append(lb ? rb ? "class=\"txt_lb txt_rb\"" : "class=\"txt_lb\""
588 : rb ? "class=\"txt_rb\"" : "")
589 .append(">");
590
591
592 if (markable != null) {
593 this.out.append("<div>");
594 final Object element = markable.element;
595 if (element instanceof Predicate) {
596 final Predicate predicate = (Predicate) element;
597 final String res = predicate.getTerms().get(0).getPos()
598 .equalsIgnoreCase("V") ? "propbank" : "nombank";
599 String roleset = null;
600 for (final ExternalRef ref : predicate.getExternalRefs()) {
601 if (res.equalsIgnoreCase(ref.getResource())) {
602 if (ref.getSource() != null) {
603 roleset = ref.getReference();
604 break;
605 } else if (roleset == null) {
606 roleset = ref.getReference();
607 }
608 }
609 }
610 if (roleset != null) {
611 this.out.append(roleset);
612 }
613 } else {
614 this.out.append(((Role) element).getSemRole());
615 }
616 this.out.append("</div>");
617 }
618
619
620 this.out.append("</td>\n");
621
622
623 start = end;
624 ++end;
625 }
626
627
628 this.out.append("</tr>\n");
629 }
630 }
631 }
632
633 private List<List<SRLElement>> computeSRLRows(final Iterable<SRLElement> elements) {
634
635
636 final List<List<SRLElement>> rows = Lists.newArrayList();
637
638
639
640 final Set<SRLElement> remaining = Sets.newHashSet(elements);
641 while (!remaining.isEmpty()) {
642
643
644 final List<SRLElement> row = Lists.newArrayList();
645 rows.add(row);
646
647
648 final List<SRLElement> ranking = Ordering.natural().sortedCopy(remaining);
649
650
651
652
653 for (final SRLElement candidate : ranking) {
654
655
656 boolean canEmit = true;
657 for (final SRLElement element : row) {
658 if (candidate.overlaps(element)) {
659 canEmit = false;
660 break;
661 }
662 }
663
664
665 if (canEmit) {
666 row.add(candidate);
667 remaining.remove(candidate);
668 }
669 }
670 }
671 return rows;
672 }
673
674 }
675
676 private static final class SRLElement implements Comparable<SRLElement> {
677
678 final SRLElement parent;
679
680 final Object element;
681
682 final List<Term> terms;
683
684 final int begin;
685
686 final int end;
687
688 SRLElement(final SRLElement parent, final Object element, final boolean useProposition) {
689 this.parent = parent;
690 this.element = element;
691 if (useProposition) {
692 final Predicate predicate = (Predicate) element;
693 final Set<Term> termSet = Sets.newHashSet();
694 termSet.addAll(predicate.getTerms());
695 for (final Role role : predicate.getRoles()) {
696 termSet.addAll(role.getTerms());
697 }
698 this.terms = Ordering.from(Term.OFFSET_COMPARATOR).immutableSortedCopy(termSet);
699 } else if (element instanceof Predicate) {
700 this.terms = ((Predicate) element).getTerms();
701 } else {
702 this.terms = ((Role) element).getTerms();
703 }
704 this.begin = this.terms.get(0).getOffset();
705 this.end = endOf(this.terms.get(this.terms.size() - 1));
706 }
707
708 boolean overlaps(final SRLElement other) {
709 return this.end > other.begin && this.begin < other.end;
710 }
711
712 @Override
713 public int compareTo(final SRLElement other) {
714 int result = 0;
715 if (other != this) {
716 result = this.terms.size() - other.terms.size();
717 if (result == 0) {
718 result = System.identityHashCode(this.element)
719 - System.identityHashCode(other.element);
720 }
721 }
722 return result;
723 }
724
725 }
726
727 public static final class Markable {
728
729 private final List<Term> terms;
730
731 private final String color;
732
733 public Markable(final Iterable<Term> terms, final String color) {
734 this.terms = ImmutableList.copyOf(terms);
735 this.color = color;
736 }
737
738 public List<Term> getTerms() {
739 return this.terms;
740 }
741
742 public String getColor() {
743 return this.color;
744 }
745
746 }
747
748 }