1 package eu.fbk.dkm.pikes.resources.mpqa;
2
3 import com.google.common.collect.Lists;
4 import com.google.common.html.HtmlEscapers;
5 import org.slf4j.Logger;
6 import org.slf4j.LoggerFactory;
7
8 import java.util.Collections;
9 import java.util.List;
10
11
12
13
14 public final class Span implements Comparable<Span> {
15
16 public int begin;
17
18 public int end;
19
20 private static final Logger LOGGER = LoggerFactory.getLogger(Span.class);
21
22 public Span(final String span) {
23 final String trimmedSpan = span.trim();
24 final int delimiter = trimmedSpan.indexOf(',');
25 this.begin = Integer.parseInt(trimmedSpan.substring(0, delimiter));
26 this.end = Integer.parseInt(trimmedSpan.substring(delimiter + 1));
27 }
28
29 public Span(final int begin, final int end) {
30 this.begin = begin;
31 this.end = end;
32 }
33
34 public String apply(final String text) {
35 return apply(text, true);
36 }
37
38 public String apply(final String text, boolean escapeHTML) {
39 if (escapeHTML) {
40 return HtmlEscapers.htmlEscaper().escape(text.substring(this.begin, this.end));
41 }
42 else {
43 return text.substring(this.begin, this.end);
44 }
45 }
46
47 public Span align(final String text) {
48
49 int begin = this.begin;
50 int end = this.end;
51
52 while (begin < end && !CorpusPreprocessor.isWord(text.charAt(begin))) {
53 ++begin;
54 }
55 while (begin > 0 && !CorpusPreprocessor.isDelim(text.charAt(begin - 1))) {
56 --begin;
57 }
58
59 while (end > begin && !CorpusPreprocessor.isWord(text.charAt(end - 1))) {
60 --end;
61 }
62 while (end < text.length() && !CorpusPreprocessor.isDelim(text.charAt(end))) {
63 ++end;
64 }
65
66 return begin == this.begin && end == this.end ? this : new Span(begin, end);
67 }
68
69 public void check(final String text, final String documentURI) {
70 if (this.begin < this.end) {
71 final boolean beginOk = CorpusPreprocessor.isWord(text.charAt(this.begin))
72 && (this.begin == 0 || CorpusPreprocessor.isDelim(text.charAt(this.begin - 1)));
73 final boolean endOk = CorpusPreprocessor.isWord(text.charAt(this.end - 1))
74 && (this.end == text.length() || CorpusPreprocessor.isDelim(text.charAt(this.end)));
75 if (!beginOk || !endOk) {
76 LOGGER.warn("Wrong span detected in " + documentURI + ": ..."
77 + text.substring(Math.max(0, this.begin - 10), this.begin) + "["
78 + text.substring(this.begin, this.end) + "]"
79 + text.substring(this.end, Math.min(text.length(), this.end + 10))
80 + "...");
81 }
82 }
83 }
84
85 public boolean contains(final Span span) {
86 return this.begin <= span.begin && this.end >= span.end;
87 }
88
89 public boolean overlaps(final Span span) {
90 return this.end > span.begin && this.begin < span.end;
91 }
92
93 public List<Span> split(final Iterable<Span> spans) {
94
95 final List<Span> sortedSpans = Lists.newArrayList(spans);
96 boolean overlaps = true;
97 while (overlaps) {
98 overlaps = false;
99 Collections.sort(sortedSpans);
100 for (int i = 0; i < sortedSpans.size() - 1; ++i) {
101 final Span span1 = sortedSpans.get(i);
102 final Span span2 = sortedSpans.get(i + 1);
103 if (span1.end > span2.begin) {
104 sortedSpans.remove(i);
105 if (span1.begin < span2.begin) {
106 sortedSpans.add(new Span(span1.begin, span2.begin));
107 }
108 if (span1.end < span2.end) {
109 sortedSpans.remove(i);
110 sortedSpans.add(new Span(span2.begin, span1.end));
111 sortedSpans.add(new Span(span1.end, span2.end));
112 }
113 else if (span1.end > span2.end) {
114 sortedSpans.add(new Span(span2.end, span1.end));
115 }
116 overlaps = true;
117
118
119
120 break;
121 }
122 }
123 }
124
125 final List<Span> result = Lists.newArrayList();
126 int index = this.begin;
127 for (final Span span : sortedSpans) {
128 if (span.begin < index) {
129 throw new Error("Span overlap: " + spans);
130 }
131 if (span.begin > index) {
132 result.add(new Span(index, span.begin));
133 }
134 result.add(span);
135 index = span.end;
136 }
137 if (index < this.end) {
138 result.add(new Span(index, this.end));
139 }
140 return result;
141 }
142
143 @Override
144 public int compareTo(final Span span) {
145 int result = this.begin - span.begin;
146 if (result == 0) {
147 result = span.end - this.end;
148 }
149 return result;
150 }
151
152 @Override
153 public boolean equals(final Object object) {
154 if (object == this) {
155 return true;
156 }
157 if (!(object instanceof Span)) {
158 return false;
159 }
160 final Span other = (Span) object;
161 return this.begin == other.begin && this.end == other.end;
162 }
163
164 @Override
165 public int hashCode() {
166 return this.begin * 37 + this.end;
167 }
168
169 @Override
170 public String toString() {
171 return this.begin + "," + this.end;
172 }
173
174 }