1   package eu.fbk.dkm.pikes.resources.mpqa;
2   
import com.google.common.collect.Lists;
import com.google.common.html.HtmlEscapers;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.Collections;
import java.util.Comparator;
import java.util.List;
10  
11  /**
12   * Created by alessio on 24/03/15.
13   */
14  public final class Span implements Comparable<Span> {
15  
16  	public int begin;
17  
18  	public int end;
19  
20  	private static final Logger LOGGER = LoggerFactory.getLogger(Span.class);
21  
22  	public Span(final String span) {
23  		final String trimmedSpan = span.trim();
24  		final int delimiter = trimmedSpan.indexOf(',');
25  		this.begin = Integer.parseInt(trimmedSpan.substring(0, delimiter));
26  		this.end = Integer.parseInt(trimmedSpan.substring(delimiter + 1));
27  	}
28  
29  	public Span(final int begin, final int end) {
30  		this.begin = begin;
31  		this.end = end;
32  	}
33  
34  	public String apply(final String text) {
35  		return apply(text, true);
36  	}
37  
38  	public String apply(final String text, boolean escapeHTML) {
39  		if (escapeHTML) {
40  			return HtmlEscapers.htmlEscaper().escape(text.substring(this.begin, this.end));
41  		}
42  		else {
43  			return text.substring(this.begin, this.end);
44  		}
45  	}
46  
47  	public Span align(final String text) {
48  
49  		int begin = this.begin;
50  		int end = this.end;
51  
52  		while (begin < end && !CorpusPreprocessor.isWord(text.charAt(begin))) {
53  			++begin;
54  		}
55  		while (begin > 0 && !CorpusPreprocessor.isDelim(text.charAt(begin - 1))) {
56  			--begin;
57  		}
58  
59  		while (end > begin && !CorpusPreprocessor.isWord(text.charAt(end - 1))) {
60  			--end;
61  		}
62  		while (end < text.length() && !CorpusPreprocessor.isDelim(text.charAt(end))) {
63  			++end;
64  		}
65  
66  		return begin == this.begin && end == this.end ? this : new Span(begin, end);
67  	}
68  
69  	public void check(final String text, final String documentURI) {
70  		if (this.begin < this.end) {
71  			final boolean beginOk = CorpusPreprocessor.isWord(text.charAt(this.begin))
72  					&& (this.begin == 0 || CorpusPreprocessor.isDelim(text.charAt(this.begin - 1)));
73  			final boolean endOk = CorpusPreprocessor.isWord(text.charAt(this.end - 1))
74  					&& (this.end == text.length() || CorpusPreprocessor.isDelim(text.charAt(this.end)));
75  			if (!beginOk || !endOk) {
76  				LOGGER.warn("Wrong span detected in " + documentURI + ": ..."
77  						+ text.substring(Math.max(0, this.begin - 10), this.begin) + "["
78  						+ text.substring(this.begin, this.end) + "]"
79  						+ text.substring(this.end, Math.min(text.length(), this.end + 10))
80  						+ "...");
81  			}
82  		}
83  	}
84  
85  	public boolean contains(final Span span) {
86  		return this.begin <= span.begin && this.end >= span.end;
87  	}
88  
89  	public boolean overlaps(final Span span) {
90  		return this.end > span.begin && this.begin < span.end;
91  	}
92  
93  	public List<Span> split(final Iterable<Span> spans) {
94  
95  		final List<Span> sortedSpans = Lists.newArrayList(spans);
96  		boolean overlaps = true;
97  		while (overlaps) {
98  			overlaps = false;
99  			Collections.sort(sortedSpans);
100 			for (int i = 0; i < sortedSpans.size() - 1; ++i) {
101 				final Span span1 = sortedSpans.get(i);
102 				final Span span2 = sortedSpans.get(i + 1);
103 				if (span1.end > span2.begin) {
104 					sortedSpans.remove(i);
105 					if (span1.begin < span2.begin) {
106 						sortedSpans.add(new Span(span1.begin, span2.begin));
107 					}
108 					if (span1.end < span2.end) {
109 						sortedSpans.remove(i); // former i + 1
110 						sortedSpans.add(new Span(span2.begin, span1.end));
111 						sortedSpans.add(new Span(span1.end, span2.end));
112 					}
113 					else if (span1.end > span2.end) {
114 						sortedSpans.add(new Span(span2.end, span1.end));
115 					}
116 					overlaps = true;
117 					// System.err.println(span1 + " " + span2 + " "
118 					// + new Span(span1.begin, span2.begin) + " "
119 					// + new Span(span2.begin, span1.end));
120 					break;
121 				}
122 			}
123 		}
124 
125 		final List<Span> result = Lists.newArrayList();
126 		int index = this.begin;
127 		for (final Span span : sortedSpans) {
128 			if (span.begin < index) {
129 				throw new Error("Span overlap: " + spans);
130 			}
131 			if (span.begin > index) {
132 				result.add(new Span(index, span.begin));
133 			}
134 			result.add(span);
135 			index = span.end;
136 		}
137 		if (index < this.end) {
138 			result.add(new Span(index, this.end));
139 		}
140 		return result;
141 	}
142 
143 	@Override
144 	public int compareTo(final Span span) {
145 		int result = this.begin - span.begin;
146 		if (result == 0) {
147 			result = span.end - this.end;
148 		}
149 		return result;
150 	}
151 
152 	@Override
153 	public boolean equals(final Object object) {
154 		if (object == this) {
155 			return true;
156 		}
157 		if (!(object instanceof Span)) {
158 			return false;
159 		}
160 		final Span other = (Span) object;
161 		return this.begin == other.begin && this.end == other.end;
162 	}
163 
164 	@Override
165 	public int hashCode() {
166 		return this.begin * 37 + this.end;
167 	}
168 
169 	@Override
170 	public String toString() {
171 		return this.begin + "," + this.end;
172 	}
173 
174 }