1 package eu.fbk.dkm.pikes.rdf.util;
2
3 import com.google.common.collect.*;
4 import eu.fbk.dkm.pikes.rdf.vocab.KS_OLD;
5 import eu.fbk.dkm.pikes.rdf.vocab.NIF;
6 import eu.fbk.dkm.pikes.rdf.vocab.GAF;
7 import eu.fbk.rdfpro.util.QuadModel;
8 import eu.fbk.rdfpro.util.Statements;
9 import org.eclipse.rdf4j.model.IRI;
10 import org.eclipse.rdf4j.model.*;
11 import org.eclipse.rdf4j.model.vocabulary.RDF;
12
13 import javax.annotation.Nullable;
14 import java.util.List;
15 import java.util.Locale;
16 import java.util.Map;
17 import java.util.Set;
18
19
20
21
22
23 public final class ModelUtil {
24
/** Immutable lookup from two-letter ISO 639 language codes to lexvo.org language IRIs. */
private static final Map<String, IRI> LANGUAGE_CODES_TO_IRIS;

/** Immutable reverse lookup from lexvo.org language IRIs to two-letter language codes. */
private static final Map<IRI, String> LANGUAGE_IRIS_TO_CODES;

static {
    // Mutable hash maps are filled first: if two codes yield the same IRI, the reverse map
    // simply keeps the code that was inserted last instead of failing on a duplicate key.
    final Map<String, IRI> byCode = Maps.newHashMap();
    final Map<IRI, String> byIRI = Maps.newHashMap();
    final String[] isoCodes = Locale.getISOLanguages();
    for (int i = 0; i < isoCodes.length; ++i) {
        final String isoCode = isoCodes[i];
        // The IRI local name is the three-letter code the JDK associates with the language.
        final String iso3Code = new Locale(isoCode).getISO3Language();
        final IRI languageIRI = Statements.VALUE_FACTORY.createIRI(
                "http://lexvo.org/id/iso639-3/", iso3Code);
        byCode.put(isoCode, languageIRI);
        byIRI.put(languageIRI, isoCode);
    }
    LANGUAGE_CODES_TO_IRIS = ImmutableMap.copyOf(byCode);
    LANGUAGE_IRIS_TO_CODES = ImmutableMap.copyOf(byIRI);
}
42
43 public static Set<Resource> getMentions(final QuadModel model) {
44 return model.filter(null, RDF.TYPE, KS_OLD.MENTION).subjects();
45 }
46
47 public static Set<Resource> getMentions(final QuadModel model, final int beginIndex,
48 final int endIndex) {
49 final List<Resource> mentionIDs = Lists.newArrayList();
50 for (final Resource mentionID : model.filter(null, RDF.TYPE, KS_OLD.MENTION).subjects()) {
51 final Literal begin = model.filter(mentionID, NIF.BEGIN_INDEX, null).objectLiteral();
52 final Literal end = model.filter(mentionID, NIF.END_INDEX, null).objectLiteral();
53 if (begin != null && begin.intValue() >= beginIndex && end != null
54 && end.intValue() <= endIndex) {
55 mentionIDs.add(mentionID);
56 }
57 }
58 return ImmutableSet.copyOf(mentionIDs);
59 }
60
61 public static QuadModel getSubModel(final QuadModel model,
62 final Iterable<? extends Resource> mentionIDs) {
63
64 final QuadModel result = QuadModel.create();
65 final Set<Resource> nodes = Sets.newHashSet();
66
67
68
69
70 for (final Resource mentionID : mentionIDs) {
71 result.addAll(model.filter(mentionID, null, null));
72 for (final Statement triple : model.filter(null, null, mentionID)) {
73 result.add(triple);
74 if (triple.getPredicate().equals(KS_OLD.EXPRESSED_BY)) {
75 final Resource factID = triple.getSubject();
76 result.addAll(model.filter(factID, null, null));
77 for (final Statement factTriple : model.filter(null, null, null, factID)) {
78 result.add(factTriple);
79 final Resource factSubj = factTriple.getSubject();
80 final IRI factPred = factTriple.getPredicate();
81 final Value factObj = factTriple.getObject();
82 nodes.add(factSubj);
83 if (factObj instanceof Resource && !factPred.equals(GAF.DENOTED_BY)) {
84 nodes.add((Resource) factObj);
85 }
86 }
87 } else {
88 nodes.add(triple.getSubject());
89 }
90 }
91 }
92
93
94 final List<Resource> queue = Lists.newLinkedList(nodes);
95 while (!queue.isEmpty()) {
96 final Resource node = queue.remove(0);
97 for (final Statement triple : model.filter(node, null, null)) {
98 if (triple.getContext() != null) {
99 final Resource context = triple.getContext();
100 if (model.filter(context, KS_OLD.EXPRESSED_BY, null).isEmpty()) {
101 result.add(triple);
102 if (triple.getObject() instanceof Resource) {
103 final Resource obj = (Resource) triple.getObject();
104 if (nodes.add(obj)) {
105 queue.add(obj);
106 }
107 }
108 }
109 }
110 }
111 }
112 return result;
113 }
114
115 public static IRI languageCodeToIRI(@Nullable final String code)
116 throws IllegalArgumentException {
117 if (code == null) {
118 return null;
119 }
120 final int length = code.length();
121 if (length == 2) {
122 final IRI uri = LANGUAGE_CODES_TO_IRIS.get(code);
123 if (uri != null) {
124 return uri;
125 }
126 } else if (length == 3) {
127 final IRI uri = Statements.VALUE_FACTORY.createIRI("http://lexvo.org/id/iso639-3/"
128 + code);
129 if (LANGUAGE_IRIS_TO_CODES.containsKey(uri)) {
130 return uri;
131 }
132 }
133 throw new IllegalArgumentException("Invalid language code: " + code);
134 }
135
136 @Nullable
137 public static String languageIRIToCode(@Nullable final IRI uri)
138 throws IllegalArgumentException {
139 if (uri == null) {
140 return null;
141 }
142 final String code = LANGUAGE_IRIS_TO_CODES.get(uri);
143 if (code != null) {
144 return code;
145 }
146 throw new IllegalArgumentException("Invalid language IRI: " + uri);
147 }
148
149
150
151
152
153
154
155
156
157
158 @Nullable
159 public static String cleanIRI(@Nullable final String string) throws IllegalArgumentException {
160
161
162
163
164
165
166
167
168
169 if (string == null) {
170 return null;
171 }
172
173
174
175
176 final StringBuilder builder = new StringBuilder();
177 for (int i = 0; i < string.length(); ++i) {
178 final char c = string.charAt(i);
179 if (c >= 'a' && c <= 'z' || c >= '?' && c <= '[' || c >= '&' && c <= ';' || c == '#'
180 || c == '$' || c == '!' || c == '=' || c == ']' || c == '_' || c == '~'
181 || c >= 0xA0 && c <= 0xD7FF || c >= 0xF900 && c <= 0xFDCF || c >= 0xFDF0
182 && c <= 0xFFEF) {
183 builder.append(c);
184 } else if (c == '%' && i < string.length() - 2
185 && Character.digit(string.charAt(i + 1), 16) >= 0
186 && Character.digit(string.charAt(i + 2), 16) >= 0) {
187 builder.append('%');
188 } else {
189 builder.append('%').append(Character.forDigit(c / 16, 16))
190 .append(Character.forDigit(c % 16, 16));
191 }
192 }
193
194
195 return builder.toString();
196 }
197
198 }