1 package eu.fbk.dkm.pikes.resources;
2
import java.io.BufferedReader;
import java.io.IOException;
import java.util.ArrayDeque;
import java.util.Arrays;
import java.util.Deque;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;

import javax.annotation.Nullable;

import com.google.common.base.Charsets;
import com.google.common.base.Splitter;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import com.google.common.io.Resources;

import eu.fbk.rdfpro.util.Statements;
import org.eclipse.rdf4j.model.IRI;
24
25 public final class Sumo {
26
27 public static final String SUMO_NAMESPACE = "http://www.ontologyportal.org/SUMO.owl#";
28
29 private static final Map<IRI, Concept> IRI_INDEX;
30
31 private static final Map<String, Concept> SYNSET_INDEX;
32
33 static {
34 try (final BufferedReader reader = Resources.asCharSource(
35 NomBank.class.getResource("Sumo.tsv"), Charsets.UTF_8).openBufferedStream()) {
36
37 final Map<String, IRI> uriIndex = Maps.newHashMap();
38 final Map<IRI, Concept> nameIndex = Maps.newHashMap();
39 final Map<String, Concept> synsetIndex = Maps.newHashMap();
40
41 String line;
42 while ((line = reader.readLine()) != null) {
43
44 final String[] tokens = Arrays.copyOf(line.split("\t"), 4);
45
46 final String name = tokens[0].intern();
47 final List<String> parents = tokens[1] == null ? ImmutableList.of() : Splitter
48 .on('|').trimResults().omitEmptyStrings().splitToList(tokens[1]);
49 final List<String> children = tokens[2] == null ? ImmutableList.of() : Splitter
50 .on('|').trimResults().omitEmptyStrings().splitToList(tokens[2]);
51 final List<String> synsets = tokens[3] == null ? ImmutableList.of() : Splitter
52 .on('|').trimResults().omitEmptyStrings().splitToList(tokens[3]);
53
54 final IRI[][] uriArrays = new IRI[3][];
55 final List<List<String>> stringLists = ImmutableList.of(ImmutableList.of(name),
56 parents, children);
57
58 for (int i = 0; i < 3; ++i) {
59 final List<String> stringList = stringLists.get(i);
60 final IRI[] uriArray = new IRI[stringList.size()];
61 uriArrays[i] = uriArray;
62 for (int j = 0; j < stringList.size(); ++j) {
63 final String uriString = (SUMO_NAMESPACE + stringList.get(j).trim())
64 .intern();
65 IRI uri = uriIndex.get(uriString);
66 if (uri == null) {
67 uri = Statements.VALUE_FACTORY.createIRI(uriString);
68 uriIndex.put(uriString, uri);
69 }
70 uriArray[j] = uri;
71 }
72 }
73
74 final IRI conceptIRI = uriArrays[0][0];
75
76 final String[] synsetsArray = new String[synsets.size()];
77 for (int i = 0; i < synsets.size(); ++i) {
78 synsetsArray[i] = synsets.get(i).trim().intern();
79 }
80
81 final Concept concept = new Concept(conceptIRI, uriArrays[1], uriArrays[2],
82 synsetsArray);
83
84 nameIndex.put(conceptIRI, concept);
85 for (final String synset : synsets) {
86 synsetIndex.put(synset, concept);
87 }
88 }
89
90 IRI_INDEX = ImmutableMap.copyOf(nameIndex);
91 SYNSET_INDEX = ImmutableMap.copyOf(synsetIndex);
92
93 } catch (final IOException ex) {
94 throw new Error("Cannot load PropBank data", ex);
95 }
96 }
97
98 @Nullable
99 public static IRI synsetToConcept(@Nullable final String synsetID) {
100 if (synsetID == null) {
101 return null;
102 }
103 final Concept concept = SYNSET_INDEX.get(synsetID.toLowerCase());
104 return concept == null ? null : concept.uri;
105 }
106
107 public static Set<IRI> synsetsToConcepts(@Nullable final Iterable<String> synsetIDs) {
108 final Set<IRI> conceptIRIs = Sets.newHashSet();
109 for (final String synsetID : synsetIDs) {
110 final IRI conceptIRI = Sumo.synsetToConcept(synsetID);
111 if (conceptIRI != null) {
112 conceptIRIs.add(conceptIRI);
113 }
114 }
115 return filterAncestors(conceptIRIs);
116 }
117
118 public static Set<String> conceptToSynsets(@Nullable final IRI conceptIRI) {
119 if (conceptIRI == null) {
120 return null;
121 }
122 final Concept concept = IRI_INDEX.get(conceptIRI);
123 return concept == null ? ImmutableSet.of() : ImmutableSet.copyOf(concept.synsets);
124 }
125
126 public static Set<IRI> filterAncestors(@Nullable final Iterable<? extends IRI> conceptIRIs) {
127 final Set<IRI> result = Sets.newHashSet(conceptIRIs);
128 outer: for (final IRI uri1 : conceptIRIs) {
129 for (final IRI uri2 : conceptIRIs) {
130 if (!uri1.equals(uri2) && isSubClassOf(uri1, uri2)) {
131 continue outer;
132 }
133 }
134 result.add(uri1);
135 }
136 return result;
137 }
138
139 public static Set<IRI> getSubClasses(final IRI parentIRI) {
140 final Set<IRI> result = Sets.newHashSet();
141 final List<IRI> queue = Lists.newLinkedList();
142 queue.add(parentIRI);
143 while (!queue.isEmpty()) {
144 final Concept concept = IRI_INDEX.get(queue.remove(0));
145 if (concept != null) {
146 for (final IRI uri : concept.children) {
147 if (result.add(uri)) {
148 queue.add(uri);
149 }
150 }
151 }
152 }
153 return result;
154 }
155
156 public static Set<IRI> getSuperClasses(final IRI childIRI) {
157 final Set<IRI> result = Sets.newHashSet();
158 final List<IRI> queue = Lists.newLinkedList();
159 queue.add(childIRI);
160 while (!queue.isEmpty()) {
161 final Concept concept = IRI_INDEX.get(queue.remove(0));
162 if (concept != null) {
163 for (final IRI uri : concept.parents) {
164 if (result.add(uri)) {
165 queue.add(uri);
166 }
167 }
168 }
169 }
170 return result;
171 }
172
173 public static boolean isSubClassOf(final IRI childIRI, final IRI parentIRI) {
174 final Concept child = IRI_INDEX.get(childIRI);
175 if (child == null) {
176 return false;
177 }
178 if (childIRI.equals(parentIRI)) {
179 return true;
180 }
181 for (final IRI uri : child.parents) {
182 if (isSubClassOf(uri, parentIRI)) {
183 return true;
184 }
185 }
186 return false;
187 }
188
189 private static final class Concept {
190
191 public final IRI uri;
192
193 public final IRI[] parents;
194
195 public final IRI[] children;
196
197 public final String[] synsets;
198
199 Concept(final IRI uri, final IRI[] parents, final IRI[] children, final String[] synsets) {
200 this.uri = uri;
201 this.parents = parents;
202 this.children = children;
203 this.synsets = synsets;
204 }
205
206 }
207
208 }