1   package eu.fbk.dkm.pikes.resources;
2   
3   import java.io.BufferedReader;
4   import java.io.IOException;
5   import java.util.Arrays;
6   import java.util.List;
7   import java.util.Map;
8   import java.util.Set;
9   
10  import javax.annotation.Nullable;
11  
12  import com.google.common.base.Charsets;
13  import com.google.common.base.Splitter;
14  import com.google.common.collect.ImmutableList;
15  import com.google.common.collect.ImmutableMap;
16  import com.google.common.collect.ImmutableSet;
17  import com.google.common.collect.Lists;
18  import com.google.common.collect.Maps;
19  import com.google.common.collect.Sets;
20  import com.google.common.io.Resources;
21  
22  import eu.fbk.rdfpro.util.Statements;
23  import org.eclipse.rdf4j.model.IRI;
24  
25  public final class Sumo {
26  
27      public static final String SUMO_NAMESPACE = "http://www.ontologyportal.org/SUMO.owl#";
28  
29      private static final Map<IRI, Concept> IRI_INDEX;
30  
31      private static final Map<String, Concept> SYNSET_INDEX;
32  
33      static {
34          try (final BufferedReader reader = Resources.asCharSource(
35                  NomBank.class.getResource("Sumo.tsv"), Charsets.UTF_8).openBufferedStream()) {
36  
37              final Map<String, IRI> uriIndex = Maps.newHashMap();
38              final Map<IRI, Concept> nameIndex = Maps.newHashMap();
39              final Map<String, Concept> synsetIndex = Maps.newHashMap();
40  
41              String line;
42              while ((line = reader.readLine()) != null) {
43  
44                  final String[] tokens = Arrays.copyOf(line.split("\t"), 4);
45  
46                  final String name = tokens[0].intern();
47                  final List<String> parents = tokens[1] == null ? ImmutableList.of() : Splitter
48                          .on('|').trimResults().omitEmptyStrings().splitToList(tokens[1]);
49                  final List<String> children = tokens[2] == null ? ImmutableList.of() : Splitter
50                          .on('|').trimResults().omitEmptyStrings().splitToList(tokens[2]);
51                  final List<String> synsets = tokens[3] == null ? ImmutableList.of() : Splitter
52                          .on('|').trimResults().omitEmptyStrings().splitToList(tokens[3]);
53  
54                  final IRI[][] uriArrays = new IRI[3][];
55                  final List<List<String>> stringLists = ImmutableList.of(ImmutableList.of(name),
56                          parents, children);
57  
58                  for (int i = 0; i < 3; ++i) {
59                      final List<String> stringList = stringLists.get(i);
60                      final IRI[] uriArray = new IRI[stringList.size()];
61                      uriArrays[i] = uriArray;
62                      for (int j = 0; j < stringList.size(); ++j) {
63                          final String uriString = (SUMO_NAMESPACE + stringList.get(j).trim())
64                                  .intern();
65                          IRI uri = uriIndex.get(uriString);
66                          if (uri == null) {
67                              uri = Statements.VALUE_FACTORY.createIRI(uriString);
68                              uriIndex.put(uriString, uri);
69                          }
70                          uriArray[j] = uri;
71                      }
72                  }
73  
74                  final IRI conceptIRI = uriArrays[0][0];
75  
76                  final String[] synsetsArray = new String[synsets.size()];
77                  for (int i = 0; i < synsets.size(); ++i) {
78                      synsetsArray[i] = synsets.get(i).trim().intern();
79                  }
80  
81                  final Concept concept = new Concept(conceptIRI, uriArrays[1], uriArrays[2],
82                          synsetsArray);
83  
84                  nameIndex.put(conceptIRI, concept);
85                  for (final String synset : synsets) {
86                      synsetIndex.put(synset, concept);
87                  }
88              }
89  
90              IRI_INDEX = ImmutableMap.copyOf(nameIndex);
91              SYNSET_INDEX = ImmutableMap.copyOf(synsetIndex);
92  
93          } catch (final IOException ex) {
94              throw new Error("Cannot load PropBank data", ex);
95          }
96      }
97  
98      @Nullable
99      public static IRI synsetToConcept(@Nullable final String synsetID) {
100         if (synsetID == null) {
101             return null;
102         }
103         final Concept concept = SYNSET_INDEX.get(synsetID.toLowerCase());
104         return concept == null ? null : concept.uri;
105     }
106 
107     public static Set<IRI> synsetsToConcepts(@Nullable final Iterable<String> synsetIDs) {
108         final Set<IRI> conceptIRIs = Sets.newHashSet();
109         for (final String synsetID : synsetIDs) {
110             final IRI conceptIRI = Sumo.synsetToConcept(synsetID);
111             if (conceptIRI != null) {
112                 conceptIRIs.add(conceptIRI);
113             }
114         }
115         return filterAncestors(conceptIRIs);
116     }
117 
118     public static Set<String> conceptToSynsets(@Nullable final IRI conceptIRI) {
119         if (conceptIRI == null) {
120             return null;
121         }
122         final Concept concept = IRI_INDEX.get(conceptIRI);
123         return concept == null ? ImmutableSet.of() : ImmutableSet.copyOf(concept.synsets);
124     }
125 
126     public static Set<IRI> filterAncestors(@Nullable final Iterable<? extends IRI> conceptIRIs) {
127         final Set<IRI> result = Sets.newHashSet(conceptIRIs);
128         outer: for (final IRI uri1 : conceptIRIs) {
129             for (final IRI uri2 : conceptIRIs) {
130                 if (!uri1.equals(uri2) && isSubClassOf(uri1, uri2)) {
131                     continue outer;
132                 }
133             }
134             result.add(uri1);
135         }
136         return result;
137     }
138 
139     public static Set<IRI> getSubClasses(final IRI parentIRI) {
140         final Set<IRI> result = Sets.newHashSet();
141         final List<IRI> queue = Lists.newLinkedList();
142         queue.add(parentIRI);
143         while (!queue.isEmpty()) {
144             final Concept concept = IRI_INDEX.get(queue.remove(0));
145             if (concept != null) {
146                 for (final IRI uri : concept.children) {
147                     if (result.add(uri)) {
148                         queue.add(uri);
149                     }
150                 }
151             }
152         }
153         return result;
154     }
155 
156     public static Set<IRI> getSuperClasses(final IRI childIRI) {
157         final Set<IRI> result = Sets.newHashSet();
158         final List<IRI> queue = Lists.newLinkedList();
159         queue.add(childIRI);
160         while (!queue.isEmpty()) {
161             final Concept concept = IRI_INDEX.get(queue.remove(0));
162             if (concept != null) {
163                 for (final IRI uri : concept.parents) {
164                     if (result.add(uri)) {
165                         queue.add(uri);
166                     }
167                 }
168             }
169         }
170         return result;
171     }
172 
173     public static boolean isSubClassOf(final IRI childIRI, final IRI parentIRI) {
174         final Concept child = IRI_INDEX.get(childIRI);
175         if (child == null) {
176             return false;
177         }
178         if (childIRI.equals(parentIRI)) {
179             return true;
180         }
181         for (final IRI uri : child.parents) {
182             if (isSubClassOf(uri, parentIRI)) {
183                 return true;
184             }
185         }
186         return false;
187     }
188 
189     private static final class Concept {
190 
191         public final IRI uri;
192 
193         public final IRI[] parents;
194 
195         public final IRI[] children;
196 
197         public final String[] synsets;
198 
199         Concept(final IRI uri, final IRI[] parents, final IRI[] children, final String[] synsets) {
200             this.uri = uri;
201             this.parents = parents;
202             this.children = children;
203             this.synsets = synsets;
204         }
205 
206     }
207 
208 }