1 package eu.fbk.dkm.pikes.resources; 2 3 import java.io.BufferedReader; 4 import java.io.IOException; 5 import java.util.Arrays; 6 import java.util.List; 7 import java.util.Map; 8 import java.util.Set; 9 10 import javax.annotation.Nullable; 11 12 import com.google.common.base.Charsets; 13 import com.google.common.base.Splitter; 14 import com.google.common.collect.ImmutableList; 15 import com.google.common.collect.ImmutableMap; 16 import com.google.common.collect.ImmutableSet; 17 import com.google.common.collect.Lists; 18 import com.google.common.collect.Maps; 19 import com.google.common.collect.Sets; 20 import com.google.common.io.Resources; 21 22 import eu.fbk.rdfpro.util.Statements; 23 import org.eclipse.rdf4j.model.IRI; 24 25 public final class Sumo { 26 27 public static final String SUMO_NAMESPACE = "http://www.ontologyportal.org/SUMO.owl#"; 28 29 private static final Map<IRI, Concept> IRI_INDEX; 30 31 private static final Map<String, Concept> SYNSET_INDEX; 32 33 static { 34 try (final BufferedReader reader = Resources.asCharSource( 35 NomBank.class.getResource("Sumo.tsv"), Charsets.UTF_8).openBufferedStream()) { 36 37 final Map<String, IRI> uriIndex = Maps.newHashMap(); 38 final Map<IRI, Concept> nameIndex = Maps.newHashMap(); 39 final Map<String, Concept> synsetIndex = Maps.newHashMap(); 40 41 String line; 42 while ((line = reader.readLine()) != null) { 43 44 final String[] tokens = Arrays.copyOf(line.split("\t"), 4); 45 46 final String name = tokens[0].intern(); 47 final List<String> parents = tokens[1] == null ? ImmutableList.of() : Splitter 48 .on('|').trimResults().omitEmptyStrings().splitToList(tokens[1]); 49 final List<String> children = tokens[2] == null ? ImmutableList.of() : Splitter 50 .on('|').trimResults().omitEmptyStrings().splitToList(tokens[2]); 51 final List<String> synsets = tokens[3] == null ? ImmutableList.of() : Splitter 52 .on('|').trimResults().omitEmptyStrings().splitToList(tokens[3]); 53 54 final IRI[][] uriArrays = new IRI[3][]; 55 final List<List<String>> stringLists = ImmutableList.of(ImmutableList.of(name), 56 parents, children); 57 58 for (int i = 0; i < 3; ++i) { 59 final List<String> stringList = stringLists.get(i); 60 final IRI[] uriArray = new IRI[stringList.size()]; 61 uriArrays[i] = uriArray; 62 for (int j = 0; j < stringList.size(); ++j) { 63 final String uriString = (SUMO_NAMESPACE + stringList.get(j).trim()) 64 .intern(); 65 IRI uri = uriIndex.get(uriString); 66 if (uri == null) { 67 uri = Statements.VALUE_FACTORY.createIRI(uriString); 68 uriIndex.put(uriString, uri); 69 } 70 uriArray[j] = uri; 71 } 72 } 73 74 final IRI conceptIRI = uriArrays[0][0]; 75 76 final String[] synsetsArray = new String[synsets.size()]; 77 for (int i = 0; i < synsets.size(); ++i) { 78 synsetsArray[i] = synsets.get(i).trim().intern(); 79 } 80 81 final Concept concept = new Concept(conceptIRI, uriArrays[1], uriArrays[2], 82 synsetsArray); 83 84 nameIndex.put(conceptIRI, concept); 85 for (final String synset : synsets) { 86 synsetIndex.put(synset, concept); 87 } 88 } 89 90 IRI_INDEX = ImmutableMap.copyOf(nameIndex); 91 SYNSET_INDEX = ImmutableMap.copyOf(synsetIndex); 92 93 } catch (final IOException ex) { 94 throw new Error("Cannot load PropBank data", ex); 95 } 96 } 97 98 @Nullable 99 public static IRI synsetToConcept(@Nullable final String synsetID) { 100 if (synsetID == null) { 101 return null; 102 } 103 final Concept concept = SYNSET_INDEX.get(synsetID.toLowerCase()); 104 return concept == null ? null : concept.uri; 105 } 106 107 public static Set<IRI> synsetsToConcepts(@Nullable final Iterable<String> synsetIDs) { 108 final Set<IRI> conceptIRIs = Sets.newHashSet(); 109 for (final String synsetID : synsetIDs) { 110 final IRI conceptIRI = Sumo.synsetToConcept(synsetID); 111 if (conceptIRI != null) { 112 conceptIRIs.add(conceptIRI); 113 } 114 } 115 return filterAncestors(conceptIRIs); 116 } 117 118 public static Set<String> conceptToSynsets(@Nullable final IRI conceptIRI) { 119 if (conceptIRI == null) { 120 return null; 121 } 122 final Concept concept = IRI_INDEX.get(conceptIRI); 123 return concept == null ? ImmutableSet.of() : ImmutableSet.copyOf(concept.synsets); 124 } 125 126 public static Set<IRI> filterAncestors(@Nullable final Iterable<? extends IRI> conceptIRIs) { 127 final Set<IRI> result = Sets.newHashSet(conceptIRIs); 128 outer: for (final IRI uri1 : conceptIRIs) { 129 for (final IRI uri2 : conceptIRIs) { 130 if (!uri1.equals(uri2) && isSubClassOf(uri1, uri2)) { 131 continue outer; 132 } 133 } 134 result.add(uri1); 135 } 136 return result; 137 } 138 139 public static Set<IRI> getSubClasses(final IRI parentIRI) { 140 final Set<IRI> result = Sets.newHashSet(); 141 final List<IRI> queue = Lists.newLinkedList(); 142 queue.add(parentIRI); 143 while (!queue.isEmpty()) { 144 final Concept concept = IRI_INDEX.get(queue.remove(0)); 145 if (concept != null) { 146 for (final IRI uri : concept.children) { 147 if (result.add(uri)) { 148 queue.add(uri); 149 } 150 } 151 } 152 } 153 return result; 154 } 155 156 public static Set<IRI> getSuperClasses(final IRI childIRI) { 157 final Set<IRI> result = Sets.newHashSet(); 158 final List<IRI> queue = Lists.newLinkedList(); 159 queue.add(childIRI); 160 while (!queue.isEmpty()) { 161 final Concept concept = IRI_INDEX.get(queue.remove(0)); 162 if (concept != null) { 163 for (final IRI uri : concept.parents) { 164 if (result.add(uri)) { 165 queue.add(uri); 166 } 167 } 168 } 169 } 170 return result; 171 } 172 173 public static boolean isSubClassOf(final IRI childIRI, final IRI parentIRI) { 174 final Concept child = IRI_INDEX.get(childIRI); 175 if (child == null) { 176 return false; 177 } 178 if (childIRI.equals(parentIRI)) { 179 return true; 180 } 181 for (final IRI uri : child.parents) { 182 if (isSubClassOf(uri, parentIRI)) { 183 return true; 184 } 185 } 186 return false; 187 } 188 189 private static final class Concept { 190 191 public final IRI uri; 192 193 public final IRI[] parents; 194 195 public final IRI[] children; 196 197 public final String[] synsets; 198 199 Concept(final IRI uri, final IRI[] parents, final IRI[] children, final String[] synsets) { 200 this.uri = uri; 201 this.parents = parents; 202 this.children = children; 203 this.synsets = synsets; 204 } 205 206 } 207 208 }