package eu.fbk.dkm.pikes.resources;

import com.google.common.base.Charsets;
import com.google.common.collect.*;
import com.google.common.io.Resources;
import eu.fbk.utils.core.CommandLine;
import eu.fbk.utils.core.CommandLine.Type;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.xml.stream.XMLStreamException;
import java.io.*;
import java.net.URL;
import java.util.EnumMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;

/**
 * Index of frame-to-frame relations, loaded once from the {@code FrameNet.tsv} classpath
 * resource located next to this class.
 *
 * <p>
 * Each line of the TSV file has three tab-separated fields: relation name, source frame ID and
 * target frame ID. The {@link #main(String[])} method regenerates that file from a directory of
 * frame definition XML files.
 * </p>
 */
public class FrameNet {

    private static final Logger LOGGER = LoggerFactory.getLogger(FrameNet.class);

    /**
     * Maps each relation stored in the TSV file (key) to the opposite-direction relation (value)
     * that is derived from it. The inverse view is used by {@link #main(String[])} to rewrite
     * opposite-direction relations found in the XML into their stored form.
     */
    private static final ImmutableBiMap<Relation, Relation> INVERSES = ImmutableBiMap.of(
            Relation.USES, Relation.IS_USED_BY,
            Relation.INHERITS_FROM, Relation.IS_INHERITED_BY,
            Relation.PRECEDES, Relation.IS_PRECEDED_BY,
            Relation.PERSPECTIVE_ON, Relation.IS_PERSPECTIVIZED_IN,
            Relation.SUBFRAME_OF, Relation.HAS_SUBFRAME);

    /** For every relation, the multimap from a source frame ID to its related frame IDs. */
    private static final Map<Relation, Multimap<String, String>> RELATIONS = loadRelations();

    /**
     * Reads {@code FrameNet.tsv} and builds the immutable relation index. For each line whose
     * relation is a key of {@link #INVERSES}, the opposite-direction entry is indexed as well.
     *
     * @return an immutable map with one (possibly empty) multimap per {@link Relation}
     * @throws Error if the resource is missing or cannot be read
     * @throws IllegalArgumentException if a non-blank line has fewer than three fields or names
     *         an unknown relation
     */
    private static Map<Relation, Multimap<String, String>> loadRelations() {
        final URL resource = FrameNet.class.getResource("FrameNet.tsv");
        if (resource == null) {
            throw new Error("Could not load eu.fbk.dkm.pikes.resources.FrameNet data from "
                    + "classpath: resource FrameNet.tsv not found");
        }

        final List<String> lines;
        try {
            lines = Resources.readLines(resource, Charsets.UTF_8);
        } catch (final IOException ex) {
            throw new Error(
                    "Could not load eu.fbk.dkm.pikes.resources.FrameNet data from classpath", ex);
        }

        final Map<Relation, ImmutableMultimap.Builder<String, String>> builders =
                new EnumMap<>(Relation.class);
        for (final Relation relation : Relation.values()) {
            builders.put(relation, ImmutableMultimap.<String, String>builder());
        }

        for (final String line : lines) {
            if (line.trim().isEmpty()) {
                continue; // tolerate blank lines instead of failing on them
            }
            final String[] tokens = line.split("\t");
            if (tokens.length < 3) {
                throw new IllegalArgumentException(
                        "Malformed FrameNet.tsv line (expected 3 tab-separated fields): " + line);
            }
            final Relation relation = Relation.valueOf(tokens[0]);
            final String from = tokens[1];
            final String to = tokens[2];
            builders.get(relation).put(from, to);
            final Relation inverse = INVERSES.get(relation);
            if (inverse != null) {
                builders.get(inverse).put(to, from);
            }
        }

        final ImmutableMap.Builder<Relation, Multimap<String, String>> result = ImmutableMap
                .builder();
        for (final Map.Entry<Relation, ImmutableMultimap.Builder<String, String>> entry : builders
                .entrySet()) {
            result.put(entry.getKey(), entry.getValue().build());
        }
        return result.build();
    }

    /**
     * Returns the IDs of the frames related to a source frame through any of the given
     * relations.
     *
     * @param recursive if {@code true}, relations are followed transitively (breadth-first);
     *        otherwise only directly related frames are returned
     * @param sourceFrameID the ID of the frame to start from
     * @param relations the relations to follow; if none is given the result is empty
     * @return a new mutable set with the related frame IDs; the source frame is included only
     *         if it is reachable from itself through the given relations
     */
    public static Set<String> getRelatedFrames(final boolean recursive,
            final String sourceFrameID, final Relation... relations) {
        final Set<String> ids = Sets.newHashSet();
        final List<String> queue = Lists.newLinkedList();
        queue.add(sourceFrameID);
        while (!queue.isEmpty()) {
            final String id = queue.remove(0);
            for (final Relation relation : relations) {
                for (final String relatedID : RELATIONS.get(relation).get(id)) {
                    // ids doubles as the visited set, so cycles cannot loop forever
                    if (ids.add(relatedID) && recursive) {
                        queue.add(relatedID);
                    }
                }
            }
        }
        return ids;
    }

    /**
     * Command line entry point: scans the {@code .xml} files of the frames directory (option
     * {@code -f}) and writes the extracted relations, sorted and de-duplicated, to the output
     * file (option {@code -o}) as UTF-8 TSV. Any failure is delegated to
     * {@link CommandLine#fail(Throwable)}.
     *
     * @param args the command line arguments
     * @throws IOException declared for compatibility
     * @throws XMLStreamException declared for compatibility
     */
    public static void main(final String[] args) throws IOException, XMLStreamException {

        try {
            final CommandLine cmd = CommandLine
                    .parser()
                    .withName("eu.fbk.dkm.pikes.resources.FrameNet")
                    .withHeader(
                            "Generate a TSV file with indexed eu.fbk.dkm.pikes.resources.FrameNet data")
                    .withOption("f", "frames", "the directory containing frame definitions",
                            "DIR", Type.DIRECTORY_EXISTING, true, false, true)
                    .withOption("o", "output", "output file", "FILE", Type.FILE, true, false, true)
                    .withLogger(LoggerFactory.getLogger("eu.fbk")).parse(args);

            final File dir = cmd.getOptionValue("f", File.class);
            final File output = cmd.getOptionValue("o", File.class);

            // listFiles() returns null (rather than throwing) on I/O errors or non-directories
            final File[] files = dir.listFiles();
            if (files == null) {
                throw new IOException("Cannot list files in " + dir);
            }

            final Set<String> lines = Sets.newHashSet();
            for (final File file : files) {
                if (!file.getName().endsWith(".xml")) {
                    continue;
                }
                LOGGER.info("Processing {}", file);
                extractRelations(file, lines);
            }

            final List<String> sortedLines = Ordering.natural().immutableSortedCopy(lines);
            try (Writer writer = new OutputStreamWriter(new BufferedOutputStream(
                    new FileOutputStream(output)), Charsets.UTF_8)) {
                for (final String line : sortedLines) {
                    writer.write(line);
                    writer.write('\n');
                }
            }

            LOGGER.info("Extracted {} relations", sortedLines.size());

        } catch (final Throwable ex) {
            CommandLine.fail(ex);
        }
    }

    /**
     * Scans a frame definition file line by line and adds one TSV line per related frame to
     * {@code lines}. Relations that are values of {@link #INVERSES} are rewritten as the
     * corresponding key relation with source and target swapped.
     *
     * @param file the frame XML file, read as UTF-8
     * @param lines the set receiving the generated TSV lines
     * @throws IOException if the file cannot be read
     * @throws IllegalArgumentException if a relation type does not match any {@link Relation}
     */
    private static void extractRelations(final File file, final Set<String> lines)
            throws IOException {
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(
                new FileInputStream(file), Charsets.UTF_8))) {
            String line;
            String from = null;
            Relation relation = null;
            while ((line = reader.readLine()) != null) {
                // <frameRelation is tested first because it also starts with "<frame"
                if (line.contains("<frameRelation")) {
                    final String type = attributeValue(line, "type");
                    if (type == null) {
                        LOGGER.warn("Skipping frameRelation without type in {}: {}", file, line);
                        relation = null;
                    } else {
                        relation = parseRelation(type);
                    }
                } else if (line.startsWith("<frame")) {
                    final String name = attributeValue(line, "name");
                    if (name == null) {
                        LOGGER.warn("Cannot find frame name in {}: {}", file, line);
                        from = null;
                    } else {
                        from = name.trim().replace(' ', '_');
                    }
                } else if (line.contains("<relatedFrame")) {
                    final int start = line.indexOf(">") + 1;
                    final int end = line.indexOf('<', start);
                    if (end < 0 || from == null || relation == null) {
                        // would otherwise emit a "null" field or fail on substring
                        LOGGER.warn("Skipping unparseable relatedFrame in {}: {}", file, line);
                        continue;
                    }
                    final String to = line.substring(start, end).trim().replace(' ', '_');
                    final Relation stored = INVERSES.inverse().get(relation);
                    if (stored != null) {
                        lines.add(stored + "\t" + to + "\t" + from);
                    } else {
                        lines.add(relation + "\t" + from + "\t" + to);
                    }
                }
            }
        }
    }

    /**
     * Returns the text between the double quotes of the first {@code  attribute="..."}
     * occurrence in a line, or {@code null} if the attribute or its closing quote is missing.
     */
    private static String attributeValue(final String line, final String attribute) {
        final String marker = " " + attribute + "=\"";
        final int markerAt = line.indexOf(marker);
        if (markerAt < 0) {
            return null;
        }
        final int start = markerAt + marker.length();
        final int end = line.indexOf('"', start);
        return end < 0 ? null : line.substring(start, end);
    }

    /**
     * Converts a relation type string to a {@link Relation}: anything from the first
     * {@code '('} on is dropped, then the text is trimmed, upper-cased and spaces become
     * underscores.
     */
    private static Relation parseRelation(final String type) {
        final int paren = type.indexOf('(');
        final String name = paren < 0 ? type : type.substring(0, paren);
        // Locale.ROOT keeps the mapping stable, e.g. 'i' -> 'I' under a Turkish default locale
        return Relation.valueOf(name.trim().toUpperCase(Locale.ROOT).replace(' ', '_'));
    }

    /**
     * Frame-to-frame relation types. Constant names match the {@code type} attribute of
     * {@code frameRelation} elements once upper-cased and with spaces replaced by underscores.
     */
    public enum Relation {

        USES,

        IS_USED_BY,

        INHERITS_FROM,

        IS_INHERITED_BY,

        PRECEDES,

        IS_PRECEDED_BY,

        PERSPECTIVE_ON,

        IS_PERSPECTIVIZED_IN,

        SUBFRAME_OF,

        HAS_SUBFRAME,

        IS_CAUSATIVE_OF,

        IS_INCHOATIVE_OF,

        SEE_ALSO;

    }

}