1 package eu.fbk.dkm.pikes.resources;
2
3 import com.google.common.base.Charsets;
4 import com.google.common.collect.*;
5 import com.google.common.io.Resources;
6 import eu.fbk.utils.core.CommandLine;
7 import eu.fbk.utils.core.CommandLine.Type;
8 import org.slf4j.Logger;
9 import org.slf4j.LoggerFactory;
10
11 import javax.xml.stream.XMLStreamException;
12 import java.io.*;
13 import java.util.List;
14 import java.util.Map;
15 import java.util.Set;
16
17 public class FrameNet {
18
19 private static final Logger LOGGER = LoggerFactory.getLogger(FrameNet.class);
20
21 private static final Map<Relation, Multimap<String, String>> RELATIONS;
22
23 static {
24 try {
25 final Map<Relation, ImmutableMultimap.Builder<String, String>> map = Maps.newHashMap();
26 for (final Relation relation : Relation.values()) {
27 map.put(relation, ImmutableMultimap.builder());
28 }
29 for (final String line : Resources.readLines(
30 FrameNet.class.getResource("FrameNet.tsv"), Charsets.UTF_8)) {
31 final String[] tokens = line.split("\t");
32 final Relation relation = Relation.valueOf(tokens[0]);
33 final String from = tokens[1];
34 final String to = tokens[2];
35 map.get(relation).put(from, to);
36 if (relation == Relation.USES) {
37 map.get(Relation.IS_USED_BY).put(to, from);
38 } else if (relation == Relation.INHERITS_FROM) {
39 map.get(Relation.IS_INHERITED_BY).put(to, from);
40 } else if (relation == Relation.PRECEDES) {
41 map.get(Relation.IS_PRECEDED_BY).put(to, from);
42 } else if (relation == Relation.PERSPECTIVE_ON) {
43 map.get(Relation.IS_PERSPECTIVIZED_IN).put(to, from);
44 } else if (relation == Relation.SUBFRAME_OF) {
45 map.get(Relation.HAS_SUBFRAME).put(to, from);
46 }
47 }
48 final ImmutableMap.Builder<Relation, Multimap<String, String>> mapBuilder = ImmutableMap
49 .builder();
50 for (final Map.Entry<Relation, ImmutableMultimap.Builder<String, String>> entry : map
51 .entrySet()) {
52 mapBuilder.put(entry.getKey(), entry.getValue().build());
53 }
54 RELATIONS = mapBuilder.build();
55
56 } catch (final IOException ex) {
57 throw new Error("Could not load eu.fbk.dkm.pikes.resources.FrameNet data from classpath", ex);
58 }
59 }
60
61 public static Set<String> getRelatedFrames(final boolean recursive,
62 final String sourceFrameID, final Relation... relations) {
63 final Set<String> ids = Sets.newHashSet();
64 final List<String> queue = Lists.newLinkedList();
65 queue.add(sourceFrameID);
66 while (!queue.isEmpty()) {
67 final String id = queue.remove(0);
68 for (final Relation relation : relations) {
69 for (final String relatedID : RELATIONS.get(relation).get(id)) {
70 if (ids.add(relatedID) && recursive) {
71 queue.add(relatedID);
72 }
73 }
74 }
75 }
76 return ids;
77 }
78
79 public static void main(final String[] args) throws IOException, XMLStreamException {
80
81 try {
82 final CommandLine cmd = CommandLine
83 .parser()
84 .withName("eu.fbk.dkm.pikes.resources.FrameNet")
85 .withHeader("Generate a TSV file with indexed eu.fbk.dkm.pikes.resources.FrameNet data")
86 .withOption("f", "frames", "the directory containing frame definitions",
87 "DIR", Type.DIRECTORY_EXISTING, true, false, true)
88 .withOption("o", "output", "output file", "FILE", Type.FILE, true, false, true)
89 .withLogger(LoggerFactory.getLogger("eu.fbk")).parse(args);
90
91 final File dir = cmd.getOptionValue("f", File.class);
92 final File output = cmd.getOptionValue("o", File.class);
93
94 final Set<String> lines = Sets.newHashSet();
95 for (final File file : dir.listFiles()) {
96 if (!file.getName().endsWith(".xml")) {
97 continue;
98 }
99 LOGGER.info("Processing {}", file);
100 try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
101 String line = null;
102 String from = null;
103 Relation relation = null;
104 while ((line = reader.readLine()) != null) {
105 if (line.startsWith("<frame")) {
106 final int start = line.indexOf(" name=\"") + 7;
107 final int end = line.indexOf('"', start);
108 from = line.substring(start, end).trim().replace(' ', '_');
109 } else if (line.contains("<frameRelation")) {
110 final int start = line.indexOf(" type=\"") + 7;
111 int end = line.indexOf('(', start);
112 if (end < 0) {
113 end = line.length();
114 }
115 end = Math.min(end, line.indexOf('"', start));
116 relation = Relation.valueOf(line.substring(start, end).trim()
117 .toUpperCase().replace(' ', '_'));
118 } else if (line.contains("<relatedFrame")) {
119 final int start = line.indexOf(">") + 1;
120 final int end = line.indexOf('<', start);
121 final String to = line.substring(start, end).trim().replace(' ', '_');
122 if (relation == Relation.IS_USED_BY) {
123 lines.add(Relation.USES + "\t" + to + "\t" + from);
124 } else if (relation == Relation.IS_INHERITED_BY) {
125 lines.add(Relation.INHERITS_FROM + "\t" + to + "\t" + from);
126 } else if (relation == Relation.IS_PRECEDED_BY) {
127 lines.add(Relation.PRECEDES + "\t" + to + "\t" + from);
128 } else if (relation == Relation.IS_PERSPECTIVIZED_IN) {
129 lines.add(Relation.PERSPECTIVE_ON + "\t" + to + "\t" + from);
130 } else if (relation == Relation.HAS_SUBFRAME) {
131 lines.add(Relation.SUBFRAME_OF + "\t" + to + "\t" + from);
132 } else {
133 lines.add(relation + "\t" + from + "\t" + to);
134 }
135 }
136 }
137 }
138 }
139
140 final List<String> sortedLines = Ordering.natural().immutableSortedCopy(lines);
141 try (Writer writer = new OutputStreamWriter(new BufferedOutputStream(
142 new FileOutputStream(output)), Charsets.UTF_8)) {
143 for (final String line : sortedLines) {
144 writer.write(line);
145 writer.write('\n');
146 }
147 }
148
149 LOGGER.info("Extracted {} relations", sortedLines.size());
150
151 } catch (final Throwable ex) {
152 CommandLine.fail(ex);
153 }
154 }
155
156 public enum Relation {
157
158 USES,
159
160 IS_USED_BY,
161
162 INHERITS_FROM,
163
164 IS_INHERITED_BY,
165
166 PRECEDES,
167
168 IS_PRECEDED_BY,
169
170 PERSPECTIVE_ON,
171
172 IS_PERSPECTIVIZED_IN,
173
174 SUBFRAME_OF,
175
176 HAS_SUBFRAME,
177
178 IS_CAUSATIVE_OF,
179
180 IS_INCHOATIVE_OF,
181
182 SEE_ALSO;
183
184 }
185
186 }