1   package eu.fbk.dkm.pikes.resources;
2   
3   import com.google.common.base.Charsets;
4   import com.google.common.collect.*;
5   import com.google.common.io.Resources;
6   import eu.fbk.utils.core.CommandLine;
7   import eu.fbk.utils.core.CommandLine.Type;
8   import org.slf4j.Logger;
9   import org.slf4j.LoggerFactory;
10  
11  import javax.xml.stream.XMLStreamException;
12  import java.io.*;
13  import java.util.List;
14  import java.util.Map;
15  import java.util.Set;
16  
17  public class FrameNet {
18  
19      private static final Logger LOGGER = LoggerFactory.getLogger(FrameNet.class);
20  
21      private static final Map<Relation, Multimap<String, String>> RELATIONS;
22  
23      static {
24          try {
25              final Map<Relation, ImmutableMultimap.Builder<String, String>> map = Maps.newHashMap();
26              for (final Relation relation : Relation.values()) {
27                  map.put(relation, ImmutableMultimap.builder());
28              }
29              for (final String line : Resources.readLines(
30                      FrameNet.class.getResource("FrameNet.tsv"), Charsets.UTF_8)) {
31                  final String[] tokens = line.split("\t");
32                  final Relation relation = Relation.valueOf(tokens[0]);
33                  final String from = tokens[1];
34                  final String to = tokens[2];
35                  map.get(relation).put(from, to);
36                  if (relation == Relation.USES) {
37                      map.get(Relation.IS_USED_BY).put(to, from);
38                  } else if (relation == Relation.INHERITS_FROM) {
39                      map.get(Relation.IS_INHERITED_BY).put(to, from);
40                  } else if (relation == Relation.PRECEDES) {
41                      map.get(Relation.IS_PRECEDED_BY).put(to, from);
42                  } else if (relation == Relation.PERSPECTIVE_ON) {
43                      map.get(Relation.IS_PERSPECTIVIZED_IN).put(to, from);
44                  } else if (relation == Relation.SUBFRAME_OF) {
45                      map.get(Relation.HAS_SUBFRAME).put(to, from);
46                  }
47              }
48              final ImmutableMap.Builder<Relation, Multimap<String, String>> mapBuilder = ImmutableMap
49                      .builder();
50              for (final Map.Entry<Relation, ImmutableMultimap.Builder<String, String>> entry : map
51                      .entrySet()) {
52                  mapBuilder.put(entry.getKey(), entry.getValue().build());
53              }
54              RELATIONS = mapBuilder.build();
55  
56          } catch (final IOException ex) {
57              throw new Error("Could not load eu.fbk.dkm.pikes.resources.FrameNet data from classpath", ex);
58          }
59      }
60  
61      public static Set<String> getRelatedFrames(final boolean recursive,
62              final String sourceFrameID, final Relation... relations) {
63          final Set<String> ids = Sets.newHashSet();
64          final List<String> queue = Lists.newLinkedList();
65          queue.add(sourceFrameID);
66          while (!queue.isEmpty()) {
67              final String id = queue.remove(0);
68              for (final Relation relation : relations) {
69                  for (final String relatedID : RELATIONS.get(relation).get(id)) {
70                      if (ids.add(relatedID) && recursive) {
71                          queue.add(relatedID);
72                      }
73                  }
74              }
75          }
76          return ids;
77      }
78  
79      public static void main(final String[] args) throws IOException, XMLStreamException {
80  
81          try {
82              final CommandLine cmd = CommandLine
83                      .parser()
84                      .withName("eu.fbk.dkm.pikes.resources.FrameNet")
85                      .withHeader("Generate a TSV file with indexed eu.fbk.dkm.pikes.resources.FrameNet data")
86                      .withOption("f", "frames", "the directory containing frame definitions",
87                              "DIR", Type.DIRECTORY_EXISTING, true, false, true)
88                      .withOption("o", "output", "output file", "FILE", Type.FILE, true, false, true)
89                      .withLogger(LoggerFactory.getLogger("eu.fbk")).parse(args);
90  
91              final File dir = cmd.getOptionValue("f", File.class);
92              final File output = cmd.getOptionValue("o", File.class);
93  
94              final Set<String> lines = Sets.newHashSet();
95              for (final File file : dir.listFiles()) {
96                  if (!file.getName().endsWith(".xml")) {
97                      continue;
98                  }
99                  LOGGER.info("Processing {}", file);
100                 try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
101                     String line = null;
102                     String from = null;
103                     Relation relation = null;
104                     while ((line = reader.readLine()) != null) {
105                         if (line.startsWith("<frame")) {
106                             final int start = line.indexOf(" name=\"") + 7;
107                             final int end = line.indexOf('"', start);
108                             from = line.substring(start, end).trim().replace(' ', '_');
109                         } else if (line.contains("<frameRelation")) {
110                             final int start = line.indexOf(" type=\"") + 7;
111                             int end = line.indexOf('(', start);
112                             if (end < 0) {
113                                 end = line.length();
114                             }
115                             end = Math.min(end, line.indexOf('"', start));
116                             relation = Relation.valueOf(line.substring(start, end).trim()
117                                     .toUpperCase().replace(' ', '_'));
118                         } else if (line.contains("<relatedFrame")) {
119                             final int start = line.indexOf(">") + 1;
120                             final int end = line.indexOf('<', start);
121                             final String to = line.substring(start, end).trim().replace(' ', '_');
122                             if (relation == Relation.IS_USED_BY) {
123                                 lines.add(Relation.USES + "\t" + to + "\t" + from);
124                             } else if (relation == Relation.IS_INHERITED_BY) {
125                                 lines.add(Relation.INHERITS_FROM + "\t" + to + "\t" + from);
126                             } else if (relation == Relation.IS_PRECEDED_BY) {
127                                 lines.add(Relation.PRECEDES + "\t" + to + "\t" + from);
128                             } else if (relation == Relation.IS_PERSPECTIVIZED_IN) {
129                                 lines.add(Relation.PERSPECTIVE_ON + "\t" + to + "\t" + from);
130                             } else if (relation == Relation.HAS_SUBFRAME) {
131                                 lines.add(Relation.SUBFRAME_OF + "\t" + to + "\t" + from);
132                             } else {
133                                 lines.add(relation + "\t" + from + "\t" + to);
134                             }
135                         }
136                     }
137                 }
138             }
139 
140             final List<String> sortedLines = Ordering.natural().immutableSortedCopy(lines);
141             try (Writer writer = new OutputStreamWriter(new BufferedOutputStream(
142                     new FileOutputStream(output)), Charsets.UTF_8)) {
143                 for (final String line : sortedLines) {
144                     writer.write(line);
145                     writer.write('\n');
146                 }
147             }
148 
149             LOGGER.info("Extracted {} relations", sortedLines.size());
150 
151         } catch (final Throwable ex) {
152             CommandLine.fail(ex);
153         }
154     }
155 
156     public enum Relation {
157 
158         USES,
159 
160         IS_USED_BY,
161 
162         INHERITS_FROM,
163 
164         IS_INHERITED_BY,
165 
166         PRECEDES,
167 
168         IS_PRECEDED_BY,
169 
170         PERSPECTIVE_ON,
171 
172         IS_PERSPECTIVIZED_IN,
173 
174         SUBFRAME_OF,
175 
176         HAS_SUBFRAME,
177 
178         IS_CAUSATIVE_OF,
179 
180         IS_INCHOATIVE_OF,
181 
182         SEE_ALSO;
183 
184     }
185 
186 }