1   package eu.fbk.dkm.pikes.resources;
2   
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.net.URL;
import java.util.Locale;
import java.util.Map;
import java.util.Set;

import com.google.common.base.Charsets;
import com.google.common.collect.HashMultimap;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Maps;
import com.google.common.collect.Multimap;
import com.google.common.collect.Ordering;
import com.google.common.io.Resources;

import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.Statement;
import org.eclipse.rdf4j.model.ValueFactory;
import org.eclipse.rdf4j.model.vocabulary.RDFS;
import org.eclipse.rdf4j.rio.RDFHandlerException;
import org.slf4j.LoggerFactory;

import eu.fbk.rdfpro.AbstractRDFHandler;
import eu.fbk.rdfpro.RDFSource;
import eu.fbk.rdfpro.RDFSources;
import eu.fbk.rdfpro.util.Statements;
import eu.fbk.utils.core.CommandLine;
import eu.fbk.utils.core.CommandLine.Type;
35  
36  public final class FrameBase {
37  
38      public static final String NAMESPACE = "http://framebase.org/ns/";
39  
40      private static final Map<String, String> CLASS_MAP;
41  
42      private static final Map<String, String> PROPERTY_MAP;
43  
44      private static final Set<String> NAME_SET;
45  
46      private static final Set<String> ROLES_SET;
47      private static final Set<String> PREDICATES_SET;
48  
49      static {
50          try {
51              final ImmutableMap.Builder<String, String> classBuilder = ImmutableMap.builder();
52              final ImmutableMap.Builder<String, String> propertyBuilder = ImmutableMap.builder();
53              final ImmutableSet.Builder<String> namesBuilder = ImmutableSet.builder();
54              final ImmutableSet.Builder<String> propertiesBuilder = ImmutableSet.builder();
55              final ImmutableSet.Builder<String> predicatesBuilder = ImmutableSet.builder();
56  
57              final BufferedReader reader = Resources.asCharSource(
58                      FrameBase.class.getResource("FrameBase.tsv"), Charsets.UTF_8)
59                      .openBufferedStream();
60  
61              String line;
62              while ((line = reader.readLine()) != null) {
63                  final String[] tokens = line.split("\t");
64                  final String name = tokens[0];
65                  namesBuilder.add(name);
66                  if (name.startsWith("fe-")) {
67                      propertiesBuilder.add(name);
68                  } else if (name.startsWith("frame-")) {
69                      predicatesBuilder.add(name);
70                  }
71                  for (int i = 1; i < tokens.length; ++i) {
72                      final String key = tokens[i];
73                      if (key.indexOf('@') >= 0) {
74                          propertyBuilder.put(key, name);
75                      } else {
76                          classBuilder.put(key, name);
77                      }
78                  }
79              }
80              reader.close();
81  
82              CLASS_MAP = classBuilder.build();
83              PROPERTY_MAP = propertyBuilder.build();
84              NAME_SET = namesBuilder.build();
85  
86              ROLES_SET = propertiesBuilder.build();
87              PREDICATES_SET = predicatesBuilder.build();
88  
89          } catch (final IOException ex) {
90              throw new Error("Cannot load eu.fbk.dkm.pikes.resources.FrameBase data", ex);
91          }
92      }
93  
94      public static Set<String> getRolesSet() {
95          return ROLES_SET;
96      }
97  
98      public static Set<String> getPredicatesSet() {
99          return PREDICATES_SET;
100     }
101 
102     public static IRI uriFor(final String name) {
103         if (name == null) {
104             return null;
105         }
106         return Statements.VALUE_FACTORY.createIRI(NAMESPACE, name);
107     }
108 
109     public static IRI classFor(final String fnFrame, final String predicateLemma,
110             final POS predicatePos) {
111         final String key = classKeyFor(fnFrame, predicateLemma, predicatePos);
112         String name = CLASS_MAP.get(key);
113         if (name == null) {
114             name = classNameFor(key);
115             if (!NAME_SET.contains(name)) {
116                 return null;
117             }
118         }
119         return Statements.VALUE_FACTORY.createIRI(NAMESPACE, name);
120     }
121 
122     public static IRI propertyFor(final String fnFrame, final String fnFE) {
123         final String key = propertyKeyFor(fnFrame, fnFE);
124         String name = PROPERTY_MAP.get(key);
125         if (name == null) {
126             name = propertyNameFor(key);
127             if (!NAME_SET.contains(name)) {
128                 return null;
129             }
130         }
131         return Statements.VALUE_FACTORY.createIRI(NAMESPACE, name);
132     }
133 
134     public static boolean isMicroframe(final IRI uri) {
135         if (!uri.getNamespace().equals(FrameBase.NAMESPACE)) {
136             return false;
137         }
138         final String str = uri.getLocalName();
139         final int index = str.lastIndexOf('.');
140         if (index < 0) {
141             return false;
142         }
143         for (int i = index + 1; i < str.length(); ++i) {
144             final char ch = str.charAt(i);
145             if (ch < 'a' || ch > 'z') {
146                 return false;
147             }
148         }
149         return true;
150     }
151 
152     private static String classKeyFor(final String fnFrame, final String predicateLemma,
153             final POS predicatePos) {
154         return (fnFrame + "#" + predicateLemma + "." + predicatePos.getLetter()) //
155                 .toLowerCase().replace(' ', '_');
156     }
157 
158     private static String classNameFor(final String classKey) {
159         final int index1 = classKey.lastIndexOf('#');
160         final int index2 = classKey.lastIndexOf('.');
161         final String frame = Character.toUpperCase(classKey.charAt(0))
162                 + classKey.substring(1, index1);
163         final String lemma = classKey.substring(index1 + 1, index2);
164         final String pos = classKey.substring(index2 + 1);
165         return "frame-" + frame + "-" + lemma + "." + pos;
166     }
167 
168     private static String propertyKeyFor(final String fnFrame, final String fnFE) {
169         return (fnFrame + '@' + fnFE).toLowerCase().replace(' ', '_');
170     }
171 
172     private static String propertyNameFor(final String propertyKey) {
173         final int index = propertyKey.indexOf('@');
174         final String frame = Character.toUpperCase(propertyKey.charAt(0))
175                 + propertyKey.substring(1, index);
176         final String role = Character.toUpperCase(propertyKey.charAt(index + 1))
177                 + propertyKey.substring(index + 2);
178         return "fe-" + frame + "-" + role;
179     }
180 
181     public static void main(final String... args) {
182 
183         try {
184             final CommandLine cmd = CommandLine
185                     .parser()
186                     .withName("eu.fbk.dkm.pikes.resources.FrameBase")
187                     .withHeader(
188                             "Generate a TSV file with indexed eu.fbk.dkm.pikes.resources.FrameBase data")
189                     .withOption("i", "input", "the input file containing FrameBase RDF data",
190                             "FILE", Type.FILE_EXISTING, true, false, true)
191                     .withOption("o", "output", "output file", "FILE", Type.FILE, true, false, true)
192                     .withLogger(LoggerFactory.getLogger("eu.fbk")).parse(args);
193 
194             final File inputFile = cmd.getOptionValue("i", File.class);
195             final File outputFile = cmd.getOptionValue("o", File.class);
196 
197             final ValueFactory vf = Statements.VALUE_FACTORY;
198             final IRI inheritsFrom = vf.createIRI("http://framebase.org/ns/inheritsFrom");
199             //for FB2.0 final IRI inheritsFrom = vf.createIRI("http://framebase.org/meta/inheritsFrom");
200 
201             final IRI denotedBy = vf.createIRI("http://www.w3.org/ns/lemon/ontolex#isDenotedBy");
202             final String self = "<SELF>";
203 
204             final Multimap<String, String> map = HashMultimap.create();
205 
206             final Map<String, String> frameParents = Maps.newHashMap();
207             final Multimap<String, String> frameEntries = HashMultimap.create();
208 
209             final RDFSource source = RDFSources.read(false, true, null, null, null, true,
210                     inputFile.getAbsolutePath());
211             source.emit(new AbstractRDFHandler() {
212 
213                 @Override
214                 public void handleStatement(final Statement stmt) throws RDFHandlerException {
215                     if (stmt.getSubject() instanceof IRI && stmt.getObject() instanceof IRI) {
216 
217                         final IRI s = (IRI) stmt.getSubject();
218                         final IRI p = stmt.getPredicate();
219                         final IRI o = (IRI) stmt.getObject();
220                         final String sn = s.getLocalName();
221                         final String on = o.getLocalName();
222 
223                         if (p.equals(RDFS.DOMAIN)) {
224                             if (sn.startsWith("fe-") && on.startsWith("frame-")) {
225                                 final String frame = on.substring("frame-".length());
226                                 final String fe = sn.substring("fe-".length() + frame.length() + 1);
227                                 final String key = propertyKeyFor(frame, fe);
228                                 final String name = propertyNameFor(key);
229                                 if (!name.equals(sn)) {
230                                     map.put(sn, key);
231                                 }
232                                 map.put(sn, self);
233                             }
234 
235                         } else if (p.equals(denotedBy)) {
236                             if (sn.startsWith("frame-")) {
237                                 frameEntries.put(sn, on);
238                             }
239 
240                         } else if (p.equals(inheritsFrom)) {
241                             if (sn.startsWith("frame-") && on.startsWith("frame-")) {
242                                 frameParents.put(sn, on);
243                             }
244                         }
245                     }
246                 }
247 
248             }, 1);
249 
250             for (final Map.Entry<String, String> entry : frameEntries.entries()) {
251                 final int index = entry.getValue().indexOf("-");
252                 final POS pos = POS.forFrameNetTag(entry.getValue().substring(0, index));
253                 final String lemma = entry.getValue().substring(index + 1);
254                 final String frame = frameParents.get(entry.getKey()).substring("frame-".length());
255                 final String key = classKeyFor(frame, lemma, pos);
256                 final String name = classNameFor(key);
257                 if (!name.equals(entry.getKey())) {
258                     map.put(entry.getKey(), key);
259                 }
260                 map.put(entry.getKey(), self);
261             }
262 
263             try (Writer writer = new OutputStreamWriter(new BufferedOutputStream(
264                     new FileOutputStream(outputFile)), Charsets.UTF_8)) {
265                 for (final String name : Ordering.natural().sortedCopy(map.keySet())) {
266                     writer.write(name);
267                     for (final String key : Ordering.natural().sortedCopy(map.get(name))) {
268                         if (!self.equals(key)) {
269                             writer.write("\t");
270                             writer.write(key);
271                         }
272                     }
273                     writer.write("\n");
274                 }
275             }
276 
277         } catch (final Throwable ex) {
278             CommandLine.fail(ex);
279         }
280     }
281 
282     private FrameBase() {
283     }
284 
285     public enum POS {
286 
287         NOUN('n'),
288 
289         VERB('v'),
290 
291         ADJECTIVE('a'),
292 
293         OTHER('c');
294 
295         private final char letter;
296 
297         private POS(final char letter) {
298             this.letter = letter;
299         }
300 
301         public char getLetter() {
302             return this.letter;
303         }
304 
305         public static POS forFrameNetTag(String tag) {
306             tag = tag.toLowerCase();
307             if ("n".equals(tag)) {
308                 return NOUN;
309             } else if ("a".equals(tag)) {
310                 return ADJECTIVE;
311             } else if ("v".equals(tag)) {
312                 return VERB;
313             } else {
314                 return OTHER;
315             }
316         }
317 
318         public static POS forPennTag(String tag) {
319             tag = tag.toUpperCase();
320             if (tag.startsWith("NN")) {
321                 return NOUN;
322             } else if (tag.startsWith("VB")) {
323                 return VERB;
324             } else if (tag.startsWith("JJ")) {
325                 return ADJECTIVE;
326             } else {
327                 return OTHER;
328             }
329         }
330 
331     }
332 
333 }