1 package eu.fbk.dkm.pikes.resources;
2
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.Locale;
import java.util.Map;
import java.util.Set;

import com.google.common.base.Charsets;
import com.google.common.collect.HashMultimap;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Maps;
import com.google.common.collect.Multimap;
import com.google.common.collect.Ordering;
import com.google.common.io.Resources;

import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.Statement;
import org.eclipse.rdf4j.model.ValueFactory;
import org.eclipse.rdf4j.model.vocabulary.RDFS;
import org.eclipse.rdf4j.rio.RDFHandlerException;
import org.slf4j.LoggerFactory;

import eu.fbk.rdfpro.AbstractRDFHandler;
import eu.fbk.rdfpro.RDFSource;
import eu.fbk.rdfpro.RDFSources;
import eu.fbk.rdfpro.util.Statements;
import eu.fbk.utils.core.CommandLine;
import eu.fbk.utils.core.CommandLine.Type;
35
36 public final class FrameBase {
37
38 public static final String NAMESPACE = "http://framebase.org/ns/";
39
40 private static final Map<String, String> CLASS_MAP;
41
42 private static final Map<String, String> PROPERTY_MAP;
43
44 private static final Set<String> NAME_SET;
45
46 private static final Set<String> ROLES_SET;
47 private static final Set<String> PREDICATES_SET;
48
49 static {
50 try {
51 final ImmutableMap.Builder<String, String> classBuilder = ImmutableMap.builder();
52 final ImmutableMap.Builder<String, String> propertyBuilder = ImmutableMap.builder();
53 final ImmutableSet.Builder<String> namesBuilder = ImmutableSet.builder();
54 final ImmutableSet.Builder<String> propertiesBuilder = ImmutableSet.builder();
55 final ImmutableSet.Builder<String> predicatesBuilder = ImmutableSet.builder();
56
57 final BufferedReader reader = Resources.asCharSource(
58 FrameBase.class.getResource("FrameBase.tsv"), Charsets.UTF_8)
59 .openBufferedStream();
60
61 String line;
62 while ((line = reader.readLine()) != null) {
63 final String[] tokens = line.split("\t");
64 final String name = tokens[0];
65 namesBuilder.add(name);
66 if (name.startsWith("fe-")) {
67 propertiesBuilder.add(name);
68 } else if (name.startsWith("frame-")) {
69 predicatesBuilder.add(name);
70 }
71 for (int i = 1; i < tokens.length; ++i) {
72 final String key = tokens[i];
73 if (key.indexOf('@') >= 0) {
74 propertyBuilder.put(key, name);
75 } else {
76 classBuilder.put(key, name);
77 }
78 }
79 }
80 reader.close();
81
82 CLASS_MAP = classBuilder.build();
83 PROPERTY_MAP = propertyBuilder.build();
84 NAME_SET = namesBuilder.build();
85
86 ROLES_SET = propertiesBuilder.build();
87 PREDICATES_SET = predicatesBuilder.build();
88
89 } catch (final IOException ex) {
90 throw new Error("Cannot load eu.fbk.dkm.pikes.resources.FrameBase data", ex);
91 }
92 }
93
94 public static Set<String> getRolesSet() {
95 return ROLES_SET;
96 }
97
98 public static Set<String> getPredicatesSet() {
99 return PREDICATES_SET;
100 }
101
102 public static IRI uriFor(final String name) {
103 if (name == null) {
104 return null;
105 }
106 return Statements.VALUE_FACTORY.createIRI(NAMESPACE, name);
107 }
108
109 public static IRI classFor(final String fnFrame, final String predicateLemma,
110 final POS predicatePos) {
111 final String key = classKeyFor(fnFrame, predicateLemma, predicatePos);
112 String name = CLASS_MAP.get(key);
113 if (name == null) {
114 name = classNameFor(key);
115 if (!NAME_SET.contains(name)) {
116 return null;
117 }
118 }
119 return Statements.VALUE_FACTORY.createIRI(NAMESPACE, name);
120 }
121
122 public static IRI propertyFor(final String fnFrame, final String fnFE) {
123 final String key = propertyKeyFor(fnFrame, fnFE);
124 String name = PROPERTY_MAP.get(key);
125 if (name == null) {
126 name = propertyNameFor(key);
127 if (!NAME_SET.contains(name)) {
128 return null;
129 }
130 }
131 return Statements.VALUE_FACTORY.createIRI(NAMESPACE, name);
132 }
133
134 public static boolean isMicroframe(final IRI uri) {
135 if (!uri.getNamespace().equals(FrameBase.NAMESPACE)) {
136 return false;
137 }
138 final String str = uri.getLocalName();
139 final int index = str.lastIndexOf('.');
140 if (index < 0) {
141 return false;
142 }
143 for (int i = index + 1; i < str.length(); ++i) {
144 final char ch = str.charAt(i);
145 if (ch < 'a' || ch > 'z') {
146 return false;
147 }
148 }
149 return true;
150 }
151
152 private static String classKeyFor(final String fnFrame, final String predicateLemma,
153 final POS predicatePos) {
154 return (fnFrame + "#" + predicateLemma + "." + predicatePos.getLetter())
155 .toLowerCase().replace(' ', '_');
156 }
157
158 private static String classNameFor(final String classKey) {
159 final int index1 = classKey.lastIndexOf('#');
160 final int index2 = classKey.lastIndexOf('.');
161 final String frame = Character.toUpperCase(classKey.charAt(0))
162 + classKey.substring(1, index1);
163 final String lemma = classKey.substring(index1 + 1, index2);
164 final String pos = classKey.substring(index2 + 1);
165 return "frame-" + frame + "-" + lemma + "." + pos;
166 }
167
168 private static String propertyKeyFor(final String fnFrame, final String fnFE) {
169 return (fnFrame + '@' + fnFE).toLowerCase().replace(' ', '_');
170 }
171
172 private static String propertyNameFor(final String propertyKey) {
173 final int index = propertyKey.indexOf('@');
174 final String frame = Character.toUpperCase(propertyKey.charAt(0))
175 + propertyKey.substring(1, index);
176 final String role = Character.toUpperCase(propertyKey.charAt(index + 1))
177 + propertyKey.substring(index + 2);
178 return "fe-" + frame + "-" + role;
179 }
180
181 public static void main(final String... args) {
182
183 try {
184 final CommandLine cmd = CommandLine
185 .parser()
186 .withName("eu.fbk.dkm.pikes.resources.FrameBase")
187 .withHeader(
188 "Generate a TSV file with indexed eu.fbk.dkm.pikes.resources.FrameBase data")
189 .withOption("i", "input", "the input file containing FrameBase RDF data",
190 "FILE", Type.FILE_EXISTING, true, false, true)
191 .withOption("o", "output", "output file", "FILE", Type.FILE, true, false, true)
192 .withLogger(LoggerFactory.getLogger("eu.fbk")).parse(args);
193
194 final File inputFile = cmd.getOptionValue("i", File.class);
195 final File outputFile = cmd.getOptionValue("o", File.class);
196
197 final ValueFactory vf = Statements.VALUE_FACTORY;
198 final IRI inheritsFrom = vf.createIRI("http://framebase.org/ns/inheritsFrom");
199
200
201 final IRI denotedBy = vf.createIRI("http://www.w3.org/ns/lemon/ontolex#isDenotedBy");
202 final String self = "<SELF>";
203
204 final Multimap<String, String> map = HashMultimap.create();
205
206 final Map<String, String> frameParents = Maps.newHashMap();
207 final Multimap<String, String> frameEntries = HashMultimap.create();
208
209 final RDFSource source = RDFSources.read(false, true, null, null, null, true,
210 inputFile.getAbsolutePath());
211 source.emit(new AbstractRDFHandler() {
212
213 @Override
214 public void handleStatement(final Statement stmt) throws RDFHandlerException {
215 if (stmt.getSubject() instanceof IRI && stmt.getObject() instanceof IRI) {
216
217 final IRI s = (IRI) stmt.getSubject();
218 final IRI p = stmt.getPredicate();
219 final IRI o = (IRI) stmt.getObject();
220 final String sn = s.getLocalName();
221 final String on = o.getLocalName();
222
223 if (p.equals(RDFS.DOMAIN)) {
224 if (sn.startsWith("fe-") && on.startsWith("frame-")) {
225 final String frame = on.substring("frame-".length());
226 final String fe = sn.substring("fe-".length() + frame.length() + 1);
227 final String key = propertyKeyFor(frame, fe);
228 final String name = propertyNameFor(key);
229 if (!name.equals(sn)) {
230 map.put(sn, key);
231 }
232 map.put(sn, self);
233 }
234
235 } else if (p.equals(denotedBy)) {
236 if (sn.startsWith("frame-")) {
237 frameEntries.put(sn, on);
238 }
239
240 } else if (p.equals(inheritsFrom)) {
241 if (sn.startsWith("frame-") && on.startsWith("frame-")) {
242 frameParents.put(sn, on);
243 }
244 }
245 }
246 }
247
248 }, 1);
249
250 for (final Map.Entry<String, String> entry : frameEntries.entries()) {
251 final int index = entry.getValue().indexOf("-");
252 final POS pos = POS.forFrameNetTag(entry.getValue().substring(0, index));
253 final String lemma = entry.getValue().substring(index + 1);
254 final String frame = frameParents.get(entry.getKey()).substring("frame-".length());
255 final String key = classKeyFor(frame, lemma, pos);
256 final String name = classNameFor(key);
257 if (!name.equals(entry.getKey())) {
258 map.put(entry.getKey(), key);
259 }
260 map.put(entry.getKey(), self);
261 }
262
263 try (Writer writer = new OutputStreamWriter(new BufferedOutputStream(
264 new FileOutputStream(outputFile)), Charsets.UTF_8)) {
265 for (final String name : Ordering.natural().sortedCopy(map.keySet())) {
266 writer.write(name);
267 for (final String key : Ordering.natural().sortedCopy(map.get(name))) {
268 if (!self.equals(key)) {
269 writer.write("\t");
270 writer.write(key);
271 }
272 }
273 writer.write("\n");
274 }
275 }
276
277 } catch (final Throwable ex) {
278 CommandLine.fail(ex);
279 }
280 }
281
/** Private constructor: the class is not meant to be instantiated. */
private FrameBase() {
    // nothing to initialize
}
284
/**
 * Coarse part-of-speech categories, each associated with a one-letter code.
 */
public enum POS {

    NOUN('n'),

    VERB('v'),

    ADJECTIVE('a'),

    OTHER('c');

    private final char letter;

    POS(final char letter) {
        this.letter = letter;
    }

    /**
     * Returns the one-letter code of this category.
     *
     * @return {@code 'n'}, {@code 'v'}, {@code 'a'} or {@code 'c'}
     */
    public char getLetter() {
        return this.letter;
    }

    /**
     * Maps a tag to a category, ignoring case: {@code "n"} is a noun, {@code "a"} an
     * adjective, {@code "v"} a verb, anything else {@link #OTHER}.
     *
     * @param tag the tag, not null
     * @return the corresponding category
     */
    public static POS forFrameNetTag(final String tag) {
        switch (tag.toLowerCase()) {
        case "n":
            return NOUN;
        case "a":
            return ADJECTIVE;
        case "v":
            return VERB;
        default:
            return OTHER;
        }
    }

    /**
     * Maps a tag to a category by its prefix, ignoring case: {@code "NN"} is a noun,
     * {@code "VB"} a verb, {@code "JJ"} an adjective, anything else {@link #OTHER}.
     *
     * @param tag the tag, not null
     * @return the corresponding category
     */
    public static POS forPennTag(final String tag) {
        final String upper = tag.toUpperCase();
        if (upper.startsWith("NN")) {
            return NOUN;
        }
        if (upper.startsWith("VB")) {
            return VERB;
        }
        if (upper.startsWith("JJ")) {
            return ADJECTIVE;
        }
        return OTHER;
    }

}
332
333 }