1 package eu.fbk.dkm.pikes.raid.mdfsa.wordnet;
2
3 import eu.fbk.dkm.pikes.raid.mdfsa.FileManager;
4 import eu.fbk.dkm.pikes.raid.mdfsa.FileManager.Mode;
5 import eu.fbk.shell.mdfsa.data.structures.Graph;
6
7 import java.util.ArrayList;
8 import java.util.HashMap;
9 import java.util.Iterator;
10 import java.util.Properties;
11
12 public class WordNetLoader {
13
14 Properties prp;
15 private HashMap<String, ArrayList<Long>> wordSynsets;
16 private HashMap<Long, ArrayList<String>> synsetWords;
17 private HashMap<String, String> allowedRelations;
18 private HashMap<String, Double> weightsRelations;
19 private HashMap<Long, ArrayList<WordNetRelation>> synsetRelations;
20
21
22
23
24 private HashMap<String, Integer>[] allTerms;
25 private HashMap<String, Integer> allNouns;
26 private HashMap<String, Integer> allVerbs;
27 private HashMap<String, Integer> allAdjs;
28 private HashMap<String, Integer> allAdvs;
29 private HashMap<String, Integer> stopwords;
30 private HashMap<String, String>[] allExceptions;
31 private HashMap<String, String> nounExceptions;
32 private HashMap<String, String> verbExceptions;
33 private HashMap<String, String> adjExceptions;
34 private HashMap<String, String> advExceptions;
35
36
37
38
39
40
41 @SuppressWarnings("unchecked")
42 public WordNetLoader(Properties prp) {
43 this.prp = prp;
44 this.wordSynsets = new HashMap<String, ArrayList<Long>>();
45 this.synsetWords = new HashMap<Long, ArrayList<String>>();
46 this.allowedRelations = new HashMap<String, String>();
47 this.weightsRelations = new HashMap<String, Double>();
48 this.synsetRelations = new HashMap<Long, ArrayList<WordNetRelation>>();
49
50 this.allNouns = new HashMap<String, Integer>();
51 this.allVerbs = new HashMap<String, Integer>();
52 this.allAdjs = new HashMap<String, Integer>();
53 this.allAdvs = new HashMap<String, Integer>();
54 this.stopwords = new HashMap<String, Integer>();
55 this.nounExceptions = new HashMap<String, String>();
56 this.verbExceptions = new HashMap<String, String>();
57 this.adjExceptions = new HashMap<String, String>();
58 this.advExceptions = new HashMap<String, String>();
59
60 this.allTerms = new HashMap[4];
61 this.allTerms[0] = this.allNouns;
62 this.allTerms[1] = this.allVerbs;
63 this.allTerms[2] = this.allAdjs;
64 this.allTerms[3] = this.allAdvs;
65 this.allExceptions = new HashMap[4];
66 this.allExceptions[0] = this.nounExceptions;
67 this.allExceptions[1] = this.verbExceptions;
68 this.allExceptions[2] = this.adjExceptions;
69 this.allExceptions[3] = this.advExceptions;
70 }
71
72
73
74
75
76
77
78 public void load() {
79 this.loadWordNetParameters();
80
81
82 this.loadWordNetRawData();
83 }
84
85
86
87 private void loadWordNetParameters() {
88
89
90 FileManager fm = new FileManager(prp.getProperty("mdfsa.wordnet.relations"), Mode.READ);
91 ArrayList<String> relations = fm.importSimpleTextContent();
92 Iterator<String> it = relations.iterator();
93 while(it.hasNext()) {
94 String currentRelation = it.next();
95 String[] tokens = currentRelation.split("\\^\\^\\^");
96 if(tokens[0].compareTo("1") == 0) {
97 this.allowedRelations.put(tokens[1], tokens[1]);
98 this.weightsRelations.put(tokens[1], Double.valueOf(tokens[2]));
99 }
100 }
101 }
102
103
104
105 private void loadWordNetRawData() {
106
107 String[] r;
108 try {
109
110
111
112
113 FileManager fm = new FileManager(this.prp.getProperty("mdfsa.wordnet.unambiguoussynsets"), Mode.READ);
114 ArrayList<String> content = fm.importSimpleTextContent();
115
116 for(String row: content) {
117 if(row.compareTo("") == 0) {
118 continue;
119 }
120 r = row.split("\\^\\^\\^");
121 String currentWord = r[1];
122 Long currentSynset = Long.valueOf(r[2]);
123
124
125 ArrayList<Long> synsets = this.wordSynsets.get(currentWord);
126 if(synsets == null) {
127 synsets = new ArrayList<Long>();
128 }
129 synsets.add(currentSynset);
130 this.wordSynsets.put(currentWord, synsets);
131
132
133 ArrayList<String> words = this.synsetWords.get(currentSynset);
134 if(words == null) {
135 words = new ArrayList<String>();
136 }
137 words.add(currentWord);
138 this.synsetWords.put(currentSynset, words);
139 }
140
141
142
143
144
145 fm = new FileManager(this.prp.getProperty("mdfsa.wordnet.links"), Mode.READ);
146 content = fm.importSimpleTextContent();
147 for(String row: content) {
148 if(row.compareTo("") == 0) {
149 continue;
150 }
151 r = row.split("\\^\\^\\^");
152 long currentSynSource = Long.valueOf(r[0]);
153 long currentSynTarget = Long.valueOf(r[1]);
154 int currentRelation = Integer.valueOf(r[2]);
155
156 ArrayList<WordNetRelation> currentRelations = this.synsetRelations.get(currentSynSource);
157 if(currentRelations == null) {
158 currentRelations = new ArrayList<WordNetRelation>();
159 }
160 WordNetRelation wnr = new WordNetRelation(currentRelation, currentSynTarget,
161 this.weightsRelations.get(String.valueOf(currentRelation)));
162 currentRelations.add(wnr);
163 this.synsetRelations.put(currentSynSource, currentRelations);
164 }
165
166
167
168
169
170
171 String[] indexFiles = new String[4];
172 indexFiles[0] = this.prp.getProperty("mdfsa.extraction.nounlist");
173 indexFiles[1] = this.prp.getProperty("mdfsa.extraction.verblist");
174 indexFiles[2] = this.prp.getProperty("mdfsa.extraction.adjlist");
175 indexFiles[3] = this.prp.getProperty("mdfsa.extraction.advlist");
176 for(int i = 0; i < 4; i++) {
177 fm = new FileManager(indexFiles[i], Mode.READ);
178 content = fm.importSimpleTextContent();
179 for(String row: content) {
180 if(row.startsWith(" ")) {
181 continue;
182 }
183 String[] data = row.split(" ");
184 this.allTerms[i].put(data[0], new Integer(1));
185 }
186 }
187
188
189
190
191
192 String[] excFiles = new String[4];
193 excFiles[0] = this.prp.getProperty("mdfsa.extraction.nounexc");
194 excFiles[1] = this.prp.getProperty("mdfsa.extraction.verbexc");
195 excFiles[2] = this.prp.getProperty("mdfsa.extraction.adjexc");
196 excFiles[3] = this.prp.getProperty("mdfsa.extraction.advexc");
197 for(int i = 0; i < 4; i++) {
198 fm = new FileManager(excFiles[i], Mode.READ);
199 content = fm.importSimpleTextContent();
200 for(String row: content) {
201 String[] curExc = row.split(" ");
202 this.allExceptions[i].put(curExc[0], curExc[1]);
203 }
204 }
205
206
207
208
209
210 String stopwordsFile = this.prp.getProperty("mdfsa.extraction.stopwords");
211 fm = new FileManager(stopwordsFile, Mode.READ);
212 content = fm.importSimpleTextContent();
213 for(String row: content) {
214 this.stopwords.put(row, 1);
215 }
216
217 } catch(Exception e) {
218 e.printStackTrace();
219 }
220 }
221
222
223
224
225
226
227
228
229 public Graph putInGraph(Graph g) {
230
231
232
233 g.setWnWordSynsets(this.wordSynsets);
234
235
236 Iterator<Long> synsets = this.synsetRelations.keySet().iterator();
237 while(synsets.hasNext()) {
238 long currentSynset = synsets.next();
239 ArrayList<WordNetRelation> synRelations = this.synsetRelations.get(currentSynset);
240 for(WordNetRelation rel: synRelations) {
241 long targetSynset = rel.getTargetSynset();
242 double weight = rel.getRelationWeight();
243 g.addEdge(String.valueOf(currentSynset), String.valueOf(targetSynset), Double.MAX_VALUE, weight, 1);
244 }
245 }
246
247
248
249
250 g.createSenticNetWordNetRelations();
251
252 return g;
253 }
254
255
256 public HashMap<String, Integer>[] getAllTerms() {
257 return allTerms;
258 }
259
260
261 public void setAllTerms(HashMap<String, Integer>[] allTerms) {
262 this.allTerms = allTerms;
263 }
264
265
266 public HashMap<String, Integer> getAllNouns() {
267 return allNouns;
268 }
269
270
271 public void setAllNouns(HashMap<String, Integer> allNouns) {
272 this.allNouns = allNouns;
273 }
274
275
276 public HashMap<String, Integer> getAllVerbs() {
277 return allVerbs;
278 }
279
280
281 public void setAllVerbs(HashMap<String, Integer> allVerbs) {
282 this.allVerbs = allVerbs;
283 }
284
285
286 public HashMap<String, Integer> getAllAdjs() {
287 return allAdjs;
288 }
289
290
291 public void setAllAdjs(HashMap<String, Integer> allAdjs) {
292 this.allAdjs = allAdjs;
293 }
294
295
296 public HashMap<String, Integer> getAllAdvs() {
297 return allAdvs;
298 }
299
300
301 public void setAllAdvs(HashMap<String, Integer> allAdvs) {
302 this.allAdvs = allAdvs;
303 }
304
305
306 public HashMap<String, String>[] getAllExceptions() {
307 return allExceptions;
308 }
309
310
311 public void setAllExceptions(HashMap<String, String>[] allExceptions) {
312 this.allExceptions = allExceptions;
313 }
314
315
316 public HashMap<String, String> getNounExceptions() {
317 return nounExceptions;
318 }
319
320
321 public void setNounExceptions(HashMap<String, String> nounExceptions) {
322 this.nounExceptions = nounExceptions;
323 }
324
325
326 public HashMap<String, String> getVerbExceptions() {
327 return verbExceptions;
328 }
329
330
331 public void setVerbExceptions(HashMap<String, String> verbExceptions) {
332 this.verbExceptions = verbExceptions;
333 }
334
335
336 public HashMap<String, String> getAdjExceptions() {
337 return adjExceptions;
338 }
339
340
341 public void setAdjExceptions(HashMap<String, String> adjExceptions) {
342 this.adjExceptions = adjExceptions;
343 }
344
345
346 public HashMap<String, String> getAdvExceptions() {
347 return advExceptions;
348 }
349
350
351 public void setAdvExceptions(HashMap<String, String> advExceptions) {
352 this.advExceptions = advExceptions;
353 }
354
355
356 public HashMap<String, Integer> getStopwords() {
357 return this.stopwords;
358 }
359
360
361 public void setStopwords(HashMap<String, Integer> stopwords) {
362 this.stopwords = stopwords;
363 }
364
365 }