1   package eu.fbk.dkm.pikes.raid.mdfsa.wordnet;
2   
3   import eu.fbk.dkm.pikes.raid.mdfsa.FileManager;
4   import eu.fbk.dkm.pikes.raid.mdfsa.FileManager.Mode;
5   import eu.fbk.shell.mdfsa.data.structures.Graph;
6   
7   import java.util.ArrayList;
8   import java.util.HashMap;
9   import java.util.Iterator;
10  import java.util.Properties;
11  
12  public class WordNetLoader {
13    
14    Properties prp;
15    private HashMap<String, ArrayList<Long>> wordSynsets;
16    private HashMap<Long, ArrayList<String>> synsetWords;
17    private HashMap<String, String> allowedRelations;
18    private HashMap<String, Double> weightsRelations;
19    private HashMap<Long, ArrayList<WordNetRelation>> synsetRelations;
20    
21    /*
22     * Maps containing WordNet terms lists without applying filters
23     */
24    private HashMap<String, Integer>[] allTerms;
25    private HashMap<String, Integer> allNouns;
26    private HashMap<String, Integer> allVerbs;
27    private HashMap<String, Integer> allAdjs;
28    private HashMap<String, Integer> allAdvs;
29    private HashMap<String, Integer> stopwords;
30    private HashMap<String, String>[] allExceptions;
31    private HashMap<String, String> nounExceptions;
32    private HashMap<String, String> verbExceptions;
33    private HashMap<String, String> adjExceptions;
34    private HashMap<String, String> advExceptions;
35    
36    
37    /**
38     * Class contructor
39     * @param prp properties file
40     */
41    @SuppressWarnings("unchecked")
42    public WordNetLoader(Properties prp) {
43      this.prp = prp;
44      this.wordSynsets = new HashMap<String, ArrayList<Long>>();
45      this.synsetWords = new HashMap<Long, ArrayList<String>>();
46      this.allowedRelations = new HashMap<String, String>();
47      this.weightsRelations = new HashMap<String, Double>();
48      this.synsetRelations = new HashMap<Long, ArrayList<WordNetRelation>>();
49      
50      this.allNouns = new HashMap<String, Integer>();
51      this.allVerbs = new HashMap<String, Integer>();
52      this.allAdjs = new HashMap<String, Integer>();
53      this.allAdvs = new HashMap<String, Integer>();
54      this.stopwords = new HashMap<String, Integer>();
55      this.nounExceptions = new HashMap<String, String>();
56      this.verbExceptions = new HashMap<String, String>();
57      this.adjExceptions = new HashMap<String, String>();
58      this.advExceptions = new HashMap<String, String>();
59      
60      this.allTerms = new HashMap[4];
61      this.allTerms[0] = this.allNouns;
62      this.allTerms[1] = this.allVerbs;
63      this.allTerms[2] = this.allAdjs;
64      this.allTerms[3] = this.allAdvs;
65      this.allExceptions = new HashMap[4];
66      this.allExceptions[0] = this.nounExceptions;
67      this.allExceptions[1] = this.verbExceptions;
68      this.allExceptions[2] = this.adjExceptions;
69      this.allExceptions[3] = this.advExceptions;
70    }
71    
72    
73    /**
74     * Loads all data related to the ConceptNet knowledge base:
75     * - loads the ConceptNet parameters related to the allowed relations and uri
76     * - loads the json representation of ConceptNet
77     */
78    public void load() {
79      this.loadWordNetParameters();
80  //    System.out.println(this.allowedRelations);
81  //    System.out.println(this.weightsRelations);
82      this.loadWordNetRawData();
83    }
84    
85    
86    
87    private void loadWordNetParameters() {
88      
89      /* Loads the set of allowed relations */
90      FileManager fm = new FileManager(prp.getProperty("mdfsa.wordnet.relations"), Mode.READ);
91      ArrayList<String> relations = fm.importSimpleTextContent();
92      Iterator<String> it = relations.iterator();
93      while(it.hasNext()) {
94        String currentRelation = it.next();
95        String[] tokens = currentRelation.split("\\^\\^\\^");
96        if(tokens[0].compareTo("1") == 0) {
97          this.allowedRelations.put(tokens[1], tokens[1]);
98          this.weightsRelations.put(tokens[1], Double.valueOf(tokens[2]));
99        }
100     }
101   }
102   
103   
104   
105   private void loadWordNetRawData() {
106 
107     String[] r;
108     try {
109       
110       /*
111        * LOADS SYNSETS
112        */
113       FileManager fm = new FileManager(this.prp.getProperty("mdfsa.wordnet.unambiguoussynsets"), Mode.READ);
114       ArrayList<String> content = fm.importSimpleTextContent();
115       
116       for(String row: content) {
117         if(row.compareTo("") == 0) {
118           continue;
119         }
120         r = row.split("\\^\\^\\^");
121         String currentWord = r[1];
122         Long currentSynset = Long.valueOf(r[2]);
123         
124         /* Add the current synset to the word ones */
125         ArrayList<Long> synsets = this.wordSynsets.get(currentWord);
126         if(synsets == null) {
127           synsets = new ArrayList<Long>();
128         }
129         synsets.add(currentSynset);
130         this.wordSynsets.put(currentWord, synsets);
131         
132         /* Add the current word to the synsets ones */
133         ArrayList<String> words = this.synsetWords.get(currentSynset);
134         if(words == null) {
135           words = new ArrayList<String>();
136         }
137         words.add(currentWord);
138         this.synsetWords.put(currentSynset, words);
139       }
140 
141       
142       /*
143        * LOADS SYNSETS RELATIONSHIPS
144        */
145       fm = new FileManager(this.prp.getProperty("mdfsa.wordnet.links"), Mode.READ);
146       content = fm.importSimpleTextContent();      
147       for(String row: content) {
148         if(row.compareTo("") == 0) {
149           continue;
150         }
151         r = row.split("\\^\\^\\^");
152         long currentSynSource = Long.valueOf(r[0]);
153         long currentSynTarget = Long.valueOf(r[1]);
154         int currentRelation = Integer.valueOf(r[2]);
155         
156         ArrayList<WordNetRelation> currentRelations = this.synsetRelations.get(currentSynSource);
157         if(currentRelations == null) {
158           currentRelations = new ArrayList<WordNetRelation>();
159         }
160         WordNetRelation wnr = new WordNetRelation(currentRelation, currentSynTarget, 
161                                                   this.weightsRelations.get(String.valueOf(currentRelation)));
162         currentRelations.add(wnr);
163         this.synsetRelations.put(currentSynSource, currentRelations);
164       }
165       
166       
167       
168       /*
169        * LOADS INDEX FILES
170        */
171       String[] indexFiles = new String[4];
172       indexFiles[0] = this.prp.getProperty("mdfsa.extraction.nounlist");
173       indexFiles[1] = this.prp.getProperty("mdfsa.extraction.verblist");
174       indexFiles[2] = this.prp.getProperty("mdfsa.extraction.adjlist");
175       indexFiles[3] = this.prp.getProperty("mdfsa.extraction.advlist");
176       for(int i = 0; i < 4; i++) {
177         fm = new FileManager(indexFiles[i], Mode.READ);
178         content = fm.importSimpleTextContent();
179         for(String row: content) {
180           if(row.startsWith("  ")) {
181             continue;
182           }
183           String[] data = row.split(" ");
184           this.allTerms[i].put(data[0], new Integer(1));
185         }
186       }
187       
188       
189       /*
190        * LOADS EXCEPTIONS FILES
191        */
192       String[] excFiles = new String[4];
193       excFiles[0] = this.prp.getProperty("mdfsa.extraction.nounexc");
194       excFiles[1] = this.prp.getProperty("mdfsa.extraction.verbexc");
195       excFiles[2] = this.prp.getProperty("mdfsa.extraction.adjexc");
196       excFiles[3] = this.prp.getProperty("mdfsa.extraction.advexc");
197       for(int i = 0; i < 4; i++) {
198         fm = new FileManager(excFiles[i], Mode.READ);
199         content = fm.importSimpleTextContent();
200         for(String row: content) {
201           String[] curExc = row.split(" ");
202           this.allExceptions[i].put(curExc[0], curExc[1]);
203         }
204       }
205             
206       
207       /*
208        * LOADS STOPWORDS FILE
209        */
210       String stopwordsFile = this.prp.getProperty("mdfsa.extraction.stopwords");
211       fm = new FileManager(stopwordsFile, Mode.READ);
212       content = fm.importSimpleTextContent();
213       for(String row: content) {
214         this.stopwords.put(row, 1);
215       }
216       
217     } catch(Exception e) {
218       e.printStackTrace();
219     }
220   }
221   
222   
223   
224   /**
225    * Updates the knowledge graph with information coming from WordNet
226    * @param g the knowledge graph
227    * @return the updated knowledge graph
228    */
229   public Graph putInGraph(Graph g) {
230     
231     /* Adds the mappings between terms and synsets to the graph. It is used during the propagation phase because information
232      * from the dataset comes through text-terms and not by using synsets. */
233     g.setWnWordSynsets(this.wordSynsets);
234     
235     /* Creates the relations between synsets */
236     Iterator<Long> synsets = this.synsetRelations.keySet().iterator();
237     while(synsets.hasNext()) {
238       long currentSynset = synsets.next();
239       ArrayList<WordNetRelation> synRelations = this.synsetRelations.get(currentSynset);
240       for(WordNetRelation rel: synRelations) {
241         long targetSynset = rel.getTargetSynset();
242         double weight = rel.getRelationWeight();
243         g.addEdge(String.valueOf(currentSynset), String.valueOf(targetSynset), Double.MAX_VALUE, weight, 1);
244       }
245     }
246     
247     /* Creates the relations between SenticNet and the unambiguous synsets of WordNet.
248      * Such relations are created directly in the Graph object due to the fact that all information are already
249      * defined in it. */
250     g.createSenticNetWordNetRelations();
251     
252     return g;
253   }
254 
255 
256   public HashMap<String, Integer>[] getAllTerms() {
257     return allTerms;
258   }
259 
260 
261   public void setAllTerms(HashMap<String, Integer>[] allTerms) {
262     this.allTerms = allTerms;
263   }
264 
265 
266   public HashMap<String, Integer> getAllNouns() {
267     return allNouns;
268   }
269 
270 
271   public void setAllNouns(HashMap<String, Integer> allNouns) {
272     this.allNouns = allNouns;
273   }
274 
275 
276   public HashMap<String, Integer> getAllVerbs() {
277     return allVerbs;
278   }
279 
280 
281   public void setAllVerbs(HashMap<String, Integer> allVerbs) {
282     this.allVerbs = allVerbs;
283   }
284 
285 
286   public HashMap<String, Integer> getAllAdjs() {
287     return allAdjs;
288   }
289 
290 
291   public void setAllAdjs(HashMap<String, Integer> allAdjs) {
292     this.allAdjs = allAdjs;
293   }
294 
295 
296   public HashMap<String, Integer> getAllAdvs() {
297     return allAdvs;
298   }
299 
300 
301   public void setAllAdvs(HashMap<String, Integer> allAdvs) {
302     this.allAdvs = allAdvs;
303   }
304 
305 
306   public HashMap<String, String>[] getAllExceptions() {
307     return allExceptions;
308   }
309 
310 
311   public void setAllExceptions(HashMap<String, String>[] allExceptions) {
312     this.allExceptions = allExceptions;
313   }
314 
315 
316   public HashMap<String, String> getNounExceptions() {
317     return nounExceptions;
318   }
319 
320 
321   public void setNounExceptions(HashMap<String, String> nounExceptions) {
322     this.nounExceptions = nounExceptions;
323   }
324 
325 
326   public HashMap<String, String> getVerbExceptions() {
327     return verbExceptions;
328   }
329 
330 
331   public void setVerbExceptions(HashMap<String, String> verbExceptions) {
332     this.verbExceptions = verbExceptions;
333   }
334 
335 
336   public HashMap<String, String> getAdjExceptions() {
337     return adjExceptions;
338   }
339 
340 
341   public void setAdjExceptions(HashMap<String, String> adjExceptions) {
342     this.adjExceptions = adjExceptions;
343   }
344 
345 
346   public HashMap<String, String> getAdvExceptions() {
347     return advExceptions;
348   }
349 
350 
351   public void setAdvExceptions(HashMap<String, String> advExceptions) {
352     this.advExceptions = advExceptions;
353   }
354 
355 
356   public HashMap<String, Integer> getStopwords() {
357     return this.stopwords;
358   }
359 
360 
361   public void setStopwords(HashMap<String, Integer> stopwords) {
362     this.stopwords = stopwords;
363   }
364   
365 }