1   package eu.fbk.dkm.pikes.raid.mdfsa.wordnet;
2   
import java.util.HashMap;
import java.util.Locale;
4   
/**
 * Reduces inflected English words to a base form by stripping a known suffix,
 * appending a replacement ending, and checking the candidate against per-category
 * term dictionaries (nouns, verbs, adjectives, adverbs) supplied at construction time.
 *
 * <p>Two usage styles are offered:
 * <ul>
 *   <li>stateless: {@link #getWordLexicalizationByType(String, String)};</li>
 *   <li>stateful: {@link #setWord(String)} (optionally {@link #setPOSWordType(int)}),
 *       then {@link #getLexicalization()} and {@link #getWordType()}.</li>
 * </ul>
 *
 * <p>Dictionary keys are looked up in lower case; the returned base form keeps the
 * casing of the input word. The stateful API mutates instance fields, so a single
 * instance must not be shared between threads without external synchronization.
 */
public class WordNetLexicalizer
{

  /*
   * Suffix tables: for index i, a word ending in XXX_SUFFIXES[i] is turned into a
   * candidate by removing that suffix and appending XXX_ENDINGS[i]. The empty suffix
   * at index 0 tests the word itself. Rules are tried in array order.
   */
  private static final String[] NOUN_SUFFIXES = {"", "s", "ses", "xes", "zes", "ches", "shes", "men", "ies"};
  private static final String[] NOUN_ENDINGS = {"", "", "s", "x", "z", "ch", "sh", "man", "y"};
  private static final String[] VERB_SUFFIXES = {"", "s", "ies", "es", "es", "ed", "ed", "ing", "ing"};
  private static final String[] VERB_ENDINGS = {"", "", "y", "e", "", "e", "", "e", ""};
  private static final String[] ADJ_SUFFIXES = {"", "er", "est", "er", "est"};
  private static final String[] ADJ_ENDINGS = {"", "", "", "e", "e"};

  /** Placeholder token that {@link #checkCompoundNames(String)} drops from its input. */
  private static final String STOPWORD = "STOPWORD";

  private final HashMap<String, Integer> allNouns;
  private final HashMap<String, Integer> allVerbs;
  private final HashMap<String, Integer> allAdjs;
  private final HashMap<String, Integer> allAdvs;
  private final HashMap<String, String> nounExceptions;
  private final HashMap<String, String> verbExceptions;
  private final HashMap<String, String> adjExceptions;
  private final HashMap<String, String> advExceptions;

  /** Word set by {@link #setWord(String)}; {@code null} until then. */
  private String CurWord;
  /** Category found by the last {@link #getLexicalization()}: 0 noun, 1 verb, 2 adjective, -1 none. */
  private int WordType = -1;
  /** Category restriction for {@link #getLexicalization()}: -1 tries all, otherwise 0/1/2 as above. */
  private int POSWordType = -1;

  private boolean IsNounAdj;


  /**
   * Creates a lexicalizer backed by the given dictionaries.
   *
   * @param allTerms   four term dictionaries, in the order nouns, verbs, adjectives, adverbs;
   *                   a term is considered known when its lower-cased key maps to a non-null value
   * @param exceptions four irregular-form tables (inflected form to base form), in the same order
   */
  public WordNetLexicalizer(HashMap<String, Integer>[] allTerms, HashMap<String, String>[] exceptions)
  {
    this.allNouns = allTerms[0];
    this.allVerbs = allTerms[1];
    this.allAdjs = allTerms[2];
    this.allAdvs = allTerms[3];
    this.nounExceptions = exceptions[0];
    this.verbExceptions = exceptions[1];
    this.adjExceptions = exceptions[2];
    this.advExceptions = exceptions[3];
  }


  /**
   * @return the noun/adjective flag; {@link #getLexicalization()} resets it to {@code false}
   *         and nothing in this class sets it to {@code true}
   */
  public boolean getIsNounAdj()
  {
    return this.IsNounAdj;
  }

  /**
   * @return the category matched by the last {@link #getLexicalization()} call
   *         (0 noun, 1 verb, 2 adjective), or -1 when nothing has matched since the
   *         last {@link #setWord(String)}
   */
  public int getWordType()
  {
    return this.WordType;
  }

  /**
   * Restricts {@link #getLexicalization()} to one category.
   *
   * @param t 0 for nouns, 1 for verbs, 2 for adjectives, -1 to try all three in that order
   */
  public void setPOSWordType(int t)
  {
    this.POSWordType = t;
  }

  /**
   * Sets the word that {@link #getLexicalization()} will process and clears the
   * previously detected word type.
   *
   * @param word the word to lexicalize
   */
  public void setWord(String word)
  {
    this.CurWord = word;
    this.WordType = -1;
  }


  /**
   * Returns the lexicalization of a word for which the part-of-speech tag is known.
   *
   * @param word   the word to lexicalize
   * @param posTag the part-of-speech tag: {@code "N"} (noun), {@code "V"} (verb),
   *               {@code "AJ"} (adjective), {@code "AV"} (adverb) or {@code "MIX"}
   *               (try noun, verb, adjective, adverb in that order)
   * @return the lexicalized word, or {@code null} if the tag is not recognized or
   *         no base form is found
   */
  public String getWordLexicalizationByType(String word, String posTag) {
    if ("N".equals(posTag)) {
      return this.lexicalizeNoun(word);
    }
    if ("V".equals(posTag)) {
      return this.lexicalizeVerb(word);
    }
    if ("AJ".equals(posTag)) {
      return this.lexicalizeAdjective(word);
    }
    if ("AV".equals(posTag)) {
      return this.lexicalizeAdverb(word);
    }
    if ("MIX".equals(posTag)) {
      String lexWord = this.lexicalizeNoun(word);
      if (lexWord == null) {
        lexWord = this.lexicalizeVerb(word);
      }
      if (lexWord == null) {
        lexWord = this.lexicalizeAdjective(word);
      }
      if (lexWord == null) {
        lexWord = this.lexicalizeAdverb(word);
      }
      return lexWord;
    }
    return null;
  }


  /**
   * Lexicalizes the noun given as input.
   *
   * @param noun the noun to lexicalize
   * @return the lexicalized noun, or {@code null} if none is found
   */
  private String lexicalizeNoun(String noun) {
    String irregular = lookupException(this.nounExceptions, noun);
    if (irregular != null) {
      return irregular;
    }
    return stripSuffix(noun, NOUN_SUFFIXES, NOUN_ENDINGS, this.allNouns);
  }


  /**
   * Lexicalizes the verb given as input.
   *
   * @param verb the verb to lexicalize
   * @return the lexicalized verb, or {@code null} if none is found
   */
  private String lexicalizeVerb(String verb) {
    String irregular = lookupException(this.verbExceptions, verb);
    if (irregular != null) {
      return irregular;
    }
    return stripSuffix(verb, VERB_SUFFIXES, VERB_ENDINGS, this.allVerbs);
  }


  /**
   * Lexicalizes the adjective given as input.
   *
   * @param adj the adjective to lexicalize
   * @return the lexicalized adjective, or {@code null} if none is found
   */
  private String lexicalizeAdjective(String adj) {
    String irregular = lookupException(this.adjExceptions, adj);
    if (irregular != null) {
      return irregular;
    }
    return stripSuffix(adj, ADJ_SUFFIXES, ADJ_ENDINGS, this.allAdjs);
  }


  /**
   * Lexicalizes the adverb given as input.
   *
   * @param adv the adverb to lexicalize
   * @return the lexicalized adverb, or {@code null} if none is found
   */
  private String lexicalizeAdverb(String adv) {
    String irregular = lookupException(this.advExceptions, adv);
    if (irregular != null) {
      return irregular;
    }
    // NOTE(review): the noun suffix table is applied to adverbs here, exactly as in the
    // previous implementation. This may be a copy-paste leftover - confirm whether adverbs
    // should only be matched as-is.
    return stripSuffix(adv, NOUN_SUFFIXES, NOUN_ENDINGS, this.allAdvs);
  }


  /**
   * Lexicalizes the word previously passed to {@link #setWord(String)} by suffix
   * stripping only (the irregular-form tables are not consulted).
   *
   * <p>Categories are tried in the order noun, verb, adjective, limited to a single
   * category when {@link #setPOSWordType(int)} was called with 0, 1 or 2. On the first
   * match the word type returned by {@link #getWordType()} is updated.
   *
   * @return the base form, or the empty string when no word is set or no base form is found
   */
  public String getLexicalization()
  {
    this.IsNounAdj = false;
    if (this.CurWord == null) {
      return "";
    }

    String lemma;

    // Scan the noun category.
    if (this.POSWordType == -1 || this.POSWordType == 0) {
      lemma = stripSuffix(this.CurWord, NOUN_SUFFIXES, NOUN_ENDINGS, this.allNouns);
      if (lemma != null) {
        this.WordType = 0;
        return lemma;
      }
    }

    // Scan the verb category.
    if (this.POSWordType == -1 || this.POSWordType == 1) {
      lemma = stripSuffix(this.CurWord, VERB_SUFFIXES, VERB_ENDINGS, this.allVerbs);
      if (lemma != null) {
        this.WordType = 1;
        return lemma;
      }
    }

    // Scan the adjective category.
    if (this.POSWordType == -1 || this.POSWordType == 2) {
      lemma = stripSuffix(this.CurWord, ADJ_SUFFIXES, ADJ_ENDINGS, this.allAdjs);
      if (lemma != null) {
        this.WordType = 2;
        return lemma;
      }
    }

    return "";
  }


  /**
   * Rewrites a space-separated token sequence, dropping every {@code STOPWORD} token
   * and greedily joining consecutive tokens into one multi-word term for as long as the
   * joined, lower-cased string is a known noun, verb or adjective.
   *
   * <p>A {@code STOPWORD} token always ends the current term. Multi-word dictionary
   * keys are assumed to use a single space as separator - TODO confirm against the
   * code that fills the dictionaries.
   *
   * @param CurrentString the space-separated tokens
   * @return the resulting terms, each followed by a single space
   */
  public String checkCompoundNames(String CurrentString)
  {
    String[] words = CurrentString.split(" ");
    StringBuilder result = new StringBuilder();

    int i = 0;
    while (i < words.length) {
      String current = words[i];
      if (STOPWORD.equals(current)) {
        i++;
        continue;
      }

      // Extend the current term one token at a time while the longer term is known.
      int next = i + 1;
      while (next < words.length && !STOPWORD.equals(words[next])) {
        String candidate = current + " " + words[next];
        if (!isKnownTerm(candidate)) {
          break;
        }
        current = candidate;
        next++;
      }

      result.append(current).append(' ');
      i = next;
    }

    return result.toString();
  }


  /**
   * Applies the suffix rules in order and returns the first candidate found in the dictionary.
   *
   * @param word       the inflected word
   * @param suffixes   suffixes to detach
   * @param endings    replacement endings, parallel to {@code suffixes}
   * @param dictionary known terms, keyed in lower case
   * @return the first known candidate (input casing preserved), or {@code null}
   */
  private static String stripSuffix(String word, String[] suffixes, String[] endings,
                                    HashMap<String, Integer> dictionary) {
    for (int i = 0; i < suffixes.length; i++) {
      String suffix = suffixes[i];
      // The word must be strictly longer than the suffix so that a non-empty stem remains.
      if (word.length() > suffix.length() && word.endsWith(suffix)) {
        String candidate = word.substring(0, word.length() - suffix.length()) + endings[i];
        if (isKnown(dictionary, candidate)) {
          return candidate;
        }
      }
    }
    return null;
  }

  /** Returns the irregular base form of {@code word}, or {@code null}; a missing table counts as empty. */
  private static String lookupException(HashMap<String, String> exceptions, String word) {
    return exceptions == null ? null : exceptions.get(word);
  }

  /** Tells whether the lower-cased term maps to a non-null value; a missing dictionary counts as empty. */
  private static boolean isKnown(HashMap<String, Integer> dictionary, String term) {
    return dictionary != null && dictionary.get(term.toLowerCase(Locale.ROOT)) != null;
  }

  /** Tells whether the term is a known noun, verb or adjective. */
  private boolean isKnownTerm(String term) {
    return isKnown(this.allNouns, term) || isKnown(this.allVerbs, term) || isKnown(this.allAdjs, term);
  }
}