1 package eu.fbk.dkm.pikes.raid.mdfsa.wordnet;
2
3 import java.util.HashMap;
4
/**
 * Rule-based lemmatizer that maps an inflected English word to a base form
 * by (1) looking it up in a per-part-of-speech exception table and, failing
 * that, (2) stripping a known suffix, appending a replacement ending and
 * checking that the result is a key of the corresponding dictionary.
 *
 * <p>Two entry points exist:
 * <ul>
 *   <li>{@link #getWordLexicalizationByType(String, String)} - stateless
 *       lookup driven by a part-of-speech tag;</li>
 *   <li>{@link #setWord(String)} / {@link #setPOSWordType(int)} /
 *       {@link #getLexicalization()} / {@link #getWordType()} - stateful
 *       variant that also records which part of speech matched.</li>
 * </ul>
 *
 * <p>Dictionary lookups are done on the lower-cased candidate, but the
 * returned base form keeps the casing of the input word. Exception tables
 * are queried with the word exactly as given.
 *
 * <p>The stateful entry point mutates instance fields, so one instance must
 * not be shared between threads without external synchronization.
 */
public class WordNetLexicalizer
{
    /** Word-type code: nothing matched yet / no restriction. */
    private static final int TYPE_ANY = -1;
    /** Word-type code for nouns. */
    private static final int TYPE_NOUN = 0;
    /** Word-type code for verbs. */
    private static final int TYPE_VERB = 1;
    /** Word-type code for adjectives. */
    private static final int TYPE_ADJECTIVE = 2;

    /*
     * Parallel tables: when a word ends with XXX_SUFFIXES[i], that suffix is
     * removed and XXX_ENDINGS[i] is appended to build the candidate base
     * form. Index 0 ("" -> "") tests the word itself. Rules are tried in
     * table order and the first candidate found in the dictionary wins.
     */
    private static final String[] NOUN_SUFFIXES =
        {"", "s", "ses", "xes", "zes", "ches", "shes", "men", "ies"};
    private static final String[] NOUN_ENDINGS =
        {"", "", "s", "x", "z", "ch", "sh", "man", "y"};
    private static final String[] VERB_SUFFIXES =
        {"", "s", "ies", "es", "es", "ed", "ed", "ing", "ing"};
    private static final String[] VERB_ENDINGS =
        {"", "", "y", "e", "", "e", "", "e", ""};
    private static final String[] ADJ_SUFFIXES = {"", "er", "est", "er", "est"};
    private static final String[] ADJ_ENDINGS = {"", "", "", "e", "e"};

    /** Token that marks a removed stop word in the input of {@link #checkCompoundNames(String)}. */
    private static final String STOPWORD_MARKER = "STOPWORD";

    /** Dictionaries of known base forms; only key presence with a non-null value matters. */
    private final HashMap<String, Integer> allNouns;
    private final HashMap<String, Integer> allVerbs;
    private final HashMap<String, Integer> allAdjs;
    private final HashMap<String, Integer> allAdvs;

    /** Irregular forms: inflected word -> base form. */
    private final HashMap<String, String> nounExceptions;
    private final HashMap<String, String> verbExceptions;
    private final HashMap<String, String> adjExceptions;
    private final HashMap<String, String> advExceptions;

    /** Result of the last {@link #getLexicalization()} call. */
    private String lastResult;
    /** Word set through {@link #setWord(String)}. */
    private String currentWord;
    /** Part of speech matched by the last {@link #getLexicalization()} call, or -1. */
    private int wordType = TYPE_ANY;
    /** Part-of-speech restriction for {@link #getLexicalization()}; -1 means "try all". */
    private int posWordType = TYPE_ANY;
    /** Reset to {@code false} by {@link #getLexicalization()}; never set to {@code true} in this class. */
    private boolean isNounAdj;

    /**
     * Creates a lexicalizer over the given dictionaries.
     *
     * @param allTerms   dictionaries of base forms, indexed 0 = nouns,
     *                   1 = verbs, 2 = adjectives, 3 = adverbs
     * @param exceptions irregular-form tables (inflected form to base form),
     *                   indexed like {@code allTerms}
     * @throws NullPointerException           if either array is {@code null}
     * @throws ArrayIndexOutOfBoundsException if either array has fewer than four elements
     */
    public WordNetLexicalizer(HashMap<String, Integer>[] allTerms, HashMap<String, String>[] exceptions)
    {
        this.allNouns = allTerms[0];
        this.allVerbs = allTerms[1];
        this.allAdjs = allTerms[2];
        this.allAdvs = allTerms[3];
        this.nounExceptions = exceptions[0];
        this.verbExceptions = exceptions[1];
        this.adjExceptions = exceptions[2];
        this.advExceptions = exceptions[3];
    }

    /**
     * @return the noun/adjective flag; it is cleared by
     *         {@link #getLexicalization()} and nothing in this class sets it,
     *         so it is always {@code false}
     */
    public boolean getIsNounAdj()
    {
        return this.isNounAdj;
    }

    /**
     * @return the part of speech matched by the last successful
     *         {@link #getLexicalization()} call (0 = noun, 1 = verb,
     *         2 = adjective), or -1 if nothing matched since the last
     *         {@link #setWord(String)}
     */
    public int getWordType()
    {
        return this.wordType;
    }

    /**
     * Restricts {@link #getLexicalization()} to one part of speech.
     *
     * @param t 0 = noun, 1 = verb, 2 = adjective, -1 = try all three in that
     *          order; any other value makes {@link #getLexicalization()}
     *          match nothing
     */
    public void setPOSWordType(int t)
    {
        this.posWordType = t;
    }

    /**
     * Sets the word to be processed by {@link #getLexicalization()} and
     * resets the matched word type to -1.
     *
     * @param word the word to lexicalize
     * @throws NullPointerException if {@code word} is {@code null}
     */
    public void setWord(String word)
    {
        if (word == null) {
            throw new NullPointerException("word");
        }
        this.currentWord = word;
        this.wordType = TYPE_ANY;
    }

    /**
     * Returns the base form of {@code word} for the given part-of-speech tag.
     *
     * @param word   the (possibly inflected) word
     * @param posTag {@code "N"} (noun), {@code "V"} (verb), {@code "AJ"}
     *               (adjective), {@code "AV"} (adverb) or {@code "MIX"}
     *               (try noun, verb, adjective, adverb in that order and
     *               return the first hit)
     * @return the base form, or {@code null} if none was found, the tag is
     *         not one of the above, or either argument is {@code null}
     */
    public String getWordLexicalizationByType(String word, String posTag) {
        if (word == null || posTag == null) {
            return null;
        }
        if ("N".equals(posTag)) {
            return lexicalizeNoun(word);
        }
        if ("V".equals(posTag)) {
            return lexicalizeVerb(word);
        }
        if ("AJ".equals(posTag)) {
            return lexicalizeAdjective(word);
        }
        if ("AV".equals(posTag)) {
            return lexicalizeAdverb(word);
        }
        if ("MIX".equals(posTag)) {
            String lemma = lexicalizeNoun(word);
            if (lemma == null) {
                lemma = lexicalizeVerb(word);
            }
            if (lemma == null) {
                lemma = lexicalizeAdjective(word);
            }
            if (lemma == null) {
                lemma = lexicalizeAdverb(word);
            }
            return lemma;
        }
        return null;
    }

    /** Base form of a noun, or {@code null} if none is found. */
    private String lexicalizeNoun(String noun) {
        return lexicalize(noun, this.nounExceptions, NOUN_SUFFIXES, NOUN_ENDINGS, this.allNouns);
    }

    /** Base form of a verb, or {@code null} if none is found. */
    private String lexicalizeVerb(String verb) {
        return lexicalize(verb, this.verbExceptions, VERB_SUFFIXES, VERB_ENDINGS, this.allVerbs);
    }

    /** Base form of an adjective, or {@code null} if none is found. */
    private String lexicalizeAdjective(String adj) {
        return lexicalize(adj, this.adjExceptions, ADJ_SUFFIXES, ADJ_ENDINGS, this.allAdjs);
    }

    /** Base form of an adverb, or {@code null} if none is found. */
    private String lexicalizeAdverb(String adv) {
        // NOTE(review): adverbs are reduced with the NOUN suffix rules. This
        // is kept as found; it looks like a copy of the noun routine, so
        // confirm whether adverbs should use any suffix rules at all.
        return lexicalize(adv, this.advExceptions, NOUN_SUFFIXES, NOUN_ENDINGS, this.allAdvs);
    }

    /**
     * Exception-table lookup followed by suffix stripping.
     *
     * @return the exception entry for {@code word} if there is one,
     *         otherwise the first suffix-rule candidate present in
     *         {@code lexicon}, otherwise {@code null}
     */
    private static String lexicalize(String word,
                                     HashMap<String, String> exceptions,
                                     String[] suffixes,
                                     String[] endings,
                                     HashMap<String, Integer> lexicon) {
        if (exceptions != null) {
            String irregular = exceptions.get(word);
            if (irregular != null) {
                return irregular;
            }
        }
        return stripSuffix(word, suffixes, endings, lexicon);
    }

    /**
     * Applies the suffix rules in order and returns the first candidate whose
     * lower-cased form is a key of {@code lexicon} with a non-null value.
     *
     * @return the candidate (input casing preserved), or {@code null} if no
     *         rule yields a known word or {@code lexicon} is {@code null}
     */
    private static String stripSuffix(String word,
                                      String[] suffixes,
                                      String[] endings,
                                      HashMap<String, Integer> lexicon) {
        if (lexicon == null) {
            return null;
        }
        for (int i = 0; i < suffixes.length; i++) {
            String suffix = suffixes[i];
            // The word must be strictly longer than the suffix so that a
            // non-empty stem remains (this also rejects the empty word).
            if (word.length() > suffix.length() && word.endsWith(suffix)) {
                String candidate = word.substring(0, word.length() - suffix.length()) + endings[i];
                // Locale.ROOT keeps the key independent of the JVM default
                // locale (e.g. Turkish dotless-i casing).
                if (lexicon.get(candidate.toLowerCase(java.util.Locale.ROOT)) != null) {
                    return candidate;
                }
            }
        }
        return null;
    }

    /**
     * Lexicalizes the word set with {@link #setWord(String)} using suffix
     * rules only (exception tables are not consulted here).
     *
     * <p>Depending on {@link #setPOSWordType(int)}, nouns, verbs and
     * adjectives are tried in that order; on the first hit the matched type
     * becomes available through {@link #getWordType()}.
     *
     * @return the base form, or the empty string if no word was set or no
     *         rule produced a known word; never {@code null}
     */
    public String getLexicalization()
    {
        this.lastResult = "";
        this.isNounAdj = false;

        if (this.currentWord == null) {
            return this.lastResult;
        }

        if (accepts(TYPE_NOUN)
                && recordMatch(stripSuffix(this.currentWord, NOUN_SUFFIXES, NOUN_ENDINGS, this.allNouns), TYPE_NOUN)) {
            return this.lastResult;
        }
        if (accepts(TYPE_VERB)
                && recordMatch(stripSuffix(this.currentWord, VERB_SUFFIXES, VERB_ENDINGS, this.allVerbs), TYPE_VERB)) {
            return this.lastResult;
        }
        if (accepts(TYPE_ADJECTIVE)
                && recordMatch(stripSuffix(this.currentWord, ADJ_SUFFIXES, ADJ_ENDINGS, this.allAdjs), TYPE_ADJECTIVE)) {
            return this.lastResult;
        }
        return this.lastResult;
    }

    /** Whether the current part-of-speech restriction allows trying {@code type}. */
    private boolean accepts(int type) {
        return this.posWordType == TYPE_ANY || this.posWordType == type;
    }

    /**
     * Stores {@code lemma} and {@code type} as the current result when
     * {@code lemma} is non-null.
     *
     * @return {@code true} if a result was stored
     */
    private boolean recordMatch(String lemma, int type) {
        if (lemma == null) {
            return false;
        }
        this.lastResult = lemma;
        this.wordType = type;
        return true;
    }

    /**
     * Rebuilds a space-separated string without its {@code STOPWORD} marker
     * tokens.
     *
     * <p>The input is split on single spaces; every token that is not exactly
     * {@code "STOPWORD"} is appended to the result followed by one space, so
     * a non-empty result always ends with a trailing space and empty tokens
     * (from consecutive spaces) are preserved.
     *
     * <p>NOTE(review): despite the name, no compound merging is performed.
     * The previous implementation contained a dictionary-driven merge loop
     * that could never execute (its guard was false on entry), and because
     * merged words were joined with the same single space used between
     * separate words, enabling it would not have changed the returned text.
     *
     * @param CurrentString space-separated tokens
     * @return the tokens without {@code STOPWORD} markers, each followed by a space
     * @throws NullPointerException if {@code CurrentString} is {@code null}
     */
    public String checkCompoundNames(String CurrentString)
    {
        StringBuilder result = new StringBuilder();
        for (String token : CurrentString.split(" ")) {
            if (STOPWORD_MARKER.equals(token)) {
                continue;
            }
            result.append(token).append(' ');
        }
        return result.toString();
    }
}