1 package eu.fbk.shell.mdfsa.data.structures;
2
3 import eu.fbk.dkm.pikes.raid.mdfsa.parser.DependencyTree;
4
5 import java.io.Serializable;
6 import java.util.ArrayList;
7 import java.util.HashMap;
8 import java.util.Iterator;
9 import java.util.Properties;
10
11 public class TreeGraphStatisticalModel implements Serializable {
12
13 private static final long serialVersionUID = 1L;
14
15 private Properties prp;
16 private ArrayList<DatasetInstance> trainingInstances;
17 private ArrayList<DatasetInstance> testInstances;
18 private HashMap<String, HashMap<String, Tuple>> domainDependentModelsPositive;
19 private HashMap<String, HashMap<String, Tuple>> domainDependentModelsNegative;
20 private HashMap<String, HashMap<String, Tuple>> domainDependentModelsNeutral;
21 private HashMap<String, Tuple> domainIndependentModelPositive;
22 private HashMap<String, Tuple> domainIndependentModelNegative;
23 private HashMap<String, Tuple> domainIndependentModelNeutral;
24 private HashMap<String, Integer> domainInstanceCounter;
25 private HashMap<String, Integer> domainLevels;
26
27 public TreeGraphStatisticalModel(Properties p) {
28 this.prp = p;
29 this.trainingInstances = new ArrayList<DatasetInstance>();
30 this.domainDependentModelsPositive = new HashMap<String, HashMap<String, Tuple>>();
31 this.domainDependentModelsNegative = new HashMap<String, HashMap<String, Tuple>>();
32 this.domainDependentModelsNeutral = new HashMap<String, HashMap<String, Tuple>>();
33 this.domainIndependentModelPositive = new HashMap<String, Tuple>();
34 this.domainIndependentModelNegative = new HashMap<String, Tuple>();
35 this.domainIndependentModelNeutral = new HashMap<String, Tuple>();
36 this.domainInstanceCounter = new HashMap<String, Integer>();
37 this.domainInstanceCounter.put("POL-POSITIVE", new Integer(0));
38 this.domainInstanceCounter.put("POL-NEGATIVE", new Integer(0));
39 this.domainInstanceCounter.put("POL-NEUTRAL", new Integer(0));
40 this.domainLevels = new HashMap<String, Integer>();
41 }
42
43
44 public void setTrainingInstances(ArrayList<DatasetInstance> training) {
45 this.trainingInstances = training;
46 }
47
48
49 public void setTestInstances(ArrayList<DatasetInstance> test) {
50 this.testInstances = test;
51 }
52
53 public ArrayList<DatasetInstance> getTestInstances() {
54 return this.testInstances;
55 }
56
57
58 public void buildModel() {
59
60 for(DatasetInstance di: this.trainingInstances) {
61
62
63 String domain = di.getDomain();
64 String[] domains = domain.split("\\#");
65 int k = 0;
66 for(String cDomain: domains) {
67 this.domainLevels.put(cDomain.trim(), k);
68
69 HashMap<String, Tuple> domainModel = this.domainDependentModelsPositive.get(cDomain);
70 if(domainModel == null) {
71 domainModel = new HashMap<String, Tuple>();
72 this.domainDependentModelsPositive.put(cDomain, domainModel);
73 domainModel = new HashMap<String, Tuple>();
74 this.domainDependentModelsNegative.put(cDomain, domainModel);
75 domainModel = new HashMap<String, Tuple>();
76 this.domainDependentModelsNeutral.put(cDomain, domainModel);
77
78 }
79
80
81
82 int polarity = di.getPolarity();
83 HashMap<String, Tuple> domainIndependentModel = null;
84 HashMap<String, HashMap<String, Tuple>> polarizedModel = null;
85 if(polarity == 1) {
86 polarizedModel = this.domainDependentModelsPositive;
87 domainIndependentModel = this.domainIndependentModelPositive;
88 } else if(polarity == -1) {
89 polarizedModel = this.domainDependentModelsNegative;
90 domainIndependentModel = this.domainIndependentModelNegative;
91 } else if(polarity == 0) {
92 polarizedModel = this.domainDependentModelsNeutral;
93 domainIndependentModel = this.domainIndependentModelNeutral;
94 }
95
96 domainModel = polarizedModel.get(cDomain);
97
98
99
100 SentenceStructuredRepresentation ssr = di.getSentenceStructuredRepresentation();
101 ArrayList<DependencyTree> dts = ssr.getDependencyTree();
102 for(DependencyTree dt: dts) {
103
104 ArrayList<String> dependencies = dt.getDependecies();
105 for(String dep: dependencies) {
106
107
108 Integer domainCounter = this.domainInstanceCounter.get(cDomain.trim() + "_" + di.getPolarity());
109 if(domainCounter == null) {
110 this.domainInstanceCounter.put(cDomain.trim() + "_" + di.getPolarity(), new Integer(0));
111 domainCounter = this.domainInstanceCounter.get(cDomain.trim() + "_" + di.getPolarity());
112 }
113 domainCounter++;
114 this.domainInstanceCounter.put(cDomain.trim() + "_" + di.getPolarity(), domainCounter);
115 domainCounter = this.domainInstanceCounter.get(cDomain.trim());
116 if(domainCounter == null) {
117 this.domainInstanceCounter.put(cDomain.trim(), new Integer(0));
118 domainCounter = this.domainInstanceCounter.get(cDomain.trim());
119 }
120 domainCounter++;
121 this.domainInstanceCounter.put(cDomain.trim(), domainCounter);
122
123
124 if(di.getPolarity() == 1.0) {
125 Integer counter = this.domainInstanceCounter.get("POL-POSITIVE");
126 counter++;
127 this.domainInstanceCounter.put("POL-POSITIVE", counter);
128 } else if(di.getPolarity() == -1.0) {
129 Integer counter = this.domainInstanceCounter.get("POL-NEGATIVE");
130 counter++;
131 this.domainInstanceCounter.put("POL-NEGATIVE", counter);
132 } else {
133 Integer counter = this.domainInstanceCounter.get("POL-NEUTRAL");
134 counter++;
135 this.domainInstanceCounter.put("POL-NEUTRAL", counter);
136 }
137
138
139 String[] d = dep.split("\\^\\^\\^");
140 d[1] = d[1].substring(0, d[1].indexOf("-"));
141 d[2] = d[2].substring(0, d[2].indexOf("-"));
142 dep = d[0] + "^^^" + d[1] + "^^^" + d[2];
143
144 if(d.length == 3) {
145
146
147
148
149
150
151
152
153
154
155 Tuple t = domainModel.get(dep);
156 Tuple tI = domainIndependentModel.get(dep);
157 if(t == null) {
158 t = new Tuple(1);
159 t.setToken(dep);
160 t.setFrequency(1.0);
161 } else {
162 t.setFrequency(t.getFrequency() + 1.0);
163 }
164 if(tI == null) {
165 tI = new Tuple(1);
166 tI.setToken(dep);
167 tI.setFrequency(1.0);
168 } else {
169 tI.setFrequency(t.getFrequency() + 1.0);
170 }
171 domainModel.put(dep, t);
172 domainIndependentModel.put(dep, tI);
173
174
175 t = domainModel.get(d[1]);
176 tI = domainIndependentModel.get(d[1]);
177 if(t == null) {
178 t = new Tuple(1);
179 t.setToken(d[1]);
180 t.setFrequency(1.0);
181 } else {
182 t.setFrequency(t.getFrequency() + 1.0);
183 }
184 if(tI == null) {
185 tI = new Tuple(1);
186 tI.setToken(d[1]);
187 tI.setFrequency(1.0);
188 } else {
189 tI.setFrequency(t.getFrequency() + 1.0);
190 }
191 domainModel.put(d[1], t);
192 domainIndependentModel.put(d[1], tI);
193
194
195 t = domainModel.get(d[2]);
196 tI = domainIndependentModel.get(d[2]);
197 if(t == null) {
198 t = new Tuple(1);
199 t.setToken(d[2]);
200 t.setFrequency(1.0);
201 } else {
202 t.setFrequency(t.getFrequency() + 1.0);
203 }
204 if(tI == null) {
205 tI = new Tuple(1);
206 tI.setToken(d[2]);
207 tI.setFrequency(1.0);
208 } else {
209 tI.setFrequency(t.getFrequency() + 1.0);
210 }
211 domainModel.put(d[2], t);
212 domainIndependentModel.put(d[2], tI);
213
214
215 String key = new String(d[0] + "^^^" + d[2] + "^^^" + d[1]);
216 t = domainModel.get(key);
217 tI = domainIndependentModel.get(key);
218 if(t == null) {
219 t = new Tuple(1);
220 t.setToken(key);
221 t.setFrequency(1.0);
222 } else {
223 t.setFrequency(t.getFrequency() + 1.0);
224 }
225 if(tI == null) {
226 tI = new Tuple(1);
227 tI.setToken(key);
228 tI.setFrequency(1.0);
229 } else {
230 tI.setFrequency(t.getFrequency() + 1.0);
231 }
232 domainModel.put(key, t);
233 domainIndependentModel.put(key, tI);
234
235
236 key = new String(d[1] + "^^^" + d[2]);
237 t = domainModel.get(key);
238 tI = domainIndependentModel.get(key);
239 if(t == null) {
240 t = new Tuple(1);
241 t.setToken(key);
242 t.setFrequency(1.0);
243 } else {
244 t.setFrequency(t.getFrequency() + 1.0);
245 }
246 if(tI == null) {
247 tI = new Tuple(1);
248 tI.setToken(key);
249 tI.setFrequency(1.0);
250 } else {
251 tI.setFrequency(t.getFrequency() + 1.0);
252 }
253 domainModel.put(key, t);
254 domainIndependentModel.put(key, tI);
255
256
257 key = new String(d[2] + "^^^" + d[1]);
258 t = domainModel.get(key);
259 tI = domainIndependentModel.get(key);
260 if(t == null) {
261 t = new Tuple(1);
262 t.setToken(key);
263 t.setFrequency(1.0);
264 } else {
265 t.setFrequency(t.getFrequency() + 1.0);
266 }
267 if(tI == null) {
268 tI = new Tuple(1);
269 tI.setToken(key);
270 tI.setFrequency(1.0);
271 } else {
272 tI.setFrequency(t.getFrequency() + 1.0);
273 }
274 domainModel.put(key, t);
275 domainIndependentModel.put(key, tI);
276 }
277 }
278 }
279
280 polarizedModel.put(cDomain, domainModel);
281 k++;
282 }
283 }
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300 }
301
302
303
304
305
306
307
308 public void computeResults(int flagEvaluateDomain, int flagEvaluateDoubleDomain, int fineGranedPolarity) {
309
310 int positiveCounter = this.domainInstanceCounter.get("POL-POSITIVE");
311 int negativeCounter = this.domainInstanceCounter.get("POL-NEGATIVE");
312 int neutralCounter = this.domainInstanceCounter.get("POL-NEUTRAL");
313
314 for(DatasetInstance di: this.testInstances) {
315
316
317
318
319 double positiveCoefficient = this.computeMembershipCoefficient(this.domainIndependentModelPositive, di, positiveCounter);
320 double negativeCoefficient = this.computeMembershipCoefficient(this.domainIndependentModelNegative, di, negativeCounter);
321 double neutralCoefficient = this.computeMembershipCoefficient(this.domainIndependentModelNeutral, di, neutralCounter);
322
323 if(fineGranedPolarity == 1) {
324 double delta = positiveCoefficient - negativeCoefficient;
325 double finePolarity = 0.0;
326
327 if(Math.max(positiveCoefficient, negativeCoefficient) != 0.0) {
328 finePolarity = (delta / Math.max(positiveCoefficient, negativeCoefficient)) * 5.0;
329 }
330
331 if(neutralCoefficient != 0.0) {
332 finePolarity = finePolarity - ((1.0 - (1.0 / neutralCoefficient)) * Math.signum(finePolarity));
333 }
334
335 if(finePolarity > 0.0) {
336 di.setInferredPolarity(Math.ceil(finePolarity));
337 } else if(finePolarity < 0.0) {
338 di.setInferredPolarity(Math.floor(finePolarity));
339 } else {
340 di.setInferredPolarity(finePolarity);
341 }
342 di.setInferredPolarity(finePolarity);
343
344 } else {
345 if(positiveCoefficient > negativeCoefficient && positiveCoefficient > neutralCoefficient) {di.setInferredPolarity(1.0);}
346 if(negativeCoefficient > positiveCoefficient && negativeCoefficient > neutralCoefficient) {di.setInferredPolarity(-1.0);}
347 if(neutralCoefficient > negativeCoefficient && neutralCoefficient > positiveCoefficient) {di.setInferredPolarity(0.0);}
348 }
349
350
351
352
353
354
355 if(flagEvaluateDomain == 1) {
356 di.setInferredDomain("");
357 Iterator<String> iD = this.domainLevels.keySet().iterator();
358 HashMap<String, Double> domainMembership = new HashMap<String, Double>();
359
360 double maxDomainCoeff = Double.NEGATIVE_INFINITY;
361 while(iD.hasNext()) {
362 String d = iD.next();
363 double domainCoeff = 0.0;
364 positiveCoefficient = 0.0;
365 negativeCoefficient = 0.0;
366 neutralCoefficient = 0.0;
367
368 Integer currentDomainCounter = this.domainInstanceCounter.get(d.trim());
369
370 HashMap<String, Tuple> currentDomainModel = this.domainDependentModelsPositive.get(d);
371 if(currentDomainModel != null && currentDomainCounter != null) {
372 positiveCoefficient = this.computeMembershipCoefficient(currentDomainModel, di, currentDomainCounter);
373 }
374
375 currentDomainModel = this.domainDependentModelsNegative.get(d);
376 if(currentDomainModel != null && currentDomainCounter != null) {
377 negativeCoefficient = this.computeMembershipCoefficient(currentDomainModel, di, currentDomainCounter);
378 }
379
380 currentDomainModel = this.domainDependentModelsPositive.get(d);
381 if(currentDomainModel != null && currentDomainCounter != null) {
382 neutralCoefficient = this.computeMembershipCoefficient(currentDomainModel, di, currentDomainCounter);
383 }
384
385 domainCoeff = positiveCoefficient + negativeCoefficient + neutralCoefficient;
386
387 if(domainCoeff > maxDomainCoeff) {
388 di.setInferredDomain(d);
389 maxDomainCoeff = domainCoeff;
390 }
391 domainMembership.put(d, domainCoeff);
392 }
393
394
395
396
397
398
399
400 if(flagEvaluateDoubleDomain == 1) {
401 String levelZero = new String("");
402 String levelOne = new String("");
403 iD = this.domainLevels.keySet().iterator();
404 double maxDomainCoeffLevelZero = Double.NEGATIVE_INFINITY;
405 double maxDomainCoeffLevelOne = Double.NEGATIVE_INFINITY;
406 while(iD.hasNext()) {
407 String d = iD.next();
408 int level = this.domainLevels.get(d);
409 double membership = domainMembership.get(d);
410 if(level == 0) {
411 if(membership > maxDomainCoeffLevelZero) {
412 levelZero = new String(d);
413 maxDomainCoeffLevelZero = membership;
414 }
415 } else if(level == 1) {
416 if(membership > maxDomainCoeffLevelOne) {
417 levelOne = new String(d);
418 maxDomainCoeffLevelOne = membership;
419 }
420 }
421 }
422 di.setInferredDomain(levelZero + "#" + levelOne);
423 }
424 }
425 }
426 }
427
428
429
430
431 private double computeMembershipCoefficient(HashMap<String, Tuple> h, DatasetInstance di, int normCounter) {
432
433 double coeff = 0.0;
434
435 SentenceStructuredRepresentation ssr = di.getSentenceStructuredRepresentation();
436 ArrayList<DependencyTree> dts = ssr.getDependencyTree();
437 for(DependencyTree dt: dts) {
438 ArrayList<String> dependencies = dt.getDependecies();
439 for(String dep: dependencies) {
440 String[] d = dep.split("\\^\\^\\^");
441 d[1] = d[1].substring(0, d[1].indexOf("-"));
442 d[2] = d[2].substring(0, d[2].indexOf("-"));
443 dep = d[0] + "^^^" + d[1] + "^^^" + d[2];
444
445 if(d.length == 3) {
446
447 String governor = d[1];
448 String dependent = d[2];
449 String rdg = new String(d[0] + "^^^" + d[2] + "^^^" + d[1]);
450 String gd = new String(d[1] + "^^^" + d[2]);
451 String dg = new String(d[2] + "^^^" + d[1]);
452
453
454 Tuple tDep = h.get(dep);
455 Tuple tG = h.get(governor);
456 Tuple tD = h.get(dependent);
457 Tuple tRDG = h.get(rdg);
458 Tuple tGD = h.get(gd);
459 Tuple tDG = h.get(dg);
460
461 double axiomCoeff = 0.0;
462 if(tDep != null) {
463 axiomCoeff += 1.0 / Math.log((double) normCounter / tDep.getFrequency());
464 }
465 if(tDep != null && tG != null) {
466 axiomCoeff += ((1.0 / Math.log((double) normCounter / tDep.getFrequency())) /
467 (1.0 / Math.log((double) normCounter / tG.getFrequency())));
468 }
469 if(tDep != null && tD != null) {
470 axiomCoeff += ((1.0 / Math.log((double) normCounter / tDep.getFrequency())) /
471 (1.0 / Math.log((double) normCounter / tD.getFrequency())));
472 }
473 if(tRDG != null) {
474 axiomCoeff += 1.0 / Math.log((double) normCounter / tRDG.getFrequency());
475 }
476 if(tRDG != null && tGD != null) {
477 axiomCoeff += ((1.0 / Math.log((double) normCounter / tRDG.getFrequency())) /
478 (1.0 / Math.log((double) normCounter / tGD.getFrequency())));
479 }
480 if(tRDG != null && tDG != null) {
481 axiomCoeff += (((1.0 / Math.log((double) normCounter / tRDG.getFrequency())) /
482 (1.0 / Math.log((double) normCounter / tDG.getFrequency()))) * 0.5);
483 }
484 coeff += (axiomCoeff / dependencies.size());
485 }
486 }
487 }
488
489 return coeff;
490 }
491
492
493
494
495
496 private class Tuple implements Serializable {
497
498 private static final long serialVersionUID = 1L;
499
500 private String token;
501 private double frequency;
502 private double weight;
503 private int type;
504 private double counter;
505
506 public Tuple(int type) {
507 this.frequency = 0.0;
508 this.counter = 1.0;
509 this.weight = 1.0;
510 this.type = type;
511 }
512
513 public String getToken() {
514 return token;
515 }
516 public void setToken(String token) {
517 this.token = token;
518 }
519 public double getFrequency() {
520 return frequency;
521 }
522 public void setFrequency(double frequency) {
523 this.frequency = frequency;
524 }
525 public double getWeight() {
526 return weight;
527 }
528 public void setWeight(double weight) {
529 this.weight = weight;
530 }
531 public int getType() {
532 return type;
533 }
534 public void setType(int type) {
535 this.type = type;
536 }
537 }
538 }