1   package eu.fbk.shell.mdfsa.data.structures;
2   
3   import eu.fbk.dkm.pikes.raid.mdfsa.parser.DependencyTree;
4   
5   import java.io.Serializable;
6   import java.util.ArrayList;
7   import java.util.HashMap;
8   import java.util.Iterator;
9   import java.util.Properties;
10  
11  public class TreeGraphStatisticalModel implements Serializable {
12  
13    private static final long serialVersionUID = 1L;
14    
15    private Properties prp;
16    private ArrayList<DatasetInstance> trainingInstances;
17    private ArrayList<DatasetInstance> testInstances;
18    private HashMap<String, HashMap<String, Tuple>> domainDependentModelsPositive;
19    private HashMap<String, HashMap<String, Tuple>> domainDependentModelsNegative;
20    private HashMap<String, HashMap<String, Tuple>> domainDependentModelsNeutral;
21    private HashMap<String, Tuple> domainIndependentModelPositive;
22    private HashMap<String, Tuple> domainIndependentModelNegative;
23    private HashMap<String, Tuple> domainIndependentModelNeutral;
24    private HashMap<String, Integer> domainInstanceCounter;
25    private HashMap<String, Integer> domainLevels;
26    
27    public TreeGraphStatisticalModel(Properties p) {
28      this.prp = p;
29      this.trainingInstances = new ArrayList<DatasetInstance>();
30      this.domainDependentModelsPositive = new HashMap<String, HashMap<String, Tuple>>();
31      this.domainDependentModelsNegative = new HashMap<String, HashMap<String, Tuple>>();
32      this.domainDependentModelsNeutral = new HashMap<String, HashMap<String, Tuple>>();
33      this.domainIndependentModelPositive = new HashMap<String, Tuple>();
34      this.domainIndependentModelNegative = new HashMap<String, Tuple>();
35      this.domainIndependentModelNeutral = new HashMap<String, Tuple>();
36      this.domainInstanceCounter = new HashMap<String, Integer>();
37      this.domainInstanceCounter.put("POL-POSITIVE", new Integer(0));
38      this.domainInstanceCounter.put("POL-NEGATIVE", new Integer(0));
39      this.domainInstanceCounter.put("POL-NEUTRAL", new Integer(0));
40      this.domainLevels = new HashMap<String, Integer>();
41    }
42    
43    
44    public void setTrainingInstances(ArrayList<DatasetInstance> training) {
45      this.trainingInstances = training;
46    }
47    
48    
49    public void setTestInstances(ArrayList<DatasetInstance> test) {
50      this.testInstances = test;
51    }
52    
53    public ArrayList<DatasetInstance> getTestInstances() {
54      return this.testInstances;
55    }
56    
57    
58    public void buildModel() {
59      
60      for(DatasetInstance di: this.trainingInstances) {
61        
62        /* Checks if the domain has been already analyzed, otherwise the new maps are created */
63        String domain = di.getDomain();
64        String[] domains = domain.split("\\#");
65        int k = 0;
66        for(String cDomain: domains) {
67          this.domainLevels.put(cDomain.trim(), k);
68          
69          HashMap<String, Tuple> domainModel = this.domainDependentModelsPositive.get(cDomain);
70          if(domainModel == null) {
71            domainModel = new HashMap<String, Tuple>();
72            this.domainDependentModelsPositive.put(cDomain, domainModel);
73            domainModel = new HashMap<String, Tuple>();
74            this.domainDependentModelsNegative.put(cDomain, domainModel);
75            domainModel = new HashMap<String, Tuple>();
76            this.domainDependentModelsNeutral.put(cDomain, domainModel);
77            //this.domainInstanceCounter.put(domain, new Integer(0));
78          }
79          
80          
81          /* Sets the HashMap to use based on the domain and on the polarity of the current DatasetInstance */
82          int polarity = di.getPolarity();
83          HashMap<String, Tuple> domainIndependentModel = null;
84          HashMap<String, HashMap<String, Tuple>> polarizedModel = null;
85          if(polarity == 1) {
86            polarizedModel = this.domainDependentModelsPositive;
87            domainIndependentModel = this.domainIndependentModelPositive;
88          } else if(polarity == -1) {
89            polarizedModel = this.domainDependentModelsNegative;
90            domainIndependentModel = this.domainIndependentModelNegative;
91          } else if(polarity == 0) {
92            polarizedModel = this.domainDependentModelsNeutral;
93            domainIndependentModel = this.domainIndependentModelNeutral;
94          }
95          
96          domainModel = polarizedModel.get(cDomain);
97          
98          
99          /* Extracts the list of dependencies from the current DatasetInstance and populates the model */
100         SentenceStructuredRepresentation ssr = di.getSentenceStructuredRepresentation();
101         ArrayList<DependencyTree> dts = ssr.getDependencyTree();
102         for(DependencyTree dt: dts) {
103           
104           ArrayList<String> dependencies = dt.getDependecies();
105           for(String dep: dependencies) {
106             
107             /* Gets the number of instances of the current domain models (both domain dependent and independent) and updates them */  
108             Integer domainCounter = this.domainInstanceCounter.get(cDomain.trim() + "_" + di.getPolarity());
109             if(domainCounter == null) {
110               this.domainInstanceCounter.put(cDomain.trim() + "_" + di.getPolarity(), new Integer(0));
111               domainCounter = this.domainInstanceCounter.get(cDomain.trim() + "_" + di.getPolarity());
112             }
113             domainCounter++;
114             this.domainInstanceCounter.put(cDomain.trim() + "_" + di.getPolarity(), domainCounter);
115             domainCounter = this.domainInstanceCounter.get(cDomain.trim());
116             if(domainCounter == null) {
117               this.domainInstanceCounter.put(cDomain.trim(), new Integer(0));
118               domainCounter = this.domainInstanceCounter.get(cDomain.trim());
119             }
120             domainCounter++;
121             this.domainInstanceCounter.put(cDomain.trim(), domainCounter);
122             
123             
124             if(di.getPolarity() == 1.0) {
125               Integer counter = this.domainInstanceCounter.get("POL-POSITIVE");
126               counter++;
127               this.domainInstanceCounter.put("POL-POSITIVE", counter);
128             } else if(di.getPolarity() == -1.0) {
129               Integer counter = this.domainInstanceCounter.get("POL-NEGATIVE");
130               counter++;
131               this.domainInstanceCounter.put("POL-NEGATIVE", counter);
132             } else {
133               Integer counter = this.domainInstanceCounter.get("POL-NEUTRAL");
134               counter++;
135               this.domainInstanceCounter.put("POL-NEUTRAL", counter);
136             }
137             
138             
139             String[] d = dep.split("\\^\\^\\^");
140             d[1] = d[1].substring(0, d[1].indexOf("-"));
141             d[2] = d[2].substring(0, d[2].indexOf("-"));
142             dep = d[0] + "^^^" + d[1] + "^^^" + d[2];
143             
144             if(d.length == 3) {
145               /*
146                * Checks if the following four tuples exists:
147                * key: governor term;
148                * key: dependent term;
149                * key: the entire rule relation-governor-dependent
150                * key: the inverse entire rule relation-dependent-governor (used with frequency 0.5)
151                * If not, they are created and put in the HashMap; if Yes, they are updated with the statistical information
152                */
153               
154               /* Key: relation-governor-dependent */
155               Tuple t = domainModel.get(dep);
156               Tuple tI = domainIndependentModel.get(dep);
157               if(t == null) {
158                 t = new Tuple(1);
159                 t.setToken(dep);
160                 t.setFrequency(1.0);
161               } else {
162                 t.setFrequency(t.getFrequency() + 1.0);
163               }
164               if(tI == null) {
165                 tI = new Tuple(1);
166                 tI.setToken(dep);
167                 tI.setFrequency(1.0);
168               } else {
169                 tI.setFrequency(t.getFrequency() + 1.0);
170               }
171               domainModel.put(dep, t);
172               domainIndependentModel.put(dep, tI);
173               
174               /* Key: governor */
175               t = domainModel.get(d[1]);
176               tI = domainIndependentModel.get(d[1]);
177               if(t == null) {
178                 t = new Tuple(1);
179                 t.setToken(d[1]);
180                 t.setFrequency(1.0);
181               } else {
182                 t.setFrequency(t.getFrequency() + 1.0);
183               }
184               if(tI == null) {
185                 tI = new Tuple(1);
186                 tI.setToken(d[1]);
187                 tI.setFrequency(1.0);
188               } else {
189                 tI.setFrequency(t.getFrequency() + 1.0);
190               }
191               domainModel.put(d[1], t);
192               domainIndependentModel.put(d[1], tI);
193               
194               /* Key: dependent */
195               t = domainModel.get(d[2]);
196               tI = domainIndependentModel.get(d[2]);
197               if(t == null) {
198                 t = new Tuple(1);
199                 t.setToken(d[2]);
200                 t.setFrequency(1.0);
201               } else {
202                 t.setFrequency(t.getFrequency() + 1.0);
203               }
204               if(tI == null) {
205                 tI = new Tuple(1);
206                 tI.setToken(d[2]);
207                 tI.setFrequency(1.0);
208               } else {
209                 tI.setFrequency(t.getFrequency() + 1.0);
210               }
211               domainModel.put(d[2], t);
212               domainIndependentModel.put(d[2], tI);
213               
214               /* Key: relation-dependent-governor */
215               String key = new String(d[0] + "^^^" + d[2] + "^^^" + d[1]);
216               t = domainModel.get(key);
217               tI = domainIndependentModel.get(key);
218               if(t == null) {
219                 t = new Tuple(1);
220                 t.setToken(key);
221                 t.setFrequency(1.0);
222               } else {
223                 t.setFrequency(t.getFrequency() + 1.0);
224               }
225               if(tI == null) {
226                 tI = new Tuple(1);
227                 tI.setToken(key);
228                 tI.setFrequency(1.0);
229               } else {
230                 tI.setFrequency(t.getFrequency() + 1.0);
231               }
232               domainModel.put(key, t);
233               domainIndependentModel.put(key, tI);
234               
235               /* Key: governor-dependent */
236               key = new String(d[1] + "^^^" + d[2]);
237               t = domainModel.get(key);
238               tI = domainIndependentModel.get(key);
239               if(t == null) {
240                 t = new Tuple(1);
241                 t.setToken(key);
242                 t.setFrequency(1.0);
243               } else {
244                 t.setFrequency(t.getFrequency() + 1.0);
245               }
246               if(tI == null) {
247                 tI = new Tuple(1);
248                 tI.setToken(key);
249                 tI.setFrequency(1.0);
250               } else {
251                 tI.setFrequency(t.getFrequency() + 1.0);
252               }
253               domainModel.put(key, t);
254               domainIndependentModel.put(key, tI);
255               
256               /* Key: dependent-governor */
257               key = new String(d[2] + "^^^" + d[1]);
258               t = domainModel.get(key);
259               tI = domainIndependentModel.get(key);
260               if(t == null) {
261                 t = new Tuple(1);
262                 t.setToken(key);
263                 t.setFrequency(1.0);
264               } else {
265                 t.setFrequency(t.getFrequency() + 1.0);
266               }
267               if(tI == null) {
268                 tI = new Tuple(1);
269                 tI.setToken(key);
270                 tI.setFrequency(1.0);
271               } else {
272                 tI.setFrequency(t.getFrequency() + 1.0);
273               }
274               domainModel.put(key, t);
275               domainIndependentModel.put(key, tI);
276             }
277           }
278         }
279         
280         polarizedModel.put(cDomain, domainModel);
281         k++;
282       }
283     }
284     
285     /*
286     System.out.println(this.domainDependentModelsPositive);
287     System.out.println();
288     System.out.println(this.domainDependentModelsNegative);
289     System.out.println();
290     System.out.println(this.domainDependentModelsNeutral);
291     System.out.println();
292     System.out.println(this.domainIndependentModelPositive);
293     System.out.println();
294     System.out.println(this.domainIndependentModelNegative);
295     System.out.println();
296     System.out.println(this.domainIndependentModelNeutral);
297     System.out.println();
298     System.out.println(this.domainInstanceCounter);
299     */
300   }
301   
302   
303   
304   
305   /*
306    * Output prediction procedure
307    */
308   public void computeResults(int flagEvaluateDomain, int flagEvaluateDoubleDomain, int fineGranedPolarity) {
309         
310     int positiveCounter = this.domainInstanceCounter.get("POL-POSITIVE");
311     int negativeCounter = this.domainInstanceCounter.get("POL-NEGATIVE");
312     int neutralCounter = this.domainInstanceCounter.get("POL-NEUTRAL");
313     
314     for(DatasetInstance di: this.testInstances) {
315       
316       /*
317        * POLARITY EVALUATION
318        */
319       double positiveCoefficient = this.computeMembershipCoefficient(this.domainIndependentModelPositive, di, positiveCounter);
320       double negativeCoefficient = this.computeMembershipCoefficient(this.domainIndependentModelNegative, di, negativeCounter);
321       double neutralCoefficient = this.computeMembershipCoefficient(this.domainIndependentModelNeutral, di, neutralCounter);
322       
323       if(fineGranedPolarity == 1) {
324         double delta = positiveCoefficient - negativeCoefficient;
325         double finePolarity = 0.0;
326         
327         if(Math.max(positiveCoefficient, negativeCoefficient) != 0.0) {
328           finePolarity = (delta / Math.max(positiveCoefficient, negativeCoefficient)) * 5.0;
329         }
330         
331         if(neutralCoefficient != 0.0) {
332           finePolarity = finePolarity - ((1.0 - (1.0 / neutralCoefficient)) * Math.signum(finePolarity));
333         }
334         //di.setInferredPolarity(Math.abs(finePolarity) - Math.abs(di.getInferredPolarity()));
335         if(finePolarity > 0.0) {
336           di.setInferredPolarity(Math.ceil(finePolarity));
337         } else if(finePolarity < 0.0) {
338           di.setInferredPolarity(Math.floor(finePolarity));
339         } else {
340           di.setInferredPolarity(finePolarity);
341         }
342         di.setInferredPolarity(finePolarity);
343         
344       } else {
345         if(positiveCoefficient > negativeCoefficient && positiveCoefficient > neutralCoefficient) {di.setInferredPolarity(1.0);}
346         if(negativeCoefficient > positiveCoefficient && negativeCoefficient > neutralCoefficient) {di.setInferredPolarity(-1.0);}
347         if(neutralCoefficient > negativeCoefficient && neutralCoefficient > positiveCoefficient) {di.setInferredPolarity(0.0);}
348       }
349         
350       
351       
352       /*
353        * DOMAIN EVALUATION
354        */
355       if(flagEvaluateDomain == 1) {
356         di.setInferredDomain("");
357         Iterator<String> iD = this.domainLevels.keySet().iterator();
358         HashMap<String, Double> domainMembership = new HashMap<String, Double>();
359         
360         double maxDomainCoeff = Double.NEGATIVE_INFINITY;
361         while(iD.hasNext()) {
362           String d = iD.next();
363           double domainCoeff = 0.0;
364           positiveCoefficient = 0.0;
365           negativeCoefficient = 0.0;
366           neutralCoefficient = 0.0;
367           
368           Integer currentDomainCounter = this.domainInstanceCounter.get(d.trim());
369           
370           HashMap<String, Tuple> currentDomainModel = this.domainDependentModelsPositive.get(d);
371           if(currentDomainModel != null && currentDomainCounter != null) {
372             positiveCoefficient = this.computeMembershipCoefficient(currentDomainModel, di, currentDomainCounter);
373           }
374           
375           currentDomainModel = this.domainDependentModelsNegative.get(d);
376           if(currentDomainModel != null && currentDomainCounter != null) {
377             negativeCoefficient = this.computeMembershipCoefficient(currentDomainModel, di, currentDomainCounter);
378           }
379           
380           currentDomainModel = this.domainDependentModelsPositive.get(d);
381           if(currentDomainModel != null && currentDomainCounter != null) {
382             neutralCoefficient = this.computeMembershipCoefficient(currentDomainModel, di, currentDomainCounter);
383           }
384           
385           domainCoeff = positiveCoefficient + negativeCoefficient + neutralCoefficient;
386 
387           if(domainCoeff > maxDomainCoeff) {
388             di.setInferredDomain(d);
389             maxDomainCoeff = domainCoeff;
390           }
391           domainMembership.put(d, domainCoeff);
392         }
393         
394         
395         /*
396          * Double DOMAIN Validation
397          * This evaluation is performed contextually to the single domain evaluation in order to exploit the same
398          * objects preliminary filled during the single domain evaluation
399          */
400         if(flagEvaluateDoubleDomain == 1) {
401           String levelZero = new String("");
402           String levelOne = new String("");
403           iD = this.domainLevels.keySet().iterator();
404           double maxDomainCoeffLevelZero = Double.NEGATIVE_INFINITY;
405           double maxDomainCoeffLevelOne = Double.NEGATIVE_INFINITY;
406           while(iD.hasNext()) {
407             String d = iD.next();
408             int level = this.domainLevels.get(d);
409             double membership = domainMembership.get(d);
410             if(level == 0) {
411               if(membership > maxDomainCoeffLevelZero) {
412                 levelZero = new String(d);
413                 maxDomainCoeffLevelZero = membership;
414               }
415             } else if(level == 1) {
416               if(membership > maxDomainCoeffLevelOne) {
417                 levelOne = new String(d);
418                 maxDomainCoeffLevelOne = membership;
419               }
420             }
421           }
422           di.setInferredDomain(levelZero + "#" + levelOne);
423         }
424       }
425     }
426   }
427   
428   
429   
430   
431   private double computeMembershipCoefficient(HashMap<String, Tuple> h, DatasetInstance di, int normCounter) {
432     
433     double coeff = 0.0;
434     
435     SentenceStructuredRepresentation ssr = di.getSentenceStructuredRepresentation();
436     ArrayList<DependencyTree> dts = ssr.getDependencyTree();
437     for(DependencyTree dt: dts) {
438       ArrayList<String> dependencies = dt.getDependecies();
439       for(String dep: dependencies) {
440         String[] d = dep.split("\\^\\^\\^");
441         d[1] = d[1].substring(0, d[1].indexOf("-"));
442         d[2] = d[2].substring(0, d[2].indexOf("-"));
443         dep = d[0] + "^^^" + d[1] + "^^^" + d[2];
444         
445         if(d.length == 3) {
446           
447           String governor = d[1];
448           String dependent = d[2];
449           String rdg = new String(d[0] + "^^^" + d[2] + "^^^" + d[1]);
450           String gd = new String(d[1] + "^^^" + d[2]);
451           String dg = new String(d[2] + "^^^" + d[1]);
452           
453           /* Compute coefficient */
454           Tuple tDep = h.get(dep);
455           Tuple tG = h.get(governor);
456           Tuple tD = h.get(dependent);
457           Tuple tRDG = h.get(rdg);
458           Tuple tGD = h.get(gd);
459           Tuple tDG = h.get(dg);
460           
461           double axiomCoeff = 0.0;
462           if(tDep != null) {
463             axiomCoeff += 1.0 / Math.log((double) normCounter / tDep.getFrequency());
464           }
465           if(tDep != null && tG != null) {
466             axiomCoeff += ((1.0 / Math.log((double) normCounter / tDep.getFrequency())) /
467                            (1.0 / Math.log((double) normCounter / tG.getFrequency())));
468           }
469           if(tDep != null && tD != null) {
470             axiomCoeff += ((1.0 / Math.log((double) normCounter / tDep.getFrequency())) /
471                            (1.0 / Math.log((double) normCounter / tD.getFrequency())));
472           }
473           if(tRDG != null) {
474             axiomCoeff += 1.0 / Math.log((double) normCounter / tRDG.getFrequency());
475           }
476           if(tRDG != null && tGD != null) {
477             axiomCoeff += ((1.0 / Math.log((double) normCounter / tRDG.getFrequency())) /
478                            (1.0 / Math.log((double) normCounter / tGD.getFrequency())));
479           }
480           if(tRDG != null && tDG != null) {
481             axiomCoeff += (((1.0 / Math.log((double) normCounter / tRDG.getFrequency())) /
482                             (1.0 / Math.log((double) normCounter / tDG.getFrequency()))) * 0.5);
483           }
484           coeff += (axiomCoeff / dependencies.size());
485         }
486       }
487     }
488     
489     return coeff;
490   }
491   
492   
493   
494   
495   
496   private class Tuple implements Serializable {
497     
498     private static final long serialVersionUID = 1L;
499     
500     private String token;
501     private double frequency;
502     private double weight;  
503     private int type;
504     private double counter;
505     
506     public Tuple(int type) {
507       this.frequency = 0.0;
508       this.counter = 1.0;
509       this.weight = 1.0;
510       this.type = type;
511     }
512     
513     public String getToken() {
514       return token;
515     }
516     public void setToken(String token) {
517       this.token = token;
518     }
519     public double getFrequency() {
520       return frequency;
521     }
522     public void setFrequency(double frequency) {
523       this.frequency = frequency;
524     }
525     public double getWeight() {
526       return weight;
527     }
528     public void setWeight(double weight) {
529       this.weight = weight;
530     }
531     public int getType() {
532       return type;
533     }
534     public void setType(int type) {
535       this.type = type;
536     }
537   }
538 }