1   package eu.fbk.shell.mdfsa.data.structures;
2   
3   import edu.stanford.nlp.trees.Tree;
4   import eu.fbk.dkm.pikes.raid.mdfsa.parser.DependencyTree;
5   import eu.fbk.dkm.pikes.raid.mdfsa.wordnet.WordNetLexicalizer;
6   import eu.fbk.dkm.pikes.raid.mdfsa.wordnet.WordNetLoader;
7   
8   import java.io.Serializable;
9   import java.util.ArrayList;
10  import java.util.HashMap;
11  import java.util.Iterator;
12  import java.util.Properties;
13  
14  public class SentenceStructuredRepresentation implements Serializable {
15    
16    private static final long serialVersionUID = 1L;
17    
18    private Properties prp;
19    private String uri;
20    private String originalText;
21    private String posTaggedString;
22    private String lexString;
23    private String stemmedString;
24    private ArrayList<Tree> parsedTree;
25    private HashMap<String, ArrayList<String>> aspects;
26    private ArrayList<String> semanticConcepts;
27    private ArrayList<DependencyTree> dts;
28    private int sentenceMarker;
29    
30    public SentenceStructuredRepresentation(Properties prp) {
31      this.prp = prp;
32    }
33  
34    public void setUri(String uri) {
35      this.uri = uri;
36    }
37    
38    public String getUri() {
39      return this.uri;
40    }
41    
42    public String getOriginalText() {
43      return this.originalText;
44    }
45  
46    public void setOriginalText(String originalText) {
47      this.originalText = originalText;
48    }
49  
50    public String getPosTaggedString() {
51      return this.posTaggedString;
52    }
53  
54    public void setPosTaggedString(String posTaggedString) {
55      this.posTaggedString = posTaggedString;
56    }
57  
58    public ArrayList<DependencyTree> getDependencyTree() {
59      return this.dts;
60    }
61  
62    public void setDependencyTree(ArrayList<DependencyTree> dts) {
63      this.dts = dts;
64    }
65    
66    public HashMap<String, ArrayList<String>> getAspects() {
67      return this.aspects;
68    }
69    
70    public ArrayList<String> getSemanticConcepts() {
71      return this.semanticConcepts;
72    }
73  
74    public ArrayList<Tree> getParsedTree() {
75      return this.parsedTree;
76    }
77  
78    public void setParsedTree(ArrayList<Tree> parsedTree) {
79      this.parsedTree = parsedTree;
80    }
81    
82    public void createLexicalizedRepresentation(WordNetLexicalizer wnlex) {
83      
84      /* Checks the tagged string and creates lexicalized version of it */
85      String[] posTaggedTerms = this.posTaggedString.split(" ");
86      StringBuffer tempLex = new StringBuffer();
87      for(String curTaggedTerm: posTaggedTerms) {
88        if(curTaggedTerm.compareTo("") == 0) {
89          continue;
90        }
91        try {
92          String term = curTaggedTerm.substring(0, curTaggedTerm.indexOf("/"));
93          String tag = curTaggedTerm.substring(curTaggedTerm.indexOf("/") + 1);
94          
95          if(tag.compareTo("NNS") == 0 || tag.compareTo("NNPS") == 0) {
96            term = wnlex.getWordLexicalizationByType(term, "N");
97          } else if(tag.compareTo("VBD") == 0 || tag.compareTo("VBG") == 0 || tag.compareTo("VBN") == 0 ||
98                    tag.compareTo("VBP") == 0 || tag.compareTo("VBZ") == 0) {
99            term = wnlex.getWordLexicalizationByType(term, "V");
100         } else if(tag.compareTo("JJR") == 0 || tag.compareTo("JJS") == 0) {
101           term = wnlex.getWordLexicalizationByType(term, "AJ");
102         } else if(tag.compareTo("RBR") == 0 || tag.compareTo("RBS") == 0) {
103           term = wnlex.getWordLexicalizationByType(term, "AV");
104         }
105         if(term == null) {
106           term = curTaggedTerm.substring(0, curTaggedTerm.indexOf("/"));
107         }
108         tempLex.append(term + "/" + tag + " ");
109       } catch(Exception e) {
110         //System.out.println(this.posTaggedString);
111         e.printStackTrace();
112         //System.exit(0);
113       }
114     }
115     this.lexString = tempLex.toString().trim();
116     //System.out.println(this.originalText);
117     //System.out.println(this.posTaggedString);
118     //System.out.println(this.lexString);
119     //System.out.println();
120   }
121   
122   
123   public void createStemmedRepresentation() {
124     
125   }
126   
127   
128   /**
129    * Extracts the set of semantic concepts
130    */
131   public void extractSemanticConcepts(WordNetLoader wnl, WordNetLexicalizer wnlex) {
132     this.semanticConcepts = new ArrayList<String>();
133     //String terms = this.lexString.replaceAll("\\./\\.", "");
134     //String[] termsList = terms.split(" ");
135     String[] termsList = this.lexString.split(" ");
136     boolean compoundNounFlag = false;
137     for(String currentTerm : termsList) {
138       String[] atom = currentTerm.split("/");
139       if(atom.length > 1) {
140         Integer stopFlag = wnl.getStopwords().get(atom[0]);
141         if(stopFlag != null) continue;
142         if(atom[1].compareTo("NN") == 0 || atom[1].compareTo("NNP") == 0 || atom[1].compareTo("NNPS") == 0 ||
143            atom[1].compareTo("NNS") == 0 || atom[1].compareTo("FW") == 0) {
144           String newAspect;
145           if(compoundNounFlag == true) {
146             newAspect = this.semanticConcepts.get(this.semanticConcepts.size() - 1);
147             newAspect = (newAspect + " " + atom[0]).replaceAll(" ", "_").toLowerCase();
148             this.semanticConcepts.remove(this.semanticConcepts.size() - 1);
149           } else {
150             newAspect = atom[0].replaceAll(" ", "_").toLowerCase();
151           }
152           if(!this.semanticConcepts.contains(newAspect)) {
153             this.semanticConcepts.add(newAspect);
154           }
155           compoundNounFlag = true;
156         } else {
157           compoundNounFlag = false;
158         }
159       }
160     }
161     
162     
163     
164     for(DependencyTree dt: this.dts)
165     {
166       ArrayList<String> dependencies = dt.getDependecies();
167       for(String curDep: dependencies) {
168         String[] tokens = curDep.split("\\^\\^\\^");
169         if(tokens.length == 3) {
170           if(tokens[0].trim().compareTo("dobj") == 0) {
171             String[] tokenOne = tokens[1].split("-");
172             String[] tokenTwo = tokens[2].split("-");
173             String partOne = wnlex.getWordLexicalizationByType(tokenOne[0], "V");
174             if(partOne == null) {
175               partOne = tokenOne[0];
176             }
177             String partTwo = wnlex.getWordLexicalizationByType(tokenTwo[0], "N");
178             if(partTwo == null) {
179               partTwo = tokenTwo[0];
180             }
181             String newAspect = partOne + "_" + partTwo;
182             if(!this.semanticConcepts.contains(newAspect)) {
183               this.semanticConcepts.add(newAspect);
184             }
185           }
186         }
187       }
188     }
189     
190   }
191   
192   
193   /**
194    * Extracts the set of aspects
195    */
196   public void extractAspects(WordNetLoader wnl) {
197     ArrayList<String> tempAspects = new ArrayList<String>();
198     this.aspects = new HashMap<String, ArrayList<String>>();
199     //String terms = this.lexString.replaceAll("\\./\\.", "");
200     //String[] termsList = terms.split(" ");
201     
202     /* Extracts aspects */
203     String[] termsList = this.lexString.split(" ");
204     boolean compoundNounFlag = false;
205     for(String currentTerm : termsList) {
206       String[] atom = currentTerm.split("/");
207       if(atom.length > 1) {
208         Integer stopFlag = wnl.getStopwords().get(atom[0]);
209         if(stopFlag != null) continue;
210         if(atom[1].compareTo("NN") == 0 || atom[1].compareTo("NNP") == 0 || atom[1].compareTo("NNPS") == 0 ||
211            atom[1].compareTo("NNS") == 0 || atom[1].compareTo("FW") == 0) {
212           String newAspect;
213           if(compoundNounFlag == true) {
214             newAspect = tempAspects.get(tempAspects.size() - 1);
215             newAspect = (newAspect + " " + atom[0]).replaceAll(" ", "_").toLowerCase();
216             tempAspects.remove(tempAspects.size() - 1);
217           } else {
218             newAspect = atom[0].replaceAll(" ", "_").toLowerCase();
219           }
220           if(!tempAspects.contains(newAspect)) {
221             tempAspects.add(newAspect);
222           }
223           compoundNounFlag = true;
224         } else {
225           compoundNounFlag = false;
226         }
227       }
228     }
229     
230     
231     /* creates the list of features connect with each aspect */
232     //ArrayList<Tree> trees = this.extractTree(this.parsedTree);
233     HashMap<Integer, ArrayList<String>> featureSentence = new HashMap<Integer, ArrayList<String>>();
234     for(Tree pt: this.parsedTree)
235     {
236       this.sentenceMarker = 0;
237       this.extractRelatedFeatures(pt, this.sentenceMarker, featureSentence);
238       for(String curAspect: tempAspects) {
239         String[] compoundAspect = curAspect.split(" ");
240         //HashMap<String, Integer> relatedFeatures = new HashMap<String, Integer>();
241         for(String cA: compoundAspect) {
242           Iterator<Integer> it = featureSentence.keySet().iterator();
243           while(it.hasNext()) {
244             int key = it.next();
245             ArrayList<String> currentTree = featureSentence.get(key);
246             if(currentTree.contains(cA)) {
247               ArrayList<String> featuresList = this.aspects.get(curAspect);
248               if(featuresList == null) {
249                 featuresList = new ArrayList<String>();
250               }
251               for(String currentFeature: currentTree) {
252                 if(currentFeature.compareTo(cA) != 0) {
253                   featuresList.add(currentFeature);
254                 }
255               }
256               this.aspects.put(curAspect, featuresList);
257             }
258           } 
259         }
260       }
261     }
262   }
263 
264   
265   
266   public ArrayList<Tree> extractTree(Tree t) {
267     ArrayList<Tree> wanted = new ArrayList<Tree>();
268     if (t.label().value().equals("S") || t.label().value().equals("SBAR")) {
269       wanted.add(t);
270       for (Tree child : t.children()) {
271         ArrayList<Tree> temp = new ArrayList<Tree>();
272         temp = this.extractTree(child);
273         if (temp.size() > 0) {
274           int o = -1;
275           o = wanted.indexOf(t);
276           if (o != -1) {
277             wanted.remove(o);
278           }
279         }
280         wanted.addAll(temp);
281       }
282     } else {
283       for (Tree child : t.children()) {
284         wanted.addAll(this.extractTree(child));
285       }
286     }
287     //if(wanted.size() > 0) {
288     //  System.out.println(wanted.toString());
289     //}
290     return wanted;
291   }
292   
293   
294   
295   private void extractRelatedFeatures(Tree t, int marker, HashMap<Integer, ArrayList<String>> featureSentence) {
296     //HashMap<String, Integer> features = new HashMap<String, Integer>();
297     int localmarker = marker;
298     if (t.label().value().equals("S") || t.label().value().equals("SBAR")) {
299       localmarker = this.sentenceMarker + 1;
300       this.sentenceMarker = localmarker;
301       marker++;
302     }
303     if (t.label().value().length() > 1 && t.label().value().equals(t.label().value().toLowerCase())) {
304       ArrayList<String> currentFeatures = featureSentence.get(marker);
305       if(currentFeatures == null) {
306         currentFeatures = new ArrayList<String>();
307       }
308       currentFeatures.add(t.label().value());
309       //features.put(t.label().value(), new Integer(1));
310       featureSentence.put(marker, currentFeatures);
311     }
312     for (Tree child: t.children()) {
313       HashMap<String, Integer> temp = new HashMap<String, Integer>();
314       this.extractRelatedFeatures(child, localmarker, featureSentence);
315       if (temp.size() > 0) {
316         Iterator<String> it = temp.keySet().iterator();
317         while(it.hasNext()) {
318           String currentFeature = (String) it.next();
319           //features.put(currentFeature, new Integer(1));
320         }
321       }
322     }
323     if(localmarker == 1) {
324       marker--;
325     }
326     //return features;
327   }
328   
329   
330 }