1   package eu.fbk.dkm.pikes.raid.mdfsa.parser;
2   
3   import edu.stanford.nlp.ling.HasWord;
4   import edu.stanford.nlp.ling.Sentence;
5   import edu.stanford.nlp.parser.lexparser.LexicalizedParser;
6   import edu.stanford.nlp.process.DocumentPreprocessor;
7   import edu.stanford.nlp.trees.*;
8   
9   import java.io.Reader;
10  import java.io.StringReader;
11  import java.util.ArrayList;
12  import java.util.Collection;
13  import java.util.List;
14  
15  public class DependenciesBuilder {
16  
17    private LexicalizedParser lp;
18    private TreebankLanguagePack tlp;
19    private GrammaticalStructureFactory gsf;
20    private ArrayList<DependencyTree> parsedTree;
21    private ArrayList<Tree> trees;
22    
23    public DependenciesBuilder() {
24    }
25    
26    public void init() {
27      this.lp = LexicalizedParser.loadModel("edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz",
28                                            "-maxLength", "80", "-retainTmpSubcategories");
29      this.tlp = new PennTreebankLanguagePack();
30      this.gsf = this.tlp.grammaticalStructureFactory();
31      //this.parsedTree = new ArrayList<DependencyTree>();
32      //this.trees = new ArrayList<Tree>();
33    }
34    
35    public ArrayList<Tree> getParsedTrees() {
36      return this.trees;
37    }
38    
39    public ArrayList<DependencyTree> getDependencyTrees() {
40      return this.parsedTree;
41    }
42    
43    public void buildDependeciesTree(String text) {
44      this.parsedTree = new ArrayList<DependencyTree>();
45      this.trees = new ArrayList<Tree>();
46      
47      Reader reader = new StringReader(text);
48      DocumentPreprocessor dp = new DocumentPreprocessor(reader);
49      for (List<HasWord> sentence : new DocumentPreprocessor(reader)) {
50  
51        //String[] sent = text.split(" ");
52        //this.parsedTree = this.lp.apply(Sentence.toWordList(sent));
53        Tree parsedTree = this.lp.apply(sentence);
54        //TreePrint tp = new TreePrint("penn,typedDependenciesCollapsed");
55        //tp.printTree(parse);
56        GrammaticalStructure gs = this.gsf.newGrammaticalStructure(parsedTree);
57        Collection<TypedDependency> tdl = gs.typedDependenciesCCprocessed();
58        
59        DependencyTree dt = new DependencyTree();
60        for(TypedDependency td: tdl) {
61          TreeGraphNode dep = td.dep();
62          TreeGraphNode gov = td.gov();
63          GrammaticalRelation gr = td.reln();
64          String depString = gr.toString() + "^^^" + gov.toString() + "^^^" + dep.toString();
65          //System.out.println(depString);
66          dt.addDependency(depString);
67        }
68        this.parsedTree.add(dt);
69        this.trees.add(parsedTree);
70      }
71    }
72    
73    
74    
75    public ArrayList<DependencyTree> buildDependeciesTrees(ArrayList<String> texts) {
76      ArrayList<DependencyTree> dtList = new ArrayList<DependencyTree>();
77      int textId = 1;
78      for(String text: texts) {
79       System.out.println(textId);
80       String[] sent = text.split(" ");
81        Tree parse = this.lp.apply(Sentence.toWordList(sent));
82        GrammaticalStructure gs = this.gsf.newGrammaticalStructure(parse);
83        Collection<TypedDependency> tdl = gs.typedDependenciesCCprocessed();
84        DependencyTree curDT = new DependencyTree();
85        for(TypedDependency td: tdl) {
86          TreeGraphNode dep = td.dep();
87          TreeGraphNode gov = td.gov();
88          GrammaticalRelation gr = td.reln();
89          String depString = gr.toString() + "^^^" + gov.toString() + "^^^" + dep.toString();
90          curDT.addDependency(depString);
91        }
92        textId++;
93        dtList.add(curDT);
94      }
95  
96      return dtList;
97    }
98  
99  }