1 package eu.fbk.dkm.pikes.raid.mdfsa.parser;
2
3 import edu.stanford.nlp.ling.HasWord;
4 import edu.stanford.nlp.ling.Sentence;
5 import edu.stanford.nlp.parser.lexparser.LexicalizedParser;
6 import edu.stanford.nlp.process.DocumentPreprocessor;
7 import edu.stanford.nlp.trees.*;
8
9 import java.io.Reader;
10 import java.io.StringReader;
11 import java.util.ArrayList;
12 import java.util.Collection;
13 import java.util.List;
14
15 public class DependenciesBuilder {
16
17 private LexicalizedParser lp;
18 private TreebankLanguagePack tlp;
19 private GrammaticalStructureFactory gsf;
20 private ArrayList<DependencyTree> parsedTree;
21 private ArrayList<Tree> trees;
22
23 public DependenciesBuilder() {
24 }
25
26 public void init() {
27 this.lp = LexicalizedParser.loadModel("edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz",
28 "-maxLength", "80", "-retainTmpSubcategories");
29 this.tlp = new PennTreebankLanguagePack();
30 this.gsf = this.tlp.grammaticalStructureFactory();
31
32
33 }
34
35 public ArrayList<Tree> getParsedTrees() {
36 return this.trees;
37 }
38
39 public ArrayList<DependencyTree> getDependencyTrees() {
40 return this.parsedTree;
41 }
42
43 public void buildDependeciesTree(String text) {
44 this.parsedTree = new ArrayList<DependencyTree>();
45 this.trees = new ArrayList<Tree>();
46
47 Reader reader = new StringReader(text);
48 DocumentPreprocessor dp = new DocumentPreprocessor(reader);
49 for (List<HasWord> sentence : new DocumentPreprocessor(reader)) {
50
51
52
53 Tree parsedTree = this.lp.apply(sentence);
54
55
56 GrammaticalStructure gs = this.gsf.newGrammaticalStructure(parsedTree);
57 Collection<TypedDependency> tdl = gs.typedDependenciesCCprocessed();
58
59 DependencyTree dt = new DependencyTree();
60 for(TypedDependency td: tdl) {
61 TreeGraphNode dep = td.dep();
62 TreeGraphNode gov = td.gov();
63 GrammaticalRelation gr = td.reln();
64 String depString = gr.toString() + "^^^" + gov.toString() + "^^^" + dep.toString();
65
66 dt.addDependency(depString);
67 }
68 this.parsedTree.add(dt);
69 this.trees.add(parsedTree);
70 }
71 }
72
73
74
75 public ArrayList<DependencyTree> buildDependeciesTrees(ArrayList<String> texts) {
76 ArrayList<DependencyTree> dtList = new ArrayList<DependencyTree>();
77 int textId = 1;
78 for(String text: texts) {
79 System.out.println(textId);
80 String[] sent = text.split(" ");
81 Tree parse = this.lp.apply(Sentence.toWordList(sent));
82 GrammaticalStructure gs = this.gsf.newGrammaticalStructure(parse);
83 Collection<TypedDependency> tdl = gs.typedDependenciesCCprocessed();
84 DependencyTree curDT = new DependencyTree();
85 for(TypedDependency td: tdl) {
86 TreeGraphNode dep = td.dep();
87 TreeGraphNode gov = td.gov();
88 GrammaticalRelation gr = td.reln();
89 String depString = gr.toString() + "^^^" + gov.toString() + "^^^" + dep.toString();
90 curDT.addDependency(depString);
91 }
92 textId++;
93 dtList.add(curDT);
94 }
95
96 return dtList;
97 }
98
99 }