1 package eu.fbk.dkm.pikes.raid.mdfsa.parser;
2
3 import com.hp.hpl.jena.rdf.model.*;
4 import edu.stanford.nlp.parser.lexparser.LexicalizedParser;
5 import edu.stanford.nlp.trees.Tree;
6 import eu.fbk.dkm.pikes.raid.mdfsa.FileManager;
7 import eu.fbk.dkm.pikes.raid.mdfsa.FileManager.Mode;
8 import eu.fbk.dkm.pikes.raid.mdfsa.MaxEntTagger;
9 import eu.fbk.shell.mdfsa.data.structures.SentenceStructuredRepresentation;
10 import eu.fbk.dkm.pikes.raid.mdfsa.wordnet.WordNetLexicalizer;
11 import eu.fbk.dkm.pikes.raid.mdfsa.wordnet.WordNetLoader;
12 import org.w3c.dom.Document;
13
14 import java.util.ArrayList;
15 import java.util.Properties;
16
17 public class ReviewsParser {
18
19 private Properties prp;
20 private WordNetLoader wnl;
21
22 public ReviewsParser(Properties prp, WordNetLoader wnl) {
23 this.prp = prp;
24 this.wnl = wnl;
25 }
26
27
28
29
30
31
32 public Document[] loadFull(String datasetName) {
33 String positiveReviews = ((String) this.prp.getProperty("mdfsa.dataset.basepath")).concat(datasetName + "/positive.review");
34 String negativeReviews = ((String) this.prp.getProperty("mdfsa.dataset.basepath")).concat(datasetName + "/negative.review");
35 String allReviews = ((String) this.prp.getProperty("mdfsa.dataset.basepath")).concat(datasetName + "/all.review");
36 return null;
37 }
38
39
40
41
42
43
44 public ArrayList<SentenceStructuredRepresentation> load(String filename) {
45
46 WordNetLexicalizer wnlex = new WordNetLexicalizer(this.wnl.getAllTerms(), this.wnl.getAllExceptions());
47 ArrayList<SentenceStructuredRepresentation> ssrList = new ArrayList<SentenceStructuredRepresentation>();
48 LexicalizedParser treeParser;
49 DependenciesBuilder db = new DependenciesBuilder();
50 MaxEntTagger met = new MaxEntTagger(this.prp);
51 db.init();
52 FileManager fm = new FileManager(filename, Mode.READ);
53 Model content = fm.importRDFContent();
54
55 String task = prp.getProperty("mdfsa.task");
56
57 int reviewId = 1;
58
59 StmtIterator iter = content.listStatements();
60
61 System.out.println(content.size());
62 int stmtID = 0;
63
64 while (iter.hasNext()) {
65 System.out.println("Loading sentence " + reviewId);
66 Statement stmt = iter.nextStatement();
67 Resource subject = stmt.getSubject();
68 Property predicate = stmt.getPredicate();
69 RDFNode object = stmt.getObject();
70
71
72 int endText = object.toString().indexOf("^^");
73 String currentReviewOriginal = object.toString().substring(1, endText).replaceAll("\n", "");
74 String currentReview = currentReviewOriginal.replaceAll("\\.", " \\. ");
75 currentReview = currentReview.replaceAll("\\:", " \\: ");
76 currentReview = currentReview.replaceAll("\\,", " \\, ");
77 currentReview = currentReview.replaceAll("\\!", " \\! ");
78 currentReview = currentReview.replaceAll("\\?", " \\? ");
79 currentReview = currentReview.replaceAll("( )+", " ");
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105 currentReview = currentReview.replaceAll("\t", "");
106
107
108
109
110 String taggedReview = null;
111
112
113 ArrayList<DependencyTree> curDt = null;
114
115
116 ArrayList<Tree> parsedTree = null;
117
118
119
120
121 taggedReview = met.tag(currentReview);
122 taggedReview = taggedReview.replaceAll("/\\.", "/\\. ");
123
124
125 db.buildDependeciesTree(currentReview.toLowerCase());
126
127
128 curDt = db.getDependencyTrees();
129 parsedTree = db.getParsedTrees();
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144 SentenceStructuredRepresentation ssr = new SentenceStructuredRepresentation(this.prp);
145 ssr.setUri(subject.toString());
146 ssr.setOriginalText(currentReviewOriginal);
147 ssr.setPosTaggedString(taggedReview);
148 ssr.setDependencyTree(curDt);
149 ssr.setParsedTree(parsedTree);
150
151
152
153
154
155 ssr.createLexicalizedRepresentation(wnlex);
156 ssr.extractSemanticConcepts(this.wnl, wnlex);
157 ssr.extractAspects(this.wnl);
158
159
160 ssrList.add(ssr);
161
162
163
164
165
166 reviewId++;
167 }
168 return ssrList;
169 }
170
171
172
173
174
175
176
177 public void convertReviewToESWCChallenge(String filename, String datasetName) {
178 FileManager fm = new FileManager(filename, Mode.READ);
179
180 ArrayList<String> contents = fm.importSimpleTextContent();
181
182 FileManager rdfOut = new FileManager("/home/drago/Documents/java_projects/research/nlp/multi_domain_fuzzy_sentiment_analysis/eswc2014_challenge_mdfsa_dragoni/task3/" + datasetName + ".validation.rdf.xml", Mode.WRITE);
183
184 rdfOut.write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
185 rdfOut.write("<rdf:RDF xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">");
186 int reviewId = 1;
187
188
189
190 for(String currentReview: contents) {
191
192
193 currentReview = currentReview.replace("&", "&");
194
195
196
197 rdfOut.write("\t<rdf:Description rdf:about=\"http://sentic.net/challenge/sentence_" + reviewId + "\">");
198
199 rdfOut.write("\t\t\t<text xmlns=\"http://sentic.net/challenge/\" rdf:datatype=\"http://www.w3.org/TR/rdf-text/\">");
200
201 rdfOut.write("\t\t\t" + currentReview + "");
202 rdfOut.write("\t\t\t</text>");
203
204 rdfOut.write("\t</rdf:Description>");
205
206
207
208
209
210
211 reviewId++;
212 }
213 rdfOut.write("</rdf:RDF>");
214 rdfOut.close();
215 fm.close();
216 }
217
218
219 }