1 package eu.fbk.dkm.pikes.tintop;
2
3 import com.google.common.collect.HashMultimap;
4 import com.google.gson.JsonElement;
5 import com.google.gson.JsonObject;
6 import com.google.gson.JsonParser;
7 import edu.stanford.nlp.ling.CoreAnnotations;
8 import edu.stanford.nlp.ling.CoreLabel;
9 import edu.stanford.nlp.pipeline.Annotation;
10 import edu.stanford.nlp.pipeline.StanfordCoreNLP;
11 import edu.stanford.nlp.time.TimeAnnotations;
12 import edu.stanford.nlp.util.CoreMap;
13 import eu.fbk.utils.corenlp.CustomAnnotations;
14 import org.slf4j.Logger;
15 import org.slf4j.LoggerFactory;
16
17 import java.io.BufferedReader;
18 import java.io.IOException;
19 import java.io.InputStreamReader;
20 import java.net.HttpURLConnection;
21 import java.net.URL;
22 import java.net.URLEncoder;
23 import java.util.List;
24 import java.util.Map;
25 import java.util.Properties;
26
27
28
29
30
31 public class StanfordTest {
32
33 private static final Logger LOGGER = LoggerFactory.getLogger(StanfordTest.class);
34 private static final String prefix = "https://en.wikipedia.org/w/api.php?format=json&action=query&prop=extracts&explaintext=&exlimit=1&titles=";
35
36 private static void printOutput(Annotation annotation) {
37 List<CoreMap> sents = annotation.get(CoreAnnotations.SentencesAnnotation.class);
38 for (CoreMap thisSent : sents) {
39
40 List<CoreLabel> tokens = thisSent.get(CoreAnnotations.TokensAnnotation.class);
41 for (CoreLabel token : tokens) {
42 System.out.println("Token: " + token);
43 System.out.println("Index: " + token.index());
44 System.out.println("Sent index: " + token.sentIndex());
45 System.out.println("Begin: " + token.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class));
46 System.out.println("End: " + token.get(CoreAnnotations.CharacterOffsetEndAnnotation.class));
47 System.out.println("NER: " + token.get(CoreAnnotations.NamedEntityTagAnnotation.class));
48 System.out.println(token.get(CoreAnnotations.NamedEntityTagAnnotation.class));
49 System.out.println(token.get(CoreAnnotations.NormalizedNamedEntityTagAnnotation.class));
50 System.out.println(token.get(CoreAnnotations.ValueAnnotation.class));
51
52
53
54
55 System.out.println(token.get(TimeAnnotations.TimexAnnotation.class));
56
57
58
59
60
61
62
63
64
65 System.out.println();
66 }
67
68 System.out.println("---");
69 System.out.println();
70 }
71 }
72
73 public static String downloadPage(String sURL) {
74 String s = new String();
75 try {
76 URL obj = new URL(sURL);
77 HttpURLConnection con = (HttpURLConnection) obj.openConnection();
78
79
80 con.setRequestMethod("GET");
81
82
83 con.setRequestProperty("User-Agent", "Mozilla/5.0");
84
85 int responseCode = con.getResponseCode();
86 System.out.println("\nSending 'GET' request for URL: " + sURL);
87
88
89 BufferedReader in = new BufferedReader(
90 new InputStreamReader(con.getInputStream()));
91 String inputLine;
92 StringBuffer response = new StringBuffer();
93
94 while ((inputLine = in.readLine()) != null) {
95 response.append(inputLine);
96 }
97 in.close();
98
99 s = response.toString();
100 } catch (Exception e) {
101 e.printStackTrace();
102 }
103 return s;
104 }
105
106 public static void main(String[] args) throws IOException {
107
108 String text;
109 text = "Donald Trump set off a fierce new controversy Tuesday with remarks about the right to bear arms that were interpreted by many as a threat of violence against Hillary Clinton.";
110 text = "Vladimir \"Vladi\" Luxuria (born Wladimiro Guadagno in Foggia, Apulia, on June 24, 1965) is an Italian actress, writer, politician and television host. Luxuria was a Communist Refoundation Party member of the Italian parliament, belonging to Romano Prodi's L'Unione coalition. She was the first openly transgender member of Parliament in Europe, and the world's second openly transgender MP after New Zealander Georgina Beyer. She lost her seat in the election of April, 2008.\n"
111 + "\n"
112 + "In the 2006 general election, Luxuria was elected to the Chamber of Deputies by the Lazio 1 constituency in Rome. She lost her seat in the 2008 election. After the retirement of Beyer and Luxuria, there were no transgender MPs reported in the world, until 2011, when Anna Grodzka was elected to the Polish parliament.";
113
114 String page = "Maria Montessori";
115 String url = prefix + URLEncoder.encode(page, "UTF-8");
116 String rawText = downloadPage(url);
117
118 JsonParser parser = new JsonParser();
119
120 JsonObject o = parser.parse(rawText).getAsJsonObject();
121 String id_page = "";
122 for (Map.Entry<String, JsonElement> stringJsonElementEntry : o.getAsJsonObject("query").getAsJsonObject("pages").entrySet()) {
123 id_page = stringJsonElementEntry.getKey();
124 break;
125 }
126 text = o.getAsJsonObject("query").getAsJsonObject("pages").getAsJsonObject(id_page).get("extract").getAsString();
127
128 Properties props;
129 Annotation annotation;
130
131 props = new Properties();
132 props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, wikipediacoref");
133 props.setProperty("customAnnotatorClass.wikipediacoref", "eu.fbk.fcw.wikipedia.WikipediaCorefAnnotator");
134
135 StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
136 annotation = new Annotation(text);
137 annotation.set(CoreAnnotations.DocTitleAnnotation.class, "Maria Montessori");
138 pipeline.annotate(annotation);
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168 }
169 }