1   package eu.fbk.dkm.pikes.tintop;
2   
3   import com.google.common.collect.HashMultimap;
4   import com.google.gson.JsonElement;
5   import com.google.gson.JsonObject;
6   import com.google.gson.JsonParser;
7   import edu.stanford.nlp.ling.CoreAnnotations;
8   import edu.stanford.nlp.ling.CoreLabel;
9   import edu.stanford.nlp.pipeline.Annotation;
10  import edu.stanford.nlp.pipeline.StanfordCoreNLP;
11  import edu.stanford.nlp.time.TimeAnnotations;
12  import edu.stanford.nlp.util.CoreMap;
13  import eu.fbk.utils.corenlp.CustomAnnotations;
14  import org.slf4j.Logger;
15  import org.slf4j.LoggerFactory;
16  
17  import java.io.BufferedReader;
18  import java.io.IOException;
19  import java.io.InputStreamReader;
20  import java.net.HttpURLConnection;
21  import java.net.URL;
22  import java.net.URLEncoder;
23  import java.util.List;
24  import java.util.Map;
25  import java.util.Properties;
26  
27  /**
28   * Created by alessio on 26/02/15.
29   */
30  
31  public class StanfordTest {
32  
33      private static final Logger LOGGER = LoggerFactory.getLogger(StanfordTest.class);
34      private static final String prefix = "https://en.wikipedia.org/w/api.php?format=json&action=query&prop=extracts&explaintext=&exlimit=1&titles=";
35  
36      private static void printOutput(Annotation annotation) {
37          List<CoreMap> sents = annotation.get(CoreAnnotations.SentencesAnnotation.class);
38          for (CoreMap thisSent : sents) {
39  
40              List<CoreLabel> tokens = thisSent.get(CoreAnnotations.TokensAnnotation.class);
41              for (CoreLabel token : tokens) {
42                  System.out.println("Token: " + token);
43                  System.out.println("Index: " + token.index());
44                  System.out.println("Sent index: " + token.sentIndex());
45                  System.out.println("Begin: " + token.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class));
46                  System.out.println("End: " + token.get(CoreAnnotations.CharacterOffsetEndAnnotation.class));
47                  System.out.println("NER: " + token.get(CoreAnnotations.NamedEntityTagAnnotation.class));
48                  System.out.println(token.get(CoreAnnotations.NamedEntityTagAnnotation.class));
49                  System.out.println(token.get(CoreAnnotations.NormalizedNamedEntityTagAnnotation.class));
50                  System.out.println(token.get(CoreAnnotations.ValueAnnotation.class));
51  //                System.out.println(token.get(TimeExpression.Annotation.class));
52  //                System.out.println(token.get(TimeExpression.TimeIndexAnnotation.class));
53  //                System.out.println(token.get(CoreAnnotations.DistSimAnnotation.class));
54  //                System.out.println(token.get(CoreAnnotations.NumericCompositeTypeAnnotation.class));
55                  System.out.println(token.get(TimeAnnotations.TimexAnnotation.class));
56  //                System.out.println(token.get(CoreAnnotations.NumericValueAnnotation.class));
57  //                System.out.println(token.get(TimeExpression.ChildrenAnnotation.class));
58  //                System.out.println(token.get(CoreAnnotations.NumericTypeAnnotation.class));
59  //                System.out.println(token.get(CoreAnnotations.ShapeAnnotation.class));
60  //                System.out.println(token.get(Tags.TagsAnnotation.class));
61  //                System.out.println(token.get(CoreAnnotations.NumerizedTokensAnnotation.class));
62  //                System.out.println(token.get(CoreAnnotations.AnswerAnnotation.class));
63  //                System.out.println(token.get(CoreAnnotations.NumericCompositeValueAnnotation.class));
64  
65                  System.out.println();
66              }
67  
68              System.out.println("---");
69              System.out.println();
70          }
71      }
72  
73      public static String downloadPage(String sURL) {
74          String s = new String();
75          try {
76              URL obj = new URL(sURL);
77              HttpURLConnection con = (HttpURLConnection) obj.openConnection();
78  
79              // optional default is GET
80              con.setRequestMethod("GET");
81  
82              //add request header
83              con.setRequestProperty("User-Agent", "Mozilla/5.0");
84  
85              int responseCode = con.getResponseCode();
86              System.out.println("\nSending 'GET' request for URL: " + sURL);
87  //            System.out.println("Response Code : " + responseCode);
88  
89              BufferedReader in = new BufferedReader(
90                      new InputStreamReader(con.getInputStream()));
91              String inputLine;
92              StringBuffer response = new StringBuffer();
93  
94              while ((inputLine = in.readLine()) != null) {
95                  response.append(inputLine);
96              }
97              in.close();
98  
99              s = response.toString();
100         } catch (Exception e) {
101             e.printStackTrace();
102         }
103         return s;
104     }
105 
106     public static void main(String[] args) throws IOException {
107 
108         String text;
109         text = "Donald Trump set off a fierce new controversy Tuesday with remarks about the right to bear arms that were interpreted by many as a threat of violence against Hillary Clinton.";
110         text = "Vladimir \"Vladi\" Luxuria (born Wladimiro Guadagno in Foggia, Apulia, on June 24, 1965) is an Italian actress, writer, politician and television host. Luxuria was a Communist Refoundation Party member of the Italian parliament, belonging to Romano Prodi's L'Unione coalition. She was the first openly transgender member of Parliament in Europe, and the world's second openly transgender MP after New Zealander Georgina Beyer. She lost her seat in the election of April, 2008.\n"
111                 + "\n"
112                 + "In the 2006 general election, Luxuria was elected to the Chamber of Deputies by the Lazio 1 constituency in Rome. She lost her seat in the 2008 election. After the retirement of Beyer and Luxuria, there were no transgender MPs reported in the world, until 2011, when Anna Grodzka was elected to the Polish parliament.";
113 
114         String page = "Maria Montessori";
115         String url = prefix + URLEncoder.encode(page, "UTF-8");
116         String rawText = downloadPage(url);
117 
118         JsonParser parser = new JsonParser();
119 
120         JsonObject o = parser.parse(rawText).getAsJsonObject();
121         String id_page = "";
122         for (Map.Entry<String, JsonElement> stringJsonElementEntry : o.getAsJsonObject("query").getAsJsonObject("pages").entrySet()) {
123             id_page = stringJsonElementEntry.getKey();
124             break;
125         }
126         text = o.getAsJsonObject("query").getAsJsonObject("pages").getAsJsonObject(id_page).get("extract").getAsString();
127 
128         Properties props;
129         Annotation annotation;
130 
131         props = new Properties();
132         props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, wikipediacoref");
133         props.setProperty("customAnnotatorClass.wikipediacoref", "eu.fbk.fcw.wikipedia.WikipediaCorefAnnotator");
134 
135         StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
136         annotation = new Annotation(text);
137         annotation.set(CoreAnnotations.DocTitleAnnotation.class, "Maria Montessori");
138         pipeline.annotate(annotation);
139 
140 //        for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
141 //            System.out.println(sentence);
142 //        }
143 
144 //        HashMultimap<Integer, Integer> simpleCoref = annotation.get(CustomAnnotations.SimpleCorefAnnotation.class);
145 //        System.out.println(simpleCoref);
146 
147 //        System.out.println(annotation.get(CoreAnnotations.TextAnnotation.class));
148 
149 //        Map<Integer, CorefChain> coreferenceGraph = annotation.get(CorefCoreAnnotations.CorefChainAnnotation.class);
150 //        for (Object c : coreferenceGraph.keySet()) {
151 //            CorefChain chain = coreferenceGraph.get(c);
152 //            Map<IntPair, Set<CorefChain.CorefMention>> mentionMap = chain.getMentionMap();
153 //
154 //            System.out.println(mentionMap);
155 //            for (IntPair p : mentionMap.keySet()) {
156 //                for (CorefChain.CorefMention m : mentionMap.get(p)) {
157 //                    System.out.println(m.sentNum);
158 //                    System.out.println(m.startIndex);
159 //                    System.out.println(m.endIndex);
160 //                }
161 //            }
162 //        }
163 
164 //        if (text.length() < 1000) {
165 //            printOutput(annotation);
166 //        }
167 
168     }
169 }