1   package eu.fbk.dkm.pikes.resources.wes;
2   
3   import com.fasterxml.jackson.databind.ObjectMapper;
4   import com.google.common.base.Charsets;
5   import com.google.common.io.Files;
6   import eu.fbk.utils.core.CommandLine;
7   import org.slf4j.Logger;
8   import org.slf4j.LoggerFactory;
9   
10  import java.io.*;
11  import java.net.HttpURLConnection;
12  import java.net.URL;
13  import java.net.URLEncoder;
14  import java.util.ArrayList;
15  import java.util.List;
16  import java.util.Map;
17  
18  /**
19   * Created by alessio on 11/12/15.
20   */
21  
22  public class QuerySolr {
23  
24      private static final Logger LOGGER = LoggerFactory.getLogger(QuerySolr.class);
25  
26  //    private static String nafQueriesFileName = "/Users/alessio/Documents/Resources/wes/wes2015.queries.solr.txt";
27  //    private static String outputFileName = "/Users/alessio/Documents/Resources/wes/solr-2.txt";
28      // Query pattern example "http://dkm-server-1:8983/solr/demo2/select?q=%s&fl=id&df=texttitle&wt=json&indent=true&rows=350"
29  
30      private static String DEFAULT_USER_AGENT = "FBK evaluation";
31  
32      private static ArrayList<String> sendGet(String query, String agent) throws Exception {
33          URL obj = new URL(query);
34          HttpURLConnection con = (HttpURLConnection) obj.openConnection();
35          ArrayList<String> ret = new ArrayList<>();
36  
37          // optional default is GET
38          con.setRequestMethod("GET");
39  
40          //add request header
41          con.setRequestProperty("User-Agent", agent);
42  
43          int responseCode = con.getResponseCode();
44          LOGGER.debug("Queried Google [{}], response code {}", query, responseCode);
45  
46          BufferedReader in = new BufferedReader(
47                  new InputStreamReader(con.getInputStream()));
48          String inputLine;
49          StringBuffer response = new StringBuffer();
50  
51          while ((inputLine = in.readLine()) != null) {
52              response.append(inputLine);
53          }
54          in.close();
55  
56          ObjectMapper mapper = new ObjectMapper();
57          Map<?, ?> root = mapper.readValue(response.toString(), Map.class);
58          Map<?, ?> response2 = (Map) root.get("response");
59          ArrayList<?> docs = (ArrayList) response2.get("docs");
60          if (docs != null) {
61              for (Object item : docs) {
62                  String id = (String) ((Map<?, ?>) item).get("id");
63                  ret.add(id);
64              }
65          }
66  
67          return ret;
68      }
69  
70      public static void main(String[] args) {
71          try {
72              final CommandLine cmd = CommandLine
73                      .parser()
74                      .withName("query-solr")
75                      .withHeader("Send WES queries to a Solr server")
76                      .withOption("q", "queries", "CSV file with queries", "FILE", CommandLine.Type.FILE_EXISTING, true,
77                              false, true)
78                      .withOption("p", "pattern", "Query pattern (use %s as placeholder for the query)", "URL",
79                              CommandLine.Type.STRING, true, false, true)
80                              // Query pattern example: http://dkm-server-1:8983/solr/demo2/select?q=%s&fl=id&df=texttitle&wt=json&indent=true&rows=350
81                      .withOption("o", "output", "Output file", "FILE", CommandLine.Type.FILE, true, false, true)
82                      .withOption("a", "agent", String.format("User agent, default %s", DEFAULT_USER_AGENT), "STRING",
83                              CommandLine.Type.STRING, true, false, false)
84                      .withLogger(LoggerFactory.getLogger("eu.fbk")) //
85                      .parse(args);
86  
87              File outputFile = cmd.getOptionValue("output", File.class);
88              File nafQueriesFile = cmd.getOptionValue("queries", File.class);
89              String userAgent = cmd.getOptionValue("agent", String.class);
90              String queryPattern = cmd.getOptionValue("pattern", String.class);
91  
92              BufferedWriter writer = new BufferedWriter(new FileWriter(outputFile));
93  
94              List<String> lines = Files.readLines(nafQueriesFile, Charsets.UTF_8);
95              for (String line : lines) {
96                  line = line.trim();
97                  if (line.length() == 0) {
98                      continue;
99                  }
100 
101                 if (line.startsWith("#")) {
102                     continue;
103                 }
104 
105                 String[] parts = line.split("\\t");
106                 String id = parts[0];
107                 String query = parts[1];
108 
109                 LOGGER.info(query);
110                 query = URLEncoder.encode(query, "UTF-8");
111                 query = String.format(queryPattern, query);
112                 ArrayList<String> ids = sendGet(query, userAgent);
113 
114                 writer.append(id);
115                 for (String s : ids) {
116                     writer.append("\t").append(s);
117                 }
118                 writer.append("\n");
119             }
120 
121             writer.close();
122 
123         } catch (Exception e) {
124             CommandLine.fail(e);
125         }
126     }
127 }