1   package eu.fbk.dkm.pikes.resources.trec;
2   
3   import com.google.common.base.Charsets;
4   import com.google.common.collect.HashMultimap;
5   import eu.fbk.utils.core.CommandLine;
6   import org.apache.commons.io.FileUtils;
7   import org.slf4j.Logger;
8   import org.slf4j.LoggerFactory;
9   
10  import java.io.BufferedWriter;
11  import java.io.File;
12  import java.io.FileWriter;
13  import java.util.Iterator;
14  import java.util.List;
15  
16  /**
17   * Created by alessio on 15/12/15.
18   */
19  
20  public class Qrels {
21  
22      private static final Logger LOGGER = LoggerFactory.getLogger(Qrels.class);
23  //    private static String folder = "/Users/alessio/Documents/scripts/pikesir/test/trec/queries/";
24  //    private static String outputFile = "/Users/alessio/Documents/scripts/pikesir/test/trec/queries.tsv";
25  
26      public static void main(String[] args) {
27  
28          try {
29  
30              final CommandLine cmd = CommandLine
31                      .parser()
32                      .withName("trec-qrels-converter")
33                      .withHeader("Convert TREC qrels into TSV format")
34                      .withOption("i", "input", "Input folder", "FOLDER", CommandLine.Type.DIRECTORY_EXISTING, true,
35                              false, true)
36                      .withOption("o", "output", "Output file", "FILE", CommandLine.Type.FILE, true, false, true)
37                      .withLogger(LoggerFactory.getLogger("eu.fbk")) //
38                      .parse(args);
39  
40              File inputFolder = cmd.getOptionValue("input", File.class);
41              File outputFile = cmd.getOptionValue("output", File.class);
42  
43              HashMultimap<String, String> qrels = HashMultimap.create();
44  
45              Iterator<File> fileIterator = FileUtils.iterateFiles(inputFolder, null, true);
46              while (fileIterator.hasNext()) {
47                  File file = fileIterator.next();
48  
49                  LOGGER.info(file.getName());
50  
51                  List<String> lines = FileUtils.readLines(file, Charsets.UTF_8);
52                  for (String line : lines) {
53                      line = line.trim();
54  
55                      String[] parts = line.split("\\s+");
56  
57                      String qID = "q" + parts[0];
58                      String docID = parts[2];
59                      String relevance = parts[3];
60  
61                      if (relevance.equals("0")) {
62                          continue;
63                      }
64  
65                      qrels.put(qID, docID);
66                  }
67              }
68  
69              BufferedWriter writer = new BufferedWriter(new FileWriter(outputFile));
70              for (String key : qrels.keySet()) {
71                  writer.append(key).append("\t");
72                  StringBuffer stringBuffer = new StringBuffer();
73                  for (String value : qrels.get(key)) {
74                      stringBuffer.append(";").append(value).append(":1");
75                  }
76                  writer.append(stringBuffer.toString().substring(1));
77                  writer.append("\n");
78              }
79  
80              writer.close();
81  
82          } catch (Exception e) {
83              CommandLine.fail(e);
84          }
85      }
86  }