1   package eu.fbk.dkm.pikes.resources.ecb;
2   
3   import eu.fbk.utils.core.CommandLine;
4   import org.slf4j.Logger;
5   import org.slf4j.LoggerFactory;
6   
7   import java.io.*;
8   import java.util.ArrayList;
9   import java.util.LinkedHashMap;
10  import java.util.List;
11  import java.util.Map;
12  import java.util.regex.Matcher;
13  import java.util.regex.Pattern;
14  
15  /**
16   * Created by alessio on 29/09/16.
17   */
18  
19  public class ECBPlusSplitter {
20  
21      private static final Logger LOGGER = LoggerFactory.getLogger(ECBPlusSplitter.class);
22      private static final Pattern headerPattern = Pattern.compile("#begin document ([0-9]+)_.*");
23  
24      public static void main(String[] args) {
25          try {
26              final CommandLine cmd = CommandLine
27                      .parser()
28                      .withName("./ecbplus-splitter")
29                      .withHeader("Splits ECB+ results by folder")
30                      .withOption("i", "input", "Input txt file", "FILE",
31                              CommandLine.Type.FILE_EXISTING, true, false, true)
32                      .withOption("o", "output", "Output folder", "FOLDER",
33                              CommandLine.Type.DIRECTORY, true, false, true)
34                      .withLogger(LoggerFactory.getLogger("eu.fbk")).parse(args);
35  
36              File inputFile = cmd.getOptionValue("input", File.class);
37              File outputFolder = cmd.getOptionValue("output", File.class);
38  
39              Map<String, List<String>> res = new LinkedHashMap<>();
40  
41              BufferedReader reader = new BufferedReader(new FileReader(inputFile));
42              String line;
43              String folder = null;
44              while ((line = reader.readLine()) != null) {
45                  Matcher matcher = headerPattern.matcher(line);
46                  if (matcher.find()) {
47                      folder = matcher.group(1);
48                  }
49  
50                  if (folder == null) {
51                      continue;
52                  }
53                  res.putIfAbsent(folder, new ArrayList<>());
54                  res.get(folder).add(line);
55              }
56  
57              reader.close();
58  
59              outputFolder.mkdirs();
60  
61              for (String key : res.keySet()) {
62                  String thisFileString = outputFolder.getAbsolutePath() + File.separator + key + ".txt";
63                  File thisFile = new File(thisFileString);
64  
65                  BufferedWriter writer = new BufferedWriter(new FileWriter(thisFile));
66                  for (String thisLine : res.get(key)) {
67                      writer.append(thisLine).append("\n");
68                  }
69  
70                  writer.close();
71              }
72  
73          } catch (Exception e) {
74              CommandLine.fail(e);
75          }
76  
77      }
78  }