1 package eu.fbk.dkm.pikes.resources.ecb;
2
3 import eu.fbk.utils.core.CommandLine;
4 import org.slf4j.Logger;
5 import org.slf4j.LoggerFactory;
6
7 import java.io.*;
8 import java.util.ArrayList;
9 import java.util.LinkedHashMap;
10 import java.util.List;
11 import java.util.Map;
12 import java.util.regex.Matcher;
13 import java.util.regex.Pattern;
14
15
16
17
18
19 public class ECBPlusSplitter {
20
21 private static final Logger LOGGER = LoggerFactory.getLogger(ECBPlusSplitter.class);
22 private static final Pattern headerPattern = Pattern.compile("#begin document ([0-9]+)_.*");
23
24 public static void main(String[] args) {
25 try {
26 final CommandLine cmd = CommandLine
27 .parser()
28 .withName("./ecbplus-splitter")
29 .withHeader("Splits ECB+ results by folder")
30 .withOption("i", "input", "Input txt file", "FILE",
31 CommandLine.Type.FILE_EXISTING, true, false, true)
32 .withOption("o", "output", "Output folder", "FOLDER",
33 CommandLine.Type.DIRECTORY, true, false, true)
34 .withLogger(LoggerFactory.getLogger("eu.fbk")).parse(args);
35
36 File inputFile = cmd.getOptionValue("input", File.class);
37 File outputFolder = cmd.getOptionValue("output", File.class);
38
39 Map<String, List<String>> res = new LinkedHashMap<>();
40
41 BufferedReader reader = new BufferedReader(new FileReader(inputFile));
42 String line;
43 String folder = null;
44 while ((line = reader.readLine()) != null) {
45 Matcher matcher = headerPattern.matcher(line);
46 if (matcher.find()) {
47 folder = matcher.group(1);
48 }
49
50 if (folder == null) {
51 continue;
52 }
53 res.putIfAbsent(folder, new ArrayList<>());
54 res.get(folder).add(line);
55 }
56
57 reader.close();
58
59 outputFolder.mkdirs();
60
61 for (String key : res.keySet()) {
62 String thisFileString = outputFolder.getAbsolutePath() + File.separator + key + ".txt";
63 File thisFile = new File(thisFileString);
64
65 BufferedWriter writer = new BufferedWriter(new FileWriter(thisFile));
66 for (String thisLine : res.get(key)) {
67 writer.append(thisLine).append("\n");
68 }
69
70 writer.close();
71 }
72
73 } catch (Exception e) {
74 CommandLine.fail(e);
75 }
76
77 }
78 }