1   package eu.fbk.dkm.pikes.resources;
2   
import com.google.common.io.Files;
import eu.fbk.utils.core.CommandLine;
import ixa.kaflib.KAFDocument;
import org.apache.commons.io.FileUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import java.io.File;
import java.io.IOException;
13  
14  public class Txt2Naf {
15  
16      private static final Logger LOGGER = LoggerFactory.getLogger(Txt2Naf.class);
17  //    private static final String DEFAULT_PREFIX = "http://dkm.fbk.eu/pikes/dataset/ecb";
18  
19      public static void main(String[] args) {
20          try {
21              final CommandLine cmd = CommandLine
22                      .parser()
23                      .withName("./taol-extractor")
24                      .withHeader("Convert file from txt to NAF")
25                      .withOption("i", "input", "Input folder", "FOLDER",
26                              CommandLine.Type.DIRECTORY_EXISTING, true, false, true)
27                      .withOption("o", "output", "Output folder", "FOLDER",
28                              CommandLine.Type.DIRECTORY, true, false, true)
29                      .withLogger(LoggerFactory.getLogger("eu.fbk")).parse(args);
30  
31              File inputFolder = cmd.getOptionValue("input", File.class);
32              File outputFolder = cmd.getOptionValue("output", File.class);
33  
34              if (!outputFolder.exists()) {
35                  outputFolder.mkdirs();
36              }
37  
38              for (final File file : Files.fileTreeTraverser().preOrderTraversal(inputFolder)) {
39                  if (!file.isFile()) {
40                      continue;
41                  }
42                  if (file.getName().startsWith(".")) {
43                      continue;
44                  }
45  
46                  String content = FileUtils.readFileToString(file, "utf-8");
47  
48                  File outputFile = new File(
49                          outputFolder.getAbsolutePath() + File.separator +
50                                  file.getAbsolutePath().substring(
51                                          inputFolder.getAbsolutePath().length()).replace(".txt",".naf"));
52                  Files.createParentDirs(outputFile);
53  
54                  KAFDocument document = new KAFDocument("en", "v3");
55  
56                  KAFDocument.Public documentPublic = document.createPublic();
57                  documentPublic.uri = "file://" + file.getAbsolutePath();
58                  documentPublic.publicId = file.getName();
59  
60                  KAFDocument.FileDesc documentFileDesc = document.createFileDesc();
61                  documentFileDesc.filename = file.getName();
62                  documentFileDesc.title = file.getName();
63                  document.setRawText(content);
64                  document.save(outputFile.getAbsolutePath());
65              }
66  
67          } catch (Exception e) {
68              CommandLine.fail(e);
69          }
70  
71      }
72  
73  }