1 package eu.fbk.dkm.pikes.resources;
2
3 import com.google.common.io.Files;
4 import eu.fbk.utils.core.CommandLine;
5 import ixa.kaflib.KAFDocument;
6 import org.apache.commons.io.FileUtils;
7 import org.slf4j.Logger;
8 import org.slf4j.LoggerFactory;
9
10 import javax.xml.parsers.DocumentBuilder;
11 import javax.xml.parsers.DocumentBuilderFactory;
12 import java.io.File;
13
14 public class Txt2Naf {
15
16 private static final Logger LOGGER = LoggerFactory.getLogger(Txt2Naf.class);
17
18
19 public static void main(String[] args) {
20 try {
21 final CommandLine cmd = CommandLine
22 .parser()
23 .withName("./taol-extractor")
24 .withHeader("Convert file from txt to NAF")
25 .withOption("i", "input", "Input folder", "FOLDER",
26 CommandLine.Type.DIRECTORY_EXISTING, true, false, true)
27 .withOption("o", "output", "Output folder", "FOLDER",
28 CommandLine.Type.DIRECTORY, true, false, true)
29 .withLogger(LoggerFactory.getLogger("eu.fbk")).parse(args);
30
31 File inputFolder = cmd.getOptionValue("input", File.class);
32 File outputFolder = cmd.getOptionValue("output", File.class);
33
34 if (!outputFolder.exists()) {
35 outputFolder.mkdirs();
36 }
37
38 for (final File file : Files.fileTreeTraverser().preOrderTraversal(inputFolder)) {
39 if (!file.isFile()) {
40 continue;
41 }
42 if (file.getName().startsWith(".")) {
43 continue;
44 }
45
46 String content = FileUtils.readFileToString(file, "utf-8");
47
48 File outputFile = new File(
49 outputFolder.getAbsolutePath() + File.separator +
50 file.getAbsolutePath().substring(
51 inputFolder.getAbsolutePath().length()).replace(".txt",".naf"));
52 Files.createParentDirs(outputFile);
53
54 KAFDocument document = new KAFDocument("en", "v3");
55
56 KAFDocument.Public documentPublic = document.createPublic();
57 documentPublic.uri = "file://" + file.getAbsolutePath();
58 documentPublic.publicId = file.getName();
59
60 KAFDocument.FileDesc documentFileDesc = document.createFileDesc();
61 documentFileDesc.filename = file.getName();
62 documentFileDesc.title = file.getName();
63 document.setRawText(content);
64 document.save(outputFile.getAbsolutePath());
65 }
66
67 } catch (Exception e) {
68 CommandLine.fail(e);
69 }
70
71 }
72
73 }