1   package eu.fbk.dkm.pikes.rdf;
2   
3   import java.io.File;
4   import java.io.InputStream;
5   
6   import javax.xml.parsers.DocumentBuilder;
7   import javax.xml.parsers.DocumentBuilderFactory;
8   import javax.xml.transform.Transformer;
9   import javax.xml.transform.TransformerFactory;
10  import javax.xml.transform.dom.DOMSource;
11  import javax.xml.transform.stream.StreamResult;
12  
13  import org.w3c.dom.Document;
14  import org.w3c.dom.Element;
15  import org.w3c.dom.NodeList;
16  
17  import eu.fbk.rdfpro.util.IO;
18  
19  public class SemaforExtractor {
20  
21      public static void main(final String... args) throws Throwable {
22  
23          final DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
24          final DocumentBuilder builder = factory.newDocumentBuilder();
25  
26          final TransformerFactory transformerFactory = TransformerFactory.newInstance();
27          final Transformer transformer = transformerFactory.newTransformer();
28  
29          for (final String arg : args) {
30              final File dir = new File(arg);
31              for (final File file : dir.listFiles()) {
32                  if (file.getName().endsWith(".out")) {
33                      System.out.println("Procesing " + arg);
34                      try (InputStream stream = IO.read(file.getAbsolutePath())) {
35                          final Document document = builder.parse(stream);
36                          process(document);
37                          final DOMSource source = new DOMSource(document);
38                          final StreamResult result = new StreamResult(new File(file
39                                  .getAbsolutePath().replace(".out", ".xml")));
40                          transformer.transform(source, result);
41                      }
42                  }
43              }
44          }
45      }
46  
47      private static void process(final Document document) {
48          final String text = document.getElementsByTagName("text").item(0).getTextContent();
49          System.out.println(text);
50          final NodeList list = document.getElementsByTagName("label");
51          for (int i = 0; i < list.getLength(); ++i) {
52              final Element element = (Element) list.item(i);
53              final int start = Integer.parseInt(element.getAttribute("start"));
54              final int end = Integer.parseInt(element.getAttribute("end"));
55              final String span = text.substring(start, end + 1);
56              element.setAttribute("span", span);
57          }
58      }
59  
60  }