1   package eu.fbk.dkm.pikes.tintop;
2   
3   import eu.fbk.fcw.utils.corpus.*;
4   import org.slf4j.Logger;
5   import org.slf4j.LoggerFactory;
6   
7   import java.io.BufferedWriter;
8   import java.io.FileWriter;
9   import java.io.IOException;
10  import java.util.ArrayList;
11  import java.util.HashMap;
12  import java.util.Set;
13  
14  /**
15   * Created by alessio on 20/01/16.
16   */
17  
18  public class Ontonotes2Giulio {
19  
20      private static final Logger LOGGER = LoggerFactory.getLogger(Ontonotes2Giulio.class);
21  
22      public static void main(String[] args) throws IOException {
23          String inputFile = args[0];
24          String outputFile = args[1];
25  
26          Corpus corpus = Corpus.readDocumentFromFile(inputFile, "ontonotes-5");
27          BufferedWriter writer = new BufferedWriter(new FileWriter(outputFile));
28  
29          for (Sentence sentence : corpus) {
30  
31              HashMap<String, StringBuffer> buffers = new HashMap<>();
32              ArrayList<StringBuffer> additionalBuffers = new ArrayList<>();
33  
34              buffers.put("id", new StringBuffer());
35              buffers.put("form", new StringBuffer());
36              buffers.put("lemma", new StringBuffer());
37              buffers.put("pos", new StringBuffer());
38              buffers.put("deplabel", new StringBuffer());
39              buffers.put("depparent", new StringBuffer());
40  
41              for (Word word : sentence) {
42                  buffers.get("id").append(word.getId()).append('\t');
43                  buffers.get("form").append(word.getForm()).append('\t');
44                  buffers.get("lemma").append(word.getLemma()).append('\t');
45                  buffers.get("pos").append(word.getPos()).append('\t');
46                  buffers.get("deplabel").append(word.getDepLabel()).append('\t');
47                  buffers.get("depparent").append(word.getDepParent()).append('\t');
48              }
49  
50              for (Srl srl : sentence.getSrls()) {
51                  StringBuffer stringBuffer = new StringBuffer();
52                  HashMap<Integer, String> tokens = new HashMap<>();
53  
54                  for (Word word : srl.getTarget()) {
55                      tokens.put(word.getId(), srl.getLabel());
56                  }
57                  for (Role role : srl.getRoles()) {
58                      for (Word word : role.getSpan()) {
59                          Set<Integer> descendants = sentence.getDescendants(word.getId());
60                          for (Integer descendant : descendants) {
61                              tokens.put(descendant, role.getLabel());
62                          }
63                      }
64                  }
65  
66                  for (int i = 0; i < sentence.getWords().size(); i++) {
67                      String s = tokens.get(i + 1);
68                      if (s != null) {
69                          stringBuffer.append(s);
70                      }
71                      else {
72                          stringBuffer.append("-");
73                      }
74                      stringBuffer.append('\t');
75                  }
76  
77                  additionalBuffers.add(stringBuffer);
78              }
79  
80              writer.append(buffers.get("id").toString().trim()).append('\n');
81              writer.append(buffers.get("form").toString().trim()).append('\n');
82              writer.append(buffers.get("lemma").toString().trim()).append('\n');
83              writer.append(buffers.get("pos").toString().trim()).append('\n');
84              writer.append(buffers.get("deplabel").toString().trim()).append('\n');
85              writer.append(buffers.get("depparent").toString().trim()).append('\n');
86  
87              for (StringBuffer buffer : additionalBuffers) {
88                  String s = buffer.toString();
89                  s = s.substring(0, s.length() - 1);
90                  writer.append(s).append('\n');
91              }
92  
93              writer.append('\n');
94          }
95  
96          writer.close();
97  
98      }
99  }