1 package eu.fbk.dkm.pikes.tintop;
2
3 import eu.fbk.fcw.utils.corpus.*;
4 import org.slf4j.Logger;
5 import org.slf4j.LoggerFactory;
6
7 import java.io.BufferedWriter;
8 import java.io.FileWriter;
9 import java.io.IOException;
10 import java.util.ArrayList;
11 import java.util.HashMap;
12 import java.util.Set;
13
14
15
16
17
18 public class Ontonotes2Giulio {
19
20 private static final Logger LOGGER = LoggerFactory.getLogger(Ontonotes2Giulio.class);
21
22 public static void main(String[] args) throws IOException {
23 String inputFile = args[0];
24 String outputFile = args[1];
25
26 Corpus corpus = Corpus.readDocumentFromFile(inputFile, "ontonotes-5");
27 BufferedWriter writer = new BufferedWriter(new FileWriter(outputFile));
28
29 for (Sentence sentence : corpus) {
30
31 HashMap<String, StringBuffer> buffers = new HashMap<>();
32 ArrayList<StringBuffer> additionalBuffers = new ArrayList<>();
33
34 buffers.put("id", new StringBuffer());
35 buffers.put("form", new StringBuffer());
36 buffers.put("lemma", new StringBuffer());
37 buffers.put("pos", new StringBuffer());
38 buffers.put("deplabel", new StringBuffer());
39 buffers.put("depparent", new StringBuffer());
40
41 for (Word word : sentence) {
42 buffers.get("id").append(word.getId()).append('\t');
43 buffers.get("form").append(word.getForm()).append('\t');
44 buffers.get("lemma").append(word.getLemma()).append('\t');
45 buffers.get("pos").append(word.getPos()).append('\t');
46 buffers.get("deplabel").append(word.getDepLabel()).append('\t');
47 buffers.get("depparent").append(word.getDepParent()).append('\t');
48 }
49
50 for (Srl srl : sentence.getSrls()) {
51 StringBuffer stringBuffer = new StringBuffer();
52 HashMap<Integer, String> tokens = new HashMap<>();
53
54 for (Word word : srl.getTarget()) {
55 tokens.put(word.getId(), srl.getLabel());
56 }
57 for (Role role : srl.getRoles()) {
58 for (Word word : role.getSpan()) {
59 Set<Integer> descendants = sentence.getDescendants(word.getId());
60 for (Integer descendant : descendants) {
61 tokens.put(descendant, role.getLabel());
62 }
63 }
64 }
65
66 for (int i = 0; i < sentence.getWords().size(); i++) {
67 String s = tokens.get(i + 1);
68 if (s != null) {
69 stringBuffer.append(s);
70 }
71 else {
72 stringBuffer.append("-");
73 }
74 stringBuffer.append('\t');
75 }
76
77 additionalBuffers.add(stringBuffer);
78 }
79
80 writer.append(buffers.get("id").toString().trim()).append('\n');
81 writer.append(buffers.get("form").toString().trim()).append('\n');
82 writer.append(buffers.get("lemma").toString().trim()).append('\n');
83 writer.append(buffers.get("pos").toString().trim()).append('\n');
84 writer.append(buffers.get("deplabel").toString().trim()).append('\n');
85 writer.append(buffers.get("depparent").toString().trim()).append('\n');
86
87 for (StringBuffer buffer : additionalBuffers) {
88 String s = buffer.toString();
89 s = s.substring(0, s.length() - 1);
90 writer.append(s).append('\n');
91 }
92
93 writer.append('\n');
94 }
95
96 writer.close();
97
98 }
99 }