1 package eu.fbk.dkm.pikes.raid.sbrs;
2
3 import ch.qos.logback.classic.Level;
4 import com.google.common.collect.ImmutableList;
5 import eu.fbk.dkm.pikes.naflib.Corpus;
6 import eu.fbk.dkm.pikes.raid.Component;
7 import eu.fbk.dkm.pikes.raid.Extractor;
8 import eu.fbk.dkm.pikes.raid.Trainer;
9 import eu.fbk.dkm.pikes.resources.NAFUtils;
10 import eu.fbk.dkm.pikes.resources.WordNet;
11 import eu.fbk.utils.svm.Util;
12 import ixa.kaflib.KAFDocument;
13 import org.slf4j.Logger;
14 import org.slf4j.LoggerFactory;
15
16 import java.io.File;
17 import java.io.IOException;
18 import java.nio.file.Path;
19 import java.util.*;
20 import java.util.stream.StreamSupport;
21
22
23
24
25
26 public class CreateTraining {
27
28 private static final Logger LOGGER = LoggerFactory.getLogger(CreateTraining.class);
29
30 public static void main(String[] args) throws IOException {
31
32 String folder = "/Users/alessio/Documents/Resources/johansson-moschitti/NAF-parsed";
33 String wordnetPath = "/Users/alessio/Documents/Resources/wn-3.0-dict/dict";
34
35
36
37 ((ch.qos.logback.classic.Logger) LoggerFactory.getLogger("eu.fbk")).setLevel(Level.DEBUG);
38
39 File folderFile = new File(folder);
40 WordNet.setPath(wordnetPath);
41
42 LOGGER.info("Starting parsing");
43
44 final Component[] components = Component.forLetters("ht").toArray(new Component[0]);
45 Set<String> labels = new HashSet<>();
46 labels.add("gold-mpqa-subjective");
47
48 final List<Path> inputPaths = new ArrayList<>();
49 inputPaths.add(folderFile.toPath());
50
51 final Properties properties = Util.parseProperties("joint=true holder.unique=true target.unique=true");
52
53 final Trainer<? extends Extractor> trainer = new SBRSTrainer(properties, components);
54
55 final List<Path> files = Util.fileMatch(inputPaths, ImmutableList.of(".naf", ".naf.gz", ".naf.bz2", ".naf.xz", ".xml", ".xml.gz", ".xml.bz2", ".xml.xz"), false, false);
56 Iterable<KAFDocument> documents = files != null ? Corpus.create(false, files)
57 : ImmutableList.of(NAFUtils.readDocument(null));
58
59 StreamSupport.stream(documents.spliterator(), false).forEach(
60 (final KAFDocument document) -> {
61 trainer.add(document, labels);
62 });
63
64
65 }
66
67 }