1   package eu.fbk.dkm.pikes.raid.sbrs;
2   
3   import ch.qos.logback.classic.Level;
4   import com.google.common.collect.ImmutableList;
5   import eu.fbk.dkm.pikes.naflib.Corpus;
6   import eu.fbk.dkm.pikes.raid.Component;
7   import eu.fbk.dkm.pikes.raid.Extractor;
8   import eu.fbk.dkm.pikes.raid.Trainer;
9   import eu.fbk.dkm.pikes.resources.NAFUtils;
10  import eu.fbk.dkm.pikes.resources.WordNet;
11  import eu.fbk.utils.svm.Util;
12  import ixa.kaflib.KAFDocument;
13  import org.slf4j.Logger;
14  import org.slf4j.LoggerFactory;
15  
16  import java.io.File;
17  import java.io.IOException;
18  import java.nio.file.Path;
19  import java.util.*;
20  import java.util.stream.StreamSupport;
21  
22  /**
23   * Created by alessio on 20/08/15.
24   */
25  
26  public class CreateTraining {
27  
28  	private static final Logger LOGGER = LoggerFactory.getLogger(CreateTraining.class);
29  
30  	public static void main(String[] args) throws IOException {
31  
32  		String folder = "/Users/alessio/Documents/Resources/johansson-moschitti/NAF-parsed";
33  		String wordnetPath = "/Users/alessio/Documents/Resources/wn-3.0-dict/dict";
34  
35  		// ---
36  
37  		((ch.qos.logback.classic.Logger) LoggerFactory.getLogger("eu.fbk")).setLevel(Level.DEBUG);
38  
39  		File folderFile = new File(folder);
40  		WordNet.setPath(wordnetPath);
41  
42  		LOGGER.info("Starting parsing");
43  
44  		final Component[] components = Component.forLetters("ht").toArray(new Component[0]);
45  		Set<String> labels = new HashSet<>();
46  		labels.add("gold-mpqa-subjective");
47  
48  		final List<Path> inputPaths = new ArrayList<>();
49  		inputPaths.add(folderFile.toPath());
50  
51  		final Properties properties = Util.parseProperties("joint=true holder.unique=true target.unique=true");
52  //		final Trainer<? extends Extractor> trainer = Trainer.create(properties, components);
53  		final Trainer<? extends Extractor> trainer = new SBRSTrainer(properties, components);
54  
55  		final List<Path> files = Util.fileMatch(inputPaths, ImmutableList.of(".naf", ".naf.gz", ".naf.bz2", ".naf.xz", ".xml", ".xml.gz", ".xml.bz2", ".xml.xz"), false, false);
56  		Iterable<KAFDocument> documents = files != null ? Corpus.create(false, files)
57  				: ImmutableList.of(NAFUtils.readDocument(null));
58  
59  		StreamSupport.stream(documents.spliterator(), false).forEach(
60  				(final KAFDocument document) -> {
61  					trainer.add(document, labels);
62  				});
63  //		final Extractor extractor = trainer.train();
64  
65  	}
66  
67  }