1 package eu.fbk.dkm.pikes.raid.mdfsa;
2
3 import edu.stanford.nlp.ling.HasWord;
4 import edu.stanford.nlp.ling.Sentence;
5 import edu.stanford.nlp.ling.TaggedWord;
6 import edu.stanford.nlp.tagger.maxent.MaxentTagger;
7
8 import java.io.BufferedReader;
9 import java.io.StringReader;
10 import java.util.ArrayList;
11 import java.util.List;
12 import java.util.Properties;
13
14
15 public class MaxEntTagger
16 {
17 private Properties prp;
18 private String modelName;
19 private MaxentTagger tagger;
20
21 public MaxEntTagger(Properties prp) {
22 try {
23 this.prp = prp;
24 this.modelName = this.prp.getProperty("mdfsa.extraction.taggermodel");
25 this.tagger = new MaxentTagger(this.modelName);
26 } catch(Exception e) {
27 e.printStackTrace();
28 System.out.println("Impossible to initialize the tagger model.");
29 }
30 }
31
32
33 public String tag(String fn) {
34 String taggedString = new String();
35 try {
36 List<List<HasWord>> sentences = MaxentTagger.tokenizeText(new BufferedReader(new StringReader(fn)));
37 for (List<HasWord> sentence : sentences) {
38 ArrayList<TaggedWord> tSentence = (ArrayList<TaggedWord>) tagger.tagSentence(sentence);
39 taggedString = taggedString.concat(Sentence.listToString(tSentence, false));
40 }
41 return taggedString;
42 } catch(Exception e) {
43 e.printStackTrace();
44 System.out.println("Error during the text tagging operation.");
45 }
46 return null;
47 }
48
49 }