1 package eu.fbk.dkm.pikes.raid.mdfsa; 2 3 import edu.stanford.nlp.ling.HasWord; 4 import edu.stanford.nlp.ling.Sentence; 5 import edu.stanford.nlp.ling.TaggedWord; 6 import edu.stanford.nlp.tagger.maxent.MaxentTagger; 7 8 import java.io.BufferedReader; 9 import java.io.StringReader; 10 import java.util.ArrayList; 11 import java.util.List; 12 import java.util.Properties; 13 14 15 public class MaxEntTagger 16 { 17 private Properties prp; 18 private String modelName; 19 private MaxentTagger tagger; 20 21 public MaxEntTagger(Properties prp) { 22 try { 23 this.prp = prp; 24 this.modelName = this.prp.getProperty("mdfsa.extraction.taggermodel"); 25 this.tagger = new MaxentTagger(this.modelName); 26 } catch(Exception e) { 27 e.printStackTrace(); 28 System.out.println("Impossible to initialize the tagger model."); 29 } 30 } 31 32 33 public String tag(String fn) { 34 String taggedString = new String(); 35 try { 36 List<List<HasWord>> sentences = MaxentTagger.tokenizeText(new BufferedReader(new StringReader(fn))); 37 for (List<HasWord> sentence : sentences) { 38 ArrayList<TaggedWord> tSentence = (ArrayList<TaggedWord>) tagger.tagSentence(sentence); 39 taggedString = taggedString.concat(Sentence.listToString(tSentence, false)); 40 } 41 return taggedString; 42 } catch(Exception e) { 43 e.printStackTrace(); 44 System.out.println("Error during the text tagging operation."); 45 } 46 return null; 47 } 48 49 }