1   package eu.fbk.dkm.pikes.resources.vuaopinion;
2   
3   import eu.fbk.dkm.pikes.resources.NAFFilter;
4   import eu.fbk.utils.core.CommandLine;
5   import ixa.kaflib.*;
6   import org.apache.commons.io.FileUtils;
7   import org.apache.commons.io.FilenameUtils;
8   import org.slf4j.LoggerFactory;
9   
10  import java.io.File;
11  import java.io.IOException;
12  import java.util.HashMap;
13  import java.util.Iterator;
14  
15  
16  /**
17   * Created by alessio on 09/04/15.
18   */
19  
20  public class CorpusAnnotator {
21  
22  	private static final org.slf4j.Logger LOGGER = LoggerFactory.getLogger(CorpusAnnotator.class);
23  
24  	public static void main(String[] args) {
25  		try {
26  			CommandLine cmd = null;
27  			cmd = CommandLine
28  					.parser()
29  					.withName("corpus-postprocessor")
30  					.withHeader(
31  							"Add opinion layers to the parsed NAFs")
32  					.withOption("i", "input-path", "the base EN path of the corpus", "DIR",
33  							CommandLine.Type.DIRECTORY_EXISTING, true, false, true)
34  					.withOption("f", "force", "Force opinion")
35  					.withLogger(LoggerFactory.getLogger("eu.fbk.fssa")).parse(args);
36  
37  			boolean forceOpinion = cmd.hasOption("f");
38  			 
39  			final File inputPath = cmd.getOptionValue("i", File.class);
40  			if (!inputPath.exists()) {
41  				throw new IOException(String.format("Folder %s does not exist", inputPath.getAbsolutePath()));
42  			}
43  
44  			File kafPath = new File(inputPath.getAbsolutePath() + File.separator + "kaf");
45  			if (!kafPath.exists()) {
46  				throw new IOException(String.format("Folder %s does not exist", kafPath.getAbsolutePath()));
47  			}
48  			File nafPath = new File(inputPath.getAbsolutePath() + File.separator + "naf-parsed");
49  			if (!nafPath.exists()) {
50  				throw new IOException(String.format("Folder %s does not exist", nafPath.getAbsolutePath()));
51  			}
52  
53  			Iterator<File> fileIterator;
54  			fileIterator = FileUtils.iterateFiles(kafPath, new String[]{"kaf"}, false);
55  
56  			while (fileIterator.hasNext()) {
57  				File file = fileIterator.next();
58  				String fileBaseName = FilenameUtils.removeExtension(file.getName());
59  				KAFDocument document = KAFDocument.createFromFile(file);
60  
61  				File nafFile = new File(nafPath.getAbsolutePath() + File.separator + fileBaseName + ".naf");
62  				if (!nafFile.exists()) {
63  					LOGGER.warn(String.format("File %s does not exist", nafFile.getAbsolutePath()));
64  					continue;
65  				}
66  				KAFDocument nafDoc = KAFDocument.createFromFile(nafFile);
67  				HashMap<String, Term> nafTerms = new HashMap<>();
68  				for (Term term : nafDoc.getTerms()) {
69  					nafTerms.put(term.getId(), term);
70  				}
71  
72  				HashMap<String, String> idConverter = new HashMap<>();
73  				int i = 0;
74  				for (WF wf : document.getWFs()) {
75  					String id = wf.getId();
76  					id = id.replace('w', 't');
77  					idConverter.put(id, "t" + Integer.toString(++i));
78  				}
79  				
80  				boolean hasGoldOpinions = false;
81                  for (Opinion opinion : document.getOpinions()) {
82                      if ("gold-vua-opinion".equals(opinion.getLabel())) {
83                          hasGoldOpinions = true;
84                          break;
85                      }
86                  }
87                  
88                  if (hasGoldOpinions && !forceOpinion) {
89                      LOGGER.info("Opinions already present, skipping...");
90                  
91                  } else {
92      				for (Opinion opinion : document.getOpinions()) {
93      					Opinion newOpinion = nafDoc.newOpinion();
94      					newOpinion.setLabel("gold-vua-opinion");
95      
96      					Span<Term> termSpan;
97      
98      					// Expression
99      					if (opinion.getOpinionExpression() != null) {
100     						termSpan = KAFDocument.newTermSpan();
101     						for (Term term : opinion.getOpinionExpression().getTerms()) {
102     							termSpan.addTarget(nafTerms.get(idConverter.get(term.getId())));
103     						}
104     						Opinion.OpinionExpression expression = newOpinion.createOpinionExpression(termSpan);
105     						expression.setPolarity(opinion.getOpinionExpression().getPolarity());
106     					}
107     
108     					// Holder
109     					if (opinion.getOpinionHolder() != null) {
110     						termSpan = KAFDocument.newTermSpan();
111     						for (Term term : opinion.getOpinionHolder().getTerms()) {
112     							termSpan.addTarget(nafTerms.get(idConverter.get(term.getId())));
113     						}
114     						newOpinion.createOpinionHolder(termSpan);
115     					}
116     
117     					// Target
118     					if (opinion.getOpinionTarget() != null) {
119     						termSpan = KAFDocument.newTermSpan();
120     						for (Term term : opinion.getOpinionTarget().getTerms()) {
121     							termSpan.addTarget(nafTerms.get(idConverter.get(term.getId())));
122     						}
123     						newOpinion.createOpinionTarget(termSpan);
124     					}
125     				}
126                 }
127                 
128                 NAFFilter.builder(false).withSRLRoleLinking(true, true)
129                         .withOpinionLinking(true, true).build().filter(document);
130                 
131 				nafDoc.save(nafFile.getAbsolutePath());
132 			}
133 
134 		} catch (final Throwable ex) {
135 			CommandLine.fail(ex);
136 		}
137 	}
138 }