1   package eu.fbk.dkm.pikes.resources;
2   
import com.google.common.base.Joiner;
import com.google.common.base.Objects;
import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import eu.fbk.rdfpro.AbstractRDFHandler;
import eu.fbk.rdfpro.RDFSources;
import eu.fbk.rdfpro.tql.TQL;
import eu.fbk.rdfpro.util.Environment;
import eu.fbk.rdfpro.util.Statements;
import eu.fbk.utils.core.CommandLine;
import eu.fbk.utils.core.CommandLine.Type;
import org.eclipse.rdf4j.model.*;
import org.eclipse.rdf4j.rio.RDFHandlerException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.annotation.Nullable;
import java.io.File;
import java.io.IOException;
import java.util.List;
import java.util.Locale;
import java.util.Map;
27  
28  public class SenticNet extends Lexicon<SenticNet.Lexeme> {
29  
    /** Logger used to report statements and lexemes that cannot be processed. */
    private static final Logger LOGGER = LoggerFactory.getLogger(SenticNet.class);

    /** Namespace of concept IRIs; the local name that follows it is used as the lexeme ID. */
    private static final String NS_CONCEPT = "http://sentic.net/api/en/concept/";

    // NOTE(review): all the property IRIs below consist of "http://sentic.net/api" immediately
    // followed by the property name, with no "/" or "#" separator in between. Presumably this
    // mirrors the vocabulary used by the SenticNet RDF data (confirm against an actual dump)
    // and is therefore NOT a typo; do not "fix" it without checking, or no statement will match.

    /** Predicate whose literal value is stored as the aptitude score of a concept. */
    private static final IRI PROP_APTITUDE = Statements.VALUE_FACTORY
            .createIRI("http://sentic.net/apiaptitude");

    /** Predicate whose literal value is stored as the attention score of a concept. */
    private static final IRI PROP_ATTENTION = Statements.VALUE_FACTORY
            .createIRI("http://sentic.net/apiattention");

    /**
     * Predicate whose literal value is stored as the pleasantness score of a concept. Note that
     * the Java identifiers in this class are spelled "pleasentness", whereas the IRI uses the
     * spelling "pleasantness".
     */
    private static final IRI PROP_PLEASENTNESS = Statements.VALUE_FACTORY
            .createIRI("http://sentic.net/apipleasantness");

    /** Predicate whose literal value is stored as the polarity score of a concept. */
    private static final IRI PROP_POLARITY = Statements.VALUE_FACTORY
            .createIRI("http://sentic.net/apipolarity");

    /** Predicate whose literal value is stored as the sensitivity score of a concept. */
    private static final IRI PROP_SENSITIVITY = Statements.VALUE_FACTORY
            .createIRI("http://sentic.net/apisensitivity");

    /** Predicate linking a concept to another concept IRI, recorded among its semantics. */
    private static final IRI PROP_SEMANTICS = Statements.VALUE_FACTORY
            .createIRI("http://sentic.net/apisemantics");

    /** Predicate whose value is the text of a concept, later split into lexeme tokens. */
    private static final IRI PROP_TEXT = Statements.VALUE_FACTORY
            .createIRI("http://sentic.net/apitext");

    /** Shared instance, either set explicitly or loaded on the first getInstance() call. */
    private static SenticNet instance = null;
56  
57      public static synchronized void setInstance(@Nullable final SenticNet instance) {
58          SenticNet.instance = instance;
59      }
60  
61      public static synchronized SenticNet getInstance() {
62          if (instance == null) {
63              final String location = Objects.firstNonNull(
64                      Environment.getProperty("senticnet.home"), "SenticNet.tsv");
65              try {
66                  instance = Lexicon.readFrom(SenticNet.class, Lexeme.class, location);
67              } catch (final Throwable ex) {
68                  throw new Error("Could not read default subjectivity lexicon at " + location, ex);
69              }
70          }
71          return instance;
72      }
73  
74      @Nullable
75      public static String idFor(@Nullable final Value value) {
76          if (value == null) {
77              return null;
78          }
79          if (!(value instanceof IRI)) {
80              throw new IllegalArgumentException("Not a concept IRI: " + value);
81          }
82          final IRI uri = (IRI) value;
83          if (!uri.getNamespace().equals(NS_CONCEPT)) {
84              throw new IllegalArgumentException("Unexpected namespace for concept IRI: " + value);
85          }
86          return uri.getLocalName();
87      }
88  
89      @Nullable
90      public static IRI uriFor(@Nullable final String id) {
91          return id == null ? null : Statements.VALUE_FACTORY.createIRI(NS_CONCEPT, id);
92      }
93  
94      public static SenticNet index(final String resourceFile) throws IOException {
95  
96          final Map<String, LexemeData> data = Maps.newHashMap();
97          try {
98              RDFSources.read(false, true, null, null, null, true, resourceFile).emit(new AbstractRDFHandler() {
99  
100                 @Override
101                 public void handleStatement(final Statement statement) throws RDFHandlerException {
102 
103                     final Resource subj = statement.getSubject();
104                     final IRI pred = statement.getPredicate();
105                     final Value obj = statement.getObject();
106 
107                     try {
108                         if (pred.equals(PROP_APTITUDE)) {
109                             getLexemeData(subj).aptitude = ((Literal) obj).floatValue();
110                         } else if (pred.equals(PROP_ATTENTION)) {
111                             getLexemeData(subj).attention = ((Literal) obj).floatValue();
112                         } else if (pred.equals(PROP_PLEASENTNESS)) {
113                             getLexemeData(subj).pleasentness = ((Literal) obj).floatValue();
114                         } else if (pred.equals(PROP_POLARITY)) {
115                             getLexemeData(subj).polarity = ((Literal) obj).floatValue();
116                         } else if (pred.equals(PROP_SENSITIVITY)) {
117                             getLexemeData(subj).sensitivity = ((Literal) obj).floatValue();
118                         } else if (pred.equals(PROP_SEMANTICS)) {
119                             getLexemeData(subj).semantics.add(idFor(obj));
120                         } else if (pred.equals(PROP_TEXT)) {
121                             getLexemeData(subj).text = obj.stringValue();
122                         }
123                     } catch (final Throwable ex) {
124                         LOGGER.warn("Could not process statement: " + statement, ex);
125                     }
126                 }
127 
128                 private LexemeData getLexemeData(final Resource subject) {
129                     final String id = idFor(subject);
130                     LexemeData lexemeData = data.get(id);
131                     if (lexemeData == null) {
132                         lexemeData = new LexemeData(id);
133                         data.put(id, lexemeData);
134                     }
135                     return lexemeData;
136                 }
137 
138             }, 1);
139 
140         } catch (final RDFHandlerException ex) {
141             Throwables.propagateIfPossible(ex.getCause() == null ? ex : ex.getCause(),
142                     IOException.class);
143             Throwables.propagate(ex);
144         }
145 
146         final List<Lexeme> lexemes = Lists.newArrayList();
147         for (final LexemeData lexemeData : data.values()) {
148             final Lexeme lexeme = lexemeData.toLexeme();
149             if (lexeme == null) {
150                 LOGGER.warn("Could not create lexeme for ID " + lexemeData.id);
151             } else {
152                 lexemes.add(lexeme);
153             }
154         }
155 
156         return new SenticNet(lexemes);
157     }
158 
159     public static void main(final String... args) {
160         try {
161             final CommandLine cmd = CommandLine
162                     .parser()
163                     .withName("index-senticnet-lexicon")
164                     .withHeader("Processes the RDF data of eu.fbk.dkm.pikes.resources.SenticNet, " //
165                             + "producing a TSV file with an indexed version of it that can " //
166                             + "be used with the eu.fbk.dkm.pikes.resources.SenticNet Java API class.")
167                     .withOption("i", "input", "the input file name", "FILE", Type.FILE_EXISTING,
168                             true, false, true)
169                     .withOption("o", "output", "the output file name", "FILE", Type.FILE, true,
170                             false, true) //
171                     .withLogger(LoggerFactory.getLogger("eu.fbk")) //
172                     .parse(args);
173 
174             final File inputFile = cmd.getOptionValue("i", File.class);
175             final File outputFile = cmd.getOptionValue("o", File.class);
176 
177             final SenticNet lexicon = index(inputFile.getAbsolutePath());
178             lexicon.writeTo(outputFile.getAbsolutePath());
179 
180         } catch (final Throwable ex) {
181             CommandLine.fail(ex);
182         }
183     }
184 
185     public SenticNet(final Iterable<Lexeme> lexemes) {
186         super(lexemes);
187     }
188 
189     public Lexeme get(final Value id) {
190         return get(idFor(id));
191     }
192 
193     public static final class Lexeme extends Lexicon.Lexeme {
194 
195         private final float aptitude;
196 
197         private final float attention;
198 
199         private final float pleasentness;
200 
201         private final float polarity;
202 
203         private final float sensitivity;
204 
205         private final String[] semantics;
206 
207         public Lexeme(final String id, final Iterable<Token> tokens, final float aptitude,
208                 final float attention, final float pleasentness, final float polarity,
209                 final float sensitivity, final String... semantics) {
210 
211             super(id, tokens);
212 
213             this.aptitude = aptitude;
214             this.attention = attention;
215             this.pleasentness = pleasentness;
216             this.polarity = polarity;
217             this.sensitivity = sensitivity;
218             this.semantics = semantics.clone();
219 
220             for (int i = 0; i < semantics.length; ++i) {
221                 semantics[i] = semantics[i].intern();
222             }
223         }
224 
225         protected Lexeme(final String id, final Iterable<Token> tokens,
226                 final Map<String, String> properties) {
227             // for use with reflection
228             this(id, tokens, Float.parseFloat(properties.getOrDefault("aptitude", "0")), //
229                     Float.parseFloat(properties.getOrDefault("attention", "0")), //
230                     Float.parseFloat(properties.getOrDefault("pleasentness", "0")), //
231                     Float.parseFloat(properties.getOrDefault("polarity", "0")), //
232                     Float.parseFloat(properties.getOrDefault("sensitivity", "0")), //
233                     properties.getOrDefault("semantics", "").split("\\|"));
234         }
235 
236         @Override
237         protected Map<String, String> getProperties() {
238             return ImmutableMap.<String, String>builder()
239                     .put("aptitude", Float.toString(this.aptitude))
240                     .put("attention", Float.toString(this.attention))
241                     .put("pleasentness", Float.toString(this.pleasentness))
242                     .put("polarity", Float.toString(this.polarity))
243                     .put("sensitivity", Float.toString(this.sensitivity))
244                     .put("semantics", Joiner.on('|').join(this.semantics)).build();
245         }
246 
247         public float getAptitude() {
248             return this.aptitude;
249         }
250 
251         public float getAttention() {
252             return this.attention;
253         }
254 
255         public float getPleasentness() {
256             return this.pleasentness;
257         }
258 
259         public float getPolarity() {
260             return this.polarity;
261         }
262 
263         public float getSensitivity() {
264             return this.sensitivity;
265         }
266 
267         public List<String> getSemantics() {
268             return ImmutableList.copyOf(this.semantics);
269         }
270 
271     }
272 
273     private static final class LexemeData {
274 
275         String id;
276 
277         @Nullable
278         String text;
279 
280         float aptitude;
281 
282         float attention;
283 
284         float pleasentness;
285 
286         float polarity;
287 
288         float sensitivity;
289 
290         List<String> semantics;
291 
292         LexemeData(final String id) {
293             this.id = id;
294             this.semantics = Lists.newArrayList();
295         }
296 
297         @Nullable
298         Lexeme toLexeme() {
299             if (this.text == null) {
300                 return null;
301             }
302             final List<Token> tokens = Lists.newArrayList();
303             for (final String word : this.text.split("\\s+")) {
304                 tokens.add(Token.create(word.toLowerCase(), null, null));
305             }
306             final String[] semantics = this.semantics.toArray(new String[this.semantics.size()]);
307             return new Lexeme(this.id, tokens, this.aptitude, this.attention, this.pleasentness,
308                     this.polarity, this.sensitivity, semantics);
309         }
310 
311     }
312 
313 }