1 package eu.fbk.dkm.pikes.resources;
2
import com.google.common.base.Joiner;
import com.google.common.base.Objects;
import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import eu.fbk.utils.core.CommandLine;
import eu.fbk.utils.core.CommandLine.Type;
import eu.fbk.rdfpro.AbstractRDFHandler;
import eu.fbk.rdfpro.RDFSources;
import eu.fbk.rdfpro.tql.TQL;
import eu.fbk.rdfpro.util.Environment;
import eu.fbk.rdfpro.util.Statements;
import org.eclipse.rdf4j.model.*;
import org.eclipse.rdf4j.rio.RDFHandlerException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.annotation.Nullable;
import java.io.File;
import java.io.IOException;
import java.util.List;
import java.util.Locale;
import java.util.Map;
27
28 public class SenticNet extends Lexicon<SenticNet.Lexeme> {
29
/** Logger used to report statements and lexemes that could not be processed. */
private static final Logger LOGGER = LoggerFactory.getLogger(SenticNet.class);

/** Namespace that a concept IRI must have to be accepted by {@code idFor}. */
private static final String NS_CONCEPT = "http://sentic.net/api/en/concept/";

// NOTE(review): the property IRIs below have no separator between "api" and the property
// name; presumably this mirrors the IRIs used in the SenticNet RDF data - confirm.

/** Predicate whose literal object is read as the aptitude score of a concept. */
private static final IRI PROP_APTITUDE = Statements.VALUE_FACTORY
        .createIRI("http://sentic.net/apiaptitude");

/** Predicate whose literal object is read as the attention score of a concept. */
private static final IRI PROP_ATTENTION = Statements.VALUE_FACTORY
        .createIRI("http://sentic.net/apiattention");

/**
 * Predicate whose literal object is read as the pleasantness score of a concept. The constant
 * name is spelled "PLEASENTNESS" while the IRI itself uses "pleasantness".
 */
private static final IRI PROP_PLEASENTNESS = Statements.VALUE_FACTORY
        .createIRI("http://sentic.net/apipleasantness");

/** Predicate whose literal object is read as the polarity score of a concept. */
private static final IRI PROP_POLARITY = Statements.VALUE_FACTORY
        .createIRI("http://sentic.net/apipolarity");

/** Predicate whose literal object is read as the sensitivity score of a concept. */
private static final IRI PROP_SENSITIVITY = Statements.VALUE_FACTORY
        .createIRI("http://sentic.net/apisensitivity");

/** Predicate whose object is a concept IRI that is recorded among the subject's semantics. */
private static final IRI PROP_SEMANTICS = Statements.VALUE_FACTORY
        .createIRI("http://sentic.net/apisemantics");

/** Predicate whose object string value is used as the text of a concept. */
private static final IRI PROP_TEXT = Statements.VALUE_FACTORY
        .createIRI("http://sentic.net/apitext");

/** Shared instance; {@code null} until assigned by {@code setInstance} or {@code getInstance}. */
private static SenticNet instance = null;
56
/**
 * Replaces the shared instance stored in the static {@code instance} field.
 *
 * @param instance
 *            the new shared instance; {@code null} clears the field, so that the next
 *            {@code getInstance()} call attempts to load the lexicon again
 */
public static synchronized void setInstance(@Nullable final SenticNet instance) {
    SenticNet.instance = instance;
}
60
61 public static synchronized SenticNet getInstance() {
62 if (instance == null) {
63 final String location = Objects.firstNonNull(
64 Environment.getProperty("senticnet.home"), "SenticNet.tsv");
65 try {
66 instance = Lexicon.readFrom(SenticNet.class, Lexeme.class, location);
67 } catch (final Throwable ex) {
68 throw new Error("Could not read default subjectivity lexicon at " + location, ex);
69 }
70 }
71 return instance;
72 }
73
74 @Nullable
75 public static String idFor(@Nullable final Value value) {
76 if (value == null) {
77 return null;
78 }
79 if (!(value instanceof IRI)) {
80 throw new IllegalArgumentException("Not a concept IRI: " + value);
81 }
82 final IRI uri = (IRI) value;
83 if (!uri.getNamespace().equals(NS_CONCEPT)) {
84 throw new IllegalArgumentException("Unexpected namespace for concept IRI: " + value);
85 }
86 return uri.getLocalName();
87 }
88
89 @Nullable
90 public static IRI uriFor(@Nullable final String id) {
91 return id == null ? null : Statements.VALUE_FACTORY.createIRI(NS_CONCEPT, id);
92 }
93
94 public static SenticNet index(final String resourceFile) throws IOException {
95
96 final Map<String, LexemeData> data = Maps.newHashMap();
97 try {
98 RDFSources.read(false, true, null, null, null, true, resourceFile).emit(new AbstractRDFHandler() {
99
100 @Override
101 public void handleStatement(final Statement statement) throws RDFHandlerException {
102
103 final Resource subj = statement.getSubject();
104 final IRI pred = statement.getPredicate();
105 final Value obj = statement.getObject();
106
107 try {
108 if (pred.equals(PROP_APTITUDE)) {
109 getLexemeData(subj).aptitude = ((Literal) obj).floatValue();
110 } else if (pred.equals(PROP_ATTENTION)) {
111 getLexemeData(subj).attention = ((Literal) obj).floatValue();
112 } else if (pred.equals(PROP_PLEASENTNESS)) {
113 getLexemeData(subj).pleasentness = ((Literal) obj).floatValue();
114 } else if (pred.equals(PROP_POLARITY)) {
115 getLexemeData(subj).polarity = ((Literal) obj).floatValue();
116 } else if (pred.equals(PROP_SENSITIVITY)) {
117 getLexemeData(subj).sensitivity = ((Literal) obj).floatValue();
118 } else if (pred.equals(PROP_SEMANTICS)) {
119 getLexemeData(subj).semantics.add(idFor(obj));
120 } else if (pred.equals(PROP_TEXT)) {
121 getLexemeData(subj).text = obj.stringValue();
122 }
123 } catch (final Throwable ex) {
124 LOGGER.warn("Could not process statement: " + statement, ex);
125 }
126 }
127
128 private LexemeData getLexemeData(final Resource subject) {
129 final String id = idFor(subject);
130 LexemeData lexemeData = data.get(id);
131 if (lexemeData == null) {
132 lexemeData = new LexemeData(id);
133 data.put(id, lexemeData);
134 }
135 return lexemeData;
136 }
137
138 }, 1);
139
140 } catch (final RDFHandlerException ex) {
141 Throwables.propagateIfPossible(ex.getCause() == null ? ex : ex.getCause(),
142 IOException.class);
143 Throwables.propagate(ex);
144 }
145
146 final List<Lexeme> lexemes = Lists.newArrayList();
147 for (final LexemeData lexemeData : data.values()) {
148 final Lexeme lexeme = lexemeData.toLexeme();
149 if (lexeme == null) {
150 LOGGER.warn("Could not create lexeme for ID " + lexemeData.id);
151 } else {
152 lexemes.add(lexeme);
153 }
154 }
155
156 return new SenticNet(lexemes);
157 }
158
159 public static void main(final String... args) {
160 try {
161 final CommandLine cmd = CommandLine
162 .parser()
163 .withName("index-senticnet-lexicon")
164 .withHeader("Processes the RDF data of eu.fbk.dkm.pikes.resources.SenticNet, "
165 + "producing a TSV file with an indexed version of it that can "
166 + "be used with the eu.fbk.dkm.pikes.resources.SenticNet Java API class.")
167 .withOption("i", "input", "the input file name", "FILE", Type.FILE_EXISTING,
168 true, false, true)
169 .withOption("o", "output", "the output file name", "FILE", Type.FILE, true,
170 false, true)
171 .withLogger(LoggerFactory.getLogger("eu.fbk"))
172 .parse(args);
173
174 final File inputFile = cmd.getOptionValue("i", File.class);
175 final File outputFile = cmd.getOptionValue("o", File.class);
176
177 final SenticNet lexicon = index(inputFile.getAbsolutePath());
178 lexicon.writeTo(outputFile.getAbsolutePath());
179
180 } catch (final Throwable ex) {
181 CommandLine.fail(ex);
182 }
183 }
184
/**
 * Creates a SenticNet lexicon from the supplied lexemes.
 *
 * @param lexemes
 *            the lexemes, passed as-is to the {@code Lexicon} superclass constructor
 */
public SenticNet(final Iterable<Lexeme> lexemes) {
    super(lexemes);
}
188
189 public Lexeme get(final Value id) {
190 return get(idFor(id));
191 }
192
193 public static final class Lexeme extends Lexicon.Lexeme {
194
195 private final float aptitude;
196
197 private final float attention;
198
199 private final float pleasentness;
200
201 private final float polarity;
202
203 private final float sensitivity;
204
205 private final String[] semantics;
206
207 public Lexeme(final String id, final Iterable<Token> tokens, final float aptitude,
208 final float attention, final float pleasentness, final float polarity,
209 final float sensitivity, final String... semantics) {
210
211 super(id, tokens);
212
213 this.aptitude = aptitude;
214 this.attention = attention;
215 this.pleasentness = pleasentness;
216 this.polarity = polarity;
217 this.sensitivity = sensitivity;
218 this.semantics = semantics.clone();
219
220 for (int i = 0; i < semantics.length; ++i) {
221 semantics[i] = semantics[i].intern();
222 }
223 }
224
225 protected Lexeme(final String id, final Iterable<Token> tokens,
226 final Map<String, String> properties) {
227
228 this(id, tokens, Float.parseFloat(properties.getOrDefault("aptitude", "0")),
229 Float.parseFloat(properties.getOrDefault("attention", "0")),
230 Float.parseFloat(properties.getOrDefault("pleasentness", "0")),
231 Float.parseFloat(properties.getOrDefault("polarity", "0")),
232 Float.parseFloat(properties.getOrDefault("sensitivity", "0")),
233 properties.getOrDefault("semantics", "").split("\\|"));
234 }
235
236 @Override
237 protected Map<String, String> getProperties() {
238 return ImmutableMap.<String, String>builder()
239 .put("aptitude", Float.toString(this.aptitude))
240 .put("attention", Float.toString(this.attention))
241 .put("pleasentness", Float.toString(this.pleasentness))
242 .put("polarity", Float.toString(this.polarity))
243 .put("sensitivity", Float.toString(this.sensitivity))
244 .put("semantics", Joiner.on('|').join(this.semantics)).build();
245 }
246
247 public float getAptitude() {
248 return this.aptitude;
249 }
250
251 public float getAttention() {
252 return this.attention;
253 }
254
255 public float getPleasentness() {
256 return this.pleasentness;
257 }
258
259 public float getPolarity() {
260 return this.polarity;
261 }
262
263 public float getSensitivity() {
264 return this.sensitivity;
265 }
266
267 public List<String> getSemantics() {
268 return ImmutableList.copyOf(this.semantics);
269 }
270
271 }
272
273 private static final class LexemeData {
274
275 String id;
276
277 @Nullable
278 String text;
279
280 float aptitude;
281
282 float attention;
283
284 float pleasentness;
285
286 float polarity;
287
288 float sensitivity;
289
290 List<String> semantics;
291
292 LexemeData(final String id) {
293 this.id = id;
294 this.semantics = Lists.newArrayList();
295 }
296
297 @Nullable
298 Lexeme toLexeme() {
299 if (this.text == null) {
300 return null;
301 }
302 final List<Token> tokens = Lists.newArrayList();
303 for (final String word : this.text.split("\\s+")) {
304 tokens.add(Token.create(word.toLowerCase(), null, null));
305 }
306 final String[] semantics = this.semantics.toArray(new String[this.semantics.size()]);
307 return new Lexeme(this.id, tokens, this.aptitude, this.attention, this.pleasentness,
308 this.polarity, this.sensitivity, semantics);
309 }
310
311 }
312
313 }