1 package eu.fbk.dkm.pikes.resources;
2
import com.google.common.base.Objects;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Maps;
import org.tartarus.snowball.SnowballStemmer;

import javax.annotation.Nullable;
import java.util.Locale;
import java.util.Map;
10
11 public final class Stemming {
12
13 private static final Map<String, Class<? extends SnowballStemmer>> CLASSES = Maps.newHashMap();
14
15 private static final Map<String, String> LANGUAGES = buildAliasesMap();
16
17 private static Map<String, String> buildAliasesMap() {
18 final ImmutableMap.Builder<String, String> builder = ImmutableMap.builder();
19 addAliases(builder, "danish", "da", "dan");
20 addAliases(builder, "dutch", "nl", "nld", "dut");
21 addAliases(builder, "english", "en", "eng");
22 addAliases(builder, "finnish", "fi", "fin");
23 addAliases(builder, "french", "fr", "fra", "fre");
24 addAliases(builder, "german", "de", "deu", "ger");
25 addAliases(builder, "hungarian", "hu", "hun");
26 addAliases(builder, "italian", "it", "ita");
27 addAliases(builder, "norwegian", "nb", "nob");
28 addAliases(builder, "portuguese", "pt", "por");
29 addAliases(builder, "romanian", "ro", "ron", "rum");
30 addAliases(builder, "russian", "ru", "rus");
31 addAliases(builder, "spanish", "es", "spa");
32 addAliases(builder, "swedish", "sw", "swe");
33 addAliases(builder, "turkish", "tr", "tur");
34 return builder.build();
35 }
36
37 private static void addAliases(final ImmutableMap.Builder<String, String> builder,
38 final String snowballName, final String... aliases) {
39 builder.put(snowballName, snowballName);
40 for (final String alias : aliases) {
41 builder.put(alias, snowballName);
42 }
43 }
44
45 private static SnowballStemmer getStemmer(@Nullable final String lang) {
46 final String actualLang = Objects.firstNonNull(lang, "en").toLowerCase();
47 synchronized (CLASSES) {
48 Class<? extends SnowballStemmer> stemmerClass = CLASSES.get(actualLang);
49 if (stemmerClass == null) {
50 final String snowballName = Objects.firstNonNull(LANGUAGES.get(actualLang),
51 actualLang);
52 final String className = "org.tartarus.snowball.ext." + snowballName + "Stemmer";
53 try {
54 stemmerClass = Class.forName(className).asSubclass(SnowballStemmer.class);
55 } catch (final Throwable ex) {
56 throw new IllegalArgumentException("Unsupported language '" + lang + "'");
57 }
58 CLASSES.put(actualLang, stemmerClass);
59 }
60 try {
61 return stemmerClass.newInstance();
62 } catch (final Throwable ex) {
63 throw new Error("Could not instantiate stemmer " + stemmerClass.getName());
64 }
65 }
66 }
67
68 public static String stem(@Nullable final String lang, final String string) {
69 final SnowballStemmer stemmer = getStemmer(lang);
70 stemmer.setCurrent(string);
71 stemmer.stem();
72 return stemmer.getCurrent();
73 }
74
75 }