1   package eu.fbk.dkm.pikes.resources;
2   
import com.google.common.base.Objects;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Maps;
import org.tartarus.snowball.SnowballStemmer;

import javax.annotation.Nullable;
import java.util.Locale;
import java.util.Map;
10  
11  public final class Stemming {
12  
13      private static final Map<String, Class<? extends SnowballStemmer>> CLASSES = Maps.newHashMap();
14  
15      private static final Map<String, String> LANGUAGES = buildAliasesMap();
16  
17      private static Map<String, String> buildAliasesMap() {
18          final ImmutableMap.Builder<String, String> builder = ImmutableMap.builder();
19          addAliases(builder, "danish", "da", "dan");
20          addAliases(builder, "dutch", "nl", "nld", "dut");
21          addAliases(builder, "english", "en", "eng");
22          addAliases(builder, "finnish", "fi", "fin");
23          addAliases(builder, "french", "fr", "fra", "fre");
24          addAliases(builder, "german", "de", "deu", "ger");
25          addAliases(builder, "hungarian", "hu", "hun");
26          addAliases(builder, "italian", "it", "ita");
27          addAliases(builder, "norwegian", "nb", "nob");
28          addAliases(builder, "portuguese", "pt", "por");
29          addAliases(builder, "romanian", "ro", "ron", "rum");
30          addAliases(builder, "russian", "ru", "rus");
31          addAliases(builder, "spanish", "es", "spa");
32          addAliases(builder, "swedish", "sw", "swe");
33          addAliases(builder, "turkish", "tr", "tur");
34          return builder.build();
35      }
36  
37      private static void addAliases(final ImmutableMap.Builder<String, String> builder,
38              final String snowballName, final String... aliases) {
39          builder.put(snowballName, snowballName);
40          for (final String alias : aliases) {
41              builder.put(alias, snowballName);
42          }
43      }
44  
45      private static SnowballStemmer getStemmer(@Nullable final String lang) {
46          final String actualLang = Objects.firstNonNull(lang, "en").toLowerCase();
47          synchronized (CLASSES) {
48              Class<? extends SnowballStemmer> stemmerClass = CLASSES.get(actualLang);
49              if (stemmerClass == null) {
50                  final String snowballName = Objects.firstNonNull(LANGUAGES.get(actualLang),
51                          actualLang);
52                  final String className = "org.tartarus.snowball.ext." + snowballName + "Stemmer";
53                  try {
54                      stemmerClass = Class.forName(className).asSubclass(SnowballStemmer.class);
55                  } catch (final Throwable ex) {
56                      throw new IllegalArgumentException("Unsupported language '" + lang + "'");
57                  }
58                  CLASSES.put(actualLang, stemmerClass);
59              }
60              try {
61                  return stemmerClass.newInstance();
62              } catch (final Throwable ex) {
63                  throw new Error("Could not instantiate stemmer " + stemmerClass.getName());
64              }
65          }
66      }
67  
68      public static String stem(@Nullable final String lang, final String string) {
69          final SnowballStemmer stemmer = getStemmer(lang);
70          stemmer.setCurrent(string);
71          stemmer.stem();
72          return stemmer.getCurrent();
73      }
74  
75  }