package eu.fbk.dkm.pikes.resources;

import com.google.common.base.Objects;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Maps;
import org.tartarus.snowball.SnowballStemmer;

import javax.annotation.Nullable;
import java.util.Locale;
import java.util.Map;

/**
 * Static entry point for stemming a string with a Snowball stemmer selected by language.
 *
 * <p>
 * The language may be given either as the Snowball stemmer name (e.g. {@code "english"}) or as
 * one of the aliases registered in {@link #buildAliasesMap()} (ISO 639 two- and three-letter
 * codes). Names that are not registered as aliases are used verbatim to look up a class named
 * {@code org.tartarus.snowball.ext.<name>Stemmer}, so any stemmer available on the classpath can
 * be selected by its Snowball name.
 * </p>
 *
 * <p>
 * Resolved stemmer classes are cached in a map guarded by a lock; a new stemmer instance is
 * created for every call, so no stemmer object is shared between callers.
 * </p>
 */
public final class Stemming {

    /** Language assumed when the caller passes {@code null}. */
    private static final String DEFAULT_LANGUAGE = "en";

    /** Package holding the generated Snowball stemmer classes. */
    private static final String STEMMER_PACKAGE = "org.tartarus.snowball.ext.";

    /** Cache of resolved stemmer classes, keyed by lower-cased language; guarded by itself. */
    private static final Map<String, Class<? extends SnowballStemmer>> CLASSES = Maps.newHashMap();

    /** Immutable map from every accepted alias (and Snowball name) to the Snowball name. */
    private static final Map<String, String> LANGUAGES = buildAliasesMap();

    /** Not instantiable: this class only exposes static methods. */
    private Stemming() {
    }

    /**
     * Builds the alias table mapping language codes and Snowball names to Snowball names.
     *
     * @return an immutable alias-to-Snowball-name map
     */
    private static Map<String, String> buildAliasesMap() {
        final ImmutableMap.Builder<String, String> builder = ImmutableMap.builder();
        addAliases(builder, "danish", "da", "dan");
        addAliases(builder, "dutch", "nl", "nld", "dut");
        addAliases(builder, "english", "en", "eng");
        addAliases(builder, "finnish", "fi", "fin");
        addAliases(builder, "french", "fr", "fra", "fre");
        addAliases(builder, "german", "de", "deu", "ger");
        addAliases(builder, "hungarian", "hu", "hun");
        addAliases(builder, "italian", "it", "ita");
        // "no"/"nor" are the generic ISO 639 codes for Norwegian; "nb"/"nob" denote Bokmal.
        addAliases(builder, "norwegian", "no", "nor", "nb", "nob");
        addAliases(builder, "portuguese", "pt", "por");
        addAliases(builder, "romanian", "ro", "ron", "rum");
        addAliases(builder, "russian", "ru", "rus");
        addAliases(builder, "spanish", "es", "spa");
        // "sv" is the ISO 639-1 code for Swedish. "sw" (ISO 639-1 for Swahili) was the only
        // two-letter alias registered historically and is kept so existing callers keep working.
        addAliases(builder, "swedish", "sv", "sw", "swe");
        addAliases(builder, "turkish", "tr", "tur");
        return builder.build();
    }

    /**
     * Registers a Snowball name under itself and under each of the supplied aliases.
     *
     * @param builder the builder receiving the entries
     * @param snowballName the Snowball stemmer name, e.g. {@code "english"}
     * @param aliases additional keys that should resolve to {@code snowballName}
     */
    private static void addAliases(final ImmutableMap.Builder<String, String> builder,
            final String snowballName, final String... aliases) {
        builder.put(snowballName, snowballName);
        for (final String alias : aliases) {
            builder.put(alias, snowballName);
        }
    }

    /**
     * Returns a new stemmer instance for the language specified.
     *
     * @param lang the language name or alias, case-insensitive; {@code null} selects English
     * @return a freshly created stemmer
     * @throws IllegalArgumentException if no stemmer class can be loaded for the language
     * @throws Error if the stemmer class was found but could not be instantiated
     */
    private static SnowballStemmer getStemmer(@Nullable final String lang) {
        // Locale.ROOT keeps the lookup independent of the JVM default locale: with a Turkish
        // default locale, "IT".toLowerCase() would yield a dotless 'i' and miss the alias table.
        final String actualLang = Objects.firstNonNull(lang, DEFAULT_LANGUAGE)
                .toLowerCase(Locale.ROOT);

        final Class<? extends SnowballStemmer> stemmerClass;
        synchronized (CLASSES) {
            Class<? extends SnowballStemmer> cached = CLASSES.get(actualLang);
            if (cached == null) {
                cached = loadStemmerClass(actualLang, lang);
                CLASSES.put(actualLang, cached);
            }
            stemmerClass = cached;
        }

        // Instantiation only touches the local class reference, so it is done outside the lock.
        try {
            return stemmerClass.getDeclaredConstructor().newInstance();
        } catch (final Exception ex) {
            throw new Error("Could not instantiate stemmer " + stemmerClass.getName(), ex);
        }
    }

    /**
     * Resolves the stemmer class for a lower-cased language name or alias.
     *
     * @param actualLang the lower-cased language name or alias used for the lookup
     * @param lang the language as originally supplied by the caller, used in the error message
     * @return the stemmer class
     * @throws IllegalArgumentException if the class is missing, cannot be linked, or is not a
     *         {@link SnowballStemmer}
     */
    private static Class<? extends SnowballStemmer> loadStemmerClass(final String actualLang,
            @Nullable final String lang) {
        // Unregistered names are tried verbatim as Snowball names.
        final String snowballName = Objects.firstNonNull(LANGUAGES.get(actualLang), actualLang);
        final String className = STEMMER_PACKAGE + snowballName + "Stemmer";
        try {
            return Class.forName(className).asSubclass(SnowballStemmer.class);
        } catch (final ClassNotFoundException ex) {
            throw unsupportedLanguage(lang, ex);
        } catch (final ClassCastException ex) {
            throw unsupportedLanguage(lang, ex);
        } catch (final LinkageError ex) {
            throw unsupportedLanguage(lang, ex);
        }
    }

    /**
     * Creates the exception reported for a language without a usable stemmer class.
     *
     * @param lang the language as supplied by the caller
     * @param cause the underlying class-loading failure
     * @return the exception to throw
     */
    private static IllegalArgumentException unsupportedLanguage(@Nullable final String lang,
            final Throwable cause) {
        return new IllegalArgumentException("Unsupported language '" + lang + "'", cause);
    }

    /**
     * Stems a string using the Snowball stemmer for the language specified.
     *
     * @param lang the language name or alias, case-insensitive; {@code null} selects English
     * @param string the string to stem; it is handed to the stemmer unchanged
     * @return the stemmer's output for {@code string}
     * @throws IllegalArgumentException if the language is not supported
     */
    public static String stem(@Nullable final String lang, final String string) {
        final SnowballStemmer stemmer = getStemmer(lang);
        stemmer.setCurrent(string);
        stemmer.stem();
        return stemmer.getCurrent();
    }

}