1   package eu.fbk.dkm.pikes.resources;
2   
3   import com.google.common.collect.ImmutableMap;
4   import com.google.common.collect.Maps;
5   
6   import javax.annotation.Nullable;
7   import java.util.Iterator;
8   import java.util.Map;
9   
10  /**
 * English number parsing and spelling methods.
12   * <p>
13   * This code is based on 'numword' by Dr. Georg Fischer, https://github.com/gfis/numword.
14   * </p>
15   */
public final class NumberSpeller {

    /** Irregular ordinal words, indexed by their numeric value (0..12). */
    private static final String[] ORDINALS = new String[] { "zeroth", "first", "second", "third",
            "fourth", "fifth", "sixth", "seventh", "eighth", "ninth", "tenth", "eleventh",
            "twelfth" };

    /** Words for the units 0..9, indexed by value. */
    private static final String[] WORD_N = new String[] { "zero", "one", "two", "three", "four",
            "five", "six", "seven", "eight", "nine" };

    /** Words for the multiples of ten, indexed by the tens digit (index 0 is unused). */
    private static final String[] WORD_N0 = new String[] { "", "ten", "twenty", "thirty", "forty",
            "fifty", "sixty", "seventy", "eighty", "ninety" };

    /** Words for 10..19, indexed by the units digit. */
    private static final String[] WORD_1N = new String[] { "ten", "eleven", "twelve", "thirteen",
            "fourteen", "fifteen", "sixteen", "seventeen", "eighteen", "nineteen" };

    /**
     * Stems of the "-illion" words (short scale). Stem {@code k} (k &gt;= 1) followed by "lion"
     * names 1000^(k+1): "mil" + "lion" = 10^6, "bil" + "lion" = 10^9, and so on. Index 0 is
     * unused.
     */
    private static final String[] WORD_N000 = new String[] { "", "mil", "bil", "tril", "quadril",
            "quintil", "sextil", "septil", "octil", "nonil", "decil", "undecil", "duodecil",
            "tredecil", "quattuordecil", "quindecil", "sexdecil", "septendecil", "octodecil",
            "novemdecil", "vigintil" };

    private static final String WORD_HUNDRED = "hundred";

    private static final String WORD_THOUSAND = "thousand";

    private static final String WORD_LION = "lion";

    private static final String WORD_AND = "and";

    private static final String WORD_MINUS = "minus";

    /**
     * Filler strings that may appear between number words: separators, the plural "s" (as in
     * "millions") and "and". They carry no numeric value.
     */
    private static final String[] PARTICLES = new String[] { " ", "-", "s", WORD_AND };

    /** Sentinel for "does not fit in a long"; all legitimate intermediate values are &gt;= 0. */
    private static final long OVERFLOW = -1L;

    /** All strings recognised by {@link #parseCardinal(String)}. Must be declared last. */
    private static final Token[] TOKENS = buildTokens();

    /** Role a token plays while a cardinal number is being parsed. */
    private enum Kind {
        /** Filler without numeric meaning. */
        PARTICLE,
        /** Value that is added to the current group (0..19 and the multiples of ten). */
        ADD,
        /** "hundred": multiplies the current group. */
        HUNDRED,
        /** "thousand" or an "-illion" word: closes the current group. */
        SCALE
    }

    /** A recognisable piece of text together with its role and numeric value. */
    private static final class Token {

        private final String text;

        private final Kind kind;

        private final long value;

        Token(final String text, final Kind kind, final long value) {
            this.text = text;
            this.kind = kind;
            this.value = value;
        }

    }

    /** Builds the token table from the word arrays above. */
    private static Token[] buildTokens() {
        final int count = PARTICLES.length + WORD_N.length + WORD_1N.length
                + (WORD_N0.length - 2) + 2 + (WORD_N000.length - 1);
        final Token[] tokens = new Token[count];
        int next = 0;
        for (int i = 0; i < PARTICLES.length; i++) {
            tokens[next++] = new Token(PARTICLES[i], Kind.PARTICLE, 0L);
        }
        for (int i = 0; i < WORD_N.length; i++) {
            tokens[next++] = new Token(WORD_N[i], Kind.ADD, i);
        }
        for (int i = 0; i < WORD_1N.length; i++) {
            tokens[next++] = new Token(WORD_1N[i], Kind.ADD, 10 + i);
        }
        for (int i = 2; i < WORD_N0.length; i++) { // "ten" is already covered by WORD_1N[0]
            tokens[next++] = new Token(WORD_N0[i], Kind.ADD, 10 * i);
        }
        tokens[next++] = new Token(WORD_HUNDRED, Kind.HUNDRED, 100L);
        long scale = 1000L;
        tokens[next++] = new Token(WORD_THOUSAND, Kind.SCALE, scale);
        for (int i = 1; i < WORD_N000.length; i++) {
            // becomes OVERFLOW from "sextillion" (10^21) on and stays there
            scale = checkedMultiply(scale, 1000L);
            tokens[next++] = new Token(WORD_N000[i] + WORD_LION, Kind.SCALE, scale);
        }
        return tokens;
    }

    /** Adds two non-negative values, yielding {@link #OVERFLOW} if the sum exceeds a long. */
    private static long checkedAdd(final long a, final long b) {
        if (a == OVERFLOW || b == OVERFLOW || a > Long.MAX_VALUE - b) {
            return OVERFLOW;
        }
        return a + b;
    }

    /** Multiplies two non-negative values, yielding {@link #OVERFLOW} if too large. */
    private static long checkedMultiply(final long a, final long b) {
        if (a == OVERFLOW || b == OVERFLOW) {
            return OVERFLOW;
        }
        if (a != 0L && b > Long.MAX_VALUE / a) {
            return OVERFLOW;
        }
        return a * b;
    }

    /** Returns the longest token that {@code text} contains at {@code offset}, or null. */
    private static Token longestTokenAt(final String text, final int offset) {
        Token best = null;
        for (int i = 0; i < TOKENS.length; i++) {
            final Token candidate = TOKENS[i];
            if ((best == null || candidate.text.length() > best.text.length())
                    && text.startsWith(candidate.text, offset)) {
                best = candidate;
            }
        }
        return best;
    }

    /**
     * Parses a cardinal number from the start of the string supplied.
     * <p>
     * The longest prefix consisting of lower-case English number words ("twenty", "hundred",
     * "million", ...) and filler particles (space, hyphen, plural "s", "and") is consumed;
     * whatever follows is ignored, so {@code "three apples"} yields 3. Matching is
     * case-sensitive. Values are accumulated additively per group of thousands, e.g.
     * {@code "two hundred and five thousand"} is (2 * 100 + 5) * 1000.
     * </p>
     *
     * @param text
     *            string to be parsed, may be null
     * @return parsed cardinal number; null if {@code text} is null, does not start with a number
     *         word, or denotes a value that does not fit in a {@code long}
     */
    public static Long parseCardinal(final String text) {
        if (text == null) {
            return null;
        }
        long total = 0L; // sum of the groups already closed by "thousand", "million", ...
        long triple = 0L; // value of the group currently being read
        boolean numberSeen = false;
        int offset = 0;
        while (true) {
            final Token token = longestTokenAt(text, offset);
            if (token == null) {
                break;
            }
            if (token.kind == Kind.PARTICLE) {
                if (!numberSeen) {
                    break; // a particle is only meaningful behind a number word
                }
            } else {
                numberSeen = true;
                if (token.kind == Kind.ADD) {
                    triple = checkedAdd(triple, token.value);
                } else if (token.kind == Kind.HUNDRED) {
                    // a bare "hundred" counts as "one hundred"
                    triple = checkedMultiply(triple == 0L ? 1L : triple, token.value);
                } else {
                    // a bare "thousand" / "million" counts as "one thousand" / "one million"
                    total = checkedAdd(total,
                            checkedMultiply(triple == 0L ? 1L : triple, token.value));
                    triple = 0L;
                }
                if (total == OVERFLOW || triple == OVERFLOW) {
                    return null;
                }
            }
            offset += token.text.length();
        }
        if (!numberSeen) {
            return null;
        }
        final long value = checkedAdd(total, triple);
        return value == OVERFLOW ? null : Long.valueOf(value);
    }

    /**
     * Spells a number as an English cardinal. The number is split into groups of three digits
     * (hundreds, tens, ones) which are spelled from the most significant group down, each
     * followed by its scale word ("thousand", "million", "billion"). Hundreds are joined to a
     * following remainder with "and", tens and ones with a hyphen, e.g. 123 becomes
     * {@code "one hundred and twenty-three"}. Scale words are not pluralised
     * ({@code "two million"}); negative numbers are prefixed with {@code "minus"}.
     *
     * @param num
     *            the number to spell
     * @return number word
     */
    public static String spellCardinal(final int num) {
        if (num == 0) {
            return WORD_N[0];
        }
        // every word is appended with a leading blank; the first blank is dropped at the end
        final StringBuilder result = new StringBuilder();
        if (num < 0) {
            result.append(' ').append(WORD_MINUS);
        }
        final long magnitude = Math.abs((long) num); // long: Integer.MIN_VALUE has no int abs

        // locate the most significant group of three digits
        int group = 0;
        long divisor = 1L;
        while (magnitude / divisor >= 1000L) {
            divisor *= 1000L;
            group++;
        }

        for (; group >= 0; group--, divisor /= 1000L) {
            final int triple = (int) (magnitude / divisor % 1000L);
            if (triple == 0) {
                continue; // an empty group contributes neither digits nor a scale word
            }
            appendTriple(result, triple);
            if (group == 1) {
                result.append(' ').append(WORD_THOUSAND);
            } else if (group > 1) {
                // group 2 (10^6) is "mil" + "lion", which is stem index 1
                result.append(' ').append(WORD_N000[group - 1]).append(WORD_LION);
            }
        }
        return result.substring(1);
    }

    /** Appends the words for a value in 1..999, each preceded by a blank. */
    private static void appendTriple(final StringBuilder result, final int triple) {
        final int hundreds = triple / 100;
        final int tens = triple / 10 % 10;
        final int ones = triple % 10;
        if (hundreds > 0) {
            result.append(' ').append(WORD_N[hundreds]);
            result.append(' ').append(WORD_HUNDRED);
            if (tens != 0 || ones != 0) {
                result.append(' ').append(WORD_AND);
            }
        }
        if (tens == 0) {
            if (ones > 0) {
                result.append(' ').append(WORD_N[ones]);
            }
        } else if (tens == 1) {
            result.append(' ').append(WORD_1N[ones]);
        } else {
            result.append(' ').append(WORD_N0[tens]);
            if (ones > 0) {
                result.append('-').append(WORD_N[ones]);
            }
        }
    }

    /**
     * Parses an ordinal number word such as {@code "third"}, {@code "twenty-first"},
     * {@code "fortieth"} or {@code "one hundredth"}. The ordinal suffix is rewritten to the
     * corresponding cardinal word and the result is handed to {@link #parseCardinal(String)}.
     *
     * @param string
     *            the string to parse, may be null; surrounding whitespace is ignored
     * @return the parsed number; null if {@code string} is null, does not end with an ordinal
     *         suffix, or its stem cannot be parsed as a cardinal
     */
    public static Long parseOrdinal(final String string) {
        if (string == null) {
            return null;
        }
        final String s = string.trim();
        final int l = s.length();
        for (int i = 0; i < ORDINALS.length; ++i) {
            if (s.endsWith(ORDINALS[i])) {
                // "twenty-first" -> "twenty-" + "one"
                return parseCardinal(s.substring(0, l - ORDINALS[i].length()) + spellCardinal(i));
            }
        }
        if (s.endsWith("ieth")) {
            return parseCardinal(s.substring(0, l - 4) + "y"); // "twentieth" -> "twenty"
        }
        if (s.endsWith("th")) {
            return parseCardinal(s.substring(0, l - 2)); // "thirteenth" -> "thirteen"
        }
        return null;
    }

    /**
     * Spells a number as an English ordinal, e.g. 3 as {@code "third"}, 21 as
     * {@code "twenty-first"}, 40 as {@code "fortieth"} and 113 as
     * {@code "one hundred and thirteenth"}. The cardinal spelling is produced first and its last
     * word is then turned into the ordinal form.
     *
     * @param ordinal
     *            the number to spell, not negative
     * @return ordinal number word
     * @throws IllegalArgumentException
     *             if {@code ordinal} is negative
     */
    public static String spellOrdinal(final int ordinal) {
        if (ordinal < 0) {
            throw new IllegalArgumentException("Ordinal must not be negative: " + ordinal);
        }
        final String cardinal = spellCardinal(ordinal);
        final int lastTwo = ordinal % 100;

        // value of the trailing cardinal word if that word has an irregular ordinal form
        final int irregular;
        if (lastTwo <= 12) {
            irregular = lastTwo; // "...one" .. "...twelve" (0 only matters for "zero" itself)
        } else if (lastTwo >= 20) {
            irregular = lastTwo % 10; // "...-one" .. "...-nine" behind a tens word
        } else {
            irregular = 0; // thirteen .. nineteen are regular
        }

        if (irregular > 0 || ordinal == 0) {
            final String word = spellCardinal(irregular);
            return cardinal.substring(0, cardinal.length() - word.length()) + ORDINALS[irregular];
        }
        if (lastTwo >= 20) {
            // exact multiple of ten: "twenty" -> "twentieth"
            return cardinal.substring(0, cardinal.length() - 1) + "ieth";
        }
        return cardinal + "th"; // "thirteenth", "one hundredth", "one thousandth", ...
    }

    /**
     * Tells whether a string looks like an ordinal number word. This is a purely suffix-based
     * heuristic: any string ending in "th", "first", "second" or "third" qualifies, including
     * words that are not numbers at all.
     *
     * @param string
     *            the string to test, may be null
     * @return true if the string ends with an ordinal suffix
     */
    public static boolean isOrdinal(final String string) {
        return string != null
                && (string.endsWith("th") || string.endsWith("first")
                        || string.endsWith("second") || string.endsWith("third"));
    }

    /**
     * Parses a quantity written with digits, with number words, or with both, such as
     * {@code "42"}, {@code "eleven"}, {@code "1.5 million"}, {@code "3e2"} or {@code "21st"}.
     * <p>
     * A leading numeric part is collected first: digits, '.', '-', and an exponent marker
     * 'e'/'E' are kept, ',' is read as a decimal point, and blanks, '+' and apostrophes are
     * skipped. The rest of the string is parsed as an ordinal or cardinal number word. If both
     * parts are present their product is returned.
     * </p>
     *
     * @param string
     *            the string to parse, may be null
     * @return the parsed value; null if {@code string} is null or contains neither a valid
     *         numeric part nor a number word
     */
    public static Double parse(final String string) {
        if (string == null) {
            return null;
        }

        final String s = string.trim();
        final StringBuilder n = new StringBuilder();
        int i = 0;
        for (; i < s.length(); ++i) {
            final char c = s.charAt(i);
            if (Character.isDigit(c) || c == '.' || c == '-') {
                n.append(c);
            } else if (c == 'e' || c == 'E') {
                // only an exponent inside a numeral; otherwise the first letter of a word
                // such as "eleven" or "eight", which must be left to the word parser
                if (!isExponentMarker(s, i)) {
                    break;
                }
                n.append(c);
            } else if (c == ',') {
                n.append('.');
            } else if (c != ' ' && c != '+' && c != '\'') {
                break;
            }
        }

        Double multiplier = null;
        if (n.length() > 0) {
            try {
                multiplier = Double.valueOf(n.toString());
            } catch (final NumberFormatException ignored) {
                // the collected characters (e.g. a lone "-") do not form a number
            }
        }

        final String str = s.substring(i);
        final Long num = isOrdinal(str) ? parseOrdinal(str) : parseCardinal(str);
        if (num == null) {
            return multiplier;
        } else if (multiplier == null) {
            return num.doubleValue();
        } else {
            return num * multiplier;
        }
    }

    /**
     * Tells whether the 'e'/'E' at {@code index} is the exponent marker of a numeral, i.e.
     * directly preceded by a digit or '.' and followed by a digit, optionally behind a sign.
     */
    private static boolean isExponentMarker(final String s, final int index) {
        if (index == 0 || index + 1 >= s.length()) {
            return false;
        }
        final char before = s.charAt(index - 1);
        if (!Character.isDigit(before) && before != '.') {
            return false;
        }
        char after = s.charAt(index + 1);
        if ((after == '+' || after == '-') && index + 2 < s.length()) {
            after = s.charAt(index + 2);
        }
        return Character.isDigit(after);
    }

}