001package ball.riddler538.ant.taskdefs;
002/*-
003 * ##########################################################################
004 * Solutions for the 538 Riddler
005 * $Id: SolveRiddle20190628Task.html 5431 2020-02-12 19:03:17Z ball $
006 * $HeadURL: svn+ssh://svn.hcf.dev/var/spool/scm/repository.svn/hcf-dev/blog/2019-07-08-fivethirtyeight-best-scrabble-string/src/main/resources/javadoc/src-html/ball/riddler538/ant/taskdefs/SolveRiddle20190628Task.html $
007 * %%
008 * Copyright (C) 2015 - 2020 Allen D. Ball
009 * %%
010 * Licensed under the Apache License, Version 2.0 (the "License");
011 * you may not use this file except in compliance with the License.
012 * You may obtain a copy of the License at
013 *
014 *      http://www.apache.org/licenses/LICENSE-2.0
015 *
016 * Unless required by applicable law or agreed to in writing, software
017 * distributed under the License is distributed on an "AS IS" BASIS,
018 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
019 * See the License for the specific language governing permissions and
020 * limitations under the License.
021 * ##########################################################################
022 */
023import ball.game.scrabble.Bag;
024import ball.game.scrabble.Tile;
025import ball.util.ant.taskdefs.AntTask;
026import java.io.BufferedReader;
027import java.io.InputStreamReader;
028import java.nio.file.Files;
029import java.nio.file.Path;
030import java.nio.file.Paths;
031import java.util.Arrays;
032import java.util.Collection;
033import java.util.Comparator;
034import java.util.LinkedList;
035import java.util.List;
036import java.util.Map;
037import java.util.Optional;
038import java.util.Set;
039import java.util.SortedSet;
040import java.util.TreeMap;
041import java.util.TreeSet;
042import java.util.stream.Stream;
043import lombok.NoArgsConstructor;
044import lombok.ToString;
045import org.apache.tools.ant.BuildException;
046
047import static java.lang.String.CASE_INSENSITIVE_ORDER;
048import static java.util.Comparator.comparing;
049import static java.util.Comparator.comparingInt;
050import static java.util.stream.Collectors.joining;
051import static java.util.stream.Collectors.toList;
052import static org.apache.commons.lang3.StringUtils.EMPTY;
053import static org.apache.commons.lang3.StringUtils.SPACE;
054import static org.apache.commons.lang3.StringUtils.isNotBlank;
055import static org.apache.commons.lang3.StringUtils.repeat;
056
057/**
058 * {@link.uri http://ant.apache.org/ Ant} {@link org.apache.tools.ant.Task}
059 * to solve a
060 * {@link.uri https://fivethirtyeight.com/features/whats-your-best-scrabble-string/ Riddler Classic}:
061 * <p>
062 * From Benjamin Danard, the Superstring Scrabble Challenge:
063 * </p>
064 * <p>
065 * The game of Scrabble has 100 tiles — 98 of these tiles contain a letter
066 * and a score, and two of them are wildcards worth zero points. At home on
067 * a lazy summer day with a bag of these tiles, you decide to play the
068 * Superstring Scrabble Challenge. Using only the 100 tiles, you lay them
069 * out into one long 100-letter string of your choosing. You look through
070 * the string. For each word you find, you earn points equal to its
071 * score. Once you find a word, you don’t get any points for finding it
072 * again. The same tile may be used in multiple, overlapping words. So
073 * ‘“theater” includes “the,” “heat,” “heater,” “eat,” “eater,” “ate,” etc.
074 * </p>
075 * <p>
076 * The super challenge: What order of tiles gives you the biggest score?
077 * (The blank tiles are locked into the letter they represent once you’ve
078 * picked it.)
079 * </p>
080 * <p>
081 * The winner, and inaugural Wordsmith Extraordinaire of Riddler Nation,
082 * will be the solver whose string generates the most points. You should use
083 * {@link.uri https://norvig.com/ngrams/enable1.txt this word list} to
084 * determine whether a word is valid.
085 * </p>
086 * <p>
087 * For reference, this is the distribution of letter tiles in the bag, by
088 * their point value:
089 * </p>
090<pre>
0910: ?×2
0921: E×12 A×9 I×9 O×8 N×6 R×6 T×6 L×4 S×4 U×4
0932: D×4 G×3
0943: B×2 C×2 M×2 P×2
0954: F×2 H×2 V×2 W×2 Y×2
0965: K
0978: J X
09810: Q Z
099</pre>
100 * {@ant.task}
101 * <p>
102 * The submitted solution:
103 * </p>
104<pre>
105CARBOXYMETHYLCELLULOSEHAND_RAFTSMANS_IPWIREDRAWERDINITROBENZENEPETTIFOGGINGJUDOKAEQUATEVIVAAIOESOOIU 912
106                          C         H
107----------------------------------------------------------------------------------------------------
108CARBOXYMETHYLCELLULOSE                                                                               46
109CARBO                                                                                                9
110CARB                                                                                                 8
111CAR                                                                                                  5
112 ARB                                                                                                 5
113 AR                                                                                                  2
114   BOXY                                                                                              16
115   BOX                                                                                               12
116   BO                                                                                                4
117    OXY                                                                                              13
118    OX                                                                                               9
119       METHYLCELLULOSE                                                                               25
120       METHYL                                                                                        14
121       METH                                                                                          9
122       MET                                                                                           5
123       ME                                                                                            4
124        ETHYL                                                                                        11
125        ETH                                                                                          6
126        ET                                                                                           2
127         THY                                                                                         9
128             CELLULOSE                                                                               11
129             CELL                                                                                    6
130             CEL                                                                                     5
131              ELL                                                                                    3
132              EL                                                                                     2
133                  LOSE                                                                               4
134                  LO                                                                                 2
135                   OSE                                                                               3
136                   OS                                                                                2
137                     EH                                                                              5
138                      HANDCRAFTSMANSHIP                                                              26
139                      HANDCRAFTSMAN                                                                  21
140                      HANDCRAFTS                                                                     16
141                      HANDCRAFT                                                                      15
142                      HAND                                                                           8
143                      HA                                                                             5
144                       AND                                                                           4
145                       AN                                                                            2
146                          CRAFTSMANSHIP                                                              18
147                          CRAFTSMAN                                                                  13
148                          CRAFTS                                                                     8
149                          CRAFT                                                                      7
150                           RAFTSMAN                                                                  13
151                           RAFTS                                                                     8
152                           RAFT                                                                      7
153                            AFT                                                                      6
154                                MANS                                                                 6
155                                MAN                                                                  5
156                                MA                                                                   4
157                                   SHIP                                                              5
158                                   SH                                                                1
159                                    HIP                                                              4
160                                    HI                                                               1
161                                       WIREDRAWER                                                    17
162                                       WIREDRAW                                                      15
163                                       WIRED                                                         9
164                                       WIRE                                                          7
165                                        IRED                                                         5
166                                        IRE                                                          3
167                                         REDRAWER                                                    12
168                                         REDRAW                                                      10
169                                         RED                                                         4
170                                         RE                                                          2
171                                          ED                                                         3
172                                           DRAWER                                                    10
173                                           DRAW                                                      8
174                                            RAWER                                                    8
175                                            RAW                                                      6
176                                             AWE                                                     6
177                                             AW                                                      5
178                                              WE                                                     5
179                                               ER                                                    2
180                                                 DINITROBENZENE                                      26
181                                                 DINITRO                                             8
182                                                 DIN                                                 4
183                                                  IN                                                 2
184                                                   NITROBENZENE                                      23
185                                                   NITRO                                             5
186                                                   NIT                                               3
187                                                    IT                                               2
188                                                      ROBE                                           6
189                                                      ROB                                            5
190                                                       OBE                                           5
191                                                        BENZENE                                      18
192                                                        BEN                                          5
193                                                        BE                                           4
194                                                         EN                                          2
195                                                             NE                                      2
196                                                               PETTIFOGGING                          20
197                                                               PETTIFOG                              14
198                                                               PETTI                                 7
199                                                               PET                                   5
200                                                               PE                                    4
201                                                                  TI                                 2
202                                                                   IF                                5
203                                                                    FOGGING                          13
204                                                                    FOG                              7
205                                                                       GIN                           4
206                                                                           JUDOKA                    18
207                                                                           JUDO                      12
208                                                                            UDO                      4
209                                                                             DO                      3
210                                                                              OKA                    7
211                                                                               KA                    6
212                                                                                AE                   2
213                                                                                 EQUATE              15
214                                                                                  QUATE              14
215                                                                                  QUA                12
216                                                                                    ATE              3
217                                                                                    AT               2
218                                                                                       VIVA          10
219                                                                                           AI        2
220                                                                                             OE      2
221                                                                                               SO    2
222</pre>
223 *
224 * @author {@link.uri mailto:ball@hcf.dev Allen D. Ball}
225 * @version $Revision: 5431 $
226 */
227@AntTask("solve-riddle-2019-06-28")
228@NoArgsConstructor @ToString
229public class SolveRiddle20190628Task extends AbstractTask {
230    private static final Path TMPDIR =
231        Paths.get(System.getProperty("java.io.tmpdir"));
232
233    private static final Comparator<CharSequence> COMPARATOR =
234        comparing(CharSequence::toString, CASE_INSENSITIVE_ORDER);
235
236    private final Wordlist wordlist = new Wordlist();
237    private final WordMap wordmap = new WordMap(wordlist.keySet());
238    private final StartsWithMap starts = new StartsWithMap(wordmap.keySet());
239    private final EndsWithMap ends = new EndsWithMap(wordmap.keySet());
240    private final Bag bag = new Bag();
241    private final List<Tile> solution = new LinkedList<>();
242    private final StringBuilder sequence = new StringBuilder();
243    private final LinkedList<TreeMap<CharSequence,Integer>> score =
244        new LinkedList<>();
245
246    @Override
247    public void execute() throws BuildException {
248        super.execute();
249
250        try {
251            while (! bag.isEmpty()) {
252                Optional<CharSequence> next = next();
253                CharSequence subsequence =
254                    next.orElseGet(() -> Tile.toString(bag));
255                List<Tile> tiles = draw(subsequence, bag);
256
257                play(subsequence, tiles);
258            }
259
260            int total =
261                score
262                .stream()
263                .flatMap(t -> t.values().stream())
264                .mapToInt(t -> t.intValue())
265                .sum();
266
267            log(Tile.toString(solution) + SPACE + total);
268
269            StringBuilder subheader = new StringBuilder();
270
271            for (int i = 0, n = solution.size(); i < n; i += 1) {
272                if (solution.get(i).getLetter() != sequence.charAt(i)) {
273                    subheader.append(sequence.charAt(i));
274                } else {
275                    subheader.append(SPACE);
276                }
277            }
278
279            log(subheader.toString());
280            log(repeat("-", solution.size()));
281
282            for (int i = 0, n = score.size(); i < n; i += 1) {
283                LinkedList<CharSequence> keys =
284                    new LinkedList<>(score.get(i).keySet());
285
286                keys.sort(comparingInt(CharSequence::length).reversed());
287
288                for (CharSequence key : keys) {
289                    StringBuilder line = new StringBuilder();
290
291                    line.append(repeat(SPACE, i));
292                    line.append(key);
293                    line.append(repeat(SPACE,
294                                       solution.size() - (i + key.length())));
295                    line.append(SPACE)
296                        .append(String.valueOf(score.get(i).get(key)));
297
298                    log(line.toString());
299                }
300            }
301
302            if (! bag.isEmpty()) {
303                log("Remaining: " + Tile.toString(bag));
304            }
305        } catch (BuildException exception) {
306            throw exception;
307        } catch (Throwable throwable) {
308            throwable.printStackTrace();
309            throw new BuildException(throwable);
310        }
311    }
312
313    private Optional<CharSequence> next() {
314        TreeMap<CharSequence,CharSequence> map = new TreeMap<>(COMPARATOR);
315
316        for (int i = 1, j = sequence.length(); i <= j; i += 1) {
317            String prefix = sequence.subSequence(i, j).toString();
318
319            starts.subMap(prefix + Character.MIN_VALUE,
320                          prefix + Character.MAX_VALUE)
321                .keySet()
322                .stream()
323                .max(comparingInt(t -> potential(t)))
324                .ifPresent(t -> map.put(prefix, t));
325        }
326
327        starts.keySet()
328            .stream()
329            .max(comparingInt(t -> potential(t)))
330            .ifPresent(t -> map.put(EMPTY, t));
331
332        Optional<CharSequence> next =
333            map.entrySet()
334            .stream()
335            .max(comparingInt(t -> potential(t.getValue())))
336            .map(t -> t.getValue().toString().substring(t.getKey().length()));
337
338        return next;
339    }
340/*
341    private Optional<CharSequence> next() {
342        Optional<CharSequence> next =
343            wordmap.keySet()
344            .stream()
345            .max(comparingInt(CharSequence::length));
346
347        return next;
348    }
349*/
350    private int potential(CharSequence sequence) {
351        int potential = 0;
352
353        Collection<CharSequence> words = wordmap.get(sequence);
354
355        if (words != null) {
356            potential =
357                words.stream()
358                .mapToInt(t -> wordlist.getOrDefault(t, 0))
359                .sum();
360        }
361
362        return potential;
363    }
364
365    private void play(CharSequence subsequence, List<Tile> tiles) {
366        if (tiles.size() == subsequence.length()) {
367            solution.addAll(tiles);
368            sequence.append(subsequence);
369
370            while (score.size() < solution.size()) {
371                score.add(new TreeMap<>(COMPARATOR));
372            }
373
374            for (int j = 1; j <= sequence.length(); j += 1) {
375                for (int i = 0; i < j; i += 1) {
376                    String substring =
377                        sequence.subSequence(i, j).toString();
378
379                    if (wordmap.keySet().remove(substring)) {
380                        score.get(i)
381                            .put(substring,
382                                 solution.subList(i, j)
383                                 .stream()
384                                 .mapToInt(Tile::getPoints)
385                                 .sum());
386                    }
387                }
388            }
389
390            wordmap.keySet().removeIf(t -> (! isPlayable(t, bag)));
391
392            wordmap.values().forEach(t -> t.retainAll(wordmap.keySet()));
393            wordmap.values().removeIf(Collection::isEmpty);
394
395            starts.values().forEach(t -> t.retainAll(wordmap.keySet()));
396            starts.values().removeIf(Collection::isEmpty);
397
398            ends.values().forEach(t -> t.retainAll(wordmap.keySet()));
399            ends.values().removeIf(Collection::isEmpty);
400        } else {
401            throw new IllegalStateException();
402        }
403    }
404
405    private boolean isPlayable(CharSequence in, List<Tile> list) {
406        List<Tile> out = draw(in, list);
407        boolean isPlayable = (in.length() == out.size());
408
409        list.addAll(out);
410
411        return isPlayable;
412    }
413
414    private List<Tile> draw(CharSequence in, List<Tile> list) {
415        LinkedList<Tile> out = new LinkedList<>();
416
417        for (int character : in.codePoints().boxed().collect(toList())) {
418            Optional<Tile> tile =
419                list.stream()
420                .filter(t -> t.getLetter() == character)
421                .findFirst();
422
423            if (! tile.isPresent()) {
424                tile =
425                    list.stream()
426                    .filter(t -> t.getLetter() == '_')
427                    .findFirst();
428            }
429
430            if (tile.isPresent()) {
431                out.add(tile.get());
432                list.remove(tile.get());
433            } else {
434                break;
435            }
436        }
437
438        return out;
439    }
440
441    private class Wordlist extends TreeMap<CharSequence,Integer> {
442        private static final long serialVersionUID = 8504887435537639563L;
443
444        protected final Path path = TMPDIR.resolve(getClass().getSimpleName());
445
446        public Wordlist() {
447            super(COMPARATOR);
448
449            try {
450                if (Files.exists(path)) {
451                    for (String line : Files.readAllLines(path)) {
452                        String[] entry = line.split("=", 2);
453
454                        computeIfAbsent(entry[0],
455                                        k -> Integer.parseInt(entry[1]));
456                    }
457                } else {
458                    try (BufferedReader reader = new ResourceReader()) {
459                        Bag bag = new Bag();
460
461                        reader.lines()
462                            .map(t -> t.split("#", 2)[0])
463                            .map(t -> t.split(SPACE, 2)[0])
464                            .filter(t -> isNotBlank(t))
465                            .map(t -> t.trim().toUpperCase())
466                            .forEach(t -> put(t, null));
467
468                        keySet().removeIf(t -> (! isPlayable(t, bag)));
469                    }
470
471                    keySet().stream()
472                        .forEach(t -> computeIfAbsent(t,
473                                                      k -> draw(t, new Bag())
474                                                           .stream()
475                                                           .mapToInt(Tile::getPoints)
476                                                           .sum()));
477
478                    Stream<String> stream =
479                        entrySet().stream().map(String::valueOf);
480
481                    Files.write(path, (Iterable<String>) stream::iterator);
482                }
483            } catch (Exception exception) {
484                throw new ExceptionInInitializerError(exception);
485            }
486        }
487
488        private class ResourceReader extends BufferedReader {
489            public ResourceReader() {
490                super(new InputStreamReader(SolveRiddle20190628Task.class.getResourceAsStream("enable1.txt")));
491            }
492
493            @Override
494            public String toString() { return super.toString(); }
495        }
496    }
497
498    private static abstract class XREF extends TreeMap<CharSequence,SortedSet<CharSequence>> {
499        private static final long serialVersionUID = 3416903069379036321L;
500
501        protected final Path path = TMPDIR.resolve(getClass().getSimpleName());
502
503        protected XREF(Set<CharSequence> wordset) {
504            super(COMPARATOR);
505
506            try {
507                if (Files.exists(path)) {
508                    for (String line : Files.readAllLines(path)) {
509                        String[] entry = line.split("=", 2);
510
511                        computeIfAbsent(entry[0],
512                                        k -> new TreeSet<>(COMPARATOR))
513                            .addAll(Arrays.asList(entry[1].split(SPACE)));
514                    }
515                } else {
516                    compute(wordset);
517
518                    Stream<String> stream =
519                        entrySet().stream().map(t -> format(t));
520
521                    Files.write(path, (Iterable<String>) stream::iterator);
522                }
523            } catch (Exception exception) {
524                throw new ExceptionInInitializerError(exception);
525            }
526        }
527
528        protected abstract void compute(Set<CharSequence> wordset);
529
530        private String format(Map.Entry<CharSequence,SortedSet<CharSequence>> entry) {
531            return (String.valueOf(entry.getKey()) + "="
532                    + entry.getValue().stream().collect(joining(SPACE)));
533        }
534    }
535
536    private class WordMap extends XREF {
537        private static final long serialVersionUID = -1880844619250978768L;
538
539        public WordMap(Set<CharSequence> wordset) { super(wordset); }
540
541        @Override
542        protected void compute(Set<CharSequence> wordset) {
543            for (CharSequence key : wordset) {
544                key = key.toString();
545
546                for (int j = 1; j <= key.length(); j += 1) {
547                    for (int i = 0; i < j; i += 1) {
548                        String value = key.toString().substring(i, j);
549
550                        if (wordset.contains(value)) {
551                            computeIfAbsent(key,
552                                            k -> new TreeSet<>(COMPARATOR))
553                                .add(value);
554                        }
555                    }
556                }
557            }
558        }
559    }
560
561    private class StartsWithMap extends XREF {
562        private static final long serialVersionUID = -2944553722487812425L;
563
564        public StartsWithMap(Set<CharSequence> wordset) { super(wordset); }
565
566        @Override
567        protected void compute(Set<CharSequence> wordset) {
568            for (CharSequence word : wordset) {
569                for (int i = 0, j = word.length() - 1; j > 0; j -= 1) {
570                    String key = word.subSequence(i, j).toString();
571
572                    computeIfAbsent(key, k -> new TreeSet<>(COMPARATOR))
573                        .add(word);
574                }
575            }
576        }
577    }
578
579    private class EndsWithMap extends XREF {
580        private static final long serialVersionUID = -5078727815938269143L;
581
582        public EndsWithMap(Set<CharSequence> wordset) { super(wordset); }
583
584        @Override
585        protected void compute(Set<CharSequence> wordset) {
586            for (CharSequence word : wordset) {
587                for (int i = 1, j = word.length(); i < j; i += 1) {
588                    String key = word.subSequence(i, j).toString();
589
590                    computeIfAbsent(key, k -> new TreeSet<>(COMPARATOR))
591                        .add(word);
592                }
593            }
594        }
595    }
596}