Assignment_06/src/main/scala/Anagrams.scala

   1 package anagrams
   2
   3 /**
   4  * Sentence anagrams generation
   5  * MSE course, T-AdvPrPa course
   6  */
   7 object Anagrams {
   8         /**
   9          * Types definitions
  10          */
  11         type Word = String
  12         type Occurrences = List[(Char, Int)]
  13         type Sentence = List[Word]
  14
  15         /**
  16          * The dictionary is simply a sequence of words.
  17          * It is predefined and obtained as a sequence using the utility method `loadDictionary`.
  18          */
  19         val dictionary: List[Word] = loadDictionary()
  20
  21         /**
  22          *  Question 1 : converts the word into its character occurrence list.
  23          *
  24          *  For instance,
  25          *  wordOccurences("abcd") gives List(('a', 1), ('b', 1), ('c', 1), ('d', 1))
  26          *  wordOccurences("aabcddd") gives List(('a', 2), ('b', 1), ('c', 1), ('d', 3))
  27          *
  28          *  Note: the upper case and lower case version of the character are treated as the
  29          *  same character, and are represented as a lower case character in the occurrence list.
  30          *  The list is sorted alphabeticaly.
  31          */
  32         def wordOccurrences(w: Word): Occurrences =
  33                 w.toLowerCase().groupBy(identity).toList.map{ case (_, letters) => (letters(0), letters.length) }.sortWith((a, b) => a._1 < b._1)
  34
  35         /**
  36          * Question 2: Valid words
  37          *
  38          * The `dictionaryByOccurrences` is a `Map` from different occurrences to a sequence of all
  39          * the words that have that occurrence count. This map serves as an easy way to obtain all the anagrams of a word given its occurrence list.
  40          */
  41         lazy val dictionaryByOccurrences: Map[Occurrences, List[Word]] =
  42                 dictionary groupBy (w => wordOccurrences(w))
  43
  44         /**
  45          * Question 3: Returns all the anagrams of a given word
  46          */
  47         def wordAnagrams(word: Word): List[Word] =
  48                 dictionaryByOccurrences get wordOccurrences(word) match {
  49                         case Some(words) => words
  50                         case None => List()
  51                 }
  52
  53         /**
  54          * Question 4: Returns all the subsets of an occurrence list
  55          */
  56
  57         /**
  58          * Generates all the subsets of a set
  59          */
  60         def combinations(occurrences: Occurrences): List[Occurrences] = {
  61                 def generator[T](x: List[List[T]]): List[List[T]] = x match {
  62                         case Nil => List(Nil)
  63                         case h :: t => for (j <- generator(t); i <- h) yield i :: j
  64                 }
  65
  66                 def combinationsOnNumber(occurs : Occurrences) : List[Occurrences] =
  67                         generator(occurs map {case (c: Char, i: Int) => (1 to i) map (j => (c, j)) toList})
  68
  69                 val combs = for (
  70                         i <- 1 to occurrences.length;
  71                         l <- occurrences combinations i;
  72                         l2 <- combinationsOnNumber(l)
  73                 ) yield l2
  74
  75                 List() :: (combs toList)
  76         }
  77
  78         /**
  79          * Question 5: remove occurrences from x that are in y
  80          */
  81         def subtract(x: Occurrences, y: Occurrences): Occurrences = {
  82                 def subtractMap(x: Map[Char, Int], y: Occurrences): Occurrences = y match {
  83                         case Nil => x.toList
  84                         case (cy, ny) :: t => {
  85                                 val nx = x(cy)
  86                                 val remaining = nx - ny
  87                                 if (remaining <= 0)
  88                                         subtractMap(x - cy, t)
  89                                 else
  90                                         subtractMap(x.updated(cy, remaining), t)
  91                         }
  92                 }
  93                 subtractMap(x.toMap, y).sortWith((a, b) => a._1 < b._1)
  94         }
  95
  96         /**
  97          * Question 6 - Generate sentence anagrams
  98          */
  99
 100         /** Converts a sentence into its character occurrence list. */
 101         def sentenceOccurrences(s: Sentence): Occurrences =
 102     if (s.isEmpty)
 103       List()
 104     else
 105       wordOccurrences(s.reduce(_ + _))
 106
 107         def sentenceAnagrams(sentence: Sentence): List[Sentence] = {
 108     def loop(sentence: Sentence, remaining: Occurrences) : List[Sentence] = {
 109       if (remaining.isEmpty)
 110         List(sentence)
 111       else
 112         for (
 113           c <- combinations(remaining);
 114           word <- dictionaryByOccurrences.applyOrElse(c, (_: Occurrences) => List());
 115           s <- loop(word :: sentence, subtract(remaining, c))
 116         ) yield s
 117     }
 118     loop(List(), sentenceOccurrences(sentence))
 119         }
 120 }