src/Gargantext/Core/Text/Terms/Eleve.hs

   1 {-|
   2 Module      : Gargantext.Core.Text.Terms.Eleve
   3 Description : Unsupervized Word segmentation
   4 Copyright   : (c) CNRS, 2019-Present
   5 License     : AGPL + CECILL v3
   6 Maintainer  : team@gargantext.org
   7 Stability   : experimental
   8 Portability : POSIX
   9
  10 # Implementation of Unsupervized Word Segmentation
  11
  12 References:
  13
  14 - Python implementation (Korantin August, Emmanuel Navarro):
  15   [EleVe](https://github.com/kodexlab/eleve.git)
  16
  17 - Unsupervized Word Segmentation:the case for Mandarin Chinese Pierre
  18   Magistry, Benoît Sagot, Alpage, INRIA & Univ. Paris 7, Proceedings of
  19   the 50th Annual Meeting of the Association for Computational Linguistics
  20   , pages 383–387. [PDF](https://www.aclweb.org/anthology/P12-2075)
  21
  22 Notes for current implementation:
  23 - TODO extract longer ngrams (see paper above, viterbi algo can be used)
  24 - TODO AD TEST: prop (Node c _e f) = c == Map.size f
  25
  26 - AD: Real ngrams extraction test
  27   from Gargantext.Core.Text.Terms import extractTermsUnsupervised
  28   docs <- runCmdRepl $ selectDocs 1004
  29   extractTermsUnsupervised 3 $ DT.intercalate " "
  30                         $ catMaybes
  31                         $ Gargantext.map _hyperdataDocument_abstract docs
  32
  33 -}
  34 {-# LANGUAGE ConstraintKinds   #-}
  35 {-# LANGUAGE StandaloneDeriving   #-}
  36 {-# LANGUAGE TemplateHaskell   #-}
  37 {-# LANGUAGE TypeFamilies      #-}
  38
  39 module Gargantext.Core.Text.Terms.Eleve where
  40
  41 -- import Debug.Trace (trace)
  42 -- import Debug.SimpleReflect
  43
  44 import Control.Lens hiding (levels, children)
  45 import Control.Monad (forM_)
  46 import qualified Data.List as L
  47 import Data.Monoid
  48 import Data.Text (Text)
  49 import qualified Data.Text as T
  50 import Data.Map.Strict (Map)
  51 import Data.Maybe (fromMaybe)
  52 import qualified Data.Map.Strict as Map
  53 import Gargantext.Prelude hiding (cs)
  54 import qualified Data.Tree as Tree
  55 import Data.Tree (Tree)
  56 import qualified Prelude as P (putStrLn, logBase, isNaN, RealFloat)
  57
  58 nan :: Floating e => e
  59 nan = 0 / 0
  60
  61 noNaNs :: P.RealFloat e => [e] -> [e]
  62 noNaNs = filter (not . P.isNaN)
  63
  64 updateIfDefined :: P.RealFloat e => e -> e -> e
  65 updateIfDefined e0 e | P.isNaN e = e0
  66                      | otherwise = e
  67
  68 sim :: Entropy e => e -> e -> Bool
  69 sim x y = x == y || (P.isNaN x && P.isNaN y)
  70
  71 subst :: Entropy e => (e, e) -> e -> e
  72 subst (src, dst) x | sim src x = dst
  73                    | otherwise = x
  74 ------------------------------------------------------------------------
  75
  76 -- | TODO: Show Instance only used for debugging
  77 type Entropy e =
  78   ( Fractional e
  79   , Floating e
  80   , P.RealFloat e
  81   , Show e
  82   )
  83 ------------------------------------------------------------------------
  84 -- | Example and tests for development
  85 data I e = I
  86   { _info_entropy    :: e
  87   , _info_entropy_var :: e
  88   , _info_autonomy   :: e
  89   }
  90
  91 instance Show e => Show (I e) where
  92   show (I e ev a) = show (e, ev, a)
  93
  94 makeLenses ''I
  95
  96 type ModEntropy i o e = (e -> e) -> i -> o
  97
  98 set_autonomy :: Entropy e => ModEntropy (I e) (I e) e
  99 set_autonomy fe i = i & info_autonomy .~ fe (i ^. info_entropy_var)
 100
 101 set_entropy_var :: Entropy e => Setter e (I e) e e
 102 set_entropy_var f e = (\ev -> I e ev nan) <$> f e
 103
 104 data StartStop = Start | Stop
 105   deriving (Ord, Eq, Show)
 106
 107 data Token = NonTerminal Text
 108            | Terminal StartStop
 109   deriving (Ord, Eq, Show)
 110
 111 isTerminal :: Token -> Bool
 112 isTerminal (Terminal    _) = True
 113 isTerminal (NonTerminal _) = False
 114
 115 nonTerminals :: [Token] -> [Text]
 116 nonTerminals ts = [nt | NonTerminal nt <- ts]
 117
 118 parseToken :: Text -> Token
 119 parseToken "<start>" = Terminal Start
 120 parseToken "<stop>"  = Terminal Stop
 121 parseToken t         = NonTerminal t
 122
 123 toToken :: [Text] -> [Token]
 124 toToken xs = Terminal Start : (NonTerminal <$> xs) <> [Terminal Stop]
 125
 126 printToken :: Token -> Text
 127 printToken = f
 128   where
 129     f (NonTerminal x)  = x
 130     f (Terminal Start) = "<start>"
 131     f (Terminal Stop)  = "<stop>"
 132 ------------------------------------------------------------------------
 133
 134 data Trie k e
 135   = Node { _node_count    :: Int
 136          , _node_entropy  :: e
 137          , _node_children :: Map k (Trie k e)
 138          }
 139  | Leaf { _node_count    :: Int }
 140   deriving (Show)
 141
 142 makeLenses ''Trie
 143
 144 insertTrie :: Ord k => [k] -> Trie k () -> Trie k ()
 145 insertTrie []     n                    = n { _node_count = _node_count n +1}
 146 insertTrie (x:xs) (Leaf c)             = mkTrie (c+1) $ Map.singleton x $ insertTrie xs emptyTrie
 147 insertTrie (x:xs) (Node c _e children) = mkTrie (c+1) $ Map.alter f x children
 148   where
 149     f = Just . insertTrie xs . fromMaybe emptyTrie
 150
 151 -- emptyTrie :: (Ord k, Monoid e) => Trie k e
 152 -- emptyTrie = Node 0 mempty mempty
 153 emptyTrie :: Trie k e
 154 emptyTrie  = Leaf 0
 155
 156 mkTrie :: Monoid e => Int -> Map k (Trie k e) -> Trie k e
 157 mkTrie c children
 158   | Map.null children = Leaf c
 159   | otherwise         = Node c mempty children
 160
 161                         -----------------------------
 162 -- | Trie to Tree since Tree as nice print function
 163 toTree :: k -> Trie k e -> Tree (k,Int,Maybe e)
 164 toTree k (Leaf c)      = Tree.Node (k, c, Nothing) []
 165 toTree k (Node c e cs) = Tree.Node (k, c, Just e)  (map (uncurry toTree) $ Map.toList cs)
 166
 167 ------------------------------------------------------------------------
 168 ------------------------------------------------------------------------
 169 normalizeLevel :: Entropy e => e -> e -> e -> e
 170 normalizeLevel m v e = (e - m) / v
 171
 172 {- Unused
 173
 174 nodeChildren :: Trie k e -> Map k (Trie k e)
 175 nodeChildren (Node _ _ cs) = cs
 176 nodeChildren (Leaf _)      = Map.empty
 177
 178 -}
 179
 180 chunkAlongEleve :: Int -> [a] -> [[a]]
 181 chunkAlongEleve n xs = L.take n <$> L.tails xs
 182
 183 data Direction = Backward | Forward
 184
 185 buildTrie :: Direction -> Int -> [[Token]] -> Trie Token ()
 186 buildTrie d n sentences
 187   = L.foldr insertTrie emptyTrie
 188   . L.concat
 189   $ ( filter (/= [Terminal (term d)])
 190     . chunkAlongEleve (n + 1)
 191     . order d
 192     )
 193  <$> sentences
 194   where
 195     order Forward  = identity
 196     order Backward = reverse
 197     term  Forward  = Stop
 198     term  Backward = Start
 199
 200 class IsTrie trie where
 201   entropyTrie :: Entropy e => (k -> Bool) -> trie k () -> trie k e
 202   nodeEntropy :: Entropy e => Getting e i e -> trie k i -> e
 203   nodeChild   :: Ord k =>  k  -> trie k e -> trie k e
 204   findTrie    :: Ord k => [k] -> trie k e -> trie k e
 205   printTrie   :: (Show i, Entropy e) => Getting e i e -> trie Token i -> IO ()
 206   evTrie      :: Entropy e => Getting e i e -> Setter i o e e -> trie k i -> trie k o
 207   normalizeEntropy :: Entropy e
 208                    => Getting e i e -> ModEntropy i o e
 209                    -> trie k i -> trie k o
 210
 211 instance IsTrie Trie where
 212
 213   entropyTrie _    (Leaf c)             = Leaf c
 214   entropyTrie pred (Node c () children) = Node c e (map (entropyTrie pred) children)
 215     where
 216       children' = Map.toList children
 217       sum_count = sum $ _node_count . snd <$> children'
 218       e | sum_count == 0 = nan
 219         | otherwise      = sum $ f <$> children'
 220       f (k, child) = if pred k then   chc * P.logBase 2 (fromIntegral c)
 221                               else - chc * P.logBase 2 chc
 222         where
 223           chc = fromIntegral (_node_count child) / fromIntegral c
 224
 225   nodeEntropy inE (Node _ e _) = e ^. inE
 226   nodeEntropy _   (Leaf _)     = nan
 227
 228   nodeChild k (Node _ _ cs) = fromMaybe emptyTrie (Map.lookup k cs)
 229   nodeChild _ (Leaf _)      = emptyTrie
 230
 231   findTrie ks t = L.foldl (flip nodeChild) t ks
 232
 233   printTrie inE t = do
 234     P.putStrLn . Tree.drawTree
 235                 . fmap show
 236                 $ toTree (NonTerminal "") t
 237     P.putStrLn "  Levels:"
 238     forM_ (normalizationLevels inE t) $ \level ->
 239       P.putStrLn $ "    " <> show level
 240
 241   evTrie inE setEV = go nan
 242     where
 243       go _  (Leaf c)            = Leaf c
 244       go e0 (Node c i children) = Node c (i & setEV .~ ev e0 e1) $ go e1 <$> children
 245         where e1 = i ^. inE
 246
 247       ev 0  0  = nan
 248       ev i0 i1 = i1 - i0
 249
 250   normalizeEntropy inE modE t = go (modE identity) (normalizationLevels inE t) t
 251     where
 252       go _ _                 (Leaf c)            = Leaf c
 253       go _ []                _                   = panic "normalizeEntropy' empty levels"
 254       go f ((m, v, _) : ess) (Node c i children)
 255         = Node c (f i) $ go (modE $ normalizeLevel m v) ess <$> children
 256 ------------------------------------------------------------------------
 257
 258 levels :: Trie k e -> [[Trie k e]]
 259 levels = L.takeWhile (not . L.null) . L.iterate (L.concatMap subForest) . pure
 260   where
 261     subForest :: Trie k e -> [Trie k e]
 262     subForest (Leaf _)            = []
 263     subForest (Node _ _ children) = Map.elems children
 264
 265 entropyLevels :: Entropy e => Getting e i e -> Trie k i -> [[e]]
 266 entropyLevels inE = fmap (noNaNs . map (nodeEntropy inE)) . L.tail . levels
 267
 268 normalizationLevels :: Entropy e => Getting e i e -> Trie k i -> [(e, e, Int)]
 269 normalizationLevels inE = fmap f . entropyLevels inE
 270   where
 271     f es = (mean es, deviation es, length es)
 272
 273 ------------------------------------------------------------------------
 274
 275 data Tries k e = Tries
 276   { _fwd :: Trie k e
 277   , _bwd :: Trie k e
 278   }
 279
 280 makeLenses ''Tries
 281
 282 deriving instance (Show k, Show e) => Show (Tries k e)
 283
 284 buildTries :: Int -> [[Token]] -> Tries Token ()
 285 buildTries n sentences = Tries
 286   { _fwd = buildTrie Forward  n sentences
 287   , _bwd = buildTrie Backward n sentences
 288   }
 289
 290 instance IsTrie Tries where
 291
 292   nodeEntropy inE (Tries f b) = mean [nodeEntropy inE f, nodeEntropy inE b]
 293
 294   findTrie ks (Tries f b) = Tries (findTrie ks f) (findTrie (reverse ks) b)
 295
 296   nodeChild = onTries . nodeChild
 297
 298   entropyTrie = onTries . entropyTrie
 299
 300   evTrie inE setEV = onTries $ evTrie inE setEV
 301
 302   normalizeEntropy inE = onTries . normalizeEntropy inE
 303
 304   printTrie inE (Tries f b) = do
 305     P.putStrLn "Forward:"
 306     printTrie inE f
 307     P.putStrLn ""
 308     P.putStrLn "Backward:"
 309     printTrie inE b
 310
 311 onTries :: (Trie k i -> Trie k o) -> Tries k i -> Tries k o
 312 onTries h (Tries f b) = Tries (h f) (h b)
 313
 314 ------------------------------------------------------------------------
 315 mayCons :: [a] -> [[a]] -> [[a]]
 316 mayCons [] xss = xss
 317 mayCons xs xss = xs : xss
 318
 319 {-
 320 split :: (IsTrie trie, Entropy e) => Lens' i e -> trie Token i -> [Token] -> [[Token]]
 321 split _   _ [] = []
 322 split inE t (Terminal Start:xs) = split inE t xs
 323 split inE t (x0:xs0) = go [x0] xs0
 324   where
 325     go pref []                  = [pref]
 326     go pref (Terminal Stop:_)   = [pref]
 327     go _    (Terminal Start:_)  = panic "split impossible"
 328     go pref (x:xs) =
 329         -- trace (show (if acc then "ACC" else "CUT", (prefx, epxt), if acc then ">" else "<=", ((pref, ept), "+", ([x], ext)))) $
 330         if acc
 331           then go prefx xs
 332           else mayCons pref $ go [x] xs
 333       where
 334         prefx = pref <> [x]
 335         pt   = findTrie pref t
 336         pxt  = findTrie prefx t
 337         xt   = findTrie [x] t
 338         ept  = ne pt
 339     --  ^ entropy of the current prefix
 340         ext  = ne xt
 341     --  ^ entropy of [x]
 342         epxt = ne pxt
 343     --  ^ entropy of the current prefix plus x
 344         acc  = P.isNaN ept || P.isNaN ext || not (P.isNaN epxt) -- && (epxt > mean [ept, ext])
 345
 346         -- aut(["in","this","paper"]) > aut(["in","this"]) + aut(["paper"])
 347
 348     ne = nodeEntropy inE
 349 -}
 350
 351 split :: Entropy e => Int -> Lens' i e -> Tries Token i -> [Token] -> [[Text]]
 352 split _ _   _ []  = []
 353 split _ _   _ [t] = pure <$> nonTerminals [t]
 354 split n inE t ts  = nonTerminals pref `mayCons` split n inE t (drop (length pref) ts)
 355   where
 356     pref = maximumWith (\ks -> nodeEntropy inE $ findTrie ks t)
 357                        (L.tail . L.inits . take n $ ts)
 358
 359
 360 {-
 361 split :: Entropy e => Lens' i e -> Tries Token i -> [Token] -> [[Token]]
 362 split inE t0 ts =
 363   maximumWith (sum . map $ nodeAutonomy inE t0) (all the splits of ts)
 364 -}
 365
 366 ------------------------------------------------------------------------
 367
 368 mainEleve :: Int -> [[Text]] -> [[[Text]]]
 369 mainEleve n x = mainEleve' n x x
 370
 371 mainEleve' :: Int -> [[Text]] -> [[Text]] -> [[[Text]]]
 372 mainEleve' n x y = mainEleveWith x' n y
 373   where
 374     x' = buildTries n (fmap toToken x)
 375   -- (fmap toToken i) is computed twice, since mainEleveWith is computing it too
 376
 377 -- | This function should take the longest possible chain of:
 378 -- mainEleve'' n x y = maxChainSizeOf [ mainEleve' n x y
 379 --                                    , mainEleve' n x x
 380 --                                    , mainEleve' n y y
 381 --                                    ]
 382 mainEleve'' :: Int -> [[Text]] -> [[Text]] -> [[[Text]]]
 383 mainEleve'' = undefined
 384
 385 mainEleveWith :: Tries Token () -> Int -> [[Text]] -> [[[Text]]]
 386 mainEleveWith m n i = fmap (split n info_autonomy t) (fmap toToken i)
 387   where
 388     t :: Tries Token (I Double)
 389     t = normalizeEntropy info_entropy_var set_autonomy
 390       $ evTrie identity set_entropy_var
 391       $ entropyTrie isTerminal m
 392
 393 ------------------------------------------------------------------------
 394
 395 type Checks e = [(Text, Int, e, e, e, e, e, e, e, e, e)]
 396
 397 testEleve :: e ~ Double => Bool -> Int -> [Text] -> Checks e -> IO Bool
 398 testEleve debug n output checks = do
 399   let
 400     res = split (1 + n) info_autonomy nt <$> input
 401   when debug $ do
 402     P.putStrLn . show $ (printToken <$>) <$> input
 403     P.putStrLn ""
 404     printTrie info_entropy nt
 405     P.putStrLn ""
 406     P.putStrLn "Splitting:"
 407     P.putStrLn $ show res
 408   forM_ checks checker
 409   pure $ expected == res
 410
 411   where
 412     out      = T.words <$> output
 413     expected = fmap (T.splitOn "-") <$> out
 414     input    = toToken . (T.splitOn "-" =<<) <$> out
 415
 416     nt :: Tries Token (I Double)
 417     nt = normalizeEntropy info_entropy_var set_autonomy
 418        . evTrie identity set_entropy_var
 419        . entropyTrie isTerminal
 420        $ buildTries n input
 421
 422     check f msg ref my =
 423       if f ref my
 424         then P.putStrLn $ "    \ESC[32mPASS\ESC[m " <> msg <> " " <> show ref
 425         else P.putStrLn $ "    \ESC[31mFAIL\ESC[m " <> msg <> " ref=" <> show ref <> " my=" <> show my
 426
 427     checker (ngram, count, entropy, ev, autonomy, fwd_entropy, fwd_ev, fwd_autonomy, bwd_entropy, bwd_ev, bwd_autonomy) = do
 428       let ns  = parseToken <$> T.words ngram
 429           nt' = findTrie ns nt
 430
 431       P.putStrLn $ "  " <> T.unpack ngram <> ":"
 432       check (==) "count"        count        (_node_count                  (_fwd nt'))
 433
 434       check sim  "entropy"      entropy      (nodeEntropy info_entropy           nt' )
 435       check sim  "ev"           ev           (nodeEntropy info_entropy_var       nt' )
 436       check sim  "autonomy"     autonomy     (nodeEntropy info_autonomy          nt' )
 437
 438       check sim  "fwd_entropy"  fwd_entropy  (nodeEntropy info_entropy     (_fwd nt'))
 439       check sim  "fwd_ev"       fwd_ev       (nodeEntropy info_entropy_var (_fwd nt'))
 440       check sim  "fwd_autonomy" fwd_autonomy (nodeEntropy info_autonomy    (_fwd nt'))
 441
 442       check sim  "bwd_entropy"  bwd_entropy  (nodeEntropy info_entropy     (_bwd nt'))
 443       check sim  "bwd_ev"       bwd_ev       (nodeEntropy info_entropy_var (_bwd nt'))
 444       check sim  "bwd_autonomy" bwd_autonomy (nodeEntropy info_autonomy    (_bwd nt'))
 445
 446 -- | TODO real data is a list of tokenized sentences
 447 example0, example1, example2, example3, example4, example5, example6, example7, example8, example9 :: [Text]
 448 example0 =  ["New-York is New-York and New-York"]
 449 example1 =  ["to-be or not to-be"]
 450 example2 =  ["to-be-or not to-be-or NOT to-be and"]
 451 example3 =  example0 <> example0
 452        -- > TEST: Should not have York New in the trie
 453 example4 =  ["a-b-c-d e a-b-c-d f"]
 454 example5 =  ["a-b-c-d-e f a-b-c-d-e g a-b-c-d-e"]
 455 example6 =  ["le-petit chat"
 456             ,"le-petit chien"
 457             ,"le-petit rat"
 458             ,"le gros rat"
 459             ]
 460 example7 =  ["a-b d", "a-c e", "a-c", "a-b", "a-b", "a-c", "a-c", "a-b"]
 461 -- example8 =  ["z f", "z", "z", "z"] <> example7
 462 example8 =  ["z", "z", "z", "z"] <> example7 <> example7 <> example7
 463 example9 =  (T.replace "z" "a") <$> example8
 464 --example8 =  ["a-b d", "a-c e", "a f", "a-c g", "a-b h", "a i", "a j", "a-b k", "a-c l", "a-c m", "a n", "a-b o"]
 465
 466 checks0, checks2, checks7, checks8, checks9 :: Checks Double
 467
 468 checks0 =
 469 -- [(token, count, entropy, ev, autonomy, fwd_entropy, fwd_ev, fwd_autonomy, bwd_entropy, bwd_ev, bwd_autonomy)]
 470   [ ("<start>", 1, nan, nan, nan, 0.0, -2.113283334294875, -0.5000000000000002, nan, nan, nan)
 471   , ("New", 3, 0.792481250360578, -1.3208020839342969, 0.7499999999999999, 0.0, -2.113283334294875, -0.5000000000000002, 1.584962500721156, -0.5283208335737188, 2.0)
 472   , ("York", 3, 0.792481250360578, -1.3208020839342969, 0.7499999999999999, 1.584962500721156, -0.5283208335737188, 2.0, 0.0, -2.113283334294875, -0.5000000000000002)
 473   , ("is", 1, 0, -2.113283334294875, -0.5000000000000002, 0.0, -2.113283334294875, -0.5000000000000002, 0.0, -2.113283334294875, -0.5000000000000002)
 474   , ("and", 1, 0, -2.113283334294875, -0.5000000000000002, 0.0, -2.113283334294875, -0.5000000000000002, 0.0, -2.113283334294875, -0.5000000000000002)
 475   , ("<stop>", 0, nan, nan, nan, nan, nan, nan, 0.0, -2.113283334294875, -0.5000000000000002)
 476   , ("<start> New", 1, nan, nan, nan, 0.0, nan, nan, nan, nan, nan)
 477   , ("New York", 3, 1.584962500721156, 1.584962500721156, 1.414213562373095, 1.584962500721156, 1.584962500721156, 1.4142135623730947, 1.584962500721156, 1.584962500721156, 1.4142135623730951)
 478   , ("York is", 1, 0, nan, nan, 0.0, -1.584962500721156, -0.7071067811865476, 0.0, nan, nan)
 479   , ("is New", 1, 0, nan, nan, 0.0, nan, nan, 0.0, -1.584962500721156, -0.7071067811865474)
 480   , ("York and", 1, 0, nan, nan, 0.0, -1.584962500721156, -0.7071067811865476, 0.0, nan, nan)
 481   , ("and New", 1, 0, nan, nan, 0.0, nan, nan, 0.0, -1.584962500721156, -0.7071067811865474)
 482   , ("York <stop>", 1, nan, nan, nan, nan, nan, nan, 0.0, nan, nan)
 483   , ("<start> New York", 1, nan, nan, nan, 0.0, nan, nan, nan, nan, nan)
 484   , ("New York is", 1, 0, nan, nan, 0.0, -1.584962500721156, nan, 0.0, nan, nan)
 485   , ("York is New", 1, 0, nan, nan, 0.0, nan, nan, 0.0, nan, nan)
 486   , ("is New York", 1, 0, nan, nan, 0.0, nan, nan, 0.0, -1.584962500721156, nan)
 487   , ("New York and", 1, 0, nan, nan, 0.0, -1.584962500721156, nan, 0.0, nan, nan)
 488   , ("York and New", 1, 0, nan, nan, 0.0, nan, nan, 0.0, nan, nan)
 489   , ("and New York", 1, 0, nan, nan, 0.0, nan, nan, 0.0, -1.584962500721156, nan)
 490   , ("New York <stop>", 1, nan, nan, nan, nan, nan, nan, 0.0, nan, nan)
 491   ]
 492
 493 checks2 = []
 494 {-
 495   [("to be",  3, 1.2516291673878228, 1.2516291673878228, 1.5535694744293167, nan, 0.9182958340544896)
 496   ,("be or",  2, 0.5, nan, nan, nan, 1.0)
 497   ,("or not", 1, 0.0, nan, nan, nan, 0.0)
 498   ,("not to", 1, 0.0, nan, nan, nan, 0.0)
 499   ,("or NOT", 1, 0.0, nan, nan, nan, 0.0)
 500   ,("NOT to", 1, 0.0, nan, nan, nan, 0.0)
 501   ,("be and", 1, 0.0, nan, nan, nan, 0.0)
 502   ]
 503 -}
 504
 505 checks7 =
 506   [ ("a b", 4, 2, 1.5, 1.0106455960380136, 2, 1, 0.7302967433402215, 2, 2, 1.2909944487358056)
 507   , ("a c", 4, 2, 1.5, 1.0106455960380136, 2, 1, 0.7302967433402215, 2, 2, 1.2909944487358056)
 508   , ("a", 8, 2, -0.7139421727208477, 0.9315597394596105, 1, -1.7139421727208477, 0.1695158759052029, 3, 0.2860578272791523, 1.693603603014018)
 509   ]
 510
 511 checks8 =
 512   [ ("a b", 4, 2, 1.5, 1.2384061243840367, 2, 1, 0.9190418024406298, 2, 2, 1.5577704463274435)
 513   , ("a c", 4, 2, 1.5, 1.2384061243840367, 2, 1, 0.9190418024406298, 2, 2, 1.5577704463274435)
 514   , ("a", 8, 2, -1.1151193576322829, 0.8012882295122719, 1, -2.115119357632283, 1.1025957503820932e-2, 3, -0.11511935763228287, 1.5915505015207227)
 515   , ("z", 4, 2, -1.1151193576322829, 0.9576679529201777, 2, -1.1151193576322829, 1.0906240295212841, 2, -1.1151193576322829, 0.8247118763190712)
 516   ]
 517
 518 checks9 =
 519   [ ("a b", 4, 2, 0.8741854163060885, 0.9234576822288185, 2, -0.25162916738782304, 0.2891449181301934, 2, 2, 1.5577704463274435)
 520   , ("a c", 4, 2, 0.8741854163060885, 0.9234576822288185, 2, -0.25162916738782304, 0.2891449181301934, 2, 2, 1.5577704463274435)
 521   , ("a", 12, 2.91829583405449, 3.763498724462999e-2, 1.518835832034022, 2.251629167387823, -0.6290316794220367, 1.2162041043595873, 3.5849625007211565, 0.7043016539112967, 1.8214675597084569)
 522   ]
 523
 524 runTestsEleve :: Bool -> IO ()
 525 runTestsEleve doChecks =
 526   forM_
 527     [("example0", 3, example0, checks0)
 528     ,("example0", 2, example0, [])
 529     ,("example1", 2, example1, [])
 530     ,("example2", 3, example2, checks2)
 531     ,("example3", 2, example3, [])
 532     ,("example4", 4, example4, [])
 533     ,("example5", 5, example5, [])
 534     ,("example6", 2, example6, [])
 535     ,("example7", 2, example7, checks7)
 536     ,("example8", 2, example8, checks8)
 537     ,("example9", 2, example9, checks9)
 538     ]
 539     (\(name, n, ex, checks) -> do
 540       P.putStrLn $ name <> " " <> show n
 541       b <- testEleve False n ex (if doChecks then checks else [])
 542       P.putStrLn $ "  splitting: " <> if b then "PASS" else "FAIL"
 543     )