src/Gargantext/Text/Terms/Eleve.hs

   1 {-|
   2 Module      : Gargantext.Text.Terms.Eleve
   3 Description : Unsupervized Word segmentation
   4 Copyright   : (c) CNRS, 2019-Present
   5 License     : AGPL + CECILL v3
   6 Maintainer  : team@gargantext.org
   7 Stability   : experimental
   8 Portability : POSIX
   9
  10 # Implementation of Unsupervized Word Segmentation
  11
  12 References:
  13
  14 - Python implementation (Korantin August, Emmanuel Navarro):
  15   [EleVe](https://github.com/kodexlab/eleve.git)
  16
   - "Unsupervized Word Segmentation: the case for Mandarin Chinese",
     Pierre Magistry, Benoît Sagot, Alpage, INRIA & Univ. Paris 7,
     Proceedings of the 50th Annual Meeting of the Association for
     Computational Linguistics, pages 383–387.
     [PDF](https://www.aclweb.org/anthology/P12-2075)
  21
  22 Notes for current implementation:
  23 - TODO extract longer ngrams (see paper above, viterbi algo can be used)
  24 - TODO AD TEST: prop (Node c _e f) = c == Map.size f
  25
  26 - AD: Real ngrams extraction test
  27   from Gargantext.Text.Terms import extractTermsUnsupervised
  28   docs <- runCmdRepl $ selectDocs 1004
  29   extractTermsUnsupervised 3 $ DT.intercalate " "
  30                         $ catMaybes
  31                         $ Gargantext.map _hyperdataDocument_abstract docs
  32
  33 -}
  34 {-# LANGUAGE ConstraintKinds   #-}
  35 {-# LANGUAGE NoImplicitPrelude #-}
  36 {-# LANGUAGE OverloadedStrings #-}
  37 {-# LANGUAGE RankNTypes        #-}
  38 {-# LANGUAGE TemplateHaskell   #-}
  39 {-# LANGUAGE TypeFamilies      #-}
  40
  41 module Gargantext.Text.Terms.Eleve where
  42
  43 -- import Debug.Trace (trace)
  44 -- import Debug.SimpleReflect
  45
  46 import Control.Lens hiding (levels, children)
  47 import Control.Monad (forM_)
  48 import Data.Ord (Ord)
  49 import qualified Data.List as L
  50 import Data.Monoid
  51 import Data.Text (Text)
  52 import qualified Data.Text as T
  53 import Data.Map (Map)
  54 import Data.Maybe (fromMaybe)
  55 import qualified Data.Map as Map
  56 import Gargantext.Prelude hiding (cs)
  57 import qualified Data.Tree as Tree
  58 import Data.Tree (Tree)
  59 import qualified Prelude as P (putStrLn, logBase, isNaN, RealFloat)
  60
  61 nan :: Floating e => e
  62 nan = 0 / 0
  63
  64 noNaNs :: P.RealFloat e => [e] -> [e]
  65 noNaNs = filter (not . P.isNaN)
  66
  67 updateIfDefined :: P.RealFloat e => e -> e -> e
  68 updateIfDefined e0 e | P.isNaN e = e0
  69                      | otherwise = e
  70
  71 sim :: Entropy e => e -> e -> Bool
  72 sim x y = x == y || (P.isNaN x && P.isNaN y)
  73
  74 subst :: Entropy e => (e, e) -> e -> e
  75 subst (src, dst) x | sim src x = dst
  76                    | otherwise = x
  77 ------------------------------------------------------------------------
  78
  79 type Entropy e =
  80   ( Fractional e
  81   , Floating e
  82   , P.RealFloat e
  83   , Show e
  84   -- ^ TODO: only used for debugging
  85   )
  86 ------------------------------------------------------------------------
  87 -- | Example and tests for development
  88 data I e = I
  89   { _info_entropy    :: e
  90   , _info_entropy_var :: e
  91   , _info_autonomy   :: e
  92   }
  93
  94 instance Show e => Show (I e) where
  95   show (I e ev a) = show (e, ev, a)
  96
  97 makeLenses ''I
  98
  99 type ModEntropy i o e = (e -> e) -> i -> o
 100
 101 set_autonomy :: Entropy e => ModEntropy (I e) (I e) e
 102 set_autonomy fe i = i & info_autonomy .~ fe (i ^. info_entropy_var)
 103
 104 set_entropy_var :: Entropy e => Setter e (I e) e e
 105 set_entropy_var f e = (\ev -> I e ev nan) <$> f e
 106
 107 data StartStop = Start | Stop
 108   deriving (Ord, Eq, Show)
 109
 110 data Token = NonTerminal Text
 111            | Terminal StartStop
 112   deriving (Ord, Eq, Show)
 113
 114 isTerminal :: Token -> Bool
 115 isTerminal (Terminal    _) = True
 116 isTerminal (NonTerminal _) = False
 117
 118 nonTerminals :: [Token] -> [Text]
 119 nonTerminals ts = [nt | NonTerminal nt <- ts]
 120
 121 parseToken :: Text -> Token
 122 parseToken "<start>" = Terminal Start
 123 parseToken "<stop>"  = Terminal Stop
 124 parseToken t         = NonTerminal t
 125
 126 toToken :: [Text] -> [Token]
 127 toToken xs = Terminal Start : (NonTerminal <$> xs) <> [Terminal Stop]
 128
 129 printToken :: Token -> Text
 130 printToken = f
 131   where
 132     f (NonTerminal x)  = x
 133     f (Terminal Start) = "<start>"
 134     f (Terminal Stop)  = "<stop>"
 135 ------------------------------------------------------------------------
 136
 137 data Trie k e
 138   = Node { _node_count    :: Int
 139          , _node_entropy  :: e
 140          , _node_children :: Map k (Trie k e)
 141          }
 142  | Leaf { _node_count    :: Int }
 143   deriving (Show)
 144
 145 makeLenses ''Trie
 146
 147 insertTrie :: Ord k => [k] -> Trie k () -> Trie k ()
 148 insertTrie []     n                    = n { _node_count = _node_count n +1}
 149 insertTrie (x:xs) (Leaf c)             = mkTrie (c+1) $ Map.singleton x $ insertTrie xs emptyTrie
 150 insertTrie (x:xs) (Node c _e children) = mkTrie (c+1) $ Map.alter f x children
 151   where
 152     f = Just . insertTrie xs . fromMaybe emptyTrie
 153
 154 -- emptyTrie :: (Ord k, Monoid e) => Trie k e
 155 -- emptyTrie = Node 0 mempty mempty
 156 emptyTrie :: Trie k e
 157 emptyTrie  = Leaf 0
 158
 159 mkTrie :: Monoid e => Int -> Map k (Trie k e) -> Trie k e
 160 mkTrie c children
 161   | Map.null children = Leaf c
 162   | otherwise         = Node c mempty children
 163
 164                         -----------------------------
 165 -- | Trie to Tree since Tree as nice print function
 166 toTree :: k -> Trie k e -> Tree (k,Int,Maybe e)
 167 toTree k (Leaf c)      = Tree.Node (k, c, Nothing) []
 168 toTree k (Node c e cs) = Tree.Node (k, c, Just e)  (map (uncurry toTree) $ Map.toList cs)
 169
 170 ------------------------------------------------------------------------
 171 ------------------------------------------------------------------------
 172 normalizeLevel :: Entropy e => e -> e -> e -> e
 173 normalizeLevel m v e = (e - m) / v
 174
 175 {- Unused
 176
 177 nodeChildren :: Trie k e -> Map k (Trie k e)
 178 nodeChildren (Node _ _ cs) = cs
 179 nodeChildren (Leaf _)      = Map.empty
 180
 181 -}
 182
 183 chunkAlongEleve :: Int -> [a] -> [[a]]
 184 chunkAlongEleve n xs = L.take n <$> L.tails xs
 185
 186 data Direction = Backward | Forward
 187
 188 buildTrie :: Direction -> Int -> [[Token]] -> Trie Token ()
 189 buildTrie d n sentences
 190   = L.foldr insertTrie emptyTrie
 191   . L.concat
 192   $ ( filter (/= [Terminal (term d)])
 193     . chunkAlongEleve (n + 1)
 194     . order d
 195     )
 196  <$> sentences
 197   where
 198     order Forward  = identity
 199     order Backward = reverse
 200     term  Forward  = Stop
 201     term  Backward = Start
 202
 203 class IsTrie trie where
 204   entropyTrie :: Entropy e => (k -> Bool) -> trie k () -> trie k e
 205   nodeEntropy :: Entropy e => Getting e i e -> trie k i -> e
 206   nodeChild   :: Ord k =>  k  -> trie k e -> trie k e
 207   findTrie    :: Ord k => [k] -> trie k e -> trie k e
 208   printTrie   :: (Show i, Entropy e) => Getting e i e -> trie Token i -> IO ()
 209   evTrie      :: Entropy e => Getting e i e -> Setter i o e e -> trie k i -> trie k o
 210   normalizeEntropy :: Entropy e
 211                    => Getting e i e -> ModEntropy i o e
 212                    -> trie k i -> trie k o
 213
 214 instance IsTrie Trie where
 215
 216   entropyTrie _    (Leaf c)             = Leaf c
 217   entropyTrie pred (Node c () children) = Node c e (map (entropyTrie pred) children)
 218     where
 219       children' = Map.toList children
 220       sum_count = sum $ _node_count . snd <$> children'
 221       e | sum_count == 0 = nan
 222         | otherwise      = sum $ f <$> children'
 223       f (k, child) = if pred k then   chc * P.logBase 2 (fromIntegral c)
 224                               else - chc * P.logBase 2 chc
 225         where
 226           chc = fromIntegral (_node_count child) / fromIntegral c
 227
 228   nodeEntropy inE (Node _ e _) = e ^. inE
 229   nodeEntropy _   (Leaf _)     = nan
 230
 231   nodeChild k (Node _ _ cs) = fromMaybe emptyTrie (Map.lookup k cs)
 232   nodeChild _ (Leaf _)      = emptyTrie
 233
 234   findTrie ks t = L.foldl (flip nodeChild) t ks
 235
 236   printTrie inE t = do
 237     P.putStrLn . Tree.drawTree
 238                 . fmap show
 239                 $ toTree (NonTerminal "") t
 240     P.putStrLn "  Levels:"
 241     forM_ (normalizationLevels inE t) $ \level ->
 242       P.putStrLn $ "    " <> show level
 243
 244   evTrie inE setEV = go nan
 245     where
 246       go _  (Leaf c)            = Leaf c
 247       go e0 (Node c i children) = Node c (i & setEV .~ ev e0 e1) $ go e1 <$> children
 248         where e1 = i ^. inE
 249
 250       ev 0  0  = nan
 251       ev i0 i1 = i1 - i0
 252
 253   normalizeEntropy inE modE t = go (modE identity) (normalizationLevels inE t) t
 254     where
 255       go _ _                 (Leaf c)            = Leaf c
 256       go _ []                _                   = panic "normalizeEntropy' empty levels"
 257       go f ((m, v, _) : ess) (Node c i children)
 258         = Node c (f i) $ go (modE $ normalizeLevel m v) ess <$> children
 259 ------------------------------------------------------------------------
 260
 261 levels :: Trie k e -> [[Trie k e]]
 262 levels = L.takeWhile (not . L.null) . L.iterate (L.concatMap subForest) . pure
 263   where
 264     subForest :: Trie k e -> [Trie k e]
 265     subForest (Leaf _)            = []
 266     subForest (Node _ _ children) = Map.elems children
 267
 268 entropyLevels :: Entropy e => Getting e i e -> Trie k i -> [[e]]
 269 entropyLevels inE = fmap (noNaNs . map (nodeEntropy inE)) . L.tail . levels
 270
 271 normalizationLevels :: Entropy e => Getting e i e -> Trie k i -> [(e, e, Int)]
 272 normalizationLevels inE = fmap f . entropyLevels inE
 273   where
 274     f es = (mean es, deviation es, length es)
 275
 276 ------------------------------------------------------------------------
 277
 278 data Tries k e = Tries
 279   { _fwd :: Trie k e
 280   , _bwd :: Trie k e
 281   }
 282
 283 makeLenses ''Tries
 284
 285 buildTries :: Int -> [[Token]] -> Tries Token ()
 286 buildTries n sentences = Tries
 287   { _fwd = buildTrie Forward  n sentences
 288   , _bwd = buildTrie Backward n sentences
 289   }
 290
 291 instance IsTrie Tries where
 292
 293   nodeEntropy inE (Tries f b) = mean [nodeEntropy inE f, nodeEntropy inE b]
 294
 295   findTrie ks (Tries f b) = Tries (findTrie ks f) (findTrie (reverse ks) b)
 296
 297   nodeChild = onTries . nodeChild
 298
 299   entropyTrie = onTries . entropyTrie
 300
 301   evTrie inE setEV = onTries $ evTrie inE setEV
 302
 303   normalizeEntropy inE = onTries . normalizeEntropy inE
 304
 305   printTrie inE (Tries f b) = do
 306     P.putStrLn "Forward:"
 307     printTrie inE f
 308     P.putStrLn ""
 309     P.putStrLn "Backward:"
 310     printTrie inE b
 311
 312 onTries :: (Trie k i -> Trie k o) -> Tries k i -> Tries k o
 313 onTries h (Tries f b) = Tries (h f) (h b)
 314
 315 ------------------------------------------------------------------------
 316 mayCons :: [a] -> [[a]] -> [[a]]
 317 mayCons [] xss = xss
 318 mayCons xs xss = xs : xss
 319
 320 {-
 321 split :: (IsTrie trie, Entropy e) => Lens' i e -> trie Token i -> [Token] -> [[Token]]
 322 split _   _ [] = []
 323 split inE t (Terminal Start:xs) = split inE t xs
 324 split inE t (x0:xs0) = go [x0] xs0
 325   where
 326     go pref []                  = [pref]
 327     go pref (Terminal Stop:_)   = [pref]
 328     go _    (Terminal Start:_)  = panic "split impossible"
 329     go pref (x:xs) =
 330         -- trace (show (if acc then "ACC" else "CUT", (prefx, epxt), if acc then ">" else "<=", ((pref, ept), "+", ([x], ext)))) $
 331         if acc
 332           then go prefx xs
 333           else mayCons pref $ go [x] xs
 334       where
 335         prefx = pref <> [x]
 336         pt   = findTrie pref t
 337         pxt  = findTrie prefx t
 338         xt   = findTrie [x] t
 339         ept  = ne pt
 340     --  ^ entropy of the current prefix
 341         ext  = ne xt
 342     --  ^ entropy of [x]
 343         epxt = ne pxt
 344     --  ^ entropy of the current prefix plus x
 345         acc  = P.isNaN ept || P.isNaN ext || not (P.isNaN epxt) -- && (epxt > mean [ept, ext])
 346
 347         -- aut(["in","this","paper"]) > aut(["in","this"]) + aut(["paper"])
 348
 349     ne = nodeEntropy inE
 350 -}
 351
 352 split :: Entropy e => Int -> Lens' i e -> Tries Token i -> [Token] -> [[Text]]
 353 split _ _   _ []  = []
 354 split _ _   _ [t] = pure <$> nonTerminals [t]
 355 split n inE t ts  = nonTerminals pref `mayCons` split n inE t (drop (length pref) ts)
 356   where
 357     pref = maximumWith (\ks -> nodeEntropy inE $ findTrie ks t)
 358                        (L.tail . L.inits . take n $ ts)
 359
 360
 361 {-
 362 split :: Entropy e => Lens' i e -> Tries Token i -> [Token] -> [[Token]]
 363 split inE t0 ts =
 364   maximumWith (sum . map $ nodeAutonomy inE t0) (all the splits of ts)
 365 -}
 366
 367 ------------------------------------------------------------------------
 368
 369 mainEleve :: Int -> [[Text]] -> [[[Text]]]
 370 mainEleve n x = mainEleve' n x x
 371
 372 mainEleve' :: Int -> [[Text]] -> [[Text]] -> [[[Text]]]
 373 mainEleve' n x y = mainEleveWith x' n y
 374   where
 375     x' = buildTries n (fmap toToken x)
 376   -- (fmap toToken i) is computed twice, since mainEleveWith is computing it too
 377
 378 -- | This function should take the longest possible chain of:
 379 -- mainEleve'' n x y = maxChainSizeOf [ mainEleve' n x y
 380 --                                    , mainEleve' n x x
 381 --                                    , mainEleve' n y y
 382 --                                    ]
 383 mainEleve'' :: Int -> [[Text]] -> [[Text]] -> [[[Text]]]
 384 mainEleve'' = undefined
 385
 386 mainEleveWith :: Tries Token () -> Int -> [[Text]] -> [[[Text]]]
 387 mainEleveWith m n i = fmap (split n info_autonomy t) (fmap toToken i)
 388   where
 389     t :: Tries Token (I Double)
 390     t = normalizeEntropy info_entropy_var set_autonomy
 391       $ evTrie identity set_entropy_var
 392       $ entropyTrie isTerminal m
 393
 394 ------------------------------------------------------------------------
 395
 396 type Checks e = [(Text, Int, e, e, e, e, e, e, e, e, e)]
 397
 398 testEleve :: e ~ Double => Bool -> Int -> [Text] -> Checks e -> IO Bool
 399 testEleve debug n output checks = do
 400   let
 401     res = split (1 + n) info_autonomy nt <$> input
 402   when debug $ do
 403     P.putStrLn . show $ (printToken <$>) <$> input
 404     P.putStrLn ""
 405     printTrie info_entropy nt
 406     P.putStrLn ""
 407     P.putStrLn "Splitting:"
 408     P.putStrLn $ show res
 409   forM_ checks checker
 410   pure $ expected == res
 411
 412   where
 413     out      = T.words <$> output
 414     expected = fmap (T.splitOn "-") <$> out
 415     input    = toToken . (T.splitOn "-" =<<) <$> out
 416
 417     nt :: Tries Token (I Double)
 418     nt = normalizeEntropy info_entropy_var set_autonomy
 419        . evTrie identity set_entropy_var
 420        . entropyTrie isTerminal
 421        $ buildTries n input
 422
 423     check f msg ref my =
 424       if f ref my
 425         then P.putStrLn $ "    \ESC[32mPASS\ESC[m " <> msg <> " " <> show ref
 426         else P.putStrLn $ "    \ESC[31mFAIL\ESC[m " <> msg <> " ref=" <> show ref <> " my=" <> show my
 427
 428     checker (ngram, count, entropy, ev, autonomy, fwd_entropy, fwd_ev, fwd_autonomy, bwd_entropy, bwd_ev, bwd_autonomy) = do
 429       let ns  = parseToken <$> T.words ngram
 430           nt' = findTrie ns nt
 431
 432       P.putStrLn $ "  " <> T.unpack ngram <> ":"
 433       check (==) "count"        count        (_node_count                  (_fwd nt'))
 434
 435       check sim  "entropy"      entropy      (nodeEntropy info_entropy           nt' )
 436       check sim  "ev"           ev           (nodeEntropy info_entropy_var       nt' )
 437       check sim  "autonomy"     autonomy     (nodeEntropy info_autonomy          nt' )
 438
 439       check sim  "fwd_entropy"  fwd_entropy  (nodeEntropy info_entropy     (_fwd nt'))
 440       check sim  "fwd_ev"       fwd_ev       (nodeEntropy info_entropy_var (_fwd nt'))
 441       check sim  "fwd_autonomy" fwd_autonomy (nodeEntropy info_autonomy    (_fwd nt'))
 442
 443       check sim  "bwd_entropy"  bwd_entropy  (nodeEntropy info_entropy     (_bwd nt'))
 444       check sim  "bwd_ev"       bwd_ev       (nodeEntropy info_entropy_var (_bwd nt'))
 445       check sim  "bwd_autonomy" bwd_autonomy (nodeEntropy info_autonomy    (_bwd nt'))
 446
 447 -- | TODO real data is a list of tokenized sentences
 448 example0, example1, example2, example3, example4, example5, example6, example7, example8, example9 :: [Text]
 449 example0 =  ["New-York is New-York and New-York"]
 450 example1 =  ["to-be or not to-be"]
 451 example2 =  ["to-be-or not to-be-or NOT to-be and"]
 452 example3 =  example0 <> example0
 453        -- > TEST: Should not have York New in the trie
 454 example4 =  ["a-b-c-d e a-b-c-d f"]
 455 example5 =  ["a-b-c-d-e f a-b-c-d-e g a-b-c-d-e"]
 456 example6 =  ["le-petit chat"
 457             ,"le-petit chien"
 458             ,"le-petit rat"
 459             ,"le gros rat"
 460             ]
 461 example7 =  ["a-b d", "a-c e", "a-c", "a-b", "a-b", "a-c", "a-c", "a-b"]
 462 -- example8 =  ["z f", "z", "z", "z"] <> example7
 463 example8 =  ["z", "z", "z", "z"] <> example7 <> example7 <> example7
 464 example9 =  (T.replace "z" "a") <$> example8
 465 --example8 =  ["a-b d", "a-c e", "a f", "a-c g", "a-b h", "a i", "a j", "a-b k", "a-c l", "a-c m", "a n", "a-b o"]
 466
 467 checks0, checks2, checks7, checks8, checks9 :: Checks Double
 468
 469 checks0 =
 470 -- [(token, count, entropy, ev, autonomy, fwd_entropy, fwd_ev, fwd_autonomy, bwd_entropy, bwd_ev, bwd_autonomy)]
 471   [ ("<start>", 1, nan, nan, nan, 0.0, -2.113283334294875, -0.5000000000000002, nan, nan, nan)
 472   , ("New", 3, 0.792481250360578, -1.3208020839342969, 0.7499999999999999, 0.0, -2.113283334294875, -0.5000000000000002, 1.584962500721156, -0.5283208335737188, 2.0)
 473   , ("York", 3, 0.792481250360578, -1.3208020839342969, 0.7499999999999999, 1.584962500721156, -0.5283208335737188, 2.0, 0.0, -2.113283334294875, -0.5000000000000002)
 474   , ("is", 1, 0, -2.113283334294875, -0.5000000000000002, 0.0, -2.113283334294875, -0.5000000000000002, 0.0, -2.113283334294875, -0.5000000000000002)
 475   , ("and", 1, 0, -2.113283334294875, -0.5000000000000002, 0.0, -2.113283334294875, -0.5000000000000002, 0.0, -2.113283334294875, -0.5000000000000002)
 476   , ("<stop>", 0, nan, nan, nan, nan, nan, nan, 0.0, -2.113283334294875, -0.5000000000000002)
 477   , ("<start> New", 1, nan, nan, nan, 0.0, nan, nan, nan, nan, nan)
 478   , ("New York", 3, 1.584962500721156, 1.584962500721156, 1.414213562373095, 1.584962500721156, 1.584962500721156, 1.4142135623730947, 1.584962500721156, 1.584962500721156, 1.4142135623730951)
 479   , ("York is", 1, 0, nan, nan, 0.0, -1.584962500721156, -0.7071067811865476, 0.0, nan, nan)
 480   , ("is New", 1, 0, nan, nan, 0.0, nan, nan, 0.0, -1.584962500721156, -0.7071067811865474)
 481   , ("York and", 1, 0, nan, nan, 0.0, -1.584962500721156, -0.7071067811865476, 0.0, nan, nan)
 482   , ("and New", 1, 0, nan, nan, 0.0, nan, nan, 0.0, -1.584962500721156, -0.7071067811865474)
 483   , ("York <stop>", 1, nan, nan, nan, nan, nan, nan, 0.0, nan, nan)
 484   , ("<start> New York", 1, nan, nan, nan, 0.0, nan, nan, nan, nan, nan)
 485   , ("New York is", 1, 0, nan, nan, 0.0, -1.584962500721156, nan, 0.0, nan, nan)
 486   , ("York is New", 1, 0, nan, nan, 0.0, nan, nan, 0.0, nan, nan)
 487   , ("is New York", 1, 0, nan, nan, 0.0, nan, nan, 0.0, -1.584962500721156, nan)
 488   , ("New York and", 1, 0, nan, nan, 0.0, -1.584962500721156, nan, 0.0, nan, nan)
 489   , ("York and New", 1, 0, nan, nan, 0.0, nan, nan, 0.0, nan, nan)
 490   , ("and New York", 1, 0, nan, nan, 0.0, nan, nan, 0.0, -1.584962500721156, nan)
 491   , ("New York <stop>", 1, nan, nan, nan, nan, nan, nan, 0.0, nan, nan)
 492   ]
 493
 494 checks2 = []
 495 {-
 496   [("to be",  3, 1.2516291673878228, 1.2516291673878228, 1.5535694744293167, nan, 0.9182958340544896)
 497   ,("be or",  2, 0.5, nan, nan, nan, 1.0)
 498   ,("or not", 1, 0.0, nan, nan, nan, 0.0)
 499   ,("not to", 1, 0.0, nan, nan, nan, 0.0)
 500   ,("or NOT", 1, 0.0, nan, nan, nan, 0.0)
 501   ,("NOT to", 1, 0.0, nan, nan, nan, 0.0)
 502   ,("be and", 1, 0.0, nan, nan, nan, 0.0)
 503   ]
 504 -}
 505
 506 checks7 =
 507   [ ("a b", 4, 2, 1.5, 1.0106455960380136, 2, 1, 0.7302967433402215, 2, 2, 1.2909944487358056)
 508   , ("a c", 4, 2, 1.5, 1.0106455960380136, 2, 1, 0.7302967433402215, 2, 2, 1.2909944487358056)
 509   , ("a", 8, 2, -0.7139421727208477, 0.9315597394596105, 1, -1.7139421727208477, 0.1695158759052029, 3, 0.2860578272791523, 1.693603603014018)
 510   ]
 511
 512 checks8 =
 513   [ ("a b", 4, 2, 1.5, 1.2384061243840367, 2, 1, 0.9190418024406298, 2, 2, 1.5577704463274435)
 514   , ("a c", 4, 2, 1.5, 1.2384061243840367, 2, 1, 0.9190418024406298, 2, 2, 1.5577704463274435)
 515   , ("a", 8, 2, -1.1151193576322829, 0.8012882295122719, 1, -2.115119357632283, 1.1025957503820932e-2, 3, -0.11511935763228287, 1.5915505015207227)
 516   , ("z", 4, 2, -1.1151193576322829, 0.9576679529201777, 2, -1.1151193576322829, 1.0906240295212841, 2, -1.1151193576322829, 0.8247118763190712)
 517   ]
 518
 519 checks9 =
 520   [ ("a b", 4, 2, 0.8741854163060885, 0.9234576822288185, 2, -0.25162916738782304, 0.2891449181301934, 2, 2, 1.5577704463274435)
 521   , ("a c", 4, 2, 0.8741854163060885, 0.9234576822288185, 2, -0.25162916738782304, 0.2891449181301934, 2, 2, 1.5577704463274435)
 522   , ("a", 12, 2.91829583405449, 3.763498724462999e-2, 1.518835832034022, 2.251629167387823, -0.6290316794220367, 1.2162041043595873, 3.5849625007211565, 0.7043016539112967, 1.8214675597084569)
 523   ]
 524
 525 runTestsEleve :: Bool -> IO ()
 526 runTestsEleve doChecks =
 527   forM_
 528     [("example0", 3, example0, checks0)
 529     ,("example0", 2, example0, [])
 530     ,("example1", 2, example1, [])
 531     ,("example2", 3, example2, checks2)
 532     ,("example3", 2, example3, [])
 533     ,("example4", 4, example4, [])
 534     ,("example5", 5, example5, [])
 535     ,("example6", 2, example6, [])
 536     ,("example7", 2, example7, checks7)
 537     ,("example8", 2, example8, checks8)
 538     ,("example9", 2, example9, checks9)
 539     ]
 540     (\(name, n, ex, checks) -> do
 541       P.putStrLn $ name <> " " <> show n
 542       b <- testEleve False n ex (if doChecks then checks else [])
 543       P.putStrLn $ "  splitting: " <> if b then "PASS" else "FAIL"
 544     )