src/Gargantext/Text/Eleve.hs

   1 {-|
   2 Module      : Gargantext.Text.Eleve
   3 Description : Unsupervized Word segmentation
   4 Copyright   : (c) CNRS, 2019-Present
   5 License     : AGPL + CECILL v3
   6 Maintainer  : team@gargantext.org
   7 Stability   : experimental
   8 Portability : POSIX
   9
  10 # Implementation of Unsupervized Word Segmentation
  11
  12 References:
  13
  14 - Python implementation (Korantin August, Emmanuel Navarro):
  15   [EleVe](https://github.com/kodexlab/eleve.git)
  16
  17 - Unsupervized Word Segmentation:the case for Mandarin Chinese Pierre
  18   Magistry, Benoît Sagot, Alpage, INRIA & Univ. Paris 7, Proceedings of
  19   the 50th Annual Meeting of the Association for Computational Linguistics
  20   , pages 383–387. [PDF](https://www.aclweb.org/anthology/P12-2075)
  21
  22 Notes for current implementation:
  23 - TODO extract longer ngrams (see paper above, viterbi algo can be used)
  24 - TODO AD TEST: prop (Node c _e f) = c == Map.size f
  25
  26 - AD: Real ngrams extraction test
  27   from Gargantext.Text.Terms import extractTermsUnsupervised
  28   docs <- runCmdRepl $ selectDocs 1004
  29   extractTermsUnsupervised 3 $ DT.intercalate " "
  30                         $ catMaybes
  31                         $ Gargantext.map _hyperdataDocument_abstract docs
  32
  33 -}
  34 {-# LANGUAGE ConstraintKinds   #-}
  35 {-# LANGUAGE NoImplicitPrelude #-}
  36 {-# LANGUAGE OverloadedStrings #-}
  37 {-# LANGUAGE RankNTypes        #-}
  38 {-# LANGUAGE TemplateHaskell   #-}
  39 {-# LANGUAGE TypeFamilies      #-}
  40
  41 module Gargantext.Text.Eleve where
  42
  43 -- import Debug.Trace (trace)
  44 -- import Debug.SimpleReflect
  45
  46 import Control.Lens hiding (levels, children)
  47 import Control.Monad (forM_)
  48 import Data.Ord (Ord)
  49 import qualified Data.List as L
  50 import Data.Monoid
  51 import Data.Text (Text)
  52 import qualified Data.Text as T
  53 import Data.Map (Map)
  54 import Data.Maybe (fromMaybe)
  55 import qualified Data.Map as Map
  56 import Gargantext.Prelude hiding (cs)
  57 import qualified Data.Tree as Tree
  58 import Data.Tree (Tree)
  59 import qualified Prelude as P (putStrLn, logBase, isNaN, RealFloat)
  60
  61 nan :: Floating e => e
  62 nan = 0 / 0
  63
  64 noNaNs :: P.RealFloat e => [e] -> [e]
  65 noNaNs = filter (not . P.isNaN)
  66
  67 updateIfDefined :: P.RealFloat e => e -> e -> e
  68 updateIfDefined e0 e | P.isNaN e = e0
  69                      | otherwise = e
  70
  71 sim :: Entropy e => e -> e -> Bool
  72 sim x y = x == y || (P.isNaN x && P.isNaN y)
  73
  74 subst :: Entropy e => (e, e) -> e -> e
  75 subst (src, dst) x | sim src x = dst
  76                    | otherwise = x
  77 ------------------------------------------------------------------------
  78
  79 type Entropy e =
  80   ( Fractional e
  81   , Floating e
  82   , P.RealFloat e
  83   , Show e
  84   -- ^ TODO: only used for debugging
  85   )
  86 ------------------------------------------------------------------------
  87 -- | Example and tests for development
  88 data I e = I
  89   { _info_entropy    :: e
  90   , _info_entropy_var :: e
  91   , _info_autonomy   :: e
  92   }
  93
  94 instance Show e => Show (I e) where
  95   show (I e ev a) = show (e, ev, a)
  96
  97 makeLenses ''I
  98
  99 type ModEntropy i o e = (e -> e) -> i -> o
 100
 101 set_autonomy :: Entropy e => ModEntropy e (I e) e
 102 set_autonomy f e = I e nan (f e)
 103
 104 set_entropy_var :: Entropy e => Setter e (I e) e e
 105 set_entropy_var f e = (\ev -> I e ev nan) <$> f e
 106
 107 data StartStop = Start | Stop
 108   deriving (Ord, Eq, Show)
 109
 110 data Token = NonTerminal Text
 111            | Terminal StartStop
 112   deriving (Ord, Eq, Show)
 113
 114 isTerminal :: Token -> Bool
 115 isTerminal (Terminal    _) = True
 116 isTerminal (NonTerminal _) = False
 117
 118 parseToken :: Text -> Token
 119 parseToken "<start>" = Terminal Start
 120 parseToken "<stop>"  = Terminal Stop
 121 parseToken t         = NonTerminal t
 122
 123 toToken :: [Text] -> [Token]
 124 toToken xs = Terminal Start : (NonTerminal <$> xs) <> [Terminal Stop]
 125
 126 printToken :: Token -> Text
 127 printToken = f
 128   where
 129     f (NonTerminal x)  = x
 130     f (Terminal Start) = "<start>"
 131     f (Terminal Stop)  = "<stop>"
 132 ------------------------------------------------------------------------
 133
 134 data Trie k e
 135   = Node { _node_count    :: Int
 136          , _node_entropy  :: e
 137          , _node_children :: Map k (Trie k e)
 138          }
 139  | Leaf { _node_count    :: Int }
 140   deriving (Show)
 141
 142 makeLenses ''Trie
 143
 144 insertTries :: Ord k => [[k]] -> Trie k ()
 145 insertTries = L.foldr insertTrie emptyTrie
 146
 147 insertTrie :: Ord k => [k] -> Trie k () -> Trie k ()
 148 insertTrie []     n                    = n { _node_count = _node_count n +1}
 149 insertTrie (x:xs) (Leaf c)             = mkTrie (c+1) $ Map.singleton x $ insertTrie xs emptyTrie
 150 insertTrie (x:xs) (Node c _e children) = mkTrie (c+1) $ Map.alter f x children
 151   where
 152     f = Just . insertTrie xs . fromMaybe emptyTrie
 153
 154 -- emptyTrie :: (Ord k, Monoid e) => Trie k e
 155 -- emptyTrie = Node 0 mempty mempty
 156 emptyTrie :: Trie k e
 157 emptyTrie  = Leaf 0
 158
 159 mkTrie :: Monoid e => Int -> Map k (Trie k e) -> Trie k e
 160 mkTrie c children
 161   | Map.null children = Leaf c
 162   | otherwise         = Node c mempty children
 163
 164                         -----------------------------
 165 -- | Trie to Tree since Tree as nice print function
 166 toTree :: k -> Trie k e -> Tree (k,Int,Maybe e)
 167 toTree k (Leaf c)      = Tree.Node (k, c, Nothing) []
 168 toTree k (Node c e cs) = Tree.Node (k, c, Just e)  (map (uncurry toTree) $ Map.toList cs)
 169
 170 ------------------------------------------------------------------------
 171 ------------------------------------------------------------------------
 172 entropyTrie :: Entropy e => (k -> Bool) -> Trie k () -> Trie k e
 173 entropyTrie _    (Leaf c)             = Leaf c
 174 entropyTrie pred (Node c () children) = Node c e (map (entropyTrie pred) children)
 175   where
 176     children' = Map.toList children
 177     sum_count = sum $ _node_count . snd <$> children'
 178     e | sum_count == 0 = nan
 179       | otherwise      = sum $ f <$> children'
 180     f (k, child) = if pred k then   chc * P.logBase 2 (fromIntegral c)
 181                              else - chc * P.logBase 2 chc
 182       where
 183         chc = fromIntegral (_node_count child) / fromIntegral c
 184 ------------------------------------------------------------------------
 185 normalizeLevel :: Entropy e => e -> e -> e -> e
 186 normalizeLevel m v e = (e - m) / v
 187
 188 {- Unused
 189
 190 nodeChildren :: Trie k e -> Map k (Trie k e)
 191 nodeChildren (Node _ _ cs) = cs
 192 nodeChildren (Leaf _)      = Map.empty
 193
 194 -}
 195
 196
 197 class IsTrie trie where
 198   buildTrie   :: Entropy e => [[Token]] -> trie Token e
 199   nodeEntropy :: Entropy e => Getting e i e -> trie k i -> e
 200   nodeChild   :: Ord k =>  k  -> trie k e -> trie k e
 201   findTrie    :: Ord k => [k] -> trie k e -> trie k e
 202   findTrieR   :: Ord k => [k] -> trie k e -> trie k e
 203   printTrie   :: (Show i, Entropy e) => Getting e i e -> trie Token i -> IO ()
 204   evTrie      :: Entropy e => Getting e i e -> Setter i o e e -> trie k i -> trie k o
 205   normalizeEntropy :: Entropy e
 206                    => Getting e i e -> ModEntropy i o e
 207                    -> trie k i -> trie k o
 208
 209 -- UNUSED
 210 --nodeAutonomy :: (Ord k, Entropy e) => Getting e i e -> trie k i -> [k] -> e
 211 --nodeAutonomy inE t ks = nodeEntropy inE $ findTrie ks t
 212
 213 instance IsTrie Trie where
 214   buildTrie ts = entropyTrie isTerminal $ insertTries ts
 215
 216   nodeEntropy inE (Node _ e _) = e ^. inE
 217   nodeEntropy _   (Leaf _)     = nan
 218
 219   nodeChild k (Node _ _ cs) = fromMaybe emptyTrie (Map.lookup k cs)
 220   nodeChild _ (Leaf _)      = emptyTrie
 221
 222   findTrie ks t = L.foldl (flip nodeChild) t ks
 223   findTrieR = findTrie
 224
 225   printTrie inE t = do
 226     P.putStrLn . Tree.drawTree
 227                 . fmap show
 228                 $ toTree (NonTerminal "") t
 229     P.putStrLn "  Levels:"
 230     forM_ (normalizationLevels inE t) $ \level ->
 231       P.putStrLn $ "    " <> show level
 232
 233   evTrie inE setEV = go nan
 234     where
 235       go _  (Leaf c)            = Leaf c
 236       go e0 (Node c i children) = Node c (i & setEV .~ ev e0 e1) $ go e1 <$> children
 237         where e1 = i ^. inE
 238
 239       ev 0  0  = nan
 240       ev i0 i1 = i1 - i0
 241
 242   normalizeEntropy inE modE t = go (modE identity) (normalizationLevels inE t) t
 243     where
 244       go _ _                 (Leaf c)            = Leaf c
 245       go _ []                _                   = panic "normalizeEntropy' empty levels"
 246       go f ((m, v, _) : ess) (Node c i children)
 247         = Node c (f i) $ go (modE $ normalizeLevel m v) ess <$> children
 248 ------------------------------------------------------------------------
 249
 250 levels :: Trie k e -> [[Trie k e]]
 251 levels = L.takeWhile (not . L.null) . L.iterate (L.concatMap subForest) . pure
 252   where
 253     subForest :: Trie k e -> [Trie k e]
 254     subForest (Leaf _)            = []
 255     subForest (Node _ _ children) = Map.elems children
 256
 257 entropyLevels :: Entropy e => Getting e i e -> Trie k i -> [[e]]
 258 entropyLevels inE = fmap (noNaNs . map (nodeEntropy inE)) . L.tail . levels
 259
 260 normalizationLevels :: Entropy e => Getting e i e -> Trie k i -> [(e, e, Int)]
 261 normalizationLevels inE = fmap f . entropyLevels inE
 262   where
 263     f es = (mean es, deviation es, length es)
 264
 265 ------------------------------------------------------------------------
 266
 267 data Tries k e = Tries
 268   { _fwd :: Trie k e
 269   , _bwd :: Trie k e
 270   }
 271
 272 makeLenses ''Tries
 273
 274 nodeEntropySafe :: Entropy e => Getting e i e -> Tries k i -> e
 275 nodeEntropySafe inE (Tries f b) =
 276   mean $ noNaNs [nodeEntropy inE f, nodeEntropy inE b]
 277
 278 nodeEntropyBwdOpt :: Entropy e => Getting e i e -> Tries k i -> e
 279 nodeEntropyBwdOpt inE (Tries f b) =
 280   mean $ nodeEntropy inE f : noNaNs [nodeEntropy inE b]
 281
 282 instance IsTrie Tries where
 283   buildTrie tts = Tries { _fwd = buildTrie tts
 284                         , _bwd = buildTrie (reverse <$> tts)
 285                         }
 286
 287   nodeEntropy inE (Tries f b) = mean [nodeEntropy inE f, nodeEntropy inE b]
 288
 289   findTrie  ks = onTries (findTrie ks)
 290   findTrieR ks (Tries f b) = Tries (findTrieR ks f) (findTrieR (reverse ks) b)
 291
 292   nodeChild = onTries . nodeChild
 293
 294   evTrie inE setEV = onTries $ evTrie inE setEV
 295
 296   normalizeEntropy inE = onTries . normalizeEntropy inE
 297
 298   printTrie inE (Tries f b) = do
 299     P.putStrLn "Forward:"
 300     printTrie inE f
 301     P.putStrLn ""
 302     P.putStrLn "Backward:"
 303     printTrie inE b
 304
 305 onTries :: (Trie k i -> Trie k o) -> Tries k i -> Tries k o
 306 onTries h (Tries f b) = Tries (h f) (h b)
 307
 308 ------------------------------------------------------------------------
 309 split :: (IsTrie trie, Entropy e) => Lens' i e -> trie Token i -> [Token] -> [[Token]]
 310 split _   _  [] = []
 311 split inE t0 (Terminal Start:xs0) = split inE (nodeChild (Terminal Start) t0) xs0
 312 split inE t0 (x0:xs0) = go (nodeChild x0 t0) [x0] xs0
 313   where
 314     consRev [] xss = xss
 315     consRev xs xss = reverse xs : xss
 316
 317     go _ pref []                  = [reverse pref]
 318     go _ pref (Terminal Stop:_)   = [reverse pref]
 319     go t pref (Terminal Start:xs) = go t pref xs
 320     go t pref (x:xs) =
 321         -- trace (show (if acc then "ACC" else "CUT", (reverse (x : pref), ext), if acc then ">" else "<=", ((reverse pref, et), "+", ([x], ext0)))) $
 322         if acc
 323           then go xt (x:pref) xs
 324           else consRev pref $ go xt0 [x] xs
 325       where
 326         xt   = nodeChild x t
 327         xt0  = nodeChild x t0
 328         et   = ne 0 t
 329     --  ^ entropy of the current prefix
 330         ext0 = ne 0 xt0
 331     --  ^ entropy of [x]
 332         ext  = ne 0 xt
 333     --  ^ entropy of the current prefix plus x
 334         acc  = ext > et + ext0
 335         -- aut(["in","this","paper"]) > aut(["in","this"]) + aut(["paper"])
 336
 337     ne d t = if P.isNaN e then d else e
 338       where e = nodeEntropy inE t
 339
 340 {-
 341 split :: Entropy e => Lens' i e -> Tries Token i -> [Token] -> [[Token]]
 342 split inE t0 ts =
 343   maximumWith (sum . map $ nodeAutonomy inE t0) (all the splits of ts)
 344 -}
 345
 346 ------------------------------------------------------------------------
 347 ------------------------------------------------------------------------
 348
 349 mainEleve :: Int -> [[Text]] -> [[[Text]]]
 350 mainEleve _ _ = []
 351 {-
 352 mainEleve n input = map (map printToken) . split identity (t :: Trie Token Double) <$> inp
 353   where
 354     inp = toToken <$> input
 355     t   = buildTrie $ L.concat $ chunkAlong n 1 <$> inp
 356 -}
 357
 358 chunkAlongEleve :: Int -> [a] -> [[a]]
 359 chunkAlongEleve n xs = L.take n <$> L.tails xs
 360
 361 data Order = Backward | Forward
 362
 363 toToken' :: Order -> Int -> [[Text]] -> [[Token]]
 364 toToken' o n input = L.concat $ (filter (/= [Terminal Stop]) . chunkAlongEleve (n + 2) . (order o) ) <$> toToken <$> input
 365   where
 366     order Forward = identity
 367     order Backward = reverse
 368
 369
 370 ---------------------------------------------
 371 {-
 372 set_entropy_vars :: Entropy e  => Getting e i e -> (e -> i -> o) -> Tries Token i -> Trie Token o
 373 set_entropy_vars inE modE tries@(Tries fwd _bwd) =
 374   mapTree (\k -> modE $ nodeEntropy inE (findTrieR k tries)) [] fwd
 375
 376 mapTree :: ([Token] -> t -> e) -> [Token] -> Trie Token t -> Trie Token e
 377 mapTree f k t = go f k t
 378   where
 379     go _ _ (Leaf c)            = Leaf c
 380     go f k (Node c i children) = Node c (f k i) (Map.mapWithKey (\k'-> go f (k <> [k'])) children)
 381 -}
 382
 383 ---------------------------------------------
 384
 385 type Checks e = [(Text, Int, e, e, e, e, e, e, e, e, e)]
 386
 387 testEleve :: e ~ Double => Bool -> Int -> [Text] -> Checks e -> IO Bool
 388 testEleve debug n output checks = do
 389   let
 390     {-
 391     pss = [ (ps, findTrie ps fwd ^? _Just . node_entropy) -- . info_entropy)
 392           | ps <- L.nub $ [ c
 393                           | m <- [1..n]
 394                           , cs <- chunkAlong m 1 <$> inp
 395                           , c <- cs
 396                           ]
 397           ]
 398     -}
 399   --res = map (map printToken) . split identity fwd <$> inp
 400   --res = map (map printToken) . split info_norm_entropy' nt' <$> inp
 401     res = map (map printToken) . split info_autonomy nt <$> inp
 402   when debug $ do
 403     P.putStrLn (show input)
 404     -- forM_ pss (P.putStrLn . show)
 405     P.putStrLn ""
 406     printTrie info_entropy nt
 407     -- P.putStrLn ""
 408     -- P.putStrLn "Entropy Var:"
 409     -- printTrie identity t''
 410     P.putStrLn ""
 411     P.putStrLn "Splitting:"
 412     P.putStrLn $ show res
 413   forM_ checks checker
 414   pure $ expected == res
 415
 416   where
 417     out      = T.words <$> output
 418     expected = fmap (T.splitOn "-") <$> out
 419     input    = (T.splitOn "-" =<<) <$> out
 420     inp      = toToken <$> input
 421
 422     t :: Tries Token Double
 423     t = -- buildTrie (toToken' n input)
 424                   Tries { _fwd = buildTrie (toToken' Forward  n input)
 425                         , _bwd = buildTrie (toToken' Backward n input)
 426                         }
 427
 428     evt :: Tries Token (I Double)
 429     evt = evTrie identity set_entropy_var t
 430
 431     nt :: Tries Token (I Double)
 432     nt = normalizeEntropy info_entropy_var (\fe i -> i & info_autonomy .~ fe (i ^. info_entropy_var)) evt
 433
 434     -- t'' :: Trie Token Double
 435     -- t'' = set_entropy_vars info_autonomy (\e _i -> e) nt
 436
 437     -- nt = normalizeEntropy  identity set_autonomy (fwd :: Trie Token Double)
 438     -- nt = normalizeEntropy' info_entropy (\f -> info_norm_entropy' %~ f) nt
 439
 440     check f msg ref my =
 441       if f ref my
 442         then P.putStrLn $ "    \ESC[32mPASS\ESC[m " <> msg <> " " <> show ref
 443         else P.putStrLn $ "    \ESC[31mFAIL\ESC[m " <> msg <> " ref=" <> show ref <> " my=" <> show my
 444
 445     checker (ngram, count, entropy, ev, autonomy, fwd_entropy, fwd_ev, fwd_autonomy, bwd_entropy, bwd_ev, bwd_autonomy) = do
 446       let ns  = parseToken <$> T.words ngram
 447           t'  = findTrie ns t
 448           -- tvar  = findTrie ns  t''
 449           -- my_entropy_var = nodeEntropy identity tvar
 450           nt' = findTrie ns nt
 451
 452       P.putStrLn $ "  " <> T.unpack ngram <> ":"
 453       check (==) "count"        count        (_node_count                   (_fwd t'))
 454
 455       check sim  "entropy"      entropy      (nodeEntropyBwdOpt info_entropy     nt' )
 456       check sim  "ev"           ev           (nodeEntropy info_entropy_var       nt' )
 457       check sim  "autonomy"     autonomy     (nodeEntropy info_autonomy          nt' )
 458
 459       check sim  "fwd_entropy"  fwd_entropy  (nodeEntropy info_entropy     (_fwd nt'))
 460       check sim  "fwd_ev"       fwd_ev       (nodeEntropy info_entropy_var (_fwd nt'))
 461       check sim  "fwd_autonomy" fwd_autonomy (nodeEntropy info_autonomy    (_fwd nt'))
 462
 463       check sim  "bwd_entropy"  bwd_entropy  (nodeEntropy identity         (_bwd  t'))
 464       check sim  "bwd_ev"       bwd_ev       (nodeEntropy info_entropy_var (_bwd nt'))
 465       check sim  "bwd_autonomy" bwd_autonomy (nodeEntropy info_autonomy    (_bwd nt'))
 466
 467 -- | TODO real data is a list of tokenized sentences
 468 example0, example1, example2, example3, example4, example5, example6 :: [Text]
 469 example0 =  ["New-York is New-York and New-York"]
 470 example1 =  ["to-be or not to-be"]
 471 example2 =  ["to-be-or not to-be-or NOT to-be and"]
 472 example3 =  example0 <> example0
 473        -- > TEST: Should not have York New in the trie
 474 example4 =  ["a-b-c-d e a-b-c-d f"]
 475 example5 =  ["a-b-c-d-e f a-b-c-d-e g a-b-c-d-e"]
 476 example6 =  ["le-petit chat"
 477             ,"le-petit chien"
 478             ,"le-petit rat"
 479             ,"le gros rat"
 480             ]
 481
 482 checks0, checks2 :: Checks Double
 483
 484 checks0 =
 485   [ ("<start>", 1, nan, nan, nan, 0.0, -2.113283334294875, -0.5000000000000002, nan, nan, nan)
 486   , ("New", 3, 0.792481250360578, -1.3208020839342969, 0.7499999999999999, 0.0, -2.113283334294875, -0.5000000000000002, 1.584962500721156, -0.5283208335737188, 2.0)
 487   , ("York", 3, 0.792481250360578, -1.3208020839342969, 0.7499999999999999, 1.584962500721156, -0.5283208335737188, 2.0, 0.0, -2.113283334294875, -0.5000000000000002)
 488   , ("is", 1, 0.0, -2.113283334294875, -0.5000000000000002, 0.0, -2.113283334294875, -0.5000000000000002, 0.0, -2.113283334294875, -0.5000000000000002)
 489   , ("and", 1, 0.0, -2.113283334294875, -0.5000000000000002, 0.0, -2.113283334294875, -0.5000000000000002, 0.0, -2.113283334294875, -0.5000000000000002)
 490 --, ("<stop>", 0.0, nan, nan, nan, nan, nan, nan, 0.0, -2.113283334294875, -0.5000000000000002)
 491 -- Since it is not in the trie it no, need to count it.
 492   , ("<start> New", 1, nan, nan, nan, 0.0, nan, nan, nan, nan, nan)
 493   , ("New York", 3, 1.584962500721156, 1.584962500721156, 1.4142135623730951, 1.584962500721156, 1.584962500721156, 1.4142135623730951, nan, nan, nan)
 494   , ("York is", 1, 0.0, nan, nan, 0.0, -1.584962500721156, -0.7071067811865474, nan, nan, nan)
 495   , ("is New", 1, 0.0, nan, nan, 0.0, nan, nan, nan, nan, nan)
 496   , ("York and", 1, 0.0, nan, nan, 0.0, -1.584962500721156, -0.7071067811865474, nan, nan, nan)
 497   , ("and New", 1, 0.0, nan, nan, 0.0, nan, nan, nan, nan, nan)
 498   , ("York <stop>", 1, nan, nan, nan, nan, nan, nan, nan, nan, nan)
 499   , ("<start> New York", 1, nan, nan, nan, 0.0, nan, nan, nan, nan, nan)
 500   , ("New York is", 1, 0.0, nan, nan, 0.0, -1.584962500721156, nan, nan, nan, nan)
 501   , ("York is New", 1, 0.0, nan, nan, 0.0, nan, nan, nan, nan, nan)
 502   , ("is New York", 1, 0.0, nan, nan, 0.0, nan, nan, nan, nan, nan)
 503   , ("New York and", 1, 0.0, nan, nan, 0.0, -1.584962500721156, nan, nan, nan, nan)
 504   , ("York and New", 1, 0.0, nan, nan, 0.0, nan, nan, nan, nan, nan)
 505   , ("and New York", 1, 0.0, nan, nan, 0.0, nan, nan, nan, nan, nan)
 506   , ("New York <stop>", 1, nan, nan, nan, nan, nan, nan, nan, nan, nan)
 507   ]
 508
 509
 510
 511 checks2 = []
 512 {-
 513   [("to be",  3, 1.2516291673878228, 1.2516291673878228, 1.5535694744293167, nan, 0.9182958340544896)
 514   ,("be or",  2, 0.5, nan, nan, nan, 1.0)
 515   ,("or not", 1, 0.0, nan, nan, nan, 0.0)
 516   ,("not to", 1, 0.0, nan, nan, nan, 0.0)
 517   ,("or NOT", 1, 0.0, nan, nan, nan, 0.0)
 518   ,("NOT to", 1, 0.0, nan, nan, nan, 0.0)
 519   ,("be and", 1, 0.0, nan, nan, nan, 0.0)
 520   ]
 521 -}
 522
 523 runTests :: IO ()
 524 runTests =
 525   forM_
 526     [("example0", 2, example0, checks0)
 527     ,("example1", 2, example1, [])
 528     ,("example2", 3, example2, checks2)
 529     ,("example3", 2, example3, [])
 530     ,("example4", 4, example4, [])
 531     ,("example5", 5, example5, [])
 532     ]
 533     (\(name, n, ex, checks) -> do
 534       P.putStrLn $ name <> " " <> show n
 535       b <- testEleve False n ex checks
 536       P.putStrLn $ "  splitting: " <> if b then "PASS" else "FAIL"
 537     )