]> Git — Sourcephile - literate-phylomemy.git/blob - tests/Clustering/FrequentItemSet/BruteForceSpec.hs
init
[literate-phylomemy.git] / tests / Clustering / FrequentItemSet / BruteForceSpec.hs
1 {-# LANGUAGE OverloadedLists #-}
2
3 module Clustering.FrequentItemSet.BruteForceSpec where
4
5 import Control.Arrow (second)
6 import Control.Monad (forM_)
7 import Data.Function (($), (.))
8 import Data.Functor ((<$>))
9 import Data.Int (Int)
10 import Data.List qualified as List
11 import Data.Ord (Ord)
12 import Data.Ratio (Rational, (%))
13 import Data.Semigroup (Semigroup (..))
14 import GHC.IsList (toList)
15 import Logic
16 import Numeric.Probability (assertProbability)
17 import System.FilePath ((<.>))
18 import Test.Syd
19 import Text.Show (Show (..))
20 import Prelude (Num)
21
22 import Clustering.FrequentItemSet.BruteForce
23 import Utils
24
-- | Transactions of the example given at
-- https://research.nii.ac.jp/~uno/code/lcm.html#IntroductionstoFrequentItemsetMining
--
-- See the comments on individual rows for the known issues with that example.
databaseTakeakiUno :: Ord item => Num item => Transactions item
databaseTakeakiUno =
  [ [1, 2, 5, 6, 7]
  , [2, 3, 4, 5]
  , [1, 2, 7, 8, 9]
  , [1, 7, 9]
  , [2, 7, 9]
  , -- Copy-paste typo in the original example.
    [2, 7, 9]
  , -- Added to increase the support of [1,9], because the original example is wrong…
    [1, 9]
  , [2]
  ]
37
-- | Transactions taken from https://hal.science/hal-03500847
databaseHAL03500847T2 :: Ord item => Num item => Transactions item
databaseHAL03500847T2 =
  [ [1, 3, 7, 6]
  , [1, 2, 7]
  , [2, 8, 9, 10]
  , [5, 11]
  ]
46
-- | Test tree for "Clustering.FrequentItemSet.BruteForce".
--
-- Every case is registered through 'golden'; its name encodes the database
-- and the thresholds the case was computed with.
spec :: Spec
spec = do
  describe "allFrequentItemSets" $ do
    forM_ ([2 .. 3] :: [Int]) $ \minSupp -> do
      let goldenName = ("db=TakeakiUno" <.> "minSupp=") <> show minSupp
      golden goldenName $
        -- Alas, mapping 'unName' is not a zero-cost `coerce`.
        unName
          <$> allFrequentItemSets @Int
            (unitName databaseTakeakiUno)
            (assertStrictlyPositive minSupp)
    forM_ ([2 .. 3] :: [Int]) $ \minSupp -> do
      let goldenName = ("db=HAL03500847T2" <.> "minSupp=") <> show minSupp
      golden goldenName $
        unName
          <$> allFrequentItemSets @Int
            (unitName databaseHAL03500847T2)
            (assertStrictlyPositive minSupp)

  describe "associationRules" $ do
    forM_ ([2 .. 3] :: [Int]) $ \minSupp ->
      forM_ ([75 % 100] :: [Rational]) $ \minConf -> do
        -- The confidence part of the name is a fixed label, not derived from minConf.
        let goldenName =
              ("db=TakeakiUno" <.> "minSupp=")
                <> (show minSupp <.> "minConf=75%")
        golden goldenName $
          ( \fis ->
              associationRules
                fis
                (unitName databaseTakeakiUno)
                (unitName (assertProbability minConf))
          )
            <$> allFrequentItemSets @Int
              (unitName databaseTakeakiUno)
              (assertStrictlyPositive minSupp)

  describe "allClosedFrequentItemSets" $ do
    forM_ ([2 .. 3] :: [Int]) $ \minSupp ->
      forM_ ([2 .. 3] :: [Int]) $ \minSize -> do
        let goldenName =
              ("db=TakeakiUno" <.> "minSupp=")
                <> (show minSupp <.> "minSize=")
                <> show minSize
        golden goldenName $
          -- The second component is turned into a sorted list before comparison.
          (\closed -> second (List.sort . toList) (unName closed))
            <$> allClosedFrequentItemSets @Int
              (unitName databaseTakeakiUno)
              (assertStrictlyPositive minSupp)
              (assertStrictlyPositive minSize)
    forM_ ([1 .. 3] :: [Int]) $ \minSupp ->
      forM_ ([2 .. 4] :: [Int]) $ \minSize -> do
        let goldenName =
              ("db=HAL03500847T2" <.> "minSupp=")
                <> (show minSupp <.> "minSize=")
                <> show minSize
        golden goldenName $
          -- The second component is turned into a sorted list before comparison.
          (\closed -> second (List.sort . toList) (unName closed))
            <$> allClosedFrequentItemSets @Int
              (unitName databaseHAL03500847T2)
              (assertStrictlyPositive minSupp)
              (assertStrictlyPositive minSize)