1 {-# LANGUAGE OverloadedLists #-}
2 {-# LANGUAGE OverloadedStrings #-}
4 module Clustering.FrequentItemSet.LCMSpec where
6 import Control.Monad (Monad (..), forM_)
7 import Data.Array.Base qualified as Array
8 import Data.Bool (Bool (..), (&&))
9 import Data.Eq (Eq (..))
10 import Data.Function (($), (.))
11 import Data.Functor ((<$>))
12 import Data.GenValidity
13 import Data.GenValidity.Map ()
14 import Data.GenValidity.Set ()
15 import Data.GenValidity.Text ()
16 import Data.GenValidity.Time ()
18 import Data.List qualified as List
19 import Data.Map.Strict qualified as Map
20 import Data.Ord (Down (..), comparing, (<=))
22 import Data.Set qualified as Set
23 import Data.Text.Short (ShortText)
24 import Data.Text.Short qualified as ShortText
25 import Data.Tuple (snd)
27 import Data.Validity.Map ()
28 import Data.Validity.Set ()
29 import Data.Validity.Text ()
30 import Numeric.Decimal (Decimal (..), unwrapDecimal)
31 import System.FilePath ((<.>))
33 import Test.Syd.Validity
34 import Text.Show (Show (..))
38 import Clustering.FrequentItemSet.LCM
41 -- * Type 'Transaction'
42 data Transaction a = Transaction a
44 -- ** Type 'TransactionItems'
45 newtype TransactionItems a = TransactionItems {unTransactionItems :: [a]}
47 instance (Eq a, Validity a) => Validity (TransactionItems a) where
48 validate (TransactionItems is) =
50 [ delve "transaction item" is
52 "All transaction items are different"
53 (List.length (List.nub is) == List.length is)
55 instance (GenValid a, Eq a) => GenValid (TransactionItems a) where
56 genValid = TransactionItems . List.nub <$> genValid
57 shrinkValid = (TransactionItems <$>) . shrinkValid . unTransactionItems
59 databases :: [(ShortText, [Set Int])]
61 [ "1" := [[10, 20, 30]]
62 , "2" := [[10, 20, 30], [10, 20]]
63 , "3" := [[10, 20, 30], [10, 20], [10]]
65 := [ [1, 2, 3, 4, 5, 6]
78 genValidSpec @(TransactionItems Item)
79 it "solves a singleton transaction" do
86 it "solves a basic example" do
95 it "solves a another basic example" do
105 it "solves HLCM's example" do
124 describe "sortFrq" do
125 forM_ (List.zip databases [1 :: Int ..]) \(db, dbI) -> do
126 let maxItem = List.maximum (Set.findMax <$> db)
127 let itemToSupp = histogram (0, maxItem) db
128 let (lo, hi) = Array.bounds itemToSupp
129 let lstVal = [(i, itemToSupp Array.! i) | i <- [lo .. hi]]
130 goldenShow ("db" <> show dbI) $
131 List.sortBy (comparing (Down . snd)) lstVal
134 forM_ (List.zip databases [1 :: Int ..]) \(db, dbI) -> do
135 let maxItem = List.maximum (Set.findMax <$> db)
136 let itemToSupp = histogram (0, maxItem) db
137 goldenShow ("db" <> show dbI) $
140 describe "closedFrequentItemSets" do
141 forM_ databases \(dbName, db) -> do
142 letName db $ \dbNamed ->
143 forM_ ([1 .. 2] :: [Int]) \minSupp ->
144 forM_ ([1 .. 2] :: [Int]) \minSize ->
145 goldenShow ("db=" <> ShortText.unpack dbName <.> "minSupp=" <> show minSupp <.> "minSize=" <> show minSize) $
146 allClosedFrequentItemSets
147 (assertStrictlyPositive minSupp)
148 (assertStrictlyPositive minSize)