{-# LANGUAGE OverloadedLists #-} {-# LANGUAGE OverloadedStrings #-} module Clustering.FrequentItemSet.LCMSpec where import Control.Monad (Monad (..), forM_) import Data.Array.Base qualified as Array import Data.Bool (Bool (..), (&&)) import Data.Eq (Eq (..)) import Data.Function (($), (.)) import Data.Functor ((<$>)) import Data.GenValidity import Data.GenValidity.Map () import Data.GenValidity.Set () import Data.GenValidity.Text () import Data.GenValidity.Time () import Data.Int (Int) import Data.List qualified as List import Data.Map.Strict qualified as Map import Data.Ord (Down (..), comparing, (<=)) import Data.Set (Set) import Data.Set qualified as Set import Data.Text.Short (ShortText) import Data.Text.Short qualified as ShortText import Data.Tuple (snd) import Data.Validity import Data.Validity.Map () import Data.Validity.Set () import Data.Validity.Text () import Numeric.Decimal (Decimal (..), unwrapDecimal) import System.FilePath ((<.>)) import Test.Syd import Test.Syd.Validity import Text.Show (Show (..)) import Logic import Clustering.FrequentItemSet.LCM import Utils -- * Type 'Transaction' data Transaction a = Transaction a -- ** Type 'TransactionItems' newtype TransactionItems a = TransactionItems {unTransactionItems :: [a]} deriving (Eq, Show) instance (Eq a, Validity a) => Validity (TransactionItems a) where validate (TransactionItems is) = mconcat [ delve "transaction item" is , declare "All transaction items are different" (List.length (List.nub is) == List.length is) ] instance (GenValid a, Eq a) => GenValid (TransactionItems a) where genValid = TransactionItems . List.nub <$> genValid shrinkValid = (TransactionItems <$>) . shrinkValid . unTransactionItems databases :: [(ShortText, [Set Int])] databases = [ "1" := [[10, 20, 30]] , "2" := [[10, 20, 30], [10, 20]] , "3" := [[10, 20, 30], [10, 20], [10]] , "4" := [ [1, 2, 3, 4, 5, 6] , [2, 3, 5] , [2, 5] , [1, 2, 4, 5, 6] , [2, 4] , [1, 4, 6] , [3, 4, 6] ] ] spec :: Spec spec = do {- genValidSpec @(TransactionItems Item) it "solves a singleton transaction" do runLCMmatrix [ [1] ] 1 `shouldBe` [ [1, 1] ] it "solves a basic example" do runLCMmatrix [ [1] , [1, 2] ] 1 `shouldBe` [ [2, 1] , [1, 1, 2] ] it "solves a another basic example" do runLCMmatrix [ [1] , [1, 2] , [1, 2] ] 1 `shouldBe` [ [3, 1] , [2, 1, 2] ] it "solves HLCM's example" do runLCMmatrix [ [1, 2, 3, 4, 5, 6] , [2, 3, 5] , [2, 5] , [1, 2, 4, 5, 6] , [2, 4] , [1, 4, 6] , [3, 4, 6] ] 3 `shouldBe` [ [5, 4] , [5, 2] , [3, 2, 4] , [4, 4, 6] , [4, 2, 5] , [3, 3] , [3, 1, 4, 6] ] describe "sortFrq" do forM_ (List.zip databases [1 :: Int ..]) \(db, dbI) -> do let maxItem = List.maximum (Set.findMax <$> db) let itemToSupp = histogram (0, maxItem) db let (lo, hi) = Array.bounds itemToSupp let lstVal = [(i, itemToSupp Array.! i) | i <- [lo .. hi]] goldenShow ("db" <> show dbI) $ List.sortBy (comparing (Down . snd)) lstVal -- sortFrq lstVal describe "permut" do forM_ (List.zip databases [1 :: Int ..]) \(db, dbI) -> do let maxItem = List.maximum (Set.findMax <$> db) let itemToSupp = histogram (0, maxItem) db goldenShow ("db" <> show dbI) $ permut itemToSupp -} describe "closedFrequentItemSets" do forM_ databases \(dbName, db) -> do letName db $ \dbNamed -> forM_ ([1 .. 2] :: [Int]) \minSupp -> forM_ ([1 .. 2] :: [Int]) \minSize -> goldenShow ("db=" <> ShortText.unpack dbName <.> "minSupp=" <> show minSupp <.> "minSize=" <> show minSize) $ allClosedFrequentItemSets (assertStrictlyPositive minSupp) (assertStrictlyPositive minSize) dbNamed