1 {-# LANGUAGE RankNTypes #-}
2 {-# OPTIONS_GHC -Wno-deprecations #-}
4 -- | Brute-force algorithms related to mining frequent item sets.
6 -- Definition: in `Clustering.FrequentItemSet.References.RefAgrawalSrikantApriori`:
8 -- > Given a set of transactions D, the problem of mining
9 -- > association rules is to generate all association rules
10 -- > that have support and confidence greater than the
11 -- > user-specified minimum support (called minsup) and
12 -- > minimum confidence (called minconf) respectively.
13 module Clustering.FrequentItemSet.BruteForce (
17 type FrequentItemSet (),
21 type AssociationRule (),
22 type AssociationConfidence (),
23 type Association (..),
26 -- type ClosedFrequentItemSets (),
27 -- closedFrequentItemSets,
28 -- allClosedFrequentItemSets,
32 import Data.Eq (Eq (..))
33 import Data.Foldable (foldMap)
34 import Data.Function ((&))
35 import Data.Functor ((<&>))
37 import Data.List qualified as List
38 import Data.Monoid (Monoid (..))
39 import Data.Ord qualified as Ord
40 import Data.Ratio ((%))
42 import Data.Set qualified as Set
43 import Data.Validity (Validity (..), delve)
44 import GHC.Generics (Generic)
46 import Logic.Theory.Arithmetic (Zero)
47 import Logic.Theory.Ord
48 import Numeric.Probability
49 import Prelude (fromIntegral, (+))
50 import Text.Show (Show (..))
54 data Transaction item a = Transaction
55 { transactionData :: a
56 , transactionItems :: ItemSet item
58 deriving (Show, Generic)
59 instance (Validity (ItemSet item), Validity a) => Validity (Transaction item a) where
62 [ delve "transactionData" (transactionData x)
63 , delve "transactionItems" (transactionItems x)
66 data Support itemSet db = SupportAxiom
67 instance Axiom (Support itemSet db ::: Int >= Zero)
69 -- | Return the number of occurrences of @(itemSet)@ in @(db)@.
72 items ::: ItemSet item ->
73 db ::: [Transaction item a] ->
74 Support itemSet db ::: Int
75 support (Named items) (Named db) =
76 SupportAxiom ... List.length (List.filter (items `Set.isSubsetOf`) (db <&> transactionItems))
78 data FrequentItemSet items db minSupp = FrequentItemSetAxiom
80 -- | Determine the frequent itemsets.
83 items ::: ItemSet item ->
84 db ::: [Transaction item a] ->
85 minSupp ::: Int / minSupp > Zero ->
86 [FrequentItemSet items db minSupp ::: ItemSet item]
87 frequentItemSets (Named items) (Named db) (Named minSupp) =
88 [ FrequentItemSetAxiom ... sub
89 | (sub, occ) <- occBySubSet (db <&> transactionItems)
93 -- TimeEfficiencyWarning: iterate only once through the given `(db)`
94 -- to count the occurence of each subset of the given `(items)`.
95 -- occBySubSet :: [Transaction item a] -> [(ItemSet item, Int)]
98 (\t -> List.map (\(sub, occ) -> (sub, if sub `Set.isSubsetOf` t then occ + 1 else occ)))
99 [(sub, 0) | sub <- Set.powerSet items & Set.toList]
101 data AllItems db = AllItemsAxiom
104 -- `allFrequentItemSets` db minSupp = `frequentItemSets` is db minSupp
106 -- where `(is)` gathers all the items present in the given `(db)`.
107 allFrequentItemSets ::
109 db ::: [Transaction item a] ->
110 minSupp ::: Int / minSupp > Zero ->
111 [ FrequentItemSet (AllItems db) db minSupp
114 allFrequentItemSets db =
115 frequentItemSets (AllItemsAxiom ... foldMap transactionItems (unName db)) db
117 -- | An association rule.
119 -- Definition: in `Clustering.FrequentItemSet.References.RefAgrawalSrikantApriori`:
121 -- > An association rule is an implication of the form
122 -- > X ⇒ Y, where X ⊂ I, Y ⊂ I, and X ∩ Y = ∅.
124 -- > The rule X ⇒ Y holds in the transaction set D with confidence c
125 -- > if c% of transactions in D that contain X also contain Y.
127 -- > The rule X ⇒ Y has support s in the transaction set D
128 -- > if s% of transactions in D contain X ∪ Y.
129 data AssociationRule items db minSupp = AssociationRuleAxiom
131 data Association items db minSupp minConf item = Association
132 { associationCause :: FrequentItemSet items db minSupp ::: ItemSet item
133 -- ^ CorrectnessProperty: `associationCause` is a `FrequentItemSet`.
134 -- Because `freqSet` is frequent, and `causeSet` ⊂ `freqSet`.
135 , associationConfidence :: AssociationConfidence items db minSupp minConf ::: Probability
136 , -- CorrectnessProperty: `associationConfidence` is a `Probability`.
137 -- Because P(consequenceSet | causeSet) = P(causeSet ∩ consequenceSet) / P(causeSet)
138 associationConsequence :: FrequentItemSet items db minSupp ::: ItemSet item
139 -- ^ CorrectnessProperty: `associationConsequence` is a `FrequentItemSet`.
140 -- Because `freqSet` is frequent, and `consequenceSet` ⊂ `freqSet`.
144 data AssociationConfidence items db minSupp minConf = AssociationConfidenceAxiom
146 -- | By construction in `associationRules`.
147 instance Axiom (AssociationConfidence items db minSupp minConf >= minConf)
150 -- `associationRules` freqSet db minConf
152 -- generates association rules from the given `FrequentItemSet` `(freqSet)`
153 -- in the given `(db)`,
154 -- with a 'Confidence' greater or equal to the given `(minConf)`.
156 -- Algorithm: in `Clustering.FrequentItemSet.References.RefAgrawalSrikantApriori`,
157 -- section 1.1 "Problem Decomposition and Paper Organization", point 2:
159 -- For a given `FrequentItemSet` @(freqSet)@,
160 -- For every @(causeSet)@ non-empty subset of @(freqSet)@.
161 -- output a rule of the form @(causeSet => (freqSet - causeSet))@
162 -- if in the given @(db)@,
163 -- the ratio @(`support` freqSet)@
164 -- over @(`support` causeSet)@
165 -- is greater or egal to the given @(minConf)@.
168 -- > The association rules we consider are probabilistic in nature.
169 -- > The presence of a rule X → A does not necessarily mean
170 -- > that X + Y → A also holds because the latter may
171 -- > not have minimum support.
172 -- > Similarly, the presence of rules X → Y and Y → Z
173 -- > does not necessarily mean > that X → Z holds
174 -- > because the latter may not have minimum confidence.
176 -- PerformanceTimeWarning: traverses the given `(db)`
177 -- once for each non-empty subset of the given `(freqSet)`.
180 FrequentItemSet items db minSupp ::: ItemSet item ->
181 db ::: [Transaction item a] ->
182 minConf ::: Probability ->
183 [ AssociationRule items db minSupp
184 ::: Association items db minSupp minConf item
186 associationRules freqSet db minConf =
187 [ AssociationRuleAxiom
189 { associationCause = FrequentItemSetAxiom ... causeSet
190 , associationConfidence = AssociationConfidenceAxiom ... confidence
191 , -- CorrectnessProperty: `consequenceSet` is frequent.
192 -- Because `freqSet` is frequent, and `consequenceSet` ⊂ `freqSet`.
193 associationConsequence = FrequentItemSetAxiom ... consequenceSet
195 | causeSet <- Set.powerSet (unName freqSet) & Set.toList
196 , not (Set.null causeSet)
197 , let consequenceSet = unName freqSet `Set.difference` causeSet
198 , not (Set.null consequenceSet)
199 , let Named causeOcc = support (() ... causeSet) db
200 , confidence <- probability (fromIntegral freqSetOcc % fromIntegral causeOcc)
201 , unName minConf Ord.<= confidence
204 Named freqSetOcc = support freqSet db