1 {-# LANGUAGE RankNTypes #-}
2 {-# OPTIONS_GHC -Wno-deprecations #-}
4 -- | Brute-force algorithms related to mining frequent item sets.
6 -- Definition: in `Clustering.FrequentItemSet.References.RefAgrawalSrikantApriori`:
8 -- > Given a set of transactions D, the problem of mining
9 -- > association rules is to generate all association rules
10 -- > that have support and confidence greater than the
11 -- > user-specified minimum support (called minsup) and
12 -- > minimum confidence (called minconf) respectively.
13 module Clustering.FrequentItemSet.BruteForce (
17 type FrequentItemSet (),
21 type AssociationRule (),
22 type AssociationConfidence (),
23 type Association (..),
28 import Data.Eq (Eq (..))
29 import Data.Foldable (fold)
30 import Data.Function ((&))
32 import Data.List qualified as List
33 import Data.Ord qualified as Ord
34 import Data.Ratio ((%))
36 import Data.Set qualified as Set
38 import Logic.Theory.Arithmetic (Zero)
39 import Logic.Theory.Ord
41 -- import Numeric.Natural (Natural)
42 import Numeric.Probability
43 import Text.Show (Show (..))
44 import Prelude (fromIntegral, (+))
47 type Transactions item = [ItemSet item]
49 data Support itemSet db = SupportAxiom
50 instance Axiom (Support itemSet db ::: Int >= Zero)
52 -- | Return the number of occurrences of @(itemSet)@ in @(db)@.
55 itemSet ::: ItemSet item ->
56 db ::: Transactions item ->
57 Support itemSet db ::: Int
58 support (Named itemSet) (Named ts) =
59 SupportAxiom ... List.length (List.filter (itemSet `Set.isSubsetOf`) ts)
61 data FrequentItemSet items db minSupp = FrequentItemSetAxiom
63 -- | Determine the frequent itemsets.
66 items ::: ItemSet item ->
67 db ::: Transactions item ->
68 minSupp ::: Int / minSupp > Zero ->
69 [FrequentItemSet items db minSupp ::: ItemSet item]
70 frequentItemSets (Named items) (Named ts) (Named minSupp) =
71 [ FrequentItemSetAxiom ... sub
72 | (sub, occ) <- occBySubSet ts
76 -- TimeEfficiencyWarning: iterate only once through the given `(db)`
77 -- to count the occurence of each subset of the given `(items)`.
78 -- occBySubSet :: Transactions item -> [(ItemSet item, Int)]
81 (\t -> List.map (\(sub, occ) -> (sub, if sub `Set.isSubsetOf` t then occ + 1 else occ)))
82 [(sub, 0) | sub <- Set.powerSet items & Set.toList]
84 data AllItems db = AllItemsAxiom
87 -- `allFrequentItemSets` db minSupp = `frequentItemSets` is db minSupp
89 -- where `(is)` gathers all the items present in the given `(db)`.
90 allFrequentItemSets ::
92 db ::: Transactions item ->
93 minSupp ::: Int / minSupp > Zero ->
94 [ FrequentItemSet (AllItems db) db minSupp
97 allFrequentItemSets ts = frequentItemSets (AllItemsAxiom ... fold (unName ts)) ts
99 -- | An association rule.
101 -- Definition: in `Clustering.FrequentItemSet.References.RefAgrawalSrikantApriori`:
103 -- > An association rule is an implication of the form
104 -- > X ⇒ Y, where X ⊂ I, Y ⊂ I, and X ∩ Y = ∅.
106 -- > The rule X ⇒ Y holds in the transaction set D with confidence c
107 -- > if c% of transactions in D that contain X also contain Y.
109 -- > The rule X ⇒ Y has support s in the transaction set D
110 -- > if s% of transactions in D contain X ∪ Y.
111 data AssociationRule items db minSupp = AssociationRuleAxiom
113 data Association items db minSupp minConf item = Association
114 { associationCause :: FrequentItemSet items db minSupp ::: ItemSet item
115 -- ^ CorrectnessProperty: `associationCause` is a `FrequentItemSet`.
116 -- Because `freqSet` is frequent, and `causeSet` ⊂ `freqSet`.
117 , associationConfidence :: AssociationConfidence items db minSupp minConf ::: Probability
118 , -- CorrectnessProperty: `associationConfidence` is a `Probability`.
119 -- Because P(consequenceSet | causeSet) = P(causeSet ∩ consequenceSet) / P(causeSet)
120 associationConsequence :: FrequentItemSet items db minSupp ::: ItemSet item
121 -- ^ CorrectnessProperty: `associationConsequence` is a `FrequentItemSet`.
122 -- Because `freqSet` is frequent, and `consequenceSet` ⊂ `freqSet`.
126 data AssociationConfidence items db minSupp minConf = AssociationConfidenceAxiom
128 -- | By construction in `associationRules`.
129 instance Axiom (AssociationConfidence items db minSupp minConf <= minConf)
132 -- `associationRules` freqSet db minConf
134 -- generates association rules from the given `FrequentItemSet` `(freqSet)`
135 -- in the given `(db)`,
136 -- with a 'Confidence' greater or equal to the given `(minConf)`.
138 -- Algorithm: in `Clustering.FrequentItemSet.References.RefAgrawalSrikantApriori`,
139 -- section 1.1 "Problem Decomposition and Paper Organization", point 2:
141 -- For a given `FrequentItemSet` @(freqSet)@,
142 -- For every @(causeSet)@ non-empty subset of @(freqSet)@.
143 -- output a rule of the form @(causeSet => (freqSet - causeSet))@
144 -- if in the given @(db)@,
145 -- the ratio @(`support` freqSet)@
146 -- over @(`support` causeSet)@
147 -- is greater or egal to the given @(minConf)@.
150 -- > The association rules we consider are probabilistic in nature.
151 -- > The presence of a rule X → A does not necessarily mean
152 -- > that X + Y → A also holds because the latter may
153 -- > not have minimum support.
154 -- > Similarly, the presence of rules X → Y and Y → Z
155 -- > does not necessarily mean > that X → Z holds
156 -- > because the latter may not have minimum confidence.
158 -- PerformanceTimeWarning: traverses the given `(db)`
159 -- once for each non-empty subset of the given `(freqSet)`.
162 FrequentItemSet items db minSupp ::: ItemSet item ->
163 db ::: Transactions item ->
164 minConf ::: Probability ->
165 [ AssociationRule items db minSupp
166 ::: Association items db minSupp minConf item
168 associationRules freqSet ts minConf =
169 [ AssociationRuleAxiom
171 { associationCause = FrequentItemSetAxiom ... causeSet
172 , associationConfidence = AssociationConfidenceAxiom ... confidence
173 , -- CorrectnessProperty: `consequenceSet` is frequent.
174 -- Because `freqSet` is frequent, and `consequenceSet` ⊂ `freqSet`.
175 associationConsequence = FrequentItemSetAxiom ... consequenceSet
177 | causeSet <- Set.powerSet (unName freqSet) & Set.toList
178 , not (List.null causeSet)
179 , let consequenceSet = unName freqSet `Set.difference` causeSet
180 , not (List.null consequenceSet)
181 , let Named causeOcc = support (() ... causeSet) ts
182 , confidence <- probability (fromIntegral freqSetOcc % fromIntegral causeOcc)
183 , -- CorrectnessProperty: `AxiomAssociationConfidenceGEminConf`
184 unName minConf Ord.<= confidence
187 Named freqSetOcc = support freqSet ts