]> Git — Sourcephile - literate-phylomemy.git/blob - src/Clustering/FrequentItemSet/LCM.hs
completeness(scale): add support for scale
[literate-phylomemy.git] / src / Clustering / FrequentItemSet / LCM.hs
1 -- SPDX-License-Identifier: BSD-3-Clause
2 -- SPDX-FileCopyrightText: 2010 Alexandre Termier et al.
3
4 {-# LANGUAGE BangPatterns #-}
5 {-# LANGUAGE MagicHash #-}
6 {-# LANGUAGE UnboxedTuples #-}
7 {-# OPTIONS_GHC -Wno-unused-imports #-}
8
9 -- | Original LCM algorithm from:
10 --
11 -- \"An efficient algorithm for enumerating closed patterns in transaction databases\".
12 -- Takeaki Uno, Tatsuya Asai, Yuzo Uchida, and Hiroki Arimura.
13 -- In Discovery Science, pages 16–31, 2004.
14 -- https://research.nii.ac.jp/~uno/papers/lcm_ds04.pdf
15 -- https://www.annas-archive.org/scidb/10.1007/978-3-540-30214-8_2
16 --
17 -- Code adapted from HLCM:
18 --
19 -- \"HLCM: a first experiment on parallel data mining with Haskell\".
20 -- Alexandre Termier & Benjamin Négrevergne & Simon Marlow & Satnam Singh.
21 -- https://lig-membres.imag.fr/termier/HLCM/hlcm.pdf
22 -- https://hackage.haskell.org/package/hlcm
23 --
24 -- Possible future work:
25 --
26 -- \"Discovering closed frequent itemsets on multicore:
27 -- parallelizing computations and optimizing memory accesses\".
28 -- Benjamin Négrevergne & Alexandre Termier & Jean-François Méhaut & Takeaki Uno.
29 -- https://people.irisa.fr/Alexandre.Termier/publis/2010_negrevergne_hppddm.pdf
30 module Clustering.FrequentItemSet.LCM (
31 type ItemSet,
32 Transaction (..),
33 type FrequentItemSets (),
34 frequentItemSets,
35 type AllItems (),
36 allFrequentItemSets,
37 type Clusters,
38 type ClosedFrequentItemSets (),
39 closedFrequentItemSets,
40 allClosedFrequentItemSets,
41 itemToSupport,
42 runLCM,
43 type ItemSupport,
44 )
45 where
46
47 import Clustering.FrequentItemSet.BruteForce (ItemSet, Support (), Transaction (..))
48 import Data.Bifunctor (second)
49 import Data.Bool
50 import Data.Eq (Eq (..))
51 import Data.Foldable (fold, foldMap)
52 import Data.Function (id, ($), (&), (.))
53 import Data.Functor ((<$>), (<&>))
54 import Data.Int (Int)
55 import Data.Monoid (Monoid (..))
56 import Data.Ord (Down, Ord (..), comparing)
57 import Data.Ratio ((%))
58 import Data.Semigroup (Semigroup (..))
59 import Data.Sequence qualified as Seq
60 import Data.Tuple (fst, snd)
61 import Data.Validity (Validity (..), delve)
62 import Debug.Pretty.Simple (pTraceShow)
63 import Debug.Trace
64 import GHC.Generics (Generic)
65 import GHC.IsList (toList)
66 import GHC.Stack (HasCallStack)
67 import Logic
68 import Logic.Theory.Arithmetic (Zero)
69 import Logic.Theory.Ord qualified as Logic.Ord
70 import Numeric.Probability
71 import Text.Show (Show (..))
72 import Prelude (Enum, Num, error, fromIntegral, (+), (-))
73
74 import Control.Exception (evaluate)
75 import Control.Monad
76 import Control.Monad.ST
77 import Control.Parallel
78 import Control.Parallel.Strategies
79 import Data.Array.Base
80 import Data.Array.ST
81 import Data.Array.Unboxed
82 import Data.ByteString.Char8 qualified as L
83 import Data.List
84 import Data.List qualified as List
85 import Data.Map.Strict (Map)
86 import Data.Map.Strict qualified as Map
87 import Data.Maybe (catMaybes, fromJust, isNothing, maybeToList)
88 import Data.Set (Set)
89 import Data.Set qualified as Set
90 import Data.Vector qualified as V
91 import GHC.Exts hiding (Item)
92 import GHC.ST
93
94 data FrequentItemSets items db minSupp minSize = FrequentItemSetsAxiom
95
96 -- |
97 -- @
98 -- `frequentItemSets` minSupp minSize items db
99 -- @
100 -- returns a list of the closed frequent itemsets of
101 -- the transactions @(db)@ restricted to the specified @(items)@,
102 -- and such that the number of transactions containing them is greater or equal to @(minSupp)@,
103 -- and such that the size of those transactions is greater or equal to @(minSize)@.
104 -- Each closed frequent itemset is coupled with the sequence of the transactions containing them.
105 frequentItemSets ::
106 forall a item db minSize minSupp items.
107 Ord item =>
108 Show item =>
109 Show a =>
110 minSupp ::: Int / minSupp Logic.Ord.> Zero ->
111 minSize ::: Int / minSize Logic.Ord.> Zero ->
112 items ::: ItemSet item ->
113 db ::: [Transaction item a] ->
114 FrequentItemSets items db minSupp minSize
115 ::: Clusters item a
116 frequentItemSets (Named minSupp) (Named minSize) (Named items) (Named db) =
117 FrequentItemSetsAxiom ...
118 Map.fromDistinctAscList (loop Set.empty items db)
119 where
120 -- Lexicographic depth-first search for frequent item sets.
121 loop previousFIS nextItems previousTxns
122 | Set.null nextItems = []
123 | otherwise =
124 -- pTraceShow (["LCM", "frequentItemSets", "loop"], (("previousFIS", previousFIS), ("nextItems", nextItems), ("previousTxns", previousTxns))) $
125 -- Map each item of `nextItems` to its transactions in `previousTxns`.
126 let nextItemToTxns = itemToTxns nextItems previousTxns
127 in (`List.concatMap` Map.toList nextItemToTxns) \(nextItem, nextTxns) ->
128 -- Keep only the itemsets which are supported by enough transactions.
129 if minSupp <= fromIntegral (Seq.length nextTxns)
130 then
131 let nextFIS = Set.insert nextItem previousFIS
132 in -- Keep only the itemsets which have already gathered enough items.
133 [ (nextFIS, nextTxns)
134 | minSize <= fromIntegral (Set.size nextFIS)
135 ]
136 <>
137 -- Recurse with each item of `nextItems` greater than `nextItem`
138 -- (to traverse the frequent item sets as a tree instead of a poset lattice),
139 -- and with the transactions containing `nextItem`.
140 loop nextFIS (Set.split nextItem nextItems & snd) (nextTxns & toList)
141 else []
142
143 -- | @
144 -- `itemToTxns` items db
145 -- @
146 -- maps each item of `items` to the transactions of `db` containing it.
147 --
148 -- This maps from an "horizontal" representation to a "vertical" one,
149 -- itself mapping to an "horizontal" one.
150 -- See p.8 of https://www.lirmm.fr/~lazaar/imagina/NL-DM-IMAGINA1819-part1.pdf
151 --
152 -- It's a kind of occurrence deliver.
153 -- p.4 of http://osdm.uantwerpen.be/papers/p77-uno.pdf
154 itemToTxns ::
155 Ord item =>
156 ItemSet item ->
157 [Transaction item a] ->
158 Map item (Seq.Seq (Transaction item a))
159 itemToTxns itms txs =
160 Map.fromListWith
161 (<>)
162 [ (itm, Seq.singleton tx)
163 | tx <- txs
164 , itm <- Set.intersection itms (transactionItems tx) & Set.toList
165 ]
166
167 data AllItems db = AllItemsAxiom
168
169 -- | `frequentItemSets` applied to all the items of the given transactions.
170 allFrequentItemSets ::
171 Ord item =>
172 Show item =>
173 Show a =>
174 minSupp ::: Int / minSupp Logic.Ord.> Zero ->
175 minSize ::: Int / minSize Logic.Ord.> Zero ->
176 db ::: [Transaction item a] ->
177 FrequentItemSets (AllItems db) db minSupp minSize
178 ::: Clusters item a
179 allFrequentItemSets minSupp minSize db =
180 frequentItemSets
181 minSupp
182 minSize
183 (AllItemsAxiom ... foldMap transactionItems (unName db))
184 db
185
186 data ClosedFrequentItemSets items db minSupp minSize = ClosedFrequentItemSetsAxiom
187
188 type Clusters item a =
189 Map
190 (ItemSet item)
191 (Seq.Seq (Transaction item a))
192
193 closedFrequentItemSets ::
194 forall item db minSize minSupp items.
195 HasCallStack =>
196 Ord item =>
197 Show item =>
198 minSupp ::: Int / minSupp Logic.Ord.> Zero ->
199 minSize ::: Int / minSize Logic.Ord.> Zero ->
200 items ::: ItemSet item ->
201 db ::: [Set item] ->
202 ClosedFrequentItemSets items db minSupp minSize
203 ::: [(ItemSupport, ItemSet item)]
204 closedFrequentItemSets (Named minSupp) (Named minSize) (Named items) (Named db) =
205 ClosedFrequentItemSetsAxiom ...
206 runLCM items minSupp minSize db
207
208 allClosedFrequentItemSets ::
209 HasCallStack =>
210 Ord item =>
211 Show item =>
212 minSupp ::: Int / minSupp Logic.Ord.> Zero ->
213 minSize ::: Int / minSize Logic.Ord.> Zero ->
214 db ::: [Set item] ->
215 ClosedFrequentItemSets (AllItems db) db minSupp minSize
216 ::: [(ItemSupport, ItemSet item)]
217 allClosedFrequentItemSets minSupp minSize db =
218 closedFrequentItemSets
219 minSupp
220 minSize
221 (AllItemsAxiom ... fold (unName db))
222 db
223
224 type ItemSupport = Int
225 type ItemRank = Int
226
227 runLCM ::
228 forall item.
229 Show item =>
230 HasCallStack =>
231 Ord item =>
232 Set item ->
233 ItemSupport ->
234 Int ->
235 [Set item] ->
236 [(ItemSupport, Set item)]
237 runLCM items minSupp minSize db =
238 let
239 itemToSupp :: [(item, ItemSupport)]
240 itemToSupp =
241 itemToSupport items db
242 & Map.toList
243 & List.filter ((>= minSupp) . snd)
244 & List.sortBy (comparing (Down . snd))
245
246 itemsSize :: Int
247 itemsSize = List.length itemToSupp
248
249 itemToRank :: Map item ItemRank
250 itemToRank =
251 Map.fromList
252 [ (i, List.head $ List.findIndices ((== i) . fst) itemToSupp)
253 | (i, _) <- itemToSupp
254 ]
255
256 -- Rewrites the database to use `ItemRank` instead of `item`
257 rankDB :: [Set ItemRank]
258 rankDB =
259 [ Set.fromList
260 [ rank
261 | i <- tx & Set.toList
262 -- Items whose support is lower than `minSupp`
263 -- have been filtered-out in `itemToSupp`,
264 -- hence do not have a rank.
265 , rank <- Map.lookup i itemToRank & maybeToList
266 ]
267 | tx <- db
268 ]
269
270 -- Rewrite the database as a `LexicoTreeItem`
271 dbLT = List.foldr (\tx acc -> insertLT (tx & Set.toList) (-1) 1 acc) Nil rankDB
272
273 rankToItem :: Array ItemRank item
274 rankToItem =
275 List.zip [0 ..] (fst <$> itemToSupp)
276 & array (0, itemsSize - 1)
277
278 unrank :: [(ItemSupport, Set ItemRank)] -> [(ItemSupport, Set item)]
279 unrank = List.map $ second $ Set.map (rankToItem `unsafeAt`)
280 in
281 [ lcmLoop minSupp minSize 1 dbLT Set.empty candidateRank (rankToSuppLT items dbLT) items
282 | candidateRank <- [0 .. Set.size items -1]
283 ]
284 & parBuffer 8 rdeepseq
285 & runEval
286 & List.concat
287 & unrank
288
289 -- |
290 -- For a transaction database, a closed frequent Itemset, and a candidate item
291 -- for extension of this closed frequent Itemset, recursively computes all
292 -- the successor closed frequent Itemsets by PPC-extension.
293 lcmLoop ::
294 Show item =>
295 ItemSupport ->
296 Int ->
297 -- | Current depth in the search tree (for parallel optimisation purposes)
298 Int ->
299 -- | Transaction database.
300 LexicoTreeItem ->
301 -- | Input closed frequent Itemset.
302 Set ItemRank ->
303 -- | Candidate to extend the closed frequent Itemset above.
304 ItemRank ->
305 -- | Array associating each item with its frequency
306 UArray ItemRank ItemSupport ->
307 -- | Maximal item
308 Set item ->
309 -- | Result : list of closed frequent Itemsets. Each result is a list of Items, the head of the list being the frequency of the item.
310 [(ItemSupport, Set ItemRank)]
311 lcmLoop minSupp minSize depth previousDB previousRanks candidateRank rankToSupp items =
312 let
313 -- HLCM: line 1: CDB = project and reduce DB w.r.t. P and limit
314 -- Reduce database by eliminating:
315 -- - all items greater than `candidateRank`,
316 -- - and all items with zero support.
317 reducedDB = projectAndReduce candidateRank rankToSupp previousDB
318
319 -- HLCM: line 2: Compute frequencies of items in CDB
320 -- Compute items occurrences in reduced database.
321 reducedRankToSupp = rankToSuppLT items reducedDB
322
323 -- HLCM: line 3: CP = 100% frequent items in CDB
324 -- Check which items actually appear in reduced database.
325 candidateSupp = rankToSupp ! candidateRank
326
327 -- HLCM: line 6: Candidates = frequent items of CDB that are not in CP
328 -- Compute 100% frequent items, future candidates, and unfrequent items.
329 (closedFreqRanks, candidateRanks, unfreqRanks) =
330 computeCandidates minSupp candidateSupp items reducedRankToSupp
331 in
332 --pTraceShow (["lcmLoop"], minSupp, minSize, depth, previousDB, previousRanks, candidateRank, rankToSupp, items) $
333 -- HLCM: line 4: if max(CP) = limit then
334 if not (List.null closedFreqRanks) -- if there is a result ...
335 && last closedFreqRanks <= candidateRank -- ...and if it is OK to extend it
336 then
337 let
338 -- HLCM: line 5: P' = P ∪ CP
339 -- Result closed frequent Itemset = input closed frequent Itemset + 100% frequent Items
340 closedItemset = previousRanks <> Set.fromList closedFreqRanks
341
342 -- HLCM: line 8: for all e ∈ Candidates, e ≤ limit do
343 -- Only candidates with value lower than input candidateRank
344 -- can be used for further extension on this branch.
345 smallCandidates = List.takeWhile (< candidateRank) candidateRanks
346 in
347 [ (candidateSupp, closedItemset)
348 | minSize <= fromIntegral (Set.size closedItemset)
349 ]
350 <> if not (List.null smallCandidates) -- ... and if we have at least 1 possible extension
351 then
352 let
353 -- Update items occurrences table by suppressing:
354 -- - 100% frequent items,
355 -- - and unfrequent items.
356 newRankToSupp = suppressItems reducedRankToSupp closedFreqRanks unfreqRanks
357
358 loop newCandidate = lcmLoop minSupp minSize (depth + 1) reducedDB closedItemset newCandidate newRankToSupp items
359 in
360 -- Recursively extend the candidates
361 if 3 < depth -- create parallel sparks only for low search space depth
362 then List.concat $ runEval $ parBuffer 2 rdeepseq $ List.map loop smallCandidates
363 else List.concatMap loop smallCandidates
364 else []
365 else []
366
367 -- |
368 -- For a transaction database of type [[item]], compute the frequency
369 -- of each item and return an array (item, frequency).
370 itemToSupport ::
371 Ord item =>
372 Set item ->
373 [Set item] ->
374 Map item ItemSupport
375 itemToSupport items db =
376 Map.fromListWith
377 (+)
378 [ (itm, 1)
379 | tx <- db
380 , itm <- Set.intersection items tx & Set.toList
381 ]
382
383 -- |
384 -- For a given itemset being extended by a given candidate, compute :
385 -- - the closure of this itemset
386 -- - and the candidates for further extension.
387 computeCandidates ::
388 ItemSupport ->
389 ItemSupport ->
390 Set item ->
391 UArray ItemRank ItemSupport ->
392 -- (100% frequent items == closure, candidates for further extension, unfrequent items)
393 ([ItemRank], [ItemRank], [ItemRank])
394 computeCandidates minSupp candidateSupp items rankToSupp =
395 let
396 (frequentItems, unfreqItems) =
397 List.partition
398 (\i -> rankToSupp ! i >= minSupp)
399 [i | i <- [0 .. Set.size items - 1], rankToSupp ! i > 0]
400 (closedFrequentRanks, candidateRanks) =
401 List.partition (\i -> rankToSupp ! i == candidateSupp) frequentItems
402 in
403 (closedFrequentRanks, candidateRanks, unfreqItems)
404
405 -- |
406 -- Modifies an array associating Items with their frequency, in order to
407 -- give a frequency of 0 to a given list of Items.
408 --
409 -- NB : for performance reasons, this is REALLY a modification, made with unsafe operations.
410 suppressItems ::
411 -- | Array associating an item with its frequency
412 UArray ItemRank ItemSupport ->
413 -- | List of 100% frequent Items
414 [ItemRank] ->
415 -- | List of unfrequent Items
416 [ItemRank] ->
417 -- | Initial array, with frequencies of 100% frequent Items
418 -- and unfrequent Items set to 0.
419 UArray ItemRank ItemSupport
420 suppressItems rankToSupp closedRanks unfreqRanks =
421 runST do
422 -- Can be used in multithread because no concurrent write
423 arr <- unsafeThaw rankToSupp :: ST s (STUArray s ItemRank ItemSupport)
424 forM_ closedRanks \i -> writeArray arr i 0
425 forM_ unfreqRanks \i -> writeArray arr i 0
426 -- Can be used in multithread because no concurrent write
427 unsafeFreeze arr
428
429 -----------------------------------------------------------------
430 -- LEXICOGRAPHIC TREE MANIPULATION
431 -----------------------------------------------------------------
432
433 -- |
434 -- Creates a new, reduced transaction database by eliminating all items
435 -- greater than @candidateRank@ item, and all infrequent Items.
436 projectAndReduce ::
437 -- | Candidate item, on which the projection is made
438 ItemRank ->
439 -- | Array associating each item with its frequency in
440 -- original transaction database.
441 UArray ItemRank ItemSupport ->
442 -- | Original transaction database
443 LexicoTreeItem ->
444 -- | Result : Reduced transaction database
445 LexicoTreeItem
446 projectAndReduce !candidateRank rankToSupp = go
447 where
448 go Nil = Nil
449 go (Node e suiv alt w)
450 | e > candidateRank = Nil
451 | e == candidateRank =
452 let !(suiv', addWeight) = filterInfrequent suiv rankToSupp
453 in Node e suiv' Nil (w + addWeight)
454 | otherwise =
455 let
456 !alt' = go alt
457 !suiv' = go suiv
458 in
459 if rankToSupp ! e > 0
460 then
461 if notNil suiv' && notNil alt'
462 then Node e suiv' alt' 0
463 else if notNil suiv' then Node e suiv' Nil 0 else alt'
464 else
465 if notNil suiv' && notNil alt'
466 then mergeAlts suiv' alt'
467 else if notNil suiv' then suiv' else alt'
468
469 type Weight = Int
470
471 -- |
472 -- Suppress all infrequent Items from a transaction database expressed as
473 -- lexicographic tree, and returns a new lexicographic tree.
474 filterInfrequent ::
475 -- | Original transaction database
476 LexicoTreeItem ->
477 -- | Array associating each item with its frequency in
478 -- original transaction database. In this setting,
479 -- an infrequent item as a frequency of 0 (because of preprocessing by
480 -- ' suppressItems ').
481 UArray ItemRank ItemSupport ->
482 -- | Result : (transaction database without infrequent Items, weight to report in parent nodes)
483 (LexicoTreeItem, Weight)
484 filterInfrequent Nil _ = (Nil, 0)
485 filterInfrequent (Node e suiv alt w) occs
486 | occs ! e > 0 = (Node e suiv' alt' (w + ws), wa)
487 | notNil suiv' && notNil alt' = (mergeAlts suiv' alt', w')
488 | notNil alt' = (alt', w')
489 | notNil suiv' = (suiv', w')
490 | otherwise = (Nil, w')
491 where
492 w' = w + ws + wa
493 !(suiv', ws) = filterInfrequent suiv occs
494 !(alt', wa) = filterInfrequent alt occs
495
496 {-# INLINE notNil #-}
497 notNil :: LexicoTreeItem -> Bool
498 notNil Nil = False
499 notNil _ = True
500
501 -- |
502 -- Occurence delivering:
503 -- Map each item of the given database to its support.
504 rankToSuppLT ::
505 Set item ->
506 -- | Transaction database (in lexicographic tree format)
507 LexicoTreeItem ->
508 -- | Result : array associating each item to its frequency.
509 UArray ItemRank ItemSupport
510 rankToSuppLT items dbLT =
511 runST do
512 arr <- newArray_ (0, Set.size items - 1)
513 -- TODO: this workaround should no longer be necessary
514 -- Creates an empty array : each item starts with frequency 0
515 -- workaround for http://hackage.haskell.org/trac/ghc/ticket/3586
516 forM_ [0 .. Set.size items - 1] $ \i -> unsafeWrite arr i 0
517 -- Compute frequencies for each item by efficient tree traversal
518 _ <- traverseLT dbLT arr
519 unsafeFreeze arr
520
521 -- |
522 -- Efficient traversal of the transaction database as a lexicographic tree.
523 -- Items frequencies are updated on the fly.
524 traverseLT ::
525 forall s.
526 -- | Transaction database
527 LexicoTreeItem ->
528 -- | Array associating each item with its frequency. UPDATED by this function !
529 STUArray s ItemRank ItemSupport ->
530 ST s ()
531 traverseLT tree arr = ST $ \s ->
532 case go tree s of
533 (# s', _ #) -> (# s', () #)
534 where
535 go ::
536 LexicoTreeItem ->
537 State# s ->
538 (# State# s, Int# #)
539 go Nil s = (# s, 0# #)
540 go (Node item child alt w@(I# w#)) s0 =
541 case go child s0 of
542 (# s1, childw #) ->
543 case go alt s1 of
544 (# s2, altw #) ->
545 case unsafeRead arr item of
546 ST f ->
547 case f s2 of
548 (# _s3, I# itemw #) ->
549 case unsafeWrite arr item (I# itemw + I# childw + w) of
550 ST f' ->
551 case f' s2 of
552 (# s4, _ #) -> (# s4, childw +# w# +# altw #)
553
554 -- RankToSupp
555
556 -- | Type for a lexicographic tree, implementating a n-ary tree over a binary tree.
557 data LexicoTreeItem
558 = -- | Void node
559 Nil
560 | -- | A node : item, next node (next in transaction), alternative node (other branch), weight
561 Node
562 {-# UNPACK #-} !ItemRank
563 !LexicoTreeItem -- NB. experimental strictness annotation
564 !LexicoTreeItem -- NB. experimental strictness annotation
565 {-# UNPACK #-} !Int
566 deriving (Eq, Show)
567
568 -- |
569 -- Inserts a transaction in list format into the lexicographic tree.
570 -- Automatically merges identical transactions.
571 -- Performs suffix intersection.
572 insertLT ::
573 -- | Transaction to insert into lexicographic tree
574 [ItemRank] ->
575 -- | "coreI" item, for suffix intersection.
576 ItemRank ->
577 -- | Weight of the transaction to insert
578 ItemSupport ->
579 -- | Input lexicographic tree
580 LexicoTreeItem ->
581 -- | Result : a new lexicographic tree with the transaction inserted
582 LexicoTreeItem
583 insertLT [] _ _ lt = lt
584 insertLT lst _ w Nil = createPath lst w
585 insertLT [x] i w (Node e suiv alt weight)
586 | x < e = Node x Nil (Node e suiv alt weight) w
587 | x == e = Node e suiv alt (weight + w)
588 | x > e = Node e suiv (insertLT [x] i w alt) weight
589 insertLT (x : xs) i w (Node e suiv alt weight)
590 | x < e = Node x (createPath xs w) (Node e suiv alt weight) 0
591 | x == e =
592 if (e /= i)
593 then Node e (insertLT xs i w suiv) alt weight
594 else suffixIntersectionLT xs w (Node e suiv alt weight)
595 | x > e = Node e suiv (insertLT (x : xs) i w alt) weight
596 insertLT _ _ _ _ = error "insertLT"
597
598 -- |
599 -- From a transaction and its weight, directly creates a path-shaped lexicographic tree.
600 createPath ::
601 -- | Transaction
602 [ItemRank] ->
603 -- | Weight of the transaction
604 Int ->
605 -- | Result : a path-shaped lexicographic tree encoding the transaction
606 LexicoTreeItem
607 createPath [] _ = Nil
608 createPath [x] w = Node x Nil Nil w
609 createPath (x : xs) w = Node x (createPath xs w) Nil 0
610
611 -- |
612 -- Perform the "suffix intersection" operation with the suffix of a transaction
613 -- and the corresponding part of a lexicographic tree.
614 --
615 -- For more details, see "prefixIntersection" in Takeaki Uno's papers about LCM.
616 suffixIntersectionLT ::
617 -- | Suffix of the transaction to insert.
618 [ItemRank] ->
619 -- | Weight of the transaction to insert
620 Int ->
621 -- | (Sub-)lexicographic tree where the transaction must be inserted. The @next@ part (see data type comments)
622 -- should be a simple path, it will be the target of intersection with the suffix.
623 LexicoTreeItem ->
624 -- | Result : lexicographic tree where the suffix has been added, with correct intersections performed.
625 LexicoTreeItem
626 suffixIntersectionLT _ w (Node e Nil alt weight) = Node e Nil alt (weight + w)
627 suffixIntersectionLT lst w (Node e suiv alt weight) =
628 let (newSuiv, addWeight) = suffInterSuiv lst w suiv
629 in Node e newSuiv alt (weight + addWeight)
630 suffixIntersectionLT _ _ _ = error "suffixIntersectionLT"
631
632 -- |
633 -- Intersects a list-shaped transaction and a path-shaped lexicographic tree.
634 -- The result is a path shaped lexicographic tree with weights correctly updated.
635 suffInterSuiv ::
636 -- | Transaction as list
637 [ItemRank] ->
638 -- | Weight of the above transaction
639 Int ->
640 -- | Path-shaped lexicographic tree
641 LexicoTreeItem ->
642 -- | Result : (path-shaped lexicographic tree representing the intersection
643 -- of transaction and input path , 0 if intersection not [] / sum of weights else)
644 (LexicoTreeItem, Int)
645 suffInterSuiv lst w suiv =
646 let
647 (lstSuiv, weightSuiv) = getLstSuiv suiv
648 inter = List.intersect lstSuiv lst
649 in
650 if (inter /= [])
651 then (createPath inter (weightSuiv + w), 0)
652 else (Nil, weightSuiv + w)
653
654 -- |
655 -- Collects all the nodes of lexicographic tree in a list of elements.
656 getLstSuiv ::
657 -- | Path shaped lexicographic tree.
658 LexicoTreeItem ->
659 -- | Result : (list of elements in the path, sum of weights of nodes in the path)
660 ([ItemRank], Int)
661 getLstSuiv Nil = ([], 0)
662 getLstSuiv (Node e suiv Nil weight) =
663 let (lst, w) = getLstSuiv suiv
664 in (e : lst, w + weight)
665 getLstSuiv _ = error "getLstSuiv"
666
667 -- |
668 -- Merge two lexicographic trees.
669 mergeAlts :: LexicoTreeItem -> LexicoTreeItem -> LexicoTreeItem
670 mergeAlts Nil lt = lt
671 mergeAlts lt Nil = lt
672 mergeAlts (Node e1 suiv1 alt1 w1) (Node e2 suiv2 alt2 w2)
673 | e1 < e2 = (Node e1 suiv1 (mergeAlts alt1 (Node e2 suiv2 alt2 w2)) w1)
674 | e1 > e2 = (Node e2 suiv2 (mergeAlts (Node e1 suiv1 alt1 w1) alt2) w2)
675 | e1 == e2 = (Node e1 (mergeAlts suiv1 suiv2) (mergeAlts alt1 alt2) (w1 + w2))
676 mergeAlts _ _ = error "mergeAlts"