-- SPDX-License-Identifier: BSD-3-Clause
-- SPDX-FileCopyrightText: 2010 Alexandre Termier

{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE UnboxedTuples #-}
{-# OPTIONS_GHC -Wno-unused-imports #-}

-- | Original LCM algorithm from:
--
-- \"An efficient algorithm for enumerating closed patterns in transaction databases\".
-- Takeaki Uno, Tatsuya Asai, Yuzo Uchida, and Hiroki Arimura.
-- In Discovery Science, pages 16–31, 2004.
-- https://research.nii.ac.jp/~uno/papers/lcm_ds04.pdf
-- https://www.annas-archive.org/scidb/10.1007/978-3-540-30214-8_2
--
-- Code adapted from HLCM:
--
-- \"HLCM: a first experiment on parallel data mining with Haskell\".
-- Alexandre Termier & Benjamin Négrevergne & Simon Marlow & Satnam Singh.
-- https://lig-membres.imag.fr/termier/HLCM/hlcm.pdf
-- https://hackage.haskell.org/package/hlcm
--
-- Possible future work:
--
-- \"Discovering closed frequent itemsets on multicore:
-- parallelizing computations and optimizing memory accesses\".
-- Benjamin Négrevergne & Alexandre Termier & Jean-François Méhaut & Takeaki Uno.
-- https://people.irisa.fr/Alexandre.Termier/publis/2010_negrevergne_hppddm.pdf
module Clustering.FrequentItemSet.LCM (
  type ItemSet,
  Transaction (..),
  type FrequentItemSets (),
  frequentItemSets,
  type AllItems (),
  allFrequentItemSets,
  type Clusters,
  type ClosedFrequentItemSets (),
  closedFrequentItemSets,
  allClosedFrequentItemSets,
  itemToSupport,
  runLCM,
  type ItemSupport,
)
where
import Clustering.FrequentItemSet.BruteForce (ItemSet, Support (), Transaction (..))
import Data.Bifunctor (second)
import Data.Bool
import Data.Eq (Eq (..))
import Data.Foldable (fold, foldMap)
import Data.Function (id, ($), (&), (.))
import Data.Functor ((<$>), (<&>))
import Data.Int (Int)
import Data.Monoid (Monoid (..))
import Data.Ord (Down, Ord (..), comparing)
import Data.Ratio ((%))
import Data.Semigroup (Semigroup (..))
import Data.Sequence qualified as Seq
import Data.Tuple (fst, snd)
import Data.Validity (Validity (..), delve)
import Debug.Pretty.Simple (pTraceShow)
import Debug.Trace
import GHC.Generics (Generic)
import GHC.IsList (toList)
import GHC.Stack (HasCallStack)
import Logic
import Logic.Theory.Arithmetic (Zero)
import Logic.Theory.Ord qualified as Logic.Ord
import Numeric.Probability
import Text.Show (Show (..))
import Prelude (Enum, Num, error, fromIntegral, (+), (-))

import Control.Exception (evaluate)
import Control.Monad
import Control.Monad.ST
import Control.Parallel
import Control.Parallel.Strategies
import Data.Array.Base
import Data.Array.ST
import Data.Array.Unboxed
import Data.ByteString.Char8 qualified as L
import Data.List
import Data.List qualified as List
import Data.Map.Strict (Map)
import Data.Map.Strict qualified as Map
import Data.Maybe (catMaybes, fromJust, isNothing, maybeToList)
import Data.Set (Set)
import Data.Set qualified as Set
import Data.Vector qualified as V
import GHC.Exts hiding (Item)
import GHC.ST

data FrequentItemSets items db minSupp minSize = FrequentItemSetsAxiom
-- |
-- @
-- `frequentItemSets` minSupp minSize items db
-- @
-- returns the frequent itemsets of
-- the transactions @(db)@ restricted to the specified @(items)@,
-- such that the number of transactions containing them is greater than or equal to @(minSupp)@,
-- and such that the size of those itemsets is greater than or equal to @(minSize)@.
-- Each frequent itemset is mapped to the sequence of the transactions containing it.
frequentItemSets ::
  forall a item db minSize minSupp items.
  Ord item =>
  Show item =>
  Show a =>
  minSupp ::: Int / minSupp Logic.Ord.> Zero ->
  minSize ::: Int / minSize Logic.Ord.> Zero ->
  items ::: ItemSet item ->
  db ::: [Transaction item a] ->
  FrequentItemSets items db minSupp minSize
    ::: Clusters item a
frequentItemSets (Named minSupp) (Named minSize) (Named items) (Named db) =
  FrequentItemSetsAxiom ...
    Map.fromDistinctAscList (loop Set.empty items db)
  where
    -- Lexicographic depth-first search for frequent item sets.
    loop previousFIS nextItems previousTxns
      | Set.null nextItems = []
      | otherwise =
          -- pTraceShow (["LCM", "frequentItemSets", "loop"], (("previousFIS", previousFIS), ("nextItems", nextItems), ("previousTxns", previousTxns))) $
          -- Map each item of `nextItems` to its transactions in `previousTxns`.
          let nextItemToTxns = itemToTxns nextItems previousTxns
           in (`List.concatMap` Map.toList nextItemToTxns) \(nextItem, nextTxns) ->
                -- Keep only the itemsets which are supported by enough transactions.
                if minSupp <= fromIntegral (Seq.length nextTxns)
                  then
                    let nextFIS = Set.insert nextItem previousFIS
                     in -- Keep only the itemsets which have already gathered enough items.
                        [ (nextFIS, nextTxns)
                        | minSize <= fromIntegral (Set.size nextFIS)
                        ]
                          <>
                          -- Recurse with each item of `nextItems` greater than `nextItem`
                          -- (to traverse the frequent item sets as a tree instead of a poset lattice),
                          -- and with the transactions containing `nextItem`.
                          loop nextFIS (Set.split nextItem nextItems & snd) (nextTxns & toList)
                  else []
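-- Illustrative sketch (not part of the original code): with @items = {a,b,c}@,
-- and assuming every itemset meets @minSupp@ and @minSize@, the `loop` above
-- enumerates each itemset exactly once, in lexicographic order, by recursing
-- only into the items greater than `nextItem`:
--
-- > {a}, {a,b}, {a,b,c}, {a,c}, {b}, {b,c}, {c}
--
-- The "greater than" restriction is what `Set.split` provides, e.g. in GHCi:
--
-- >>> Set.split 'b' (Set.fromList "abc")
-- (fromList "a",fromList "c")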
-- | @
-- `itemToTxns` items db
-- @
-- maps each item of `items` to the transactions of `db` containing it.
--
-- This maps from a "horizontal" representation to a "vertical" one,
-- itself mapping to a "horizontal" one.
-- See p.8 of https://www.lirmm.fr/~lazaar/imagina/NL-DM-IMAGINA1819-part1.pdf
--
-- It's a kind of "occurrence deliver";
-- see p.4 of http://osdm.uantwerpen.be/papers/p77-uno.pdf
itemToTxns ::
  Ord item =>
  ItemSet item ->
  [Transaction item a] ->
  Map.Map item (Seq.Seq (Transaction item a))
itemToTxns itms txs =
  Map.fromListWith
    (<>)
    [ (itm, Seq.singleton tx)
    | tx <- txs
    , itm <- Set.intersection itms (transactionItems tx) & Set.toList
    ]
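-- Illustrative sketch (not part of the original code): given a transaction t1
-- whose `transactionItems` are {a,b} and a transaction t2 whose
-- `transactionItems` are {b,c},
--
-- > itemToTxns (Set.fromList "abc") [t1, t2]
--
-- yields the "vertical" map
--
-- > fromList [('a', [t1]), ('b', [t1, t2]), ('c', [t2])]
--
-- (with `Seq`s of transactions as values).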
data AllItems db = AllItemsAxiom

-- | `frequentItemSets` applied to all the items of the given transactions.
allFrequentItemSets ::
  Ord item =>
  Show item =>
  Show a =>
  minSupp ::: Int / minSupp Logic.Ord.> Zero ->
  minSize ::: Int / minSize Logic.Ord.> Zero ->
  db ::: [Transaction item a] ->
  FrequentItemSets (AllItems db) db minSupp minSize
    ::: Clusters item a
allFrequentItemSets minSupp minSize db =
  frequentItemSets
    minSupp
    minSize
    (AllItemsAxiom ... foldMap transactionItems (unName db))
    db

data ClosedFrequentItemSets items db minSupp minSize = ClosedFrequentItemSetsAxiom

type Clusters item a =
  Map.Map
    (ItemSet item)
    (Seq.Seq (Transaction item a))
closedFrequentItemSets ::
  forall item db minSize minSupp items.
  HasCallStack =>
  Ord item =>
  Show item =>
  minSupp ::: Int / minSupp Logic.Ord.> Zero ->
  minSize ::: Int / minSize Logic.Ord.> Zero ->
  items ::: ItemSet item ->
  db ::: [Set item] ->
  ClosedFrequentItemSets items db minSupp minSize
    ::: [(ItemSupport, ItemSet item)]
closedFrequentItemSets (Named minSupp) (Named minSize) (Named items) (Named db) =
  ClosedFrequentItemSetsAxiom ...
    runLCM items minSupp minSize db
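-- Illustrative sketch (not part of the original code): for the toy database
-- [{a,b}, {a,b,c}, {b,c}] with minSupp = 2 and minSize = 1, the closed
-- frequent itemsets are exactly the itemsets equal to the intersection of all
-- the transactions containing them and supported by at least 2 transactions:
--
-- > {b}   with support 3
-- > {a,b} with support 2
-- > {b,c} with support 2
--
-- ({a} and {c} are frequent but not closed: every transaction containing a
-- also contains b, and likewise for c.)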
allClosedFrequentItemSets ::
  HasCallStack =>
  Ord item =>
  Show item =>
  minSupp ::: Int / minSupp Logic.Ord.> Zero ->
  minSize ::: Int / minSize Logic.Ord.> Zero ->
  db ::: [Set item] ->
  ClosedFrequentItemSets (AllItems db) db minSupp minSize
    ::: [(ItemSupport, ItemSet item)]
allClosedFrequentItemSets minSupp minSize db =
  closedFrequentItemSets
    minSupp
    minSize
    (AllItemsAxiom ... fold (unName db))
    db
-- |
-- Library for using the LCM algorithm in order to compute closed frequent patterns.
-- In the original HLCM code, the input must be a transaction database, either in
-- text format (as a ByteString) or in @[[Item]]@ format, where @Item = Int@;
-- here it is a list of itemsets (@[Set item]@).
--
-- HLCM also provides several benchmarking functions allowing one to tune the
-- parallel strategy used and the depth cutoff.
type ItemSupport = Int

-- type Item = Int
type Weight = Int
-----------------------------------------------------------------
-- LCM functions
-----------------------------------------------------------------

type ItemRank = Int
-- | Takes the transaction database, ranks its items by decreasing support,
-- and executes LCM to discover closed frequent itemsets.
runLCM ::
  forall item.
  Show item =>
  HasCallStack =>
  Ord item =>
  Set item ->
  ItemSupport ->
  Int ->
  -- | The transaction database, as a list of transactions (each a set of items)
  [Set item] ->
  -- | Output: list of closed frequent itemsets, each paired with its support
  [(ItemSupport, Set item)]
runLCM items minSupp minSize db =
  let
    itemToSupp :: [(item, ItemSupport)]
    itemToSupp =
      itemToSupport items db
        & Map.toList
        & List.filter ((>= minSupp) . snd)
        & List.sortBy (comparing (Down . snd))

    itemsSize :: Int
    itemsSize = List.length itemToSupp

    itemToRank :: Map.Map item ItemRank
    itemToRank =
      Map.fromList
        [ (i, List.head $ List.findIndices ((== i) . fst) itemToSupp)
        | (i, _) <- itemToSupp
        ]

    -- Rewrites the database to use `ItemRank` instead of `item`.
    rankDB :: [Set ItemRank]
    rankDB =
      [ Set.fromList
          [ rank
          | i <- tx & Set.toList
          , -- Items whose support is lower than `minSupp`
            -- have been filtered-out in `itemToSupp`,
            -- hence do not have a rank.
            rank <- Map.lookup i itemToRank & maybeToList
          ]
      | tx <- db
      ]

    dbLT = List.foldr (\tx acc -> insertLT (tx & Set.toList) (-1) 1 acc) Nil rankDB

    rankToItem :: Array ItemRank item
    rankToItem =
      List.zip [0 ..] (fst <$> itemToSupp)
        & array (0, itemsSize - 1)

    unrank :: [(ItemSupport, Set ItemRank)] -> [(ItemSupport, Set item)]
    unrank = List.map $ second $ Set.map (rankToItem `unsafeAt`)
  in
    [ lcmLoop minSupp minSize 1 dbLT Set.empty candidateRank (rankToSuppLT items dbLT) items
    | candidateRank <- [0 .. Set.size items - 1]
    ]
      & parBuffer 8 rdeepseq
      & runEval
      & List.concat
      & unrank
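-- Illustrative GHCi sketch (not part of the original code), using the toy
-- database from above; items are ranked by decreasing support (here b, a, c),
-- mined over ranks, then mapped back to items by `unrank`:
--
-- >>> runLCM (Set.fromList "abc") 2 1 (Set.fromList <$> ["ab", "abc", "bc"])
-- [(3,fromList "b"),(2,fromList "ab"),(2,fromList "bc")]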
-- |
-- For a transaction database, a closed frequent itemset, and a candidate item
-- for extension of this closed frequent itemset, recursively computes all
-- the successor closed frequent itemsets by PPC-extension.
lcmLoop ::
  Show item =>
  ItemSupport ->
  Int ->
  -- | Current depth in the search tree (for parallel optimisation purposes)
  Int ->
  -- | Transaction database.
  LexicoTreeItem ->
  -- | Input closed frequent itemset.
  Set ItemRank ->
  -- | Candidate to extend the closed frequent itemset above.
  ItemRank ->
  -- | Array associating each item with its frequency
  UArray ItemRank ItemSupport ->
  -- | The set of all items (only its size is used)
  Set item ->
  -- | Result: list of closed frequent itemsets, each paired with its support
  [(ItemSupport, Set ItemRank)]
lcmLoop minSupp minSize depth previousDB previousRanks candidateRank rankToSupp items =
  let
    -- HLCM: line 1: CDB = project and reduce DB w.r.t. P and limit
    -- Reduce the database by eliminating:
    -- - all items greater than `candidateRank`,
    -- - and all items with zero support.
    reducedDB = projectAndReduce candidateRank rankToSupp previousDB

    -- HLCM: line 2: Compute frequencies of items in CDB
    -- Compute items occurrences in the reduced database.
    reducedRankToSupp = rankToSuppLT items reducedDB

    -- HLCM: line 3: CP = 100% frequent items in CDB
    -- Check which items actually appear in the reduced database.
    candidateSupp = rankToSupp ! candidateRank

    -- HLCM: line 6: Candidates = frequent items of CDB that are not in CP
    -- Compute 100% frequent items, future candidates, and unfrequent items.
    (closedFreqRanks, candidateRanks, unfreqRanks) =
      computeCandidates minSupp candidateSupp items reducedRankToSupp
  in
    -- pTraceShow (["lcmLoop"], minSupp, minSize, depth, previousDB, previousRanks, candidateRank, rankToSupp, items) $
    -- HLCM: line 4: if max(CP) = limit then
    if not (List.null closedFreqRanks) -- if there is a result ...
      && last closedFreqRanks <= candidateRank -- ... and if it is OK to extend it
      then
        let
          -- HLCM: line 5: P' = P ∪ CP
          -- Resulting closed frequent itemset = input closed frequent itemset + 100% frequent items
          closedItemset = previousRanks <> Set.fromList closedFreqRanks

          -- HLCM: line 8: for all e ∈ Candidates, e ≤ limit do
          -- Only candidates with value lower than the input candidateRank
          -- can be used for further extension on this branch.
          smallCandidates = List.takeWhile (< candidateRank) candidateRanks
        in
          [ (candidateSupp, closedItemset)
          | minSize <= fromIntegral (Set.size closedItemset)
          ]
            <> if not (List.null smallCandidates) -- ... and if we have at least 1 possible extension
              then
                let
                  -- Update the items occurrences table by suppressing:
                  -- - 100% frequent items,
                  -- - and unfrequent items.
                  newRankToSupp = suppressItems reducedRankToSupp closedFreqRanks unfreqRanks

                  loop newCandidate = lcmLoop minSupp minSize (depth + 1) reducedDB closedItemset newCandidate newRankToSupp items
                in
                  -- Recursively extend the candidates
                  if 3 < depth -- create parallel sparks only for low search space depth
                    then List.concat $ runEval $ parBuffer 2 rdeepseq $ List.map loop smallCandidates
                    else List.concatMap loop smallCandidates
              else []
      else []
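-- Illustrative walk-through (not part of the original code), continuing the toy
-- database [{a,b}, {a,b,c}, {b,c}] with minSupp = 2, where b has rank 0:
-- for candidateRank 0, every transaction contains b, so the reduced database is
-- the whole database, the only 100%-frequent item of the CDB is b itself, and
-- the closure {b} is emitted with support 3; since there is no candidate of
-- rank smaller than 0, this branch stops there. The closures {a,b} and {b,c}
-- are found from the branches of candidates a (rank 1) and c (rank 2).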
-- |
-- For a transaction database of type @[Set item]@, computes the frequency
-- of each item and returns a map from item to frequency.
itemToSupport ::
  Ord item =>
  Set item ->
  [Set item] ->
  Map.Map item ItemSupport
itemToSupport items db =
  Map.fromListWith
    (+)
    [ (itm, 1)
    | tx <- db
    , itm <- Set.intersection items tx & Set.toList
    ]
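-- Illustrative GHCi sketch (not part of the original code):
--
-- >>> itemToSupport (Set.fromList "abc") (Set.fromList <$> ["ab", "bc", "b"])
-- fromList [('a',1),('b',3),('c',1)]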
{-
  accumArray
    (+)
    0
    (0, Set.size items)
    [ (idx, 1)
    | is <- db
    , i <- is & Set.toList
    , idx <- Set.lookupIndex i items & maybeToList
    ]
-}

-- XXX PERF: must be bad: the array is converted to a list (one copy),
-- then this list is sorted (more copies of small lists), and
-- finally a new array is created...
-- Try to improve this with a mutable array and a more "in place" spirit...
-- |
-- For a given itemset being extended by a given candidate, computes:
-- - the closure of this itemset,
-- - and the candidates for further extension.
computeCandidates ::
  ItemSupport ->
  ItemSupport ->
  Set item ->
  UArray ItemRank ItemSupport ->
  -- (100% frequent items == closure, candidates for further extension, unfrequent items)
  ([ItemRank], [ItemRank], [ItemRank])
computeCandidates minSupp candidateSupp items rankToSupp =
  let
    (frequentItems, unfreqItems) =
      List.partition
        (\i -> rankToSupp ! i >= minSupp)
        [i | i <- [0 .. Set.size items - 1], rankToSupp ! i > 0]
    (closedFrequentRanks, candidateRanks) =
      List.partition (\i -> rankToSupp ! i == candidateSupp) frequentItems
  in
    (closedFrequentRanks, candidateRanks, unfreqItems)
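-- Illustrative GHCi sketch (not part of the original code): with minSupp = 3,
-- candidateSupp = 5, five items, and per-rank supports [5,3,1,0,5], the ranks
-- with full support {0,4} form the closure, rank 1 remains a candidate, and
-- rank 2 is unfrequent (rank 3 does not appear at all):
--
-- >>> computeCandidates 3 5 (Set.fromList "abcde") (listArray (0, 4) [5, 3, 1, 0, 5])
-- ([0,4],[1],[2])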
-- |
-- Modifies an array associating items with their frequency, in order to
-- give a frequency of 0 to a given list of items.
--
-- NB: for performance reasons, this REALLY is an in-place modification, made with unsafe operations.
suppressItems ::
  -- | Array associating an item with its frequency
  UArray ItemRank ItemSupport ->
  -- | List of 100% frequent items
  [ItemRank] ->
  -- | List of unfrequent items
  [ItemRank] ->
  -- | Initial array, with the frequencies of 100% frequent items
  -- and unfrequent items set to 0.
  UArray ItemRank ItemSupport
suppressItems rankToSupp closedRanks unfreqRanks =
  runST do
    -- Can be used in a multithreaded context because there is no concurrent write
    arr <- unsafeThaw rankToSupp :: ST s (STUArray s ItemRank ItemSupport)
    forM_ closedRanks \i -> writeArray arr i 0
    forM_ unfreqRanks \i -> writeArray arr i 0
    -- Can be used in a multithreaded context because there is no concurrent write
    unsafeFreeze arr
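-- Illustrative GHCi sketch (not part of the original code): zeroing rank 0
-- (100% frequent) and rank 2 (unfrequent) in a 4-item frequency array:
--
-- >>> suppressItems (listArray (0, 3) [5, 3, 1, 5]) [0] [2]
-- array (0,3) [(0,0),(1,3),(2,0),(3,5)]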
-----------------------------------------------------------------
-- LEXICOGRAPHIC TREE MANIPULATION
-----------------------------------------------------------------

-- |
-- Creates a new, reduced transaction database by eliminating all items
-- greater than the @candidateRank@ item, and all infrequent items.
projectAndReduce ::
  -- | Candidate item, on which the projection is made
  ItemRank ->
  -- | Array associating each item with its frequency in
  -- the original transaction database
  UArray ItemRank ItemSupport ->
  -- | Original transaction database
  LexicoTreeItem ->
  -- | Result: reduced transaction database
  LexicoTreeItem
projectAndReduce !candidateRank rankToSupp = go
  where
    go Nil = Nil
    go (Node e suiv alt w)
      | e > candidateRank = Nil
      | e == candidateRank =
          let !(suiv', addWeight) = filterInfrequent suiv rankToSupp
           in Node e suiv' Nil (w + addWeight)
      | otherwise =
          let
            !alt' = go alt
            !suiv' = go suiv
          in
            if rankToSupp ! e > 0
              then
                if notNil suiv' && notNil alt'
                  then Node e suiv' alt' 0
                  else if notNil suiv' then Node e suiv' Nil 0 else alt'
              else
                if notNil suiv' && notNil alt'
                  then mergeAlts suiv' alt'
                  else if notNil suiv' then suiv' else alt'
-- |
-- Suppresses all infrequent items from a transaction database expressed as a
-- lexicographic tree, and returns a new lexicographic tree.
filterInfrequent ::
  -- | Original transaction database
  LexicoTreeItem ->
  -- | Array associating each item with its frequency in
  -- the original transaction database. In this setting,
  -- an infrequent item has a frequency of 0 (because of preprocessing by
  -- 'suppressItems').
  UArray ItemRank ItemSupport ->
  -- | Result: (transaction database without infrequent items, weight to report to parent nodes)
  (LexicoTreeItem, Weight)
filterInfrequent Nil _ = (Nil, 0)
filterInfrequent (Node e suiv alt w) occs
  | occs ! e > 0 = (Node e suiv' alt' (w + ws), wa)
  | notNil suiv' && notNil alt' = (mergeAlts suiv' alt', w')
  | notNil alt' = (alt', w')
  | notNil suiv' = (suiv', w')
  | otherwise = (Nil, w')
  where
    w' = w + ws + wa
    !(suiv', ws) = filterInfrequent suiv occs
    !(alt', wa) = filterInfrequent alt occs
{-# INLINE notNil #-}
notNil :: LexicoTreeItem -> Bool
notNil Nil = False
notNil _ = True

-- |
-- Occurrence delivering:
-- map each item of the given database to its support.
rankToSuppLT ::
  Set item ->
  -- | Transaction database (in lexicographic tree format)
  LexicoTreeItem ->
  -- | Result: array associating each item with its frequency
  UArray ItemRank ItemSupport
rankToSuppLT items dbLT =
  runST do
    arr <- newArray_ (0, Set.size items - 1)
    -- TODO: this workaround should no longer be necessary
    -- Creates an empty array: each item starts with frequency 0
    -- workaround for http://hackage.haskell.org/trac/ghc/ticket/3586
    forM_ [0 .. Set.size items - 1] $ \i -> unsafeWrite arr i 0
    -- Compute frequencies for each item by efficient tree traversal
    _ <- traverseLT dbLT arr
    unsafeFreeze arr
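-- Illustrative GHCi sketch (not part of the original code): a single path
-- of weight 2 containing ranks 0 and 1 (i.e. two identical transactions)
-- gives both items a support of 2:
--
-- >>> rankToSuppLT (Set.fromList "ab") (createPath [0, 1] 2)
-- array (0,1) [(0,2),(1,2)]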
-- |
-- Efficient traversal of the transaction database as a lexicographic tree.
-- Item frequencies are updated on the fly.
traverseLT ::
  forall s.
  -- | Transaction database
  LexicoTreeItem ->
  -- | Array associating each item with its frequency. UPDATED by this function!
  STUArray s ItemRank ItemSupport ->
  ST s ()
traverseLT tree arr = ST $ \s ->
  case go tree s of
    (# s', _ #) -> (# s', () #)
  where
    go ::
      LexicoTreeItem ->
      State# s ->
      (# State# s, Int# #)
    go Nil s = (# s, 0# #)
    go (Node item child alt w@(I# w#)) s0 =
      case go child s0 of
        (# s1, childw #) ->
          case go alt s1 of
            (# s2, altw #) ->
              case unsafeRead arr item of
                ST f ->
                  case f s2 of
                    (# s3, I# itemw #) ->
                      -- Thread the state token returned by the read into the write.
                      case unsafeWrite arr item (I# itemw + I# childw + w) of
                        ST f' ->
                          case f' s3 of
                            (# s4, _ #) -> (# s4, childw +# w# +# altw #)

-- RankToSupp
-- | Type for a lexicographic tree, implementing an n-ary tree over a binary tree.
data LexicoTreeItem
  = -- | Void node
    Nil
  | -- | A node: item, next node (next in transaction), alternative node (other branch), weight
    Node
      {-# UNPACK #-} !ItemRank
      !LexicoTreeItem -- NB. experimental strictness annotation
      !LexicoTreeItem -- NB. experimental strictness annotation
      {-# UNPACK #-} !Int
  deriving (Eq, Show)
-- |
-- Inserts a transaction in list format into the lexicographic tree.
-- Automatically merges identical transactions.
-- Performs suffix intersection.
insertLT ::
  -- | Transaction to insert into the lexicographic tree
  [ItemRank] ->
  -- | "coreI" item, for suffix intersection.
  ItemRank ->
  -- | Weight of the transaction to insert
  ItemSupport ->
  -- | Input lexicographic tree
  LexicoTreeItem ->
  -- | Result: a new lexicographic tree with the transaction inserted
  LexicoTreeItem
insertLT [] _ _ lt = lt
insertLT lst _ w Nil = createPath lst w
insertLT [x] i w (Node e suiv alt weight)
  | x < e = Node x Nil (Node e suiv alt weight) w
  | x == e = Node e suiv alt (weight + w)
  | x > e = Node e suiv (insertLT [x] i w alt) weight
insertLT (x : xs) i w (Node e suiv alt weight)
  | x < e = Node x (createPath xs w) (Node e suiv alt weight) 0
  | x == e =
      if e /= i
        then Node e (insertLT xs i w suiv) alt weight
        else suffixIntersectionLT xs w (Node e suiv alt weight)
  | x > e = Node e suiv (insertLT (x : xs) i w alt) weight
insertLT _ _ _ _ = error "insertLT"
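-- Illustrative GHCi sketch (not part of the original code): inserting a
-- transaction that already exists in the tree only adds its weight
-- (here with coreI = -1, as used when building `dbLT` in `runLCM`):
--
-- >>> insertLT [0, 1] (-1) 1 (createPath [0, 1] 1)
-- Node 0 (Node 1 Nil Nil 2) Nil 0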
-- |
-- From a transaction and its weight, directly creates a path-shaped lexicographic tree.
createPath ::
  -- | Transaction
  [ItemRank] ->
  -- | Weight of the transaction
  Int ->
  -- | Result: a path-shaped lexicographic tree encoding the transaction
  LexicoTreeItem
createPath [] _ = Nil
createPath [x] w = Node x Nil Nil w
createPath (x : xs) w = Node x (createPath xs w) Nil 0
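-- Illustrative GHCi sketch (not part of the original code): the weight is
-- carried by the last node of the path only:
--
-- >>> createPath [0, 2, 5] 3
-- Node 0 (Node 2 (Node 5 Nil Nil 3) Nil 0) Nil 0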
-- |
-- Performs the "suffix intersection" operation with the suffix of a transaction
-- and the corresponding part of a lexicographic tree.
--
-- For more details, see "prefixIntersection" in Takeaki Uno's papers about LCM.
suffixIntersectionLT ::
  -- | Suffix of the transaction to insert
  [ItemRank] ->
  -- | Weight of the transaction to insert
  Int ->
  -- | (Sub-)lexicographic tree where the transaction must be inserted.
  -- The @next@ part (see the data type comments) should be a simple path;
  -- it will be the target of the intersection with the suffix.
  LexicoTreeItem ->
  -- | Result: lexicographic tree where the suffix has been added, with correct intersections performed
  LexicoTreeItem
suffixIntersectionLT _ w (Node e Nil alt weight) = Node e Nil alt (weight + w)
suffixIntersectionLT lst w (Node e suiv alt weight) =
  let (newSuiv, addWeight) = suffInterSuiv lst w suiv
   in Node e newSuiv alt (weight + addWeight)
suffixIntersectionLT _ _ _ = error "suffixIntersectionLT"
-- |
-- Intersects a list-shaped transaction and a path-shaped lexicographic tree.
-- The result is a path-shaped lexicographic tree with weights correctly updated.
suffInterSuiv ::
  -- | Transaction as a list
  [ItemRank] ->
  -- | Weight of the above transaction
  Int ->
  -- | Path-shaped lexicographic tree
  LexicoTreeItem ->
  -- | Result: (path-shaped lexicographic tree representing the intersection
  -- of the transaction and the input path,
  -- 0 if the intersection is non-empty / the sum of the weights otherwise)
  (LexicoTreeItem, Int)
suffInterSuiv lst w suiv =
  let
    (lstSuiv, weightSuiv) = getLstSuiv suiv
    inter = List.intersect lstSuiv lst
  in
    if inter /= []
      then (createPath inter (weightSuiv + w), 0)
      else (Nil, weightSuiv + w)
-- |
-- Collects all the nodes of a path-shaped lexicographic tree into a list of elements.
getLstSuiv ::
  -- | Path-shaped lexicographic tree
  LexicoTreeItem ->
  -- | Result: (list of elements in the path, sum of the weights of the nodes in the path)
  ([ItemRank], Int)
getLstSuiv Nil = ([], 0)
getLstSuiv (Node e suiv Nil weight) =
  let (lst, w) = getLstSuiv suiv
   in (e : lst, w + weight)
getLstSuiv _ = error "getLstSuiv"
-- |
-- Merge two lexicographic trees.
mergeAlts :: LexicoTreeItem -> LexicoTreeItem -> LexicoTreeItem
mergeAlts Nil lt = lt
mergeAlts lt Nil = lt
mergeAlts (Node e1 suiv1 alt1 w1) (Node e2 suiv2 alt2 w2)
  | e1 < e2 = Node e1 suiv1 (mergeAlts alt1 (Node e2 suiv2 alt2 w2)) w1
  | e1 > e2 = Node e2 suiv2 (mergeAlts (Node e1 suiv1 alt1 w1) alt2) w2
  | e1 == e2 = Node e1 (mergeAlts suiv1 suiv2) (mergeAlts alt1 alt2) (w1 + w2)
mergeAlts _ _ = error "mergeAlts"
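-- Illustrative GHCi sketch (not part of the original code): merging two
-- single-node trees chains them through their alternative branches:
--
-- >>> mergeAlts (createPath [0] 1) (createPath [1] 1)
-- Node 0 Nil (Node 1 Nil Nil 1) 1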