{-|
Module      : Gargantext.Ngrams.FrequentItemSet
Description : Ngrams tools
Copyright   : (c) CNRS, 2018
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental

Domain-specific language to manage Frequent Item Sets (FIS).
-}
14 module Gargantext.Ngrams.FrequentItemSet
22 import Data.List (tail, filter)
27 import Gargantext.Prelude
29 type Size = Either Int (Int, Int)
32 ------------------------------------------------------------------------
-- | Occurrences: Frequent Item Sets of size 1.
--
-- Delegates to 'fisWithSize' with the fixed size @Left 1@; the 'Frequency'
-- and the lists of items are forwarded unchanged.
occ :: Frequency -> [[Item]] -> [Fis]
occ = fisWithSize (Left 1)
-- | Cooccurrences: Frequent Item Sets of size 2.
--
-- Delegates to 'fisWithSize' with the fixed size @Left 2@; the 'Frequency'
-- and the lists of items are forwarded unchanged.
cooc :: Frequency -> [[Item]] -> [Fis]
cooc = fisWithSize (Left 2)
-- | Frequent Item Sets with no size selection: calls 'fisWith' with
-- @Nothing@ in place of a predicate.
all :: Frequency -> [[Item]] -> [Fis]
all = fisWith Nothing
44 ------------------------------------------------------------------------
-- | Frequent Item Sets selected by a pair of bounds.
--
-- The pair is wrapped in @Right@ and handed to 'fisWithSize', which tests
-- both bounds inclusively.
between :: (Int, Int) -> Frequency -> [[Item]] -> [Fis]
between bounds freq transactions = fisWithSize (Right bounds) freq transactions
48 --maximum :: Int -> Frequency -> [[Item]] -> [Fis]
49 --maximum m f is = between (0,m) f is
52 ------------------------------------------------------------------------
53 ------------------------------------------------------------------------
-- | Data type representing a Frequent Item Set.
-- TODO: replace List with Set in fisItemSet
-- (be careful: this risks erasing the HLCM behavior)
58 data Fis' a = Fis' { _fisCount :: Int
-- | Sugar from items to FIS.
--
-- The first element of the list is used as the first field of 'Fis''
-- (declared as @_fisCount@) and the remaining elements as the item set.
-- An empty list has no leading element and yields 'Nothing' instead of
-- falling through a non-exhaustive @case@.
items2fis :: [Item] -> Maybe Fis
items2fis is = case head is of
  -- No leading count available: nothing to build.
  Nothing -> Nothing
  -- 'tail' is safe here: 'head' succeeded, so the list is non-empty.
  Just h  -> Just (Fis' h (tail is))
68 ------------------------------------------------------------------------
69 ------------------------------------------------------------------------
-- | Frequent Item Sets restricted by size.
--
-- * @Left n@ keeps the raw lists whose length is exactly @n + 1@.
-- * @Right (lo, hi)@ keeps the raw lists whose length lies between @lo@ and
--   @hi@, both bounds included.
--
-- The resulting predicate is passed to 'fisWith' as @Just predicate@.
-- NOTE(review): the @Right@ bounds are compared against the raw length with
-- no @+ 1@ offset, unlike the @Left@ case -- confirm this asymmetry is
-- intended.
fisWithSize :: Size -> Frequency -> [[Item]] -> [Fis]
fisWithSize (Left size) f is = fisWith (Just hasSize) f is
  where
    hasSize xs = length xs == size + 1
fisWithSize (Right (lo, hi)) f is = fisWith (Just inBounds) f is
  where
    inBounds xs = length xs >= lo && length xs <= hi
-- | Run LCM on the input and convert its output to 'Fis' values.
--
-- 'runLCMmatrix' is called with the lists of items and the frequency. When a
-- predicate is supplied (@Just fun@) only the result lists satisfying it are
-- kept. Each remaining list goes through 'items2fis', and 'unMaybe' is
-- applied to the resulting list (presumably discarding the 'Nothing's --
-- verify against its definition).
-- NOTE(review): @Nothing@ is treated as "keep every result", which is how
-- 'all' calls this function -- confirm against the upstream definition.
fisWith :: Maybe ([Item] -> Bool) -> Frequency -> [[Item]] -> [Fis]
fisWith s f is = unMaybe $ map items2fis $ filter' $ runLCMmatrix is f
  where
    -- Optional selection applied to the raw LCM output, before conversion.
    filter' raw = case s of
      Nothing  -> raw
      Just fun -> filter fun raw
87 ------------------------------------------------------------------------
88 ------------------------------------------------------------------------