]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Ngrams/FrequentItemSet.hs
[CODE/READ] with NP.
[gargantext.git] / src / Gargantext / Ngrams / FrequentItemSet.hs
1 {-|
2 Module : Gargantext.Ngrams.FrequentItemSet
3 Description : Ngrams tools
4 Copyright : (c) CNRS, 2018
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Domain Specific Language to manage Frequent Item Set (FIS)
11
12 -}
13
14 module Gargantext.Ngrams.FrequentItemSet
15 ( Fis, Size
16 , occ, cooc
17 , all, between
18 , module HLCM
19 )
20 where
21
22 import Data.List (tail, filter)
23 import Data.Either
24
25 import HLCM
26
27 import Gargantext.Prelude
28
-- | Size selector consumed by 'fisWithSize':
--
--   * @Left n@ asks for one single size @n@;
--   * @Right (lo, hi)@ asks for a range delimited by @lo@ and @hi@.
type Size = Either Int (Int, Int)
30
31 --data Size = Point | Segment
32
33 ------------------------------------------------------------------------
-- | Occurrences, i.e. the Frequent Item Sets of size 1.
--
-- Thin wrapper: fixes the size selector to @Left 1@ and forwards the
-- 'Frequency' and the item lists unchanged to 'fisWithSize'.
occ :: Frequency -> [[Item]] -> [Fis]
occ = fisWithSize (Left 1)
37
-- | Cooccurrences, i.e. the Frequent Item Sets of size 2.
--
-- Thin wrapper: fixes the size selector to @Left 2@ and forwards the
-- 'Frequency' and the item lists unchanged to 'fisWithSize'.
cooc :: Frequency -> [[Item]] -> [Fis]
cooc = fisWithSize (Left 2)
41
-- | Every Frequent Item Set, whatever its size.
--
-- Calls 'fisWith' without any predicate ('Nothing'), so no row is
-- filtered out before conversion to 'Fis'.
all :: Frequency -> [[Item]] -> [Fis]
all = fisWith Nothing
44
45 ------------------------------------------------------------------------
-- | Frequent Item Sets selected by a pair of size bounds.
--
-- The pair is wrapped as @Right (lo, hi)@ and handed to 'fisWithSize',
-- which is where the bounds are actually interpreted.
between :: (Int, Int) -> Frequency -> [[Item]] -> [Fis]
between (lo, hi) = fisWithSize (Right (lo, hi))
48
49 --maximum :: Int -> Frequency -> [[Item]] -> [Fis]
50 --maximum m f is = between (0,m) f is
51
52
53 ------------------------------------------------------------------------
54 ------------------------------------------------------------------------
-- | Data type to type the Frequent Item Set
-- TODO replace List with Set in fisItemSet
-- (be careful: doing so risks erasing the HLCM behavior)
type Fis = Fis' Item

-- | A Frequent Item Set: a count paired with the items of the set.
-- Values are built by 'items2fis' from one raw row of items.
data Fis' a = Fis' { _fisCount   :: Int  -- ^ first element of the raw row
                   , _fisItemSet :: [a]  -- ^ remaining elements of the raw row
                   } deriving (Show)
62
-- | Sugar from a raw row of items to a 'Fis'.
--
-- The first element of the row becomes the count and the remaining
-- elements become the item set. An empty row has no count and therefore
-- yields 'Nothing'.
items2fis :: [Item] -> Maybe Fis
items2fis []             = Nothing
items2fis (count : rest) = Just (Fis' count rest)
68
69 ------------------------------------------------------------------------
70 ------------------------------------------------------------------------
71
-- | Frequent Item Sets restricted by a 'Size' selector.
--
-- The predicate given to 'fisWith' is applied to the raw rows, i.e. before
-- 'items2fis' splits off the first element as the count. A row holding an
-- item set of @k@ items therefore has length @k + 1@, and every comparison
-- below is made against @size + 1@:
--
--   * @Left n@        keeps the item sets of exactly @n@ items;
--   * @Right (a, b)@  keeps the item sets of @a@ to @b@ items (inclusive).
--
-- Previously only the @Left@ case accounted for the leading count, so
-- @Right (n, n)@ and @Left n@ selected different rows.
fisWithSize :: Size -> Frequency -> [[Item]] -> [Fis]
fisWithSize n f is = fisWith (Just keep) f is
  where
    -- Measure the row once, then compare against the requested size(s).
    keep row = sized (length row)

    sized len = case n of
      Left n'      -> len == n' + 1
      Right (a, b) -> len >= a + 1 && len <= b + 1
79
80
-- | Core runner: compute the rows with 'runLCMmatrix', optionally keep only
-- those accepted by a predicate, then convert the survivors to 'Fis'.
--
-- The predicate sees the raw rows exactly as 'runLCMmatrix' returns them,
-- before 'items2fis' is applied. With 'Nothing' every row is kept.
-- The @Maybe@ results of 'items2fis' are collapsed by 'unMaybe'
-- (presumably dropping the 'Nothing's; see "Gargantext.Prelude").
fisWith :: Maybe ([Item] -> Bool) -> Frequency -> [[Item]] -> [Fis]
fisWith s f is = unMaybe (map items2fis kept)
  where
    rows = runLCMmatrix is f

    kept = case s of
      Just accept -> filter accept rows
      Nothing     -> rows
87
88 ------------------------------------------------------------------------
89 ------------------------------------------------------------------------