2 Module : Gargantext.Text.Metrics.FrequentItemSet
3 Description : Ngrams tools
4 Copyright : (c) CNRS, 2018
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Domain Specific Language to manage Frequent Item Set (FIS)
14 {-# LANGUAGE NoImplicitPrelude #-}
16 module Gargantext.Text.Metrics.FrequentItemSet
28 import Prelude (Functor(..)) -- TODO
29 import qualified Data.Map.Strict as Map
30 import Data.Map.Strict (Map)
31 import qualified Data.Set as Set
33 import qualified Data.Vector as V
34 import Data.Vector (Vector)
36 import Data.List (filter, concat)
37 import Data.Maybe (catMaybes)
41 import Gargantext.Prelude
43 data Size = Point Int | Segment Int Int
45 ------------------------------------------------------------------------
-- | Occurrences: the frequent item sets made of exactly one item.
--
-- Thin wrapper that delegates to 'fisWithSize' with @Point 1@; the frequency
-- and the transactions are forwarded unchanged.
occ_hlcm :: Frequency -> [[Item]] -> [Fis]
occ_hlcm freq transactions = fisWithSize (Point 1) freq transactions
-- | Cooccurrences: the frequent item sets made of exactly two items.
--
-- Thin wrapper that delegates to 'fisWithSize' with @Point 2@; the frequency
-- and the transactions are forwarded unchanged.
cooc_hlcm :: Frequency -> [[Item]] -> [Fis]
cooc_hlcm freq transactions = fisWithSize (Point 2) freq transactions
54 all :: Frequency -> [[Item]] -> [Fis]
57 ------------------------------------------------------------------------
-- | Frequent item sets selected by a size range.
--
-- The pair is turned into a @Segment@ (first component as lower bound, second
-- as upper bound) and handed to 'fisWithSize' together with the frequency and
-- the transactions.
between :: (Int, Int) -> Frequency -> [[Item]] -> [Fis]
between (lower, upper) freq transactions =
  fisWithSize (Segment lower upper) freq transactions
61 --maximum :: Int -> Frequency -> [[Item]] -> [Fis]
62 --maximum m = between (0,m)
65 ------------------------------------------------------------------------
66 ------------------------------------------------------------------------
67 -- | Data type to type the Frequent Item Set
-- TODO: replace List with Set in fisItemSet.
-- Be careful: this risks erasing the HLCM behavior.
71 data Fis' a = Fis' { _fisCount :: Int
-- | Mapping over a 'Fis'' rewrites the items only.
instance Functor Fis' where
  -- The first field (the count) is carried over untouched; the function is
  -- applied inside the second field (the item set) via its own 'fmap'.
  fmap transform (Fis' support members) = Fis' support (fmap transform members)
-- | Sugar from a raw list of items to a FIS.
--
-- The head of the list becomes the first field of 'Fis'' (the count) and the
-- remaining elements become the item set. An empty list has no head to use
-- as a count, so it yields 'Nothing'.
items2fis :: [Item] -> Maybe Fis
items2fis row = case row of
  []             -> Nothing
  (count : rest) -> Just (Fis' count rest)
83 ------------------------------------------------------------------------
84 ------------------------------------------------------------------------
-- | Frequent item sets restricted to a given number of items.
--
-- The size predicate is applied by 'fisWith' to the raw rows, i.e. before
-- 'items2fis' splits each row into its leading count and its items. A row
-- holding @k@ items therefore has length @k + 1@, and every bound below is
-- shifted by one to account for that leading count cell.
--
-- * @Point k@ keeps the item sets with exactly @k@ items.
-- * @Segment a b@ keeps the item sets with between @a@ and @b@ items,
--   both bounds included. The bounds used to be compared against the raw row
--   length without the shift, which made @Segment k k@ disagree with
--   @Point k@ (it selected sets of @k - 1@ items).
--
-- The frequency @f@ and the transactions @is@ are forwarded to 'fisWith'
-- unchanged.
fisWithSize :: Size -> Frequency -> [[Item]] -> [Fis]
fisWithSize n f is = fisWith (Just keep) f is
  where
    -- Decide whether a raw row (count cell followed by the items) is kept.
    keep row =
      let rowLen = length row
      in case n of
           Point n'    -> rowLen == n' + 1
           Segment a b -> a + 1 <= rowLen && rowLen <= b + 1
94 fisWith :: Maybe ([Item] -> Bool) -> Frequency -> [[Item]] -> [Fis]
95 fisWith s f is = catMaybes $ map items2fis $ filter' $ runLCMmatrix is f
99 Just fun -> filter fun
101 -- Here the sole purpose to take the keys as a Set is tell we do not want
103 fisWithSizePoly :: Ord a => Size -> Frequency -> Set a -> [[a]] -> [Fis' a]
104 fisWithSizePoly n f ks = map (fmap fromItem) . fisWithSize n f . map (map toItem)
106 ksv = V.fromList $ Set.toList ks
107 ksm = Map.fromList . flip zip [0..] $ V.toList ksv
-- | Variant of 'fisWithSizePoly' that does not need the key set up front.
--
-- The set of keys is built from every element occurring in the input
-- transactions, then passed to 'fisWithSizePoly' along with the size, the
-- frequency and the transactions themselves.
fisWithSizePoly2 :: Ord a => Size -> Frequency -> [[a]] -> [Fis' a]
fisWithSizePoly2 n f is = fisWithSizePoly n f (Set.fromList (concat is)) is
117 ------------------------------------------------------------------------
118 ------------------------------------------------------------------------
122 ---- | /!\ indexes are not the same:
124 ---- | Index ngrams from Map
125 ----indexNgram :: Ord a => Map a Occ -> Map Index a
126 ----indexNgram m = fromList (zip [1..] (keys m))
128 ---- | Index ngrams from Map
129 ----ngramIndex :: Ord a => Map a Occ -> Map a Index
130 ----ngramIndex m = fromList (zip (keys m) [1..])
132 --indexWith :: Ord a => Map a Occ -> [a] -> [Int]
133 --indexWith m xs = unMaybe $ map (\x -> lookupIndex x m) xs
135 --indexIt :: Ord a => [[a]] -> (Map a Int, [[Int]])
136 --indexIt xs = (m, is)
138 -- m = sumOcc (map occ xs)
139 -- is = map (indexWith m) xs
141 --list2fis :: Ord a => FIS.Frequency -> [[a]] -> (Map a Int, [FIS.Fis])
142 --list2fis n xs = (m', fs)
144 -- (m, is) = indexIt xs
145 -- m' = M.filter (>50000) m
148 --text2fis :: FIS.Frequency -> [Text] -> (Map Text Int, [FIS.Fis])
149 --text2fis n xs = list2fis n (map terms xs)
151 ----text2fisWith :: FIS.Size -> FIS.Frequency -> [Text] -> (Map Text Int, [FIS.Fis])
152 ----text2fisWith = undefined