]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Text/Metrics/FrequentItemSet.hs
[FEAT] cooc added for pipeline
[gargantext.git] / src / Gargantext / Text / Metrics / FrequentItemSet.hs
1 {-|
2 Module : Gargantext.Text.Metrics.FrequentItemSet
3 Description : Ngrams tools
4 Copyright : (c) CNRS, 2018
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Domain Specific Language to manage Frequent Item Set (FIS)
11
12 -}
13
14 {-# LANGUAGE NoImplicitPrelude #-}
15
16 module Gargantext.Text.Metrics.FrequentItemSet
17 ( Fis, Size
18 , occ_hlcm, cooc_hlcm
19 , all, between
20 , module HLCM
21 )
22 where
23
24 import Data.List (tail, filter)
25 import Data.Either
26
27 import HLCM
28
29 import Gargantext.Prelude
30
31 type Size = Either Int (Int, Int)
32
33 --data Size = Point | Segment
34
35 ------------------------------------------------------------------------
36 -- | Occurrence is Frequent Item Set of size 1
37 occ_hlcm :: Frequency -> [[Item]] -> [Fis]
38 occ_hlcm f is = fisWithSize (Left 1) f is
39
40 -- | Cooccurrence is Frequent Item Set of size 2
41 cooc_hlcm :: Frequency -> [[Item]] -> [Fis]
42 cooc_hlcm f is = fisWithSize (Left 2) f is
43
44 all :: Frequency -> [[Item]] -> [Fis]
45 all f is = fisWith Nothing f is
46
47 ------------------------------------------------------------------------
48 between :: (Int, Int) -> Frequency -> [[Item]] -> [Fis]
49 between (x,y) f is = fisWithSize (Right (x,y)) f is
50
51 --maximum :: Int -> Frequency -> [[Item]] -> [Fis]
52 --maximum m f is = between (0,m) f is
53
54
55 ------------------------------------------------------------------------
56 ------------------------------------------------------------------------
57 -- | Data type to type the Frequent Item Set
58 -- TODO replace List with Set in fisItemSet
59 -- be careful : risks to erase HLCM behavior
60 type Fis = Fis' Item
61 data Fis' a = Fis' { _fisCount :: Int
62 , _fisItemSet :: [a]
63 } deriving (Show)
64
65 -- | Sugar from items to FIS
66 items2fis :: [Item] -> Maybe Fis
67 items2fis is = case head is of
68 Nothing -> Nothing
69 Just h -> Just (Fis' h (tail is))
70
71 ------------------------------------------------------------------------
72 ------------------------------------------------------------------------
73
74 fisWithSize :: Size -> Frequency -> [[Item]] -> [Fis]
75 fisWithSize n f is = case n of
76 Left n' -> fisWith (Just (\x -> length x == (n'+1) )) f is
77 Right (a,b) -> fisWith (Just (\x -> cond1 a x && cond2 b x)) f is
78 where
79 cond1 a' x = length x >= a'
80 cond2 b' x = length x <= b'
81
82
83 fisWith :: Maybe ([Item] -> Bool) -> Frequency -> [[Item]] -> [Fis]
84 fisWith s f is = unMaybe $ map items2fis $ filter' $ runLCMmatrix is f
85 where
86 filter' = case s of
87 Nothing -> identity
88 Just fun -> filter fun
89
90 ------------------------------------------------------------------------
91 ------------------------------------------------------------------------
92
93
94 --
95 ---- | /!\ indexes are not the same:
96 --
97 ---- | Index ngrams from Map
98 ----indexNgram :: Ord a => Map a Occ -> Map Index a
99 ----indexNgram m = fromList (zip [1..] (keys m))
100 --
101 ---- | Index ngrams from Map
102 ----ngramIndex :: Ord a => Map a Occ -> Map a Index
103 ----ngramIndex m = fromList (zip (keys m) [1..])
104 --
105 --indexWith :: Ord a => Map a Occ -> [a] -> [Int]
106 --indexWith m xs = unMaybe $ map (\x -> lookupIndex x m) xs
107 --
108 --indexIt :: Ord a => [[a]] -> (Map a Int, [[Int]])
109 --indexIt xs = (m, is)
110 -- where
111 -- m = sumOcc (map occ xs)
112 -- is = map (indexWith m) xs
113 --
114 --list2fis :: Ord a => FIS.Frequency -> [[a]] -> (Map a Int, [FIS.Fis])
115 --list2fis n xs = (m', fs)
116 -- where
117 -- (m, is) = indexIt xs
118 -- m' = M.filter (>50000) m
119 -- fs = FIS.all n is
120 --
121 --text2fis :: FIS.Frequency -> [Text] -> (Map Text Int, [FIS.Fis])
122 --text2fis n xs = list2fis n (map terms xs)
123 --
124 ----text2fisWith :: FIS.Size -> FIS.Frequency -> [Text] -> (Map Text Int, [FIS.Fis])
125 ----text2fisWith = undefined
126 --
127