]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo/Aggregates/Fis.hs
[ELEVE] Ngrams, still NaN.
[gargantext.git] / src / Gargantext / Viz / Phylo / Aggregates / Fis.hs
1 {-|
2 Module : Gargantext.Viz.Phylo.Tools
3 Description : Phylomemy Tools to build/manage it
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10
11 -}
12
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE FlexibleContexts #-}
15 {-# LANGUAGE OverloadedStrings #-}
16
17 module Gargantext.Viz.Phylo.Aggregates.Fis
18 where
19
20 import Data.List (null,concat,sort)
21 import Data.Map (Map, empty,elems)
22 import Data.Tuple (fst, snd)
23 import Data.Set (size)
24 import Gargantext.Prelude
25 import Gargantext.Text.Metrics.FrequentItemSet (fisWithSizePolyMap, Size(..))
26 import Gargantext.Viz.Phylo
27 import Gargantext.Viz.Phylo.Tools
28 import qualified Data.Map as Map
29 import qualified Data.Set as Set
30 import qualified Data.Vector.Storable as Vector
31
32 import Numeric.Statistics (percentile)
33
34 import Debug.Trace (trace)
35
36
37 -- | To apply a filter with the possibility of keeping some periods non empty (keep : True|False)
38 filterFis :: Bool -> Int -> (Int -> [PhyloFis] -> [PhyloFis]) -> Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis]
39 filterFis keep thr f m = case keep of
40 False -> Map.map (\l -> f thr l) m
41 True -> Map.map (\l -> keepFilled (f) thr l) m
42
43
44 -- | To filter Fis with small Support
45 filterFisBySupport :: Int -> [PhyloFis] -> [PhyloFis]
46 filterFisBySupport thr l = filter (\fis -> getSupport fis > thr) l
47
48
49 -- | To filter Fis with small Clique size
50 filterFisByClique :: Int -> [PhyloFis] -> [PhyloFis]
51 filterFisByClique thr l = filter (\fis -> (size $ getClique fis) > thr) l
52
53
54 -- | To filter nested Fis
55 filterFisByNested :: Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis]
56 filterFisByNested = map (\l -> let cliqueMax = filterNestedSets (head' "Fis" $ map getClique l) (map getClique l) []
57 in filter (\fis -> elem (getClique fis) cliqueMax) l)
58
59
60 -- | To transform a list of Documents into a Frequent Items Set
61 docsToFis :: Map (Date, Date) [Document] -> Map (Date, Date) [PhyloFis]
62 docsToFis docs = map (\d -> let fs = Map.toList $ fisWithSizePolyMap (Segment 1 20) 1 (map text d)
63 in map (\f -> PhyloFis (fst f) (snd f) empty) fs) docs
64
65
66 -- | To process a list of Filters on top of the PhyloFis
67 processFilters :: [Filter] -> Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis]
68 processFilters filters phyloFis
69 | null filters = phyloFis
70 | otherwise = panic "[ERR][Viz.Phylo.LevelMaker.processFilters] please add some filters for the Fis"
71
72
73 -- | To process a list of Metrics on top of the PhyloFis
74 processMetrics :: [Metric] -> Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis]
75 processMetrics metrics phyloFis
76 | null metrics = phyloFis
77 | otherwise = panic "[ERR][Viz.Phylo.LevelMaker.processMetrics] please add some metrics for the Fis"
78
79
80 -- | To transform some Documents into PhyloFis and apply a List of Metrics and Filters
81 toPhyloFis :: Map (Date, Date) [Document] -> Bool -> Support -> Int -> [Metric] -> [Filter] -> Map (Date, Date) [PhyloFis]
82 toPhyloFis ds k s t ms fs = processFilters fs
83 $ processMetrics ms
84 $ traceFis "----\nFiltered Fis by clique size :\n"
85 $ filterFis k t (filterFisByClique)
86 $ traceFis "----\nFiltered Fis by nested :\n"
87 $ filterFisByNested
88 $ traceFis "----\nFiltered Fis by support :\n"
89 $ filterFis k s (filterFisBySupport)
90 $ traceFis "----\nUnfiltered Fis :\n"
91 $ docsToFis ds
92
93
94 -----------------
95 -- | Tracers | --
96 -----------------
97
98
99
100 traceFis :: [Char] -> Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis]
101 traceFis lbl m = trace (lbl <> "count : " <> show (sum $ map length $ elems m) <> " Fis\n"
102 <> "support : " <> show (percentile 25 (Vector.fromList supps)) <> " (25%) "
103 <> show (percentile 50 (Vector.fromList supps)) <> " (50%) "
104 <> show (percentile 75 (Vector.fromList supps)) <> " (75%) "
105 <> show (percentile 90 (Vector.fromList supps)) <> " (90%) "
106 <> show (percentile 100 (Vector.fromList supps)) <> " (100%)\n"
107 <> " " <> show (countSup 1 supps) <> " (>1) "
108 <> show (countSup 2 supps) <> " (>2) "
109 <> show (countSup 3 supps) <> " (>3) "
110 <> show (countSup 4 supps) <> " (>4) "
111 <> show (countSup 5 supps) <> " (>5) "
112 <> show (countSup 6 supps) <> " (>6)\n"
113 <> "clique size : " <> show (percentile 25 (Vector.fromList ngrms)) <> " (25%) "
114 <> show (percentile 50 (Vector.fromList ngrms)) <> " (50%) "
115 <> show (percentile 75 (Vector.fromList ngrms)) <> " (75%) "
116 <> show (percentile 90 (Vector.fromList ngrms)) <> " (90%) "
117 <> show (percentile 100 (Vector.fromList ngrms)) <> " (100%)\n"
118 <> " " <> show (countSup 1 ngrms) <> " (>1) "
119 <> show (countSup 2 ngrms) <> " (>2) "
120 <> show (countSup 3 ngrms) <> " (>3) "
121 <> show (countSup 4 ngrms) <> " (>4) "
122 <> show (countSup 5 ngrms) <> " (>5) "
123 <> show (countSup 6 ngrms) <> " (>6)\n"
124 ) m
125 where
126 --------------------------------------
127 countSup :: Double -> [Double] -> Int
128 countSup s l = length $ filter (>s) l
129 --------------------------------------
130 supps :: [Double]
131 supps = sort $ map (fromIntegral . _phyloFis_support) $ concat $ elems m
132 --------------------------------------
133 ngrms :: [Double]
134 ngrms = sort $ map (\f -> fromIntegral $ Set.size $ _phyloFis_clique f) $ concat $ elems m
135 --------------------------------------