]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo/Aggregates/Fis.hs
Merge branch 'dev-phylo' into dev-merge
[gargantext.git] / src / Gargantext / Viz / Phylo / Aggregates / Fis.hs
1 {-|
2 Module : Gargantext.Viz.Phylo.Tools
3 Description : Phylomemy Tools to build/manage it
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10
11 -}
12
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE FlexibleContexts #-}
15 {-# LANGUAGE OverloadedStrings #-}
16
17 module Gargantext.Viz.Phylo.Aggregates.Fis
18 where
19
20 import Control.Lens hiding (makeLenses, both, Level)
21 import Data.List (null,concat,sort,(++))
22 import Data.Map (Map,elems,mapWithKey,unionWith,fromList,keys)
23 import Data.Tuple (fst, snd)
24 import Data.Set (size)
25 import Gargantext.Prelude
26 import Gargantext.Text.Metrics.FrequentItemSet (fisWithSizePolyMap, Size(..))
27 import Gargantext.Viz.Phylo
28 import Gargantext.Viz.Phylo.Tools
29
30 import qualified Data.Map as Map
31 import qualified Data.Set as Set
32 import qualified Data.Vector.Storable as Vector
33
34 import Numeric.Statistics (percentile)
35
36 import Debug.Trace (trace)
37
38
39 -- | To apply a filter with the possibility of keeping some periods non empty (keep : True|False)
40 filterFis :: Bool -> Int -> (Int -> [PhyloFis] -> [PhyloFis]) -> Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis]
41 filterFis keep thr f m = case keep of
42 False -> Map.map (\l -> f thr l) m
43 True -> Map.map (\l -> keepFilled (f) thr l) m
44
45
46 -- | To filter Fis with small Support
47 filterFisBySupport :: Int -> [PhyloFis] -> [PhyloFis]
48 filterFisBySupport thr l = filter (\fis -> getSupport fis >= thr) l
49
50
51 -- | To filter Fis with small Clique size
52 filterFisByClique :: Int -> [PhyloFis] -> [PhyloFis]
53 filterFisByClique thr l = filter (\fis -> (size $ getClique fis) >= thr) l
54
55
56 -- | To filter nested Fis
57 filterFisByNested :: Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis]
58 filterFisByNested = map (\l -> let cliqueMax = filterNestedSets (head' "Fis" $ map getClique l) (map getClique l) []
59 in filter (\fis -> elem (getClique fis) cliqueMax) l)
60
61
62 docsToFis' :: Map (Date,Date) [Document] -> Phylo -> Phylo
63 docsToFis' m p = if (null $ getPhyloFis p)
64 then trace("----\nRebuild the Fis from scratch\n")
65 $ p & phylo_fis .~ mapWithKey (\k docs -> let fis = Map.toList $ fisWithSizePolyMap (Segment 1 20) 1 (map text docs)
66 in map (\f -> PhyloFis (fst f) (snd f) k) fis) m
67 else trace("----\nUse Fis from an existing file\n")
68 $ p & phylo_fis %~ (unionWith (++) (fromList $ map (\k -> (k,[])) $ keys m))
69
70
71 toPhyloFis' :: Map (Date, Date) [PhyloFis] -> Bool -> Support -> Int -> Map (Date, Date) [PhyloFis]
72 toPhyloFis' fis k s t = traceFis "----\nFiltered Fis by clique size :\n"
73 $ filterFis k t (filterFisByClique)
74 $ traceFis "----\nFiltered Fis by nested :\n"
75 $ filterFisByNested
76 $ traceFis "----\nFiltered Fis by support :\n"
77 $ filterFis k s (filterFisBySupport)
78 $ traceFis "----\nUnfiltered Fis :\n" fis
79
80
81 -----------------
82 -- | Tracers | --
83 -----------------
84
85
86
87 traceFis :: [Char] -> Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis]
88 traceFis lbl m = trace (lbl <> "count : " <> show (sum $ map length $ elems m) <> " Fis\n"
89 <> "support : " <> show (percentile 25 (Vector.fromList supps)) <> " (25%) "
90 <> show (percentile 50 (Vector.fromList supps)) <> " (50%) "
91 <> show (percentile 75 (Vector.fromList supps)) <> " (75%) "
92 <> show (percentile 90 (Vector.fromList supps)) <> " (90%) "
93 <> show (percentile 100 (Vector.fromList supps)) <> " (100%)\n"
94 <> " " <> show (countSup 1 supps) <> " (>1) "
95 <> show (countSup 2 supps) <> " (>2) "
96 <> show (countSup 3 supps) <> " (>3) "
97 <> show (countSup 4 supps) <> " (>4) "
98 <> show (countSup 5 supps) <> " (>5) "
99 <> show (countSup 6 supps) <> " (>6)\n"
100 <> "clique size : " <> show (percentile 25 (Vector.fromList ngrms)) <> " (25%) "
101 <> show (percentile 50 (Vector.fromList ngrms)) <> " (50%) "
102 <> show (percentile 75 (Vector.fromList ngrms)) <> " (75%) "
103 <> show (percentile 90 (Vector.fromList ngrms)) <> " (90%) "
104 <> show (percentile 100 (Vector.fromList ngrms)) <> " (100%)\n"
105 <> " " <> show (countSup 1 ngrms) <> " (>1) "
106 <> show (countSup 2 ngrms) <> " (>2) "
107 <> show (countSup 3 ngrms) <> " (>3) "
108 <> show (countSup 4 ngrms) <> " (>4) "
109 <> show (countSup 5 ngrms) <> " (>5) "
110 <> show (countSup 6 ngrms) <> " (>6)\n"
111 ) m
112 where
113 --------------------------------------
114 countSup :: Double -> [Double] -> Int
115 countSup s l = length $ filter (>s) l
116 --------------------------------------
117 supps :: [Double]
118 supps = sort $ map (fromIntegral . _phyloFis_support) $ concat $ elems m
119 --------------------------------------
120 ngrms :: [Double]
121 ngrms = sort $ map (\f -> fromIntegral $ Set.size $ _phyloFis_clique f) $ concat $ elems m
122 --------------------------------------