]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo/Aggregates/Fis.hs
add some tracers and fix the temporal matching
[gargantext.git] / src / Gargantext / Viz / Phylo / Aggregates / Fis.hs
1 {-|
2 Module : Gargantext.Viz.Phylo.Tools
3 Description : Phylomemy Tools to build/manage it
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10
11 -}
12
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE FlexibleContexts #-}
15 {-# LANGUAGE OverloadedStrings #-}
16
17 module Gargantext.Viz.Phylo.Aggregates.Fis
18 where
19
20 import Data.List (null,concat,sort)
21 import Data.Map (Map, empty,elems)
22 import Data.Tuple (fst, snd)
23 import Data.Set (size)
24 import Data.Vector.Storable (Vector)
25 import Gargantext.Prelude
26 import Gargantext.Text.Metrics.FrequentItemSet (fisWithSizePolyMap, Size(..))
27 import Gargantext.Viz.Phylo
28 import Gargantext.Viz.Phylo.Tools
29 import qualified Data.Map as Map
30 import qualified Data.Set as Set
31 import qualified Data.Vector.Storable as Vector
32
33 import Numeric.Statistics (percentile)
34
35 import Debug.Trace (trace)
36
37
38 -- | To Filter Fis by support
39 filterFisBySupport :: Bool -> Int -> Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis]
40 filterFisBySupport keep min' m = case keep of
41 False -> Map.map (\l -> filterMinorFis min' l) m
42 True -> Map.map (\l -> keepFilled (filterMinorFis) min' l) m
43
44
45 filterFisByNgrams :: Int -> Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis]
46 filterFisByNgrams thr m = Map.map(\lst -> filter (\fis -> (size $ getClique fis) > thr) lst) m
47
48
49 -- | To filter Fis with small Support, to preserve nonempty periods please use : filterFisBySupport true
50 filterMinorFis :: Int -> [PhyloFis] -> [PhyloFis]
51 filterMinorFis min' l = filter (\fis -> getSupport fis > min') l
52
53
54 -- | To filter nested Fis
55 filterFisByNested :: Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis]
56 filterFisByNested = map (\l -> let cliqueMax = filterNestedSets (head' "Fis" $ map getClique l) (map getClique l) []
57 in filter (\fis -> elem (getClique fis) cliqueMax) l)
58
59
60 -- | To transform a list of Documents into a Frequent Items Set
61 docsToFis :: Map (Date, Date) [Document] -> Map (Date, Date) [PhyloFis]
62 docsToFis docs = map (\d -> let fs = Map.toList $ fisWithSizePolyMap (Segment 1 20) 1 (map text d)
63 in map (\f -> PhyloFis (fst f) (snd f) empty) fs) docs
64
65
66 -- | To process a list of Filters on top of the PhyloFis
67 processFilters :: [Filter] -> Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis]
68 processFilters filters phyloFis
69 | null filters = phyloFis
70 | otherwise = panic "[ERR][Viz.Phylo.LevelMaker.processFilters] please add some filters for the Fis"
71
72
73 -- | To process a list of Metrics on top of the PhyloFis
74 processMetrics :: [Metric] -> Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis]
75 processMetrics metrics phyloFis
76 | null metrics = phyloFis
77 | otherwise = panic "[ERR][Viz.Phylo.LevelMaker.processMetrics] please add some metrics for the Fis"
78
79
80 -- | To transform some Documents into PhyloFis and apply a List of Metrics and Filters
81 toPhyloFis :: Map (Date, Date) [Document] -> Bool -> Support -> Int -> [Metric] -> [Filter] -> Map (Date, Date) [PhyloFis]
82 toPhyloFis ds k s t ms fs = processFilters fs
83 $ processMetrics ms
84 $ traceFis "----\nFiltered Fis by clique size :\n"
85 $ filterFisByNgrams t
86 $ traceFis "----\nFiltered Fis by nested :\n"
87 $ filterFisByNested
88 $ traceFis "----\nFiltered Fis by support :\n"
89 $ filterFisBySupport k s
90 $ traceFis "----\nUnfiltered Fis :\n"
91 $ docsToFis ds
92
93
94 -----------------
95 -- | Tracers | --
96 -----------------
97
98 traceFis :: [Char] -> Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis]
99 traceFis lbl m = trace (lbl <> "count : " <> show (sum $ map length $ elems m) <> " Fis\n"
100 <> "support : " <> show (percentile 25 supps) <> " (25%) "
101 <> show (percentile 50 supps) <> " (50%) "
102 <> show (percentile 75 supps) <> " (75%) "
103 <> show (percentile 90 supps) <> " (90%)\n"
104 <> "clique size : " <> show (percentile 25 ngrms) <> " (25%) "
105 <> show (percentile 50 ngrms) <> " (50%) "
106 <> show (percentile 75 ngrms) <> " (75%) "
107 <> show (percentile 90 ngrms) <> " (90%)\n"
108 ) m
109 where
110 supps :: Vector Double
111 supps = Vector.fromList $ sort $ map (fromIntegral . _phyloFis_support) $ concat $ elems m
112 ngrms :: Vector Double
113 ngrms = Vector.fromList $ sort $ map (\f -> fromIntegral $ Set.size $ _phyloFis_clique f) $ concat $ elems m