2 Module : Gargantext.Viz.Phylo.Tools
3 Description : Phylomemy Tools to build/manage it
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE FlexibleContexts #-}
15 {-# LANGUAGE OverloadedStrings #-}
17 module Gargantext.Viz.Phylo.Aggregates.Fis
20 import Data.List (null,concat,sort)
21 import Data.Map (Map, empty,elems)
22 import Data.Tuple (fst, snd)
23 import Data.Set (size)
24 import Data.Vector.Storable (Vector)
25 import Gargantext.Prelude
26 import Gargantext.Text.Metrics.FrequentItemSet (fisWithSizePolyMap, Size(..))
27 import Gargantext.Viz.Phylo
28 import Gargantext.Viz.Phylo.Tools
29 import qualified Data.Map as Map
30 import qualified Data.Set as Set
31 import qualified Data.Vector.Storable as Vector
33 import Numeric.Statistics (percentile)
35 import Debug.Trace (trace)
38 -- | To Filter Fis by support
39 filterFisBySupport :: Bool -> Int -> Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis]
40 filterFisBySupport keep min' m = case keep of
41 False -> Map.map (\l -> filterMinorFis min' l) m
42 True -> Map.map (\l -> keepFilled (filterMinorFis) min' l) m
45 filterFisByNgrams :: Int -> Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis]
46 filterFisByNgrams thr m = Map.map(\lst -> filter (\fis -> (size $ getClique fis) > thr) lst) m
49 -- | To filter Fis with small Support, to preserve nonempty periods please use : filterFisBySupport true
50 filterMinorFis :: Int -> [PhyloFis] -> [PhyloFis]
51 filterMinorFis min' l = filter (\fis -> getSupport fis > min') l
54 -- | To filter nested Fis
55 filterFisByNested :: Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis]
56 filterFisByNested = map (\l -> let cliqueMax = filterNestedSets (head' "Fis" $ map getClique l) (map getClique l) []
57 in filter (\fis -> elem (getClique fis) cliqueMax) l)
60 -- | To transform a list of Documents into a Frequent Items Set
61 docsToFis :: Map (Date, Date) [Document] -> Map (Date, Date) [PhyloFis]
62 docsToFis docs = map (\d -> let fs = Map.toList $ fisWithSizePolyMap (Segment 1 20) 1 (map text d)
63 in map (\f -> PhyloFis (fst f) (snd f) empty) fs) docs
66 -- | To process a list of Filters on top of the PhyloFis
67 processFilters :: [Filter] -> Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis]
68 processFilters filters phyloFis
69 | null filters = phyloFis
70 | otherwise = panic "[ERR][Viz.Phylo.LevelMaker.processFilters] please add some filters for the Fis"
73 -- | To process a list of Metrics on top of the PhyloFis
74 processMetrics :: [Metric] -> Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis]
75 processMetrics metrics phyloFis
76 | null metrics = phyloFis
77 | otherwise = panic "[ERR][Viz.Phylo.LevelMaker.processMetrics] please add some metrics for the Fis"
80 -- | To transform some Documents into PhyloFis and apply a List of Metrics and Filters
81 toPhyloFis :: Map (Date, Date) [Document] -> Bool -> Support -> Int -> [Metric] -> [Filter] -> Map (Date, Date) [PhyloFis]
82 toPhyloFis ds k s t ms fs = processFilters fs
84 $ traceFis "----\nFiltered Fis by clique size :\n"
86 $ traceFis "----\nFiltered Fis by nested :\n"
88 $ traceFis "----\nFiltered Fis by support :\n"
89 $ filterFisBySupport k s
90 $ traceFis "----\nUnfiltered Fis :\n"
98 traceFis :: [Char] -> Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis]
99 traceFis lbl m = trace (lbl <> "count : " <> show (sum $ map length $ elems m) <> " Fis\n"
100 <> "support : " <> show (percentile 25 supps) <> " (25%) "
101 <> show (percentile 50 supps) <> " (50%) "
102 <> show (percentile 75 supps) <> " (75%) "
103 <> show (percentile 90 supps) <> " (90%)\n"
104 <> "clique size : " <> show (percentile 25 ngrms) <> " (25%) "
105 <> show (percentile 50 ngrms) <> " (50%) "
106 <> show (percentile 75 ngrms) <> " (75%) "
107 <> show (percentile 90 ngrms) <> " (90%)\n"
110 supps :: Vector Double
111 supps = Vector.fromList $ sort $ map (fromIntegral . _phyloFis_support) $ concat $ elems m
112 ngrms :: Vector Double
113 ngrms = Vector.fromList $ sort $ map (\f -> fromIntegral $ Set.size $ _phyloFis_clique f) $ concat $ elems m