]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo/PhyloMaker.hs
color update
[gargantext.git] / src / Gargantext / Viz / Phylo / PhyloMaker.hs
1 {-|
2 Module : Gargantext.Viz.Phylo.PhyloMaker
3 Description : Maker engine for rebuilding a Phylo
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9 -}
10
11 {-# LANGUAGE NoImplicitPrelude #-}
12 {-# LANGUAGE FlexibleContexts #-}
13 {-# LANGUAGE OverloadedStrings #-}
14 {-# LANGUAGE MultiParamTypeClasses #-}
15
16 module Gargantext.Viz.Phylo.PhyloMaker where
17
18 import Data.List (concat, nub, partition, sort, (++))
19 import Data.Map (Map, fromListWith, keys, unionWith, fromList, empty, toList, elems, (!), filterWithKey, restrictKeys)
20 import Data.Set (size)
21 import Data.Vector (Vector)
22
23 import Gargantext.Prelude
24 import Gargantext.Viz.AdaptativePhylo
25 import Gargantext.Viz.Phylo.PhyloTools
26 import Gargantext.Text.Context (TermList)
27 import Gargantext.Text.Metrics.FrequentItemSet (fisWithSizePolyMap, Size(..))
28
29 import Control.DeepSeq (NFData)
30 import Control.Parallel.Strategies (parList, rdeepseq, using)
31 import Debug.Trace (trace)
32 import Control.Lens hiding (Level)
33
34 import qualified Data.Vector as Vector
35 import qualified Data.Set as Set
36
37
38 ------------------
39 -- | To Phylo | --
40 ------------------
41
42
43 toPhylo :: [Document] -> TermList -> Config -> Phylo
44 toPhylo docs lst conf = phylo1
45 where
46 --------------------------------------
47 phylo1 :: Phylo
48 phylo1 = toPhylo1 docs phyloBase
49 --------------------------------------
50 phyloBase :: Phylo
51 phyloBase = toPhyloBase docs lst conf
52 --------------------------------------
53
54
55
56 --------------------
57 -- | To Phylo 1 | --
58 --------------------
59
60
61 appendGroups :: (a -> PhyloPeriodId -> Level -> Int -> Vector Ngrams -> [Cooc] -> PhyloGroup) -> Level -> Map (Date,Date) [a] -> Phylo -> Phylo
62 appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ concat $ elems m) <> " groups to Level " <> show (lvl) <> "\n")
63 $ over ( phylo_periods
64 . traverse
65 . phylo_periodLevels
66 . traverse)
67 (\phyloLvl -> if lvl == (phyloLvl ^. phylo_levelLevel)
68 then
69 let pId = phyloLvl ^. phylo_levelPeriod
70 phyloFis = m ! pId
71 in phyloLvl
72 & phylo_levelGroups .~ (fromList $ foldl (\groups obj ->
73 groups ++ [ (((pId,lvl),length groups)
74 , f obj pId lvl (length groups) (getRoots phylo)
75 (elems $ restrictKeys (phylo ^. phylo_timeCooc) $ periodsToYears [pId]))
76 ] ) [] phyloFis)
77 else
78 phyloLvl )
79 phylo
80
81
82 fisToGroup :: PhyloFis -> PhyloPeriodId -> Level -> Int -> Vector Ngrams -> [Cooc] -> PhyloGroup
83 fisToGroup fis pId lvl idx fdt coocs =
84 let ngrams = ngramsToIdx (Set.toList $ fis ^. phyloFis_clique) fdt
85 in PhyloGroup pId lvl idx
86 (fis ^. phyloFis_support)
87 ngrams
88 (ngramsToCooc ngrams coocs)
89 (1,[])
90 [] [] [] []
91 Nothing
92
93
94 toPhylo1 :: [Document] -> Phylo -> Phylo
95 toPhylo1 docs phyloBase = appendGroups fisToGroup 1 phyloFis phyloBase
96 where
97 --------------------------------------
98 phyloFis :: Map (Date,Date) [PhyloFis]
99 phyloFis = toPhyloFis docs' (fisSupport $ getConfig phyloBase) (fisSize $ getConfig phyloBase)
100 --------------------------------------
101 docs' :: Map (Date,Date) [Document]
102 docs' = groupDocsByPeriod date (getPeriodIds phyloBase) docs
103 --------------------------------------
104
105
106 ---------------------------
107 -- | Frequent Item Set | --
108 ---------------------------
109
110
111 -- | To apply a filter with the possibility of keeping some periods non empty (keep : True|False)
112 filterFis :: Bool -> Int -> (Int -> [PhyloFis] -> [PhyloFis]) -> Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis]
113 filterFis keep thr f m = case keep of
114 False -> map (\l -> f thr l) m
115 True -> map (\l -> keepFilled (f) thr l) m
116
117
118 -- | To filter Fis with small Support
119 filterFisBySupport :: Int -> [PhyloFis] -> [PhyloFis]
120 filterFisBySupport thr l = filter (\fis -> (fis ^. phyloFis_support) >= thr) l
121
122
123 -- | To filter Fis with small Clique size
124 filterFisByClique :: Int -> [PhyloFis] -> [PhyloFis]
125 filterFisByClique thr l = filter (\fis -> (size $ fis ^. phyloFis_clique) >= thr) l
126
127
128 -- | To filter nested Fis
129 filterFisByNested :: Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis]
130 filterFisByNested m =
131 let fis = map (\l ->
132 foldl (\mem f -> if (any (\f' -> isNested (Set.toList $ f' ^. phyloFis_clique) (Set.toList $ f ^. phyloFis_clique)) mem)
133 then mem
134 else
135 let fMax = filter (\f' -> not $ isNested (Set.toList $ f ^. phyloFis_clique) (Set.toList $ f' ^. phyloFis_clique)) mem
136 in fMax ++ [f] ) [] l)
137 $ elems m
138 fis' = fis `using` parList rdeepseq
139 in fromList $ zip (keys m) fis'
140
141
142 -- | To transform a time map of docs innto a time map of Fis with some filters
143 toPhyloFis :: Map (Date, Date) [Document] -> Int -> Int -> Map (Date,Date) [PhyloFis]
144 toPhyloFis phyloDocs support clique = traceFis "Filtered Fis"
145 $ filterFisByNested
146 $ traceFis "Filtered by clique size"
147 $ filterFis True clique (filterFisByClique)
148 $ traceFis "Filtered by support"
149 $ filterFis True support (filterFisBySupport)
150 $ traceFis "Unfiltered Fis" phyloFis
151 where
152 --------------------------------------
153 phyloFis :: Map (Date,Date) [PhyloFis]
154 phyloFis =
155 let fis = map (\(prd,docs) -> let lst = toList $ fisWithSizePolyMap (Segment 1 20) 1 (map text docs)
156 in (prd, map (\f -> PhyloFis (fst f) (snd f) prd) lst))
157 $ toList phyloDocs
158 fis' = fis `using` parList rdeepseq
159 in fromList fis'
160 --------------------------------------
161
162
163 --------------------
164 -- | Coocurency | --
165 --------------------
166
167
168 -- | To build the local cooc matrix of each phylogroup
169 ngramsToCooc :: [Int] -> [Cooc] -> Cooc
170 ngramsToCooc ngrams coocs =
171 let cooc = foldl (\acc cooc' -> sumCooc acc cooc') empty coocs
172 pairs = listToKeys ngrams
173 in filterWithKey (\k _ -> elem k pairs) cooc
174
175
176 -- | To transform the docs into a time map of coocurency matrix
177 docsToCoocByYear :: [Document] -> Vector Ngrams -> Config -> Map Date Cooc
178 docsToCoocByYear docs fdt conf =
179 let mCooc = fromListWith sumCooc
180 $ map (\(_d,l) -> (_d, listToMatrix l))
181 $ map (\doc -> (date doc, sort $ ngramsToIdx (text doc) fdt)) docs
182 mCooc' = fromList
183 $ map (\t -> (t,empty))
184 $ toTimeScale (map date docs) (timeUnit conf)
185 in trace ("\n" <> "-- | Build the coocurency matrix for " <> show (length $ keys mCooc') <> " unit of time" <> "\n")
186 $ unionWith sumCooc mCooc mCooc'
187
188
189 -----------------------
190 -- | to Phylo Base | --
191 -----------------------
192
193
194 -- | To group a list of Documents by fixed periods
195 groupDocsByPeriod :: (NFData doc, Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc]
196 groupDocsByPeriod _ _ [] = panic "[ERR][Viz.Phylo.PhyloMaker] Empty [Documents] can not have any periods"
197 groupDocsByPeriod f pds es =
198 let periods = map (inPeriode f es) pds
199 periods' = periods `using` parList rdeepseq
200
201 in trace ("\n" <> "-- | Group " <> show(length es) <> " docs by " <> show(length pds) <> " periods" <> "\n")
202 $ fromList $ zip pds periods'
203 where
204 --------------------------------------
205 inPeriode :: Ord b => (t -> b) -> [t] -> (b, b) -> [t]
206 inPeriode f' h (start,end) =
207 fst $ partition (\d -> f' d >= start && f' d <= end) h
208 --------------------------------------
209
210
211 -- | To count the number of docs by unit of time (like a year)
212 nbDocsByTime :: [Document] -> Int -> Map Date Double
213 nbDocsByTime docs step =
214 let docs' = fromListWith (+) $ map (\d -> (date d,1)) docs
215 time = fromList $ map (\t -> (t,0)) $ toTimeScale (keys docs') step
216 in trace ("\n" <> "-- | Group " <> show(length docs) <> " docs by " <> show(length time) <> " unit of time" <> "\n")
217 $ unionWith (+) time docs'
218
219
220 initPhyloLevels :: Int -> PhyloPeriodId -> Map PhyloLevelId PhyloLevel
221 initPhyloLevels lvlMax pId =
222 fromList $ map (\lvl -> ((pId,lvl),PhyloLevel pId lvl empty)) [1..lvlMax]
223
224
225 -- | To init the basic elements of a Phylo
226 toPhyloBase :: [Document] -> TermList -> Config -> Phylo
227 toPhyloBase docs lst conf =
228 let foundations = PhyloFoundations (Vector.fromList $ nub $ concat $ map text docs) lst
229 params = defaultPhyloParam { _phyloParam_config = conf }
230 periods = toPeriods (sort $ nub $ map date docs) (timePeriod conf) (timeStep conf)
231 in trace ("\n" <> "-- | Create PhyloBase out of " <> show(length docs) <> " docs \n")
232 $ Phylo foundations
233 (docsToCoocByYear docs (foundations ^. foundations_roots) conf)
234 (nbDocsByTime docs $ timeUnit conf)
235 params
236 (fromList $ map (\prd -> (prd, PhyloPeriod prd (initPhyloLevels (phyloLevel conf) prd))) periods)