]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo/LevelMaker.hs
Merge branch 'dev' into dev-graph-explorer-gexf
[gargantext.git] / src / Gargantext / Viz / Phylo / LevelMaker.hs
1 {-|
2 Module : Gargantext.Viz.Phylo.Tools
3 Description : Phylomemy Tools to build/manage it
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10
11 -}
12
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE FlexibleContexts #-}
15 {-# LANGUAGE OverloadedStrings #-}
16 {-# LANGUAGE TypeSynonymInstances #-}
17 {-# LANGUAGE FlexibleInstances #-}
18
19 module Gargantext.Viz.Phylo.LevelMaker
20 where
21
22 import Control.Parallel.Strategies
23 import Control.Lens hiding (both, Level)
24 import Data.List ((++), sort, concat, nub, zip, last, null)
25 import Data.Map (Map, (!), empty, singleton, size)
26 import Data.Text (Text)
27 import Data.Tuple.Extra
28 import Data.Vector (Vector)
29 import Gargantext.Prelude
30 import Gargantext.Viz.Phylo
31 import Gargantext.Viz.Phylo.Metrics
32 import Gargantext.Viz.Phylo.Aggregates
33 import Gargantext.Viz.Phylo.Cluster
34 import Gargantext.Viz.Phylo.BranchMaker
35 import Gargantext.Viz.Phylo.LinkMaker
36 import Gargantext.Viz.Phylo.Tools
37 import Gargantext.Text.Context (TermList)
38
39 import qualified Data.Vector.Storable as VS
40 import qualified Data.Set as Set
41 import qualified Data.Vector as Vector
42
43 import Debug.Trace (trace)
44 import Numeric.Statistics (percentile)
45
46
47 -------------------------
48 -- | PhyloLevelMaker | --
49 -------------------------
50
51
52 -- | A typeClass for polymorphic PhyloLevel functions
53 class PhyloLevelMaker aggregate
54 where
55 -- | To add a new Level of PhyloGroups to a Phylo based on a list of Aggregates
56 addPhyloLevel :: Level -> Map (Date,Date) [aggregate] -> Phylo -> Phylo
57 -- | To create a list of PhyloGroups based on a list of aggregates a
58 toPhyloGroups :: Level -> (Date,Date) -> [aggregate] -> Map (Date,Date) [aggregate] -> Phylo -> [PhyloGroup]
59
60
61 instance PhyloLevelMaker PhyloCluster
62 where
63 --------------------------------------
64 -- | Level -> Map (Date,Date) [Cluster] -> Phylo -> Phylo
65 addPhyloLevel lvl m p
66 | lvl > 1 = addPhyloLevel' lvl m p
67 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Clusters at level < 2")
68 --------------------------------------
69 -- | Level -> (Date,Date) -> [Cluster] -> Map (Date,Date) [Cluster] -> Phylo -> [PhyloGroup]
70 toPhyloGroups lvl (d,d') l m p =
71 let clusters = map (\(idx,cluster) -> clusterToGroup (d,d') lvl idx "" cluster m p) $ zip [1..] l
72 clusters' = clusters `using` parList rdeepseq
73 in clusters'
74 --------------------------------------
75
76
77 instance PhyloLevelMaker PhyloFis
78 where
79 --------------------------------------
80 -- | Level -> Map (Date,Date) [Fis] -> Phylo -> Phylo
81 addPhyloLevel lvl m p
82 | lvl == 1 = addPhyloLevel' lvl m p
83 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Fis at level <> 1")
84 --------------------------------------
85 -- | Level -> (Date,Date) -> [Fis] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
86 toPhyloGroups lvl (d,d') l _ p =
87 let groups = map (\(idx,fis) -> cliqueToGroup (d,d') lvl idx "" fis (getPhyloCooc p) (getFoundationsRoots p)) $ zip [1..] l
88 groups' = groups `using` parList rdeepseq
89 in groups'
90 --------------------------------------
91
92
93 instance PhyloLevelMaker Document
94 where
95 --------------------------------------
96 -- | Level -> Map (Date,Date) [Document] -> Phylo -> Phylo
97 addPhyloLevel lvl m p
98 | lvl == 0 = addPhyloLevel' lvl m p
99 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Documents at level <> 0")
100 --------------------------------------
101 -- | Level -> (Date,Date) -> [Document] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
102 toPhyloGroups lvl (d,d') l _m p = map (\ngram -> ngramsToGroup (d,d') lvl (getIdxInRoots ngram p) ngram [ngram] p)
103 $ (nub . concat)
104 $ map text l
105 --------------------------------------
106
107
108 -- | To traverse a Phylo and add a new PhyloLevel linked to a new list of PhyloGroups
109 addPhyloLevel' :: PhyloLevelMaker a => Level -> Map (Date, Date) [a] -> Phylo -> Phylo
110 addPhyloLevel' lvl m p = alterPhyloPeriods
111 (\period -> let pId = _phylo_periodId period
112 in over (phylo_periodLevels)
113 (\phyloLevels ->
114 let groups = toPhyloGroups lvl pId (m ! pId) m p
115 in trace (show (length groups) <> " groups for " <> show (pId) ) $ phyloLevels ++ [PhyloLevel (pId, lvl) groups]
116 ) period) p
117
118
119 ----------------------
120 -- | toPhyloGroup | --
121 ----------------------
122
123
124 -- | To transform a Clique into a PhyloGroup
125 cliqueToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloFis -> Map Date (Map (Int,Int) Double) -> Vector Ngrams -> PhyloGroup
126 cliqueToGroup prd lvl idx lbl fis cooc' root = PhyloGroup ((prd, lvl), idx) lbl ngrams
127 (getNgramsMeta cooc ngrams)
128 -- empty
129 (singleton "support" (fromIntegral $ getSupport fis))
130 Nothing
131 cooc
132 [] [] [] childs
133 where
134 --------------------------------------
135 cooc :: Map (Int, Int) Double
136 cooc = getMiniCooc (listToFullCombi ngrams) (periodsToYears [prd]) cooc'
137 --------------------------------------
138 ngrams :: [Int]
139 ngrams = sort $ map (\x -> getIdxInRoots' x root)
140 $ Set.toList
141 $ getClique fis
142 --------------------------------------
143 childs :: [Pointer]
144 childs = map (\n -> (((prd, lvl - 1), n),1)) ngrams
145 --------------------------------------
146
147
148 -- | To transform a Cluster into a Phylogroup
149 clusterToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloCluster -> Map (Date,Date) [PhyloCluster]-> Phylo-> PhyloGroup
150 clusterToGroup prd lvl idx lbl groups _m p =
151 PhyloGroup ((prd, lvl), idx) lbl ngrams
152 (getNgramsMeta cooc ngrams)
153 -- empty
154 empty
155 Nothing
156 cooc
157 ascLink desLink [] childs
158 where
159 --------------------------------------
160 cooc :: Map (Int, Int) Double
161 cooc = getMiniCooc (listToFullCombi ngrams) (periodsToYears [prd]) (getPhyloCooc p)
162 --------------------------------------
163 childs :: [Pointer]
164 childs = map (\g -> (getGroupId g, 1)) groups
165 ascLink = concat $ map getGroupPeriodParents groups
166 desLink = concat $ map getGroupPeriodChilds groups
167 --------------------------------------
168 ngrams :: [Int]
169 ngrams = (sort . nub . concat) $ map getGroupNgrams groups
170 --------------------------------------
171
172
173 -- | To transform a list of Ngrams into a PhyloGroup
174 ngramsToGroup :: PhyloPeriodId -> Level -> Int -> Text -> [Ngrams] -> Phylo -> PhyloGroup
175 ngramsToGroup prd lvl idx lbl ngrams p = PhyloGroup ((prd, lvl), idx) lbl (sort $ map (\x -> getIdxInRoots x p) ngrams) empty empty Nothing
176 (getMiniCooc (listToFullCombi $ sort $ map (\x -> getIdxInRoots x p) ngrams) (periodsToYears [prd]) (getPhyloCooc p))
177 [] [] [] []
178
179
180 ----------------------
181 -- | toPhyloLevel | --
182 ----------------------
183
184
185 -- | To reconstruct the Phylo from a set of Document to a given Level
186 toPhylo :: PhyloQueryBuild -> [Document] -> TermList -> Map (Date,Date) [PhyloFis] -> Phylo
187 toPhylo q c termList fis = toNthLevel (getNthLevel q) (getInterTemporalMatching q) (getNthCluster q) phylo1
188 where
189 --------------------------------------
190 phylo1 :: Phylo
191 phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) phyloDocs phyloBase
192 -- phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) phyloDocs phylo
193 --------------------------------------
194 -- phylo0 :: Phylo
195 -- phylo0 = tracePhyloN 0
196 -- $ addPhyloLevel 0 phyloDocs phyloBase
197 --------------------------------------
198 phyloDocs :: Map (Date, Date) [Document]
199 phyloDocs = groupDocsByPeriod date (getPhyloPeriods phyloBase) c
200 --------------------------------------
201 phyloBase :: Phylo
202 phyloBase = tracePhyloBase
203 $ toPhyloBase q (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just q)) c termList fis
204 --------------------------------------
205
206
207 -- | To incrementally add new Levels to a Phylo by making all the linking and aggregation tasks
208 toNthLevel :: Level -> Proximity -> Cluster -> Phylo -> Phylo
209 toNthLevel lvlMax prox clus p
210 | lvl >= lvlMax = p
211 | otherwise = toNthLevel lvlMax prox clus
212 $ traceBranches (lvl + 1)
213 $ setPhyloBranches (lvl + 1)
214 -- $ transposePeriodLinks (lvl + 1)
215 $ traceTranspose (lvl + 1) Descendant
216 $ transposeLinks (lvl + 1) Descendant
217 $ traceTranspose (lvl + 1) Ascendant
218 $ transposeLinks (lvl + 1) Ascendant
219 $ tracePhyloN (lvl + 1)
220 $ setLevelLinks (lvl, lvl + 1)
221 $ addPhyloLevel (lvl + 1)
222 (clusters) p
223 where
224 --------------------------------------
225 clusters :: Map (Date,Date) [PhyloCluster]
226 clusters = phyloToClusters lvl clus p
227 --------------------------------------
228 lvl :: Level
229 lvl = getLastLevel p
230 --------------------------------------
231
232
233 -- | To reconstruct the Level 1 of a Phylo based on a Clustering Method
234 toPhylo1 :: Cluster -> Proximity -> Map (Date, Date) [Document] -> Phylo -> Phylo
235 toPhylo1 clus prox d p = case clus of
236 Fis (FisParams k s t) -> traceBranches 1
237 -- $ reLinkPhyloBranches 1
238 -- $ traceBranches 1
239 $ setPhyloBranches 1
240 $ traceTempoMatching Descendant 1
241 $ interTempoMatching Descendant 1 prox
242 $ traceTempoMatching Ascendant 1
243 $ interTempoMatching Ascendant 1 prox
244 $ tracePhyloN 1
245 -- $ setLevelLinks (0,1)
246 $ addPhyloLevel 1 (getPhyloFis phyloFis)
247 $ trace (show (size $ getPhyloFis phyloFis) <> " Fis created") $ phyloFis
248 where
249 --------------------------------------
250 phyloFis :: Phylo
251 phyloFis = if (null $ getPhyloFis p)
252 then p & phylo_fis .~ refineFis (docsToFis d p) k s t
253 else p & phylo_fis .~ docsToFis d p
254 --------------------------------------
255
256 _ -> panic "[ERR][Viz.Phylo.LevelMaker.toPhylo1] fst clustering not recognized"
257
258
259 -- | To create the base of the Phylo (foundations, periods, cooc, etc)
260 toPhyloBase :: PhyloQueryBuild -> PhyloParam -> [Document] -> TermList -> Map (Date,Date) [PhyloFis] -> Phylo
261 toPhyloBase q p c termList fis = initPhyloBase periods foundations nbDocs cooc fis p
262 where
263 --------------------------------------
264 cooc :: Map Date (Map (Int,Int) Double)
265 cooc = docsToCooc c (foundations ^. phylo_foundationsRoots)
266 --------------------------------------
267 nbDocs :: Map Date Double
268 nbDocs = countDocs $ map (\doc -> (date doc, text doc)) c
269 --------------------------------------
270 foundations :: PhyloFoundations
271 foundations = PhyloFoundations (initFoundationsRoots (termListToNgrams termList)) termList
272 --------------------------------------
273 periods :: [(Date,Date)]
274 periods = initPeriods (getPeriodGrain q) (getPeriodSteps q)
275 $ both date (head' "toPhyloBase" c, last' "toPhyloBase" c)
276 --------------------------------------
277
278
279 -----------------
280 -- | Tracers | --
281 -----------------
282
283
284 tracePhyloN :: Level -> Phylo -> Phylo
285 tracePhyloN lvl p = trace ("\n---------------\n--| Phylo " <> show (lvl) <> " |--\n---------------\n\n"
286 <> show (length $ getGroupsWithLevel lvl p) <> " groups created \n") p
287
288 traceTranspose :: Level -> Filiation -> Phylo -> Phylo
289 traceTranspose lvl fil p = trace ("----\n Transpose " <> show (fil) <> " links for " <> show (length $ getGroupsWithLevel lvl p) <> " groups\n") p
290
291
292 tracePhyloBase :: Phylo -> Phylo
293 tracePhyloBase p = trace ( "\n-------------\n--| Phylo |--\n-------------\n\n"
294 <> show (length $ _phylo_periods p) <> " periods from "
295 <> show (getPhyloPeriodId $ (head' "PhyloMaker") $ _phylo_periods p)
296 <> " to "
297 <> show (getPhyloPeriodId $ last $ _phylo_periods p)
298 <> "\n"
299 <> show ( Vector.length $ getFoundationsRoots p) <> " foundations roots \n") p
300
301
302 traceTempoMatching :: Filiation -> Level -> Phylo -> Phylo
303 traceTempoMatching fil lvl p = trace ( "----\n" <> show (fil) <> " filtered temporal Matching in Phylo" <> show (lvl) <> " :\n"
304 <> "count : " <> show (length pts) <> " pointers\n") p
305 where
306 --------------------------------------
307 pts :: [Pointer]
308 pts = concat $ map (\g -> getGroupPointers PeriodEdge fil g) $ getGroupsWithLevel lvl p
309 --------------------------------------
310
311
312 traceBranches :: Level -> Phylo -> Phylo
313 traceBranches lvl p = trace ( "----\n" <> "Branches in Phylo" <> show lvl <> " :\n"
314 <> "count : " <> show (length $ filter (\(lvl',_) -> lvl' == lvl ) $ getBranchIds p) <> " branches\n"
315 <> "count : " <> show (length $ getGroupsWithLevel lvl p) <> " groups\n"
316 <> "groups by branch : " <> show (percentile 25 (VS.fromList brs)) <> " (25%) "
317 <> show (percentile 50 (VS.fromList brs)) <> " (50%) "
318 <> show (percentile 75 (VS.fromList brs)) <> " (75%) "
319 <> show (percentile 90 (VS.fromList brs)) <> " (90%)\n") p
320 where
321 --------------------------------------
322 brs :: [Double]
323 brs = sort $ map (\(_,gs) -> fromIntegral $ length gs)
324 $ filter (\(id,_) -> (fst id) == lvl)
325 $ getGroupsByBranches p
326 --------------------------------------