]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo/LevelMaker.hs
fix the diagonal issue
[gargantext.git] / src / Gargantext / Viz / Phylo / LevelMaker.hs
1 {-|
2 Module : Gargantext.Viz.Phylo.Tools
3 Description : Phylomemy Tools to build/manage it
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10
11 -}
12
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE FlexibleContexts #-}
15 {-# LANGUAGE OverloadedStrings #-}
16 {-# LANGUAGE TypeSynonymInstances #-}
17 {-# LANGUAGE FlexibleInstances #-}
18
19 module Gargantext.Viz.Phylo.LevelMaker
20 where
21
22 import Control.Lens hiding (both, Level)
23 import Data.List ((++), sort, concat, nub, zip, last)
24 import Data.Map (Map, (!), empty, singleton)
25 import Data.Text (Text)
26 import Data.Tuple.Extra
27 import Gargantext.Prelude
28 import Gargantext.Viz.Phylo
29 import Gargantext.Viz.Phylo.Aggregates.Cluster
30 import Gargantext.Viz.Phylo.Aggregates.Document
31 import Gargantext.Viz.Phylo.Aggregates.Fis
32 import Gargantext.Viz.Phylo.BranchMaker
33 import Gargantext.Viz.Phylo.LinkMaker
34 import Gargantext.Viz.Phylo.Tools
35 import Gargantext.Viz.Phylo.Aggregates.Cooc
36 import Gargantext.Text.Context (TermList)
37
38 import qualified Data.Vector.Storable as VS
39 import qualified Data.Set as Set
40 import qualified Data.Vector as Vector
41
42 import Debug.Trace (trace)
43 import Numeric.Statistics (percentile)
44
45
46 -- | A typeClass for polymorphic PhyloLevel functions
47 class PhyloLevelMaker aggregate
48 where
49 -- | To add a new Level of PhyloGroups to a Phylo based on a list of Aggregates
50 addPhyloLevel :: Level -> Map (Date,Date) [aggregate] -> Phylo -> Phylo
51 -- | To create a list of PhyloGroups based on a list of aggregates a
52 toPhyloGroups :: Level -> (Date,Date) -> [aggregate] -> Map (Date,Date) [aggregate] -> Phylo -> [PhyloGroup]
53
54
55 instance PhyloLevelMaker PhyloCluster
56 where
57 --------------------------------------
58 -- | Level -> Map (Date,Date) [Cluster] -> Phylo -> Phylo
59 addPhyloLevel lvl m p
60 | lvl > 1 = toPhyloLevel lvl m p
61 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Clusters at level < 2")
62 --------------------------------------
63 -- | Level -> (Date,Date) -> [Cluster] -> Map (Date,Date) [Cluster] -> Phylo -> [PhyloGroup]
64 toPhyloGroups lvl (d,d') l m p = map (\(idx,cluster) -> clusterToGroup (d,d') lvl idx "" cluster m p) $ zip [1..] l
65 --------------------------------------
66
67
68 instance PhyloLevelMaker PhyloFis
69 where
70 --------------------------------------
71 -- | Level -> Map (Date,Date) [Fis] -> Phylo -> Phylo
72 addPhyloLevel lvl m p
73 | lvl == 1 = toPhyloLevel lvl m p
74 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Fis at level <> 1")
75 --------------------------------------
76 -- | Level -> (Date,Date) -> [Fis] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
77 toPhyloGroups lvl (d,d') l _ p = map (\(idx,fis) -> cliqueToGroup (d,d') lvl idx "" fis p) $ zip [1..] l
78 --------------------------------------
79
80
81 instance PhyloLevelMaker Document
82 where
83 --------------------------------------
84 -- | Level -> Map (Date,Date) [Document] -> Phylo -> Phylo
85 addPhyloLevel lvl m p
86 | lvl == 0 = toPhyloLevel lvl m p
87 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Documents at level <> 0")
88 --------------------------------------
89 -- | Level -> (Date,Date) -> [Document] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
90 toPhyloGroups lvl (d,d') l _m p = map (\(idx,ngram) -> ngramsToGroup (d,d') lvl idx ngram [ngram] p)
91 $ zip [1..]
92 $ (nub . concat)
93 $ map text l
94 --------------------------------------
95
96
97 -- | To transform a Cluster into a Phylogroup
98 clusterToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloCluster -> Map (Date,Date) [PhyloCluster]-> Phylo-> PhyloGroup
99 clusterToGroup prd lvl idx lbl groups _m p =
100 PhyloGroup ((prd, lvl), idx) lbl ngrams empty
101 Nothing
102 (getMiniCooc (listToFullCombi ngrams) (periodsToYears [prd]) (getPhyloCooc p))
103 [] [] [] childs
104 where
105 --------------------------------------
106 childs :: [Pointer]
107 childs = map (\g -> (getGroupId g, 1)) groups
108 --------------------------------------
109 ngrams :: [Int]
110 ngrams = (sort . nub . concat) $ map getGroupNgrams groups
111 --------------------------------------
112
113
114 -- | To transform a Clique into a PhyloGroup
115 cliqueToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloFis -> Phylo -> PhyloGroup
116 cliqueToGroup prd lvl idx lbl fis p =
117 PhyloGroup ((prd, lvl), idx) lbl ngrams (singleton "support" (fromIntegral $ getSupport fis)) Nothing
118 (getMiniCooc (listToFullCombi ngrams) (periodsToYears [prd]) (getPhyloCooc p))
119 [] [] [] []
120 where
121 --------------------------------------
122 ngrams :: [Int]
123 ngrams = sort $ map (\x -> getIdxInRoots x p)
124 $ Set.toList
125 $ getClique fis
126 --------------------------------------
127
128
129 -- | To transform a list of Ngrams into a PhyloGroup
130 ngramsToGroup :: PhyloPeriodId -> Level -> Int -> Text -> [Ngrams] -> Phylo -> PhyloGroup
131 ngramsToGroup prd lvl idx lbl ngrams p = PhyloGroup ((prd, lvl), idx) lbl (sort $ map (\x -> getIdxInRoots x p) ngrams) empty Nothing
132 (getMiniCooc (listToFullCombi $ sort $ map (\x -> getIdxInRoots x p) ngrams) (periodsToYears [prd]) (getPhyloCooc p))
133 [] [] [] []
134
135
136 -- | To traverse a Phylo and add a new PhyloLevel linked to a new list of PhyloGroups
137 toPhyloLevel :: PhyloLevelMaker a => Level -> Map (Date, Date) [a] -> Phylo -> Phylo
138 toPhyloLevel lvl m p = alterPhyloPeriods
139 (\period -> let pId = _phylo_periodId period
140 in over (phylo_periodLevels)
141 (\phyloLevels ->
142 let groups = toPhyloGroups lvl pId (m ! pId) m p
143 in phyloLevels ++ [PhyloLevel (pId, lvl) groups]
144 ) period) p
145
146
147 -- | To incrementally add new Levels to a Phylo by making all the linking and aggregation tasks
148 toNthLevel :: Level -> Proximity -> Cluster -> Phylo -> Phylo
149 toNthLevel lvlMax prox clus p
150 | lvl >= lvlMax = p
151 | otherwise = toNthLevel lvlMax prox clus
152 $ traceBranches (lvl + 1)
153 $ setPhyloBranches (lvl + 1)
154 $ transposePeriodLinks (lvl + 1)
155 $ setLevelLinks (lvl, lvl + 1)
156 $ addPhyloLevel (lvl + 1)
157 (clusters) p
158 where
159 --------------------------------------
160 clusters :: Map (Date,Date) [PhyloCluster]
161 clusters = phyloToClusters lvl clus p
162 --------------------------------------
163 lvl :: Level
164 lvl = getLastLevel p
165 --------------------------------------
166
167
168 -- | To reconstruct the Level 1 of a Phylo based on a Clustering Method
169 toPhylo1 :: Cluster -> Proximity -> Map (Date, Date) [Document] -> Phylo -> Phylo
170 toPhylo1 clus prox d p = case clus of
171 Fis (FisParams k s t) -> traceReBranches 1
172 $ linkPhyloBranches 1 prox
173 $ traceBranches 1
174 $ setPhyloBranches 1
175 $ traceTempoMatching Descendant 1
176 $ interTempoMatching Descendant 1 prox
177 $ traceTempoMatching Ascendant 1
178 $ interTempoMatching Ascendant 1 prox
179 $ setLevelLinks (0,1)
180 $ setLevelLinks (1,0)
181 $ addPhyloLevel 1 phyloFis phylo'
182 where
183 --------------------------------------
184 phyloFis :: Map (Date, Date) [PhyloFis]
185 phyloFis = toPhyloFis' (getPhyloFis phylo') k s t
186 --------------------------------------
187 phylo' :: Phylo
188 phylo' = docsToFis' d p
189 --------------------------------------
190
191 _ -> panic "[ERR][Viz.Phylo.LevelMaker.toPhylo1] fst clustering not recognized"
192
193
194 -- | To reconstruct the Level 0 of a Phylo
195 toPhylo0 :: Map (Date, Date) [Document] -> Phylo -> Phylo
196 toPhylo0 d p = addPhyloLevel 0 d p
197
198
199 class PhyloMaker corpus
200 where
201 toPhylo :: PhyloQueryBuild -> corpus -> [Ngrams] -> TermList -> Map (Date,Date) [PhyloFis] -> Phylo
202 toPhyloBase :: PhyloQueryBuild -> PhyloParam -> corpus -> [Ngrams] -> TermList -> Map (Date,Date) [PhyloFis] -> Phylo
203 corpusToDocs :: corpus -> Phylo -> Map (Date,Date) [Document]
204
205 instance PhyloMaker [(Date, Text)]
206 where
207 --------------------------------------
208 toPhylo q c roots termList fis = toNthLevel (getNthLevel q) (getInterTemporalMatching q) (getNthCluster q) phylo1
209 where
210 --------------------------------------
211 phylo1 :: Phylo
212 phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) phyloDocs phylo0
213 --------------------------------------
214 phylo0 :: Phylo
215 phylo0 = toPhylo0 phyloDocs phyloBase
216 --------------------------------------
217 phyloDocs :: Map (Date, Date) [Document]
218 phyloDocs = corpusToDocs c phyloBase
219 --------------------------------------
220 phyloBase :: Phylo
221 phyloBase = tracePhyloBase $ toPhyloBase q (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just q)) c roots termList fis
222 --------------------------------------
223 --------------------------------------
224 toPhyloBase q p c roots termList fis = initPhyloBase periods foundations nbDocs cooc fis p
225 where
226 --------------------------------------
227 cooc :: Map Date (Map (Int,Int) Double)
228 cooc = docsToCooc (parseDocs (foundations ^. phylo_foundationsRoots) c) (foundations ^. phylo_foundationsRoots)
229 --------------------------------------
230 nbDocs :: Map Date Double
231 nbDocs = countDocs c
232 --------------------------------------
233 foundations :: PhyloFoundations
234 foundations = PhyloFoundations (initFoundationsRoots roots) termList
235 --------------------------------------
236 periods :: [(Date,Date)]
237 periods = initPeriods (getPeriodGrain q) (getPeriodSteps q)
238 $ both fst (head' "LevelMaker" c,last c)
239 --------------------------------------
240 --------------------------------------
241 corpusToDocs c p = groupDocsByPeriod date (getPhyloPeriods p) $ parseDocs (getFoundationsRoots p) c
242
243
244 instance PhyloMaker [Document]
245 where
246 --------------------------------------
247 toPhylo q c roots termList fis = toNthLevel (getNthLevel q) (getInterTemporalMatching q) (getNthCluster q) phylo1
248 where
249 --------------------------------------
250 phylo1 :: Phylo
251 phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) phyloDocs phylo0
252 --------------------------------------
253 phylo0 :: Phylo
254 phylo0 = toPhylo0 phyloDocs phyloBase
255 --------------------------------------
256 phyloDocs :: Map (Date, Date) [Document]
257 phyloDocs = corpusToDocs c phyloBase
258 --------------------------------------
259 phyloBase :: Phylo
260 phyloBase = tracePhyloBase $ toPhyloBase q (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just q)) c roots termList fis
261 --------------------------------------
262 --------------------------------------
263 toPhyloBase q p c roots termList fis = initPhyloBase periods foundations nbDocs cooc fis p
264 where
265 --------------------------------------
266 cooc :: Map Date (Map (Int,Int) Double)
267 cooc = docsToCooc c (foundations ^. phylo_foundationsRoots)
268 --------------------------------------
269 nbDocs :: Map Date Double
270 nbDocs = countDocs $ map (\doc -> (date doc, text doc)) c
271 --------------------------------------
272 foundations :: PhyloFoundations
273 foundations = PhyloFoundations (initFoundationsRoots roots) termList
274 --------------------------------------
275 periods :: [(Date,Date)]
276 periods = initPeriods (getPeriodGrain q) (getPeriodSteps q)
277 $ both date (head' "LevelMaker" c,last c)
278 --------------------------------------
279 --------------------------------------
280 corpusToDocs c p = groupDocsByPeriod date (getPhyloPeriods p) c
281
282
283 -----------------
284 -- | Tracers | --
285 -----------------
286
287
288 tracePhylo0 :: Phylo -> Phylo
289 tracePhylo0 p = trace ("\n---------------\n--| Phylo 0 |--\n---------------\n\n") p
290
291 tracePhylo1 :: Phylo -> Phylo
292 tracePhylo1 p = trace ("\n---------------\n--| Phylo 1 |--\n---------------\n\n") p
293
294 tracePhyloN :: Level -> Phylo -> Phylo
295 tracePhyloN lvl p = trace ("\n---------------\n--| Phylo " <> show (lvl) <> " |--\n---------------\n\n") p
296
297
298 tracePhyloBase :: Phylo -> Phylo
299 tracePhyloBase p = trace ( "\n-------------\n--| Phylo |--\n-------------\n\n"
300 <> show (length $ _phylo_periods p) <> " periods from "
301 <> show (getPhyloPeriodId $ (head' "PhyloMaker") $ _phylo_periods p)
302 <> " to "
303 <> show (getPhyloPeriodId $ last $ _phylo_periods p)
304 <> "\n"
305 <> show ( Vector.length $ getFoundationsRoots p) <> " foundations roots \n") p
306
307
308 traceTempoMatching :: Filiation -> Level -> Phylo -> Phylo
309 traceTempoMatching fil lvl p = trace ( "----\n" <> show (fil) <> " filtered temporal Matching in Phylo" <> show (lvl) <> " :\n"
310 <> "count : " <> show (length pts) <> " pointers\n"
311 <> "similarity : " <> show (percentile 25 (VS.fromList sim)) <> " (25%) "
312 <> show (percentile 50 (VS.fromList sim)) <> " (50%) "
313 <> show (percentile 75 (VS.fromList sim)) <> " (75%) "
314 <> show (percentile 90 (VS.fromList sim)) <> " (90%)\n") p
315 where
316 --------------------------------------
317 sim :: [Double]
318 sim = sort $ map snd pts
319 --------------------------------------
320 pts :: [Pointer]
321 pts = concat $ map (\g -> getGroupPointers PeriodEdge fil g) $ getGroupsWithLevel lvl p
322 --------------------------------------
323
324
325 traceReBranches :: Level -> Phylo -> Phylo
326 traceReBranches lvl p = trace ( "----\n" <> "Branches in Phylo" <> show lvl <> " after relinking :\n"
327 <> "count : " <> show (length $ filter (\(lvl',_) -> lvl' == lvl ) $ getBranchIds p) <> " branches\n"
328 <> "count : " <> show (length $ getGroupsWithLevel lvl p) <> " groups\n"
329 <> "groups by branch : " <> show (percentile 25 (VS.fromList brs)) <> " (25%) "
330 <> show (percentile 50 (VS.fromList brs)) <> " (50%) "
331 <> show (percentile 75 (VS.fromList brs)) <> " (75%) "
332 <> show (percentile 90 (VS.fromList brs)) <> " (90%)\n") p
333 where
334 --------------------------------------
335 brs :: [Double]
336 brs = sort $ map (\(_,gs) -> fromIntegral $ length gs)
337 $ filter (\(id,_) -> (fst id) == lvl)
338 $ getGroupsByBranches p
339 --------------------------------------
340
341
342 traceBranches :: Level -> Phylo -> Phylo
343 traceBranches lvl p = trace ( "----\n" <> "Branches in Phylo" <> show lvl <> " :\n"
344 <> "count : " <> show (length $ filter (\(lvl',_) -> lvl' == lvl ) $ getBranchIds p) <> " branches\n"
345 <> "count : " <> show (length $ getGroupsWithLevel lvl p) <> " groups\n"
346 <> "groups by branch : " <> show (percentile 25 (VS.fromList brs)) <> " (25%) "
347 <> show (percentile 50 (VS.fromList brs)) <> " (50%) "
348 <> show (percentile 75 (VS.fromList brs)) <> " (75%) "
349 <> show (percentile 90 (VS.fromList brs)) <> " (90%)\n") p
350 where
351 --------------------------------------
352 brs :: [Double]
353 brs = sort $ map (\(_,gs) -> fromIntegral $ length gs)
354 $ filter (\(id,_) -> (fst id) == lvl)
355 $ getGroupsByBranches p
356 --------------------------------------