]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo/LevelMaker.hs
still some refactoring
[gargantext.git] / src / Gargantext / Viz / Phylo / LevelMaker.hs
1 {-|
2 Module : Gargantext.Viz.Phylo.Tools
3 Description : Phylomemy Tools to build/manage it
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10
11 -}
12
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE FlexibleContexts #-}
15 {-# LANGUAGE OverloadedStrings #-}
16 {-# LANGUAGE TypeSynonymInstances #-}
17 {-# LANGUAGE FlexibleInstances #-}
18
19 module Gargantext.Viz.Phylo.LevelMaker
20 where
21
22 import Control.Parallel.Strategies
23 import Control.Lens hiding (both, Level)
24 import Data.List ((++), sort, concat, nub, zip, last)
25 import Data.Map (Map, (!), empty, singleton)
26 import Data.Text (Text)
27 import Data.Tuple.Extra
28 import Gargantext.Prelude
29 import Gargantext.Viz.Phylo
30 import Gargantext.Viz.Phylo.Metrics
31 import Gargantext.Viz.Phylo.Aggregates
32 import Gargantext.Viz.Phylo.Cluster
33 import Gargantext.Viz.Phylo.BranchMaker
34 import Gargantext.Viz.Phylo.LinkMaker
35 import Gargantext.Viz.Phylo.Tools
36 import Gargantext.Text.Context (TermList)
37
38 import qualified Data.Vector.Storable as VS
39 import qualified Data.Set as Set
40 import qualified Data.Vector as Vector
41
42 import Debug.Trace (trace)
43 import Numeric.Statistics (percentile)
44
45
46 -------------------------
47 -- | PhyloLevelMaker | --
48 -------------------------
49
50
51 -- | A typeClass for polymorphic PhyloLevel functions
52 class PhyloLevelMaker aggregate
53 where
54 -- | To add a new Level of PhyloGroups to a Phylo based on a list of Aggregates
55 addPhyloLevel :: Level -> Map (Date,Date) [aggregate] -> Phylo -> Phylo
56 -- | To create a list of PhyloGroups based on a list of aggregates a
57 toPhyloGroups :: Level -> (Date,Date) -> [aggregate] -> Map (Date,Date) [aggregate] -> Phylo -> [PhyloGroup]
58
59
60 instance PhyloLevelMaker PhyloCluster
61 where
62 --------------------------------------
63 -- | Level -> Map (Date,Date) [Cluster] -> Phylo -> Phylo
64 addPhyloLevel lvl m p
65 | lvl > 1 = addPhyloLevel' lvl m p
66 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Clusters at level < 2")
67 --------------------------------------
68 -- | Level -> (Date,Date) -> [Cluster] -> Map (Date,Date) [Cluster] -> Phylo -> [PhyloGroup]
69 toPhyloGroups lvl (d,d') l m p =
70 let clusters = map (\(idx,cluster) -> clusterToGroup (d,d') lvl idx "" cluster m p) $ zip [1..] l
71 clusters' = clusters `using` parList rdeepseq
72 in clusters'
73 --------------------------------------
74
75
76 instance PhyloLevelMaker PhyloFis
77 where
78 --------------------------------------
79 -- | Level -> Map (Date,Date) [Fis] -> Phylo -> Phylo
80 addPhyloLevel lvl m p
81 | lvl == 1 = addPhyloLevel' lvl m p
82 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Fis at level <> 1")
83 --------------------------------------
84 -- | Level -> (Date,Date) -> [Fis] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
85 toPhyloGroups lvl (d,d') l _ p =
86 let groups = map (\(idx,fis) -> cliqueToGroup (d,d') lvl idx "" fis p) $ zip [1..] l
87 groups' = groups `using` parList rdeepseq
88 in groups'
89 --------------------------------------
90
91
92 instance PhyloLevelMaker Document
93 where
94 --------------------------------------
95 -- | Level -> Map (Date,Date) [Document] -> Phylo -> Phylo
96 addPhyloLevel lvl m p
97 | lvl == 0 = addPhyloLevel' lvl m p
98 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Documents at level <> 0")
99 --------------------------------------
100 -- | Level -> (Date,Date) -> [Document] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
101 toPhyloGroups lvl (d,d') l _m p = map (\ngram -> ngramsToGroup (d,d') lvl (getIdxInRoots ngram p) ngram [ngram] p)
102 $ (nub . concat)
103 $ map text l
104 --------------------------------------
105
106
107 -- | To traverse a Phylo and add a new PhyloLevel linked to a new list of PhyloGroups
108 addPhyloLevel' :: PhyloLevelMaker a => Level -> Map (Date, Date) [a] -> Phylo -> Phylo
109 addPhyloLevel' lvl m p = alterPhyloPeriods
110 (\period -> let pId = _phylo_periodId period
111 in over (phylo_periodLevels)
112 (\phyloLevels ->
113 let groups = toPhyloGroups lvl pId (m ! pId) m p
114 in phyloLevels ++ [PhyloLevel (pId, lvl) groups]
115 ) period) p
116
117
118 ----------------------
119 -- | toPhyloGroup | --
120 ----------------------
121
122
123 -- | To transform a Clique into a PhyloGroup
124 cliqueToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloFis -> Phylo -> PhyloGroup
125 cliqueToGroup prd lvl idx lbl fis p = PhyloGroup ((prd, lvl), idx) lbl ngrams
126 (getNgramsMeta cooc ngrams)
127 -- empty
128 (singleton "support" (fromIntegral $ getSupport fis))
129 Nothing
130 cooc
131 [] [] [] childs
132 where
133 --------------------------------------
134 cooc :: Map (Int, Int) Double
135 cooc = getMiniCooc (listToFullCombi ngrams) (periodsToYears [prd]) (getPhyloCooc p)
136 --------------------------------------
137 ngrams :: [Int]
138 ngrams = sort $ map (\x -> getIdxInRoots x p)
139 $ Set.toList
140 $ getClique fis
141 --------------------------------------
142 childs :: [Pointer]
143 childs = map (\n -> (((prd, lvl - 1), n),1)) ngrams
144 --------------------------------------
145
146
147 -- | To transform a Cluster into a Phylogroup
148 clusterToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloCluster -> Map (Date,Date) [PhyloCluster]-> Phylo-> PhyloGroup
149 clusterToGroup prd lvl idx lbl groups _m p =
150 PhyloGroup ((prd, lvl), idx) lbl ngrams
151 (getNgramsMeta cooc ngrams)
152 -- empty
153 empty
154 Nothing
155 cooc
156 ascLink desLink [] childs
157 where
158 --------------------------------------
159 cooc :: Map (Int, Int) Double
160 cooc = getMiniCooc (listToFullCombi ngrams) (periodsToYears [prd]) (getPhyloCooc p)
161 --------------------------------------
162 childs :: [Pointer]
163 childs = map (\g -> (getGroupId g, 1)) groups
164 ascLink = concat $ map getGroupPeriodParents groups
165 desLink = concat $ map getGroupPeriodChilds groups
166 --------------------------------------
167 ngrams :: [Int]
168 ngrams = (sort . nub . concat) $ map getGroupNgrams groups
169 --------------------------------------
170
171
172 -- | To transform a list of Ngrams into a PhyloGroup
173 ngramsToGroup :: PhyloPeriodId -> Level -> Int -> Text -> [Ngrams] -> Phylo -> PhyloGroup
174 ngramsToGroup prd lvl idx lbl ngrams p = PhyloGroup ((prd, lvl), idx) lbl (sort $ map (\x -> getIdxInRoots x p) ngrams) empty empty Nothing
175 (getMiniCooc (listToFullCombi $ sort $ map (\x -> getIdxInRoots x p) ngrams) (periodsToYears [prd]) (getPhyloCooc p))
176 [] [] [] []
177
178
179 ----------------------
180 -- | toPhyloLevel | --
181 ----------------------
182
183
184 -- | To reconstruct the Phylo from a set of Document to a given Level
185 toPhylo :: PhyloQueryBuild -> [Document] -> TermList -> Map (Date,Date) [PhyloFis] -> Phylo
186 toPhylo q c termList fis = toNthLevel (getNthLevel q) (getInterTemporalMatching q) (getNthCluster q) phylo1
187 where
188 --------------------------------------
189 phylo1 :: Phylo
190 phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) phyloDocs phylo0
191 --------------------------------------
192 phylo0 :: Phylo
193 phylo0 = tracePhyloN 0
194 $ addPhyloLevel 0 phyloDocs phyloBase
195 --------------------------------------
196 phyloDocs :: Map (Date, Date) [Document]
197 phyloDocs = groupDocsByPeriod date (getPhyloPeriods phyloBase) c
198 --------------------------------------
199 phyloBase :: Phylo
200 phyloBase = tracePhyloBase
201 $ toPhyloBase q (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just q)) c termList fis
202 --------------------------------------
203
204
205 -- | To incrementally add new Levels to a Phylo by making all the linking and aggregation tasks
206 toNthLevel :: Level -> Proximity -> Cluster -> Phylo -> Phylo
207 toNthLevel lvlMax prox clus p
208 | lvl >= lvlMax = p
209 | otherwise = toNthLevel lvlMax prox clus
210 $ traceBranches (lvl + 1)
211 $ setPhyloBranches (lvl + 1)
212 $ transposePeriodLinks (lvl + 1)
213 $ tracePhyloN (lvl + 1)
214 $ setLevelLinks (lvl, lvl + 1)
215 $ addPhyloLevel (lvl + 1)
216 (clusters) p
217 where
218 --------------------------------------
219 clusters :: Map (Date,Date) [PhyloCluster]
220 clusters = phyloToClusters lvl clus p
221 --------------------------------------
222 lvl :: Level
223 lvl = getLastLevel p
224 --------------------------------------
225
226
227 -- | To reconstruct the Level 1 of a Phylo based on a Clustering Method
228 toPhylo1 :: Cluster -> Proximity -> Map (Date, Date) [Document] -> Phylo -> Phylo
229 toPhylo1 clus prox d p = case clus of
230 Fis (FisParams k s t) -> traceBranches 1
231 -- $ reLinkPhyloBranches 1
232 -- $ traceBranches 1
233 $ setPhyloBranches 1
234 $ traceTempoMatching Descendant 1
235 $ interTempoMatching Descendant 1 prox
236 $ traceTempoMatching Ascendant 1
237 $ interTempoMatching Ascendant 1 prox
238 $ tracePhyloN 1
239 $ setLevelLinks (0,1)
240 $ addPhyloLevel 1 phyloFis phylo'
241 where
242 --------------------------------------
243 phyloFis :: Map (Date, Date) [PhyloFis]
244 phyloFis = refineFis (getPhyloFis phylo') k s t
245 --------------------------------------
246 phylo' :: Phylo
247 phylo' = docsToFis d p
248 --------------------------------------
249
250 _ -> panic "[ERR][Viz.Phylo.LevelMaker.toPhylo1] fst clustering not recognized"
251
252
253 -- | To create the base of the Phylo (foundations, periods, cooc, etc)
254 toPhyloBase :: PhyloQueryBuild -> PhyloParam -> [Document] -> TermList -> Map (Date,Date) [PhyloFis] -> Phylo
255 toPhyloBase q p c termList fis = initPhyloBase periods foundations nbDocs cooc fis p
256 where
257 --------------------------------------
258 cooc :: Map Date (Map (Int,Int) Double)
259 cooc = docsToCooc c (foundations ^. phylo_foundationsRoots)
260 --------------------------------------
261 nbDocs :: Map Date Double
262 nbDocs = countDocs $ map (\doc -> (date doc, text doc)) c
263 --------------------------------------
264 foundations :: PhyloFoundations
265 foundations = PhyloFoundations (initFoundationsRoots (termListToNgrams termList)) termList
266 --------------------------------------
267 periods :: [(Date,Date)]
268 periods = initPeriods (getPeriodGrain q) (getPeriodSteps q)
269 $ both date (head' "LevelMaker" c,last c)
270 --------------------------------------
271
272
273 -----------------
274 -- | Tracers | --
275 -----------------
276
277
278 tracePhyloN :: Level -> Phylo -> Phylo
279 tracePhyloN lvl p = trace ("\n---------------\n--| Phylo " <> show (lvl) <> " |--\n---------------\n\n"
280 <> show (length $ getGroupsWithLevel lvl p) <> " groups created \n") p
281
282
283 tracePhyloBase :: Phylo -> Phylo
284 tracePhyloBase p = trace ( "\n-------------\n--| Phylo |--\n-------------\n\n"
285 <> show (length $ _phylo_periods p) <> " periods from "
286 <> show (getPhyloPeriodId $ (head' "PhyloMaker") $ _phylo_periods p)
287 <> " to "
288 <> show (getPhyloPeriodId $ last $ _phylo_periods p)
289 <> "\n"
290 <> show ( Vector.length $ getFoundationsRoots p) <> " foundations roots \n") p
291
292
293 traceTempoMatching :: Filiation -> Level -> Phylo -> Phylo
294 traceTempoMatching fil lvl p = trace ( "----\n" <> show (fil) <> " filtered temporal Matching in Phylo" <> show (lvl) <> " :\n"
295 <> "count : " <> show (length pts) <> " pointers\n") p
296 where
297 --------------------------------------
298 pts :: [Pointer]
299 pts = concat $ map (\g -> getGroupPointers PeriodEdge fil g) $ getGroupsWithLevel lvl p
300 --------------------------------------
301
302
303 traceBranches :: Level -> Phylo -> Phylo
304 traceBranches lvl p = trace ( "----\n" <> "Branches in Phylo" <> show lvl <> " :\n"
305 <> "count : " <> show (length $ filter (\(lvl',_) -> lvl' == lvl ) $ getBranchIds p) <> " branches\n"
306 <> "count : " <> show (length $ getGroupsWithLevel lvl p) <> " groups\n"
307 <> "groups by branch : " <> show (percentile 25 (VS.fromList brs)) <> " (25%) "
308 <> show (percentile 50 (VS.fromList brs)) <> " (50%) "
309 <> show (percentile 75 (VS.fromList brs)) <> " (75%) "
310 <> show (percentile 90 (VS.fromList brs)) <> " (90%)\n") p
311 where
312 --------------------------------------
313 brs :: [Double]
314 brs = sort $ map (\(_,gs) -> fromIntegral $ length gs)
315 $ filter (\(id,_) -> (fst id) == lvl)
316 $ getGroupsByBranches p
317 --------------------------------------