]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Viz/Phylo/LevelMaker.hs
[Phylo] readings
[gargantext.git] / src / Gargantext / Core / Viz / Phylo / LevelMaker.hs
1 {-|
2 Module : Gargantext.Core.Viz.Phylo.Tools
3 Description : Phylomemy Tools to build/manage it
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10
11 -}
12
13 {-# LANGUAGE TypeSynonymInstances #-}
14
15 module Gargantext.Core.Viz.Phylo.LevelMaker
16 where
17
18 import Control.Parallel.Strategies
19 import Control.Lens hiding (both, Level)
20 import Data.List ((++), sort, concat, nub, last, null)
21 import Data.Map (Map, (!), empty, singleton, size)
22 import Data.Text (Text)
23 import Data.Tuple.Extra
24 import Data.Vector (Vector)
25 import Gargantext.Prelude
26 import Gargantext.Core.Viz.Phylo
27 import Gargantext.Core.Viz.Phylo.Metrics
28 import Gargantext.Core.Viz.Phylo.Aggregates
29 import Gargantext.Core.Viz.Phylo.Cluster
30 import Gargantext.Core.Viz.Phylo.BranchMaker
31 import Gargantext.Core.Viz.Phylo.LinkMaker
32 import Gargantext.Core.Viz.Phylo.Tools
33 import Gargantext.Core.Text.Context (TermList)
34
35 import qualified Data.Vector.Storable as VS
36 import qualified Data.Set as Set
37 import qualified Data.Vector as Vector
38
39 import Debug.Trace (trace)
40 import Numeric.Statistics (percentile)
41
42
43 -------------------------
44 -- | PhyloLevelMaker | --
45 -------------------------
46
47
48 -- | A typeClass for polymorphic PhyloLevel functions
49 class PhyloLevelMaker aggregate
50 where
51 -- | To add a new Level of PhyloGroups to a Phylo based on a list of Aggregates
52 addPhyloLevel :: Level -> Map (Date,Date) [aggregate] -> Phylo -> Phylo
53 -- | To create a list of PhyloGroups based on a list of aggregates a
54 toPhyloGroups :: Level -> (Date,Date) -> [aggregate] -> Map (Date,Date) [aggregate] -> Phylo -> [PhyloGroup]
55
56
57 instance PhyloLevelMaker PhyloCluster
58 where
59 --------------------------------------
60 -- | Level -> Map (Date,Date) [Cluster] -> Phylo -> Phylo
61 addPhyloLevel lvl m p
62 | lvl > 1 = addPhyloLevel' lvl m p
63 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Clusters at level < 2")
64 --------------------------------------
65 -- | Level -> (Date,Date) -> [Cluster] -> Map (Date,Date) [Cluster] -> Phylo -> [PhyloGroup]
66 toPhyloGroups lvl (d,d') l m p =
67 let clusters = map (\(idx,cluster) -> clusterToGroup (d,d') lvl idx "" cluster m p) $ zip [1..] l
68 clusters' = clusters `using` parList rdeepseq
69 in clusters'
70 --------------------------------------
71
72
73 instance PhyloLevelMaker PhyloFis
74 where
75 --------------------------------------
76 -- | Level -> Map (Date,Date) [Fis] -> Phylo -> Phylo
77 addPhyloLevel lvl m p
78 | lvl == 1 = addPhyloLevel' lvl m p
79 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Fis at level <> 1")
80 --------------------------------------
81 -- | Level -> (Date,Date) -> [Fis] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
82 toPhyloGroups lvl (d,d') l _ p =
83 let groups = map (\(idx,fis) -> cliqueToGroup (d,d') lvl idx "" fis (getPhyloCooc p) (getFoundationsRoots p)) $ zip [1..] l
84 groups' = groups `using` parList rdeepseq
85 in groups'
86 --------------------------------------
87
88
89 instance PhyloLevelMaker Document
90 where
91 --------------------------------------
92 -- | Level -> Map (Date,Date) [Document] -> Phylo -> Phylo
93 addPhyloLevel lvl m p
94 | lvl == 0 = addPhyloLevel' lvl m p
95 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Documents at level <> 0")
96 --------------------------------------
97 -- | Level -> (Date,Date) -> [Document] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
98 toPhyloGroups lvl (d,d') l _m p = map (\ngram -> ngramsToGroup (d,d') lvl (getIdxInRoots ngram p) ngram [ngram] p)
99 $ (nub . concat)
100 $ map text l
101 --------------------------------------
102
103
104 -- | To traverse a Phylo and add a new PhyloLevel linked to a new list of PhyloGroups
105 addPhyloLevel' :: PhyloLevelMaker a => Level -> Map (Date, Date) [a] -> Phylo -> Phylo
106 addPhyloLevel' lvl m p = alterPhyloPeriods
107 (\period -> let pId = _phylo_periodId period
108 in over (phylo_periodLevels)
109 (\phyloLevels ->
110 let groups = toPhyloGroups lvl pId (m ! pId) m p
111 in trace (show (length groups) <> " groups for " <> show (pId) ) $ phyloLevels ++ [PhyloLevel (pId, lvl) groups]
112 ) period) p
113
114
115 ----------------------
116 -- | toPhyloGroup | --
117 ----------------------
118
119
120 -- | To transform a Clique into a PhyloGroup
121 cliqueToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloFis -> Map Date (Map (Int,Int) Double) -> Vector Ngrams -> PhyloGroup
122 cliqueToGroup prd lvl idx lbl fis cooc' root = PhyloGroup ((prd, lvl), idx) lbl ngrams
123 (getNgramsMeta cooc ngrams)
124 -- empty
125 (singleton "support" (fromIntegral $ getSupport fis))
126 Nothing
127 cooc
128 [] [] [] childs
129 where
130 --------------------------------------
131 cooc :: Map (Int, Int) Double
132 cooc = getMiniCooc (listToFullCombi ngrams) (periodsToYears [prd]) cooc'
133 --------------------------------------
134 ngrams :: [Int]
135 ngrams = sort $ map (\x -> getIdxInRoots' x root)
136 $ Set.toList
137 $ getClique fis
138 --------------------------------------
139 childs :: [Pointer]
140 childs = map (\n -> (((prd, lvl - 1), n),1)) ngrams
141 --------------------------------------
142
143
144 -- | To transform a Cluster into a Phylogroup
145 clusterToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloCluster -> Map (Date,Date) [PhyloCluster]-> Phylo-> PhyloGroup
146 clusterToGroup prd lvl idx lbl groups _m p =
147 PhyloGroup ((prd, lvl), idx) lbl ngrams
148 (getNgramsMeta cooc ngrams)
149 -- empty
150 empty
151 Nothing
152 cooc
153 ascLink desLink [] childs
154 where
155 --------------------------------------
156 cooc :: Map (Int, Int) Double
157 cooc = getMiniCooc (listToFullCombi ngrams) (periodsToYears [prd]) (getPhyloCooc p)
158 --------------------------------------
159 childs :: [Pointer]
160 childs = map (\g -> (getGroupId g, 1)) groups
161 ascLink = concat $ map getGroupPeriodParents groups
162 desLink = concat $ map getGroupPeriodChilds groups
163 --------------------------------------
164 ngrams :: [Int]
165 ngrams = (sort . nub . concat) $ map getGroupNgrams groups
166 --------------------------------------
167
168
169 -- | To transform a list of Ngrams into a PhyloGroup
170 ngramsToGroup :: PhyloPeriodId -> Level -> Int -> Text -> [Ngrams] -> Phylo -> PhyloGroup
171 ngramsToGroup prd lvl idx lbl ngrams p = PhyloGroup ((prd, lvl), idx) lbl (sort $ map (\x -> getIdxInRoots x p) ngrams) empty empty Nothing
172 (getMiniCooc (listToFullCombi $ sort $ map (\x -> getIdxInRoots x p) ngrams) (periodsToYears [prd]) (getPhyloCooc p))
173 [] [] [] []
174
175
176 ----------------------
177 -- | toPhyloLevel | --
178 ----------------------
179
180
181 -- | To reconstruct the Phylo from a set of Document to a given Level
182 toPhylo :: PhyloQueryBuild -> [Document] -> TermList -> Map (Date,Date) [PhyloFis] -> Phylo
183 toPhylo q c termList fis = toNthLevel (getNthLevel q) (getInterTemporalMatching q) (getNthCluster q) phylo1
184 where
185 --------------------------------------
186 phylo1 :: Phylo
187 phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) phyloDocs phyloBase
188 -- phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) phyloDocs phylo
189 --------------------------------------
190 -- phylo0 :: Phylo
191 -- phylo0 = tracePhyloN 0
192 -- $ addPhyloLevel 0 phyloDocs phyloBase
193 --------------------------------------
194 phyloDocs :: Map (Date, Date) [Document]
195 phyloDocs = groupDocsByPeriod date (getPhyloPeriods phyloBase) c
196 --------------------------------------
197 phyloBase :: Phylo
198 phyloBase = tracePhyloBase
199 $ toPhyloBase q init c termList fis
200 where
201 init = initPhyloParam (Just defaultPhyloVersion)
202 (Just defaultSoftware )
203 (Just q )
204 ---------------------------------------
205
206
207 -- | To incrementally add new Levels to a Phylo by making all the linking and aggregation tasks
208 toNthLevel :: Level -> Proximity -> Cluster -> Phylo -> Phylo
209 toNthLevel lvlMax prox clus p
210 | lvl >= lvlMax = p
211 | otherwise = toNthLevel lvlMax prox clus
212 $ traceBranches (lvl + 1)
213 $ setPhyloBranches (lvl + 1)
214 -- \$ transposePeriodLinks (lvl + 1)
215 $ traceTranspose (lvl + 1) Descendant
216 $ transposeLinks (lvl + 1) Descendant
217 $ traceTranspose (lvl + 1) Ascendant
218 $ transposeLinks (lvl + 1) Ascendant
219 $ tracePhyloN (lvl + 1)
220 $ setLevelLinks (lvl, lvl + 1)
221 $ addPhyloLevel (lvl + 1) (clusters) p
222 where
223 --------------------------------------
224 clusters :: Map (Date,Date) [PhyloCluster]
225 clusters = phyloToClusters lvl clus p
226 --------------------------------------
227 lvl :: Level
228 lvl = getLastLevel p
229 --------------------------------------
230
231
232 -- | To reconstruct the Level 1 of a Phylo based on a Clustering Method
233 toPhylo1 :: Cluster -> Proximity -> Map (Date, Date) [Document] -> Phylo -> Phylo
234 toPhylo1 clus prox d p = case clus of
235 Fis (FisParams k s t) -> traceBranches 1
236 -- \$ reLinkPhyloBranches 1
237 -- \$ traceBranches 1
238 $ setPhyloBranches 1
239 $ traceTempoMatching Descendant 1
240 $ interTempoMatching Descendant 1 prox
241 $ traceTempoMatching Ascendant 1
242 $ interTempoMatching Ascendant 1 prox
243 $ tracePhyloN 1
244 -- \$ setLevelLinks (0,1)
245 $ addPhyloLevel 1 (getPhyloFis phyloFis)
246 $ trace (show (size $ getPhyloFis phyloFis) <> " Fis created") $ phyloFis
247 where
248 --------------------------------------
249 phyloFis :: Phylo
250 phyloFis = if (null $ getPhyloFis p)
251 then p & phylo_fis .~ refineFis (docsToFis d p) k s t
252 else p & phylo_fis .~ docsToFis d p
253 --------------------------------------
254
255 _ -> panic "[ERR][Viz.Phylo.LevelMaker.toPhylo1] fst clustering not recognized"
256
257
258 -- | To create the base of the Phylo (foundations, periods, cooc, etc)
259 toPhyloBase :: PhyloQueryBuild -> PhyloParam -> [Document] -> TermList -> Map (Date,Date) [PhyloFis] -> Phylo
260 toPhyloBase q p c termList fis = initPhyloBase periods foundations nbDocs cooc fis p
261 where
262 --------------------------------------
263 cooc :: Map Date (Map (Int,Int) Double)
264 cooc = docsToCooc c (foundations ^. phylo_foundationsRoots)
265 --------------------------------------
266 nbDocs :: Map Date Double
267 nbDocs = countDocs $ map (\doc -> (date doc, text doc)) c
268 --------------------------------------
269 foundations :: PhyloFoundations
270 foundations = PhyloFoundations (initFoundationsRoots (termListToNgrams termList)) termList
271 --------------------------------------
272 periods :: [(Date,Date)]
273 periods = initPeriods (getPeriodGrain q) (getPeriodSteps q)
274 $ both date (head' "toPhyloBase" c, last' "toPhyloBase" c)
275 --------------------------------------
276
277
278 -----------------
279 -- | Tracers | --
280 -----------------
281
282
283 tracePhyloN :: Level -> Phylo -> Phylo
284 tracePhyloN lvl p = trace ("\n---------------\n--| Phylo " <> show (lvl) <> " |--\n---------------\n\n"
285 <> show (length $ getGroupsWithLevel lvl p) <> " groups created \n") p
286
287 traceTranspose :: Level -> Filiation -> Phylo -> Phylo
288 traceTranspose lvl fil p = trace ("----\n Transpose " <> show (fil) <> " links for " <> show (length $ getGroupsWithLevel lvl p) <> " groups\n") p
289
290
291 tracePhyloBase :: Phylo -> Phylo
292 tracePhyloBase p = trace ( "\n-------------\n--| Phylo |--\n-------------\n\n"
293 <> show (length $ _phylo_periods p) <> " periods from "
294 <> show (getPhyloPeriodId $ (head' "PhyloMaker") $ _phylo_periods p)
295 <> " to "
296 <> show (getPhyloPeriodId $ last $ _phylo_periods p)
297 <> "\n"
298 <> show ( Vector.length $ getFoundationsRoots p) <> " foundations roots \n") p
299
300
301 traceTempoMatching :: Filiation -> Level -> Phylo -> Phylo
302 traceTempoMatching fil lvl p = trace ( "----\n" <> show (fil) <> " filtered temporal Matching in Phylo" <> show (lvl) <> " :\n"
303 <> "count : " <> show (length pts) <> " pointers\n") p
304 where
305 --------------------------------------
306 pts :: [Pointer]
307 pts = concat $ map (\g -> getGroupPointers PeriodEdge fil g) $ getGroupsWithLevel lvl p
308 --------------------------------------
309
310
311 traceBranches :: Level -> Phylo -> Phylo
312 traceBranches lvl p = trace ( "----\n" <> "Branches in Phylo" <> show lvl <> " :\n"
313 <> "count : " <> show (length $ filter (\(lvl',_) -> lvl' == lvl ) $ getBranchIds p) <> " branches\n"
314 <> "count : " <> show (length $ getGroupsWithLevel lvl p) <> " groups\n"
315 <> "groups by branch : " <> show (percentile 25 (VS.fromList brs)) <> " (25%) "
316 <> show (percentile 50 (VS.fromList brs)) <> " (50%) "
317 <> show (percentile 75 (VS.fromList brs)) <> " (75%) "
318 <> show (percentile 90 (VS.fromList brs)) <> " (90%)\n") p
319 where
320 --------------------------------------
321 brs :: [Double]
322 brs = sort $ map (\(_,gs) -> fromIntegral $ length gs)
323 $ filter (\(id,_) -> (fst id) == lvl)
324 $ getGroupsByBranches p
325 --------------------------------------