]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo/LevelMaker.hs
Merge branch 'dev-doc-table-optimization' of ssh://gitlab.iscpif.fr:20022/gargantext...
[gargantext.git] / src / Gargantext / Viz / Phylo / LevelMaker.hs
1 {-|
2 Module : Gargantext.Viz.Phylo.Tools
3 Description : Phylomemy Tools to build/manage it
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10
11 -}
12
13 {-# LANGUAGE TypeSynonymInstances #-}
14
15 module Gargantext.Viz.Phylo.LevelMaker
16 where
17
18 import Control.Parallel.Strategies
19 import Control.Lens hiding (both, Level)
20 import Data.List ((++), sort, concat, nub, zip, last, null)
21 import Data.Map (Map, (!), empty, singleton, size)
22 import Data.Text (Text)
23 import Data.Tuple.Extra
24 import Data.Vector (Vector)
25 import Gargantext.Prelude
26 import Gargantext.Viz.Phylo
27 import Gargantext.Viz.Phylo.Metrics
28 import Gargantext.Viz.Phylo.Aggregates
29 import Gargantext.Viz.Phylo.Cluster
30 import Gargantext.Viz.Phylo.BranchMaker
31 import Gargantext.Viz.Phylo.LinkMaker
32 import Gargantext.Viz.Phylo.Tools
33 import Gargantext.Text.Context (TermList)
34
35 import qualified Data.Vector.Storable as VS
36 import qualified Data.Set as Set
37 import qualified Data.Vector as Vector
38
39 import Debug.Trace (trace)
40 import Numeric.Statistics (percentile)
41
42
43 -------------------------
44 -- | PhyloLevelMaker | --
45 -------------------------
46
47
48 -- | A typeClass for polymorphic PhyloLevel functions
49 class PhyloLevelMaker aggregate
50 where
51 -- | To add a new Level of PhyloGroups to a Phylo based on a list of Aggregates
52 addPhyloLevel :: Level -> Map (Date,Date) [aggregate] -> Phylo -> Phylo
53 -- | To create a list of PhyloGroups based on a list of aggregates a
54 toPhyloGroups :: Level -> (Date,Date) -> [aggregate] -> Map (Date,Date) [aggregate] -> Phylo -> [PhyloGroup]
55
56
57 instance PhyloLevelMaker PhyloCluster
58 where
59 --------------------------------------
60 -- | Level -> Map (Date,Date) [Cluster] -> Phylo -> Phylo
61 addPhyloLevel lvl m p
62 | lvl > 1 = addPhyloLevel' lvl m p
63 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Clusters at level < 2")
64 --------------------------------------
65 -- | Level -> (Date,Date) -> [Cluster] -> Map (Date,Date) [Cluster] -> Phylo -> [PhyloGroup]
66 toPhyloGroups lvl (d,d') l m p =
67 let clusters = map (\(idx,cluster) -> clusterToGroup (d,d') lvl idx "" cluster m p) $ zip [1..] l
68 clusters' = clusters `using` parList rdeepseq
69 in clusters'
70 --------------------------------------
71
72
73 instance PhyloLevelMaker PhyloFis
74 where
75 --------------------------------------
76 -- | Level -> Map (Date,Date) [Fis] -> Phylo -> Phylo
77 addPhyloLevel lvl m p
78 | lvl == 1 = addPhyloLevel' lvl m p
79 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Fis at level <> 1")
80 --------------------------------------
81 -- | Level -> (Date,Date) -> [Fis] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
82 toPhyloGroups lvl (d,d') l _ p =
83 let groups = map (\(idx,fis) -> cliqueToGroup (d,d') lvl idx "" fis (getPhyloCooc p) (getFoundationsRoots p)) $ zip [1..] l
84 groups' = groups `using` parList rdeepseq
85 in groups'
86 --------------------------------------
87
88
89 instance PhyloLevelMaker Document
90 where
91 --------------------------------------
92 -- | Level -> Map (Date,Date) [Document] -> Phylo -> Phylo
93 addPhyloLevel lvl m p
94 | lvl == 0 = addPhyloLevel' lvl m p
95 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Documents at level <> 0")
96 --------------------------------------
97 -- | Level -> (Date,Date) -> [Document] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
98 toPhyloGroups lvl (d,d') l _m p = map (\ngram -> ngramsToGroup (d,d') lvl (getIdxInRoots ngram p) ngram [ngram] p)
99 $ (nub . concat)
100 $ map text l
101 --------------------------------------
102
103
104 -- | To traverse a Phylo and add a new PhyloLevel linked to a new list of PhyloGroups
105 addPhyloLevel' :: PhyloLevelMaker a => Level -> Map (Date, Date) [a] -> Phylo -> Phylo
106 addPhyloLevel' lvl m p = alterPhyloPeriods
107 (\period -> let pId = _phylo_periodId period
108 in over (phylo_periodLevels)
109 (\phyloLevels ->
110 let groups = toPhyloGroups lvl pId (m ! pId) m p
111 in trace (show (length groups) <> " groups for " <> show (pId) ) $ phyloLevels ++ [PhyloLevel (pId, lvl) groups]
112 ) period) p
113
114
115 ----------------------
116 -- | toPhyloGroup | --
117 ----------------------
118
119
120 -- | To transform a Clique into a PhyloGroup
121 cliqueToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloFis -> Map Date (Map (Int,Int) Double) -> Vector Ngrams -> PhyloGroup
122 cliqueToGroup prd lvl idx lbl fis cooc' root = PhyloGroup ((prd, lvl), idx) lbl ngrams
123 (getNgramsMeta cooc ngrams)
124 -- empty
125 (singleton "support" (fromIntegral $ getSupport fis))
126 Nothing
127 cooc
128 [] [] [] childs
129 where
130 --------------------------------------
131 cooc :: Map (Int, Int) Double
132 cooc = getMiniCooc (listToFullCombi ngrams) (periodsToYears [prd]) cooc'
133 --------------------------------------
134 ngrams :: [Int]
135 ngrams = sort $ map (\x -> getIdxInRoots' x root)
136 $ Set.toList
137 $ getClique fis
138 --------------------------------------
139 childs :: [Pointer]
140 childs = map (\n -> (((prd, lvl - 1), n),1)) ngrams
141 --------------------------------------
142
143
144 -- | To transform a Cluster into a Phylogroup
145 clusterToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloCluster -> Map (Date,Date) [PhyloCluster]-> Phylo-> PhyloGroup
146 clusterToGroup prd lvl idx lbl groups _m p =
147 PhyloGroup ((prd, lvl), idx) lbl ngrams
148 (getNgramsMeta cooc ngrams)
149 -- empty
150 empty
151 Nothing
152 cooc
153 ascLink desLink [] childs
154 where
155 --------------------------------------
156 cooc :: Map (Int, Int) Double
157 cooc = getMiniCooc (listToFullCombi ngrams) (periodsToYears [prd]) (getPhyloCooc p)
158 --------------------------------------
159 childs :: [Pointer]
160 childs = map (\g -> (getGroupId g, 1)) groups
161 ascLink = concat $ map getGroupPeriodParents groups
162 desLink = concat $ map getGroupPeriodChilds groups
163 --------------------------------------
164 ngrams :: [Int]
165 ngrams = (sort . nub . concat) $ map getGroupNgrams groups
166 --------------------------------------
167
168
169 -- | To transform a list of Ngrams into a PhyloGroup
170 ngramsToGroup :: PhyloPeriodId -> Level -> Int -> Text -> [Ngrams] -> Phylo -> PhyloGroup
171 ngramsToGroup prd lvl idx lbl ngrams p = PhyloGroup ((prd, lvl), idx) lbl (sort $ map (\x -> getIdxInRoots x p) ngrams) empty empty Nothing
172 (getMiniCooc (listToFullCombi $ sort $ map (\x -> getIdxInRoots x p) ngrams) (periodsToYears [prd]) (getPhyloCooc p))
173 [] [] [] []
174
175
176 ----------------------
177 -- | toPhyloLevel | --
178 ----------------------
179
180
181 -- | To reconstruct the Phylo from a set of Document to a given Level
182 toPhylo :: PhyloQueryBuild -> [Document] -> TermList -> Map (Date,Date) [PhyloFis] -> Phylo
183 toPhylo q c termList fis = toNthLevel (getNthLevel q) (getInterTemporalMatching q) (getNthCluster q) phylo1
184 where
185 --------------------------------------
186 phylo1 :: Phylo
187 phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) phyloDocs phyloBase
188 -- phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) phyloDocs phylo
189 --------------------------------------
190 -- phylo0 :: Phylo
191 -- phylo0 = tracePhyloN 0
192 -- $ addPhyloLevel 0 phyloDocs phyloBase
193 --------------------------------------
194 phyloDocs :: Map (Date, Date) [Document]
195 phyloDocs = groupDocsByPeriod date (getPhyloPeriods phyloBase) c
196 --------------------------------------
197 phyloBase :: Phylo
198 phyloBase = tracePhyloBase
199 $ toPhyloBase q (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just q)) c termList fis
200 --------------------------------------
201
202
203 -- | To incrementally add new Levels to a Phylo by making all the linking and aggregation tasks
204 toNthLevel :: Level -> Proximity -> Cluster -> Phylo -> Phylo
205 toNthLevel lvlMax prox clus p
206 | lvl >= lvlMax = p
207 | otherwise = toNthLevel lvlMax prox clus
208 $ traceBranches (lvl + 1)
209 $ setPhyloBranches (lvl + 1)
210 -- \$ transposePeriodLinks (lvl + 1)
211 $ traceTranspose (lvl + 1) Descendant
212 $ transposeLinks (lvl + 1) Descendant
213 $ traceTranspose (lvl + 1) Ascendant
214 $ transposeLinks (lvl + 1) Ascendant
215 $ tracePhyloN (lvl + 1)
216 $ setLevelLinks (lvl, lvl + 1)
217 $ addPhyloLevel (lvl + 1)
218 (clusters) p
219 where
220 --------------------------------------
221 clusters :: Map (Date,Date) [PhyloCluster]
222 clusters = phyloToClusters lvl clus p
223 --------------------------------------
224 lvl :: Level
225 lvl = getLastLevel p
226 --------------------------------------
227
228
229 -- | To reconstruct the Level 1 of a Phylo based on a Clustering Method
230 toPhylo1 :: Cluster -> Proximity -> Map (Date, Date) [Document] -> Phylo -> Phylo
231 toPhylo1 clus prox d p = case clus of
232 Fis (FisParams k s t) -> traceBranches 1
233 -- \$ reLinkPhyloBranches 1
234 -- \$ traceBranches 1
235 $ setPhyloBranches 1
236 $ traceTempoMatching Descendant 1
237 $ interTempoMatching Descendant 1 prox
238 $ traceTempoMatching Ascendant 1
239 $ interTempoMatching Ascendant 1 prox
240 $ tracePhyloN 1
241 -- \$ setLevelLinks (0,1)
242 $ addPhyloLevel 1 (getPhyloFis phyloFis)
243 $ trace (show (size $ getPhyloFis phyloFis) <> " Fis created") $ phyloFis
244 where
245 --------------------------------------
246 phyloFis :: Phylo
247 phyloFis = if (null $ getPhyloFis p)
248 then p & phylo_fis .~ refineFis (docsToFis d p) k s t
249 else p & phylo_fis .~ docsToFis d p
250 --------------------------------------
251
252 _ -> panic "[ERR][Viz.Phylo.LevelMaker.toPhylo1] fst clustering not recognized"
253
254
255 -- | To create the base of the Phylo (foundations, periods, cooc, etc)
256 toPhyloBase :: PhyloQueryBuild -> PhyloParam -> [Document] -> TermList -> Map (Date,Date) [PhyloFis] -> Phylo
257 toPhyloBase q p c termList fis = initPhyloBase periods foundations nbDocs cooc fis p
258 where
259 --------------------------------------
260 cooc :: Map Date (Map (Int,Int) Double)
261 cooc = docsToCooc c (foundations ^. phylo_foundationsRoots)
262 --------------------------------------
263 nbDocs :: Map Date Double
264 nbDocs = countDocs $ map (\doc -> (date doc, text doc)) c
265 --------------------------------------
266 foundations :: PhyloFoundations
267 foundations = PhyloFoundations (initFoundationsRoots (termListToNgrams termList)) termList
268 --------------------------------------
269 periods :: [(Date,Date)]
270 periods = initPeriods (getPeriodGrain q) (getPeriodSteps q)
271 $ both date (head' "toPhyloBase" c, last' "toPhyloBase" c)
272 --------------------------------------
273
274
275 -----------------
276 -- | Tracers | --
277 -----------------
278
279
280 tracePhyloN :: Level -> Phylo -> Phylo
281 tracePhyloN lvl p = trace ("\n---------------\n--| Phylo " <> show (lvl) <> " |--\n---------------\n\n"
282 <> show (length $ getGroupsWithLevel lvl p) <> " groups created \n") p
283
284 traceTranspose :: Level -> Filiation -> Phylo -> Phylo
285 traceTranspose lvl fil p = trace ("----\n Transpose " <> show (fil) <> " links for " <> show (length $ getGroupsWithLevel lvl p) <> " groups\n") p
286
287
288 tracePhyloBase :: Phylo -> Phylo
289 tracePhyloBase p = trace ( "\n-------------\n--| Phylo |--\n-------------\n\n"
290 <> show (length $ _phylo_periods p) <> " periods from "
291 <> show (getPhyloPeriodId $ (head' "PhyloMaker") $ _phylo_periods p)
292 <> " to "
293 <> show (getPhyloPeriodId $ last $ _phylo_periods p)
294 <> "\n"
295 <> show ( Vector.length $ getFoundationsRoots p) <> " foundations roots \n") p
296
297
298 traceTempoMatching :: Filiation -> Level -> Phylo -> Phylo
299 traceTempoMatching fil lvl p = trace ( "----\n" <> show (fil) <> " filtered temporal Matching in Phylo" <> show (lvl) <> " :\n"
300 <> "count : " <> show (length pts) <> " pointers\n") p
301 where
302 --------------------------------------
303 pts :: [Pointer]
304 pts = concat $ map (\g -> getGroupPointers PeriodEdge fil g) $ getGroupsWithLevel lvl p
305 --------------------------------------
306
307
308 traceBranches :: Level -> Phylo -> Phylo
309 traceBranches lvl p = trace ( "----\n" <> "Branches in Phylo" <> show lvl <> " :\n"
310 <> "count : " <> show (length $ filter (\(lvl',_) -> lvl' == lvl ) $ getBranchIds p) <> " branches\n"
311 <> "count : " <> show (length $ getGroupsWithLevel lvl p) <> " groups\n"
312 <> "groups by branch : " <> show (percentile 25 (VS.fromList brs)) <> " (25%) "
313 <> show (percentile 50 (VS.fromList brs)) <> " (50%) "
314 <> show (percentile 75 (VS.fromList brs)) <> " (75%) "
315 <> show (percentile 90 (VS.fromList brs)) <> " (90%)\n") p
316 where
317 --------------------------------------
318 brs :: [Double]
319 brs = sort $ map (\(_,gs) -> fromIntegral $ length gs)
320 $ filter (\(id,_) -> (fst id) == lvl)
321 $ getGroupsByBranches p
322 --------------------------------------