]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo/LevelMaker.hs
Merge branch 'dev-phylo' of https://gitlab.iscpif.fr/gargantext/haskell-gargantext...
[gargantext.git] / src / Gargantext / Viz / Phylo / LevelMaker.hs
1 {-|
2 Module : Gargantext.Viz.Phylo.Tools
3 Description : Phylomemy Tools to build/manage it
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10
11 -}
12
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE FlexibleContexts #-}
15 {-# LANGUAGE OverloadedStrings #-}
16 {-# LANGUAGE TypeSynonymInstances #-}
17 {-# LANGUAGE FlexibleInstances #-}
18
19 module Gargantext.Viz.Phylo.LevelMaker
20 where
21
22 import Control.Lens hiding (both, Level)
23 import Data.List ((++), sort, concat, nub, zip, last)
24 import Data.Map (Map, (!), empty, singleton,mapWithKey)
25 import Data.Text (Text)
26 import Data.Tuple.Extra
27 import Gargantext.Prelude
28 import Gargantext.Viz.Phylo
29 import Gargantext.Viz.Phylo.Aggregates.Cluster
30 import Gargantext.Viz.Phylo.Aggregates.Document
31 import Gargantext.Viz.Phylo.Aggregates.Fis
32 import Gargantext.Viz.Phylo.BranchMaker
33 import Gargantext.Viz.Phylo.LinkMaker
34 import Gargantext.Viz.Phylo.Tools
35 import Gargantext.Viz.Phylo.Aggregates.Cooc
36 import Gargantext.Text.Context (TermList)
37
38 import qualified Data.Vector.Storable as VS
39 import qualified Data.Set as Set
40 import qualified Data.Vector as Vector
41
42 import Debug.Trace (trace)
43 import Numeric.Statistics (percentile)
44
45
46 -- | A typeClass for polymorphic PhyloLevel functions
47 class PhyloLevelMaker aggregate
48 where
49 -- | To add a new Level of PhyloGroups to a Phylo based on a list of Aggregates
50 addPhyloLevel :: Level -> Map (Date,Date) [aggregate] -> Phylo -> Phylo
51 -- | To create a list of PhyloGroups based on a list of aggregates a
52 toPhyloGroups :: Level -> (Date,Date) -> [aggregate] -> Map (Date,Date) [aggregate] -> Phylo -> [PhyloGroup]
53
54
55 instance PhyloLevelMaker PhyloCluster
56 where
57 --------------------------------------
58 -- | Level -> Map (Date,Date) [Cluster] -> Phylo -> Phylo
59 addPhyloLevel lvl m p
60 | lvl > 1 = toPhyloLevel lvl m p
61 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Clusters at level < 2")
62 --------------------------------------
63 -- | Level -> (Date,Date) -> [Cluster] -> Map (Date,Date) [Cluster] -> Phylo -> [PhyloGroup]
64 toPhyloGroups lvl (d,d') l m p = map (\(idx,cluster) -> clusterToGroup (d,d') lvl idx "" cluster m p) $ zip [1..] l
65 --------------------------------------
66
67
68 instance PhyloLevelMaker PhyloFis
69 where
70 --------------------------------------
71 -- | Level -> Map (Date,Date) [Fis] -> Phylo -> Phylo
72 addPhyloLevel lvl m p
73 | lvl == 1 = toPhyloLevel lvl m p
74 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Fis at level <> 1")
75 --------------------------------------
76 -- | Level -> (Date,Date) -> [Fis] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
77 toPhyloGroups lvl (d,d') l _ p = map (\(idx,fis) -> cliqueToGroup (d,d') lvl idx "" fis p) $ zip [1..] l
78 --------------------------------------
79
80
81 instance PhyloLevelMaker Document
82 where
83 --------------------------------------
84 -- | Level -> Map (Date,Date) [Document] -> Phylo -> Phylo
85 addPhyloLevel lvl m p
86 | lvl == 0 = toPhyloLevel lvl m p
87 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Documents at level <> 0")
88 --------------------------------------
89 -- | Level -> (Date,Date) -> [Document] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
90 toPhyloGroups lvl (d,d') l _m p = map (\(idx,ngram) -> ngramsToGroup (d,d') lvl idx ngram [ngram] p)
91 $ zip [1..]
92 $ (nub . concat)
93 $ map text l
94 --------------------------------------
95
96
97 -- | To transform a Cluster into a Phylogroup
98 clusterToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloCluster -> Map (Date,Date) [PhyloCluster]-> Phylo-> PhyloGroup
99 clusterToGroup prd lvl idx lbl groups _m p =
100 PhyloGroup ((prd, lvl), idx) lbl ngrams empty
101 Nothing
102 (getMiniCooc (listToFullCombi ngrams) (periodsToYears [prd]) (getPhyloCooc p))
103 [] [] [] (map (\g -> (getGroupId g, 1)) groups)
104 where
105 --------------------------------------
106 ngrams :: [Int]
107 ngrams = (sort . nub . concat) $ map getGroupNgrams groups
108 --------------------------------------
109
110
111 -- | To transform a Clique into a PhyloGroup
112 cliqueToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloFis -> Phylo -> PhyloGroup
113 cliqueToGroup prd lvl idx lbl fis p =
114 PhyloGroup ((prd, lvl), idx) lbl ngrams (singleton "support" (fromIntegral $ getSupport fis)) Nothing
115 (getMiniCooc (listToFullCombi ngrams) (periodsToYears [prd]) (getPhyloCooc p))
116 [] [] [] []
117 where
118 --------------------------------------
119 ngrams :: [Int]
120 ngrams = sort $ map (\x -> getIdxInRoots x p)
121 $ Set.toList
122 $ getClique fis
123 --------------------------------------
124
125
126 -- | To transform a list of Ngrams into a PhyloGroup
127 ngramsToGroup :: PhyloPeriodId -> Level -> Int -> Text -> [Ngrams] -> Phylo -> PhyloGroup
128 ngramsToGroup prd lvl idx lbl ngrams p =
129 PhyloGroup ((prd, lvl), idx) lbl (sort $ map (\x -> getIdxInRoots x p) ngrams) empty Nothing
130 (getMiniCooc (listToFullCombi $ sort $ map (\x -> getIdxInRoots x p) ngrams) (periodsToYears [prd]) (getPhyloCooc p))
131 [] [] [] []
132
133
134 -- | To traverse a Phylo and add a new PhyloLevel linked to a new list of PhyloGroups
135 toPhyloLevel :: PhyloLevelMaker a => Level -> Map (Date, Date) [a] -> Phylo -> Phylo
136 toPhyloLevel lvl m p = alterPhyloPeriods
137 (\period -> let pId = _phylo_periodId period
138 in over (phylo_periodLevels)
139 (\phyloLevels ->
140 let groups = toPhyloGroups lvl pId (m ! pId) m p
141 in phyloLevels ++ [PhyloLevel (pId, lvl) groups]
142 ) period) p
143
144
145 -- | To incrementally add new Levels to a Phylo by making all the linking and aggregation tasks
146 toNthLevel :: Level -> Proximity -> Cluster -> Phylo -> Phylo
147 toNthLevel lvlMax prox clus p
148 | lvl >= lvlMax = p
149 | otherwise = toNthLevel lvlMax prox clus
150 $ traceBranches (lvl + 1)
151 $ setPhyloBranches (lvl + 1)
152 $ transposePeriodLinks (lvl + 1)
153 $ setLevelLinks (lvl, lvl + 1)
154 $ trace (show (mapWithKey (\k v -> (k,length v)) clusters))
155 $ addPhyloLevel (lvl + 1)
156 (clusters) p
157 where
158 --------------------------------------
159 clusters = phyloToClusters lvl clus p
160 --------------------------------------
161 lvl :: Level
162 lvl = getLastLevel p
163 --------------------------------------
164
165
166 -- | To reconstruct the Level 1 of a Phylo based on a Clustering Method
167 toPhylo1 :: Cluster -> Proximity -> [Metric] -> [Filter] -> Map (Date, Date) [Document] -> Phylo -> Phylo
168 toPhylo1 clus prox metrics filters d p = case clus of
169 Fis (FisParams k s t) -> traceBranches 1
170 $ setPhyloBranches 1
171 $ traceTempoMatching Descendant 1
172 $ interTempoMatching Descendant 1 prox
173 $ traceTempoMatching Ascendant 1
174 $ interTempoMatching Ascendant 1 prox
175 $ setLevelLinks (0,1)
176 $ setLevelLinks (1,0)
177 $ addPhyloLevel 1 phyloFis phylo'
178 where
179 --------------------------------------
180 phyloFis :: Map (Date, Date) [PhyloFis]
181 phyloFis = toPhyloFis' (getPhyloFis phylo') k s t metrics filters
182 --------------------------------------
183 phylo' :: Phylo
184 phylo' = docsToFis' d p
185 --------------------------------------
186
187 _ -> panic "[ERR][Viz.Phylo.LevelMaker.toPhylo1] fst clustering not recognized"
188
189
190 -- | To reconstruct the Level 0 of a Phylo
191 toPhylo0 :: Map (Date, Date) [Document] -> Phylo -> Phylo
192 toPhylo0 d p = addPhyloLevel 0 d p
193
194
195 class PhyloMaker corpus
196 where
197 toPhylo :: PhyloQueryBuild -> corpus -> [Ngrams] -> TermList -> Map (Date,Date) [PhyloFis] -> Phylo
198 toPhyloBase :: PhyloQueryBuild -> PhyloParam -> corpus -> [Ngrams] -> TermList -> Map (Date,Date) [PhyloFis] -> Phylo
199 corpusToDocs :: corpus -> Phylo -> Map (Date,Date) [Document]
200
201 instance PhyloMaker [(Date, Text)]
202 where
203 --------------------------------------
204 toPhylo q c roots termList fis = toNthLevel (getNthLevel q) (getInterTemporalMatching q) (getNthCluster q) phylo1
205 where
206 --------------------------------------
207 phylo1 :: Phylo
208 phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) (getContextualUnitMetrics q) (getContextualUnitFilters q) phyloDocs phylo0
209 --------------------------------------
210 phylo0 :: Phylo
211 phylo0 = toPhylo0 phyloDocs phyloBase
212 --------------------------------------
213 phyloDocs :: Map (Date, Date) [Document]
214 phyloDocs = corpusToDocs c phyloBase
215 --------------------------------------
216 phyloBase :: Phylo
217 phyloBase = tracePhyloBase $ toPhyloBase q (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just q)) c roots termList fis
218 --------------------------------------
219 --------------------------------------
220 toPhyloBase q p c roots termList fis = initPhyloBase periods foundations nbDocs cooc fis p
221 where
222 --------------------------------------
223 cooc :: Map Date (Map (Int,Int) Double)
224 cooc = docsToCooc (parseDocs (foundations ^. phylo_foundationsRoots) c) (foundations ^. phylo_foundationsRoots)
225 --------------------------------------
226 nbDocs :: Map Date Double
227 nbDocs = countDocs c
228 --------------------------------------
229 foundations :: PhyloFoundations
230 foundations = PhyloFoundations (initFoundationsRoots roots) termList
231 --------------------------------------
232 periods :: [(Date,Date)]
233 periods = initPeriods (getPeriodGrain q) (getPeriodSteps q)
234 $ both fst (head' "LevelMaker" c,last c)
235 --------------------------------------
236 --------------------------------------
237 corpusToDocs c p = groupDocsByPeriod date (getPhyloPeriods p) $ parseDocs (getFoundationsRoots p) c
238
239
240 instance PhyloMaker [Document]
241 where
242 --------------------------------------
243 toPhylo q c roots termList fis = toNthLevel (getNthLevel q) (getInterTemporalMatching q) (getNthCluster q) phylo1
244 where
245 --------------------------------------
246 phylo1 :: Phylo
247 phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) (getContextualUnitMetrics q) (getContextualUnitFilters q) phyloDocs phylo0
248 --------------------------------------
249 phylo0 :: Phylo
250 phylo0 = toPhylo0 phyloDocs phyloBase
251 --------------------------------------
252 phyloDocs :: Map (Date, Date) [Document]
253 phyloDocs = corpusToDocs c phyloBase
254 --------------------------------------
255 phyloBase :: Phylo
256 phyloBase = tracePhyloBase $ toPhyloBase q (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just q)) c roots termList fis
257 --------------------------------------
258 --------------------------------------
259 toPhyloBase q p c roots termList fis = initPhyloBase periods foundations nbDocs cooc fis p
260 where
261 --------------------------------------
262 cooc :: Map Date (Map (Int,Int) Double)
263 cooc = docsToCooc c (foundations ^. phylo_foundationsRoots)
264 --------------------------------------
265 nbDocs :: Map Date Double
266 nbDocs = countDocs $ map (\doc -> (date doc, text doc)) c
267 --------------------------------------
268 foundations :: PhyloFoundations
269 foundations = PhyloFoundations (initFoundationsRoots roots) termList
270 --------------------------------------
271 periods :: [(Date,Date)]
272 periods = initPeriods (getPeriodGrain q) (getPeriodSteps q)
273 $ both date (head' "LevelMaker" c,last c)
274 --------------------------------------
275 --------------------------------------
276 corpusToDocs c p = groupDocsByPeriod date (getPhyloPeriods p) c
277
278
279 -----------------
280 -- | Tracers | --
281 -----------------
282
283
284 tracePhyloBase :: Phylo -> Phylo
285 tracePhyloBase p = trace ( "\n-------------\n--| Phylo |--\n-------------\n\n"
286 <> show (length $ _phylo_periods p) <> " periods from "
287 <> show (getPhyloPeriodId $ (head' "PhyloMaker") $ _phylo_periods p)
288 <> " to "
289 <> show (getPhyloPeriodId $ last $ _phylo_periods p)
290 <> "\n"
291 <> show ( Vector.length $ getFoundationsRoots p) <> " foundations roots \n") p
292
293
294 traceTempoMatching :: Filiation -> Level -> Phylo -> Phylo
295 traceTempoMatching fil lvl p = trace ( "----\n" <> show (fil) <> " filtered temporal Matching in Phylo" <> show (lvl) <> " :\n"
296 <> "count : " <> show (length pts) <> " pointers\n"
297 <> "similarity : " <> show (percentile 25 (VS.fromList sim)) <> " (25%) "
298 <> show (percentile 50 (VS.fromList sim)) <> " (50%) "
299 <> show (percentile 75 (VS.fromList sim)) <> " (75%) "
300 <> show (percentile 90 (VS.fromList sim)) <> " (90%)\n") p
301 where
302 --------------------------------------
303 sim :: [Double]
304 sim = sort $ map snd pts
305 --------------------------------------
306 pts :: [Pointer]
307 pts = concat $ map (\g -> getGroupPointers PeriodEdge fil g) $ getGroupsWithLevel lvl p
308 --------------------------------------
309
310
311 traceBranches :: Level -> Phylo -> Phylo
312 traceBranches lvl p = trace ( "----\n" <> "Branches in Phylo" <> show lvl <> " :\n"
313 <> "count : " <> show (length $ filter (\(lvl',_) -> lvl' == lvl ) $ getBranchIds p) <> " branches\n"
314 <> "count : " <> show (length $ getGroupsWithLevel lvl p) <> " groups\n"
315 <> "groups by branch : " <> show (percentile 25 (VS.fromList brs)) <> " (25%) "
316 <> show (percentile 50 (VS.fromList brs)) <> " (50%) "
317 <> show (percentile 75 (VS.fromList brs)) <> " (75%) "
318 <> show (percentile 90 (VS.fromList brs)) <> " (90%)\n") p
319 where
320 --------------------------------------
321 brs :: [Double]
322 brs = sort $ map (\(_,gs) -> fromIntegral $ length gs)
323 $ filter (\(id,_) -> (fst id) == lvl)
324 $ getGroupsByBranches p
325 --------------------------------------