]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo/LevelMaker.hs
add some tracers and fix the temporal matching
[gargantext.git] / src / Gargantext / Viz / Phylo / LevelMaker.hs
1 {-|
2 Module : Gargantext.Viz.Phylo.Tools
3 Description : Phylomemy Tools to build/manage it
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10
11 -}
12
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE FlexibleContexts #-}
15 {-# LANGUAGE OverloadedStrings #-}
16 {-# LANGUAGE TypeSynonymInstances #-}
17 {-# LANGUAGE FlexibleInstances #-}
18
19 module Gargantext.Viz.Phylo.LevelMaker
20 where
21
22 import Control.Lens hiding (both, Level)
23 import Data.List ((++), sort, concat, nub, zip, last)
24 import Data.Map (Map, (!), empty, restrictKeys, filterWithKey, singleton, union)
25 import Data.Text (Text)
26 import Data.Tuple.Extra
27 import Gargantext.Prelude
28 import Gargantext.Viz.Phylo
29 import Gargantext.Viz.Phylo.Aggregates.Cluster
30 import Gargantext.Viz.Phylo.Aggregates.Cooc
31 import Gargantext.Viz.Phylo.Aggregates.Document
32 import Gargantext.Viz.Phylo.Aggregates.Fis
33 import Gargantext.Viz.Phylo.BranchMaker
34 import Gargantext.Viz.Phylo.LinkMaker
35 import Gargantext.Viz.Phylo.Tools
36 import Gargantext.Text.Context (TermList)
37
38 import qualified Data.Vector.Storable as VS
39
40 import qualified Data.Set as Set
41 import qualified Data.Vector as Vector
42
43 import Debug.Trace (trace)
44 import Numeric.Statistics (percentile)
45
46
47 -- | A typeClass for polymorphic PhyloLevel functions
48 class PhyloLevelMaker aggregate
49 where
50 -- | To add a new Level of PhyloGroups to a Phylo based on a list of Aggregates
51 addPhyloLevel :: Level -> Map (Date,Date) [aggregate] -> Phylo -> Phylo
52 -- | To create a list of PhyloGroups based on a list of aggregates a
53 toPhyloGroups :: Level -> (Date,Date) -> [aggregate] -> Map (Date,Date) [aggregate] -> Phylo -> [PhyloGroup]
54
55
56 instance PhyloLevelMaker PhyloCluster
57 where
58 --------------------------------------
59 -- | Level -> Map (Date,Date) [Cluster] -> Phylo -> Phylo
60 addPhyloLevel lvl m p
61 | lvl > 1 = toPhyloLevel lvl m p
62 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Clusters at level < 2")
63 --------------------------------------
64 -- | Level -> (Date,Date) -> [Cluster] -> Map (Date,Date) [Cluster] -> Phylo -> [PhyloGroup]
65 toPhyloGroups lvl (d,d') l m p = map (\(idx,cluster) -> clusterToGroup (d,d') lvl idx "" cluster m p) $ zip [1..] l
66 --------------------------------------
67
68
69 instance PhyloLevelMaker PhyloFis
70 where
71 --------------------------------------
72 -- | Level -> Map (Date,Date) [Fis] -> Phylo -> Phylo
73 addPhyloLevel lvl m p
74 | lvl == 1 = toPhyloLevel lvl m p
75 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Fis at level <> 1")
76 --------------------------------------
77 -- | Level -> (Date,Date) -> [Fis] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
78 toPhyloGroups lvl (d,d') l m p = map (\(idx,fis) -> cliqueToGroup (d,d') lvl idx "" fis m p) $ zip [1..] l
79 --------------------------------------
80
81
82 instance PhyloLevelMaker Document
83 where
84 --------------------------------------
85 -- | Level -> Map (Date,Date) [Document] -> Phylo -> Phylo
86 addPhyloLevel lvl m p
87 | lvl == 0 = toPhyloLevel lvl m p
88 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Documents at level <> 0")
89 --------------------------------------
90 -- | Level -> (Date,Date) -> [Document] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
91 toPhyloGroups lvl (d,d') l _m p = map (\(idx,ngram) -> ngramsToGroup (d,d') lvl idx ngram [ngram] p)
92 $ zip [1..]
93 $ (nub . concat)
94 $ map text l
95 --------------------------------------
96
97
98 -- | To transform a Cluster into a Phylogroup
99 clusterToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloCluster -> Map (Date,Date) [PhyloCluster] -> Phylo -> PhyloGroup
100 clusterToGroup prd lvl idx lbl groups _m p =
101 PhyloGroup ((prd, lvl), idx) lbl ngrams empty cooc Nothing [] [] [] (map (\g -> (getGroupId g, 1)) groups)
102 where
103 --------------------------------------
104 ngrams :: [Int]
105 ngrams = (sort . nub . concat) $ map getGroupNgrams groups
106 --------------------------------------
107 cooc :: Map (Int, Int) Double
108 cooc = filterWithKey (\k _ -> elem (fst k) ngrams && elem (snd k) ngrams)
109 $ foldl union empty
110 $ map getGroupCooc
111 $ getGroupsWithFilters 1 prd p
112 --------------------------------------
113
114
115 -- | To transform a Clique into a PhyloGroup
116 cliqueToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloFis -> Map (Date, Date) [PhyloFis] -> Phylo -> PhyloGroup
117 cliqueToGroup prd lvl idx lbl fis m p =
118 PhyloGroup ((prd, lvl), idx) lbl ngrams (singleton "support" (fromIntegral $ getSupport fis)) cooc Nothing [] [] [] []
119 where
120 --------------------------------------
121 ngrams :: [Int]
122 ngrams = sort $ map (\x -> getIdxInRoots x p)
123 $ Set.toList
124 $ getClique fis
125 --------------------------------------
126 cooc :: Map (Int, Int) Double
127 cooc = filterWithKey (\k _ -> elem (fst k) ngrams && elem (snd k) ngrams)
128 $ fisToCooc (restrictKeys m $ Set.fromList [prd]) p
129 --------------------------------------
130
131
132 -- | To transform a list of Ngrams into a PhyloGroup
133 ngramsToGroup :: PhyloPeriodId -> Level -> Int -> Text -> [Ngrams] -> Phylo -> PhyloGroup
134 ngramsToGroup prd lvl idx lbl ngrams p =
135 PhyloGroup ((prd, lvl), idx) lbl (sort $ map (\x -> getIdxInRoots x p) ngrams) empty empty Nothing [] [] [] []
136
137
138 -- | To traverse a Phylo and add a new PhyloLevel linked to a new list of PhyloGroups
139 toPhyloLevel :: PhyloLevelMaker a => Level -> Map (Date, Date) [a] -> Phylo -> Phylo
140 toPhyloLevel lvl m p = alterPhyloPeriods
141 (\period -> let pId = _phylo_periodId period
142 in over (phylo_periodLevels)
143 (\phyloLevels ->
144 let groups = toPhyloGroups lvl pId (m ! pId) m p
145 in phyloLevels ++ [PhyloLevel (pId, lvl) groups]
146 ) period) p
147
148
149 -- | To incrementally add new Levels to a Phylo by making all the linking and aggregation tasks
150 toNthLevel :: Level -> Proximity -> Cluster -> Phylo -> Phylo
151 toNthLevel lvlMax prox clus p
152 | lvl >= lvlMax = p
153 | otherwise = toNthLevel lvlMax prox clus
154 $ traceBranches (lvl + 1)
155 $ setPhyloBranches (lvl + 1)
156 $ traceTempoMatching Descendant (lvl + 1)
157 $ interTempoMatching Descendant (lvl + 1) prox
158 $ traceTempoMatching Ascendant (lvl + 1)
159 $ interTempoMatching Ascendant (lvl + 1) prox
160 $ setLevelLinks (lvl, lvl + 1)
161 $ addPhyloLevel (lvl + 1)
162 (phyloToClusters lvl clus p) p
163 where
164 --------------------------------------
165 lvl :: Level
166 lvl = getLastLevel p
167 --------------------------------------
168
169
170 -- | To reconstruct the Level 1 of a Phylo based on a Clustering Method
171 toPhylo1 :: Cluster -> Proximity -> [Metric] -> [Filter] -> Map (Date, Date) [Document] -> Phylo -> Phylo
172 toPhylo1 clus prox metrics filters d p = case clus of
173 Fis (FisParams k s t) -> traceBranches 1
174 $ setPhyloBranches 1
175 $ traceTempoMatching Descendant 1
176 $ interTempoMatching Descendant 1 prox
177 $ traceTempoMatching Ascendant 1
178 $ interTempoMatching Ascendant 1 prox
179 $ setLevelLinks (0,1)
180 $ setLevelLinks (1,0)
181 $ addPhyloLevel 1 phyloFis p
182 where
183 --------------------------------------
184 phyloFis :: Map (Date, Date) [PhyloFis]
185 phyloFis = toPhyloFis d k s t metrics filters
186 --------------------------------------
187
188 _ -> panic "[ERR][Viz.Phylo.LevelMaker.toPhylo1] fst clustering not recognized"
189
190
191 -- | To reconstruct the Level 0 of a Phylo
192 toPhylo0 :: Map (Date, Date) [Document] -> Phylo -> Phylo
193 toPhylo0 d p = addPhyloLevel 0 d p
194
195
196 class PhyloMaker corpus
197 where
198 toPhylo :: PhyloQueryBuild -> corpus -> [Ngrams] -> TermList -> Phylo
199 toPhyloBase :: PhyloQueryBuild -> PhyloParam -> corpus -> [Ngrams] -> TermList -> Phylo
200 corpusToDocs :: corpus -> Phylo -> Map (Date,Date) [Document]
201
202 instance PhyloMaker [(Date, Text)]
203 where
204 --------------------------------------
205 toPhylo q c roots termList = toNthLevel (getNthLevel q) (getInterTemporalMatching q) (getNthCluster q) phylo1
206 where
207 --------------------------------------
208 phylo1 :: Phylo
209 phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) (getContextualUnitMetrics q) (getContextualUnitFilters q) phyloDocs phylo0
210 --------------------------------------
211 phylo0 :: Phylo
212 phylo0 = toPhylo0 phyloDocs phyloBase
213 --------------------------------------
214 phyloDocs :: Map (Date, Date) [Document]
215 phyloDocs = corpusToDocs c phyloBase
216 --------------------------------------
217 phyloBase :: Phylo
218 phyloBase = tracePhyloBase $ toPhyloBase q (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just q)) c roots termList
219 --------------------------------------
220 --------------------------------------
221 toPhyloBase q p c roots termList = initPhyloBase periods foundations p
222 where
223 --------------------------------------
224 foundations :: PhyloFoundations
225 foundations = PhyloFoundations (initFoundationsRoots roots) termList
226 --------------------------------------
227 periods :: [(Date,Date)]
228 periods = initPeriods (getPeriodGrain q) (getPeriodSteps q)
229 $ both fst (head' "LevelMaker" c,last c)
230 --------------------------------------
231 --------------------------------------
232 corpusToDocs c p = groupDocsByPeriod date (getPhyloPeriods p) $ parseDocs (getFoundationsRoots p) c
233
234
235 instance PhyloMaker [Document]
236 where
237 --------------------------------------
238 toPhylo q c roots termList = toNthLevel (getNthLevel q) (getInterTemporalMatching q) (getNthCluster q) phylo1
239 where
240 --------------------------------------
241 phylo1 :: Phylo
242 phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) (getContextualUnitMetrics q) (getContextualUnitFilters q) phyloDocs phylo0
243 --------------------------------------
244 phylo0 :: Phylo
245 phylo0 = toPhylo0 phyloDocs phyloBase
246 --------------------------------------
247 phyloDocs :: Map (Date, Date) [Document]
248 phyloDocs = corpusToDocs c phyloBase
249 --------------------------------------
250 phyloBase :: Phylo
251 phyloBase = tracePhyloBase $ toPhyloBase q (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just q)) c roots termList
252 --------------------------------------
253 --------------------------------------
254 toPhyloBase q p c roots termList = initPhyloBase periods foundations p
255 where
256 --------------------------------------
257 foundations :: PhyloFoundations
258 foundations = PhyloFoundations (initFoundationsRoots roots) termList
259 --------------------------------------
260 periods :: [(Date,Date)]
261 periods = initPeriods (getPeriodGrain q) (getPeriodSteps q)
262 $ both date (head' "LevelMaker" c,last c)
263 --------------------------------------
264 --------------------------------------
265 corpusToDocs c p = groupDocsByPeriod date (getPhyloPeriods p) c
266
267
268 -----------------
269 -- | Tracers | --
270 -----------------
271
272
273 tracePhyloBase :: Phylo -> Phylo
274 tracePhyloBase p = trace ( "----\nPhyloBase : \n"
275 <> show (length $ _phylo_periods p) <> " periods from "
276 <> show (getPhyloPeriodId $ (head' "PhyloMaker") $ _phylo_periods p)
277 <> " to "
278 <> show (getPhyloPeriodId $ last $ _phylo_periods p)
279 <> "\n"
280 <> show ( Vector.length $ getFoundationsRoots p) <> " foundations roots \n") p
281
282
283
284 traceTempoMatching :: Filiation -> Level -> Phylo -> Phylo
285 traceTempoMatching fil lvl p = trace ( "----\n" <> show (fil) <> " filtered temporal Matching in Phylo" <> show (lvl) <> " :\n"
286 <> "count : " <> show (length pts) <> " pointers\n"
287 <> "similarity : " <> show (percentile 25 (VS.fromList sim)) <> " (25%) "
288 <> show (percentile 50 (VS.fromList sim)) <> " (50%) "
289 <> show (percentile 75 (VS.fromList sim)) <> " (75%) "
290 <> show (percentile 90 (VS.fromList sim)) <> " (90%)\n") p
291 where
292 --------------------------------------
293 sim :: [Double]
294 sim = sort $ map snd pts
295 --------------------------------------
296 pts :: [Pointer]
297 pts = concat $ map (\g -> getGroupPointers PeriodEdge fil g) $ getGroupsWithLevel lvl p
298 --------------------------------------
299
300
301 traceBranches :: Level -> Phylo -> Phylo
302 traceBranches lvl p = trace ( "----\n" <> "Branches in Phylo" <> show lvl <> " :\n"
303 <> "count : " <> show (length $ getBranchIds p) <> " branches\n"
304 <> "count : " <> show (length $ getGroupsWithLevel lvl p) <> " groups\n"
305 <> "groups by branch : " <> show (percentile 25 (VS.fromList brs)) <> " (25%) "
306 <> show (percentile 50 (VS.fromList brs)) <> " (50%) "
307 <> show (percentile 75 (VS.fromList brs)) <> " (75%) "
308 <> show (percentile 90 (VS.fromList brs)) <> " (90%)\n") p
309 where
310 --------------------------------------
311 brs :: [Double]
312 brs = sort $ map (\(_,gs) -> fromIntegral $ length gs)
313 $ filter (\(id,_) -> (fst id) == lvl)
314 $ getGroupsByBranches p
315 --------------------------------------