]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo/LevelMaker.hs
add trace to cluster and optimisation to find candidates
[gargantext.git] / src / Gargantext / Viz / Phylo / LevelMaker.hs
1 {-|
2 Module : Gargantext.Viz.Phylo.Tools
3 Description : Phylomemy Tools to build/manage it
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10
11 -}
12
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE FlexibleContexts #-}
15 {-# LANGUAGE OverloadedStrings #-}
16 {-# LANGUAGE TypeSynonymInstances #-}
17 {-# LANGUAGE FlexibleInstances #-}
18
19 module Gargantext.Viz.Phylo.LevelMaker
20 where
21
22 import Control.Lens hiding (both, Level)
23 import Data.List ((++), sort, concat, nub, zip, last)
24 import Data.Map (Map, (!), empty, restrictKeys, filterWithKey, singleton, union)
25 import Data.Text (Text)
26 import Data.Tuple.Extra
27 import Gargantext.Prelude
28 import Gargantext.Viz.Phylo
29 import Gargantext.Viz.Phylo.Aggregates.Cluster
30 import Gargantext.Viz.Phylo.Aggregates.Cooc
31 import Gargantext.Viz.Phylo.Aggregates.Document
32 import Gargantext.Viz.Phylo.Aggregates.Fis
33 import Gargantext.Viz.Phylo.BranchMaker
34 import Gargantext.Viz.Phylo.LinkMaker
35 import Gargantext.Viz.Phylo.Tools
36 import Gargantext.Text.Context (TermList)
37
38 import qualified Data.Vector.Storable as VS
39 import qualified Data.Set as Set
40 import qualified Data.Vector as Vector
41
42 import Debug.Trace (trace)
43 import Numeric.Statistics (percentile)
44
45
46 -- | A typeClass for polymorphic PhyloLevel functions
47 class PhyloLevelMaker aggregate
48 where
49 -- | To add a new Level of PhyloGroups to a Phylo based on a list of Aggregates
50 addPhyloLevel :: Level -> Map (Date,Date) [aggregate] -> Phylo -> Phylo
51 -- | To create a list of PhyloGroups based on a list of aggregates a
52 toPhyloGroups :: Level -> (Date,Date) -> [aggregate] -> Map (Date,Date) [aggregate] -> Phylo -> [PhyloGroup]
53
54
55 instance PhyloLevelMaker PhyloCluster
56 where
57 --------------------------------------
58 -- | Level -> Map (Date,Date) [Cluster] -> Phylo -> Phylo
59 addPhyloLevel lvl m p
60 | lvl > 1 = toPhyloLevel lvl m p
61 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Clusters at level < 2")
62 --------------------------------------
63 -- | Level -> (Date,Date) -> [Cluster] -> Map (Date,Date) [Cluster] -> Phylo -> [PhyloGroup]
64 toPhyloGroups lvl (d,d') l m p = map (\(idx,cluster) -> clusterToGroup (d,d') lvl idx "" cluster m p) $ zip [1..] l
65 --------------------------------------
66
67
68 instance PhyloLevelMaker PhyloFis
69 where
70 --------------------------------------
71 -- | Level -> Map (Date,Date) [Fis] -> Phylo -> Phylo
72 addPhyloLevel lvl m p
73 | lvl == 1 = toPhyloLevel lvl m p
74 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Fis at level <> 1")
75 --------------------------------------
76 -- | Level -> (Date,Date) -> [Fis] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
77 toPhyloGroups lvl (d,d') l m p = map (\(idx,fis) -> cliqueToGroup (d,d') lvl idx "" fis m p) $ zip [1..] l
78 --------------------------------------
79
80
81 instance PhyloLevelMaker Document
82 where
83 --------------------------------------
84 -- | Level -> Map (Date,Date) [Document] -> Phylo -> Phylo
85 addPhyloLevel lvl m p
86 | lvl == 0 = toPhyloLevel lvl m p
87 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Documents at level <> 0")
88 --------------------------------------
89 -- | Level -> (Date,Date) -> [Document] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
90 toPhyloGroups lvl (d,d') l _m p = map (\(idx,ngram) -> ngramsToGroup (d,d') lvl idx ngram [ngram] p)
91 $ zip [1..]
92 $ (nub . concat)
93 $ map text l
94 --------------------------------------
95
96
97 -- | To transform a Cluster into a Phylogroup
98 clusterToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloCluster -> Map (Date,Date) [PhyloCluster] -> Phylo -> PhyloGroup
99 clusterToGroup prd lvl idx lbl groups _m p =
100 PhyloGroup ((prd, lvl), idx) lbl ngrams empty cooc Nothing [] [] [] (map (\g -> (getGroupId g, 1)) groups)
101 where
102 --------------------------------------
103 ngrams :: [Int]
104 ngrams = (sort . nub . concat) $ map getGroupNgrams groups
105 --------------------------------------
106 cooc :: Map (Int, Int) Double
107 cooc = filterWithKey (\k _ -> elem (fst k) ngrams && elem (snd k) ngrams)
108 $ foldl union empty
109 $ map getGroupCooc
110 $ getGroupsWithFilters 1 prd p
111 --------------------------------------
112
113
114 -- | To transform a Clique into a PhyloGroup
115 cliqueToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloFis -> Map (Date, Date) [PhyloFis] -> Phylo -> PhyloGroup
116 cliqueToGroup prd lvl idx lbl fis m p =
117 PhyloGroup ((prd, lvl), idx) lbl ngrams (singleton "support" (fromIntegral $ getSupport fis)) cooc Nothing [] [] [] []
118 where
119 --------------------------------------
120 ngrams :: [Int]
121 ngrams = sort $ map (\x -> getIdxInRoots x p)
122 $ Set.toList
123 $ getClique fis
124 --------------------------------------
125 cooc :: Map (Int, Int) Double
126 cooc = filterWithKey (\k _ -> elem (fst k) ngrams && elem (snd k) ngrams)
127 $ fisToCooc (restrictKeys m $ Set.fromList [prd]) p
128 --------------------------------------
129
130
131 -- | To transform a list of Ngrams into a PhyloGroup
132 ngramsToGroup :: PhyloPeriodId -> Level -> Int -> Text -> [Ngrams] -> Phylo -> PhyloGroup
133 ngramsToGroup prd lvl idx lbl ngrams p =
134 PhyloGroup ((prd, lvl), idx) lbl (sort $ map (\x -> getIdxInRoots x p) ngrams) empty empty Nothing [] [] [] []
135
136
137 -- | To traverse a Phylo and add a new PhyloLevel linked to a new list of PhyloGroups
138 toPhyloLevel :: PhyloLevelMaker a => Level -> Map (Date, Date) [a] -> Phylo -> Phylo
139 toPhyloLevel lvl m p = alterPhyloPeriods
140 (\period -> let pId = _phylo_periodId period
141 in over (phylo_periodLevels)
142 (\phyloLevels ->
143 let groups = toPhyloGroups lvl pId (m ! pId) m p
144 in phyloLevels ++ [PhyloLevel (pId, lvl) groups]
145 ) period) p
146
147
148 -- | To incrementally add new Levels to a Phylo by making all the linking and aggregation tasks
149 toNthLevel :: Level -> Proximity -> Cluster -> Phylo -> Phylo
150 toNthLevel lvlMax prox clus p
151 | lvl >= lvlMax = p
152 | otherwise = toNthLevel lvlMax prox clus
153 $ traceBranches (lvl + 1)
154 $ setPhyloBranches (lvl + 1)
155 $ traceTempoMatching Descendant (lvl + 1)
156 $ interTempoMatching Descendant (lvl + 1) prox
157 $ traceTempoMatching Ascendant (lvl + 1)
158 $ interTempoMatching Ascendant (lvl + 1) prox
159 $ setLevelLinks (lvl, lvl + 1)
160 $ addPhyloLevel (lvl + 1)
161 (phyloToClusters lvl clus p) p
162 where
163 --------------------------------------
164 lvl :: Level
165 lvl = getLastLevel p
166 --------------------------------------
167
168
169 -- | To reconstruct the Level 1 of a Phylo based on a Clustering Method
170 toPhylo1 :: Cluster -> Proximity -> [Metric] -> [Filter] -> Map (Date, Date) [Document] -> Phylo -> Phylo
171 toPhylo1 clus prox metrics filters d p = case clus of
172 Fis (FisParams k s t) -> traceBranches 1
173 $ setPhyloBranches 1
174 $ traceTempoMatching Descendant 1
175 $ interTempoMatching Descendant 1 prox
176 $ traceTempoMatching Ascendant 1
177 $ interTempoMatching Ascendant 1 prox
178 $ setLevelLinks (0,1)
179 $ setLevelLinks (1,0)
180 $ addPhyloLevel 1 phyloFis p
181 where
182 --------------------------------------
183 phyloFis :: Map (Date, Date) [PhyloFis]
184 phyloFis = toPhyloFis d k s t metrics filters
185 --------------------------------------
186
187 _ -> panic "[ERR][Viz.Phylo.LevelMaker.toPhylo1] fst clustering not recognized"
188
189
190 -- | To reconstruct the Level 0 of a Phylo
191 toPhylo0 :: Map (Date, Date) [Document] -> Phylo -> Phylo
192 toPhylo0 d p = addPhyloLevel 0 d p
193
194
195 class PhyloMaker corpus
196 where
197 toPhylo :: PhyloQueryBuild -> corpus -> [Ngrams] -> TermList -> Phylo
198 toPhyloBase :: PhyloQueryBuild -> PhyloParam -> corpus -> [Ngrams] -> TermList -> Phylo
199 corpusToDocs :: corpus -> Phylo -> Map (Date,Date) [Document]
200
201 instance PhyloMaker [(Date, Text)]
202 where
203 --------------------------------------
204 toPhylo q c roots termList = toNthLevel (getNthLevel q) (getInterTemporalMatching q) (getNthCluster q) phylo1
205 where
206 --------------------------------------
207 phylo1 :: Phylo
208 phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) (getContextualUnitMetrics q) (getContextualUnitFilters q) phyloDocs phylo0
209 --------------------------------------
210 phylo0 :: Phylo
211 phylo0 = toPhylo0 phyloDocs phyloBase
212 --------------------------------------
213 phyloDocs :: Map (Date, Date) [Document]
214 phyloDocs = corpusToDocs c phyloBase
215 --------------------------------------
216 phyloBase :: Phylo
217 phyloBase = tracePhyloBase $ toPhyloBase q (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just q)) c roots termList
218 --------------------------------------
219 --------------------------------------
220 toPhyloBase q p c roots termList = initPhyloBase periods foundations p
221 where
222 --------------------------------------
223 foundations :: PhyloFoundations
224 foundations = PhyloFoundations (initFoundationsRoots roots) termList
225 --------------------------------------
226 periods :: [(Date,Date)]
227 periods = initPeriods (getPeriodGrain q) (getPeriodSteps q)
228 $ both fst (head' "LevelMaker" c,last c)
229 --------------------------------------
230 --------------------------------------
231 corpusToDocs c p = groupDocsByPeriod date (getPhyloPeriods p) $ parseDocs (getFoundationsRoots p) c
232
233
234 instance PhyloMaker [Document]
235 where
236 --------------------------------------
237 toPhylo q c roots termList = toNthLevel (getNthLevel q) (getInterTemporalMatching q) (getNthCluster q) phylo1
238 where
239 --------------------------------------
240 phylo1 :: Phylo
241 phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) (getContextualUnitMetrics q) (getContextualUnitFilters q) phyloDocs phylo0
242 --------------------------------------
243 phylo0 :: Phylo
244 phylo0 = toPhylo0 phyloDocs phyloBase
245 --------------------------------------
246 phyloDocs :: Map (Date, Date) [Document]
247 phyloDocs = corpusToDocs c phyloBase
248 --------------------------------------
249 phyloBase :: Phylo
250 phyloBase = tracePhyloBase $ toPhyloBase q (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just q)) c roots termList
251 --------------------------------------
252 --------------------------------------
253 toPhyloBase q p c roots termList = initPhyloBase periods foundations p
254 where
255 --------------------------------------
256 foundations :: PhyloFoundations
257 foundations = PhyloFoundations (initFoundationsRoots roots) termList
258 --------------------------------------
259 periods :: [(Date,Date)]
260 periods = initPeriods (getPeriodGrain q) (getPeriodSteps q)
261 $ both date (head' "LevelMaker" c,last c)
262 --------------------------------------
263 --------------------------------------
264 corpusToDocs c p = groupDocsByPeriod date (getPhyloPeriods p) c
265
266
267 -----------------
268 -- | Tracers | --
269 -----------------
270
271
272 tracePhyloBase :: Phylo -> Phylo
273 tracePhyloBase p = trace ( "\n-----------------\n--| PhyloBase |--\n-----------------\n\n"
274 <> show (length $ _phylo_periods p) <> " periods from "
275 <> show (getPhyloPeriodId $ (head' "PhyloMaker") $ _phylo_periods p)
276 <> " to "
277 <> show (getPhyloPeriodId $ last $ _phylo_periods p)
278 <> "\n"
279 <> show ( Vector.length $ getFoundationsRoots p) <> " foundations roots \n") p
280
281
282 traceTempoMatching :: Filiation -> Level -> Phylo -> Phylo
283 traceTempoMatching fil lvl p = trace ( "----\n" <> show (fil) <> " filtered temporal Matching in Phylo" <> show (lvl) <> " :\n"
284 <> "count : " <> show (length pts) <> " pointers\n"
285 <> "similarity : " <> show (percentile 25 (VS.fromList sim)) <> " (25%) "
286 <> show (percentile 50 (VS.fromList sim)) <> " (50%) "
287 <> show (percentile 75 (VS.fromList sim)) <> " (75%) "
288 <> show (percentile 90 (VS.fromList sim)) <> " (90%)\n") p
289 where
290 --------------------------------------
291 sim :: [Double]
292 sim = sort $ map snd pts
293 --------------------------------------
294 pts :: [Pointer]
295 pts = concat $ map (\g -> getGroupPointers PeriodEdge fil g) $ getGroupsWithLevel lvl p
296 --------------------------------------
297
298
299 traceBranches :: Level -> Phylo -> Phylo
300 traceBranches lvl p = trace ( "----\n" <> "Branches in Phylo" <> show lvl <> " :\n"
301 <> "count : " <> show (length $ filter (\(lvl',_) -> lvl' == lvl ) $ getBranchIds p) <> " branches\n"
302 <> "count : " <> show (length $ getGroupsWithLevel lvl p) <> " groups\n"
303 <> "groups by branch : " <> show (percentile 25 (VS.fromList brs)) <> " (25%) "
304 <> show (percentile 50 (VS.fromList brs)) <> " (50%) "
305 <> show (percentile 75 (VS.fromList brs)) <> " (75%) "
306 <> show (percentile 90 (VS.fromList brs)) <> " (90%)\n") p
307 where
308 --------------------------------------
309 brs :: [Double]
310 brs = sort $ map (\(_,gs) -> fromIntegral $ length gs)
311 $ filter (\(id,_) -> (fst id) == lvl)
312 $ getGroupsByBranches p
313 --------------------------------------