]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo/LevelMaker.hs
fix temporalMatching
[gargantext.git] / src / Gargantext / Viz / Phylo / LevelMaker.hs
1 {-|
2 Module : Gargantext.Viz.Phylo.Tools
3 Description : Phylomemy Tools to build/manage it
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10
11 -}
12
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE FlexibleContexts #-}
15 {-# LANGUAGE OverloadedStrings #-}
16 {-# LANGUAGE TypeSynonymInstances #-}
17 {-# LANGUAGE FlexibleInstances #-}
18
19 module Gargantext.Viz.Phylo.LevelMaker
20 where
21
22 import Control.Lens hiding (both, Level)
23 import Data.List ((++), sort, concat, nub, zip, last)
24 import Data.Map (Map, (!), empty, singleton)
25 import Data.Text (Text)
26 import Data.Tuple.Extra
27 import Gargantext.Prelude
28 import Gargantext.Viz.Phylo
29 import Gargantext.Viz.Phylo.Aggregates.Cluster
30 import Gargantext.Viz.Phylo.Aggregates.Document
31 import Gargantext.Viz.Phylo.Aggregates.Fis
32 import Gargantext.Viz.Phylo.BranchMaker
33 import Gargantext.Viz.Phylo.LinkMaker
34 import Gargantext.Viz.Phylo.Tools
35 import Gargantext.Viz.Phylo.Aggregates.Cooc
36 import Gargantext.Text.Context (TermList)
37
38 import qualified Data.Vector.Storable as VS
39 import qualified Data.Set as Set
40 import qualified Data.Vector as Vector
41
42 import Debug.Trace (trace)
43 import Numeric.Statistics (percentile)
44
45
46 -- | A typeClass for polymorphic PhyloLevel functions
47 class PhyloLevelMaker aggregate
48 where
49 -- | To add a new Level of PhyloGroups to a Phylo based on a list of Aggregates
50 addPhyloLevel :: Level -> Map (Date,Date) [aggregate] -> Phylo -> Phylo
51 -- | To create a list of PhyloGroups based on a list of aggregates a
52 toPhyloGroups :: Level -> (Date,Date) -> [aggregate] -> Map (Date,Date) [aggregate] -> Phylo -> [PhyloGroup]
53
54
55 instance PhyloLevelMaker PhyloCluster
56 where
57 --------------------------------------
58 -- | Level -> Map (Date,Date) [Cluster] -> Phylo -> Phylo
59 addPhyloLevel lvl m p
60 | lvl > 1 = toPhyloLevel lvl m p
61 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Clusters at level < 2")
62 --------------------------------------
63 -- | Level -> (Date,Date) -> [Cluster] -> Map (Date,Date) [Cluster] -> Phylo -> [PhyloGroup]
64 toPhyloGroups lvl (d,d') l m p = map (\(idx,cluster) -> clusterToGroup (d,d') lvl idx "" cluster m p) $ zip [1..] l
65 --------------------------------------
66
67
68 instance PhyloLevelMaker PhyloFis
69 where
70 --------------------------------------
71 -- | Level -> Map (Date,Date) [Fis] -> Phylo -> Phylo
72 addPhyloLevel lvl m p
73 | lvl == 1 = toPhyloLevel lvl m p
74 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Fis at level <> 1")
75 --------------------------------------
76 -- | Level -> (Date,Date) -> [Fis] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
77 toPhyloGroups lvl (d,d') l _ p = map (\(idx,fis) -> cliqueToGroup (d,d') lvl idx "" fis p) $ zip [1..] l
78 --------------------------------------
79
80
81 instance PhyloLevelMaker Document
82 where
83 --------------------------------------
84 -- | Level -> Map (Date,Date) [Document] -> Phylo -> Phylo
85 addPhyloLevel lvl m p
86 | lvl == 0 = toPhyloLevel lvl m p
87 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Documents at level <> 0")
88 --------------------------------------
89 -- | Level -> (Date,Date) -> [Document] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
90 toPhyloGroups lvl (d,d') l _m p = map (\(idx,ngram) -> ngramsToGroup (d,d') lvl idx ngram [ngram] p)
91 $ zip [1..]
92 $ (nub . concat)
93 $ map text l
94 --------------------------------------
95
96
97 -- | To transform a Cluster into a Phylogroup
98 clusterToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloCluster -> Map (Date,Date) [PhyloCluster]-> Phylo-> PhyloGroup
99 clusterToGroup prd lvl idx lbl groups _m p =
100 PhyloGroup ((prd, lvl), idx) lbl ngrams empty
101 Nothing
102 (getMiniCooc (listToFullCombi ngrams) (periodsToYears [prd]) (getPhyloCooc p))
103 [] [] [] (map (\g -> (getGroupId g, 1)) groups)
104 where
105 --------------------------------------
106 ngrams :: [Int]
107 ngrams = (sort . nub . concat) $ map getGroupNgrams groups
108 --------------------------------------
109
110
111 -- | To transform a Clique into a PhyloGroup
112 cliqueToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloFis -> Phylo -> PhyloGroup
113 cliqueToGroup prd lvl idx lbl fis p =
114 PhyloGroup ((prd, lvl), idx) lbl ngrams (singleton "support" (fromIntegral $ getSupport fis)) Nothing
115 (getMiniCooc (listToFullCombi ngrams) (periodsToYears [prd]) (getPhyloCooc p))
116 [] [] [] []
117 where
118 --------------------------------------
119 ngrams :: [Int]
120 ngrams = sort $ map (\x -> getIdxInRoots x p)
121 $ Set.toList
122 $ getClique fis
123 --------------------------------------
124
125
126 -- | To transform a list of Ngrams into a PhyloGroup
127 ngramsToGroup :: PhyloPeriodId -> Level -> Int -> Text -> [Ngrams] -> Phylo -> PhyloGroup
128 ngramsToGroup prd lvl idx lbl ngrams p =
129 PhyloGroup ((prd, lvl), idx) lbl (sort $ map (\x -> getIdxInRoots x p) ngrams) empty Nothing
130 (getMiniCooc (listToFullCombi $ sort $ map (\x -> getIdxInRoots x p) ngrams) (periodsToYears [prd]) (getPhyloCooc p))
131 [] [] [] []
132
133
134 -- | To traverse a Phylo and add a new PhyloLevel linked to a new list of PhyloGroups
135 toPhyloLevel :: PhyloLevelMaker a => Level -> Map (Date, Date) [a] -> Phylo -> Phylo
136 toPhyloLevel lvl m p = alterPhyloPeriods
137 (\period -> let pId = _phylo_periodId period
138 in over (phylo_periodLevels)
139 (\phyloLevels ->
140 let groups = toPhyloGroups lvl pId (m ! pId) m p
141 in phyloLevels ++ [PhyloLevel (pId, lvl) groups]
142 ) period) p
143
144
145 -- | To incrementally add new Levels to a Phylo by making all the linking and aggregation tasks
146 toNthLevel :: Level -> Proximity -> Cluster -> Phylo -> Phylo
147 toNthLevel lvlMax prox clus p
148 | lvl >= lvlMax = p
149 | otherwise = toNthLevel lvlMax prox clus
150 $ traceBranches (lvl + 1)
151 $ setPhyloBranches (lvl + 1)
152 $ transposePeriodLinks (lvl + 1)
153 $ setLevelLinks (lvl, lvl + 1)
154 $ addPhyloLevel (lvl + 1)
155 (phyloToClusters lvl clus p) p
156 where
157 --------------------------------------
158 lvl :: Level
159 lvl = getLastLevel p
160 --------------------------------------
161
162
163 -- | To reconstruct the Level 1 of a Phylo based on a Clustering Method
164 toPhylo1 :: Cluster -> Proximity -> [Metric] -> [Filter] -> Map (Date, Date) [Document] -> Phylo -> Phylo
165 toPhylo1 clus prox metrics filters d p = case clus of
166 Fis (FisParams k s t) -> traceBranches 1
167 $ setPhyloBranches 1
168 $ traceTempoMatching Descendant 1
169 $ interTempoMatching Descendant 1 prox
170 $ traceTempoMatching Ascendant 1
171 $ interTempoMatching Ascendant 1 prox
172 $ setLevelLinks (0,1)
173 $ setLevelLinks (1,0)
174 $ addPhyloLevel 1 phyloFis p
175 where
176 --------------------------------------
177 phyloFis :: Map (Date, Date) [PhyloFis]
178 phyloFis = toPhyloFis d k s t metrics filters
179 --------------------------------------
180
181 _ -> panic "[ERR][Viz.Phylo.LevelMaker.toPhylo1] fst clustering not recognized"
182
183
184 -- | To reconstruct the Level 0 of a Phylo
185 toPhylo0 :: Map (Date, Date) [Document] -> Phylo -> Phylo
186 toPhylo0 d p = addPhyloLevel 0 d p
187
188
189 class PhyloMaker corpus
190 where
191 toPhylo :: PhyloQueryBuild -> corpus -> [Ngrams] -> TermList -> Phylo
192 toPhyloBase :: PhyloQueryBuild -> PhyloParam -> corpus -> [Ngrams] -> TermList -> Phylo
193 corpusToDocs :: corpus -> Phylo -> Map (Date,Date) [Document]
194
195 instance PhyloMaker [(Date, Text)]
196 where
197 --------------------------------------
198 toPhylo q c roots termList = toNthLevel (getNthLevel q) (getInterTemporalMatching q) (getNthCluster q) phylo1
199 where
200 --------------------------------------
201 phylo1 :: Phylo
202 phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) (getContextualUnitMetrics q) (getContextualUnitFilters q) phyloDocs phylo0
203 --------------------------------------
204 phylo0 :: Phylo
205 phylo0 = toPhylo0 phyloDocs phyloBase
206 --------------------------------------
207 phyloDocs :: Map (Date, Date) [Document]
208 phyloDocs = corpusToDocs c phyloBase
209 --------------------------------------
210 phyloBase :: Phylo
211 phyloBase = tracePhyloBase $ toPhyloBase q (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just q)) c roots termList
212 --------------------------------------
213 --------------------------------------
214 toPhyloBase q p c roots termList = initPhyloBase periods foundations nbDocs cooc p
215 where
216 --------------------------------------
217 cooc :: Map Date (Map (Int,Int) Double)
218 cooc = docsToCooc (parseDocs (foundations ^. phylo_foundationsRoots) c) (foundations ^. phylo_foundationsRoots)
219 --------------------------------------
220 nbDocs :: Map Date Double
221 nbDocs = countDocs c
222 --------------------------------------
223 foundations :: PhyloFoundations
224 foundations = PhyloFoundations (initFoundationsRoots roots) termList
225 --------------------------------------
226 periods :: [(Date,Date)]
227 periods = initPeriods (getPeriodGrain q) (getPeriodSteps q)
228 $ both fst (head' "LevelMaker" c,last c)
229 --------------------------------------
230 --------------------------------------
231 corpusToDocs c p = groupDocsByPeriod date (getPhyloPeriods p) $ parseDocs (getFoundationsRoots p) c
232
233
234 instance PhyloMaker [Document]
235 where
236 --------------------------------------
237 toPhylo q c roots termList = toNthLevel (getNthLevel q) (getInterTemporalMatching q) (getNthCluster q) phylo1
238 where
239 --------------------------------------
240 phylo1 :: Phylo
241 phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) (getContextualUnitMetrics q) (getContextualUnitFilters q) phyloDocs phylo0
242 --------------------------------------
243 phylo0 :: Phylo
244 phylo0 = toPhylo0 phyloDocs phyloBase
245 --------------------------------------
246 phyloDocs :: Map (Date, Date) [Document]
247 phyloDocs = corpusToDocs c phyloBase
248 --------------------------------------
249 phyloBase :: Phylo
250 phyloBase = tracePhyloBase $ toPhyloBase q (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just q)) c roots termList
251 --------------------------------------
252 --------------------------------------
253 toPhyloBase q p c roots termList = initPhyloBase periods foundations nbDocs cooc p
254 where
255 --------------------------------------
256 cooc :: Map Date (Map (Int,Int) Double)
257 cooc = docsToCooc c (foundations ^. phylo_foundationsRoots)
258 --------------------------------------
259 nbDocs :: Map Date Double
260 nbDocs = countDocs $ map (\doc -> (date doc, text doc)) c
261 --------------------------------------
262 foundations :: PhyloFoundations
263 foundations = PhyloFoundations (initFoundationsRoots roots) termList
264 --------------------------------------
265 periods :: [(Date,Date)]
266 periods = initPeriods (getPeriodGrain q) (getPeriodSteps q)
267 $ both date (head' "LevelMaker" c,last c)
268 --------------------------------------
269 --------------------------------------
270 corpusToDocs c p = groupDocsByPeriod date (getPhyloPeriods p) c
271
272
273 -----------------
274 -- | Tracers | --
275 -----------------
276
277
278 tracePhyloBase :: Phylo -> Phylo
279 tracePhyloBase p = trace ( "\n-------------\n--| Phylo |--\n-------------\n\n"
280 <> show (length $ _phylo_periods p) <> " periods from "
281 <> show (getPhyloPeriodId $ (head' "PhyloMaker") $ _phylo_periods p)
282 <> " to "
283 <> show (getPhyloPeriodId $ last $ _phylo_periods p)
284 <> "\n"
285 <> show ( Vector.length $ getFoundationsRoots p) <> " foundations roots \n") p
286
287
288 traceTempoMatching :: Filiation -> Level -> Phylo -> Phylo
289 traceTempoMatching fil lvl p = trace ( "----\n" <> show (fil) <> " filtered temporal Matching in Phylo" <> show (lvl) <> " :\n"
290 <> "count : " <> show (length pts) <> " pointers\n"
291 <> "similarity : " <> show (percentile 25 (VS.fromList sim)) <> " (25%) "
292 <> show (percentile 50 (VS.fromList sim)) <> " (50%) "
293 <> show (percentile 75 (VS.fromList sim)) <> " (75%) "
294 <> show (percentile 90 (VS.fromList sim)) <> " (90%)\n") p
295 where
296 --------------------------------------
297 sim :: [Double]
298 sim = sort $ map snd pts
299 --------------------------------------
300 pts :: [Pointer]
301 pts = concat $ map (\g -> getGroupPointers PeriodEdge fil g) $ getGroupsWithLevel lvl p
302 --------------------------------------
303
304
305 traceBranches :: Level -> Phylo -> Phylo
306 traceBranches lvl p = trace ( "----\n" <> "Branches in Phylo" <> show lvl <> " :\n"
307 <> "count : " <> show (length $ filter (\(lvl',_) -> lvl' == lvl ) $ getBranchIds p) <> " branches\n"
308 <> "count : " <> show (length $ getGroupsWithLevel lvl p) <> " groups\n"
309 <> "groups by branch : " <> show (percentile 25 (VS.fromList brs)) <> " (25%) "
310 <> show (percentile 50 (VS.fromList brs)) <> " (50%) "
311 <> show (percentile 75 (VS.fromList brs)) <> " (75%) "
312 <> show (percentile 90 (VS.fromList brs)) <> " (90%)\n") p
313 where
314 --------------------------------------
315 brs :: [Double]
316 brs = sort $ map (\(_,gs) -> fromIntegral $ length gs)
317 $ filter (\(id,_) -> (fst id) == lvl)
318 $ getGroupsByBranches p
319 --------------------------------------