]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo/LevelMaker.hs
[REFACT] before scoring new ngrams lists.
[gargantext.git] / src / Gargantext / Viz / Phylo / LevelMaker.hs
1 {-|
2 Module : Gargantext.Viz.Phylo.Tools
3 Description : Phylomemy Tools to build/manage it
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10
11 -}
12
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE FlexibleContexts #-}
15 {-# LANGUAGE OverloadedStrings #-}
16 {-# LANGUAGE TypeSynonymInstances #-}
17 {-# LANGUAGE FlexibleInstances #-}
18
19 module Gargantext.Viz.Phylo.LevelMaker
20 where
21
22 import Control.Lens hiding (both, Level)
23 import Data.List ((++), sort, concat, nub, zip, last)
24 import Data.Map (Map, (!), empty, singleton)
25 import Data.Text (Text)
26 import Data.Tuple.Extra
27 import Gargantext.Prelude
28 import Gargantext.Viz.Phylo
29 import Gargantext.Viz.Phylo.Aggregates.Cluster
30 import Gargantext.Viz.Phylo.Aggregates.Document
31 import Gargantext.Viz.Phylo.Aggregates.Fis
32 import Gargantext.Viz.Phylo.BranchMaker
33 import Gargantext.Viz.Phylo.LinkMaker
34 import Gargantext.Viz.Phylo.Tools
35 import Gargantext.Text.Context (TermList)
36
37 import qualified Data.Vector.Storable as VS
38 import qualified Data.Set as Set
39 import qualified Data.Vector as Vector
40
41 import Debug.Trace (trace)
42 import Numeric.Statistics (percentile)
43
44
45 -- | A typeClass for polymorphic PhyloLevel functions
46 class PhyloLevelMaker aggregate
47 where
48 -- | To add a new Level of PhyloGroups to a Phylo based on a list of Aggregates
49 addPhyloLevel :: Level -> Map (Date,Date) [aggregate] -> Phylo -> Phylo
50 -- | To create a list of PhyloGroups based on a list of aggregates a
51 toPhyloGroups :: Level -> (Date,Date) -> [aggregate] -> Map (Date,Date) [aggregate] -> Phylo -> [PhyloGroup]
52
53
54 instance PhyloLevelMaker PhyloCluster
55 where
56 --------------------------------------
57 -- | Level -> Map (Date,Date) [Cluster] -> Phylo -> Phylo
58 addPhyloLevel lvl m p
59 | lvl > 1 = toPhyloLevel lvl m p
60 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Clusters at level < 2")
61 --------------------------------------
62 -- | Level -> (Date,Date) -> [Cluster] -> Map (Date,Date) [Cluster] -> Phylo -> [PhyloGroup]
63 toPhyloGroups lvl (d,d') l m _ = map (\(idx,cluster) -> clusterToGroup (d,d') lvl idx "" cluster m) $ zip [1..] l
64 --------------------------------------
65
66
67 instance PhyloLevelMaker PhyloFis
68 where
69 --------------------------------------
70 -- | Level -> Map (Date,Date) [Fis] -> Phylo -> Phylo
71 addPhyloLevel lvl m p
72 | lvl == 1 = toPhyloLevel lvl m p
73 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Fis at level <> 1")
74 --------------------------------------
75 -- | Level -> (Date,Date) -> [Fis] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
76 toPhyloGroups lvl (d,d') l _ p = map (\(idx,fis) -> cliqueToGroup (d,d') lvl idx "" fis p) $ zip [1..] l
77 --------------------------------------
78
79
80 instance PhyloLevelMaker Document
81 where
82 --------------------------------------
83 -- | Level -> Map (Date,Date) [Document] -> Phylo -> Phylo
84 addPhyloLevel lvl m p
85 | lvl == 0 = toPhyloLevel lvl m p
86 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Documents at level <> 0")
87 --------------------------------------
88 -- | Level -> (Date,Date) -> [Document] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
89 toPhyloGroups lvl (d,d') l _m p = map (\(idx,ngram) -> ngramsToGroup (d,d') lvl idx ngram [ngram] p)
90 $ zip [1..]
91 $ (nub . concat)
92 $ map text l
93 --------------------------------------
94
95
96 -- | To transform a Cluster into a Phylogroup
97 clusterToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloCluster -> Map (Date,Date) [PhyloCluster]-> PhyloGroup
98 clusterToGroup prd lvl idx lbl groups _m =
99 PhyloGroup ((prd, lvl), idx) lbl ngrams empty Nothing [] [] [] (map (\g -> (getGroupId g, 1)) groups)
100 where
101 --------------------------------------
102 ngrams :: [Int]
103 ngrams = (sort . nub . concat) $ map getGroupNgrams groups
104 --------------------------------------
105
106
107 -- | To transform a Clique into a PhyloGroup
108 cliqueToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloFis -> Phylo -> PhyloGroup
109 cliqueToGroup prd lvl idx lbl fis p =
110 PhyloGroup ((prd, lvl), idx) lbl ngrams (singleton "support" (fromIntegral $ getSupport fis)) Nothing [] [] [] []
111 where
112 --------------------------------------
113 ngrams :: [Int]
114 ngrams = sort $ map (\x -> getIdxInRoots x p)
115 $ Set.toList
116 $ getClique fis
117 --------------------------------------
118
119
120 -- | To transform a list of Ngrams into a PhyloGroup
121 ngramsToGroup :: PhyloPeriodId -> Level -> Int -> Text -> [Ngrams] -> Phylo -> PhyloGroup
122 ngramsToGroup prd lvl idx lbl ngrams p =
123 PhyloGroup ((prd, lvl), idx) lbl (sort $ map (\x -> getIdxInRoots x p) ngrams) empty Nothing [] [] [] []
124
125
126 -- | To traverse a Phylo and add a new PhyloLevel linked to a new list of PhyloGroups
127 toPhyloLevel :: PhyloLevelMaker a => Level -> Map (Date, Date) [a] -> Phylo -> Phylo
128 toPhyloLevel lvl m p = alterPhyloPeriods
129 (\period -> let pId = _phylo_periodId period
130 in over (phylo_periodLevels)
131 (\phyloLevels ->
132 let groups = toPhyloGroups lvl pId (m ! pId) m p
133 in phyloLevels ++ [PhyloLevel (pId, lvl) groups]
134 ) period) p
135
136
137 -- | To incrementally add new Levels to a Phylo by making all the linking and aggregation tasks
138 toNthLevel :: Level -> Proximity -> Cluster -> Phylo -> Phylo
139 toNthLevel lvlMax prox clus p
140 | lvl >= lvlMax = p
141 | otherwise = toNthLevel lvlMax prox clus
142 $ traceBranches (lvl + 1)
143 $ setPhyloBranches (lvl + 1)
144 -- $ traceTempoMatching Descendant (lvl + 1)
145 -- $ interTempoMatching Descendant (lvl + 1) prox
146 -- $ traceTempoMatching Ascendant (lvl + 1)
147 -- $ interTempoMatching Ascendant (lvl + 1) prox
148 $ transposePeriodLinks (lvl + 1)
149 $ setLevelLinks (lvl, lvl + 1)
150 $ addPhyloLevel (lvl + 1)
151 (phyloToClusters lvl clus p) p
152 where
153 --------------------------------------
154 lvl :: Level
155 lvl = getLastLevel p
156 --------------------------------------
157
158
159 -- | To reconstruct the Level 1 of a Phylo based on a Clustering Method
160 toPhylo1 :: Cluster -> Proximity -> [Metric] -> [Filter] -> Map (Date, Date) [Document] -> Phylo -> Phylo
161 toPhylo1 clus prox metrics filters d p = case clus of
162 Fis (FisParams k s t) -> traceBranches 1
163 $ setPhyloBranches 1
164 $ traceTempoMatching Descendant 1
165 $ interTempoMatching Descendant 1 prox
166 $ traceTempoMatching Ascendant 1
167 $ interTempoMatching Ascendant 1 prox
168 $ setLevelLinks (0,1)
169 $ setLevelLinks (1,0)
170 $ addPhyloLevel 1 phyloFis p
171 where
172 --------------------------------------
173 phyloFis :: Map (Date, Date) [PhyloFis]
174 phyloFis = toPhyloFis d k s t metrics filters
175 --------------------------------------
176
177 _ -> panic "[ERR][Viz.Phylo.LevelMaker.toPhylo1] fst clustering not recognized"
178
179
180 -- | To reconstruct the Level 0 of a Phylo
181 toPhylo0 :: Map (Date, Date) [Document] -> Phylo -> Phylo
182 toPhylo0 d p = addPhyloLevel 0 d p
183
184
185 class PhyloMaker corpus
186 where
187 toPhylo :: PhyloQueryBuild -> corpus -> [Ngrams] -> TermList -> Phylo
188 toPhyloBase :: PhyloQueryBuild -> PhyloParam -> corpus -> [Ngrams] -> TermList -> Phylo
189 corpusToDocs :: corpus -> Phylo -> Map (Date,Date) [Document]
190
191 instance PhyloMaker [(Date, Text)]
192 where
193 --------------------------------------
194 toPhylo q c roots termList = toNthLevel (getNthLevel q) (getInterTemporalMatching q) (getNthCluster q) phylo1
195 where
196 --------------------------------------
197 phylo1 :: Phylo
198 phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) (getContextualUnitMetrics q) (getContextualUnitFilters q) phyloDocs phylo0
199 --------------------------------------
200 phylo0 :: Phylo
201 phylo0 = toPhylo0 phyloDocs phyloBase
202 --------------------------------------
203 phyloDocs :: Map (Date, Date) [Document]
204 phyloDocs = corpusToDocs c phyloBase
205 --------------------------------------
206 phyloBase :: Phylo
207 phyloBase = tracePhyloBase $ toPhyloBase q (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just q)) c roots termList
208 --------------------------------------
209 --------------------------------------
210 toPhyloBase q p c roots termList = initPhyloBase periods foundations p
211 where
212 --------------------------------------
213 foundations :: PhyloFoundations
214 foundations = PhyloFoundations (initFoundationsRoots roots) termList
215 --------------------------------------
216 periods :: [(Date,Date)]
217 periods = initPeriods (getPeriodGrain q) (getPeriodSteps q)
218 $ both fst (head' "LevelMaker" c,last c)
219 --------------------------------------
220 --------------------------------------
221 corpusToDocs c p = groupDocsByPeriod date (getPhyloPeriods p) $ parseDocs (getFoundationsRoots p) c
222
223
224 instance PhyloMaker [Document]
225 where
226 --------------------------------------
227 toPhylo q c roots termList = toNthLevel (getNthLevel q) (getInterTemporalMatching q) (getNthCluster q) phylo1
228 where
229 --------------------------------------
230 phylo1 :: Phylo
231 phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) (getContextualUnitMetrics q) (getContextualUnitFilters q) phyloDocs phylo0
232 --------------------------------------
233 phylo0 :: Phylo
234 phylo0 = toPhylo0 phyloDocs phyloBase
235 --------------------------------------
236 phyloDocs :: Map (Date, Date) [Document]
237 phyloDocs = corpusToDocs c phyloBase
238 --------------------------------------
239 phyloBase :: Phylo
240 phyloBase = tracePhyloBase $ toPhyloBase q (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just q)) c roots termList
241 --------------------------------------
242 --------------------------------------
243 toPhyloBase q p c roots termList = initPhyloBase periods foundations p
244 where
245 --------------------------------------
246 foundations :: PhyloFoundations
247 foundations = PhyloFoundations (initFoundationsRoots roots) termList
248 --------------------------------------
249 periods :: [(Date,Date)]
250 periods = initPeriods (getPeriodGrain q) (getPeriodSteps q)
251 $ both date (head' "LevelMaker" c,last c)
252 --------------------------------------
253 --------------------------------------
254 corpusToDocs c p = groupDocsByPeriod date (getPhyloPeriods p) c
255
256
257 -----------------
258 -- | Tracers | --
259 -----------------
260
261
262 tracePhyloBase :: Phylo -> Phylo
263 tracePhyloBase p = trace ( "\n-------------\n--| Phylo |--\n-------------\n\n"
264 <> show (length $ _phylo_periods p) <> " periods from "
265 <> show (getPhyloPeriodId $ (head' "PhyloMaker") $ _phylo_periods p)
266 <> " to "
267 <> show (getPhyloPeriodId $ last $ _phylo_periods p)
268 <> "\n"
269 <> show ( Vector.length $ getFoundationsRoots p) <> " foundations roots \n") p
270
271
272 traceTempoMatching :: Filiation -> Level -> Phylo -> Phylo
273 traceTempoMatching fil lvl p = trace ( "----\n" <> show (fil) <> " filtered temporal Matching in Phylo" <> show (lvl) <> " :\n"
274 <> "count : " <> show (length pts) <> " pointers\n"
275 <> "similarity : " <> show (percentile 25 (VS.fromList sim)) <> " (25%) "
276 <> show (percentile 50 (VS.fromList sim)) <> " (50%) "
277 <> show (percentile 75 (VS.fromList sim)) <> " (75%) "
278 <> show (percentile 90 (VS.fromList sim)) <> " (90%)\n") p
279 where
280 --------------------------------------
281 sim :: [Double]
282 sim = sort $ map snd pts
283 --------------------------------------
284 pts :: [Pointer]
285 pts = concat $ map (\g -> getGroupPointers PeriodEdge fil g) $ getGroupsWithLevel lvl p
286 --------------------------------------
287
288
289 traceBranches :: Level -> Phylo -> Phylo
290 traceBranches lvl p = trace ( "----\n" <> "Branches in Phylo" <> show lvl <> " :\n"
291 <> "count : " <> show (length $ filter (\(lvl',_) -> lvl' == lvl ) $ getBranchIds p) <> " branches\n"
292 <> "count : " <> show (length $ getGroupsWithLevel lvl p) <> " groups\n"
293 <> "groups by branch : " <> show (percentile 25 (VS.fromList brs)) <> " (25%) "
294 <> show (percentile 50 (VS.fromList brs)) <> " (50%) "
295 <> show (percentile 75 (VS.fromList brs)) <> " (75%) "
296 <> show (percentile 90 (VS.fromList brs)) <> " (90%)\n") p
297 where
298 --------------------------------------
299 brs :: [Double]
300 brs = sort $ map (\(_,gs) -> fromIntegral $ length gs)
301 $ filter (\(id,_) -> (fst id) == lvl)
302 $ getGroupsByBranches p
303 --------------------------------------