2 Module : Gargantext.Viz.Phylo.Tools
3 Description : Phylomemy Tools to build/manage it
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE FlexibleContexts #-}
15 {-# LANGUAGE OverloadedStrings #-}
16 {-# LANGUAGE TypeSynonymInstances #-}
17 {-# LANGUAGE FlexibleInstances #-}
19 module Gargantext.Viz.Phylo.LevelMaker
22 import Control.Lens hiding (both, Level)
23 import Data.List ((++), sort, concat, nub, zip, last)
24 import Data.Map (Map, (!), empty, restrictKeys, filterWithKey, singleton, union)
25 import Data.Text (Text)
26 import Data.Tuple.Extra
27 import Gargantext.Prelude
28 import Gargantext.Viz.Phylo
29 import Gargantext.Viz.Phylo.Aggregates.Cluster
30 import Gargantext.Viz.Phylo.Aggregates.Cooc
31 import Gargantext.Viz.Phylo.Aggregates.Document
32 import Gargantext.Viz.Phylo.Aggregates.Fis
33 import Gargantext.Viz.Phylo.BranchMaker
34 import Gargantext.Viz.Phylo.LinkMaker
35 import Gargantext.Viz.Phylo.Tools
36 import Gargantext.Text.Context (TermList)
38 import qualified Data.Vector.Storable as VS
40 import qualified Data.Set as Set
41 import qualified Data.Vector as Vector
43 import Debug.Trace (trace)
44 import Numeric.Statistics (percentile)
47 -- | A typeClass for polymorphic PhyloLevel functions
48 class PhyloLevelMaker aggregate
50 -- | To add a new Level of PhyloGroups to a Phylo based on a list of Aggregates
51 addPhyloLevel :: Level -> Map (Date,Date) [aggregate] -> Phylo -> Phylo
52 -- | To create a list of PhyloGroups based on a list of aggregates a
53 toPhyloGroups :: Level -> (Date,Date) -> [aggregate] -> Map (Date,Date) [aggregate] -> Phylo -> [PhyloGroup]
56 instance PhyloLevelMaker PhyloCluster
58 --------------------------------------
59 -- | Level -> Map (Date,Date) [Cluster] -> Phylo -> Phylo
61 | lvl > 1 = toPhyloLevel lvl m p
62 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Clusters at level < 2")
63 --------------------------------------
64 -- | Level -> (Date,Date) -> [Cluster] -> Map (Date,Date) [Cluster] -> Phylo -> [PhyloGroup]
65 toPhyloGroups lvl (d,d') l m p = map (\(idx,cluster) -> clusterToGroup (d,d') lvl idx "" cluster m p) $ zip [1..] l
66 --------------------------------------
69 instance PhyloLevelMaker PhyloFis
71 --------------------------------------
72 -- | Level -> Map (Date,Date) [Fis] -> Phylo -> Phylo
74 | lvl == 1 = toPhyloLevel lvl m p
75 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Fis at level <> 1")
76 --------------------------------------
77 -- | Level -> (Date,Date) -> [Fis] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
78 toPhyloGroups lvl (d,d') l m p = map (\(idx,fis) -> cliqueToGroup (d,d') lvl idx "" fis m p) $ zip [1..] l
79 --------------------------------------
82 instance PhyloLevelMaker Document
84 --------------------------------------
85 -- | Level -> Map (Date,Date) [Document] -> Phylo -> Phylo
87 | lvl == 0 = toPhyloLevel lvl m p
88 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Documents at level <> 0")
89 --------------------------------------
90 -- | Level -> (Date,Date) -> [Document] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
91 toPhyloGroups lvl (d,d') l _m p = map (\(idx,ngram) -> ngramsToGroup (d,d') lvl idx ngram [ngram] p)
95 --------------------------------------
98 -- | To transform a Cluster into a Phylogroup
99 clusterToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloCluster -> Map (Date,Date) [PhyloCluster] -> Phylo -> PhyloGroup
100 clusterToGroup prd lvl idx lbl groups _m p =
101 PhyloGroup ((prd, lvl), idx) lbl ngrams empty cooc Nothing [] [] [] (map (\g -> (getGroupId g, 1)) groups)
103 --------------------------------------
105 ngrams = (sort . nub . concat) $ map getGroupNgrams groups
106 --------------------------------------
107 cooc :: Map (Int, Int) Double
108 cooc = filterWithKey (\k _ -> elem (fst k) ngrams && elem (snd k) ngrams)
111 $ getGroupsWithFilters 1 prd p
112 --------------------------------------
115 -- | To transform a Clique into a PhyloGroup
116 cliqueToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloFis -> Map (Date, Date) [PhyloFis] -> Phylo -> PhyloGroup
117 cliqueToGroup prd lvl idx lbl fis m p =
118 PhyloGroup ((prd, lvl), idx) lbl ngrams (singleton "support" (fromIntegral $ getSupport fis)) cooc Nothing [] [] [] []
120 --------------------------------------
122 ngrams = sort $ map (\x -> getIdxInRoots x p)
125 --------------------------------------
126 cooc :: Map (Int, Int) Double
127 cooc = filterWithKey (\k _ -> elem (fst k) ngrams && elem (snd k) ngrams)
128 $ fisToCooc (restrictKeys m $ Set.fromList [prd]) p
129 --------------------------------------
132 -- | To transform a list of Ngrams into a PhyloGroup
133 ngramsToGroup :: PhyloPeriodId -> Level -> Int -> Text -> [Ngrams] -> Phylo -> PhyloGroup
134 ngramsToGroup prd lvl idx lbl ngrams p =
135 PhyloGroup ((prd, lvl), idx) lbl (sort $ map (\x -> getIdxInRoots x p) ngrams) empty empty Nothing [] [] [] []
138 -- | To traverse a Phylo and add a new PhyloLevel linked to a new list of PhyloGroups
139 toPhyloLevel :: PhyloLevelMaker a => Level -> Map (Date, Date) [a] -> Phylo -> Phylo
140 toPhyloLevel lvl m p = alterPhyloPeriods
141 (\period -> let pId = _phylo_periodId period
142 in over (phylo_periodLevels)
144 let groups = toPhyloGroups lvl pId (m ! pId) m p
145 in phyloLevels ++ [PhyloLevel (pId, lvl) groups]
149 -- | To incrementally add new Levels to a Phylo by making all the linking and aggregation tasks
150 toNthLevel :: Level -> Proximity -> Cluster -> Phylo -> Phylo
151 toNthLevel lvlMax prox clus p
153 | otherwise = toNthLevel lvlMax prox clus
154 $ traceBranches (lvl + 1)
155 $ setPhyloBranches (lvl + 1)
156 $ traceTempoMatching Descendant (lvl + 1)
157 $ interTempoMatching Descendant (lvl + 1) prox
158 $ traceTempoMatching Ascendant (lvl + 1)
159 $ interTempoMatching Ascendant (lvl + 1) prox
160 $ setLevelLinks (lvl, lvl + 1)
161 $ addPhyloLevel (lvl + 1)
162 (phyloToClusters lvl clus p) p
164 --------------------------------------
167 --------------------------------------
170 -- | To reconstruct the Level 1 of a Phylo based on a Clustering Method
171 toPhylo1 :: Cluster -> Proximity -> [Metric] -> [Filter] -> Map (Date, Date) [Document] -> Phylo -> Phylo
172 toPhylo1 clus prox metrics filters d p = case clus of
173 Fis (FisParams k s t) -> traceBranches 1
175 $ traceTempoMatching Descendant 1
176 $ interTempoMatching Descendant 1 prox
177 $ traceTempoMatching Ascendant 1
178 $ interTempoMatching Ascendant 1 prox
179 $ setLevelLinks (0,1)
180 $ setLevelLinks (1,0)
181 $ addPhyloLevel 1 phyloFis p
183 --------------------------------------
184 phyloFis :: Map (Date, Date) [PhyloFis]
185 phyloFis = toPhyloFis d k s t metrics filters
186 --------------------------------------
188 _ -> panic "[ERR][Viz.Phylo.LevelMaker.toPhylo1] fst clustering not recognized"
191 -- | To reconstruct the Level 0 of a Phylo
192 toPhylo0 :: Map (Date, Date) [Document] -> Phylo -> Phylo
193 toPhylo0 d p = addPhyloLevel 0 d p
196 class PhyloMaker corpus
198 toPhylo :: PhyloQueryBuild -> corpus -> [Ngrams] -> TermList -> Phylo
199 toPhyloBase :: PhyloQueryBuild -> PhyloParam -> corpus -> [Ngrams] -> TermList -> Phylo
200 corpusToDocs :: corpus -> Phylo -> Map (Date,Date) [Document]
202 instance PhyloMaker [(Date, Text)]
204 --------------------------------------
205 toPhylo q c roots termList = toNthLevel (getNthLevel q) (getInterTemporalMatching q) (getNthCluster q) phylo1
207 --------------------------------------
209 phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) (getContextualUnitMetrics q) (getContextualUnitFilters q) phyloDocs phylo0
210 --------------------------------------
212 phylo0 = toPhylo0 phyloDocs phyloBase
213 --------------------------------------
214 phyloDocs :: Map (Date, Date) [Document]
215 phyloDocs = corpusToDocs c phyloBase
216 --------------------------------------
218 phyloBase = tracePhyloBase $ toPhyloBase q (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just q)) c roots termList
219 --------------------------------------
220 --------------------------------------
221 toPhyloBase q p c roots termList = initPhyloBase periods foundations p
223 --------------------------------------
224 foundations :: PhyloFoundations
225 foundations = PhyloFoundations (initFoundationsRoots roots) termList
226 --------------------------------------
227 periods :: [(Date,Date)]
228 periods = initPeriods (getPeriodGrain q) (getPeriodSteps q)
229 $ both fst (head' "LevelMaker" c,last c)
230 --------------------------------------
231 --------------------------------------
232 corpusToDocs c p = groupDocsByPeriod date (getPhyloPeriods p) $ parseDocs (getFoundationsRoots p) c
235 instance PhyloMaker [Document]
237 --------------------------------------
238 toPhylo q c roots termList = toNthLevel (getNthLevel q) (getInterTemporalMatching q) (getNthCluster q) phylo1
240 --------------------------------------
242 phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) (getContextualUnitMetrics q) (getContextualUnitFilters q) phyloDocs phylo0
243 --------------------------------------
245 phylo0 = toPhylo0 phyloDocs phyloBase
246 --------------------------------------
247 phyloDocs :: Map (Date, Date) [Document]
248 phyloDocs = corpusToDocs c phyloBase
249 --------------------------------------
251 phyloBase = tracePhyloBase $ toPhyloBase q (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just q)) c roots termList
252 --------------------------------------
253 --------------------------------------
254 toPhyloBase q p c roots termList = initPhyloBase periods foundations p
256 --------------------------------------
257 foundations :: PhyloFoundations
258 foundations = PhyloFoundations (initFoundationsRoots roots) termList
259 --------------------------------------
260 periods :: [(Date,Date)]
261 periods = initPeriods (getPeriodGrain q) (getPeriodSteps q)
262 $ both date (head' "LevelMaker" c,last c)
263 --------------------------------------
264 --------------------------------------
265 corpusToDocs c p = groupDocsByPeriod date (getPhyloPeriods p) c
273 tracePhyloBase :: Phylo -> Phylo
274 tracePhyloBase p = trace ( "----\nPhyloBase : \n"
275 <> show (length $ _phylo_periods p) <> " periods from "
276 <> show (getPhyloPeriodId $ (head' "PhyloMaker") $ _phylo_periods p)
278 <> show (getPhyloPeriodId $ last $ _phylo_periods p)
280 <> show ( Vector.length $ getFoundationsRoots p) <> " foundations roots \n") p
284 traceTempoMatching :: Filiation -> Level -> Phylo -> Phylo
285 traceTempoMatching fil lvl p = trace ( "----\n" <> show (fil) <> " filtered temporal Matching in Phylo" <> show (lvl) <> " :\n"
286 <> "count : " <> show (length pts) <> " pointers\n"
287 <> "similarity : " <> show (percentile 25 (VS.fromList sim)) <> " (25%) "
288 <> show (percentile 50 (VS.fromList sim)) <> " (50%) "
289 <> show (percentile 75 (VS.fromList sim)) <> " (75%) "
290 <> show (percentile 90 (VS.fromList sim)) <> " (90%)\n") p
292 --------------------------------------
294 sim = sort $ map snd pts
295 --------------------------------------
297 pts = concat $ map (\g -> getGroupPointers PeriodEdge fil g) $ getGroupsWithLevel lvl p
298 --------------------------------------
301 traceBranches :: Level -> Phylo -> Phylo
302 traceBranches lvl p = trace ( "----\n" <> "Branches in Phylo" <> show lvl <> " :\n"
303 <> "count : " <> show (length $ getBranchIds p) <> " branches\n"
304 <> "count : " <> show (length $ getGroupsWithLevel lvl p) <> " groups\n"
305 <> "groups by branch : " <> show (percentile 25 (VS.fromList brs)) <> " (25%) "
306 <> show (percentile 50 (VS.fromList brs)) <> " (50%) "
307 <> show (percentile 75 (VS.fromList brs)) <> " (75%) "
308 <> show (percentile 90 (VS.fromList brs)) <> " (90%)\n") p
310 --------------------------------------
312 brs = sort $ map (\(_,gs) -> fromIntegral $ length gs)
313 $ filter (\(id,_) -> (fst id) == lvl)
314 $ getGroupsByBranches p
315 --------------------------------------