]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo/SynchronicClustering.hs
Merge branch 'dev' into dev-phylo
[gargantext.git] / src / Gargantext / Viz / Phylo / SynchronicClustering.hs
1 {-|
2 Module : Gargantext.Viz.Phylo.SynchronicClustering
3 Description : Module dedicated to the adaptative synchronic clustering of a Phylo.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9 -}
10
11 {-# LANGUAGE NoImplicitPrelude #-}
12 {-# LANGUAGE FlexibleContexts #-}
13 {-# LANGUAGE OverloadedStrings #-}
14 {-# LANGUAGE MultiParamTypeClasses #-}
15
16 module Gargantext.Viz.Phylo.SynchronicClustering where
17
18 import Gargantext.Prelude
19 import Gargantext.Viz.AdaptativePhylo
20 import Gargantext.Viz.Phylo.PhyloTools
21 import Gargantext.Viz.Phylo.TemporalMatching (weightedLogJaccard)
22
23 import Data.List ((++), null, intersect, nub, concat, sort)
24 import Data.Map (Map, fromList, fromListWith, foldlWithKey, (!), insert, empty, restrictKeys, elems, mapWithKey, member)
25
26 import Control.Lens hiding (Level)
27 -- import Debug.Trace (trace)
28
29
30 -------------------------
31 -- | New Level Maker | --
32 -------------------------
33
34 toBranchId :: PhyloGroup -> PhyloBranchId
35 toBranchId child = ((child ^. phylo_groupLevel) + 1, snd (child ^. phylo_groupBranchId))
36
37 mergeGroups :: [Cooc] -> PhyloGroupId -> Map PhyloGroupId PhyloGroupId -> [PhyloGroup] -> PhyloGroup
38 mergeGroups coocs id mapIds childs =
39 let ngrams = (sort . nub . concat) $ map _phylo_groupNgrams childs
40 in PhyloGroup (fst $ fst id) (snd $ fst id) (snd id) ""
41 (sum $ map _phylo_groupSupport childs) ngrams
42 (ngramsToCooc ngrams coocs) (toBranchId (head' "mergeGroups" childs))
43 empty [] (map (\g -> (getGroupId g, 1)) childs)
44 (updatePointers $ concat $ map _phylo_groupPeriodParents childs)
45 (updatePointers $ concat $ map _phylo_groupPeriodChilds childs)
46 where
47 updatePointers :: [Pointer] -> [Pointer]
48 updatePointers pointers = map (\(pId,w) -> (mapIds ! pId,w)) pointers
49
50
51 addPhyloLevel :: Level -> Phylo -> Phylo
52 addPhyloLevel lvl phylo =
53 over ( phylo_periods . traverse )
54 (\phyloPrd -> phyloPrd & phylo_periodLevels
55 %~ (insert (phyloPrd ^. phylo_periodPeriod, lvl) (PhyloLevel (phyloPrd ^. phylo_periodPeriod) lvl empty))) phylo
56
57
58 toNextLevel :: Phylo -> [PhyloGroup] -> Phylo
59 toNextLevel phylo groups =
60 let curLvl = getLastLevel phylo
61 oldGroups = fromList $ map (\g -> (getGroupId g, getLevelParentId g)) groups
62 newGroups = fromListWith (++)
63 -- | 5) group the parents by periods
64 $ foldlWithKey (\acc id groups' ->
65 -- | 4) create the parent group
66 let parent = mergeGroups (elems $ restrictKeys (phylo ^. phylo_timeCooc) $ periodsToYears [(fst . fst) id]) id oldGroups groups'
67 in acc ++ [(parent ^. phylo_groupPeriod, [parent])]) []
68 -- | 3) group the current groups by parentId
69 $ fromListWith (++) $ map (\g -> (getLevelParentId g, [g])) groups
70 in traceSynchronyEnd
71 $ over ( phylo_periods . traverse . phylo_periodLevels . traverse
72 -- | 6) update each period at curLvl + 1
73 . filtered (\phyloLvl -> phyloLvl ^. phylo_levelLevel == (curLvl + 1)))
74 -- | 7) by adding the parents
75 (\phyloLvl ->
76 if member (phyloLvl ^. phylo_levelPeriod) newGroups
77 then phyloLvl & phylo_levelGroups
78 .~ fromList (map (\g -> (getGroupId g, g)) $ newGroups ! (phyloLvl ^. phylo_levelPeriod))
79 else phyloLvl)
80 -- | 2) add the curLvl + 1 phyloLevel to the phylo
81 $ addPhyloLevel (curLvl + 1)
82 -- | 1) update the current groups (with level parent pointers) in the phylo
83 $ updatePhyloGroups curLvl (fromList $ map (\g -> (getGroupId g, g)) groups) phylo
84
85
86 --------------------
87 -- | Clustering | --
88 --------------------
89
90
91 toPairs :: [PhyloGroup] -> [(PhyloGroup,PhyloGroup)]
92 toPairs groups = filter (\(g,g') -> (not . null) $ intersect (g ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams))
93 $ listToCombi' groups
94
95 groupsToEdges :: Proximity -> Double -> Double -> [PhyloGroup] -> [((PhyloGroup,PhyloGroup),Double)]
96 groupsToEdges prox thr docs groups =
97 case prox of
98 WeightedLogJaccard sens _ _ -> filter (\(_,w) -> w >= thr)
99 $ map (\(g,g') -> ((g,g'), weightedLogJaccard sens docs (g ^. phylo_groupCooc) (g' ^. phylo_groupCooc) (g ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams)))
100 $ toPairs groups
101 _ -> undefined
102
103
104 toRelatedComponents :: [PhyloGroup] -> [((PhyloGroup,PhyloGroup),Double)] -> [[PhyloGroup]]
105 toRelatedComponents nodes edges = relatedComponents $ ((map (\((g,g'),_) -> [g,g']) edges) ++ (map (\g -> [g]) nodes))
106
107 toParentId :: PhyloGroup -> PhyloGroupId
108 toParentId child = ((child ^. phylo_groupPeriod, child ^. phylo_groupLevel + 1), child ^. phylo_groupIndex)
109
110
111 reduceBranch :: Proximity -> Double -> Map Date Double -> [PhyloGroup] -> [PhyloGroup]
112 reduceBranch prox thr docs branch =
113 -- | 1) reduce a branch as a set of periods & groups
114 let periods = fromListWith (++)
115 $ map (\g -> (g ^. phylo_groupPeriod,[g])) branch
116 in (concat . concat . elems)
117 $ mapWithKey (\prd groups ->
118 -- | 2) for each period, transform the groups as a proximity graph filtered by a threshold
119 let edges = groupsToEdges prox thr ((sum . elems) $ restrictKeys docs $ periodsToYears [prd]) groups
120 in map (\comp ->
121 -- | 4) add to each groups their futur level parent group
122 let parentId = toParentId (head' "parentId" comp)
123 in map (\g -> g & phylo_groupLevelParents %~ (++ [(parentId,1)]) ) comp )
124 -- |3) reduce the graph a a set of related components
125 $ toRelatedComponents groups edges) periods
126
127
128 synchronicClustering :: Phylo -> Phylo
129 synchronicClustering phylo =
130 case (phyloSynchrony $ getConfig phylo) of
131 ByProximityThreshold thr -> toNextLevel phylo
132 $ concat
133 $ map (\branch -> reduceBranch (phyloProximity $ getConfig phylo) thr (phylo ^. phylo_timeDocs) branch)
134 $ phyloToLastBranches
135 $ traceSynchronyStart phylo
136 ByProximityDistribution -> undefined