2 Module : Gargantext.Viz.Phylo.Tools
3 Description : Phylomemy Tools to build/manage it
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE FlexibleContexts #-}
15 {-# LANGUAGE OverloadedStrings #-}
17 module Gargantext.Viz.Phylo.LinkMaker
20 import Control.Parallel.Strategies
21 import Control.Lens hiding (both, Level)
22 import Data.List ((++), sortOn, null, tail, splitAt, elem, concat, delete, intersect, nub, groupBy, union, inits, scanl, find)
23 import Data.Tuple.Extra
24 import Data.Map (Map,(!),fromListWith,elems,restrictKeys,unionWith,member)
25 import Gargantext.Prelude
26 import Gargantext.Viz.Phylo
27 import Gargantext.Viz.Phylo.Tools
28 import Gargantext.Viz.Phylo.Metrics.Proximity
29 import qualified Data.List as List
30 import qualified Data.Maybe as Maybe
31 import qualified Data.Map as Map
33 import qualified Data.Vector.Storable as VS
34 import Debug.Trace (trace)
35 import Numeric.Statistics (percentile)
37 -----------------------------
38 -- | From Level to level | --
39 -----------------------------
42 -- | To set the LevelLinks between a given PhyloGroup and a list of childs/parents PhyloGroups
43 linkGroupToGroups :: PhyloGroup -> [PhyloGroup] -> PhyloGroup
44 linkGroupToGroups current targets = over (phylo_groupLevelParents) addPointers current
46 --------------------------------------
47 addPointers :: [Pointer] -> [Pointer]
48 addPointers lp = lp ++ Maybe.mapMaybe (\target ->
49 if (elem (getGroupId current) (getGroupLevelChildsId target))
50 then Just ((getGroupId target),1)
52 --------------------------------------
55 setLevelLinks :: (Level,Level) -> Phylo -> Phylo
56 setLevelLinks (lvl,lvl') p = alterGroupWithLevel (\group -> linkGroupToGroups group
57 $ filter (\g' -> (not . null) $ intersect (getGroupNgrams group) (getGroupNgrams g'))
58 $ getGroupsWithFilters lvl' (getGroupPeriod group) p) lvl p
61 -------------------------------
62 -- | From Period to Period | --
63 -------------------------------
66 -- | To get the next or previous PhyloPeriod based on a given PhyloPeriodId
67 getNextPeriods :: Filiation -> Int -> PhyloPeriodId -> [PhyloPeriodId] -> [PhyloPeriodId]
68 getNextPeriods to' limit id l = case to' of
69 Descendant -> take limit $ (tail . snd) next
70 Ascendant -> take limit $ (reverse . fst) next
71 _ -> panic ("[ERR][Viz.Phylo.Example.getNextPeriods] Filiation type not defined")
73 --------------------------------------
74 next :: ([PhyloPeriodId], [PhyloPeriodId])
76 --------------------------------------
78 idx = case (List.elemIndex id l) of
79 Nothing -> panic ("[ERR][Viz.Phylo.Example.getNextPeriods] PhyloPeriodId not defined")
81 --------------------------------------
84 -- | To get the number of docs produced during a list of periods
85 periodsToNbDocs :: [PhyloPeriodId] -> Phylo -> Double
86 periodsToNbDocs prds phylo = sum $ elems
87 $ restrictKeys (phylo ^. phylo_docsByYears)
91 -- | To process a given Proximity
92 processProximity :: Proximity -> Double -> Map (Int, Int) Double -> Map (Int, Int) Double -> [Int] -> [Int] -> Double
93 processProximity proximity nbDocs cooc cooc' ngrams ngrams' = case proximity of
94 WeightedLogJaccard (WLJParams _ sens) -> weightedLogJaccard sens nbDocs cooc cooc' ngrams ngrams'
95 Hamming (HammingParams _) -> hamming cooc cooc'
96 _ -> panic "[ERR][Viz.Phylo.LinkMaker.processProximity] Unknown proximity"
99 filterProximity :: Double -> Proximity -> Bool
100 filterProximity score prox = case prox of
101 WeightedLogJaccard (WLJParams thr _) -> score >= thr
102 Hamming (HammingParams thr) -> score <= thr
103 _ -> panic "[ERR][Viz.Phylo.LinkMaker.filterProximity] Unknown proximity"
106 makePairs :: [(Date,Date)] -> PhyloGroup -> Phylo -> [(PhyloGroup,PhyloGroup)]
107 makePairs prds g p = filter (\pair -> ((last' "makePairs" prds) == (getGroupPeriod $ fst pair))
108 || ((last' "makePairs" prds) == (getGroupPeriod $ snd pair)))
110 $ filter (\g' -> (elem (getGroupPeriod g') prds)
111 && ((not . null) $ intersect (getGroupNgrams g) (getGroupNgrams g'))
112 && (((last' "makePairs" prds) == (getGroupPeriod g))
113 ||((matchWithPairs g (g,g') p) >= (getPhyloMatchingFrameTh p))))
114 $ getGroupsWithLevel (getGroupLevel g) p
116 matchWithPairs :: PhyloGroup -> (PhyloGroup,PhyloGroup) -> Phylo -> Double
117 matchWithPairs g1 (g2,g3) p =
118 let nbDocs = periodsToNbDocs [(getGroupPeriod g1),(getGroupPeriod g2),(getGroupPeriod g3)] p
121 else unionWith (+) (getGroupCooc g2) (getGroupCooc g3)
122 ngrams = if (g2 == g3)
123 then getGroupNgrams g2
124 else union (getGroupNgrams g2) (getGroupNgrams g3)
125 in processProximity (getPhyloProximity p) nbDocs (getGroupCooc g1) cooc (getGroupNgrams g1) ngrams
128 phyloGroupMatching :: [PhyloPeriodId] -> PhyloGroup -> Phylo -> [Pointer]
129 phyloGroupMatching periods g p = case pointers of
131 Just pts -> head' "phyloGroupMatching"
132 -- | Keep only the best set of pointers grouped by proximity
133 $ groupBy (\pt pt' -> snd pt == snd pt')
134 $ reverse $ sortOn snd pts
135 -- | Find the first time frame where at leats one pointer satisfies the proximity threshold
137 --------------------------------------
138 pointers :: Maybe [Pointer]
139 pointers = find (not . null)
140 -- | For each time frame, process the Proximity on relevant pairs of targeted groups
141 $ scanl (\acc frame ->
142 let pairs = makePairs frame g p
143 in acc ++ ( filter (\(_,proxi) -> filterProximity proxi (getPhyloProximity p))
146 let proxi = matchWithPairs g (t,t') p
149 then [(getGroupId t,proxi)]
150 else [(getGroupId t,proxi),(getGroupId t',proxi)] ) pairs ) ) []
151 -- | [[1900],[1900,1901],[1900,1901,1902],...] | length max => + 5 years
153 --------------------------------------
156 -- | To add some Pointer to a PhyloGroup
157 addPointers' :: Filiation -> [Pointer] -> PhyloGroup -> PhyloGroup
158 addPointers' fil pts g = g & case fil of
159 Descendant -> phylo_groupPeriodChilds %~ (++ pts)
160 Ascendant -> phylo_groupPeriodParents %~ (++ pts)
161 _ -> panic ("[ERR][Viz.Phylo.LinkMaker.addPointers] Wrong type of filiation")
165 -- | To update a list of phyloGroups with some Pointers
166 updateGroups :: Filiation -> Level -> Map PhyloGroupId [Pointer] -> Phylo -> Phylo
167 updateGroups fil lvl m p = alterPhyloGroups (\gs -> map (\g -> if ((getGroupLevel g) == lvl) && (member (getGroupId g) m)
168 then addPointers' fil (m ! (getGroupId g)) g
173 -- | Optimisation : to keep only the groups that have at least one ngrams in commons with the target
174 initCandidates :: PhyloGroup -> [PhyloPeriodId] -> [PhyloGroup] -> [PhyloGroup]
175 initCandidates g prds gs = filter (\g' -> elem (getGroupPeriod g') prds)
176 $ filter (\g' -> (not . null) $ intersect (getGroupNgrams g) (getGroupNgrams g'))
180 -- | a init avec la [[head groups]] et la tail groups
181 toBranches :: [[PhyloGroup]] -> [PhyloGroup] -> [[PhyloGroup]]
184 | otherwise = toBranches mem' $ tail gs
186 --------------------------------------
187 mem' :: [[PhyloGroup]]
188 mem' = if (null withHead)
189 then mem ++ [[head' "toBranches" gs]]
190 else (filter (\gs' -> not $ elem gs' withHead) mem)
192 [(concat withHead) ++ [head' "toBranches" gs]]
193 --------------------------------------
194 withHead :: [[PhyloGroup]]
195 withHead = filter (\gs' -> (not . null)
196 $ intersect (concat $ map getGroupNgrams gs')
197 (getGroupNgrams $ (head' "toBranches" gs))
199 --------------------------------------
202 -- | To process an intertemporal matching task to a Phylo at a given level
203 -- | 1) split all groups (of the level) in branches (ie:related components sharing at least one ngram)
204 -- | 2) for each branch, for each group find the best candidates (by Filiation and Proximity) and create the corresponding pointers
205 -- | 3) update all the groups with the new pointers if they exist
206 interTempoMatching :: Filiation -> Level -> Proximity -> Phylo -> Phylo
207 interTempoMatching fil lvl _ p = updateGroups fil lvl (Map.fromList pointers) p
209 --------------------------------------
210 pointers :: [(PhyloGroupId,[Pointer])]
212 let pts = map (\g -> let periods = getNextPeriods fil (getPhyloMatchingFrame p) (getGroupPeriod g) (getPhyloPeriods p)
213 in (getGroupId g, phyloGroupMatching periods g p)) groups
214 pts' = pts `using` parList rdeepseq
216 --------------------------------------
217 groups :: [PhyloGroup]
218 groups = getGroupsWithLevel lvl p
219 --------------------------------------
222 ------------------------------------------------------------------------
223 -- | Make links from Period to Period after level 1
225 -- | Transpose the parent/child pointers from one level to another
226 transposePeriodLinks :: Level -> Phylo -> Phylo
227 transposePeriodLinks lvl p = alterPhyloGroups
228 (\gs -> if ((not . null) gs) && (elem lvl $ map getGroupLevel gs)
230 let groups = map (\g -> g & phylo_groupPeriodParents .~ (trackPointers (reduceGroups g lvlGroups)
231 $ g ^. phylo_groupPeriodParents)
232 & phylo_groupPeriodChilds .~ (trackPointers (reduceGroups g lvlGroups)
233 $ g ^. phylo_groupPeriodChilds )) gs
234 groups' = groups `using` parList rdeepseq
239 --------------------------------------
240 -- | find an other way to find the group from the id
241 trackPointers :: Map PhyloGroupId PhyloGroup -> [Pointer] -> [Pointer]
242 trackPointers m pts = Map.toList
243 $ fromListWith (\w w' -> max w w')
244 $ map (\(id,_w) -> (getGroupLevelParentId $ m ! id,_w)) pts
245 --------------------------------------
246 reduceGroups :: PhyloGroup -> [PhyloGroup] -> Map PhyloGroupId PhyloGroup
247 reduceGroups g gs = Map.fromList
248 $ map (\g' -> (getGroupId g',g'))
249 $ filter (\g' -> ((not . null) $ intersect (getGroupNgrams g) (getGroupNgrams g'))) gs
250 --------------------------------------
251 lvlGroups :: [PhyloGroup]
252 lvlGroups = getGroupsWithLevel (lvl - 1) p
253 --------------------------------------
260 traceMatching :: Filiation -> Level -> Double -> [Double] -> Phylo -> Phylo
261 traceMatching fil lvl thr lst p = trace ( "----\n" <> show (fil) <> " unfiltered temporal Matching in Phylo" <> show (lvl) <> " :\n"
262 <> "count : " <> show (length lst) <> " potential pointers (" <> show (length $ filter (>= thr) lst) <> " >= " <> show (thr) <> ")\n"
263 <> "similarity : " <> show (percentile 25 (VS.fromList lst)) <> " (25%) "
264 <> show (percentile 50 (VS.fromList lst)) <> " (50%) "
265 <> show (percentile 75 (VS.fromList lst)) <> " (75%) "
266 <> show (percentile 90 (VS.fromList lst)) <> " (90%)\n") p
269 tracePreBranches :: [[PhyloGroup]] -> [[PhyloGroup]]
270 tracePreBranches bs = trace (show (length bs) <> " pre-branches" <> "\n"
271 <> "with sizes : " <> show (map length bs) <> "\n") bs