2 Module : Gargantext.Viz.Phylo.Tools
3 Description : Phylomemy Tools to build/manage it
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
14 module Gargantext.Viz.Phylo.LinkMaker
17 import Control.Parallel.Strategies
18 import Control.Lens hiding (both, Level)
19 import Data.List ((++), sortOn, null, tail, splitAt, elem, concat, delete, intersect, elemIndex, groupBy, union, inits, scanl, find)
20 import Data.Tuple.Extra
21 import Data.Map (Map, (!), fromListWith, elems, restrictKeys, filterWithKey, keys, unionWith, unions, intersectionWith, member, fromList)
22 import Gargantext.Prelude
23 import Gargantext.Viz.Phylo
24 import Gargantext.Viz.Phylo.Tools
25 import Gargantext.Viz.Phylo.Metrics
26 import qualified Data.List as List
27 import qualified Data.Maybe as Maybe
28 import qualified Data.Map as Map
30 import qualified Data.Vector.Storable as VS
31 import Debug.Trace (trace)
32 import Numeric.Statistics (percentile)
34 -----------------------------
35 -- | From Level to level | --
36 -----------------------------
39 -- | To set the LevelLinks between a given PhyloGroup and a list of childs/parents PhyloGroups
40 linkGroupToGroups :: PhyloGroup -> [PhyloGroup] -> PhyloGroup
41 linkGroupToGroups current targets = over (phylo_groupLevelParents) addPointers current
43 --------------------------------------
44 addPointers :: [Pointer] -> [Pointer]
45 addPointers lp = lp ++ Maybe.mapMaybe (\target ->
46 if (elem (getGroupId current) (getGroupLevelChildsId target))
47 then Just ((getGroupId target),1)
49 --------------------------------------
52 setLevelLinks :: (Level,Level) -> Phylo -> Phylo
53 setLevelLinks (lvl,lvl') p = alterGroupWithLevel (\group -> linkGroupToGroups group
54 $ filter (\g' -> (not . null) $ intersect (getGroupNgrams group) (getGroupNgrams g'))
55 $ getGroupsWithFilters lvl' (getGroupPeriod group) p) lvl p
58 -------------------------------
59 -- | From Period to Period | --
60 -------------------------------
63 -- | To get the next or previous PhyloPeriod based on a given PhyloPeriodId
64 getNextPeriods :: Filiation -> Int -> PhyloPeriodId -> [PhyloPeriodId] -> [PhyloPeriodId]
65 getNextPeriods to' limit id l = case to' of
66 Descendant -> take limit $ (tail . snd) next
67 Ascendant -> take limit $ (reverse . fst) next
68 _ -> panic ("[ERR][Viz.Phylo.Example.getNextPeriods] Filiation type not defined")
70 --------------------------------------
71 next :: ([PhyloPeriodId], [PhyloPeriodId])
73 --------------------------------------
75 idx = case (List.elemIndex id l) of
76 Nothing -> panic ("[ERR][Viz.Phylo.Example.getNextPeriods] PhyloPeriodId not defined")
78 --------------------------------------
81 -- | To get the number of docs produced during a list of periods
82 periodsToNbDocs :: [PhyloPeriodId] -> Phylo -> Double
83 periodsToNbDocs prds phylo = sum $ elems
84 $ restrictKeys (phylo ^. phylo_docsByYears)
88 -- | To process a given Proximity
89 processProximity :: Proximity -> Double -> Map (Int, Int) Double -> Map (Int, Int) Double -> [Int] -> [Int] -> Double
90 processProximity proximity nbDocs cooc cooc' ngrams ngrams' = case proximity of
91 WeightedLogJaccard (WLJParams _ sens) -> weightedLogJaccard sens nbDocs cooc cooc' ngrams ngrams'
92 Hamming (HammingParams _) -> hamming cooc cooc'
93 _ -> panic "[ERR][Viz.Phylo.LinkMaker.processProximity] Unknown proximity"
96 filterProximity :: Double -> Proximity -> Bool
97 filterProximity score prox = case prox of
98 WeightedLogJaccard (WLJParams thr _) -> score >= thr
99 Hamming (HammingParams thr) -> score <= thr
100 _ -> panic "[ERR][Viz.Phylo.LinkMaker.filterProximity] Unknown proximity"
103 makePairs :: [(Date,Date)] -> PhyloGroup -> Phylo -> [(PhyloGroup,PhyloGroup)]
104 makePairs prds g p = filter (\pair -> ((last' "makePairs" prds) == (getGroupPeriod $ fst pair))
105 || ((last' "makePairs" prds) == (getGroupPeriod $ snd pair)))
107 $ filter (\g' -> (elem (getGroupPeriod g') prds)
108 && ((not . null) $ intersect (getGroupNgrams g) (getGroupNgrams g'))
109 && (((last' "makePairs" prds) == (getGroupPeriod g))
110 ||((matchWithPairs g (g,g') p) >= (getPhyloMatchingFrameTh p))))
111 $ getGroupsWithLevel (getGroupLevel g) p
113 matchWithPairs :: PhyloGroup -> (PhyloGroup,PhyloGroup) -> Phylo -> Double
114 matchWithPairs g1 (g2,g3) p =
115 let nbDocs = periodsToNbDocs [(getGroupPeriod g1),(getGroupPeriod g2),(getGroupPeriod g3)] p
118 else unionWith (+) (getGroupCooc g2) (getGroupCooc g3)
119 ngrams = if (g2 == g3)
120 then getGroupNgrams g2
121 else union (getGroupNgrams g2) (getGroupNgrams g3)
122 in processProximity (getPhyloProximity p) nbDocs (getGroupCooc g1) cooc (getGroupNgrams g1) ngrams
125 phyloGroupMatching :: [PhyloPeriodId] -> PhyloGroup -> Phylo -> [Pointer]
126 phyloGroupMatching periods g p = case pointers of
128 Just pts -> head' "phyloGroupMatching"
129 -- | Keep only the best set of pointers grouped by proximity
130 $ groupBy (\pt pt' -> snd pt == snd pt')
131 $ reverse $ sortOn snd pts
132 -- | Find the first time frame where at leats one pointer satisfies the proximity threshold
134 --------------------------------------
135 pointers :: Maybe [Pointer]
136 pointers = find (not . null)
137 -- | For each time frame, process the Proximity on relevant pairs of targeted groups
138 $ scanl (\acc frame ->
139 let pairs = makePairs frame g p
140 in acc ++ ( filter (\(_,proxi) -> filterProximity proxi (getPhyloProximity p))
143 let proxi = matchWithPairs g (t,t') p
146 then [(getGroupId t,proxi)]
147 else [(getGroupId t,proxi),(getGroupId t',proxi)] ) pairs ) ) []
148 -- | [[1900],[1900,1901],[1900,1901,1902],...] | length max => + 5 years
150 --------------------------------------
153 -- | To add some Pointer to a PhyloGroup
154 addPointers' :: Filiation -> [Pointer] -> PhyloGroup -> PhyloGroup
155 addPointers' fil pts g = g & case fil of
156 Descendant -> phylo_groupPeriodChilds %~ (++ pts)
157 Ascendant -> phylo_groupPeriodParents %~ (++ pts)
158 _ -> panic ("[ERR][Viz.Phylo.LinkMaker.addPointers] Wrong type of filiation")
162 -- | To update a list of phyloGroups with some Pointers
163 updateGroups :: Filiation -> Level -> Map PhyloGroupId [Pointer] -> Phylo -> Phylo
164 updateGroups fil lvl m p = alterPhyloGroups (\gs -> map (\g -> if ((getGroupLevel g) == lvl) && (member (getGroupId g) m)
165 then addPointers' fil (m ! (getGroupId g)) g
170 -- | Optimisation : to keep only the groups that have at least one ngrams in commons with the target
171 initCandidates :: PhyloGroup -> [PhyloPeriodId] -> [PhyloGroup] -> [PhyloGroup]
172 initCandidates g prds gs = filter (\g' -> elem (getGroupPeriod g') prds)
173 $ filter (\g' -> (not . null) $ intersect (getGroupNgrams g) (getGroupNgrams g'))
177 -- | a init avec la [[head groups]] et la tail groups
178 toBranches :: [[PhyloGroup]] -> [PhyloGroup] -> [[PhyloGroup]]
181 | otherwise = toBranches mem' $ tail gs
183 --------------------------------------
184 mem' :: [[PhyloGroup]]
185 mem' = if (null withHead)
186 then mem ++ [[head' "toBranches" gs]]
187 else (filter (\gs' -> not $ elem gs' withHead) mem)
189 [(concat withHead) ++ [head' "toBranches" gs]]
190 --------------------------------------
191 withHead :: [[PhyloGroup]]
192 withHead = filter (\gs' -> (not . null)
193 $ intersect (concat $ map getGroupNgrams gs')
194 (getGroupNgrams $ (head' "toBranches" gs))
196 --------------------------------------
199 -- | To process an intertemporal matching task to a Phylo at a given level
200 -- | 1) split all groups (of the level) in branches (ie:related components sharing at least one ngram)
201 -- | 2) for each branch, for each group find the best candidates (by Filiation and Proximity) and create the corresponding pointers
202 -- | 3) update all the groups with the new pointers if they exist
203 interTempoMatching :: Filiation -> Level -> Proximity -> Phylo -> Phylo
204 interTempoMatching fil lvl _ p = updateGroups fil lvl (Map.fromList pointers) p
206 --------------------------------------
207 pointers :: [(PhyloGroupId,[Pointer])]
209 let pts = map (\g -> let periods = getNextPeriods fil (getPhyloMatchingFrame p) (getGroupPeriod g) (getPhyloPeriods p)
210 in (getGroupId g, phyloGroupMatching periods g p)) groups
211 pts' = pts `using` parList rdeepseq
213 --------------------------------------
214 groups :: [PhyloGroup]
215 groups = getGroupsWithLevel lvl p
216 --------------------------------------
219 ------------------------------------------------------------------------
220 -- | Make links from Period to Period after level 1
223 listToTuple :: (a -> b) -> [a] -> [(b,a)]
224 listToTuple f l = map (\x -> (f x, x)) l
227 groupsToMaps :: Ord b => (PhyloGroup -> b) -> [PhyloGroup] -> [Map PhyloGroupId PhyloGroup]
228 groupsToMaps f gs = map (\gs' -> fromList $ listToTuple getGroupId gs')
229 $ groupBy ((==) `on` f)
233 phyloToPeriodMaps :: Level -> Filiation -> Phylo -> [Map PhyloGroupId PhyloGroup]
234 phyloToPeriodMaps lvl fil p =
235 let prdMap = groupsToMaps (fst . getGroupPeriod) (getGroupsWithLevel lvl p)
237 Ascendant -> reverse prdMap
239 _ -> panic ("[ERR][Viz.Phylo.LinkMaker.phyloToPeriodMaps] Wrong type of filiation")
242 trackPointersRec :: Filiation -> Map PhyloGroupId PhyloGroup -> [PhyloGroup] -> [PhyloGroup] -> [PhyloGroup]
243 trackPointersRec fil m gs res =
244 if (null gs) then res
245 else if (Map.null m) then res ++ gs
247 let g = head' "track" gs
248 pts = Map.fromList $ getGroupPointers PeriodEdge fil g
249 pts' = Map.toList $ fromListWith (\w w' -> max w w') $ concat $ elems
250 $ intersectionWith (\w g' -> map (\(id,_w') -> (id, w))
251 $ getGroupPointers LevelEdge Ascendant g') pts m
252 res' = res ++ [case fil of
253 Ascendant -> g & phylo_groupPeriodParents .~ pts'
254 Descendant -> g & phylo_groupPeriodChilds .~ pts'
255 _ -> panic ("[ERR][Viz.Phylo.LinkMaker.transposeLinks] Wrong type of filiation")]
256 in trackPointersRec fil (filterWithKey (\k _ -> not $ elem k (keys pts)) m) (tail' "track" gs) res'
260 transposeLinks :: Level -> Filiation -> Phylo -> Phylo
261 transposeLinks lvl fil p =
262 let prdMap = zip (phyloToPeriodMaps (lvl - 1) fil p) (phyloToPeriodMaps lvl fil p)
263 transposed = map (\(gs,gs') ->
264 let idx = fromJust $ elemIndex (gs,gs') prdMap
265 next = take (getPhyloMatchingFrame p) $ snd $ splitAt (idx + 1) prdMap
266 groups = trackPointersRec fil (unions $ map fst next) (elems gs') []
267 in (getGroupPeriod $ head' "transpose" groups ,groups)
269 transposed' = Map.fromList $ (transposed `using` parList rdeepseq)
271 (\gs -> if ((not . null) gs) && (lvl == (getGroupLevel $ head' "transpose" gs))
272 then transposed' ! (getGroupPeriod $ head' "transpose" gs)
278 -- | Transpose the parent/child pointers from one level to another
279 transposePeriodLinks :: Level -> Phylo -> Phylo
280 transposePeriodLinks lvl p = alterPhyloGroups
281 (\gs -> if ((not . null) gs) && (elem lvl $ map getGroupLevel gs)
283 let groups = map (\g -> let m = reduceGroups g lvlGroups
284 in g & phylo_groupPeriodParents .~ (trackPointers m $ g ^. phylo_groupPeriodParents)
285 & phylo_groupPeriodChilds .~ (trackPointers m $ g ^. phylo_groupPeriodChilds )) gs
286 groups' = groups `using` parList rdeepseq
291 --------------------------------------
292 -- | find an other way to find the group from the id
293 trackPointers :: Map PhyloGroupId PhyloGroup -> [Pointer] -> [Pointer]
294 trackPointers m pts = Map.toList
295 $ fromListWith (\w w' -> max w w')
296 $ map (\(id,_w) -> (getGroupLevelParentId $ m ! id,_w)) pts
297 --------------------------------------
298 reduceGroups :: PhyloGroup -> [PhyloGroup] -> Map PhyloGroupId PhyloGroup
299 reduceGroups g gs = Map.fromList
300 $ map (\g' -> (getGroupId g',g'))
301 $ filter (\g' -> ((not . null) $ intersect (getGroupNgrams g) (getGroupNgrams g'))) gs
302 --------------------------------------
303 lvlGroups :: [PhyloGroup]
304 lvlGroups = getGroupsWithLevel (lvl - 1) p
305 --------------------------------------
312 traceMatching :: Filiation -> Level -> Double -> [Double] -> Phylo -> Phylo
313 traceMatching fil lvl thr lst p = trace ( "----\n" <> show (fil) <> " unfiltered temporal Matching in Phylo" <> show (lvl) <> " :\n"
314 <> "count : " <> show (length lst) <> " potential pointers (" <> show (length $ filter (>= thr) lst) <> " >= " <> show (thr) <> ")\n"
315 <> "similarity : " <> show (percentile 25 (VS.fromList lst)) <> " (25%) "
316 <> show (percentile 50 (VS.fromList lst)) <> " (50%) "
317 <> show (percentile 75 (VS.fromList lst)) <> " (75%) "
318 <> show (percentile 90 (VS.fromList lst)) <> " (90%)\n") p
321 tracePreBranches :: [[PhyloGroup]] -> [[PhyloGroup]]
322 tracePreBranches bs = trace (show (length bs) <> " pre-branches" <> "\n"
323 <> "with sizes : " <> show (map length bs) <> "\n") bs