2 Module : Gargantext.Viz.Phylo.Tools
3 Description : Phylomemy Tools to build/manage it
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
14 module Gargantext.Viz.Phylo.LinkMaker
17 import Control.Parallel.Strategies
18 import Control.Lens hiding (both, Level)
19 import Data.List ((++), sortOn, null, tail, splitAt, elem, concat, delete, intersect, elemIndex, groupBy, union, inits, scanl, find)
20 import Data.Tuple.Extra
21 import Data.Map (Map, (!), fromListWith, elems, restrictKeys, filterWithKey, keys, unionWith, unions, intersectionWith, member, fromList)
22 import Gargantext.Prelude
23 import Gargantext.Viz.Phylo
24 import Gargantext.Viz.Phylo.Tools
25 import Gargantext.Viz.Phylo.Metrics
26 import qualified Data.List as List
27 import qualified Data.Maybe as Maybe
28 import qualified Data.Map as Map
30 import qualified Data.Vector.Storable as VS
31 import Debug.Trace (trace)
32 import Numeric.Statistics (percentile)
34 -----------------------------
35 -- | From Level to level | --
36 -----------------------------
39 -- | To set the LevelLinks between a given PhyloGroup and a list of childs/parents PhyloGroups
40 linkGroupToGroups :: PhyloGroup -> [PhyloGroup] -> PhyloGroup
41 linkGroupToGroups current targets = over (phylo_groupLevelParents) addPointers current
43 --------------------------------------
44 addPointers :: [Pointer] -> [Pointer]
45 addPointers lp = lp ++ Maybe.mapMaybe (\target ->
46 if (elem (getGroupId current) (getGroupLevelChildsId target))
47 then Just ((getGroupId target),1)
49 --------------------------------------
52 setLevelLinks :: (Level,Level) -> Phylo -> Phylo
53 setLevelLinks (lvl,lvl') p = alterGroupWithLevel (\group -> linkGroupToGroups group
54 $ filter (\g' -> (not . null) $ intersect (getGroupNgrams group) (getGroupNgrams g'))
55 $ getGroupsWithFilters lvl' (getGroupPeriod group) p) lvl p
58 -------------------------------
59 -- | From Period to Period | --
60 -------------------------------
63 -- | To get the next or previous PhyloPeriod based on a given PhyloPeriodId
64 getNextPeriods :: Filiation -> Int -> PhyloPeriodId -> [PhyloPeriodId] -> [PhyloPeriodId]
65 getNextPeriods to' limit id l = case to' of
66 Descendant -> take limit $ (tail . snd) next
67 Ascendant -> take limit $ (reverse . fst) next
68 _ -> panic ("[ERR][Viz.Phylo.Example.getNextPeriods] Filiation type not defined")
70 --------------------------------------
71 next :: ([PhyloPeriodId], [PhyloPeriodId])
73 --------------------------------------
75 idx = case (List.elemIndex id l) of
76 Nothing -> panic ("[ERR][Viz.Phylo.Example.getNextPeriods] PhyloPeriodId not defined")
78 --------------------------------------
81 -- | To get the number of docs produced during a list of periods
82 periodsToNbDocs :: [PhyloPeriodId] -> Phylo -> Double
83 periodsToNbDocs prds phylo = sum $ elems
84 $ restrictKeys (phylo ^. phylo_docsByYears)
88 -- | To process a given Proximity
89 processProximity :: Proximity -> Double -> Map (Int, Int) Double -> Map (Int, Int) Double -> [Int] -> [Int] -> Double
90 processProximity proximity nbDocs cooc cooc' ngrams ngrams' = case proximity of
91 WeightedLogJaccard (WLJParams _ sens) -> weightedLogJaccard sens nbDocs cooc cooc' ngrams ngrams'
92 Hamming (HammingParams _) -> hamming cooc cooc'
93 _ -> panic "[ERR][Viz.Phylo.LinkMaker.processProximity] Unknown proximity"
96 filterProximity :: Double -> Proximity -> Bool
97 filterProximity score prox = case prox of
98 WeightedLogJaccard (WLJParams thr _) -> score >= thr
99 Hamming (HammingParams thr) -> score <= thr
100 _ -> panic "[ERR][Viz.Phylo.LinkMaker.filterProximity] Unknown proximity"
103 makePairs :: [(Date,Date)] -> PhyloGroup -> Phylo -> [(PhyloGroup,PhyloGroup)]
104 makePairs prds g p = filter (\pair -> ((last' "makePairs" prds) == (getGroupPeriod $ fst pair))
105 || ((last' "makePairs" prds) == (getGroupPeriod $ snd pair)))
107 $ filter (\g' -> (elem (getGroupPeriod g') prds)
108 && ((not . null) $ intersect (getGroupNgrams g) (getGroupNgrams g'))
109 && (((last' "makePairs" prds) == (getGroupPeriod g))
110 ||((matchWithPairs g (g,g') p) >= (getPhyloMatchingFrameTh p))))
111 $ getGroupsWithLevel (getGroupLevel g) p
113 matchWithPairs :: PhyloGroup -> (PhyloGroup,PhyloGroup) -> Phylo -> Double
114 matchWithPairs g1 (g2,g3) p =
115 let nbDocs = periodsToNbDocs [(getGroupPeriod g1),(getGroupPeriod g2),(getGroupPeriod g3)] p
118 else unionWith (+) (getGroupCooc g2) (getGroupCooc g3)
119 ngrams = if (g2 == g3)
120 then getGroupNgrams g2
121 else union (getGroupNgrams g2) (getGroupNgrams g3)
122 in processProximity (getPhyloProximity p) nbDocs (getGroupCooc g1) cooc (getGroupNgrams g1) ngrams
125 phyloGroupMatching :: [PhyloPeriodId] -> PhyloGroup -> Phylo -> [Pointer]
126 phyloGroupMatching periods g p = case pointers of
128 Just pts -> head' "phyloGroupMatching"
129 -- Keep only the best set of pointers grouped by proximity
130 $ groupBy (\pt pt' -> snd pt == snd pt')
131 $ reverse $ sortOn snd pts
132 -- Find the first time frame where at leats one pointer satisfies the proximity threshold
134 --------------------------------------
135 pointers :: Maybe [Pointer]
136 pointers = find (not . null)
137 -- For each time frame, process the Proximity on relevant pairs of targeted groups
138 $ scanl (\acc frame ->
139 let pairs = makePairs frame g p
140 in acc ++ ( filter (\(_,proxi) -> filterProximity proxi (getPhyloProximity p))
143 let proxi = matchWithPairs g (t,t') p
146 then [(getGroupId t,proxi)]
147 else [(getGroupId t,proxi),(getGroupId t',proxi)] ) pairs ) ) []
148 -- [[1900],[1900,1901],[1900,1901,1902],...] | length max => + 5 years
150 --------------------------------------
153 -- | To add some Pointer to a PhyloGroup
154 addPointers' :: Filiation -> [Pointer] -> PhyloGroup -> PhyloGroup
155 addPointers' fil pts g = g & case fil of
156 Descendant -> phylo_groupPeriodChilds %~ (++ pts)
157 Ascendant -> phylo_groupPeriodParents %~ (++ pts)
158 _ -> panic ("[ERR][Viz.Phylo.LinkMaker.addPointers] Wrong type of filiation")
162 -- | To update a list of phyloGroups with some Pointers
163 updateGroups :: Filiation -> Level -> Map PhyloGroupId [Pointer] -> Phylo -> Phylo
164 updateGroups fil lvl m p = alterPhyloGroups (\gs -> map (\g -> if ((getGroupLevel g) == lvl) && (member (getGroupId g) m)
165 then addPointers' fil (m ! (getGroupId g)) g
170 -- | Optimisation : to keep only the groups that have at least one ngrams in commons with the target
171 initCandidates :: PhyloGroup -> [PhyloPeriodId] -> [PhyloGroup] -> [PhyloGroup]
172 initCandidates g prds gs = filter (\g' -> elem (getGroupPeriod g') prds)
173 $ filter (\g' -> (not . null) $ intersect (getGroupNgrams g) (getGroupNgrams g'))
177 -- | a init avec la [[head groups]] et la tail groups
178 toBranches :: [[PhyloGroup]] -> [PhyloGroup] -> [[PhyloGroup]]
181 | otherwise = toBranches mem' $ tail gs
183 --------------------------------------
184 mem' :: [[PhyloGroup]]
185 mem' = if (null withHead)
186 then mem ++ [[head' "toBranches" gs]]
187 else (filter (\gs' -> not $ elem gs' withHead) mem)
189 [(concat withHead) ++ [head' "toBranches" gs]]
190 --------------------------------------
191 withHead :: [[PhyloGroup]]
192 withHead = filter (\gs' -> (not . null)
193 $ intersect (concat $ map getGroupNgrams gs')
194 (getGroupNgrams $ (head' "toBranches" gs))
196 --------------------------------------
199 -- | To process an intertemporal matching task to a Phylo at a given level
200 -- | 1) split all groups (of the level) in branches (ie:related components sharing at least one ngram)
201 -- | 2) for each branch, for each group find the best candidates (by Filiation and Proximity) and create the corresponding pointers
202 -- | 3) update all the groups with the new pointers if they exist
203 interTempoMatching :: Filiation -> Level -> Proximity -> Phylo -> Phylo
204 interTempoMatching fil lvl _ p = updateGroups fil lvl (Map.fromList pointers) p
206 --------------------------------------
207 pointers :: [(PhyloGroupId,[Pointer])]
209 let pts = map (\g -> let periods = getNextPeriods fil (getPhyloMatchingFrame p) (getGroupPeriod g) (getPhyloPeriods p)
210 in (getGroupId g, phyloGroupMatching periods g p)) groups
211 pts' = pts `using` parList rdeepseq
213 --------------------------------------
214 groups :: [PhyloGroup]
215 groups = getGroupsWithLevel lvl p
216 --------------------------------------
219 ------------------------------------------------------------------------
220 -- | Make links from Period to Period after level 1
221 listToTuple :: (a -> b) -> [a] -> [(b,a)]
222 listToTuple f l = map (\x -> (f x, x)) l
225 groupsToMaps :: Ord b => (PhyloGroup -> b) -> [PhyloGroup] -> [Map PhyloGroupId PhyloGroup]
226 groupsToMaps f gs = map (\gs' -> fromList $ listToTuple getGroupId gs')
227 $ groupBy ((==) `on` f)
231 phyloToPeriodMaps :: Level -> Filiation -> Phylo -> [Map PhyloGroupId PhyloGroup]
232 phyloToPeriodMaps lvl fil p =
233 let prdMap = groupsToMaps (fst . getGroupPeriod) (getGroupsWithLevel lvl p)
235 Ascendant -> reverse prdMap
237 _ -> panic ("[ERR][Viz.Phylo.LinkMaker.phyloToPeriodMaps] Wrong type of filiation")
240 trackPointersRec :: Filiation -> Map PhyloGroupId PhyloGroup -> [PhyloGroup] -> [PhyloGroup] -> [PhyloGroup]
241 trackPointersRec fil m gs res =
242 if (null gs) then res
243 else if (Map.null m) then res ++ gs
245 let g = head' "track" gs
246 pts = Map.fromList $ getGroupPointers PeriodEdge fil g
247 pts' = Map.toList $ fromListWith (\w w' -> max w w') $ concat $ elems
248 $ intersectionWith (\w g' -> map (\(id,_w') -> (id, w))
249 $ getGroupPointers LevelEdge Ascendant g') pts m
250 res' = res ++ [case fil of
251 Ascendant -> g & phylo_groupPeriodParents .~ pts'
252 Descendant -> g & phylo_groupPeriodChilds .~ pts'
253 _ -> panic ("[ERR][Viz.Phylo.LinkMaker.transposeLinks] Wrong type of filiation")]
254 in trackPointersRec fil (filterWithKey (\k _ -> not $ elem k (keys pts)) m) (tail' "track" gs) res'
258 transposeLinks :: Level -> Filiation -> Phylo -> Phylo
259 transposeLinks lvl fil p =
260 let prdMap = zip (phyloToPeriodMaps (lvl - 1) fil p) (phyloToPeriodMaps lvl fil p)
261 transposed = map (\(gs,gs') ->
262 let idx = fromJust $ elemIndex (gs,gs') prdMap
263 next = take (getPhyloMatchingFrame p) $ snd $ splitAt (idx + 1) prdMap
264 groups = trackPointersRec fil (unions $ map fst next) (elems gs') []
265 in (getGroupPeriod $ head' "transpose" groups ,groups)
267 transposed' = Map.fromList $ (transposed `using` parList rdeepseq)
269 (\gs -> if ((not . null) gs) && (lvl == (getGroupLevel $ head' "transpose" gs))
270 then transposed' ! (getGroupPeriod $ head' "transpose" gs)
276 -- | Transpose the parent/child pointers from one level to another
277 transposePeriodLinks :: Level -> Phylo -> Phylo
278 transposePeriodLinks lvl p = alterPhyloGroups
279 (\gs -> if ((not . null) gs) && (elem lvl $ map getGroupLevel gs)
281 let groups = map (\g -> let m = reduceGroups g lvlGroups
282 in g & phylo_groupPeriodParents .~ (trackPointers m $ g ^. phylo_groupPeriodParents)
283 & phylo_groupPeriodChilds .~ (trackPointers m $ g ^. phylo_groupPeriodChilds )) gs
284 groups' = groups `using` parList rdeepseq
289 --------------------------------------
290 -- | find an other way to find the group from the id
291 trackPointers :: Map PhyloGroupId PhyloGroup -> [Pointer] -> [Pointer]
292 trackPointers m pts = Map.toList
293 $ fromListWith (\w w' -> max w w')
294 $ map (\(id,_w) -> (getGroupLevelParentId $ m ! id,_w)) pts
295 --------------------------------------
296 reduceGroups :: PhyloGroup -> [PhyloGroup] -> Map PhyloGroupId PhyloGroup
297 reduceGroups g gs = Map.fromList
298 $ map (\g' -> (getGroupId g',g'))
299 $ filter (\g' -> ((not . null) $ intersect (getGroupNgrams g) (getGroupNgrams g'))) gs
300 --------------------------------------
301 lvlGroups :: [PhyloGroup]
302 lvlGroups = getGroupsWithLevel (lvl - 1) p
303 --------------------------------------
310 traceMatching :: Filiation -> Level -> Double -> [Double] -> Phylo -> Phylo
311 traceMatching fil lvl thr lst p = trace ( "----\n" <> show (fil) <> " unfiltered temporal Matching in Phylo" <> show (lvl) <> " :\n"
312 <> "count : " <> show (length lst) <> " potential pointers (" <> show (length $ filter (>= thr) lst) <> " >= " <> show (thr) <> ")\n"
313 <> "similarity : " <> show (percentile 25 (VS.fromList lst)) <> " (25%) "
314 <> show (percentile 50 (VS.fromList lst)) <> " (50%) "
315 <> show (percentile 75 (VS.fromList lst)) <> " (75%) "
316 <> show (percentile 90 (VS.fromList lst)) <> " (90%)\n") p
319 tracePreBranches :: [[PhyloGroup]] -> [[PhyloGroup]]
320 tracePreBranches bs = trace (show (length bs) <> " pre-branches" <> "\n"
321 <> "with sizes : " <> show (map length bs) <> "\n") bs