]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo/LinkMaker.hs
fix temporalMatching
[gargantext.git] / src / Gargantext / Viz / Phylo / LinkMaker.hs
1 {-|
2 Module : Gargantext.Viz.Phylo.Tools
3 Description : Phylomemy Tools to build/manage it
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10
11 -}
12
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE FlexibleContexts #-}
15 {-# LANGUAGE OverloadedStrings #-}
16
17 module Gargantext.Viz.Phylo.LinkMaker
18 where
19
20 import Control.Lens hiding (both, Level)
21 import Data.List ((++), sortOn, null, tail, splitAt, elem, concat, sort, delete, intersect, nub, groupBy)
22 import Data.Tuple.Extra
23 import Data.Map (Map,(!),fromListWith,elems,restrictKeys,unionWith)
24 import Gargantext.Prelude
25 import Gargantext.Viz.Phylo
26 import Gargantext.Viz.Phylo.Tools
27 import Gargantext.Viz.Phylo.Metrics.Proximity
28 import qualified Data.List as List
29 import qualified Data.Maybe as Maybe
30 import qualified Data.Map as Map
31
32 import qualified Data.Vector.Storable as VS
33 import Debug.Trace (trace)
34 import Numeric.Statistics (percentile)
35
36 -----------------------------
37 -- | From Level to level | --
38 -----------------------------
39
40
41 -- | To choose a LevelLink strategy based an a given Level
42 shouldLink :: (Level,Level) -> PhyloGroup -> PhyloGroup -> Bool
43 shouldLink (lvl,_lvl) g g'
44 | lvl <= 1 = doesContainsOrd (getGroupNgrams g) (getGroupNgrams g')
45 | lvl > 1 = elem (getGroupId g) (getGroupLevelChildsId g')
46 | otherwise = panic ("[ERR][Viz.Phylo.LinkMaker.shouldLink] Level not defined")
47
48
49 -- | To set the LevelLinks between a given PhyloGroup and a list of childs/parents PhyloGroups
50 linkGroupToGroups :: (Level,Level) -> PhyloGroup -> [PhyloGroup] -> PhyloGroup
51 linkGroupToGroups (lvl,lvl') current targets
52 | lvl < lvl' = setLevelParents current
53 | lvl > lvl' = setLevelChilds current
54 | otherwise = current
55 where
56 --------------------------------------
57 setLevelChilds :: PhyloGroup -> PhyloGroup
58 setLevelChilds = over (phylo_groupLevelChilds) addPointers
59 --------------------------------------
60 setLevelParents :: PhyloGroup -> PhyloGroup
61 setLevelParents = over (phylo_groupLevelParents) addPointers
62 --------------------------------------
63 addPointers :: [Pointer] -> [Pointer]
64 addPointers lp = lp ++ Maybe.mapMaybe (\target ->
65 if shouldLink (lvl,lvl') current target
66 then Just ((getGroupId target),1)
67 else Nothing) targets
68 --------------------------------------
69
70
71 -- | To set the LevelLink of all the PhyloGroups of a Phylo
72 setLevelLinks :: (Level,Level) -> Phylo -> Phylo
73 setLevelLinks (lvl,lvl') p = alterPhyloGroups (\gs -> map (\g -> if getGroupLevel g == lvl
74 then linkGroupToGroups (lvl,lvl') g (filterCandidates g
75 $ filter (\g' -> getGroupPeriod g' == getGroupPeriod g) gs')
76 else g) gs) p
77 where
78 --------------------------------------
79 gs' :: [PhyloGroup]
80 gs' = getGroupsWithLevel lvl' p
81 --------------------------------------
82
83
84 -------------------------------
85 -- | From Period to Period | --
86 -------------------------------
87
88
89 -- | To get the next or previous PhyloPeriod based on a given PhyloPeriodId
90 getNextPeriods :: Filiation -> PhyloPeriodId -> [PhyloPeriodId] -> [PhyloPeriodId]
91 getNextPeriods to' id l = case to' of
92 Descendant -> (tail . snd) next
93 Ascendant -> (reverse . fst) next
94 _ -> panic ("[ERR][Viz.Phylo.Example.getNextPeriods] Filiation type not defined")
95 where
96 --------------------------------------
97 next :: ([PhyloPeriodId], [PhyloPeriodId])
98 next = splitAt idx l
99 --------------------------------------
100 idx :: Int
101 idx = case (List.elemIndex id l) of
102 Nothing -> panic ("[ERR][Viz.Phylo.Example.getNextPeriods] PhyloPeriodId not defined")
103 Just i -> i
104 --------------------------------------
105
106
107 -- | To get the number of docs produced during a list of periods
108 periodsToNbDocs :: [PhyloPeriodId] -> Phylo -> Double
109 periodsToNbDocs prds phylo = sum $ elems
110 $ restrictKeys (phylo ^. phylo_docsByYears)
111 $ periodsToYears prds
112
113
114 -- | To process a given Proximity
115 processProximity :: Proximity -> Map (Int, Int) Double -> Map (Int, Int) Double -> Double -> Double
116 processProximity proximity cooc cooc' nbDocs = case proximity of
117 WeightedLogJaccard (WLJParams _ sens) -> weightedLogJaccard sens cooc cooc' nbDocs
118 Hamming (HammingParams _) -> hamming cooc cooc'
119 _ -> panic "[ERR][Viz.Phylo.LinkMaker.processProximity] Unknown proximity"
120
121
122
123 -- | Find the best candidates to be time-linked with a group g1 (recursively until the limit of periods is reached)
124 -- | 1) find the next periods and get the mini cooc matrix of g1
125 -- | 2) build the pairs of candidates (single groups or tuples)
126 -- | 3) process the proximity mesure and select the best ones to create the pointers (ie: all the max)
127 findBestCandidates :: Filiation -> Int -> Int -> Proximity -> [(Date,Date)] -> [PhyloGroup] -> PhyloGroup -> Phylo -> ([Pointer],[Double])
128 findBestCandidates filiation depth limit proximity periods candidates g1 phylo
129 | depth > limit || null nextPeriods = ([],[])
130 | (not . null) pointers = (head' "findBestCandidates" $ groupBy (\x y -> snd x == snd y) pointers
131 ,map snd similarities)
132 | otherwise = findBestCandidates filiation (depth + 1) limit proximity periods candidates g1 phylo
133 where
134 --------------------------------------
135 pointers :: [(PhyloGroupId, Double)]
136 pointers = reverse $ sortOn snd $ filter (\(_,score) -> case proximity of
137 WeightedLogJaccard (WLJParams thr _) -> score >= thr
138 Hamming (HammingParams thr) -> score <= thr
139 _ -> panic "[ERR][Viz.Phylo.LinkMaker.findBestCandidates] Unknown proximity"
140 ) similarities
141 --------------------------------------
142 similarities :: [(PhyloGroupId, Double)]
143 similarities = concat $ map (\(g2,g3) -> let nbDocs = periodsToNbDocs [(getGroupPeriod g1),(getGroupPeriod g2),(getGroupPeriod g3)] phylo
144 cooc2 = getGroupCooc g2
145 cooc3 = getGroupCooc g3
146 score = processProximity proximity cooc1 (unionWith (+) cooc2 cooc3) nbDocs
147 in nub $ [(getGroupId g2,score),(getGroupId g3,score)]) pairsOfCandidates
148 --------------------------------------
149 pairsOfCandidates :: [(PhyloGroup,PhyloGroup)]
150 pairsOfCandidates = listToFullCombi $ filter (\g -> elem (getGroupPeriod g) nextPeriods) candidates
151 --------------------------------------
152 cooc1 :: Map (Int,Int) Double
153 cooc1 = getGroupCooc g1
154 --------------------------------------
155 nextPeriods :: [(Date,Date)]
156 nextPeriods = take depth periods
157 --------------------------------------
158
159
160 -- | To add some Pointer to a PhyloGroup
161 addPointers' :: Filiation -> [Pointer] -> PhyloGroup -> PhyloGroup
162 addPointers' fil pts g = g & case fil of
163 Descendant -> phylo_groupPeriodChilds %~ (++ pts)
164 Ascendant -> phylo_groupPeriodParents %~ (++ pts)
165 _ -> panic ("[ERR][Viz.Phylo.LinkMaker.addPointers] Wrong type of filiation")
166
167
168
169 -- | To update a list of phyloGroups with some Pointers
170 updateGroups :: Filiation -> Level -> Map PhyloGroupId [Pointer] -> Phylo -> Phylo
171 updateGroups fil lvl m p = alterPhyloGroups (\gs -> map (\g -> if (getGroupLevel g) == lvl
172 then addPointers' fil (m ! (getGroupId g)) g
173 else g ) gs) p
174
175
176
177 -- | Optimisation : to keep only the groups that have at least one ngrams in commons with the target
178 filterCandidates :: PhyloGroup -> [PhyloGroup] -> [PhyloGroup]
179 filterCandidates g gs = filter (\g' -> (not . null) $ intersect (getGroupNgrams g) (getGroupNgrams g'))
180 $ delete g gs
181
182
183 -- | a init avec la [[head groups]] et la tail groups
184 toBranches :: [[PhyloGroup]] -> [PhyloGroup] -> [[PhyloGroup]]
185 toBranches mem gs
186 | null gs = mem
187 | otherwise = toBranches mem' $ tail gs
188 where
189 --------------------------------------
190 mem' :: [[PhyloGroup]]
191 mem' = if (null withHead)
192 then mem ++ [[head' "toBranches" gs]]
193 else (filter (\gs' -> not $ elem gs' withHead) mem)
194 ++
195 [(concat withHead) ++ [head' "toBranches" gs]]
196 --------------------------------------
197 withHead :: [[PhyloGroup]]
198 withHead = filter (\gs' -> (not . null)
199 $ intersect (concat $ map getGroupNgrams gs')
200 (getGroupNgrams $ (head' "toBranches" gs))
201 ) mem
202 --------------------------------------
203
204
205 -- | To process an intertemporal matching task to a Phylo at a given level
206 -- | 1) split all groups (of the level) in branches (ie:related components sharing at least one ngram)
207 -- | 2) for each branch, for each group find the best candidates (by Filiation and Proximity) and create the corresponding pointers
208 -- | 3) update all the groups with the new pointers if they exist
209 interTempoMatching :: Filiation -> Level -> Proximity -> Phylo -> Phylo
210 interTempoMatching fil lvl prox p = traceMatching fil lvl (getThreshold prox) debug $ updateGroups fil lvl pointersMap p
211 where
212 --------------------------------------
213 debug :: [Double]
214 debug = sort $ concat $ map (snd . snd) pointers
215 --------------------------------------
216 pointersMap :: Map PhyloGroupId [Pointer]
217 pointersMap = Map.fromList $ map (\(id,x) -> (id,fst x)) pointers
218 --------------------------------------
219 pointers :: [(PhyloGroupId,([Pointer],[Double]))]
220 pointers = concat
221 $ map (\branche ->
222 map (\g -> ( getGroupId g
223 , findBestCandidates fil 1 5 prox (getNextPeriods fil (getGroupPeriod g) (getPhyloPeriods p)) (filterCandidates g branche) g p )
224 ) branche ) branches
225 --------------------------------------
226 branches :: [[PhyloGroup]]
227 branches = tracePreBranches
228 $ toBranches [[head' "interTempoMatching" (getGroupsWithLevel lvl p)]]
229 $ tail (getGroupsWithLevel lvl p)
230 --------------------------------------
231
232
233 ------------------------------------------------------------------------
234 -- | Make links from Period to Period after level 1
235
236 toLevelUp :: [Pointer] -> Phylo -> [Pointer]
237 toLevelUp lst p = Map.toList
238 $ map (\ws -> maximum ws)
239 $ fromListWith (++) [(id, [w]) | (id, w) <- pointers]
240 where
241 --------------------------------------
242 pointers :: [Pointer]
243 pointers = map (\(id,v) -> (getGroupLevelParentId $ getGroupFromId id p, v)) lst
244 --------------------------------------
245
246
247 -- | Transpose the parent/child pointers from one level to another
248 transposePeriodLinks :: Level -> Phylo -> Phylo
249 transposePeriodLinks lvl p = alterGroupWithLevel
250 (\g ->
251 --------------------------------------
252 let childs = getGroupsFromIds (map fst $ getGroupLevelChilds g) p
253 ascLink = toLevelUp (concat $ map getGroupPeriodParents childs) p
254 desLink = toLevelUp (concat $ map getGroupPeriodChilds childs) p
255 --------------------------------------
256 in g & phylo_groupPeriodParents %~ (++ ascLink)
257 & phylo_groupPeriodChilds %~ (++ desLink)
258 --------------------------------------
259 ) lvl p
260
261
262 ----------------
263 -- | Tracer | --
264 ----------------
265
266
267 traceMatching :: Filiation -> Level -> Double -> [Double] -> Phylo -> Phylo
268 traceMatching fil lvl thr lst p = trace ( "----\n" <> show (fil) <> " unfiltered temporal Matching in Phylo" <> show (lvl) <> " :\n"
269 <> "count : " <> show (length lst) <> " potential pointers (" <> show (length $ filter (>= thr) lst) <> " >= " <> show (thr) <> ")\n"
270 <> "similarity : " <> show (percentile 25 (VS.fromList lst)) <> " (25%) "
271 <> show (percentile 50 (VS.fromList lst)) <> " (50%) "
272 <> show (percentile 75 (VS.fromList lst)) <> " (75%) "
273 <> show (percentile 90 (VS.fromList lst)) <> " (90%)\n") p
274
275 tracePreBranches :: [[PhyloGroup]] -> [[PhyloGroup]]
276 tracePreBranches bs = trace (show (length bs) <> " pre-branches" <> "\n"
277 <> "with sizes : " <> show (map length bs) <> "\n") bs
278