]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo/LinkMaker.hs
Merge branch 'dev' into dev-phylo
[gargantext.git] / src / Gargantext / Viz / Phylo / LinkMaker.hs
1 {-|
2 Module : Gargantext.Viz.Phylo.Tools
3 Description : Phylomemy Tools to build/manage it
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10
11 -}
12
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE FlexibleContexts #-}
15 {-# LANGUAGE OverloadedStrings #-}
16
17 module Gargantext.Viz.Phylo.LinkMaker
18 where
19
20 import Control.Lens hiding (both, Level)
21 import Data.List ((++), sortOn, null, tail, splitAt, elem, concat, sort, delete, intersect)
22 import Data.Tuple.Extra
23 import Data.Map (Map,(!),fromListWith)
24 import Gargantext.Prelude
25 import Gargantext.Viz.Phylo
26 import Gargantext.Viz.Phylo.Tools
27 import Gargantext.Viz.Phylo.Metrics.Proximity
28 import Gargantext.Viz.Phylo.Aggregates.Cooc
29 import qualified Data.List as List
30 import qualified Data.Maybe as Maybe
31 import qualified Data.Map as Map
32
33 import qualified Data.Vector.Storable as VS
34 import Debug.Trace (trace)
35 import Numeric.Statistics (percentile)
36
37
38 ------------------------------------------------------------------------
39 -- | Make links from Level to Level
40
41
42 -- | To choose a LevelLink strategy based an a given Level
43 shouldLink :: (Level,Level) -> PhyloGroup -> PhyloGroup -> Bool
44 shouldLink (lvl,_lvl) g g'
45 | lvl <= 1 = doesContainsOrd (getGroupNgrams g) (getGroupNgrams g')
46 | lvl > 1 = elem (getGroupId g) (getGroupLevelChildsId g')
47 | otherwise = panic ("[ERR][Viz.Phylo.LinkMaker.shouldLink] Level not defined")
48
49
50 -- | To set the LevelLinks between a given PhyloGroup and a list of childs/parents PhyloGroups
51 linkGroupToGroups :: (Level,Level) -> PhyloGroup -> [PhyloGroup] -> PhyloGroup
52 linkGroupToGroups (lvl,lvl') current targets
53 | lvl < lvl' = setLevelParents current
54 | lvl > lvl' = setLevelChilds current
55 | otherwise = current
56 where
57 --------------------------------------
58 setLevelChilds :: PhyloGroup -> PhyloGroup
59 setLevelChilds = over (phylo_groupLevelChilds) addPointers
60 --------------------------------------
61 setLevelParents :: PhyloGroup -> PhyloGroup
62 setLevelParents = over (phylo_groupLevelParents) addPointers
63 --------------------------------------
64 addPointers :: [Pointer] -> [Pointer]
65 addPointers lp = lp ++ Maybe.mapMaybe (\target ->
66 if shouldLink (lvl,lvl') current target
67 then Just ((getGroupId target),1)
68 else Nothing) targets
69 --------------------------------------
70
71
72 -- | To set the LevelLink of all the PhyloGroups of a Phylo
73 setLevelLinks :: (Level,Level) -> Phylo -> Phylo
74 setLevelLinks (lvl,lvl') p = alterPhyloGroups (\gs -> map (\g -> if getGroupLevel g == lvl
75 then linkGroupToGroups (lvl,lvl') g (filterCandidates g
76 $ filter (\g' -> getGroupPeriod g' == getGroupPeriod g) gs')
77 else g) gs) p
78 where
79 --------------------------------------
80 gs' :: [PhyloGroup]
81 gs' = getGroupsWithLevel lvl' p
82 --------------------------------------
83
84
85 ------------------------------------------------------------------------
86 -- | Make links from Period to Period
87
88
89 -- | To apply the corresponding proximity function based on a given Proximity
90 applyProximity :: Proximity -> PhyloGroup -> PhyloGroup -> Map (Int, Int) Double -> (PhyloGroupId, Double)
91 applyProximity prox g1 g2 cooc = case prox of
92 WeightedLogJaccard (WLJParams _ s) -> ((getGroupId g2), weightedLogJaccard s (getSubCooc (getGroupNgrams g1) cooc) (getSubCooc (getGroupNgrams g2) cooc))
93 Hamming (HammingParams _) -> ((getGroupId g2), hamming (getSubCooc (getGroupNgrams g1) cooc) (getSubCooc (getGroupNgrams g2) cooc))
94 _ -> panic ("[ERR][Viz.Phylo.Example.applyProximity] Proximity function not defined")
95
96
97 -- | To get the next or previous PhyloPeriod based on a given PhyloPeriodId
98 getNextPeriods :: Filiation -> PhyloPeriodId -> [PhyloPeriodId] -> [PhyloPeriodId]
99 getNextPeriods to' id l = case to' of
100 Descendant -> (tail . snd) next
101 Ascendant -> (reverse . fst) next
102 _ -> panic ("[ERR][Viz.Phylo.Example.getNextPeriods] Filiation type not defined")
103 where
104 --------------------------------------
105 next :: ([PhyloPeriodId], [PhyloPeriodId])
106 next = splitAt idx l
107 --------------------------------------
108 idx :: Int
109 idx = case (List.elemIndex id l) of
110 Nothing -> panic ("[ERR][Viz.Phylo.Example.getNextPeriods] PhyloPeriodId not defined")
111 Just i -> i
112 --------------------------------------
113
114
115 -- | To find the best candidates regarding a given proximity
116 findBestCandidates' :: Filiation -> Int -> Int -> Proximity -> [PhyloPeriodId] -> [PhyloGroup] -> PhyloGroup -> Phylo -> ([Pointer],[Double])
117 findBestCandidates' fil depth limit prox prds gs g p
118 | depth > limit || null next = ([],[])
119 | (not . null) bestScores = (take 2 bestScores, map snd scores)
120 | otherwise = findBestCandidates' fil (depth + 1) limit prox prds gs g p
121 where
122 --------------------------------------
123 next :: [PhyloPeriodId]
124 next = take depth prds
125 --------------------------------------
126 cooc :: Map (Int, Int) Double
127 cooc = getCooc next p
128 --------------------------------------
129 candidates :: [PhyloGroup]
130 candidates = filter (\g' -> elem (getGroupPeriod g') next) gs
131 --------------------------------------
132 scores :: [(PhyloGroupId, Double)]
133 scores = map (\g' -> applyProximity prox g g' cooc) candidates
134 --------------------------------------
135 bestScores :: [(PhyloGroupId, Double)]
136 bestScores = reverse
137 $ sortOn snd
138 $ filter (\(_id,score) -> case prox of
139 WeightedLogJaccard (WLJParams thr _) -> score >= thr
140 Hamming (HammingParams thr) -> score <= thr
141 Filiation -> panic "[ERR][Viz.Phylo.LinkMaker.findBestCandidates] Filiation"
142 ) scores
143 --------------------------------------
144
145
146 -- | To add some Pointer to a PhyloGroup
147 addPointers' :: Filiation -> [Pointer] -> PhyloGroup -> PhyloGroup
148 addPointers' fil pts g = g & case fil of
149 Descendant -> phylo_groupPeriodChilds %~ (++ pts)
150 Ascendant -> phylo_groupPeriodParents %~ (++ pts)
151 _ -> panic ("[ERR][Viz.Phylo.LinkMaker.addPointers] Wrong type of filiation")
152
153
154
155 -- | To update a list of phyloGroups with some Pointers
156 updateGroups :: Filiation -> Level -> Map PhyloGroupId [Pointer] -> Phylo -> Phylo
157 updateGroups fil lvl m p = alterPhyloGroups (\gs -> map (\g -> if (getGroupLevel g) == lvl
158 then addPointers' fil (m ! (getGroupId g)) g
159 else g ) gs) p
160
161
162
163 -- | Optimisation : to keep only the groups that have at least one ngrams in commons with the target
164 filterCandidates :: PhyloGroup -> [PhyloGroup] -> [PhyloGroup]
165 filterCandidates g gs = filter (\g' -> (not . null) $ intersect (getGroupNgrams g) (getGroupNgrams g'))
166 $ delete g gs
167
168
169
170 -- | To apply the intertemporal matching to Phylo at a given level
171 interTempoMatching :: Filiation -> Level -> Proximity -> Phylo -> Phylo
172 interTempoMatching fil lvl prox p = traceMatching fil lvl (getThreshold prox) scores
173 $ updateGroups fil lvl pointers p
174 where
175 --------------------------------------
176 pointers :: Map PhyloGroupId [Pointer]
177 pointers = Map.fromList $ map (\(id,x) -> (id,fst x)) candidates
178 --------------------------------------
179 scores :: [Double]
180 scores = sort $ concat $ map (snd . snd) candidates
181 --------------------------------------
182 candidates :: [(PhyloGroupId,([Pointer],[Double]))]
183 candidates = map (\g -> ( getGroupId g, findBestCandidates' fil 1 5 prox (getNextPeriods fil (getGroupPeriod g) prds) (filterCandidates g gs) g p)) gs
184 --------------------------------------
185 gs :: [PhyloGroup]
186 gs = getGroupsWithLevel lvl p
187 --------------------------------------
188 prds :: [PhyloPeriodId]
189 prds = getPhyloPeriods p
190 --------------------------------------
191
192
193 ------------------------------------------------------------------------
194 -- | Make links from Period to Period after level 1
195
196 toLevelUp :: [Pointer] -> Phylo -> [Pointer]
197 toLevelUp lst p = Map.toList
198 $ map (\ws -> maximum ws)
199 $ fromListWith (++) [(id, [w]) | (id, w) <- pointers]
200 where
201 --------------------------------------
202 pointers :: [Pointer]
203 pointers = map (\(id,v) -> (getGroupLevelParentId $ getGroupFromId id p, v)) lst
204 --------------------------------------
205
206
207 transposePeriodLinks :: Level -> Phylo -> Phylo
208 transposePeriodLinks lvl p = alterGroupWithLevel
209 (\g ->
210 --------------------------------------
211 let childs = getGroupsFromIds (map fst $ getGroupLevelChilds g) p
212 ascLink = toLevelUp (concat $ map getGroupPeriodParents childs) p
213 desLink = toLevelUp (concat $ map getGroupPeriodChilds childs) p
214 --------------------------------------
215 in g & phylo_groupPeriodParents %~ (++ ascLink)
216 & phylo_groupPeriodChilds %~ (++ desLink)
217 --------------------------------------
218 ) lvl p
219
220 ----------------
221 -- | Tracer | --
222 ----------------
223
224
225 traceMatching :: Filiation -> Level -> Double -> [Double] -> Phylo -> Phylo
226 traceMatching fil lvl thr lst p = trace ( "----\n" <> show (fil) <> " unfiltered temporal Matching in Phylo" <> show (lvl) <> " :\n"
227 <> "count : " <> show (length lst) <> " potential pointers (" <> show (length $ filter (>= thr) lst) <> " >= " <> show (thr) <> ")\n"
228 <> "similarity : " <> show (percentile 25 (VS.fromList lst)) <> " (25%) "
229 <> show (percentile 50 (VS.fromList lst)) <> " (50%) "
230 <> show (percentile 75 (VS.fromList lst)) <> " (75%) "
231 <> show (percentile 90 (VS.fromList lst)) <> " (90%)\n") p
232