]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo/TemporalMatching.hs
working on temporal matching
[gargantext.git] / src / Gargantext / Viz / Phylo / TemporalMatching.hs
1 {-|
2 Module : Gargantext.Viz.Phylo.TemporalMatching
3 Description : Module dedicated to the adaptative temporal matching of a Phylo.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9 -}
10
11 {-# LANGUAGE NoImplicitPrelude #-}
12 {-# LANGUAGE FlexibleContexts #-}
13 {-# LANGUAGE OverloadedStrings #-}
14 {-# LANGUAGE MultiParamTypeClasses #-}
15
16 module Gargantext.Viz.Phylo.TemporalMatching where
17
18 import Data.List (concat, splitAt, tail, sortOn, (++), intersect, null, inits, find, groupBy, scanl, any, nub)
19 import Data.Map (Map, fromList, toList, fromListWith, filterWithKey, elems, restrictKeys)
20
21 import Gargantext.Prelude
22 import Gargantext.Viz.AdaptativePhylo
23 import Gargantext.Viz.Phylo.PhyloTools
24 import Gargantext.Viz.Phylo.SynchronicClustering
25
26 import Control.Lens hiding (Level)
27
28
29 -------------------
30 -- | Proximity | --
31 -------------------
32
33 -- periodsToNbDocs :: [PhyloPeriodId] -> Phylo -> Double
34 -- periodsToNbDocs prds phylo = sum $ elems
35 -- $ restrictKeys (phylo ^. phylo_docsByYears)
36 -- $ periodsToYears prds
37
38 -- matchWithPairs :: PhyloGroup -> (PhyloGroup,PhyloGroup) -> Phylo -> Double
39 -- matchWithPairs g1 (g2,g3) p =
40 -- let nbDocs = periodsToNbDocs [(getGroupPeriod g1),(getGroupPeriod g2),(getGroupPeriod g3)] p
41 -- cooc = if (g2 == g3)
42 -- then getGroupCooc g2
43 -- else unionWith (+) (getGroupCooc g2) (getGroupCooc g3)
44 -- ngrams = if (g2 == g3)
45 -- then getGroupNgrams g2
46 -- else union (getGroupNgrams g2) (getGroupNgrams g3)
47 -- in processProximity (getPhyloProximity p) nbDocs (getGroupCooc g1) cooc (getGroupNgrams g1) ngrams
48
49
50 toProximity :: Map Date Double -> PhyloGroup -> PhyloGroup -> PhyloGroup -> Double
51 toProximity docs group target target' =
52 let nbDocs = sum $ elems docs
53 in undefined
54
55 ------------------------
56 -- | Local Matching | --
57 ------------------------
58
59
60 makePairs :: [PhyloGroup] -> [PhyloPeriodId] -> Double -> Map Date Double -> PhyloGroup -> [(PhyloGroup,PhyloGroup)]
61 makePairs candidates periods thr docs group = case null periods of
62 True -> []
63 -- | at least on of the pair candidates should be from the last added period
64 False -> filter (\(cdt,cdt') -> (inLastPeriod cdt periods)
65 || (inLastPeriod cdt' periods))
66 $ listToKeys
67 -- | remove poor candidates from previous periods
68 $ filter (\cdt -> (inLastPeriod cdt periods)
69 || ((toProximity (reframeDocs docs periods) group group cdt) >= thr)) candidates
70 where
71 inLastPeriod :: PhyloGroup -> [PhyloPeriodId] -> Bool
72 inLastPeriod g prds = (g ^. phylo_groupPeriod) == (last' "makePairs" prds)
73
74
75 phyloGroupMatching :: [[PhyloGroup]] -> Filiation -> Double -> Map Date Double -> PhyloGroup -> PhyloGroup
76 phyloGroupMatching candidates fil thr docs group = case pointers of
77 Nothing -> addPointers group fil TemporalPointer []
78 Just pts -> addPointers group fil TemporalPointer
79 $ head' "phyloGroupMatching"
80 -- | Keep only the best set of pointers grouped by proximity
81 $ groupBy (\pt pt' -> snd pt == snd pt')
82 $ reverse $ sortOn snd pts
83 -- | Find the first time frame where at leats one pointer satisfies the proximity threshold
84 where
85 pointers :: Maybe [Pointer]
86 pointers = find (not . null)
87 -- | for each time frame, process the proximity on relevant pairs of targeted groups
88 $ scanl (\acc groups ->
89 let periods = nub $ map (\g' -> g' ^. phylo_groupPeriod) $ concat groups
90 pairs = makePairs (concat groups) periods thr docs group
91 in acc ++ ( filter (\(_,proximity) -> proximity >= thr )
92 $ concat
93 $ map (\(c,c') ->
94 -- | process the proximity between the current group and a pair of candidates
95 let proximity = toProximity (reframeDocs docs periods) group c c'
96 in if (c == c')
97 then [(getGroupId c,proximity)]
98 else [(getGroupId c,proximity),(getGroupId c',proximity)] ) pairs)
99 ) []
100 -- | groups from [[1900],[1900,1901],[1900,1901,1902],...]
101 $ inits candidates
102
103
104
105
106 matchGroupToGroups :: [[PhyloGroup]] -> PhyloGroup -> PhyloGroup
107 matchGroupToGroups candidates group = undefined
108
109
110 -----------------------------
111 -- | Adaptative Matching | --
112 -----------------------------
113
114
115 getNextPeriods :: Filiation -> Int -> PhyloPeriodId -> [PhyloPeriodId] -> [PhyloPeriodId]
116 getNextPeriods fil max pId pIds =
117 case fil of
118 ToChilds -> take max $ (tail . snd) $ splitAt (elemIndex' pId pIds) pIds
119 ToParents -> take max $ (reverse . fst) $ splitAt (elemIndex' pId pIds) pIds
120
121
122 getCandidates :: Filiation -> PhyloGroup -> [PhyloPeriodId] -> [PhyloGroup] -> [[PhyloGroup]]
123 getCandidates fil g pIds targets =
124 case fil of
125 ToChilds -> targets'
126 ToParents -> reverse targets'
127 where
128 targets' :: [[PhyloGroup]]
129 targets' = map (\groups' -> filter (\g' -> (not . null) $ intersect (g ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams)) groups') $ elems
130 $ filterWithKey (\k _ -> elem k pIds)
131 $ fromListWith (++)
132 $ sortOn (fst . fst)
133 $ map (\g' -> (g' ^. phylo_groupPeriod,[g'])) targets
134
135
136 shouldBreak :: Double -> [(Double,[PhyloGroup])] -> Bool
137 shouldBreak thr branches = any (\(quality,_) -> quality < thr) branches
138
139
140 toBranchQuality :: [[PhyloGroup]] -> [(Double,[PhyloGroup])]
141 toBranchQuality branches = undefined
142
143
144 reframeDocs :: Map Date Double -> [PhyloPeriodId] -> Map Date Double
145 reframeDocs docs periods = restrictKeys docs $ periodsToYears periods
146
147
148 adaptativeMatching :: Int -> Double -> Double -> Double -> Map Date Double -> [PhyloGroup] -> [PhyloGroup] -> [PhyloPeriodId] -> [PhyloGroup]
149 adaptativeMatching maxTime thrStep thrMatch thrQua docs groups candidates periods =
150 -- | check if we should break some of the new branches or not
151 case shouldBreak thrQua branches' of
152 True -> concat $ map (\(s,b) ->
153 if s >= thrQua
154 -- | we keep the branch as it is
155 then b
156 -- | we break the branch using an increased temporal matching threshold
157 else let nextGroups = undefined
158 nextCandidates = undefined
159 nextPeriods = undefined
160 in adaptativeMatching maxTime thrStep (thrMatch + thrStep) thrQua
161 (reframeDocs docs nextPeriods)
162 nextGroups nextCandidates nextPeriods
163 ) branches'
164 -- | the quality of all the new branches is sufficient
165 False -> concat branches
166 where
167 -- | 3) process a quality score for each new branch
168 branches' :: [(Double,[PhyloGroup])]
169 branches' = toBranchQuality branches
170 -- | 2) group the new groups into branches
171 branches :: [[PhyloGroup]]
172 branches = relatedComponents groups'
173 -- | 1) connect each group to its parents and childs
174 groups' :: [PhyloGroup]
175 groups' = map (\group ->
176 let childs = getCandidates ToChilds group
177 (getNextPeriods ToChilds maxTime (group ^. phylo_groupPeriod) periods) candidates
178 parents = getCandidates ToParents group
179 (getNextPeriods ToParents maxTime (group ^. phylo_groupPeriod) periods) candidates
180 -- | match the group to its possible childs then parents
181 in matchGroupToGroups parents $ matchGroupToGroups childs group
182 ) groups
183
184
185 temporalMatching :: Phylo -> Phylo
186 temporalMatching phylo =
187 let branches = fromList $ map (\g -> (getGroupId g, g))
188 $ adaptativeMatching (timeMatching $ getConfig phylo) 0 0 0
189 (phylo ^. phylo_timeDocs)
190 (getGroupsFromLevel 1 phylo) (getGroupsFromLevel 1 phylo) (getPeriodIds phylo)
191 in updatePhyloGroups 1 branches phylo