2 Module : Gargantext.Viz.Phylo.TemporalMatching
3 Description : Module dedicated to the adaptative temporal matching of a Phylo.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
11 {-# LANGUAGE NoImplicitPrelude #-}
12 {-# LANGUAGE FlexibleContexts #-}
13 {-# LANGUAGE OverloadedStrings #-}
14 {-# LANGUAGE MultiParamTypeClasses #-}
16 module Gargantext.Viz.Phylo.TemporalMatching where
18 import Data.List (concat, splitAt, tail, sortOn, (++), intersect, null, inits, find, groupBy, scanl, any, nub)
19 import Data.Map (Map, fromList, toList, fromListWith, filterWithKey, elems, restrictKeys)
21 import Gargantext.Prelude
22 import Gargantext.Viz.AdaptativePhylo
23 import Gargantext.Viz.Phylo.PhyloTools
24 import Gargantext.Viz.Phylo.SynchronicClustering
26 import Control.Lens hiding (Level)
33 -- periodsToNbDocs :: [PhyloPeriodId] -> Phylo -> Double
34 -- periodsToNbDocs prds phylo = sum $ elems
35 -- $ restrictKeys (phylo ^. phylo_docsByYears)
36 -- $ periodsToYears prds
38 -- matchWithPairs :: PhyloGroup -> (PhyloGroup,PhyloGroup) -> Phylo -> Double
39 -- matchWithPairs g1 (g2,g3) p =
40 -- let nbDocs = periodsToNbDocs [(getGroupPeriod g1),(getGroupPeriod g2),(getGroupPeriod g3)] p
41 -- cooc = if (g2 == g3)
42 -- then getGroupCooc g2
43 -- else unionWith (+) (getGroupCooc g2) (getGroupCooc g3)
44 -- ngrams = if (g2 == g3)
45 -- then getGroupNgrams g2
46 -- else union (getGroupNgrams g2) (getGroupNgrams g3)
47 -- in processProximity (getPhyloProximity p) nbDocs (getGroupCooc g1) cooc (getGroupNgrams g1) ngrams
50 toProximity :: Map Date Double -> PhyloGroup -> PhyloGroup -> PhyloGroup -> Double
51 toProximity docs group target target' =
52 let nbDocs = sum $ elems docs
55 ------------------------
56 -- | Local Matching | --
57 ------------------------
60 makePairs :: [PhyloGroup] -> [PhyloPeriodId] -> Double -> Map Date Double -> PhyloGroup -> [(PhyloGroup,PhyloGroup)]
61 makePairs candidates periods thr docs group = case null periods of
63 -- | at least on of the pair candidates should be from the last added period
64 False -> filter (\(cdt,cdt') -> (inLastPeriod cdt periods)
65 || (inLastPeriod cdt' periods))
67 -- | remove poor candidates from previous periods
68 $ filter (\cdt -> (inLastPeriod cdt periods)
69 || ((toProximity (reframeDocs docs periods) group group cdt) >= thr)) candidates
71 inLastPeriod :: PhyloGroup -> [PhyloPeriodId] -> Bool
72 inLastPeriod g prds = (g ^. phylo_groupPeriod) == (last' "makePairs" prds)
75 phyloGroupMatching :: [[PhyloGroup]] -> Filiation -> Double -> Map Date Double -> PhyloGroup -> PhyloGroup
76 phyloGroupMatching candidates fil thr docs group = case pointers of
77 Nothing -> addPointers group fil TemporalPointer []
78 Just pts -> addPointers group fil TemporalPointer
79 $ head' "phyloGroupMatching"
80 -- | Keep only the best set of pointers grouped by proximity
81 $ groupBy (\pt pt' -> snd pt == snd pt')
82 $ reverse $ sortOn snd pts
83 -- | Find the first time frame where at leats one pointer satisfies the proximity threshold
85 pointers :: Maybe [Pointer]
86 pointers = find (not . null)
87 -- | for each time frame, process the proximity on relevant pairs of targeted groups
88 $ scanl (\acc groups ->
89 let periods = nub $ map (\g' -> g' ^. phylo_groupPeriod) $ concat groups
90 pairs = makePairs (concat groups) periods thr docs group
91 in acc ++ ( filter (\(_,proximity) -> proximity >= thr )
94 -- | process the proximity between the current group and a pair of candidates
95 let proximity = toProximity (reframeDocs docs periods) group c c'
97 then [(getGroupId c,proximity)]
98 else [(getGroupId c,proximity),(getGroupId c',proximity)] ) pairs)
100 -- | groups from [[1900],[1900,1901],[1900,1901,1902],...]
106 matchGroupToGroups :: [[PhyloGroup]] -> PhyloGroup -> PhyloGroup
107 matchGroupToGroups candidates group = undefined
110 -----------------------------
111 -- | Adaptative Matching | --
112 -----------------------------
115 getNextPeriods :: Filiation -> Int -> PhyloPeriodId -> [PhyloPeriodId] -> [PhyloPeriodId]
116 getNextPeriods fil max pId pIds =
118 ToChilds -> take max $ (tail . snd) $ splitAt (elemIndex' pId pIds) pIds
119 ToParents -> take max $ (reverse . fst) $ splitAt (elemIndex' pId pIds) pIds
122 getCandidates :: Filiation -> PhyloGroup -> [PhyloPeriodId] -> [PhyloGroup] -> [[PhyloGroup]]
123 getCandidates fil g pIds targets =
126 ToParents -> reverse targets'
128 targets' :: [[PhyloGroup]]
129 targets' = map (\groups' -> filter (\g' -> (not . null) $ intersect (g ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams)) groups') $ elems
130 $ filterWithKey (\k _ -> elem k pIds)
133 $ map (\g' -> (g' ^. phylo_groupPeriod,[g'])) targets
136 shouldBreak :: Double -> [(Double,[PhyloGroup])] -> Bool
137 shouldBreak thr branches = any (\(quality,_) -> quality < thr) branches
140 toBranchQuality :: [[PhyloGroup]] -> [(Double,[PhyloGroup])]
141 toBranchQuality branches = undefined
144 reframeDocs :: Map Date Double -> [PhyloPeriodId] -> Map Date Double
145 reframeDocs docs periods = restrictKeys docs $ periodsToYears periods
148 adaptativeMatching :: Int -> Double -> Double -> Double -> Map Date Double -> [PhyloGroup] -> [PhyloGroup] -> [PhyloPeriodId] -> [PhyloGroup]
149 adaptativeMatching maxTime thrStep thrMatch thrQua docs groups candidates periods =
150 -- | check if we should break some of the new branches or not
151 case shouldBreak thrQua branches' of
152 True -> concat $ map (\(s,b) ->
154 -- | we keep the branch as it is
156 -- | we break the branch using an increased temporal matching threshold
157 else let nextGroups = undefined
158 nextCandidates = undefined
159 nextPeriods = undefined
160 in adaptativeMatching maxTime thrStep (thrMatch + thrStep) thrQua
161 (reframeDocs docs nextPeriods)
162 nextGroups nextCandidates nextPeriods
164 -- | the quality of all the new branches is sufficient
165 False -> concat branches
167 -- | 3) process a quality score for each new branch
168 branches' :: [(Double,[PhyloGroup])]
169 branches' = toBranchQuality branches
170 -- | 2) group the new groups into branches
171 branches :: [[PhyloGroup]]
172 branches = relatedComponents groups'
173 -- | 1) connect each group to its parents and childs
174 groups' :: [PhyloGroup]
175 groups' = map (\group ->
176 let childs = getCandidates ToChilds group
177 (getNextPeriods ToChilds maxTime (group ^. phylo_groupPeriod) periods) candidates
178 parents = getCandidates ToParents group
179 (getNextPeriods ToParents maxTime (group ^. phylo_groupPeriod) periods) candidates
180 -- | match the group to its possible childs then parents
181 in matchGroupToGroups parents $ matchGroupToGroups childs group
185 temporalMatching :: Phylo -> Phylo
186 temporalMatching phylo =
187 let branches = fromList $ map (\g -> (getGroupId g, g))
188 $ adaptativeMatching (timeMatching $ getConfig phylo) 0 0 0
189 (phylo ^. phylo_timeDocs)
190 (getGroupsFromLevel 1 phylo) (getGroupsFromLevel 1 phylo) (getPeriodIds phylo)
191 in updatePhyloGroups 1 branches phylo