import Gargantext.Prelude
import Gargantext.Viz.AdaptativePhylo
import Gargantext.Viz.Phylo.PhyloTools
-import Gargantext.Viz.Phylo.TemporalMatching (weightedLogJaccard)
+import Gargantext.Viz.Phylo.TemporalMatching (weightedLogJaccard', filterDiago, reduceDiagos)
import Gargantext.Viz.Phylo.PhyloExport (processDynamics)
-import Data.List ((++), null, intersect, nub, concat, sort, sortOn, init, all, group, maximum, groupBy)
-import Data.Map (Map, fromList, fromListWith, foldlWithKey, (!), insert, empty, restrictKeys, elems, mapWithKey, member, singleton)
+import Data.List ((++), null, intersect, nub, concat, sort, sortOn, all, groupBy, group, maximum)
+import Data.Map (Map, fromList, fromListWith, foldlWithKey, (!), insert, empty, restrictKeys, elems, mapWithKey, member)
+import Data.Text (Text)
import Control.Lens hiding (Level)
import Control.Parallel.Strategies (parList, rdeepseq, using)
-------------------------
mergeBranchIds :: [[Int]] -> [Int]
-mergeBranchIds ids = (head' "mergeBranchIds" . sort . mostFreq) ids
+mergeBranchIds ids = (head' "mergeBranchIds" . sort . mostFreq') ids
where
-- | 2) find the most Up Left ids in the hierarchy of similarity
-- mostUpLeft :: [[Int]] -> [[Int]]
-- mostUpLeft ids' =
-- let groupIds = (map (\gIds -> (length $ head' "gIds" gIds, head' "gIds" gIds)) . groupBy (\id id' -> length id == length id') . sortOn length) ids'
-- inf = (fst . minimum) groupIds
- -- in map snd $ filter (\gIds -> fst gIds == inf) groupIds
+ -- in map snd $ filter (\gIds -> fst gIds == inf) groupIds
-- | 1) find the most frequent ids
- mostFreq :: [[Int]] -> [[Int]]
- mostFreq ids' =
- let groupIds = (map (\gIds -> (length gIds, head' "gIds" gIds)) . group . sort) ids'
- sup = (fst . maximum) groupIds
- in map snd $ filter (\gIds -> fst gIds == sup) groupIds
+ mostFreq' :: [[Int]] -> [[Int]]
+ mostFreq' ids' =
+ let groupIds = (map (\gIds -> (length gIds, head' "gIds" gIds)) . group . sort) ids'
+ sup = (fst . maximum) groupIds
+ in map snd $ filter (\gIds -> fst gIds == sup) groupIds
+
+
+mergeMeta :: [Int] -> [PhyloGroup] -> Map Text [Double]
+mergeMeta bId groups =
+ let ego = head' "mergeMeta" $ filter (\g -> (snd (g ^. phylo_groupBranchId)) == bId) groups
+ in fromList [("breaks",(ego ^. phylo_groupMeta) ! "breaks"),("seaLevels",(ego ^. phylo_groupMeta) ! "seaLevels")]
groupsToBranches' :: Map PhyloGroupId PhyloGroup -> [[PhyloGroup]]
groupsToBranches' groups =
-- | run the related component algorithm
- let egos = groupBy (\gs gs' -> (fst $ fst $ head' "egos" gs) == (fst $ fst $ head' "egos" gs'))
- $ sortOn (\gs -> fst $ fst $ head' "egos" gs)
- $ map (\g -> [getGroupId g]
- ++ (map fst $ g ^. phylo_groupPeriodParents)
- ++ (map fst $ g ^. phylo_groupPeriodChilds) ) $ elems groups
- -- | first find the related components by inside each ego's period
- graph' = map relatedComponents egos
- -- | then run it for the all the periods
- graph = relatedComponents $ concat (graph' `using` parList rdeepseq)
+ let egos = map (\g -> [getGroupId g]
+ ++ (map fst $ g ^. phylo_groupPeriodParents)
+ ++ (map fst $ g ^. phylo_groupPeriodChilds) ) $ elems groups
+ graph = relatedComponents egos
-- | update each group's branch id
in map (\ids ->
- -- intervenir ici
let groups' = elems $ restrictKeys groups (Set.fromList ids)
bId = mergeBranchIds $ map (\g -> snd $ g ^. phylo_groupBranchId) groups'
- in map (\g -> g & phylo_groupBranchId %~ (\(lvl,_) -> (lvl + 1,bId))) groups') graph
-
-
--- toBranchId :: PhyloGroup -> PhyloBranchId
--- toBranchId child = ((child ^. phylo_groupLevel) + 1, snd (child ^. phylo_groupBranchId))
+ in map (\g -> g & phylo_groupBranchId %~ (\(lvl,_) -> (lvl,bId))) groups') graph
-getLastThr :: [PhyloGroup] -> Double
-getLastThr childs = maximum $ concat $ map (\g -> (g ^. phylo_groupMeta) ! "thr") childs
mergeGroups :: [Cooc] -> PhyloGroupId -> Map PhyloGroupId PhyloGroupId -> [PhyloGroup] -> PhyloGroup
mergeGroups coocs id mapIds childs =
in PhyloGroup (fst $ fst id) (snd $ fst id) (snd id) ""
(sum $ map _phylo_groupSupport childs) ngrams
(ngramsToCooc ngrams coocs)
- ((snd $ fst id),(mergeBranchIds $ map (\g -> snd $ g ^. phylo_groupBranchId) childs))
- (singleton "thr" [getLastThr childs]) [] (map (\g -> (getGroupId g, 1)) childs)
+ ((snd $ fst id),bId)
+ (mergeMeta bId childs) [] (map (\g -> (getGroupId g, 1)) childs)
(updatePointers $ concat $ map _phylo_groupPeriodParents childs)
(updatePointers $ concat $ map _phylo_groupPeriodChilds childs)
- where
+ where
+ --------------------
+ bId :: [Int]
+ bId = mergeBranchIds $ map (\g -> snd $ g ^. phylo_groupBranchId) childs
+ --------------------
updatePointers :: [Pointer] -> [Pointer]
updatePointers pointers = map (\(pId,w) -> (mapIds ! pId,w)) pointers
toNextLevel' :: Phylo -> [PhyloGroup] -> Phylo
-toNextLevel' phylo groups =
+toNextLevel' phylo groups =
let curLvl = getLastLevel phylo
oldGroups = fromList $ map (\g -> (getGroupId g, getLevelParentId g)) groups
newGroups = concat $ groupsToBranches'
$ foldl' (\acc g -> acc ++ (map (\(id,_) -> (id,[g]) ) $ g ^. phylo_groupPeriodParents) ) [] groups
-groupsToEdges :: Proximity -> Synchrony -> Double -> [PhyloGroup] -> [((PhyloGroup,PhyloGroup),Double)]
-groupsToEdges prox sync docs groups =
+groupsToEdges :: Proximity -> Synchrony -> Double -> Map Int Double -> [PhyloGroup] -> [((PhyloGroup,PhyloGroup),Double)]
+groupsToEdges prox sync nbDocs diago groups =
case sync of
ByProximityThreshold thr sens _ strat ->
filter (\(_,w) -> w >= thr)
toEdges :: Double -> [(PhyloGroup,PhyloGroup)] -> [((PhyloGroup,PhyloGroup),Double)]
toEdges sens edges =
case prox of
- WeightedLogJaccard _ _ _ -> map (\(g,g') ->
- ((g,g'), weightedLogJaccard sens docs
- (g ^. phylo_groupCooc) (g' ^. phylo_groupCooc)
- (g ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams))) edges
+ WeightedLogJaccard _ -> map (\(g,g') ->
+ ((g,g'), weightedLogJaccard' sens nbDocs diago
+ (g ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams))) edges
_ -> undefined
toParentId child = ((child ^. phylo_groupPeriod, child ^. phylo_groupLevel + 1), child ^. phylo_groupIndex)
-reduceGroups :: Proximity -> Synchrony -> Map Date Double -> [PhyloGroup] -> [PhyloGroup]
-reduceGroups prox sync docs branch =
+reduceGroups :: Proximity -> Synchrony -> Map Date Double -> Map Date Cooc -> [PhyloGroup] -> [PhyloGroup]
+reduceGroups prox sync docs diagos branch =
-- | 1) reduce a branch as a set of periods & groups
let periods = fromListWith (++)
$ map (\g -> (g ^. phylo_groupPeriod,[g])) branch
in (concat . concat . elems)
$ mapWithKey (\prd groups ->
-- | 2) for each period, transform the groups as a proximity graph filtered by a threshold
- let edges = groupsToEdges prox sync ((sum . elems) $ restrictKeys docs $ periodsToYears [prd]) groups
+ let diago = reduceDiagos $ filterDiago diagos [prd]
+ edges = groupsToEdges prox sync ((sum . elems) $ restrictKeys docs $ periodsToYears [prd]) diago groups
in map (\comp ->
-- | 4) add to each groups their futur level parent group
let parentId = toParentId (head' "parentId" comp)
$ toRelatedComponents groups edges) periods
-getGroupRealBId :: Double -> PhyloGroup -> [Int]
-getGroupRealBId step g =
- let nb = round(getGroupThr g / step) + 2
- in take nb (snd $ g ^. phylo_groupBranchId)
-
-
-
-adjustClustering :: Synchrony -> Double -> [[PhyloGroup]] -> [[PhyloGroup]]
-adjustClustering sync step branches = case sync of
- ByProximityThreshold _ _ scope _ ->
- case scope of
+adjustClustering :: Synchrony -> [[PhyloGroup]] -> [[PhyloGroup]]
+adjustClustering sync branches = case sync of
+ ByProximityThreshold _ _ scope _ -> case scope of
SingleBranch -> branches
- SiblingBranches -> groupBy (\g g' -> (init $ getGroupRealBId step g) == (init $ getGroupRealBId step g'))
+ SiblingBranches -> groupBy (\g g' -> (last' "adjustClustering" $ (g ^. phylo_groupMeta) ! "breaks")
+ == (last' "adjustClustering" $ (g' ^. phylo_groupMeta) ! "breaks"))
$ sortOn _phylo_groupBranchId $ concat branches
- -- SiblingBranches -> elems $ fromListWith (++) $ map (\b -> ((init . snd . _phylo_groupBranchId) $ head' "adjustClustering" b,b)) branches
AllBranches -> [concat branches]
ByProximityDistribution _ _ -> branches
let prox = phyloProximity $ getConfig phylo
sync = phyloSynchrony $ getConfig phylo
docs = phylo ^. phylo_timeDocs
- newBranches = map (\branch -> reduceGroups prox sync docs branch)
+ diagos = map coocToDiago $ phylo ^. phylo_timeCooc
+ newBranches = map (\branch -> reduceGroups prox sync docs diagos branch)
$ map processDynamics
- $ adjustClustering sync (getPhyloThresholdStep phylo)
+ $ adjustClustering sync
$ phyloToLastBranches
$ traceSynchronyStart phylo
newBranches' = newBranches `using` parList rdeepseq