]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo/PhyloExport.hs
add the dynamics and the labels
[gargantext.git] / src / Gargantext / Viz / Phylo / PhyloExport.hs
1 {-|
2 Module : Gargantext.Viz.Phylo.PhyloExport
3 Description : Exportation module of a Phylo
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9 -}
10
11 {-# LANGUAGE NoImplicitPrelude #-}
12 {-# LANGUAGE FlexibleContexts #-}
13 {-# LANGUAGE OverloadedStrings #-}
14 {-# LANGUAGE MultiParamTypeClasses #-}
15 {-# LANGUAGE TypeSynonymInstances #-}
16 {-# LANGUAGE FlexibleInstances #-}
17
18 module Gargantext.Viz.Phylo.PhyloExport where
19
20 import Data.Map (Map, fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault)
21 import Data.List ((++), sort, nub, concat, sortOn, reverse, groupBy, union, (\\), (!!))
22 import Data.Text (Text)
23 import Data.Vector (Vector)
24
25 import Gargantext.Prelude
26 import Gargantext.Viz.AdaptativePhylo
27 import Gargantext.Viz.Phylo.PhyloTools
28
29 import Control.Lens
30 import Data.GraphViz.Types.Generalised (DotGraph)
31
32 --------------------
33 -- | Dot export | --
34 --------------------
35
36
37 toDot :: PhyloExport -> DotGraph DotId
38 toDot export = undefined
39
40 -----------------
41 -- | Metrics | --
42 -----------------
43
44 -- | Return the conditional probability of i knowing j
45 conditional :: Ord a => Map (a,a) Double -> a -> a -> Double
46 conditional m i j = (findWithDefault 0 (i,j) m)
47 / (m ! (j,j))
48
49
50 -- | Return the inclusion score of a given ngram
51 inclusion :: Map (Int, Int) Double -> [Int] -> Int -> Double
52 inclusion m l i = ( (sum $ map (\j -> conditional m j i) l)
53 + (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
54
55
56 -----------------
57 -- | Taggers | --
58 -----------------
59
60 getNthMostMeta :: Int -> [Double] -> [Int] -> [Int]
61 getNthMostMeta nth meta ns = map (\(idx,_) -> (ns !! idx))
62 $ take nth
63 $ reverse
64 $ sortOn snd $ zip [0..] meta
65
66
67 mostInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
68 mostInclusive nth foundations export =
69 over ( export_branches
70 . traverse )
71 (\b ->
72 let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
73 cooc = foldl (\acc g -> unionWith (+) acc (g ^. phylo_groupCooc)) empty groups
74 ngrams = sort $ foldl (\acc g -> union acc (g ^. phylo_groupNgrams)) [] groups
75 inc = map (\n -> inclusion cooc (ngrams \\ [n]) n) ngrams
76 lbl = ngramsToLabel foundations $ getNthMostMeta nth inc ngrams
77 in b & branch_label .~ lbl ) export
78
79
80 mostEmergentInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
81 mostEmergentInclusive nth foundations export =
82 over ( export_groups
83 . traverse )
84 (\g ->
85 let lbl = ngramsToLabel foundations
86 $ take nth
87 $ map (\(_,(_,idx)) -> idx)
88 $ concat
89 $ map (\groups -> sortOn (fst . snd) groups)
90 $ groupBy ((==) `on` fst) $ reverse $ sortOn fst
91 $ zip ((g ^. phylo_groupMeta) ! "inclusion")
92 $ zip ((g ^. phylo_groupMeta) ! "dynamics") (g ^. phylo_groupNgrams)
93 in g & phylo_groupLabel .~ lbl ) export
94
95
96 processLabels :: [Label] -> Vector Ngrams -> PhyloExport -> PhyloExport
97 processLabels labels foundations export =
98 foldl (\export' label ->
99 case label of
100 GroupLabel tagger nth ->
101 case tagger of
102 MostEmergentInclusive -> mostEmergentInclusive nth foundations export'
103 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger"
104 BranchLabel tagger nth ->
105 case tagger of
106 MostInclusive -> undefined
107 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger" ) export labels
108
109
110 ------------------
111 -- | Dynamics | --
112 ------------------
113
114
115 toDynamics :: Int -> [PhyloGroup] -> PhyloGroup -> Map Int (Date,Date) -> Double
116 toDynamics n parents group m =
117 let prd = group ^. phylo_groupPeriod
118 bid = group ^. phylo_groupBranchId
119 end = last' "dynamics" (sort $ map snd $ elems m)
120 in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end))
121 -- | decrease
122 then 2
123 else if ((fst prd) == (fst $ m ! n))
124 -- | recombination
125 then 0
126 else if isNew
127 -- | emergence
128 then 1
129 else 3
130 where
131 --------------------------------------
132 isNew :: Bool
133 isNew = not $ elem n $ concat $ map _phylo_groupNgrams parents
134
135
136 processDynamics :: [PhyloGroup] -> [PhyloGroup]
137 processDynamics groups =
138 map (\g ->
139 let parents = filter (\g' -> (g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
140 && ((fst $ g ^. phylo_groupPeriod) > (fst $ g' ^. phylo_groupPeriod))) groups
141 in g & phylo_groupMeta %~ insert "dynamics" (map (\n -> toDynamics n parents g mapNgrams) $ g ^. phylo_groupNgrams) ) groups
142 where
143 --------------------------------------
144 mapNgrams :: Map Int (Date,Date)
145 mapNgrams = map (\dates ->
146 let dates' = sort dates
147 in (head' "dynamics" dates', last' "dynamics" dates'))
148 $ fromListWith (++)
149 $ foldl (\acc g -> acc ++ ( map (\n -> (n,[fst $ g ^. phylo_groupPeriod, snd $ g ^. phylo_groupPeriod]))
150 $ (g ^. phylo_groupNgrams))) [] groups
151
152
153 ---------------------
154 -- | phyloExport | --
155 ---------------------
156
157
158 toPhyloExport :: Phylo -> DotGraph DotId
159 toPhyloExport phylo = toDot
160 $ processLabels (exportLabel $ getConfig phylo) (getRoots phylo) export
161 where
162 export :: PhyloExport
163 export = PhyloExport groups branches
164 --------------------------------------
165 branches :: [PhyloBranch]
166 branches = map (\bId -> PhyloBranch bId "") $ nub $ map _phylo_groupBranchId groups
167 --------------------------------------
168 groups :: [PhyloGroup]
169 groups = processDynamics
170 $ getGroupsFromLevel (phyloLevel $ getConfig phylo) phylo