2 Module : Gargantext.Viz.Phylo.PhyloExport
3 Description : Exportation module of a Phylo
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
11 {-# LANGUAGE NoImplicitPrelude #-}
12 {-# LANGUAGE FlexibleContexts #-}
13 {-# LANGUAGE OverloadedStrings #-}
14 {-# LANGUAGE MultiParamTypeClasses #-}
15 {-# LANGUAGE TypeSynonymInstances #-}
16 {-# LANGUAGE FlexibleInstances #-}
18 module Gargantext.Viz.Phylo.PhyloExport where
20 import Data.Map (Map, fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault, toList)
21 import Data.List ((++), sort, nub, concat, sortOn, reverse, groupBy, union, (\\), (!!), init, partition, unwords, nubBy)
22 import Data.Vector (Vector)
24 import Prelude (writeFile)
25 import Gargantext.Prelude
26 import Gargantext.Viz.AdaptativePhylo
27 import Gargantext.Viz.Phylo.PhyloTools
30 import Data.GraphViz hiding (DotGraph, Order)
31 import Data.GraphViz.Types.Generalised (DotGraph)
32 import Data.GraphViz.Attributes.Complete hiding (EdgeType, Order)
33 import Data.GraphViz.Types.Monadic
34 import Data.Text.Lazy (fromStrict, pack, unpack)
35 import System.FilePath
36 import Debug.Trace (trace)
38 import qualified Data.Text as Text
39 import qualified Data.Text.Lazy as Lazy
40 import qualified Data.GraphViz.Attributes.HTML as H
46 dotToFile :: FilePath -> DotGraph DotId -> IO ()
47 dotToFile filePath dotG = writeFile filePath $ dotToString dotG
49 dotToString :: DotGraph DotId -> [Char]
50 dotToString dotG = unpack (printDotGraph dotG)
52 dynamicToColor :: Double -> H.Attribute
54 | d == 0 = H.BGColor (toColor LightCoral)
55 | d == 1 = H.BGColor (toColor Khaki)
56 | d == 2 = H.BGColor (toColor SkyBlue)
57 | otherwise = H.Color (toColor Black)
59 pickLabelColor :: [Double] -> H.Attribute
61 | elem 0 lst = dynamicToColor 0
62 | elem 2 lst = dynamicToColor 2
63 | elem 1 lst = dynamicToColor 1
64 | otherwise = dynamicToColor 3
66 toDotLabel :: Text.Text -> Label
67 toDotLabel lbl = StrLabel $ fromStrict lbl
69 toAttr :: AttributeName -> Lazy.Text -> CustomAttribute
70 toAttr k v = customAttribute k v
72 metaToAttr :: Map Text.Text [Double] -> [CustomAttribute]
73 metaToAttr meta = map (\(k,v) -> toAttr (fromStrict k) $ (pack . unwords) $ map show v) $ toList meta
75 groupIdToDotId :: PhyloGroupId -> DotId
76 groupIdToDotId (((d,d'),lvl),idx) = (fromStrict . Text.pack) $ ("group" <> (show d) <> (show d') <> (show lvl) <> (show idx))
78 branchIdToDotId :: PhyloBranchId -> DotId
79 branchIdToDotId bId = (fromStrict . Text.pack) $ ("branch" <> show (snd bId))
81 periodIdToDotId :: PhyloPeriodId -> DotId
82 periodIdToDotId prd = (fromStrict . Text.pack) $ ("period" <> show (fst prd) <> show (snd prd))
84 groupToTable :: Vector Ngrams -> PhyloGroup -> H.Label
85 groupToTable fdt g = H.Table H.HTable
86 { H.tableFontAttrs = Just [H.PointSize 14, H.Align H.HLeft]
87 , H.tableAttrs = [H.Border 0, H.CellBorder 0, H.BGColor (toColor White)]
88 , H.tableRows = [header]
89 <> [H.Cells [H.LabelCell [H.Height 10] $ H.Text [H.Str $ fromStrict ""]]]
90 <> ( map ngramsToRow $ splitEvery 4
91 $ reverse $ sortOn (snd . snd)
92 $ zip (ngramsToText fdt (g ^. phylo_groupNgrams))
93 $ zip ((g ^. phylo_groupMeta) ! "dynamics") ((g ^. phylo_groupMeta) ! "inclusion"))}
95 --------------------------------------
96 ngramsToRow :: [(Ngrams,(Double,Double))] -> H.Row
97 ngramsToRow ns = H.Cells $ map (\(n,(d,_)) ->
98 H.LabelCell [H.Align H.HLeft,dynamicToColor d] $ H.Text [H.Str $ fromStrict n]) ns
99 --------------------------------------
102 H.Cells [ H.LabelCell [pickLabelColor ((g ^. phylo_groupMeta) ! "dynamics")]
103 $ H.Text [H.Str $ (((fromStrict . Text.toUpper) $ g ^. phylo_groupLabel)
104 <> (fromStrict " ( ")
105 <> (pack $ show (fst $ g ^. phylo_groupPeriod))
106 <> (fromStrict " , ")
107 <> (pack $ show (snd $ g ^. phylo_groupPeriod))
108 <> (fromStrict " ) "))]]
109 --------------------------------------
111 branchToDotNode :: PhyloBranch -> Dot DotId
113 node (branchIdToDotId $ b ^. branch_id)
114 ([FillColor [toWColor CornSilk], FontName "Arial", FontSize 40, Shape Egg, Style [SItem Bold []], Label (toDotLabel $ b ^. branch_label)]
115 <> (metaToAttr $ b ^. branch_meta)
116 <> [ toAttr "nodeType" "branch"
117 , toAttr "branchId" (pack $ show (snd $ b ^. branch_id)) ])
119 periodToDotNode :: (Date,Date) -> Dot DotId
120 periodToDotNode prd =
121 node (periodIdToDotId prd)
122 ([Shape Square, FontSize 50, Label (toDotLabel $ Text.pack (show (fst prd) <> " " <> show (snd prd)))]
123 <> [ toAttr "nodeType" "period"
124 , toAttr "from" (fromStrict $ Text.pack $ (show $ fst prd))
125 , toAttr "to" (fromStrict $ Text.pack $ (show $ snd prd))])
128 groupToDotNode :: Vector Ngrams -> PhyloGroup -> Dot DotId
129 groupToDotNode fdt g =
130 node (groupIdToDotId $ getGroupId g)
131 ([FontName "Arial", Shape Square, toLabel (groupToTable fdt g)]
132 <> [ toAttr "nodeType" "group"
133 , toAttr "from" (pack $ show (fst $ g ^. phylo_groupPeriod))
134 , toAttr "to" (pack $ show (snd $ g ^. phylo_groupPeriod))
135 , toAttr "branchId" (pack $ show (snd $ g ^. phylo_groupBranchId))])
138 toDotEdge :: DotId -> DotId -> Text.Text -> EdgeType -> Dot DotId
139 toDotEdge source target lbl edgeType = edge source target
141 GroupToGroup -> [ Width 2, Color [toWColor Black], Constraint True
142 , Label (StrLabel $ fromStrict lbl)]
143 BranchToGroup -> [ Width 3, Color [toWColor Black], ArrowHead (AType [(ArrMod FilledArrow RightSide,DotArrow)])
144 , Label (StrLabel $ fromStrict lbl)]
145 BranchToBranch -> [ Width 2, Color [toWColor Black], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,DotArrow)])
146 , Label (StrLabel $ fromStrict lbl)]
147 PeriodToPeriod -> [ Width 5, Color [toWColor Black]])
150 mergePointers :: [PhyloGroup] -> Map (PhyloGroupId,PhyloGroupId) Double
151 mergePointers groups =
152 let toChilds = fromList $ concat $ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupPeriodChilds) groups
153 toParents = fromList $ concat $ map (\g -> map (\(target,w) -> ((target,getGroupId g),w)) $ g ^. phylo_groupPeriodParents) groups
154 in unionWith (\w w' -> max w w') toChilds toParents
157 exportToDot :: Phylo -> PhyloExport -> DotGraph DotId
158 exportToDot phylo export =
159 digraph ((Str . fromStrict) $ (phyloName $ getConfig phylo)) $ do
161 -- | 1) init the dot graph
162 graphAttrs ( [ Label (toDotLabel $ (phyloName $ getConfig phylo))]
163 <> [ FontSize 30, LabelLoc VTop, NodeSep 1, RankSep [1], Rank SameRank, Splines SplineEdges, Overlap ScaleOverlaps
165 , Style [SItem Filled []],Color [toWColor White]])
167 -- | 2) create a layer for the branches labels
168 subgraph (Str "Branches peaks") $ do
170 graphAttrs [Rank SameRank]
172 -- | 3) group the branches by hierarchy
174 subgraph (Str "Branches clade") $ do
175 graphAttrs [Rank SameRank]
177 -- | 4) create a node for each branch
178 mapM branchToDotNode branches
179 ) $ elems $ fromListWith (++) $ map (\b -> ((init . snd) $ b ^. branch_id,[b])) $ export ^. export_branches
181 -- | 5) create a layer for each period
182 _ <- mapM (\period ->
183 subgraph ((Str . fromStrict . Text.pack) $ ("Period" <> show (fst period) <> show (snd period))) $ do
184 graphAttrs [Rank SameRank]
185 periodToDotNode period
187 -- | 6) create a node for each group
188 mapM (\g -> groupToDotNode (getRoots phylo) g) (filter (\g -> g ^. phylo_groupPeriod == period) $ export ^. export_groups)
189 ) $ getPeriodIds phylo
191 -- | 7) create the edges between a branch and its first groups
192 _ <- mapM (\(bId,groups) ->
193 mapM (\g -> toDotEdge (branchIdToDotId bId) (groupIdToDotId $ getGroupId g) "" BranchToGroup) groups
196 $ map (\groups -> head' "toDot"
197 $ groupBy (\g g' -> g' ^. phylo_groupPeriod == g ^. phylo_groupPeriod)
198 $ sortOn (fst . _phylo_groupPeriod) groups)
199 $ fromListWith (++) $ map (\g -> (g ^. phylo_groupBranchId,[g])) $ export ^. export_groups
201 -- | 8) create the edges between the groups
202 _ <- mapM (\((k,k'),_) ->
203 toDotEdge (groupIdToDotId k) (groupIdToDotId k') "" GroupToGroup
204 ) $ (toList . mergePointers) $ export ^. export_groups
206 -- | 7) create the edges between the periods
207 _ <- mapM (\(prd,prd') ->
208 toDotEdge (periodIdToDotId prd) (periodIdToDotId prd') "" PeriodToPeriod
209 ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ getPeriodIds phylo
211 -- | 8) create the edges between the branches
212 _ <- mapM (\(bId,bId') ->
213 toDotEdge (branchIdToDotId bId) (branchIdToDotId bId')
214 (Text.pack $ show(branchIdsToProximity bId bId'
215 (getThresholdInit $ phyloProximity $ getConfig phylo)
216 (getThresholdStep $ phyloProximity $ getConfig phylo))) BranchToBranch
217 ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ map _branch_id $ export ^. export_branches
220 graphAttrs [Rank SameRank]
230 filterByBranchSize :: Double -> PhyloExport -> PhyloExport
231 filterByBranchSize thr export =
232 let branches' = partition (\b -> head' "filter" ((b ^. branch_meta) ! "size") >= thr) $ export ^. export_branches
233 in export & export_branches .~ (fst branches')
234 & export_groups %~ (filter (\g -> not $ elem (g ^. phylo_groupBranchId) (map _branch_id $ snd branches')))
237 processFilters :: [Filter] -> PhyloExport -> PhyloExport
238 processFilters filters export =
239 foldl (\export' f -> case f of
240 ByBranchSize thr -> filterByBranchSize thr export'
247 sortByHierarchy :: Int -> [PhyloBranch] -> [PhyloBranch]
248 sortByHierarchy depth branches =
249 if (length branches == 1)
253 let partitions = partition (\b -> depth + 1 == ((length . snd) $ b ^. branch_id)) branches'
254 in (sortOn (\b -> (b ^. branch_meta) ! "birth") (fst partitions))
255 ++ (sortByHierarchy (depth + 1) (snd partitions)))
256 $ groupBy (\b b' -> ((take depth . snd) $ b ^. branch_id) == ((take depth . snd) $ b' ^. branch_id) )
257 $ sortOn (\b -> (take depth . snd) $ b ^. branch_id) branches
260 sortByBirthDate :: Order -> PhyloExport -> PhyloExport
261 sortByBirthDate order export =
262 let branches = sortOn (\b -> (b ^. branch_meta) ! "birth") $ export ^. export_branches
263 branches' = case order of
265 Desc -> reverse branches
266 in export & export_branches .~ branches'
268 processSort :: Sort -> PhyloExport -> PhyloExport
269 processSort sort' export = case sort' of
270 ByBirthDate o -> sortByBirthDate o export
271 ByHierarchy -> export & export_branches .~ sortByHierarchy 0 (export ^. export_branches)
278 -- | Return the conditional probability of i knowing j
279 conditional :: Ord a => Map (a,a) Double -> a -> a -> Double
280 conditional m i j = (findWithDefault 0 (i,j) m)
284 -- | Return the genericity score of a given ngram
285 genericity :: Map (Int, Int) Double -> [Int] -> Int -> Double
286 genericity m l i = ( (sum $ map (\j -> conditional m i j) l)
287 - (sum $ map (\j -> conditional m j i) l)) / (fromIntegral $ (length l) + 1)
290 -- | Return the specificity score of a given ngram
291 specificity :: Map (Int, Int) Double -> [Int] -> Int -> Double
292 specificity m l i = ( (sum $ map (\j -> conditional m j i) l)
293 - (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
296 -- | Return the inclusion score of a given ngram
297 inclusion :: Map (Int, Int) Double -> [Int] -> Int -> Double
298 inclusion m l i = ( (sum $ map (\j -> conditional m j i) l)
299 + (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
302 ngramsMetrics :: PhyloExport -> PhyloExport
303 ngramsMetrics export =
306 (\g -> g & phylo_groupMeta %~ insert "genericity"
307 (map (\n -> genericity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
308 & phylo_groupMeta %~ insert "specificity"
309 (map (\n -> specificity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
310 & phylo_groupMeta %~ insert "inclusion"
311 (map (\n -> inclusion (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
315 branchDating :: PhyloExport -> PhyloExport
316 branchDating export =
317 over ( export_branches
320 let groups = sortOn fst
321 $ foldl' (\acc g -> if (g ^. phylo_groupBranchId == b ^. branch_id)
322 then acc ++ [g ^. phylo_groupPeriod]
323 else acc ) [] $ export ^. export_groups
324 birth = fst $ head' "birth" groups
325 age = (snd $ last' "age" groups) - birth
326 in b & branch_meta %~ insert "birth" [fromIntegral birth]
327 & branch_meta %~ insert "age" [fromIntegral age]
328 & branch_meta %~ insert "size" [fromIntegral $ length groups] ) export
330 processMetrics :: PhyloExport -> PhyloExport
331 processMetrics export = ngramsMetrics
332 $ branchDating export
339 getNthMostMeta :: Int -> [Double] -> [Int] -> [Int]
340 getNthMostMeta nth meta ns = map (\(idx,_) -> (ns !! idx))
343 $ sortOn snd $ zip [0..] meta
346 mostInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
347 mostInclusive nth foundations export =
348 over ( export_branches
351 let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
352 cooc = foldl (\acc g -> unionWith (+) acc (g ^. phylo_groupCooc)) empty groups
353 ngrams = sort $ foldl (\acc g -> union acc (g ^. phylo_groupNgrams)) [] groups
354 inc = map (\n -> inclusion cooc (ngrams \\ [n]) n) ngrams
355 lbl = ngramsToLabel foundations $ getNthMostMeta nth inc ngrams
356 in b & branch_label .~ lbl ) export
359 mostEmergentInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
360 mostEmergentInclusive nth foundations export =
364 let lbl = ngramsToLabel foundations
366 $ map (\(_,(_,idx)) -> idx)
368 $ map (\groups -> sortOn (fst . snd) groups)
369 $ groupBy ((==) `on` fst) $ reverse $ sortOn fst
370 $ zip ((g ^. phylo_groupMeta) ! "inclusion")
371 $ zip ((g ^. phylo_groupMeta) ! "dynamics") (g ^. phylo_groupNgrams)
372 in g & phylo_groupLabel .~ lbl ) export
375 processLabels :: [PhyloLabel] -> Vector Ngrams -> PhyloExport -> PhyloExport
376 processLabels labels foundations export =
377 foldl (\export' label ->
379 GroupLabel tagger nth ->
381 MostEmergentInclusive -> mostEmergentInclusive nth foundations export'
382 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger"
383 BranchLabel tagger nth ->
385 MostInclusive -> mostInclusive nth foundations export'
386 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger" ) export labels
394 toDynamics :: Int -> [PhyloGroup] -> PhyloGroup -> Map Int (Date,Date) -> Double
395 toDynamics n parents group m =
396 let prd = group ^. phylo_groupPeriod
397 end = last' "dynamics" (sort $ map snd $ elems m)
398 in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end))
401 else if ((fst prd) == (fst $ m ! n))
409 --------------------------------------
411 isNew = not $ elem n $ concat $ map _phylo_groupNgrams parents
414 processDynamics :: [PhyloGroup] -> [PhyloGroup]
415 processDynamics groups =
417 let parents = filter (\g' -> (g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
418 && ((fst $ g ^. phylo_groupPeriod) > (fst $ g' ^. phylo_groupPeriod))) groups
419 in g & phylo_groupMeta %~ insert "dynamics" (map (\n -> toDynamics n parents g mapNgrams) $ g ^. phylo_groupNgrams) ) groups
421 --------------------------------------
422 mapNgrams :: Map Int (Date,Date)
423 mapNgrams = map (\dates ->
424 let dates' = sort dates
425 in (head' "dynamics" dates', last' "dynamics" dates'))
427 $ foldl (\acc g -> acc ++ ( map (\n -> (n,[fst $ g ^. phylo_groupPeriod, snd $ g ^. phylo_groupPeriod]))
428 $ (g ^. phylo_groupNgrams))) [] groups
431 ---------------------
432 -- | phyloExport | --
433 ---------------------
436 toPhyloExport :: Phylo -> DotGraph DotId
437 toPhyloExport phylo = exportToDot phylo
438 $ processFilters (exportFilter $ getConfig phylo)
439 $ processSort (exportSort $ getConfig phylo)
440 $ processLabels (exportLabel $ getConfig phylo) (getRoots phylo)
441 $ processMetrics export
443 export :: PhyloExport
444 export = PhyloExport groups branches
445 --------------------------------------
446 branches :: [PhyloBranch]
447 branches = traceExportBranches $ map (\bId -> PhyloBranch bId "" empty) $ nub $ map _phylo_groupBranchId groups
448 --------------------------------------
449 groups :: [PhyloGroup]
450 groups = processDynamics
451 $ getGroupsFromLevel (phyloLevel $ getConfig phylo) phylo
454 traceExportBranches :: [PhyloBranch] -> [PhyloBranch]
455 traceExportBranches branches = trace ("\n" <> "-- | Export " <> show(length branches) <> " branches") branches