2 Module : Gargantext.Viz.Phylo.PhyloExport
3 Description : Exportation module of a Phylo
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
11 {-# LANGUAGE NoImplicitPrelude #-}
12 {-# LANGUAGE FlexibleContexts #-}
13 {-# LANGUAGE OverloadedStrings #-}
14 {-# LANGUAGE MultiParamTypeClasses #-}
15 {-# LANGUAGE TypeSynonymInstances #-}
16 {-# LANGUAGE FlexibleInstances #-}
18 module Gargantext.Viz.Phylo.PhyloExport where
20 import Data.Map (Map, fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault, toList)
21 import Data.List ((++), sort, nub, concat, sortOn, reverse, groupBy, union, (\\), (!!), init, partition, unwords, nubBy)
22 import Data.Text (Text)
23 import Data.Vector (Vector)
25 import Prelude (writeFile)
26 import Gargantext.Prelude
27 import Gargantext.Viz.AdaptativePhylo
28 import Gargantext.Viz.Phylo.PhyloTools
31 import Data.GraphViz hiding (DotGraph, Order)
32 import Data.GraphViz.Types.Generalised (DotGraph)
33 import Data.GraphViz.Attributes.Complete hiding (EdgeType, Order)
34 import Data.GraphViz.Types.Monadic
35 import Data.Text.Lazy (fromStrict, pack, unpack)
36 import System.FilePath
37 import Debug.Trace (trace)
39 import qualified Data.Text as Text
40 import qualified Data.Text.Lazy as Lazy
41 import qualified Data.GraphViz.Attributes.HTML as H
47 dotToFile :: FilePath -> DotGraph DotId -> IO ()
48 dotToFile filePath dotG = writeFile filePath $ dotToString dotG
50 dotToString :: DotGraph DotId -> [Char]
51 dotToString dotG = unpack (printDotGraph dotG)
53 dynamicToColor :: Double -> H.Attribute
55 | d == 0 = H.BGColor (toColor LightCoral)
56 | d == 1 = H.BGColor (toColor Khaki)
57 | d == 2 = H.BGColor (toColor SkyBlue)
58 | otherwise = H.Color (toColor Black)
60 pickLabelColor :: [Double] -> H.Attribute
62 | elem 0 lst = dynamicToColor 0
63 | elem 2 lst = dynamicToColor 2
64 | elem 1 lst = dynamicToColor 1
65 | otherwise = dynamicToColor 3
67 toDotLabel :: Text.Text -> Label
68 toDotLabel lbl = StrLabel $ fromStrict lbl
70 toAttr :: AttributeName -> Lazy.Text -> CustomAttribute
71 toAttr k v = customAttribute k v
73 metaToAttr :: Map Text.Text [Double] -> [CustomAttribute]
74 metaToAttr meta = map (\(k,v) -> toAttr (fromStrict k) $ (pack . unwords) $ map show v) $ toList meta
76 groupIdToDotId :: PhyloGroupId -> DotId
77 groupIdToDotId (((d,d'),lvl),idx) = (fromStrict . Text.pack) $ ("group" <> (show d) <> (show d') <> (show lvl) <> (show idx))
79 branchIdToDotId :: PhyloBranchId -> DotId
80 branchIdToDotId bId = (fromStrict . Text.pack) $ ("branch" <> show (snd bId))
82 periodIdToDotId :: PhyloPeriodId -> DotId
83 periodIdToDotId prd = (fromStrict . Text.pack) $ ("period" <> show (fst prd) <> show (snd prd))
85 groupToTable :: Vector Ngrams -> PhyloGroup -> H.Label
86 groupToTable fdt g = H.Table H.HTable
87 { H.tableFontAttrs = Just [H.PointSize 14, H.Align H.HLeft]
88 , H.tableAttrs = [H.Border 0, H.CellBorder 0, H.BGColor (toColor White)]
89 , H.tableRows = [header]
90 <> [H.Cells [H.LabelCell [H.Height 10] $ H.Text [H.Str $ fromStrict ""]]]
91 <> ( map ngramsToRow $ splitEvery 4
92 $ reverse $ sortOn (snd . snd)
93 $ zip (ngramsToText fdt (g ^. phylo_groupNgrams))
94 $ zip ((g ^. phylo_groupMeta) ! "dynamics") ((g ^. phylo_groupMeta) ! "inclusion"))}
96 --------------------------------------
97 ngramsToRow :: [(Ngrams,(Double,Double))] -> H.Row
98 ngramsToRow ns = H.Cells $ map (\(n,(d,_)) ->
99 H.LabelCell [H.Align H.HLeft,dynamicToColor d] $ H.Text [H.Str $ fromStrict n]) ns
100 --------------------------------------
103 H.Cells [ H.LabelCell [pickLabelColor ((g ^. phylo_groupMeta) ! "dynamics")]
104 $ H.Text [H.Str $ (((fromStrict . Text.toUpper) $ g ^. phylo_groupLabel)
105 <> (fromStrict " ( ")
106 <> (pack $ show (fst $ g ^. phylo_groupPeriod))
107 <> (fromStrict " , ")
108 <> (pack $ show (snd $ g ^. phylo_groupPeriod))
109 <> (fromStrict " ) "))]]
110 --------------------------------------
112 branchToDotNode :: PhyloBranch -> Dot DotId
114 node (branchIdToDotId $ b ^. branch_id)
115 ([FillColor [toWColor CornSilk], FontName "Arial", FontSize 40, Shape Egg, Style [SItem Bold []], Label (toDotLabel $ b ^. branch_label)]
116 <> (metaToAttr $ b ^. branch_meta)
117 <> [ toAttr "nodeType" "branch"
118 , toAttr "branchId" (pack $ show (snd $ b ^. branch_id)) ])
120 periodToDotNode :: (Date,Date) -> Dot DotId
121 periodToDotNode prd =
122 node (periodIdToDotId prd)
123 ([Shape Square, FontSize 50, Label (toDotLabel $ Text.pack (show (fst prd) <> " " <> show (snd prd)))]
124 <> [ toAttr "nodeType" "period"
125 , toAttr "from" (fromStrict $ Text.pack $ (show $ fst prd))
126 , toAttr "to" (fromStrict $ Text.pack $ (show $ snd prd))])
129 groupToDotNode :: Vector Ngrams -> PhyloGroup -> Dot DotId
130 groupToDotNode fdt g =
131 node (groupIdToDotId $ getGroupId g)
132 ([FontName "Arial", Shape Square, toLabel (groupToTable fdt g)]
133 <> [ toAttr "nodeType" "group"
134 , toAttr "from" (pack $ show (fst $ g ^. phylo_groupPeriod))
135 , toAttr "to" (pack $ show (snd $ g ^. phylo_groupPeriod))
136 , toAttr "branchId" (pack $ show (snd $ g ^. phylo_groupBranchId))])
139 toDotEdge :: DotId -> DotId -> Text.Text -> EdgeType -> Dot DotId
140 toDotEdge from to lbl edgeType = edge from to
142 GroupToGroup -> [ Width 2, Color [toWColor Black], Constraint True
143 , Label (StrLabel $ fromStrict lbl)]
144 BranchToGroup -> [ Width 3, Color [toWColor Black], ArrowHead (AType [(ArrMod FilledArrow RightSide,DotArrow)])
145 , Label (StrLabel $ fromStrict lbl)]
146 BranchToBranch -> [ Width 2, Color [toWColor Black], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,DotArrow)])
147 , Label (StrLabel $ fromStrict lbl)]
148 PeriodToPeriod -> [ Width 5, Color [toWColor Black]])
151 mergePointers :: [PhyloGroup] -> Map (PhyloGroupId,PhyloGroupId) Double
152 mergePointers groups =
153 let toChilds = fromList $ concat $ map (\g -> map (\(to,w) -> ((getGroupId g,to),w)) $ g ^. phylo_groupPeriodChilds) groups
154 toParents = fromList $ concat $ map (\g -> map (\(to,w) -> ((to,getGroupId g),w)) $ g ^. phylo_groupPeriodParents) groups
155 in unionWith (\w w' -> max w w') toChilds toParents
158 exportToDot :: Phylo -> PhyloExport -> DotGraph DotId
159 exportToDot phylo export =
160 digraph ((Str . fromStrict) $ (phyloName $ getConfig phylo)) $ do
162 -- | 1) init the dot graph
163 graphAttrs ( [ Label (toDotLabel $ (phyloName $ getConfig phylo))]
164 <> [ FontSize 30, LabelLoc VTop, NodeSep 1, RankSep [1], Rank SameRank, Splines SplineEdges, Overlap ScaleOverlaps
166 , Style [SItem Filled []],Color [toWColor White]])
168 -- | 2) create a layer for the branches labels
169 subgraph (Str "Branches peaks") $ do
171 graphAttrs [Rank SameRank]
173 -- | 3) group the branches by hierarchy
175 subgraph (Str "Branches clade") $ do
176 graphAttrs [Rank SameRank]
178 -- | 4) create a node for each branch
179 mapM branchToDotNode branches
180 ) $ elems $ fromListWith (++) $ map (\b -> ((init . snd) $ b ^. branch_id,[b])) $ export ^. export_branches
182 -- | 5) create a layer for each period
184 subgraph ((Str . fromStrict . Text.pack) $ ("Period" <> show (fst period) <> show (snd period))) $ do
185 graphAttrs [Rank SameRank]
186 periodToDotNode period
188 -- | 6) create a node for each group
189 mapM (\g -> groupToDotNode (getRoots phylo) g) (filter (\g -> g ^. phylo_groupPeriod == period) $ export ^. export_groups)
190 ) $ getPeriodIds phylo
192 -- | 7) create the edges between a branch and its first groups
193 mapM (\(bId,groups) ->
194 mapM (\g -> toDotEdge (branchIdToDotId bId) (groupIdToDotId $ getGroupId g) "" BranchToGroup) groups
197 $ map (\groups -> head' "toDot"
198 $ groupBy (\g g' -> g' ^. phylo_groupPeriod == g ^. phylo_groupPeriod)
199 $ sortOn (fst . _phylo_groupPeriod) groups)
200 $ fromListWith (++) $ map (\g -> (g ^. phylo_groupBranchId,[g])) $ export ^. export_groups
202 -- | 8) create the edges between the groups
204 toDotEdge (groupIdToDotId k) (groupIdToDotId k') "" GroupToGroup
205 ) $ (toList . mergePointers) $ export ^. export_groups
207 -- | 7) create the edges between the periods
209 toDotEdge (periodIdToDotId prd) (periodIdToDotId prd') "" PeriodToPeriod
210 ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ getPeriodIds phylo
212 -- | 8) create the edges between the branches
214 toDotEdge (branchIdToDotId bId) (branchIdToDotId bId')
215 (Text.pack $ show(branchIdsToProximity bId bId'
216 (getThresholdInit $ phyloProximity $ getConfig phylo)
217 (getThresholdStep $ phyloProximity $ getConfig phylo))) BranchToBranch
218 ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ map _branch_id $ export ^. export_branches
221 graphAttrs [Rank SameRank]
231 filterByBranchSize :: Double -> PhyloExport -> PhyloExport
232 filterByBranchSize thr export =
233 let branches' = partition (\b -> head' "filter" ((b ^. branch_meta) ! "size") >= thr) $ export ^. export_branches
234 in export & export_branches .~ (fst branches')
235 & export_groups %~ (filter (\g -> not $ elem (g ^. phylo_groupBranchId) (map _branch_id $ snd branches')))
238 processFilters :: [Filter] -> PhyloExport -> PhyloExport
239 processFilters filters export =
240 foldl (\export' f -> case f of
241 ByBranchSize thr -> filterByBranchSize thr export'
249 sortByHierarchy :: Int -> [PhyloBranch] -> [PhyloBranch]
250 sortByHierarchy depth branches =
251 if (length branches == 1)
255 let parts = partition (\b -> depth + 1 == ((length . snd) $ b ^. branch_id)) branches'
256 in (sortOn (\b -> (b ^. branch_meta) ! "birth") (fst parts))
257 ++ (sortByHierarchy (depth + 1) (snd parts)))
258 $ groupBy (\b b' -> ((take depth . snd) $ b ^. branch_id) == ((take depth . snd) $ b' ^. branch_id) )
259 $ sortOn (\b -> (take depth . snd) $ b ^. branch_id) branches
262 sortByBirthDate :: Order -> PhyloExport -> PhyloExport
263 sortByBirthDate order export =
264 let branches = sortOn (\b -> (b ^. branch_meta) ! "birth") $ export ^. export_branches
265 branches' = case order of
267 Desc -> reverse branches
268 in export & export_branches .~ branches'
270 processSort :: Sort -> PhyloExport -> PhyloExport
271 processSort sort' export = case sort' of
272 ByBirthDate o -> sortByBirthDate o export
273 ByHierarchy -> export & export_branches .~ sortByHierarchy 0 (export ^. export_branches)
280 -- | Return the conditional probability of i knowing j
281 conditional :: Ord a => Map (a,a) Double -> a -> a -> Double
282 conditional m i j = (findWithDefault 0 (i,j) m)
286 -- | Return the genericity score of a given ngram
287 genericity :: Map (Int, Int) Double -> [Int] -> Int -> Double
288 genericity m l i = ( (sum $ map (\j -> conditional m i j) l)
289 - (sum $ map (\j -> conditional m j i) l)) / (fromIntegral $ (length l) + 1)
292 -- | Return the specificity score of a given ngram
293 specificity :: Map (Int, Int) Double -> [Int] -> Int -> Double
294 specificity m l i = ( (sum $ map (\j -> conditional m j i) l)
295 - (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
298 -- | Return the inclusion score of a given ngram
299 inclusion :: Map (Int, Int) Double -> [Int] -> Int -> Double
300 inclusion m l i = ( (sum $ map (\j -> conditional m j i) l)
301 + (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
304 ngramsMetrics :: PhyloExport -> PhyloExport
305 ngramsMetrics export =
308 (\g -> g & phylo_groupMeta %~ insert "genericity"
309 (map (\n -> genericity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
310 & phylo_groupMeta %~ insert "specificity"
311 (map (\n -> specificity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
312 & phylo_groupMeta %~ insert "inclusion"
313 (map (\n -> inclusion (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
317 branchDating :: PhyloExport -> PhyloExport
318 branchDating export =
319 over ( export_branches
322 let groups = sortOn fst
323 $ foldl' (\acc g -> if (g ^. phylo_groupBranchId == b ^. branch_id)
324 then acc ++ [g ^. phylo_groupPeriod]
325 else acc ) [] $ export ^. export_groups
326 birth = fst $ head' "birth" groups
327 age = (snd $ last' "age" groups) - birth
328 in b & branch_meta %~ insert "birth" [fromIntegral birth]
329 & branch_meta %~ insert "age" [fromIntegral age]
330 & branch_meta %~ insert "size" [fromIntegral $ length groups] ) export
332 processMetrics :: PhyloExport -> PhyloExport
333 processMetrics export = ngramsMetrics
334 $ branchDating export
341 getNthMostMeta :: Int -> [Double] -> [Int] -> [Int]
342 getNthMostMeta nth meta ns = map (\(idx,_) -> (ns !! idx))
345 $ sortOn snd $ zip [0..] meta
348 mostInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
349 mostInclusive nth foundations export =
350 over ( export_branches
353 let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
354 cooc = foldl (\acc g -> unionWith (+) acc (g ^. phylo_groupCooc)) empty groups
355 ngrams = sort $ foldl (\acc g -> union acc (g ^. phylo_groupNgrams)) [] groups
356 inc = map (\n -> inclusion cooc (ngrams \\ [n]) n) ngrams
357 lbl = ngramsToLabel foundations $ getNthMostMeta nth inc ngrams
358 in b & branch_label .~ lbl ) export
361 mostEmergentInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
362 mostEmergentInclusive nth foundations export =
366 let lbl = ngramsToLabel foundations
368 $ map (\(_,(_,idx)) -> idx)
370 $ map (\groups -> sortOn (fst . snd) groups)
371 $ groupBy ((==) `on` fst) $ reverse $ sortOn fst
372 $ zip ((g ^. phylo_groupMeta) ! "inclusion")
373 $ zip ((g ^. phylo_groupMeta) ! "dynamics") (g ^. phylo_groupNgrams)
374 in g & phylo_groupLabel .~ lbl ) export
377 processLabels :: [PhyloLabel] -> Vector Ngrams -> PhyloExport -> PhyloExport
378 processLabels labels foundations export =
379 foldl (\export' label ->
381 GroupLabel tagger nth ->
383 MostEmergentInclusive -> mostEmergentInclusive nth foundations export'
384 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger"
385 BranchLabel tagger nth ->
387 MostInclusive -> mostInclusive nth foundations export'
388 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger" ) export labels
396 toDynamics :: Int -> [PhyloGroup] -> PhyloGroup -> Map Int (Date,Date) -> Double
397 toDynamics n parents group m =
398 let prd = group ^. phylo_groupPeriod
399 bid = group ^. phylo_groupBranchId
400 end = last' "dynamics" (sort $ map snd $ elems m)
401 in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end))
404 else if ((fst prd) == (fst $ m ! n))
412 --------------------------------------
414 isNew = not $ elem n $ concat $ map _phylo_groupNgrams parents
417 processDynamics :: [PhyloGroup] -> [PhyloGroup]
418 processDynamics groups =
420 let parents = filter (\g' -> (g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
421 && ((fst $ g ^. phylo_groupPeriod) > (fst $ g' ^. phylo_groupPeriod))) groups
422 in g & phylo_groupMeta %~ insert "dynamics" (map (\n -> toDynamics n parents g mapNgrams) $ g ^. phylo_groupNgrams) ) groups
424 --------------------------------------
425 mapNgrams :: Map Int (Date,Date)
426 mapNgrams = map (\dates ->
427 let dates' = sort dates
428 in (head' "dynamics" dates', last' "dynamics" dates'))
430 $ foldl (\acc g -> acc ++ ( map (\n -> (n,[fst $ g ^. phylo_groupPeriod, snd $ g ^. phylo_groupPeriod]))
431 $ (g ^. phylo_groupNgrams))) [] groups
434 ---------------------
435 -- | phyloExport | --
436 ---------------------
439 toPhyloExport :: Phylo -> DotGraph DotId
440 toPhyloExport phylo = exportToDot phylo
441 $ processFilters (exportFilter $ getConfig phylo)
442 $ processSort (exportSort $ getConfig phylo)
443 $ processLabels (exportLabel $ getConfig phylo) (getRoots phylo)
444 $ processMetrics export
446 export :: PhyloExport
447 export = PhyloExport groups branches
448 --------------------------------------
449 branches :: [PhyloBranch]
450 branches = traceExportBranches $ map (\bId -> PhyloBranch bId "" empty) $ nub $ map _phylo_groupBranchId groups
451 --------------------------------------
452 groups :: [PhyloGroup]
453 groups = processDynamics
454 $ getGroupsFromLevel (phyloLevel $ getConfig phylo) phylo
457 traceExportBranches :: [PhyloBranch] -> [PhyloBranch]
458 traceExportBranches branches = trace ("\n" <> "-- | Export " <> show(length branches) <> " branches") branches