2 Module : Gargantext.Viz.Phylo.PhyloExport
3 Description : Exportation module of a Phylo
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
11 {-# LANGUAGE NoImplicitPrelude #-}
12 {-# LANGUAGE FlexibleContexts #-}
13 {-# LANGUAGE OverloadedStrings #-}
14 {-# LANGUAGE MultiParamTypeClasses #-}
15 {-# LANGUAGE TypeSynonymInstances #-}
16 {-# LANGUAGE FlexibleInstances #-}
18 module Gargantext.Viz.Phylo.PhyloExport where
20 import Data.Map (Map, fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault, toList)
21 import Data.List ((++), sort, nub, concat, sortOn, reverse, groupBy, union, (\\), (!!), init, partition, unwords, nubBy)
22 import Data.Vector (Vector)
24 import Prelude (writeFile)
25 import Gargantext.Prelude
26 import Gargantext.Viz.AdaptativePhylo
27 import Gargantext.Viz.Phylo.PhyloTools
30 import Data.GraphViz hiding (DotGraph, Order)
31 import Data.GraphViz.Types.Generalised (DotGraph)
32 import Data.GraphViz.Attributes.Complete hiding (EdgeType, Order)
33 import Data.GraphViz.Types.Monadic
34 import Data.Text.Lazy (fromStrict, pack, unpack)
35 import System.FilePath
36 import Debug.Trace (trace)
38 import qualified Data.Text as Text
39 import qualified Data.Text.Lazy as Lazy
40 import qualified Data.GraphViz.Attributes.HTML as H
46 dotToFile :: FilePath -> DotGraph DotId -> IO ()
47 dotToFile filePath dotG = writeFile filePath $ dotToString dotG
49 dotToString :: DotGraph DotId -> [Char]
50 dotToString dotG = unpack (printDotGraph dotG)
52 dynamicToColor :: Double -> H.Attribute
54 | d == 0 = H.BGColor (toColor LightCoral)
55 | d == 1 = H.BGColor (toColor Khaki)
56 | d == 2 = H.BGColor (toColor SkyBlue)
57 | otherwise = H.Color (toColor Black)
59 pickLabelColor :: [Double] -> H.Attribute
61 | elem 0 lst = dynamicToColor 0
62 | elem 2 lst = dynamicToColor 2
63 | elem 1 lst = dynamicToColor 1
64 | otherwise = dynamicToColor 3
66 toDotLabel :: Text.Text -> Label
67 toDotLabel lbl = StrLabel $ fromStrict lbl
69 toAttr :: AttributeName -> Lazy.Text -> CustomAttribute
70 toAttr k v = customAttribute k v
72 metaToAttr :: Map Text.Text [Double] -> [CustomAttribute]
73 metaToAttr meta = map (\(k,v) -> toAttr (fromStrict k) $ (pack . unwords) $ map show v) $ toList meta
75 groupIdToDotId :: PhyloGroupId -> DotId
76 groupIdToDotId (((d,d'),lvl),idx) = (fromStrict . Text.pack) $ ("group" <> (show d) <> (show d') <> (show lvl) <> (show idx))
78 branchIdToDotId :: PhyloBranchId -> DotId
79 branchIdToDotId bId = (fromStrict . Text.pack) $ ("branch" <> show (snd bId))
81 periodIdToDotId :: PhyloPeriodId -> DotId
82 periodIdToDotId prd = (fromStrict . Text.pack) $ ("period" <> show (fst prd) <> show (snd prd))
84 groupToTable :: Vector Ngrams -> PhyloGroup -> H.Label
85 groupToTable fdt g = H.Table H.HTable
86 { H.tableFontAttrs = Just [H.PointSize 14, H.Align H.HLeft]
87 , H.tableAttrs = [H.Border 0, H.CellBorder 0, H.BGColor (toColor White)]
88 , H.tableRows = [header]
89 <> [H.Cells [H.LabelCell [H.Height 10] $ H.Text [H.Str $ fromStrict ""]]]
90 <> ( map ngramsToRow $ splitEvery 4
91 $ reverse $ sortOn (snd . snd)
92 $ zip (ngramsToText fdt (g ^. phylo_groupNgrams))
93 $ zip ((g ^. phylo_groupMeta) ! "dynamics") ((g ^. phylo_groupMeta) ! "inclusion"))}
95 --------------------------------------
96 ngramsToRow :: [(Ngrams,(Double,Double))] -> H.Row
97 ngramsToRow ns = H.Cells $ map (\(n,(d,_)) ->
98 H.LabelCell [H.Align H.HLeft,dynamicToColor d] $ H.Text [H.Str $ fromStrict n]) ns
99 --------------------------------------
102 H.Cells [ H.LabelCell [pickLabelColor ((g ^. phylo_groupMeta) ! "dynamics")]
103 $ H.Text [H.Str $ (((fromStrict . Text.toUpper) $ g ^. phylo_groupLabel)
104 <> (fromStrict " ( ")
105 <> (pack $ show (fst $ g ^. phylo_groupPeriod))
106 <> (fromStrict " , ")
107 <> (pack $ show (snd $ g ^. phylo_groupPeriod))
108 <> (fromStrict " ) "))]]
109 --------------------------------------
111 branchToDotNode :: PhyloBranch -> Dot DotId
113 node (branchIdToDotId $ b ^. branch_id)
114 ([FillColor [toWColor CornSilk], FontName "Arial", FontSize 40, Shape Egg, Style [SItem Bold []], Label (toDotLabel $ b ^. branch_label)]
115 <> (metaToAttr $ b ^. branch_meta)
116 <> [ toAttr "nodeType" "branch"
117 , toAttr "branchId" (pack $ unwords (map show $ snd $ b ^. branch_id)) ])
119 periodToDotNode :: (Date,Date) -> Dot DotId
120 periodToDotNode prd =
121 node (periodIdToDotId prd)
122 ([Shape Square, FontSize 50, Label (toDotLabel $ Text.pack (show (fst prd) <> " " <> show (snd prd)))]
123 <> [ toAttr "nodeType" "period"
124 , toAttr "from" (fromStrict $ Text.pack $ (show $ fst prd))
125 , toAttr "to" (fromStrict $ Text.pack $ (show $ snd prd))])
128 groupToDotNode :: Vector Ngrams -> PhyloGroup -> Dot DotId
129 groupToDotNode fdt g =
130 node (groupIdToDotId $ getGroupId g)
131 ([FontName "Arial", Shape Square, toLabel (groupToTable fdt g)]
132 <> [ toAttr "nodeType" "group"
133 , toAttr "from" (pack $ show (fst $ g ^. phylo_groupPeriod))
134 , toAttr "to" (pack $ show (snd $ g ^. phylo_groupPeriod))
135 , toAttr "branchId" (pack $ unwords (init $ map show $ snd $ g ^. phylo_groupBranchId))
136 , toAttr "support" (pack $ show (g ^. phylo_groupSupport))])
139 toDotEdge :: DotId -> DotId -> Text.Text -> EdgeType -> Dot DotId
140 toDotEdge source target lbl edgeType = edge source target
142 GroupToGroup -> [ Width 10, Color [toWColor Black], Constraint True
143 , Label (StrLabel $ fromStrict lbl)]
144 BranchToGroup -> [ Width 3, Color [toWColor Black], ArrowHead (AType [(ArrMod FilledArrow RightSide,DotArrow)])
145 , Label (StrLabel $ fromStrict lbl)]
146 BranchToBranch -> [ Width 2, Color [toWColor Black], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,DotArrow)])
147 , Label (StrLabel $ fromStrict lbl)]
148 PeriodToPeriod -> [ Width 5, Color [toWColor Black]])
151 mergePointers :: [PhyloGroup] -> Map (PhyloGroupId,PhyloGroupId) Double
152 mergePointers groups =
153 let toChilds = fromList $ concat $ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupPeriodChilds) groups
154 toParents = fromList $ concat $ map (\g -> map (\(target,w) -> ((target,getGroupId g),w)) $ g ^. phylo_groupPeriodParents) groups
155 in unionWith (\w w' -> max w w') toChilds toParents
158 exportToDot :: Phylo -> PhyloExport -> DotGraph DotId
159 exportToDot phylo export =
160 trace ("\n-- | Convert " <> show(length $ export ^. export_branches) <> " branches and "
161 <> show(length $ export ^. export_groups) <> " groups "
162 <> show(length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) $ export ^. export_groups) <> " terms to a dot file\n") $
163 digraph ((Str . fromStrict) $ (phyloName $ getConfig phylo)) $ do
165 -- | 1) init the dot graph
166 graphAttrs ( [ Label (toDotLabel $ (phyloName $ getConfig phylo))]
167 <> [ FontSize 30, LabelLoc VTop, NodeSep 1, RankSep [1], Rank SameRank, Splines SplineEdges, Overlap ScaleOverlaps
169 , Style [SItem Filled []],Color [toWColor White]]
170 -- | home made attributes
171 <> [(toAttr (fromStrict "nbDocs") $ pack $ show (sum $ elems $ phylo ^. phylo_timeDocs))
172 ,(toAttr (fromStrict "proxiName") $ pack $ show (getProximityName $ phyloProximity $ getConfig phylo))
173 ,(toAttr (fromStrict "proxiInit") $ pack $ show (getProximityInit $ phyloProximity $ getConfig phylo))
174 ,(toAttr (fromStrict "proxiStep") $ pack $ show (getProximityStep $ phyloProximity $ getConfig phylo))
175 ,(toAttr (fromStrict "quaFactor") $ pack $ show (_qua_granularity $ phyloQuality $ getConfig phylo))
179 -- toAttr (fromStrict k) $ (pack . unwords) $ map show v
181 -- | 2) create a layer for the branches labels
182 subgraph (Str "Branches peaks") $ do
184 graphAttrs [Rank SameRank]
186 -- | 3) group the branches by hierarchy
187 -- mapM (\branches ->
188 -- subgraph (Str "Branches clade") $ do
189 -- graphAttrs [Rank SameRank]
191 -- -- | 4) create a node for each branch
192 -- mapM branchToDotNode branches
193 -- ) $ elems $ fromListWith (++) $ map (\b -> ((init . snd) $ b ^. branch_id,[b])) $ export ^. export_branches
195 mapM branchToDotNode $ export ^. export_branches
197 -- | 5) create a layer for each period
198 _ <- mapM (\period ->
199 subgraph ((Str . fromStrict . Text.pack) $ ("Period" <> show (fst period) <> show (snd period))) $ do
200 graphAttrs [Rank SameRank]
201 periodToDotNode period
203 -- | 6) create a node for each group
204 mapM (\g -> groupToDotNode (getRoots phylo) g) (filter (\g -> g ^. phylo_groupPeriod == period) $ export ^. export_groups)
205 ) $ getPeriodIds phylo
207 -- | 7) create the edges between a branch and its first groups
208 _ <- mapM (\(bId,groups) ->
209 mapM (\g -> toDotEdge (branchIdToDotId bId) (groupIdToDotId $ getGroupId g) "" BranchToGroup) groups
212 $ map (\groups -> head' "toDot"
213 $ groupBy (\g g' -> g' ^. phylo_groupPeriod == g ^. phylo_groupPeriod)
214 $ sortOn (fst . _phylo_groupPeriod) groups)
215 $ fromListWith (++) $ map (\g -> (g ^. phylo_groupBranchId,[g])) $ export ^. export_groups
217 -- | 8) create the edges between the groups
218 _ <- mapM (\((k,k'),_) ->
219 toDotEdge (groupIdToDotId k) (groupIdToDotId k') "" GroupToGroup
220 ) $ (toList . mergePointers) $ export ^. export_groups
222 -- | 7) create the edges between the periods
223 _ <- mapM (\(prd,prd') ->
224 toDotEdge (periodIdToDotId prd) (periodIdToDotId prd') "" PeriodToPeriod
225 ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ getPeriodIds phylo
227 -- | 8) create the edges between the branches
228 _ <- mapM (\(bId,bId') ->
229 toDotEdge (branchIdToDotId bId) (branchIdToDotId bId')
230 (Text.pack $ show(branchIdsToProximity bId bId'
231 (getThresholdInit $ phyloProximity $ getConfig phylo)
232 (getThresholdStep $ phyloProximity $ getConfig phylo))) BranchToBranch
233 ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ map _branch_id $ export ^. export_branches
236 graphAttrs [Rank SameRank]
246 filterByBranchSize :: Double -> PhyloExport -> PhyloExport
247 filterByBranchSize thr export =
248 let branches' = partition (\b -> head' "filter" ((b ^. branch_meta) ! "size") >= thr) $ export ^. export_branches
249 in export & export_branches .~ (fst branches')
250 & export_groups %~ (filter (\g -> not $ elem (g ^. phylo_groupBranchId) (map _branch_id $ snd branches')))
253 processFilters :: [Filter] -> Quality -> PhyloExport -> PhyloExport
254 processFilters filters qua export =
255 foldl (\export' f -> case f of
256 ByBranchSize thr -> if (thr < (fromIntegral $ qua ^. qua_minBranch))
257 then filterByBranchSize (fromIntegral $ qua ^. qua_minBranch) export'
258 else filterByBranchSize thr export'
265 sortByHierarchy :: Int -> [PhyloBranch] -> [PhyloBranch]
266 sortByHierarchy depth branches =
267 if (length branches == 1)
271 let partitions = partition (\b -> depth + 1 == ((length . snd) $ b ^. branch_id)) branches'
272 in (sortOn (\b -> (b ^. branch_meta) ! "birth") (fst partitions))
273 ++ (sortByHierarchy (depth + 1) (snd partitions)))
274 $ groupBy (\b b' -> ((take depth . snd) $ b ^. branch_id) == ((take depth . snd) $ b' ^. branch_id) )
275 $ sortOn (\b -> (take depth . snd) $ b ^. branch_id) branches
278 sortByBirthDate :: Order -> PhyloExport -> PhyloExport
279 sortByBirthDate order export =
280 let branches = sortOn (\b -> (b ^. branch_meta) ! "birth") $ export ^. export_branches
281 branches' = case order of
283 Desc -> reverse branches
284 in export & export_branches .~ branches'
286 processSort :: Sort -> PhyloExport -> PhyloExport
287 processSort sort' export = case sort' of
288 ByBirthDate o -> sortByBirthDate o export
289 ByHierarchy -> export & export_branches .~ sortByHierarchy 0 (export ^. export_branches)
296 -- | Return the conditional probability of i knowing j
297 conditional :: Ord a => Map (a,a) Double -> a -> a -> Double
298 conditional m i j = (findWithDefault 0 (i,j) m)
302 -- | Return the genericity score of a given ngram
303 genericity :: Map (Int, Int) Double -> [Int] -> Int -> Double
304 genericity m l i = ( (sum $ map (\j -> conditional m i j) l)
305 - (sum $ map (\j -> conditional m j i) l)) / (fromIntegral $ (length l) + 1)
308 -- | Return the specificity score of a given ngram
309 specificity :: Map (Int, Int) Double -> [Int] -> Int -> Double
310 specificity m l i = ( (sum $ map (\j -> conditional m j i) l)
311 - (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
314 -- | Return the inclusion score of a given ngram
315 inclusion :: Map (Int, Int) Double -> [Int] -> Int -> Double
316 inclusion m l i = ( (sum $ map (\j -> conditional m j i) l)
317 + (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
320 ngramsMetrics :: PhyloExport -> PhyloExport
321 ngramsMetrics export =
324 (\g -> g & phylo_groupMeta %~ insert "genericity"
325 (map (\n -> genericity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
326 & phylo_groupMeta %~ insert "specificity"
327 (map (\n -> specificity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
328 & phylo_groupMeta %~ insert "inclusion"
329 (map (\n -> inclusion (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
333 branchDating :: PhyloExport -> PhyloExport
334 branchDating export =
335 over ( export_branches
338 let groups = sortOn fst
339 $ foldl' (\acc g -> if (g ^. phylo_groupBranchId == b ^. branch_id)
340 then acc ++ [g ^. phylo_groupPeriod]
341 else acc ) [] $ export ^. export_groups
343 birth = fst $ head' "birth" groups
344 age = (snd $ last' "age" groups) - birth
345 in b & branch_meta %~ insert "birth" [fromIntegral birth]
346 & branch_meta %~ insert "age" [fromIntegral age]
347 & branch_meta %~ insert "size" [fromIntegral $ length periods] ) export
349 processMetrics :: PhyloExport -> PhyloExport
350 processMetrics export = ngramsMetrics
351 $ branchDating export
358 getNthMostMeta :: Int -> [Double] -> [Int] -> [Int]
359 getNthMostMeta nth meta ns = map (\(idx,_) -> (ns !! idx))
362 $ sortOn snd $ zip [0..] meta
365 mostInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
366 mostInclusive nth foundations export =
367 over ( export_branches
370 let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
371 cooc = foldl (\acc g -> unionWith (+) acc (g ^. phylo_groupCooc)) empty groups
372 ngrams = sort $ foldl (\acc g -> union acc (g ^. phylo_groupNgrams)) [] groups
373 inc = map (\n -> inclusion cooc (ngrams \\ [n]) n) ngrams
374 lbl = ngramsToLabel foundations $ getNthMostMeta nth inc ngrams
375 in b & branch_label .~ lbl ) export
378 mostEmergentInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
379 mostEmergentInclusive nth foundations export =
383 let lbl = ngramsToLabel foundations
385 $ map (\(_,(_,idx)) -> idx)
387 $ map (\groups -> sortOn (fst . snd) groups)
388 $ groupBy ((==) `on` fst) $ reverse $ sortOn fst
389 $ zip ((g ^. phylo_groupMeta) ! "inclusion")
390 $ zip ((g ^. phylo_groupMeta) ! "dynamics") (g ^. phylo_groupNgrams)
391 in g & phylo_groupLabel .~ lbl ) export
394 processLabels :: [PhyloLabel] -> Vector Ngrams -> PhyloExport -> PhyloExport
395 processLabels labels foundations export =
396 foldl (\export' label ->
398 GroupLabel tagger nth ->
400 MostEmergentInclusive -> mostEmergentInclusive nth foundations export'
401 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger"
402 BranchLabel tagger nth ->
404 MostInclusive -> mostInclusive nth foundations export'
405 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger" ) export labels
413 toDynamics :: Int -> [PhyloGroup] -> PhyloGroup -> Map Int (Date,Date) -> Double
414 toDynamics n parents group m =
415 let prd = group ^. phylo_groupPeriod
416 end = last' "dynamics" (sort $ map snd $ elems m)
417 in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end))
420 else if ((fst prd) == (fst $ m ! n))
428 --------------------------------------
430 isNew = not $ elem n $ concat $ map _phylo_groupNgrams parents
433 processDynamics :: [PhyloGroup] -> [PhyloGroup]
434 processDynamics groups =
436 let parents = filter (\g' -> (g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
437 && ((fst $ g ^. phylo_groupPeriod) > (fst $ g' ^. phylo_groupPeriod))) groups
438 in g & phylo_groupMeta %~ insert "dynamics" (map (\n -> toDynamics n parents g mapNgrams) $ g ^. phylo_groupNgrams) ) groups
440 --------------------------------------
441 mapNgrams :: Map Int (Date,Date)
442 mapNgrams = map (\dates ->
443 let dates' = sort dates
444 in (head' "dynamics" dates', last' "dynamics" dates'))
446 $ foldl (\acc g -> acc ++ ( map (\n -> (n,[fst $ g ^. phylo_groupPeriod, snd $ g ^. phylo_groupPeriod]))
447 $ (g ^. phylo_groupNgrams))) [] groups
450 ---------------------
451 -- | phyloExport | --
452 ---------------------
455 toPhyloExport :: Phylo -> DotGraph DotId
456 toPhyloExport phylo = exportToDot phylo
457 $ processFilters (exportFilter $ getConfig phylo) (phyloQuality $ getConfig phylo)
458 $ processSort (exportSort $ getConfig phylo)
459 $ processLabels (exportLabel $ getConfig phylo) (getRoots phylo)
460 $ processMetrics export
462 export :: PhyloExport
463 export = PhyloExport groups branches
464 --------------------------------------
465 branches :: [PhyloBranch]
466 branches = traceExportBranches $ map (\bId -> PhyloBranch bId "" empty) $ nub $ map _phylo_groupBranchId groups
467 --------------------------------------
468 groups :: [PhyloGroup]
469 groups = traceExportGroups
471 $ getGroupsFromLevel (phyloLevel $ getConfig phylo) phylo
474 traceExportBranches :: [PhyloBranch] -> [PhyloBranch]
475 traceExportBranches branches = trace ("\n"
476 <> "-- | Export " <> show(length branches) <> " branches") branches
478 traceExportGroups :: [PhyloGroup] -> [PhyloGroup]
479 traceExportGroups groups = trace ("\n" <> "##########################" <> "\n\n" <> "-- | Export " <> show(length groups) <> " groups and "
480 <> show(length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) groups) <> " terms"