2 Module : Gargantext.Viz.Phylo.PhyloExport
3 Description : Exportation module of a Phylo
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
11 {-# LANGUAGE NoImplicitPrelude #-}
12 {-# LANGUAGE FlexibleContexts #-}
13 {-# LANGUAGE OverloadedStrings #-}
14 {-# LANGUAGE MultiParamTypeClasses #-}
15 {-# LANGUAGE TypeSynonymInstances #-}
16 {-# LANGUAGE FlexibleInstances #-}
18 module Gargantext.Viz.Phylo.PhyloExport where
20 import Data.Map (Map, fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault, toList)
21 import Data.List ((++), sort, nub, concat, sortOn, reverse, groupBy, union, (\\), (!!), init, partition, unwords, nubBy)
22 import Data.Vector (Vector)
24 import Prelude (writeFile)
25 import Gargantext.Prelude
26 import Gargantext.Viz.AdaptativePhylo
27 import Gargantext.Viz.Phylo.PhyloTools
30 import Data.GraphViz hiding (DotGraph, Order)
31 import Data.GraphViz.Types.Generalised (DotGraph)
32 import Data.GraphViz.Attributes.Complete hiding (EdgeType, Order)
33 import Data.GraphViz.Types.Monadic
34 import Data.Text.Lazy (fromStrict, pack, unpack)
35 import System.FilePath
36 import Debug.Trace (trace)
38 import qualified Data.Text as Text
39 import qualified Data.Text.Lazy as Lazy
40 import qualified Data.GraphViz.Attributes.HTML as H
46 dotToFile :: FilePath -> DotGraph DotId -> IO ()
47 dotToFile filePath dotG = writeFile filePath $ dotToString dotG
49 dotToString :: DotGraph DotId -> [Char]
50 dotToString dotG = unpack (printDotGraph dotG)
52 dynamicToColor :: Double -> H.Attribute
54 | d == 0 = H.BGColor (toColor LightCoral)
55 | d == 1 = H.BGColor (toColor Khaki)
56 | d == 2 = H.BGColor (toColor SkyBlue)
57 | otherwise = H.Color (toColor Black)
59 pickLabelColor :: [Double] -> H.Attribute
61 | elem 0 lst = dynamicToColor 0
62 | elem 2 lst = dynamicToColor 2
63 | elem 1 lst = dynamicToColor 1
64 | otherwise = dynamicToColor 3
66 toDotLabel :: Text.Text -> Label
67 toDotLabel lbl = StrLabel $ fromStrict lbl
69 toAttr :: AttributeName -> Lazy.Text -> CustomAttribute
70 toAttr k v = customAttribute k v
72 metaToAttr :: Map Text.Text [Double] -> [CustomAttribute]
73 metaToAttr meta = map (\(k,v) -> toAttr (fromStrict k) $ (pack . unwords) $ map show v) $ toList meta
75 groupIdToDotId :: PhyloGroupId -> DotId
76 groupIdToDotId (((d,d'),lvl),idx) = (fromStrict . Text.pack) $ ("group" <> (show d) <> (show d') <> (show lvl) <> (show idx))
78 branchIdToDotId :: PhyloBranchId -> DotId
79 branchIdToDotId bId = (fromStrict . Text.pack) $ ("branch" <> show (snd bId))
81 periodIdToDotId :: PhyloPeriodId -> DotId
82 periodIdToDotId prd = (fromStrict . Text.pack) $ ("period" <> show (fst prd) <> show (snd prd))
84 groupToTable :: Vector Ngrams -> PhyloGroup -> H.Label
85 groupToTable fdt g = H.Table H.HTable
86 { H.tableFontAttrs = Just [H.PointSize 14, H.Align H.HLeft]
87 , H.tableAttrs = [H.Border 0, H.CellBorder 0, H.BGColor (toColor White)]
88 , H.tableRows = [header]
89 <> [H.Cells [H.LabelCell [H.Height 10] $ H.Text [H.Str $ fromStrict ""]]]
90 <> ( map ngramsToRow $ splitEvery 4
91 $ reverse $ sortOn (snd . snd)
92 $ zip (ngramsToText fdt (g ^. phylo_groupNgrams))
93 $ zip ((g ^. phylo_groupMeta) ! "dynamics") ((g ^. phylo_groupMeta) ! "inclusion"))}
95 --------------------------------------
96 ngramsToRow :: [(Ngrams,(Double,Double))] -> H.Row
97 ngramsToRow ns = H.Cells $ map (\(n,(d,_)) ->
98 H.LabelCell [H.Align H.HLeft,dynamicToColor d] $ H.Text [H.Str $ fromStrict n]) ns
99 --------------------------------------
102 H.Cells [ H.LabelCell [pickLabelColor ((g ^. phylo_groupMeta) ! "dynamics")]
103 $ H.Text [H.Str $ (((fromStrict . Text.toUpper) $ g ^. phylo_groupLabel)
104 <> (fromStrict " ( ")
105 <> (pack $ show (fst $ g ^. phylo_groupPeriod))
106 <> (fromStrict " , ")
107 <> (pack $ show (snd $ g ^. phylo_groupPeriod))
108 <> (fromStrict " ) "))]]
109 --------------------------------------
111 branchToDotNode :: PhyloBranch -> Dot DotId
113 node (branchIdToDotId $ b ^. branch_id)
114 ([FillColor [toWColor CornSilk], FontName "Arial", FontSize 40, Shape Egg, Style [SItem Bold []], Label (toDotLabel $ b ^. branch_label)]
115 <> (metaToAttr $ b ^. branch_meta)
116 <> [ toAttr "nodeType" "branch"
117 , toAttr "branchId" (pack $ unwords (init $ map show $ snd $ b ^. branch_id)) ])
119 periodToDotNode :: (Date,Date) -> Dot DotId
120 periodToDotNode prd =
121 node (periodIdToDotId prd)
122 ([Shape Square, FontSize 50, Label (toDotLabel $ Text.pack (show (fst prd) <> " " <> show (snd prd)))]
123 <> [ toAttr "nodeType" "period"
124 , toAttr "from" (fromStrict $ Text.pack $ (show $ fst prd))
125 , toAttr "to" (fromStrict $ Text.pack $ (show $ snd prd))])
128 groupToDotNode :: Vector Ngrams -> PhyloGroup -> Dot DotId
129 groupToDotNode fdt g =
130 node (groupIdToDotId $ getGroupId g)
131 ([FontName "Arial", Shape Square, toLabel (groupToTable fdt g)]
132 <> [ toAttr "nodeType" "group"
133 , toAttr "from" (pack $ show (fst $ g ^. phylo_groupPeriod))
134 , toAttr "to" (pack $ show (snd $ g ^. phylo_groupPeriod))
135 , toAttr "branchId" (pack $ unwords (init $ map show $ snd $ g ^. phylo_groupBranchId))])
138 toDotEdge :: DotId -> DotId -> Text.Text -> EdgeType -> Dot DotId
139 toDotEdge source target lbl edgeType = edge source target
141 GroupToGroup -> [ Width 2, Color [toWColor Black], Constraint True
142 , Label (StrLabel $ fromStrict lbl)]
143 BranchToGroup -> [ Width 3, Color [toWColor Black], ArrowHead (AType [(ArrMod FilledArrow RightSide,DotArrow)])
144 , Label (StrLabel $ fromStrict lbl)]
145 BranchToBranch -> [ Width 2, Color [toWColor Black], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,DotArrow)])
146 , Label (StrLabel $ fromStrict lbl)]
147 PeriodToPeriod -> [ Width 5, Color [toWColor Black]])
150 mergePointers :: [PhyloGroup] -> Map (PhyloGroupId,PhyloGroupId) Double
151 mergePointers groups =
152 let toChilds = fromList $ concat $ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupPeriodChilds) groups
153 toParents = fromList $ concat $ map (\g -> map (\(target,w) -> ((target,getGroupId g),w)) $ g ^. phylo_groupPeriodParents) groups
154 in unionWith (\w w' -> max w w') toChilds toParents
157 exportToDot :: Phylo -> PhyloExport -> DotGraph DotId
158 exportToDot phylo export =
159 trace ("\n-- | Convert " <> show(length $ export ^. export_branches) <> " branches and "
160 <> show(length $ export ^. export_groups) <> " groups to a dot file\n") $
161 digraph ((Str . fromStrict) $ (phyloName $ getConfig phylo)) $ do
163 -- | 1) init the dot graph
164 graphAttrs ( [ Label (toDotLabel $ (phyloName $ getConfig phylo))]
165 <> [ FontSize 30, LabelLoc VTop, NodeSep 1, RankSep [1], Rank SameRank, Splines SplineEdges, Overlap ScaleOverlaps
167 , Style [SItem Filled []],Color [toWColor White]]
168 -- | home made attributes
169 <> [(toAttr (fromStrict "nbDocs") $ pack $ show (sum $ elems $ phylo ^. phylo_timeDocs))
170 ,(toAttr (fromStrict "proxiName") $ pack $ show (getProximityName $ phyloProximity $ getConfig phylo))
171 ,(toAttr (fromStrict "proxiInit") $ pack $ show (getProximityInit $ phyloProximity $ getConfig phylo))
172 ,(toAttr (fromStrict "proxiStep") $ pack $ show (getProximityStep $ phyloProximity $ getConfig phylo))
173 ,(toAttr (fromStrict "quaFactor") $ pack $ show (_qua_relevance $ phyloQuality $ getConfig phylo))
177 -- toAttr (fromStrict k) $ (pack . unwords) $ map show v
179 -- | 2) create a layer for the branches labels
180 subgraph (Str "Branches peaks") $ do
182 graphAttrs [Rank SameRank]
184 -- | 3) group the branches by hierarchy
185 -- mapM (\branches ->
186 -- subgraph (Str "Branches clade") $ do
187 -- graphAttrs [Rank SameRank]
189 -- -- | 4) create a node for each branch
190 -- mapM branchToDotNode branches
191 -- ) $ elems $ fromListWith (++) $ map (\b -> ((init . snd) $ b ^. branch_id,[b])) $ export ^. export_branches
193 mapM branchToDotNode $ export ^. export_branches
195 -- | 5) create a layer for each period
196 _ <- mapM (\period ->
197 subgraph ((Str . fromStrict . Text.pack) $ ("Period" <> show (fst period) <> show (snd period))) $ do
198 graphAttrs [Rank SameRank]
199 periodToDotNode period
201 -- | 6) create a node for each group
202 mapM (\g -> groupToDotNode (getRoots phylo) g) (filter (\g -> g ^. phylo_groupPeriod == period) $ export ^. export_groups)
203 ) $ getPeriodIds phylo
205 -- | 7) create the edges between a branch and its first groups
206 _ <- mapM (\(bId,groups) ->
207 mapM (\g -> toDotEdge (branchIdToDotId bId) (groupIdToDotId $ getGroupId g) "" BranchToGroup) groups
210 $ map (\groups -> head' "toDot"
211 $ groupBy (\g g' -> g' ^. phylo_groupPeriod == g ^. phylo_groupPeriod)
212 $ sortOn (fst . _phylo_groupPeriod) groups)
213 $ fromListWith (++) $ map (\g -> (g ^. phylo_groupBranchId,[g])) $ export ^. export_groups
215 -- | 8) create the edges between the groups
216 _ <- mapM (\((k,k'),_) ->
217 toDotEdge (groupIdToDotId k) (groupIdToDotId k') "" GroupToGroup
218 ) $ (toList . mergePointers) $ export ^. export_groups
220 -- | 7) create the edges between the periods
221 _ <- mapM (\(prd,prd') ->
222 toDotEdge (periodIdToDotId prd) (periodIdToDotId prd') "" PeriodToPeriod
223 ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ getPeriodIds phylo
225 -- | 8) create the edges between the branches
226 _ <- mapM (\(bId,bId') ->
227 toDotEdge (branchIdToDotId bId) (branchIdToDotId bId')
228 (Text.pack $ show(branchIdsToProximity bId bId'
229 (getThresholdInit $ phyloProximity $ getConfig phylo)
230 (getThresholdStep $ phyloProximity $ getConfig phylo))) BranchToBranch
231 ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ map _branch_id $ export ^. export_branches
234 graphAttrs [Rank SameRank]
244 filterByBranchSize :: Double -> PhyloExport -> PhyloExport
245 filterByBranchSize thr export =
246 let branches' = partition (\b -> head' "filter" ((b ^. branch_meta) ! "size") >= thr) $ export ^. export_branches
247 in export & export_branches .~ (fst branches')
248 & export_groups %~ (filter (\g -> not $ elem (g ^. phylo_groupBranchId) (map _branch_id $ snd branches')))
251 processFilters :: [Filter] -> Quality -> PhyloExport -> PhyloExport
252 processFilters filters qua export =
253 foldl (\export' f -> case f of
254 ByBranchSize thr -> if (thr < (fromIntegral $ qua ^. qua_minBranch))
255 then filterByBranchSize (fromIntegral $ qua ^. qua_minBranch) export'
256 else filterByBranchSize thr export'
263 sortByHierarchy :: Int -> [PhyloBranch] -> [PhyloBranch]
264 sortByHierarchy depth branches =
265 if (length branches == 1)
269 let partitions = partition (\b -> depth + 1 == ((length . snd) $ b ^. branch_id)) branches'
270 in (sortOn (\b -> (b ^. branch_meta) ! "birth") (fst partitions))
271 ++ (sortByHierarchy (depth + 1) (snd partitions)))
272 $ groupBy (\b b' -> ((take depth . snd) $ b ^. branch_id) == ((take depth . snd) $ b' ^. branch_id) )
273 $ sortOn (\b -> (take depth . snd) $ b ^. branch_id) branches
276 sortByBirthDate :: Order -> PhyloExport -> PhyloExport
277 sortByBirthDate order export =
278 let branches = sortOn (\b -> (b ^. branch_meta) ! "birth") $ export ^. export_branches
279 branches' = case order of
281 Desc -> reverse branches
282 in export & export_branches .~ branches'
284 processSort :: Sort -> PhyloExport -> PhyloExport
285 processSort sort' export = case sort' of
286 ByBirthDate o -> sortByBirthDate o export
287 ByHierarchy -> export & export_branches .~ sortByHierarchy 0 (export ^. export_branches)
294 -- | Return the conditional probability of i knowing j
295 conditional :: Ord a => Map (a,a) Double -> a -> a -> Double
296 conditional m i j = (findWithDefault 0 (i,j) m)
300 -- | Return the genericity score of a given ngram
301 genericity :: Map (Int, Int) Double -> [Int] -> Int -> Double
302 genericity m l i = ( (sum $ map (\j -> conditional m i j) l)
303 - (sum $ map (\j -> conditional m j i) l)) / (fromIntegral $ (length l) + 1)
306 -- | Return the specificity score of a given ngram
307 specificity :: Map (Int, Int) Double -> [Int] -> Int -> Double
308 specificity m l i = ( (sum $ map (\j -> conditional m j i) l)
309 - (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
312 -- | Return the inclusion score of a given ngram
313 inclusion :: Map (Int, Int) Double -> [Int] -> Int -> Double
314 inclusion m l i = ( (sum $ map (\j -> conditional m j i) l)
315 + (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
318 ngramsMetrics :: PhyloExport -> PhyloExport
319 ngramsMetrics export =
322 (\g -> g & phylo_groupMeta %~ insert "genericity"
323 (map (\n -> genericity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
324 & phylo_groupMeta %~ insert "specificity"
325 (map (\n -> specificity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
326 & phylo_groupMeta %~ insert "inclusion"
327 (map (\n -> inclusion (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
331 branchDating :: PhyloExport -> PhyloExport
332 branchDating export =
333 over ( export_branches
336 let groups = sortOn fst
337 $ foldl' (\acc g -> if (g ^. phylo_groupBranchId == b ^. branch_id)
338 then acc ++ [g ^. phylo_groupPeriod]
339 else acc ) [] $ export ^. export_groups
340 birth = fst $ head' "birth" groups
341 age = (snd $ last' "age" groups) - birth
342 in b & branch_meta %~ insert "birth" [fromIntegral birth]
343 & branch_meta %~ insert "age" [fromIntegral age]
344 & branch_meta %~ insert "size" [fromIntegral $ length groups] ) export
346 processMetrics :: PhyloExport -> PhyloExport
347 processMetrics export = ngramsMetrics
348 $ branchDating export
355 getNthMostMeta :: Int -> [Double] -> [Int] -> [Int]
356 getNthMostMeta nth meta ns = map (\(idx,_) -> (ns !! idx))
359 $ sortOn snd $ zip [0..] meta
362 mostInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
363 mostInclusive nth foundations export =
364 over ( export_branches
367 let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
368 cooc = foldl (\acc g -> unionWith (+) acc (g ^. phylo_groupCooc)) empty groups
369 ngrams = sort $ foldl (\acc g -> union acc (g ^. phylo_groupNgrams)) [] groups
370 inc = map (\n -> inclusion cooc (ngrams \\ [n]) n) ngrams
371 lbl = ngramsToLabel foundations $ getNthMostMeta nth inc ngrams
372 in b & branch_label .~ lbl ) export
375 mostEmergentInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
376 mostEmergentInclusive nth foundations export =
380 let lbl = ngramsToLabel foundations
382 $ map (\(_,(_,idx)) -> idx)
384 $ map (\groups -> sortOn (fst . snd) groups)
385 $ groupBy ((==) `on` fst) $ reverse $ sortOn fst
386 $ zip ((g ^. phylo_groupMeta) ! "inclusion")
387 $ zip ((g ^. phylo_groupMeta) ! "dynamics") (g ^. phylo_groupNgrams)
388 in g & phylo_groupLabel .~ lbl ) export
391 processLabels :: [PhyloLabel] -> Vector Ngrams -> PhyloExport -> PhyloExport
392 processLabels labels foundations export =
393 foldl (\export' label ->
395 GroupLabel tagger nth ->
397 MostEmergentInclusive -> mostEmergentInclusive nth foundations export'
398 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger"
399 BranchLabel tagger nth ->
401 MostInclusive -> mostInclusive nth foundations export'
402 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger" ) export labels
410 toDynamics :: Int -> [PhyloGroup] -> PhyloGroup -> Map Int (Date,Date) -> Double
411 toDynamics n parents group m =
412 let prd = group ^. phylo_groupPeriod
413 end = last' "dynamics" (sort $ map snd $ elems m)
414 in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end))
417 else if ((fst prd) == (fst $ m ! n))
425 --------------------------------------
427 isNew = not $ elem n $ concat $ map _phylo_groupNgrams parents
430 processDynamics :: [PhyloGroup] -> [PhyloGroup]
431 processDynamics groups =
433 let parents = filter (\g' -> (g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
434 && ((fst $ g ^. phylo_groupPeriod) > (fst $ g' ^. phylo_groupPeriod))) groups
435 in g & phylo_groupMeta %~ insert "dynamics" (map (\n -> toDynamics n parents g mapNgrams) $ g ^. phylo_groupNgrams) ) groups
437 --------------------------------------
438 mapNgrams :: Map Int (Date,Date)
439 mapNgrams = map (\dates ->
440 let dates' = sort dates
441 in (head' "dynamics" dates', last' "dynamics" dates'))
443 $ foldl (\acc g -> acc ++ ( map (\n -> (n,[fst $ g ^. phylo_groupPeriod, snd $ g ^. phylo_groupPeriod]))
444 $ (g ^. phylo_groupNgrams))) [] groups
447 ---------------------
448 -- | phyloExport | --
449 ---------------------
452 toPhyloExport :: Phylo -> DotGraph DotId
453 toPhyloExport phylo = exportToDot phylo
454 $ processFilters (exportFilter $ getConfig phylo) (phyloQuality $ getConfig phylo)
455 $ processSort (exportSort $ getConfig phylo)
456 $ processLabels (exportLabel $ getConfig phylo) (getRoots phylo)
457 $ processMetrics export
459 export :: PhyloExport
460 export = PhyloExport groups branches
461 --------------------------------------
462 branches :: [PhyloBranch]
463 branches = traceExportBranches $ map (\bId -> PhyloBranch bId "" empty) $ nub $ map _phylo_groupBranchId groups
464 --------------------------------------
465 groups :: [PhyloGroup]
466 groups = processDynamics
467 $ getGroupsFromLevel (phyloLevel $ getConfig phylo) phylo
470 traceExportBranches :: [PhyloBranch] -> [PhyloBranch]
471 traceExportBranches branches = trace ("\n" <> "-- | Export " <> show(length branches) <> " branches") branches