2 Module : Gargantext.Viz.Phylo.PhyloExport
3 Description : Exportation module of a Phylo
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
11 {-# LANGUAGE NoImplicitPrelude #-}
12 {-# LANGUAGE FlexibleContexts #-}
13 {-# LANGUAGE OverloadedStrings #-}
14 {-# LANGUAGE MultiParamTypeClasses #-}
15 {-# LANGUAGE TypeSynonymInstances #-}
16 {-# LANGUAGE FlexibleInstances #-}
18 module Gargantext.Viz.Phylo.PhyloExport where
20 import Data.Map (Map, fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault, toList, delete)
21 import Data.List ((++), sort, nub, concat, sortOn, reverse, groupBy, union, (\\), (!!), init, partition, unwords, nubBy)
22 import Data.Vector (Vector)
24 import Prelude (writeFile)
25 import Gargantext.Prelude
26 import Gargantext.Viz.AdaptativePhylo
27 import Gargantext.Viz.Phylo.PhyloTools
30 import Data.GraphViz hiding (DotGraph, Order)
31 import Data.GraphViz.Types.Generalised (DotGraph)
32 import Data.GraphViz.Attributes.Complete hiding (EdgeType, Order)
33 import Data.GraphViz.Types.Monadic
34 import Data.Text.Lazy (fromStrict, pack, unpack)
35 import System.FilePath
36 import Debug.Trace (trace)
38 import qualified Data.Text as Text
39 import qualified Data.Vector as Vector
40 import qualified Data.Text.Lazy as Lazy
41 import qualified Data.GraphViz.Attributes.HTML as H
47 dotToFile :: FilePath -> DotGraph DotId -> IO ()
48 dotToFile filePath dotG = writeFile filePath $ dotToString dotG
50 dotToString :: DotGraph DotId -> [Char]
51 dotToString dotG = unpack (printDotGraph dotG)
53 dynamicToColor :: Double -> H.Attribute
55 | d == 0 = H.BGColor (toColor LightCoral)
56 | d == 1 = H.BGColor (toColor Khaki)
57 | d == 2 = H.BGColor (toColor SkyBlue)
58 | otherwise = H.Color (toColor Black)
60 pickLabelColor :: [Double] -> H.Attribute
62 | elem 0 lst = dynamicToColor 0
63 | elem 2 lst = dynamicToColor 2
64 | elem 1 lst = dynamicToColor 1
65 | otherwise = dynamicToColor 3
67 toDotLabel :: Text.Text -> Label
68 toDotLabel lbl = StrLabel $ fromStrict lbl
70 toAttr :: AttributeName -> Lazy.Text -> CustomAttribute
71 toAttr k v = customAttribute k v
73 metaToAttr :: Map Text.Text [Double] -> [CustomAttribute]
74 metaToAttr meta = map (\(k,v) -> toAttr (fromStrict k) $ (pack . unwords) $ map show v) $ toList meta
76 groupIdToDotId :: PhyloGroupId -> DotId
77 groupIdToDotId (((d,d'),lvl),idx) = (fromStrict . Text.pack) $ ("group" <> (show d) <> (show d') <> (show lvl) <> (show idx))
79 branchIdToDotId :: PhyloBranchId -> DotId
80 branchIdToDotId bId = (fromStrict . Text.pack) $ ("branch" <> show (snd bId))
82 periodIdToDotId :: PhyloPeriodId -> DotId
83 periodIdToDotId prd = (fromStrict . Text.pack) $ ("period" <> show (fst prd) <> show (snd prd))
85 groupToTable :: Vector Ngrams -> PhyloGroup -> H.Label
86 groupToTable fdt g = H.Table H.HTable
87 { H.tableFontAttrs = Just [H.PointSize 14, H.Align H.HLeft]
88 , H.tableAttrs = [H.Border 0, H.CellBorder 0, H.BGColor (toColor White)]
89 , H.tableRows = [header]
90 <> [H.Cells [H.LabelCell [H.Height 10] $ H.Text [H.Str $ fromStrict ""]]]
91 <> ( map ngramsToRow $ splitEvery 4
92 $ reverse $ sortOn (snd . snd)
93 $ zip (ngramsToText fdt (g ^. phylo_groupNgrams))
94 $ zip ((g ^. phylo_groupMeta) ! "dynamics") ((g ^. phylo_groupMeta) ! "inclusion"))}
96 --------------------------------------
97 ngramsToRow :: [(Ngrams,(Double,Double))] -> H.Row
98 ngramsToRow ns = H.Cells $ map (\(n,(d,_)) ->
99 H.LabelCell [H.Align H.HLeft,dynamicToColor d] $ H.Text [H.Str $ fromStrict n]) ns
100 --------------------------------------
103 H.Cells [ H.LabelCell [pickLabelColor ((g ^. phylo_groupMeta) ! "dynamics")]
104 $ H.Text [H.Str $ (((fromStrict . Text.toUpper) $ g ^. phylo_groupLabel)
105 <> (fromStrict " ( ")
106 <> (pack $ show (fst $ g ^. phylo_groupPeriod))
107 <> (fromStrict " , ")
108 <> (pack $ show (snd $ g ^. phylo_groupPeriod))
109 <> (fromStrict " ) ")
110 <> (pack $ show (getGroupId g)))]]
111 --------------------------------------
113 branchToDotNode :: PhyloBranch -> Dot DotId
115 node (branchIdToDotId $ b ^. branch_id)
116 ([FillColor [toWColor CornSilk], FontName "Arial", FontSize 40, Shape Egg, Style [SItem Bold []], Label (toDotLabel $ b ^. branch_label)]
117 <> (metaToAttr $ b ^. branch_meta)
118 <> [ toAttr "nodeType" "branch"
119 , toAttr "branchId" (pack $ unwords (map show $ snd $ b ^. branch_id)) ])
121 periodToDotNode :: (Date,Date) -> Dot DotId
122 periodToDotNode prd =
123 node (periodIdToDotId prd)
124 ([Shape BoxShape, FontSize 50, Label (toDotLabel $ Text.pack (show (fst prd) <> " " <> show (snd prd)))]
125 <> [ toAttr "nodeType" "period"
126 , toAttr "from" (fromStrict $ Text.pack $ (show $ fst prd))
127 , toAttr "to" (fromStrict $ Text.pack $ (show $ snd prd))])
130 groupToDotNode :: Vector Ngrams -> PhyloGroup -> Dot DotId
131 groupToDotNode fdt g =
132 node (groupIdToDotId $ getGroupId g)
133 ([FontName "Arial", Shape BoxShape, toLabel (groupToTable fdt g)]
134 <> [ toAttr "nodeType" "group"
135 , toAttr "from" (pack $ show (fst $ g ^. phylo_groupPeriod))
136 , toAttr "to" (pack $ show (snd $ g ^. phylo_groupPeriod))
137 , toAttr "branchId" (pack $ unwords (init $ map show $ snd $ g ^. phylo_groupBranchId))
138 , toAttr "support" (pack $ show (g ^. phylo_groupSupport))])
141 toDotEdge :: DotId -> DotId -> Text.Text -> EdgeType -> Dot DotId
142 toDotEdge source target lbl edgeType = edge source target
144 GroupToGroup -> [ Width 10, Color [toWColor Black], Constraint True
145 , Label (StrLabel $ fromStrict lbl)]
146 BranchToGroup -> [ Width 3, Color [toWColor Black], ArrowHead (AType [(ArrMod FilledArrow RightSide,DotArrow)])
147 , Label (StrLabel $ fromStrict lbl)]
148 BranchToBranch -> [ Width 2, Color [toWColor Black], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,DotArrow)])
149 , Label (StrLabel $ fromStrict lbl)]
150 PeriodToPeriod -> [ Width 5, Color [toWColor Black]])
153 mergePointers :: [PhyloGroup] -> Map (PhyloGroupId,PhyloGroupId) Double
154 mergePointers groups =
155 let toChilds = fromList $ concat $ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupPeriodChilds) groups
156 toParents = fromList $ concat $ map (\g -> map (\(target,w) -> ((target,getGroupId g),w)) $ g ^. phylo_groupPeriodParents) groups
157 in unionWith (\w w' -> max w w') toChilds toParents
160 exportToDot :: Phylo -> PhyloExport -> DotGraph DotId
161 exportToDot phylo export =
162 trace ("\n-- | Convert " <> show(length $ export ^. export_branches) <> " branches and "
163 <> show(length $ export ^. export_groups) <> " groups "
164 <> show(length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) $ export ^. export_groups) <> " terms to a dot file\n\n"
165 <> "##########################") $
166 digraph ((Str . fromStrict) $ (phyloName $ getConfig phylo)) $ do
168 -- | 1) init the dot graph
169 graphAttrs ( [ Label (toDotLabel $ (phyloName $ getConfig phylo))]
170 <> [ FontSize 30, LabelLoc VTop, NodeSep 1, RankSep [1], Rank SameRank, Splines SplineEdges, Overlap ScaleOverlaps
172 , Style [SItem Filled []],Color [toWColor White]]
173 -- | home made attributes
174 <> [(toAttr (fromStrict "phyloFoundations") $ pack $ show (length $ Vector.toList $ getRoots phylo))
175 ,(toAttr (fromStrict "phyloTerms") $ pack $ show (length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) $ export ^. export_groups))
176 ,(toAttr (fromStrict "phyloDocs") $ pack $ show (sum $ elems $ phylo ^. phylo_timeDocs))
177 ,(toAttr (fromStrict "phyloBranches") $ pack $ show (length $ export ^. export_branches))
178 ,(toAttr (fromStrict "phyloGroups") $ pack $ show (length $ export ^. export_groups))
179 ,(toAttr (fromStrict "proxiName") $ pack $ show (getProximityName $ phyloProximity $ getConfig phylo))
180 ,(toAttr (fromStrict "proxiInit") $ pack $ show (getProximityInit $ phyloProximity $ getConfig phylo))
181 ,(toAttr (fromStrict "proxiStep") $ pack $ show (getProximityStep $ phyloProximity $ getConfig phylo))
182 ,(toAttr (fromStrict "quaGranularity") $ pack $ show (_qua_granularity $ phyloQuality $ getConfig phylo))
186 -- toAttr (fromStrict k) $ (pack . unwords) $ map show v
188 -- | 2) create a layer for the branches labels
189 subgraph (Str "Branches peaks") $ do
191 graphAttrs [Rank SameRank]
193 -- | 3) group the branches by hierarchy
194 -- mapM (\branches ->
195 -- subgraph (Str "Branches clade") $ do
196 -- graphAttrs [Rank SameRank]
198 -- -- | 4) create a node for each branch
199 -- mapM branchToDotNode branches
200 -- ) $ elems $ fromListWith (++) $ map (\b -> ((init . snd) $ b ^. branch_id,[b])) $ export ^. export_branches
202 mapM branchToDotNode $ export ^. export_branches
204 -- | 5) create a layer for each period
205 _ <- mapM (\period ->
206 subgraph ((Str . fromStrict . Text.pack) $ ("Period" <> show (fst period) <> show (snd period))) $ do
207 graphAttrs [Rank SameRank]
208 periodToDotNode period
210 -- | 6) create a node for each group
211 mapM (\g -> groupToDotNode (getRoots phylo) g) (filter (\g -> g ^. phylo_groupPeriod == period) $ export ^. export_groups)
212 ) $ getPeriodIds phylo
214 -- | 7) create the edges between a branch and its first groups
215 _ <- mapM (\(bId,groups) ->
216 mapM (\g -> toDotEdge (branchIdToDotId bId) (groupIdToDotId $ getGroupId g) "" BranchToGroup) groups
219 $ map (\groups -> head' "toDot"
220 $ groupBy (\g g' -> g' ^. phylo_groupPeriod == g ^. phylo_groupPeriod)
221 $ sortOn (fst . _phylo_groupPeriod) groups)
222 $ fromListWith (++) $ map (\g -> (g ^. phylo_groupBranchId,[g])) $ export ^. export_groups
224 -- | 8) create the edges between the groups
225 _ <- mapM (\((k,k'),_) ->
226 toDotEdge (groupIdToDotId k) (groupIdToDotId k') "" GroupToGroup
227 ) $ (toList . mergePointers) $ export ^. export_groups
229 -- | 7) create the edges between the periods
230 _ <- mapM (\(prd,prd') ->
231 toDotEdge (periodIdToDotId prd) (periodIdToDotId prd') "" PeriodToPeriod
232 ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ getPeriodIds phylo
234 -- | 8) create the edges between the branches
235 _ <- mapM (\(bId,bId') ->
236 toDotEdge (branchIdToDotId bId) (branchIdToDotId bId')
237 (Text.pack $ show(branchIdsToProximity bId bId'
238 (getThresholdInit $ phyloProximity $ getConfig phylo)
239 (getThresholdStep $ phyloProximity $ getConfig phylo))) BranchToBranch
240 ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ map _branch_id $ export ^. export_branches
243 graphAttrs [Rank SameRank]
253 filterByBranchSize :: Double -> PhyloExport -> PhyloExport
254 filterByBranchSize thr export =
255 let branches' = partition (\b -> head' "filter" ((b ^. branch_meta) ! "size") >= thr) $ export ^. export_branches
256 in export & export_branches .~ (fst branches')
257 & export_groups %~ (filter (\g -> not $ elem (g ^. phylo_groupBranchId) (map _branch_id $ snd branches')))
260 processFilters :: [Filter] -> Quality -> PhyloExport -> PhyloExport
261 processFilters filters qua export =
262 foldl (\export' f -> case f of
263 ByBranchSize thr -> if (thr < (fromIntegral $ qua ^. qua_minBranch))
264 then filterByBranchSize (fromIntegral $ qua ^. qua_minBranch) export'
265 else filterByBranchSize thr export'
272 sortByHierarchy :: Int -> [PhyloBranch] -> [PhyloBranch]
273 sortByHierarchy depth branches =
274 if (length branches == 1)
278 let partitions = partition (\b -> depth + 1 == ((length . snd) $ b ^. branch_id)) branches'
279 in (sortOn (\b -> (b ^. branch_meta) ! "birth") (fst partitions))
280 ++ (sortByHierarchy (depth + 1) (snd partitions)))
281 $ groupBy (\b b' -> ((take depth . snd) $ b ^. branch_id) == ((take depth . snd) $ b' ^. branch_id) )
282 $ sortOn (\b -> (take depth . snd) $ b ^. branch_id) branches
285 sortByBirthDate :: Order -> PhyloExport -> PhyloExport
286 sortByBirthDate order export =
287 let branches = sortOn (\b -> (b ^. branch_meta) ! "birth") $ export ^. export_branches
288 branches' = case order of
290 Desc -> reverse branches
291 in export & export_branches .~ branches'
293 processSort :: Sort -> PhyloExport -> PhyloExport
294 processSort sort' export = case sort' of
295 ByBirthDate o -> sortByBirthDate o export
296 ByHierarchy -> export & export_branches .~ sortByHierarchy 0 (export ^. export_branches)
303 -- | Return the conditional probability of i knowing j
304 conditional :: Ord a => Map (a,a) Double -> a -> a -> Double
305 conditional m i j = (findWithDefault 0 (i,j) m)
309 -- | Return the genericity score of a given ngram
310 genericity :: Map (Int, Int) Double -> [Int] -> Int -> Double
311 genericity m l i = ( (sum $ map (\j -> conditional m i j) l)
312 - (sum $ map (\j -> conditional m j i) l)) / (fromIntegral $ (length l) + 1)
315 -- | Return the specificity score of a given ngram
316 specificity :: Map (Int, Int) Double -> [Int] -> Int -> Double
317 specificity m l i = ( (sum $ map (\j -> conditional m j i) l)
318 - (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
321 -- | Return the inclusion score of a given ngram
322 inclusion :: Map (Int, Int) Double -> [Int] -> Int -> Double
323 inclusion m l i = ( (sum $ map (\j -> conditional m j i) l)
324 + (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
327 ngramsMetrics :: PhyloExport -> PhyloExport
328 ngramsMetrics export =
331 (\g -> g & phylo_groupMeta %~ insert "genericity"
332 (map (\n -> genericity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
333 & phylo_groupMeta %~ insert "specificity"
334 (map (\n -> specificity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
335 & phylo_groupMeta %~ insert "inclusion"
336 (map (\n -> inclusion (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
340 branchDating :: PhyloExport -> PhyloExport
341 branchDating export =
342 over ( export_branches
345 let groups = sortOn fst
346 $ foldl' (\acc g -> if (g ^. phylo_groupBranchId == b ^. branch_id)
347 then acc ++ [g ^. phylo_groupPeriod]
348 else acc ) [] $ export ^. export_groups
350 birth = fst $ head' "birth" groups
351 age = (snd $ last' "age" groups) - birth
352 in b & branch_meta %~ insert "birth" [fromIntegral birth]
353 & branch_meta %~ insert "age" [fromIntegral age]
354 & branch_meta %~ insert "size" [fromIntegral $ length periods] ) export
356 processMetrics :: PhyloExport -> PhyloExport
357 processMetrics export = ngramsMetrics
358 $ branchDating export
365 getNthMostMeta :: Int -> [Double] -> [Int] -> [Int]
366 getNthMostMeta nth meta ns = map (\(idx,_) -> (ns !! idx))
369 $ sortOn snd $ zip [0..] meta
372 mostInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
373 mostInclusive nth foundations export =
374 over ( export_branches
377 let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
378 cooc = foldl (\acc g -> unionWith (+) acc (g ^. phylo_groupCooc)) empty groups
379 ngrams = sort $ foldl (\acc g -> union acc (g ^. phylo_groupNgrams)) [] groups
380 inc = map (\n -> inclusion cooc (ngrams \\ [n]) n) ngrams
381 lbl = ngramsToLabel foundations $ getNthMostMeta nth inc ngrams
382 in b & branch_label .~ lbl ) export
385 mostEmergentInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
386 mostEmergentInclusive nth foundations export =
390 let lbl = ngramsToLabel foundations
392 $ map (\(_,(_,idx)) -> idx)
394 $ map (\groups -> sortOn (fst . snd) groups)
395 $ groupBy ((==) `on` fst) $ reverse $ sortOn fst
396 $ zip ((g ^. phylo_groupMeta) ! "inclusion")
397 $ zip ((g ^. phylo_groupMeta) ! "dynamics") (g ^. phylo_groupNgrams)
398 in g & phylo_groupLabel .~ lbl ) export
401 processLabels :: [PhyloLabel] -> Vector Ngrams -> PhyloExport -> PhyloExport
402 processLabels labels foundations export =
403 foldl (\export' label ->
405 GroupLabel tagger nth ->
407 MostEmergentInclusive -> mostEmergentInclusive nth foundations export'
408 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger"
409 BranchLabel tagger nth ->
411 MostInclusive -> mostInclusive nth foundations export'
412 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger" ) export labels
420 toDynamics :: Int -> [PhyloGroup] -> PhyloGroup -> Map Int (Date,Date) -> Double
421 toDynamics n parents group m =
422 let prd = group ^. phylo_groupPeriod
423 end = last' "dynamics" (sort $ map snd $ elems m)
424 in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end))
427 else if ((fst prd) == (fst $ m ! n))
435 --------------------------------------
437 isNew = not $ elem n $ concat $ map _phylo_groupNgrams parents
440 processDynamics :: [PhyloGroup] -> [PhyloGroup]
441 processDynamics groups =
443 let parents = filter (\g' -> (g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
444 && ((fst $ g ^. phylo_groupPeriod) > (fst $ g' ^. phylo_groupPeriod))) groups
445 in g & phylo_groupMeta %~ insert "dynamics" (map (\n -> toDynamics n parents g mapNgrams) $ g ^. phylo_groupNgrams) ) groups
447 --------------------------------------
448 mapNgrams :: Map Int (Date,Date)
449 mapNgrams = map (\dates ->
450 let dates' = sort dates
451 in (head' "dynamics" dates', last' "dynamics" dates'))
453 $ foldl (\acc g -> acc ++ ( map (\n -> (n,[fst $ g ^. phylo_groupPeriod, snd $ g ^. phylo_groupPeriod]))
454 $ (g ^. phylo_groupNgrams))) [] groups
457 ---------------------
458 -- | phyloExport | --
459 ---------------------
462 toPhyloExport :: Phylo -> DotGraph DotId
463 toPhyloExport phylo = exportToDot phylo
464 $ processFilters (exportFilter $ getConfig phylo) (phyloQuality $ getConfig phylo)
465 $ processSort (exportSort $ getConfig phylo)
466 $ processLabels (exportLabel $ getConfig phylo) (getRoots phylo)
467 $ processMetrics export
469 export :: PhyloExport
470 export = PhyloExport groups branches
471 --------------------------------------
472 branches :: [PhyloBranch]
473 branches = map (\bId -> PhyloBranch bId "" empty) $ nub $ map _phylo_groupBranchId groups
474 --------------------------------------
475 groups :: [PhyloGroup]
476 groups = traceExportGroups
478 $ map (\g -> g & phylo_groupMeta %~ delete "dynamics")
479 $ getGroupsFromLevel (phyloLevel $ getConfig phylo)
480 $ tracePhyloInfo phylo
483 traceExportBranches :: [PhyloBranch] -> [PhyloBranch]
484 traceExportBranches branches = trace ("\n"
485 <> "-- | Export " <> show(length branches) <> " branches") branches
487 tracePhyloInfo :: Phylo -> Phylo
488 tracePhyloInfo phylo = trace ("\n" <> "##########################" <> "\n\n" <> "-- | Phylo with β = "
489 <> show(_qua_granularity $ phyloQuality $ getConfig phylo) <> " applied to "
490 <> show(length $ Vector.toList $ getRoots phylo) <> " foundations"
494 traceExportGroups :: [PhyloGroup] -> [PhyloGroup]
495 traceExportGroups groups = trace ("\n" <> "-- | Export "
496 <> show(length $ nub $ map (\g -> g ^. phylo_groupBranchId) groups) <> " branches, "
497 <> show(length groups) <> " groups and "
498 <> show(length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) groups) <> " terms"