2 Module : Gargantext.Core.Viz.Phylo.PhyloExport
3 Description : Exportation module of a Phylo
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
11 {-# LANGUAGE TypeSynonymInstances #-}
13 module Gargantext.Core.Viz.Phylo.PhyloExport where
15 import Data.Map (Map, fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault, toList, member)
16 import Data.List ((++), sort, nub, null, concat, sortOn, groupBy, union, (\\), (!!), init, partition, notElem, unwords, nubBy, inits, elemIndex)
17 import Data.Vector (Vector)
19 import Prelude (writeFile)
20 import Gargantext.Prelude
21 import Gargantext.Core.Viz.AdaptativePhylo
22 import Gargantext.Core.Viz.Phylo.PhyloTools
23 import Gargantext.Core.Viz.Phylo.TemporalMatching (filterDocs, filterDiago, reduceDiagos, toProximity, getNextPeriods)
25 import Control.Lens hiding (Level)
26 import Control.Parallel.Strategies (parList, rdeepseq, using)
27 import Data.GraphViz hiding (DotGraph, Order)
28 import Data.GraphViz.Types.Generalised (DotGraph)
29 import Data.GraphViz.Attributes.Complete hiding (EdgeType, Order)
30 import Data.GraphViz.Types.Monadic
31 import Data.Text.Lazy (fromStrict, pack, unpack)
32 import System.FilePath
33 import Debug.Trace (trace)
35 import qualified Data.Text as Text
36 import qualified Data.Vector as Vector
37 import qualified Data.Text.Lazy as Lazy
38 import qualified Data.GraphViz.Attributes.HTML as H
44 dotToFile :: FilePath -> DotGraph DotId -> IO ()
45 dotToFile filePath dotG = writeFile filePath $ dotToString dotG
47 dotToString :: DotGraph DotId -> [Char]
48 dotToString dotG = unpack (printDotGraph dotG)
50 dynamicToColor :: Double -> H.Attribute
52 | d == 0 = H.BGColor (toColor LightCoral)
53 | d == 1 = H.BGColor (toColor Khaki)
54 | d == 2 = H.BGColor (toColor SkyBlue)
55 | otherwise = H.Color (toColor Black)
57 pickLabelColor :: [Double] -> H.Attribute
59 | elem 0 lst = dynamicToColor 0
60 | elem 2 lst = dynamicToColor 2
61 | elem 1 lst = dynamicToColor 1
62 | otherwise = dynamicToColor 3
64 toDotLabel :: Text.Text -> Label
65 toDotLabel lbl = StrLabel $ fromStrict lbl
67 toAttr :: AttributeName -> Lazy.Text -> CustomAttribute
68 toAttr k v = customAttribute k v
70 metaToAttr :: Map Text.Text [Double] -> [CustomAttribute]
71 metaToAttr meta = map (\(k,v) -> toAttr (fromStrict k) $ (pack . unwords) $ map show v) $ toList meta
73 groupIdToDotId :: PhyloGroupId -> DotId
74 groupIdToDotId (((d,d'),lvl),idx) = (fromStrict . Text.pack) $ ("group" <> (show d) <> (show d') <> (show lvl) <> (show idx))
76 branchIdToDotId :: PhyloBranchId -> DotId
77 branchIdToDotId bId = (fromStrict . Text.pack) $ ("branch" <> show (snd bId))
79 periodIdToDotId :: PhyloPeriodId -> DotId
80 periodIdToDotId prd = (fromStrict . Text.pack) $ ("period" <> show (fst prd) <> show (snd prd))
82 groupToTable :: Vector Ngrams -> PhyloGroup -> H.Label
83 groupToTable fdt g = H.Table H.HTable
84 { H.tableFontAttrs = Just [H.PointSize 14, H.Align H.HLeft]
85 , H.tableAttrs = [H.Border 0, H.CellBorder 0, H.BGColor (toColor White)]
86 , H.tableRows = [header]
87 <> [H.Cells [H.LabelCell [H.Height 10] $ H.Text [H.Str $ fromStrict ""]]]
88 <> ( map ngramsToRow $ splitEvery 4
89 $ reverse $ sortOn (snd . snd)
90 $ zip (ngramsToText fdt (g ^. phylo_groupNgrams))
91 $ zip ((g ^. phylo_groupMeta) ! "dynamics") ((g ^. phylo_groupMeta) ! "inclusion"))}
93 --------------------------------------
94 ngramsToRow :: [(Ngrams,(Double,Double))] -> H.Row
95 ngramsToRow ns = H.Cells $ map (\(n,(d,_)) ->
96 H.LabelCell [H.Align H.HLeft,dynamicToColor d] $ H.Text [H.Str $ fromStrict n]) ns
97 --------------------------------------
100 H.Cells [ H.LabelCell [pickLabelColor ((g ^. phylo_groupMeta) ! "dynamics")]
101 $ H.Text [H.Str $ (((fromStrict . Text.toUpper) $ g ^. phylo_groupLabel)
102 <> (fromStrict " ( ")
103 <> (pack $ show (fst $ g ^. phylo_groupPeriod))
104 <> (fromStrict " , ")
105 <> (pack $ show (snd $ g ^. phylo_groupPeriod))
106 <> (fromStrict " ) ")
107 <> (pack $ show (getGroupId g)))]]
108 --------------------------------------
110 branchToDotNode :: PhyloBranch -> Int -> Dot DotId
111 branchToDotNode b bId =
112 node (branchIdToDotId $ b ^. branch_id)
113 ([FillColor [toWColor CornSilk], FontName "Arial", FontSize 40, Shape Egg, Style [SItem Bold []], Label (toDotLabel $ b ^. branch_label)]
114 <> (metaToAttr $ b ^. branch_meta)
115 <> [ toAttr "nodeType" "branch"
116 , toAttr "bId" (pack $ show bId)
117 , toAttr "branchId" (pack $ unwords (map show $ snd $ b ^. branch_id))
118 , toAttr "branch_x" (fromStrict $ Text.pack $ (show $ b ^. branch_x))
119 , toAttr "branch_y" (fromStrict $ Text.pack $ (show $ b ^. branch_y))
120 , toAttr "label" (pack $ show $ b ^. branch_label)
123 periodToDotNode :: (Date,Date) -> Dot DotId
124 periodToDotNode prd =
125 node (periodIdToDotId prd)
126 ([Shape BoxShape, FontSize 50, Label (toDotLabel $ Text.pack (show (fst prd) <> " " <> show (snd prd)))]
127 <> [ toAttr "nodeType" "period"
128 , toAttr "from" (fromStrict $ Text.pack $ (show $ fst prd))
129 , toAttr "to" (fromStrict $ Text.pack $ (show $ snd prd))])
132 groupToDotNode :: Vector Ngrams -> PhyloGroup -> Int -> Dot DotId
133 groupToDotNode fdt g bId =
134 node (groupIdToDotId $ getGroupId g)
135 ([FontName "Arial", Shape Square, penWidth 4, toLabel (groupToTable fdt g)]
136 <> [ toAttr "nodeType" "group"
137 , toAttr "from" (pack $ show (fst $ g ^. phylo_groupPeriod))
138 , toAttr "to" (pack $ show (snd $ g ^. phylo_groupPeriod))
139 , toAttr "branchId" (pack $ unwords (init $ map show $ snd $ g ^. phylo_groupBranchId))
140 , toAttr "bId" (pack $ show bId)
141 , toAttr "support" (pack $ show (g ^. phylo_groupSupport))
142 , toAttr "lbl" (pack $ show (ngramsToLabel fdt (g ^. phylo_groupNgrams)))
143 , toAttr "foundation" (pack $ show (idxToLabel (g ^. phylo_groupNgrams)))
144 , toAttr "role" (pack $ show (idxToLabel' ((g ^. phylo_groupMeta) ! "dynamics")))
148 toDotEdge :: DotId -> DotId -> [Char] -> EdgeType -> Dot DotId
149 toDotEdge source target lbl edgeType = edge source target
151 GroupToGroup -> [ Width 3, penWidth 4, Color [toWColor Black], Constraint True] <> [toAttr "edgeType" "link", toAttr "lbl" (pack lbl)]
152 BranchToGroup -> [ Width 3, Color [toWColor Black], ArrowHead (AType [(ArrMod FilledArrow RightSide,DotArrow)])] <> [toAttr "edgeType" "branchLink" ]
153 BranchToBranch -> [ Width 2, Color [toWColor Black], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,DotArrow)])]
154 GroupToAncestor -> [ Width 3, Color [toWColor Red], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,NoArrow)]), PenWidth 4] <> [toAttr "edgeType" "ancestorLink", toAttr "lbl" (pack lbl)]
155 PeriodToPeriod -> [ Width 5, Color [toWColor Black]])
158 mergePointers :: [PhyloGroup] -> Map (PhyloGroupId,PhyloGroupId) Double
159 mergePointers groups =
160 let toChilds = fromList $ concat $ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupPeriodChilds) groups
161 toParents = fromList $ concat $ map (\g -> map (\(target,w) -> ((target,getGroupId g),w)) $ g ^. phylo_groupPeriodParents) groups
162 in unionWith (\w w' -> max w w') toChilds toParents
164 mergeAncestors :: [PhyloGroup] -> [((PhyloGroupId,PhyloGroupId), Double)]
165 mergeAncestors groups = concat
166 $ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupAncestors)
167 $ filter (\g -> (not . null) $ g ^. phylo_groupAncestors) groups
170 toBid :: PhyloGroup -> [PhyloBranch] -> Int
172 let b' = head' "toBid" (filter (\b -> b ^. branch_id == g ^. phylo_groupBranchId) bs)
173 in fromJust $ elemIndex b' bs
175 exportToDot :: Phylo -> PhyloExport -> DotGraph DotId
176 exportToDot phylo export =
177 trace ("\n-- | Convert " <> show(length $ export ^. export_branches) <> " branches and "
178 <> show(length $ export ^. export_groups) <> " groups "
179 <> show(length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) $ export ^. export_groups) <> " terms to a dot file\n\n"
180 <> "##########################") $
181 digraph ((Str . fromStrict) $ (phyloName $ getConfig phylo)) $ do
183 {- 1) init the dot graph -}
184 graphAttrs ( [ Label (toDotLabel $ (phyloName $ getConfig phylo))]
185 <> [ FontSize 30, LabelLoc VTop, NodeSep 1, RankSep [1], Rank SameRank, Splines SplineEdges, Overlap ScaleOverlaps
187 , Style [SItem Filled []],Color [toWColor White]]
188 {-- home made attributes -}
189 <> [(toAttr (fromStrict "phyloFoundations") $ pack $ show (length $ Vector.toList $ getRoots phylo))
190 ,(toAttr (fromStrict "phyloTerms") $ pack $ show (length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) $ export ^. export_groups))
191 ,(toAttr (fromStrict "phyloDocs") $ pack $ show (sum $ elems $ phylo ^. phylo_timeDocs))
192 ,(toAttr (fromStrict "phyloPeriods") $ pack $ show (length $ elems $ phylo ^. phylo_periods))
193 ,(toAttr (fromStrict "phyloBranches") $ pack $ show (length $ export ^. export_branches))
194 ,(toAttr (fromStrict "phyloGroups") $ pack $ show (length $ export ^. export_groups))
195 ,(toAttr (fromStrict "phyloTermsFreq") $ pack $ show (toList $ _phylo_lastTermFreq phylo))
199 -- toAttr (fromStrict k) $ (pack . unwords) $ map show v
201 -- 2) create a layer for the branches labels -}
202 subgraph (Str "Branches peaks") $ do
204 graphAttrs [Rank SameRank]
206 -- 3) group the branches by hierarchy
207 -- mapM (\branches ->
208 -- subgraph (Str "Branches clade") $ do
209 -- graphAttrs [Rank SameRank]
211 -- -- 4) create a node for each branch
212 -- mapM branchToDotNode branches
213 -- ) $ elems $ fromListWith (++) $ map (\b -> ((init . snd) $ b ^. branch_id,[b])) $ export ^. export_branches
215 mapM (\b -> branchToDotNode b (fromJust $ elemIndex b (export ^. export_branches))) $ export ^. export_branches
217 {-- 5) create a layer for each period -}
218 _ <- mapM (\period ->
219 subgraph ((Str . fromStrict . Text.pack) $ ("Period" <> show (fst period) <> show (snd period))) $ do
220 graphAttrs [Rank SameRank]
221 periodToDotNode period
223 {-- 6) create a node for each group -}
224 mapM (\g -> groupToDotNode (getRoots phylo) g (toBid g (export ^. export_branches))) (filter (\g -> g ^. phylo_groupPeriod == period) $ export ^. export_groups)
225 ) $ getPeriodIds phylo
227 {-- 7) create the edges between a branch and its first groups -}
228 _ <- mapM (\(bId,groups) ->
229 mapM (\g -> toDotEdge (branchIdToDotId bId) (groupIdToDotId $ getGroupId g) "" BranchToGroup) groups
232 $ map (\groups -> head' "toDot"
233 $ groupBy (\g g' -> g' ^. phylo_groupPeriod == g ^. phylo_groupPeriod)
234 $ sortOn (fst . _phylo_groupPeriod) groups)
235 $ fromListWith (++) $ map (\g -> (g ^. phylo_groupBranchId,[g])) $ export ^. export_groups
237 {- 8) create the edges between the groups -}
238 _ <- mapM (\((k,k'),v) ->
239 toDotEdge (groupIdToDotId k) (groupIdToDotId k') (show v) GroupToGroup
240 ) $ (toList . mergePointers) $ export ^. export_groups
242 _ <- mapM (\((k,k'),v) ->
243 toDotEdge (groupIdToDotId k) (groupIdToDotId k') (show v) GroupToAncestor
244 ) $ mergeAncestors $ export ^. export_groups
246 -- 10) create the edges between the periods
247 _ <- mapM (\(prd,prd') ->
248 toDotEdge (periodIdToDotId prd) (periodIdToDotId prd') "" PeriodToPeriod
249 ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ getPeriodIds phylo
251 {- 8) create the edges between the branches
252 -- _ <- mapM (\(bId,bId') ->
253 -- toDotEdge (branchIdToDotId bId) (branchIdToDotId bId')
254 -- (Text.pack $ show(branchIdsToProximity bId bId'
255 -- (getThresholdInit $ phyloProximity $ getConfig phylo)
256 -- (getThresholdStep $ phyloProximity $ getConfig phylo))) BranchToBranch
257 -- ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ map _branch_id $ export ^. export_branches
261 graphAttrs [Rank SameRank]
268 filterByBranchSize :: Double -> PhyloExport -> PhyloExport
269 filterByBranchSize thr export =
270 let splited = partition (\b -> head' "filter" ((b ^. branch_meta) ! "size") >= thr) $ export ^. export_branches
271 in export & export_branches .~ (fst splited)
272 & export_groups %~ (filter (\g -> not $ elem (g ^. phylo_groupBranchId) (map _branch_id $ snd splited)))
275 processFilters :: [Filter] -> Quality -> PhyloExport -> PhyloExport
276 processFilters filters qua export =
277 foldl (\export' f -> case f of
278 ByBranchSize thr -> if (thr < (fromIntegral $ qua ^. qua_minBranch))
279 then filterByBranchSize (fromIntegral $ qua ^. qua_minBranch) export'
280 else filterByBranchSize thr export'
287 branchToIso :: [PhyloBranch] -> [PhyloBranch]
288 branchToIso branches =
291 $ map (\(b,x) -> b ^. branch_y + 0.05 - x)
293 $ ([0] ++ (map (\(b,b') ->
294 let idx = length $ commonPrefix (b ^. branch_canonId) (b' ^. branch_canonId) []
295 lmin = min (length $ b ^. branch_seaLevel) (length $ b' ^. branch_seaLevel)
297 if ((idx - 1) > ((length $ b' ^. branch_seaLevel) - 1))
298 then (b' ^. branch_seaLevel) !! (lmin - 1)
299 else (b' ^. branch_seaLevel) !! (idx - 1)
300 ) $ listToSeq branches))
301 in map (\(x,b) -> b & branch_x .~ x)
304 branchToIso' :: Double -> Double -> [PhyloBranch] -> [PhyloBranch]
305 branchToIso' start step branches =
306 let bx = map (\l -> (sum l) + ((fromIntegral $ length l) * 0.5))
308 $ ([0] ++ (map (\(b,b') ->
309 let root = fromIntegral $ length $ commonPrefix (snd $ b ^. branch_id) (snd $ b' ^. branch_id) []
310 in 1 - start - step * root) $ listToSeq branches))
311 in map (\(x,b) -> b & branch_x .~ x)
315 sortByHierarchy :: Int -> [PhyloBranch] -> [PhyloBranch]
316 sortByHierarchy depth branches =
317 if (length branches == 1)
321 let partitions = partition (\b -> depth + 1 == ((length . snd) $ b ^. branch_id)) branches'
322 in (sortOn (\b -> (b ^. branch_meta) ! "birth") (fst partitions))
323 ++ (sortByHierarchy (depth + 1) (snd partitions)))
324 $ groupBy (\b b' -> ((take depth . snd) $ b ^. branch_id) == ((take depth . snd) $ b' ^. branch_id) )
325 $ sortOn (\b -> (take depth . snd) $ b ^. branch_id) branches
328 sortByBirthDate :: Order -> PhyloExport -> PhyloExport
329 sortByBirthDate order export =
330 let branches = sortOn (\b -> (b ^. branch_meta) ! "birth") $ export ^. export_branches
331 branches' = case order of
333 Desc -> reverse branches
334 in export & export_branches .~ branches'
336 processSort :: Sort -> SeaElevation -> PhyloExport -> PhyloExport
337 processSort sort' elev export = case sort' of
338 ByBirthDate o -> sortByBirthDate o export
339 ByHierarchy -> export & export_branches .~ (branchToIso' (_cons_start elev) (_cons_step elev)
340 $ sortByHierarchy 0 (export ^. export_branches))
347 -- | Return the conditional probability of i knowing j
348 conditional :: Ord a => Map (a,a) Double -> a -> a -> Double
349 conditional m i j = (findWithDefault 0 (i,j) m)
353 -- | Return the genericity score of a given ngram
354 genericity :: Map (Int, Int) Double -> [Int] -> Int -> Double
355 genericity m l i = ( (sum $ map (\j -> conditional m i j) l)
356 - (sum $ map (\j -> conditional m j i) l)) / (fromIntegral $ (length l) + 1)
359 -- | Return the specificity score of a given ngram
360 specificity :: Map (Int, Int) Double -> [Int] -> Int -> Double
361 specificity m l i = ( (sum $ map (\j -> conditional m j i) l)
362 - (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
365 -- | Return the inclusion score of a given ngram
366 inclusion :: Map (Int, Int) Double -> [Int] -> Int -> Double
367 inclusion m l i = ( (sum $ map (\j -> conditional m j i) l)
368 + (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
371 ngramsMetrics :: PhyloExport -> PhyloExport
372 ngramsMetrics export =
375 (\g -> g & phylo_groupMeta %~ insert "genericity"
376 (map (\n -> genericity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
377 & phylo_groupMeta %~ insert "specificity"
378 (map (\n -> specificity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
379 & phylo_groupMeta %~ insert "inclusion"
380 (map (\n -> inclusion (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
384 branchDating :: PhyloExport -> PhyloExport
385 branchDating export =
386 over ( export_branches
389 let groups = sortOn fst
390 $ foldl' (\acc g -> if (g ^. phylo_groupBranchId == b ^. branch_id)
391 then acc ++ [g ^. phylo_groupPeriod]
392 else acc ) [] $ export ^. export_groups
394 birth = fst $ head' "birth" groups
395 age = (snd $ last' "age" groups) - birth
396 in b & branch_meta %~ insert "birth" [fromIntegral birth]
397 & branch_meta %~ insert "age" [fromIntegral age]
398 & branch_meta %~ insert "size" [fromIntegral $ length periods] ) export
400 processMetrics :: PhyloExport -> PhyloExport
401 processMetrics export = ngramsMetrics
402 $ branchDating export
409 nk :: Int -> [[Int]] -> Int
411 $ map (\g -> if (elem n g)
416 tf :: Int -> [[Int]] -> Double
417 tf n groups = (fromIntegral $ nk n groups) / (fromIntegral $ length $ concat groups)
420 idf :: Int -> [[Int]] -> Double
421 idf n groups = log ((fromIntegral $ length groups) / (fromIntegral $ nk n groups))
424 findTfIdf :: [[Int]] -> [(Int,Double)]
425 findTfIdf groups = reverse $ sortOn snd $ map (\n -> (n,(tf n groups) * (idf n groups))) $ sort $ nub $ concat groups
428 findEmergences :: [PhyloGroup] -> Map Int Double -> [(Int,Double)]
429 findEmergences groups freq =
430 let ngrams = map _phylo_groupNgrams groups
431 dynamics = map (\g -> (g ^. phylo_groupMeta) ! "dynamics") groups
432 emerging = nubBy (\n1 n2 -> fst n1 == fst n2)
433 $ concat $ map (\g -> filter (\(_,d) -> d == 0) $ zip (fst g) (snd g)) $ zip ngrams dynamics
434 in reverse $ sortOn snd
435 $ map (\(n,_) -> if (member n freq)
440 mostEmergentTfIdf :: Int -> Map Int Double -> Vector Ngrams -> PhyloExport -> PhyloExport
441 mostEmergentTfIdf nth freq foundations export =
442 over ( export_branches
445 let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
446 tfidf = findTfIdf (map _phylo_groupNgrams groups)
447 emergences = findEmergences groups freq
448 selected = if (null emergences)
449 then map fst $ take nth tfidf
450 else [fst $ head' "mostEmergentTfIdf" emergences]
451 ++ (map fst $ take (nth - 1) $ filter (\(n,_) -> n /= (fst $ head' "mostEmergentTfIdf" emergences)) tfidf)
452 in b & branch_label .~ (ngramsToLabel foundations selected)) export
455 getNthMostMeta :: Int -> [Double] -> [Int] -> [Int]
456 getNthMostMeta nth meta ns = map (\(idx,_) -> (ns !! idx))
459 $ sortOn snd $ zip [0..] meta
462 mostInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
463 mostInclusive nth foundations export =
464 over ( export_branches
467 let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
468 cooc = foldl (\acc g -> unionWith (+) acc (g ^. phylo_groupCooc)) empty groups
469 ngrams = sort $ foldl (\acc g -> union acc (g ^. phylo_groupNgrams)) [] groups
470 inc = map (\n -> inclusion cooc (ngrams \\ [n]) n) ngrams
471 lbl = ngramsToLabel foundations $ getNthMostMeta nth inc ngrams
472 in b & branch_label .~ lbl ) export
475 mostEmergentInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
476 mostEmergentInclusive nth foundations export =
480 let lbl = ngramsToLabel foundations
482 $ map (\(_,(_,idx)) -> idx)
484 $ map (\groups -> sortOn (fst . snd) groups)
485 $ groupBy ((==) `on` fst) $ reverse $ sortOn fst
486 $ zip ((g ^. phylo_groupMeta) ! "inclusion")
487 $ zip ((g ^. phylo_groupMeta) ! "dynamics") (g ^. phylo_groupNgrams)
488 in g & phylo_groupLabel .~ lbl ) export
491 processLabels :: [PhyloLabel] -> Vector Ngrams -> Map Int Double -> PhyloExport -> PhyloExport
492 processLabels labels foundations freq export =
493 foldl (\export' label ->
495 GroupLabel tagger nth ->
497 MostEmergentInclusive -> mostEmergentInclusive nth foundations export'
498 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger"
499 BranchLabel tagger nth ->
501 MostInclusive -> mostInclusive nth foundations export'
502 MostEmergentTfIdf -> mostEmergentTfIdf nth freq foundations export'
503 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger" ) export labels
511 toDynamics :: Int -> [PhyloGroup] -> PhyloGroup -> Map Int (Date,Date) -> Double
512 toDynamics n parents g m =
513 let prd = g ^. phylo_groupPeriod
514 end = last' "dynamics" (sort $ map snd $ elems m)
515 in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end))
518 else if ((fst prd) == (fst $ m ! n))
526 --------------------------------------
528 isNew = not $ elem n $ concat $ map _phylo_groupNgrams parents
531 processDynamics :: [PhyloGroup] -> [PhyloGroup]
532 processDynamics groups =
534 let parents = filter (\g' -> (g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
535 && ((fst $ g ^. phylo_groupPeriod) > (fst $ g' ^. phylo_groupPeriod))) groups
536 in g & phylo_groupMeta %~ insert "dynamics" (map (\n -> toDynamics n parents g mapNgrams) $ g ^. phylo_groupNgrams) ) groups
538 --------------------------------------
539 mapNgrams :: Map Int (Date,Date)
540 mapNgrams = map (\dates ->
541 let dates' = sort dates
542 in (head' "dynamics" dates', last' "dynamics" dates'))
544 $ foldl (\acc g -> acc ++ ( map (\n -> (n,[fst $ g ^. phylo_groupPeriod, snd $ g ^. phylo_groupPeriod]))
545 $ (g ^. phylo_groupNgrams))) [] groups
552 getGroupThr :: Double -> PhyloGroup -> Double
554 let seaLvl = (g ^. phylo_groupMeta) ! "seaLevels"
555 breaks = (g ^. phylo_groupMeta) ! "breaks"
556 in (last' "export" (take (round $ (last' "export" breaks) + 1) seaLvl)) - step
558 toAncestor :: Double -> Map Int Double -> Proximity -> Double -> [PhyloGroup] -> PhyloGroup -> PhyloGroup
559 toAncestor nbDocs diago proximity step candidates ego =
560 let curr = ego ^. phylo_groupAncestors
561 in ego & phylo_groupAncestors .~ (curr ++ (map (\(g,w) -> (getGroupId g,w))
562 $ filter (\(g,w) -> (w > 0) && (w >= (min (getGroupThr step ego) (getGroupThr step g))))
563 $ map (\g -> (g, toProximity nbDocs diago proximity (ego ^. phylo_groupNgrams) (g ^. phylo_groupNgrams) (g ^. phylo_groupNgrams)))
564 $ filter (\g -> g ^. phylo_groupBranchId /= ego ^. phylo_groupBranchId ) candidates))
567 headsToAncestors :: Double -> Map Int Double -> Proximity -> Double -> [PhyloGroup] -> [PhyloGroup] -> [PhyloGroup]
568 headsToAncestors nbDocs diago proximity step heads acc =
572 let ego = head' "headsToAncestors" heads
573 heads' = tail' "headsToAncestors" heads
574 in headsToAncestors nbDocs diago proximity step heads' (acc ++ [toAncestor nbDocs diago proximity step heads' ego])
577 toHorizon :: Phylo -> Phylo
579 let phyloAncestor = updatePhyloGroups
581 (fromList $ map (\g -> (getGroupId g, g))
583 $ tracePhyloAncestors newGroups) phylo
584 reBranched = fromList $ map (\g -> (getGroupId g, g)) $ concat
585 $ groupsToBranches $ fromList $ map (\g -> (getGroupId g, g)) $ getGroupsFromLevel level phyloAncestor
586 in updatePhyloGroups level reBranched phylo
588 -- | 1) for each periods
589 periods :: [PhyloPeriodId]
590 periods = getPeriodIds phylo
593 level = getLastLevel phylo
596 frame = getTimeFrame $ timeUnit $ getConfig phylo
597 -- | 2) find ancestors between groups without parents
598 mapGroups :: [[PhyloGroup]]
599 mapGroups = map (\prd ->
600 let groups = getGroupsFromLevelPeriods level [prd] phylo
601 childs = getPreviousChildIds level frame prd periods phylo
602 heads = filter (\g -> null (g ^. phylo_groupPeriodParents) && (notElem (getGroupId g) childs)) groups
603 noHeads = groups \\ heads
604 nbDocs = sum $ elems $ filterDocs (phylo ^. phylo_timeDocs) [prd]
605 diago = reduceDiagos $ filterDiago (phylo ^. phylo_timeCooc) [prd]
606 proximity = (phyloProximity $ getConfig phylo)
607 step = case getSeaElevation phylo of
609 Adaptative _ -> undefined
610 -- in headsToAncestors nbDocs diago proximity heads groups []
611 in map (\ego -> toAncestor nbDocs diago proximity step noHeads ego)
612 $ headsToAncestors nbDocs diago proximity step heads []
614 -- | 3) process this task concurrently
615 newGroups :: [[PhyloGroup]]
616 newGroups = mapGroups `using` parList rdeepseq
617 --------------------------------------
619 getPreviousChildIds :: Level -> Int -> PhyloPeriodId -> [PhyloPeriodId] -> Phylo -> [PhyloGroupId]
620 getPreviousChildIds lvl frame curr prds phylo =
621 concat $ map ((map fst) . _phylo_groupPeriodChilds)
622 $ getGroupsFromLevelPeriods lvl (getNextPeriods ToParents frame curr prds) phylo
624 ---------------------
625 -- | phyloExport | --
626 ---------------------
628 toPhyloExport :: Phylo -> DotGraph DotId
629 toPhyloExport phylo = exportToDot phylo
630 $ processFilters (exportFilter $ getConfig phylo) (phyloQuality $ getConfig phylo)
631 $ processSort (exportSort $ getConfig phylo) (getSeaElevation phylo)
632 $ processLabels (exportLabel $ getConfig phylo) (getRoots phylo) (_phylo_lastTermFreq phylo)
633 $ processMetrics export
635 export :: PhyloExport
636 export = PhyloExport groups branches
637 --------------------------------------
638 branches :: [PhyloBranch]
639 branches = map (\g ->
640 let seaLvl = (g ^. phylo_groupMeta) ! "seaLevels"
641 breaks = (g ^. phylo_groupMeta) ! "breaks"
642 canonId = take (round $ (last' "export" breaks) + 2) (snd $ g ^. phylo_groupBranchId)
643 in PhyloBranch (g ^. phylo_groupBranchId)
647 (last' "export" (take (round $ (last' "export" breaks) + 1) seaLvl))
651 $ map (\gs -> head' "export" gs)
652 $ groupBy (\g g' -> g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
653 $ sortOn (\g -> g ^. phylo_groupBranchId) groups
654 --------------------------------------
655 groups :: [PhyloGroup]
656 groups = traceExportGroups
658 $ getGroupsFromLevel (phyloLevel $ getConfig phylo)
663 traceExportBranches :: [PhyloBranch] -> [PhyloBranch]
664 traceExportBranches branches = trace ("\n"
665 <> "-- | Export " <> show(length branches) <> " branches") branches
667 tracePhyloAncestors :: [[PhyloGroup]] -> [[PhyloGroup]]
668 tracePhyloAncestors groups = trace ("\n"
669 <> "-- | Found " <> show(length $ concat $ map _phylo_groupAncestors $ concat groups) <> " ancestors"
672 tracePhyloInfo :: Phylo -> Phylo
673 tracePhyloInfo phylo = trace ("\n" <> "##########################" <> "\n\n" <> "-- | Phylo with β = "
674 <> show(_qua_granularity $ phyloQuality $ getConfig phylo) <> " applied to "
675 <> show(length $ Vector.toList $ getRoots phylo) <> " foundations"
679 traceExportGroups :: [PhyloGroup] -> [PhyloGroup]
680 traceExportGroups groups = trace ("\n" <> "-- | Export "
681 <> show(length $ nub $ map (\g -> g ^. phylo_groupBranchId) groups) <> " branches, "
682 <> show(length groups) <> " groups and "
683 <> show(length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) groups) <> " terms"