2 Module : Gargantext.Core.Viz.Phylo.PhyloExport
3 Description : Exportation module of a Phylo
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
11 {-# LANGUAGE TypeSynonymInstances #-}
13 module Gargantext.Core.Viz.Phylo.PhyloExport where
15 import Data.Map (Map, fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault, toList, member)
16 import Data.List ((++), sort, nub, null, concat, sortOn, groupBy, union, (\\), (!!), init, partition, notElem, unwords, nubBy, inits, elemIndex)
17 import Data.Vector (Vector)
19 import Prelude (writeFile)
20 import Gargantext.Prelude
21 import Gargantext.Core.Viz.AdaptativePhylo
22 import Gargantext.Core.Viz.Phylo.PhyloTools
23 import Gargantext.Core.Viz.Phylo.TemporalMatching (filterDocs, filterDiago, reduceDiagos, toProximity, getNextPeriods)
25 import Control.Lens hiding (Level)
26 import Control.Parallel.Strategies (parList, rdeepseq, using)
27 import Data.GraphViz hiding (DotGraph, Order)
28 import Data.GraphViz.Types.Generalised (DotGraph)
29 import Data.GraphViz.Attributes.Complete hiding (EdgeType, Order)
30 import Data.GraphViz.Types.Monadic
31 import Data.Text.Lazy (fromStrict, pack, unpack)
32 import System.FilePath
33 import Debug.Trace (trace)
35 import qualified Data.Text as Text
36 import qualified Data.Vector as Vector
37 import qualified Data.Text.Lazy as Lazy
38 import qualified Data.GraphViz.Attributes.HTML as H
44 dotToFile :: FilePath -> DotGraph DotId -> IO ()
45 dotToFile filePath dotG = writeFile filePath $ dotToString dotG
47 dotToString :: DotGraph DotId -> [Char]
48 dotToString dotG = unpack (printDotGraph dotG)
50 dynamicToColor :: Double -> H.Attribute
52 | d == 0 = H.BGColor (toColor LightCoral)
53 | d == 1 = H.BGColor (toColor Khaki)
54 | d == 2 = H.BGColor (toColor SkyBlue)
55 | otherwise = H.Color (toColor Black)
57 pickLabelColor :: [Double] -> H.Attribute
59 | elem 0 lst = dynamicToColor 0
60 | elem 2 lst = dynamicToColor 2
61 | elem 1 lst = dynamicToColor 1
62 | otherwise = dynamicToColor 3
64 toDotLabel :: Text.Text -> Label
65 toDotLabel lbl = StrLabel $ fromStrict lbl
67 toAttr :: AttributeName -> Lazy.Text -> CustomAttribute
68 toAttr k v = customAttribute k v
70 metaToAttr :: Map Text.Text [Double] -> [CustomAttribute]
71 metaToAttr meta = map (\(k,v) -> toAttr (fromStrict k) $ (pack . unwords) $ map show v) $ toList meta
73 groupIdToDotId :: PhyloGroupId -> DotId
74 groupIdToDotId (((d,d'),lvl),idx) = (fromStrict . Text.pack) $ ("group" <> (show d) <> (show d') <> (show lvl) <> (show idx))
76 branchIdToDotId :: PhyloBranchId -> DotId
77 branchIdToDotId bId = (fromStrict . Text.pack) $ ("branch" <> show (snd bId))
79 periodIdToDotId :: PhyloPeriodId -> DotId
80 periodIdToDotId prd = (fromStrict . Text.pack) $ ("period" <> show (fst prd) <> show (snd prd))
82 groupToTable :: Vector Ngrams -> PhyloGroup -> H.Label
83 groupToTable fdt g = H.Table H.HTable
84 { H.tableFontAttrs = Just [H.PointSize 14, H.Align H.HLeft]
85 , H.tableAttrs = [H.Border 0, H.CellBorder 0, H.BGColor (toColor White)]
86 , H.tableRows = [header]
87 <> [H.Cells [H.LabelCell [H.Height 10] $ H.Text [H.Str $ fromStrict ""]]]
88 <> ( map ngramsToRow $ splitEvery 4
89 $ reverse $ sortOn (snd . snd)
90 $ zip (ngramsToText fdt (g ^. phylo_groupNgrams))
91 $ zip ((g ^. phylo_groupMeta) ! "dynamics") ((g ^. phylo_groupMeta) ! "inclusion"))}
93 --------------------------------------
94 ngramsToRow :: [(Ngrams,(Double,Double))] -> H.Row
95 ngramsToRow ns = H.Cells $ map (\(n,(d,_)) ->
96 H.LabelCell [H.Align H.HLeft,dynamicToColor d] $ H.Text [H.Str $ fromStrict n]) ns
97 --------------------------------------
100 H.Cells [ H.LabelCell [pickLabelColor ((g ^. phylo_groupMeta) ! "dynamics")]
101 $ H.Text [H.Str $ (((fromStrict . Text.toUpper) $ g ^. phylo_groupLabel)
102 <> (fromStrict " ( ")
103 <> (pack $ show (fst $ g ^. phylo_groupPeriod))
104 <> (fromStrict " , ")
105 <> (pack $ show (snd $ g ^. phylo_groupPeriod))
106 <> (fromStrict " ) ")
107 <> (pack $ show (getGroupId g)))]]
108 --------------------------------------
110 branchToDotNode :: PhyloBranch -> Int -> Dot DotId
111 branchToDotNode b bId =
112 node (branchIdToDotId $ b ^. branch_id)
113 ([FillColor [toWColor CornSilk], FontName "Arial", FontSize 40, Shape Egg, Style [SItem Bold []], Label (toDotLabel $ b ^. branch_label)]
114 <> (metaToAttr $ b ^. branch_meta)
115 <> [ toAttr "nodeType" "branch"
116 , toAttr "bId" (pack $ show bId)
117 , toAttr "branchId" (pack $ unwords (map show $ snd $ b ^. branch_id))
118 , toAttr "branch_x" (fromStrict $ Text.pack $ (show $ b ^. branch_x))
119 , toAttr "branch_y" (fromStrict $ Text.pack $ (show $ b ^. branch_y))
120 , toAttr "label" (pack $ show $ b ^. branch_label)
123 periodToDotNode :: (Date,Date) -> Dot DotId
124 periodToDotNode prd =
125 node (periodIdToDotId prd)
126 ([Shape BoxShape, FontSize 50, Label (toDotLabel $ Text.pack (show (fst prd) <> " " <> show (snd prd)))]
127 <> [ toAttr "nodeType" "period"
128 , toAttr "from" (fromStrict $ Text.pack $ (show $ fst prd))
129 , toAttr "to" (fromStrict $ Text.pack $ (show $ snd prd))])
132 groupToDotNode :: Vector Ngrams -> PhyloGroup -> Int -> Dot DotId
133 groupToDotNode fdt g bId =
134 node (groupIdToDotId $ getGroupId g)
135 ([FontName "Arial", Shape Square, penWidth 4, toLabel (groupToTable fdt g)]
136 <> [ toAttr "nodeType" "group"
137 , toAttr "from" (pack $ show (fst $ g ^. phylo_groupPeriod))
138 , toAttr "to" (pack $ show (snd $ g ^. phylo_groupPeriod))
139 , toAttr "branchId" (pack $ unwords (init $ map show $ snd $ g ^. phylo_groupBranchId))
140 , toAttr "bId" (pack $ show bId)
141 , toAttr "support" (pack $ show (g ^. phylo_groupSupport))
142 , toAttr "label" (pack $ show (ngramsToLabel fdt (g ^. phylo_groupNgrams)))
143 , toAttr "foundation" (pack $ show (idxToLabel (g ^. phylo_groupNgrams)))
144 , toAttr "role" (pack $ show (idxToLabel' ((g ^. phylo_groupMeta) ! "dynamics")))
148 toDotEdge :: DotId -> DotId -> Text.Text -> EdgeType -> Dot DotId
149 toDotEdge source target lbl edgeType = edge source target
151 GroupToGroup -> [ Width 3, penWidth 4, Color [toWColor Black], Constraint True
152 , Label (StrLabel $ fromStrict lbl)] <> [toAttr "edgeType" "link" ]
153 BranchToGroup -> [ Width 3, Color [toWColor Black], ArrowHead (AType [(ArrMod FilledArrow RightSide,DotArrow)])
154 , Label (StrLabel $ fromStrict lbl)] <> [toAttr "edgeType" "branchLink" ]
155 BranchToBranch -> [ Width 2, Color [toWColor Black], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,DotArrow)])
156 , Label (StrLabel $ fromStrict lbl)]
157 GroupToAncestor -> [ Width 3, Color [toWColor Red], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,NoArrow)])
158 , Label (StrLabel $ fromStrict lbl), PenWidth 4] <> [toAttr "edgeType" "ancestorLink" ]
159 PeriodToPeriod -> [ Width 5, Color [toWColor Black]])
162 mergePointers :: [PhyloGroup] -> Map (PhyloGroupId,PhyloGroupId) Double
163 mergePointers groups =
164 let toChilds = fromList $ concat $ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupPeriodChilds) groups
165 toParents = fromList $ concat $ map (\g -> map (\(target,w) -> ((target,getGroupId g),w)) $ g ^. phylo_groupPeriodParents) groups
166 in unionWith (\w w' -> max w w') toChilds toParents
168 mergeAncestors :: [PhyloGroup] -> [((PhyloGroupId,PhyloGroupId), Double)]
169 mergeAncestors groups = concat
170 $ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupAncestors)
171 $ filter (\g -> (not . null) $ g ^. phylo_groupAncestors) groups
174 toBid :: PhyloGroup -> [PhyloBranch] -> Int
176 let b' = head' "toBid" (filter (\b -> b ^. branch_id == g ^. phylo_groupBranchId) bs)
177 in fromJust $ elemIndex b' bs
179 exportToDot :: Phylo -> PhyloExport -> DotGraph DotId
180 exportToDot phylo export =
181 trace ("\n-- | Convert " <> show(length $ export ^. export_branches) <> " branches and "
182 <> show(length $ export ^. export_groups) <> " groups "
183 <> show(length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) $ export ^. export_groups) <> " terms to a dot file\n\n"
184 <> "##########################") $
185 digraph ((Str . fromStrict) $ (phyloName $ getConfig phylo)) $ do
187 {- 1) init the dot graph -}
188 graphAttrs ( [ Label (toDotLabel $ (phyloName $ getConfig phylo))]
189 <> [ FontSize 30, LabelLoc VTop, NodeSep 1, RankSep [1], Rank SameRank, Splines SplineEdges, Overlap ScaleOverlaps
191 , Style [SItem Filled []],Color [toWColor White]]
192 {-- home made attributes -}
193 <> [(toAttr (fromStrict "phyloFoundations") $ pack $ show (length $ Vector.toList $ getRoots phylo))
194 ,(toAttr (fromStrict "phyloTerms") $ pack $ show (length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) $ export ^. export_groups))
195 ,(toAttr (fromStrict "phyloDocs") $ pack $ show (sum $ elems $ phylo ^. phylo_timeDocs))
196 ,(toAttr (fromStrict "phyloPeriods") $ pack $ show (length $ elems $ phylo ^. phylo_periods))
197 ,(toAttr (fromStrict "phyloBranches") $ pack $ show (length $ export ^. export_branches))
198 ,(toAttr (fromStrict "phyloGroups") $ pack $ show (length $ export ^. export_groups))
199 ,(toAttr (fromStrict "phyloTermsFreq") $ pack $ show (toList $ _phylo_lastTermFreq phylo))
203 -- toAttr (fromStrict k) $ (pack . unwords) $ map show v
205 -- 2) create a layer for the branches labels -}
206 subgraph (Str "Branches peaks") $ do
208 graphAttrs [Rank SameRank]
210 -- 3) group the branches by hierarchy
211 -- mapM (\branches ->
212 -- subgraph (Str "Branches clade") $ do
213 -- graphAttrs [Rank SameRank]
215 -- -- 4) create a node for each branch
216 -- mapM branchToDotNode branches
217 -- ) $ elems $ fromListWith (++) $ map (\b -> ((init . snd) $ b ^. branch_id,[b])) $ export ^. export_branches
219 mapM (\b -> branchToDotNode b (fromJust $ elemIndex b (export ^. export_branches))) $ export ^. export_branches
221 {-- 5) create a layer for each period -}
222 _ <- mapM (\period ->
223 subgraph ((Str . fromStrict . Text.pack) $ ("Period" <> show (fst period) <> show (snd period))) $ do
224 graphAttrs [Rank SameRank]
225 periodToDotNode period
227 {-- 6) create a node for each group -}
228 mapM (\g -> groupToDotNode (getRoots phylo) g (toBid g (export ^. export_branches))) (filter (\g -> g ^. phylo_groupPeriod == period) $ export ^. export_groups)
229 ) $ getPeriodIds phylo
231 {-- 7) create the edges between a branch and its first groups -}
232 _ <- mapM (\(bId,groups) ->
233 mapM (\g -> toDotEdge (branchIdToDotId bId) (groupIdToDotId $ getGroupId g) "" BranchToGroup) groups
236 $ map (\groups -> head' "toDot"
237 $ groupBy (\g g' -> g' ^. phylo_groupPeriod == g ^. phylo_groupPeriod)
238 $ sortOn (fst . _phylo_groupPeriod) groups)
239 $ fromListWith (++) $ map (\g -> (g ^. phylo_groupBranchId,[g])) $ export ^. export_groups
241 {- 8) create the edges between the groups -}
242 _ <- mapM (\((k,k'),_) ->
243 toDotEdge (groupIdToDotId k) (groupIdToDotId k') "" GroupToGroup
244 ) $ (toList . mergePointers) $ export ^. export_groups
246 _ <- mapM (\((k,k'),_) ->
247 toDotEdge (groupIdToDotId k) (groupIdToDotId k') "" GroupToAncestor
248 ) $ mergeAncestors $ export ^. export_groups
250 -- 10) create the edges between the periods
251 _ <- mapM (\(prd,prd') ->
252 toDotEdge (periodIdToDotId prd) (periodIdToDotId prd') "" PeriodToPeriod
253 ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ getPeriodIds phylo
255 {- 8) create the edges between the branches
256 -- _ <- mapM (\(bId,bId') ->
257 -- toDotEdge (branchIdToDotId bId) (branchIdToDotId bId')
258 -- (Text.pack $ show(branchIdsToProximity bId bId'
259 -- (getThresholdInit $ phyloProximity $ getConfig phylo)
260 -- (getThresholdStep $ phyloProximity $ getConfig phylo))) BranchToBranch
261 -- ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ map _branch_id $ export ^. export_branches
265 graphAttrs [Rank SameRank]
272 filterByBranchSize :: Double -> PhyloExport -> PhyloExport
273 filterByBranchSize thr export =
274 let splited = partition (\b -> head' "filter" ((b ^. branch_meta) ! "size") >= thr) $ export ^. export_branches
275 in export & export_branches .~ (fst splited)
276 & export_groups %~ (filter (\g -> not $ elem (g ^. phylo_groupBranchId) (map _branch_id $ snd splited)))
279 processFilters :: [Filter] -> Quality -> PhyloExport -> PhyloExport
280 processFilters filters qua export =
281 foldl (\export' f -> case f of
282 ByBranchSize thr -> if (thr < (fromIntegral $ qua ^. qua_minBranch))
283 then filterByBranchSize (fromIntegral $ qua ^. qua_minBranch) export'
284 else filterByBranchSize thr export'
291 branchToIso :: [PhyloBranch] -> [PhyloBranch]
292 branchToIso branches =
295 $ map (\(b,x) -> b ^. branch_y + 0.05 - x)
297 $ ([0] ++ (map (\(b,b') ->
298 let idx = length $ commonPrefix (b ^. branch_canonId) (b' ^. branch_canonId) []
299 in (b' ^. branch_seaLevel) !! (idx - 1)
300 ) $ listToSeq branches))
301 in map (\(x,b) -> b & branch_x .~ x)
305 sortByHierarchy :: Int -> [PhyloBranch] -> [PhyloBranch]
306 sortByHierarchy depth branches =
307 if (length branches == 1)
308 then branchToIso branches
309 else branchToIso $ concat
311 let partitions = partition (\b -> depth + 1 == ((length . snd) $ b ^. branch_id)) branches'
312 in (sortOn (\b -> (b ^. branch_meta) ! "birth") (fst partitions))
313 ++ (sortByHierarchy (depth + 1) (snd partitions)))
314 $ groupBy (\b b' -> ((take depth . snd) $ b ^. branch_id) == ((take depth . snd) $ b' ^. branch_id) )
315 $ sortOn (\b -> (take depth . snd) $ b ^. branch_id) branches
318 sortByBirthDate :: Order -> PhyloExport -> PhyloExport
319 sortByBirthDate order export =
320 let branches = sortOn (\b -> (b ^. branch_meta) ! "birth") $ export ^. export_branches
321 branches' = case order of
323 Desc -> reverse branches
324 in export & export_branches .~ branches'
326 processSort :: Sort -> PhyloExport -> PhyloExport
327 processSort sort' export = case sort' of
328 ByBirthDate o -> sortByBirthDate o export
329 ByHierarchy -> export & export_branches .~ sortByHierarchy 0 (export ^. export_branches)
336 -- | Return the conditional probability of i knowing j
337 conditional :: Ord a => Map (a,a) Double -> a -> a -> Double
338 conditional m i j = (findWithDefault 0 (i,j) m)
342 -- | Return the genericity score of a given ngram
343 genericity :: Map (Int, Int) Double -> [Int] -> Int -> Double
344 genericity m l i = ( (sum $ map (\j -> conditional m i j) l)
345 - (sum $ map (\j -> conditional m j i) l)) / (fromIntegral $ (length l) + 1)
348 -- | Return the specificity score of a given ngram
349 specificity :: Map (Int, Int) Double -> [Int] -> Int -> Double
350 specificity m l i = ( (sum $ map (\j -> conditional m j i) l)
351 - (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
354 -- | Return the inclusion score of a given ngram
355 inclusion :: Map (Int, Int) Double -> [Int] -> Int -> Double
356 inclusion m l i = ( (sum $ map (\j -> conditional m j i) l)
357 + (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
360 ngramsMetrics :: PhyloExport -> PhyloExport
361 ngramsMetrics export =
364 (\g -> g & phylo_groupMeta %~ insert "genericity"
365 (map (\n -> genericity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
366 & phylo_groupMeta %~ insert "specificity"
367 (map (\n -> specificity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
368 & phylo_groupMeta %~ insert "inclusion"
369 (map (\n -> inclusion (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
373 branchDating :: PhyloExport -> PhyloExport
374 branchDating export =
375 over ( export_branches
378 let groups = sortOn fst
379 $ foldl' (\acc g -> if (g ^. phylo_groupBranchId == b ^. branch_id)
380 then acc ++ [g ^. phylo_groupPeriod]
381 else acc ) [] $ export ^. export_groups
383 birth = fst $ head' "birth" groups
384 age = (snd $ last' "age" groups) - birth
385 in b & branch_meta %~ insert "birth" [fromIntegral birth]
386 & branch_meta %~ insert "age" [fromIntegral age]
387 & branch_meta %~ insert "size" [fromIntegral $ length periods] ) export
389 processMetrics :: PhyloExport -> PhyloExport
390 processMetrics export = ngramsMetrics
391 $ branchDating export
398 nk :: Int -> [[Int]] -> Int
400 $ map (\g -> if (elem n g)
405 tf :: Int -> [[Int]] -> Double
406 tf n groups = (fromIntegral $ nk n groups) / (fromIntegral $ length $ concat groups)
409 idf :: Int -> [[Int]] -> Double
410 idf n groups = log ((fromIntegral $ length groups) / (fromIntegral $ nk n groups))
413 findTfIdf :: [[Int]] -> [(Int,Double)]
414 findTfIdf groups = reverse $ sortOn snd $ map (\n -> (n,(tf n groups) * (idf n groups))) $ sort $ nub $ concat groups
417 findEmergences :: [PhyloGroup] -> Map Int Double -> [(Int,Double)]
418 findEmergences groups freq =
419 let ngrams = map _phylo_groupNgrams groups
420 dynamics = map (\g -> (g ^. phylo_groupMeta) ! "dynamics") groups
421 emerging = nubBy (\n1 n2 -> fst n1 == fst n2)
422 $ concat $ map (\g -> filter (\(_,d) -> d == 0) $ zip (fst g) (snd g)) $ zip ngrams dynamics
423 in reverse $ sortOn snd
424 $ map (\(n,_) -> if (member n freq)
429 mostEmergentTfIdf :: Int -> Map Int Double -> Vector Ngrams -> PhyloExport -> PhyloExport
430 mostEmergentTfIdf nth freq foundations export =
431 over ( export_branches
434 let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
435 tfidf = findTfIdf (map _phylo_groupNgrams groups)
436 emergences = findEmergences groups freq
437 selected = if (null emergences)
438 then map fst $ take nth tfidf
439 else [fst $ head' "mostEmergentTfIdf" emergences]
440 ++ (map fst $ take (nth - 1) $ filter (\(n,_) -> n /= (fst $ head' "mostEmergentTfIdf" emergences)) tfidf)
441 in b & branch_label .~ (ngramsToLabel foundations selected)) export
444 getNthMostMeta :: Int -> [Double] -> [Int] -> [Int]
445 getNthMostMeta nth meta ns = map (\(idx,_) -> (ns !! idx))
448 $ sortOn snd $ zip [0..] meta
451 mostInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
452 mostInclusive nth foundations export =
453 over ( export_branches
456 let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
457 cooc = foldl (\acc g -> unionWith (+) acc (g ^. phylo_groupCooc)) empty groups
458 ngrams = sort $ foldl (\acc g -> union acc (g ^. phylo_groupNgrams)) [] groups
459 inc = map (\n -> inclusion cooc (ngrams \\ [n]) n) ngrams
460 lbl = ngramsToLabel foundations $ getNthMostMeta nth inc ngrams
461 in b & branch_label .~ lbl ) export
464 mostEmergentInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
465 mostEmergentInclusive nth foundations export =
469 let lbl = ngramsToLabel foundations
471 $ map (\(_,(_,idx)) -> idx)
473 $ map (\groups -> sortOn (fst . snd) groups)
474 $ groupBy ((==) `on` fst) $ reverse $ sortOn fst
475 $ zip ((g ^. phylo_groupMeta) ! "inclusion")
476 $ zip ((g ^. phylo_groupMeta) ! "dynamics") (g ^. phylo_groupNgrams)
477 in g & phylo_groupLabel .~ lbl ) export
480 processLabels :: [PhyloLabel] -> Vector Ngrams -> Map Int Double -> PhyloExport -> PhyloExport
481 processLabels labels foundations freq export =
482 foldl (\export' label ->
484 GroupLabel tagger nth ->
486 MostEmergentInclusive -> mostEmergentInclusive nth foundations export'
487 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger"
488 BranchLabel tagger nth ->
490 MostInclusive -> mostInclusive nth foundations export'
491 MostEmergentTfIdf -> mostEmergentTfIdf nth freq foundations export'
492 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger" ) export labels
500 toDynamics :: Int -> [PhyloGroup] -> PhyloGroup -> Map Int (Date,Date) -> Double
501 toDynamics n parents g m =
502 let prd = g ^. phylo_groupPeriod
503 end = last' "dynamics" (sort $ map snd $ elems m)
504 in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end))
507 else if ((fst prd) == (fst $ m ! n))
515 --------------------------------------
517 isNew = not $ elem n $ concat $ map _phylo_groupNgrams parents
520 processDynamics :: [PhyloGroup] -> [PhyloGroup]
521 processDynamics groups =
523 let parents = filter (\g' -> (g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
524 && ((fst $ g ^. phylo_groupPeriod) > (fst $ g' ^. phylo_groupPeriod))) groups
525 in g & phylo_groupMeta %~ insert "dynamics" (map (\n -> toDynamics n parents g mapNgrams) $ g ^. phylo_groupNgrams) ) groups
527 --------------------------------------
528 mapNgrams :: Map Int (Date,Date)
529 mapNgrams = map (\dates ->
530 let dates' = sort dates
531 in (head' "dynamics" dates', last' "dynamics" dates'))
533 $ foldl (\acc g -> acc ++ ( map (\n -> (n,[fst $ g ^. phylo_groupPeriod, snd $ g ^. phylo_groupPeriod]))
534 $ (g ^. phylo_groupNgrams))) [] groups
541 getGroupThr :: Double -> PhyloGroup -> Double
543 let seaLvl = (g ^. phylo_groupMeta) ! "seaLevels"
544 breaks = (g ^. phylo_groupMeta) ! "breaks"
545 in (last' "export" (take (round $ (last' "export" breaks) + 1) seaLvl)) - step
547 toAncestor :: Double -> Map Int Double -> Proximity -> Double -> [PhyloGroup] -> PhyloGroup -> PhyloGroup
548 toAncestor nbDocs diago proximity step candidates ego =
549 let curr = ego ^. phylo_groupAncestors
550 in ego & phylo_groupAncestors .~ (curr ++ (map (\(g,w) -> (getGroupId g,w))
551 $ filter (\(g,w) -> (w > 0) && (w >= (min (getGroupThr step ego) (getGroupThr step g))))
552 $ map (\g -> (g, toProximity nbDocs diago proximity (ego ^. phylo_groupNgrams) (g ^. phylo_groupNgrams) (g ^. phylo_groupNgrams)))
553 $ filter (\g -> g ^. phylo_groupBranchId /= ego ^. phylo_groupBranchId ) candidates))
556 headsToAncestors :: Double -> Map Int Double -> Proximity -> Double -> [PhyloGroup] -> [PhyloGroup] -> [PhyloGroup]
557 headsToAncestors nbDocs diago proximity step heads acc =
561 let ego = head' "headsToAncestors" heads
562 heads' = tail' "headsToAncestors" heads
563 in headsToAncestors nbDocs diago proximity step heads' (acc ++ [toAncestor nbDocs diago proximity step heads' ego])
566 toHorizon :: Phylo -> Phylo
568 let phyloAncestor = updatePhyloGroups
570 (fromList $ map (\g -> (getGroupId g, g))
572 $ tracePhyloAncestors newGroups) phylo
573 reBranched = fromList $ map (\g -> (getGroupId g, g)) $ concat
574 $ groupsToBranches $ fromList $ map (\g -> (getGroupId g, g)) $ getGroupsFromLevel level phyloAncestor
575 in updatePhyloGroups level reBranched phylo
577 -- | 1) for each periods
578 periods :: [PhyloPeriodId]
579 periods = getPeriodIds phylo
582 level = getLastLevel phylo
585 frame = getTimeFrame $ timeUnit $ getConfig phylo
586 -- | 2) find ancestors between groups without parents
587 mapGroups :: [[PhyloGroup]]
588 mapGroups = map (\prd ->
589 let groups = getGroupsFromLevelPeriods level [prd] phylo
590 childs = getPreviousChildIds level frame prd periods phylo
591 heads = filter (\g -> null (g ^. phylo_groupPeriodParents) && (notElem (getGroupId g) childs)) groups
592 noHeads = groups \\ heads
593 nbDocs = sum $ elems $ filterDocs (phylo ^. phylo_timeDocs) [prd]
594 diago = reduceDiagos $ filterDiago (phylo ^. phylo_timeCooc) [prd]
595 proximity = (phyloProximity $ getConfig phylo)
596 step = case getSeaElevation phylo of
598 Adaptative _ -> undefined
599 -- in headsToAncestors nbDocs diago proximity heads groups []
600 in map (\ego -> toAncestor nbDocs diago proximity step noHeads ego)
601 $ headsToAncestors nbDocs diago proximity step heads []
603 -- | 3) process this task concurrently
604 newGroups :: [[PhyloGroup]]
605 newGroups = mapGroups `using` parList rdeepseq
606 --------------------------------------
608 getPreviousChildIds :: Level -> Int -> PhyloPeriodId -> [PhyloPeriodId] -> Phylo -> [PhyloGroupId]
609 getPreviousChildIds lvl frame curr prds phylo =
610 concat $ map ((map fst) . _phylo_groupPeriodChilds)
611 $ getGroupsFromLevelPeriods lvl (getNextPeriods ToParents frame curr prds) phylo
613 ---------------------
614 -- | phyloExport | --
615 ---------------------
617 toPhyloExport :: Phylo -> DotGraph DotId
618 toPhyloExport phylo = exportToDot phylo
619 $ processFilters (exportFilter $ getConfig phylo) (phyloQuality $ getConfig phylo)
620 $ processSort (exportSort $ getConfig phylo)
621 $ processLabels (exportLabel $ getConfig phylo) (getRoots phylo) (_phylo_lastTermFreq phylo)
622 $ processMetrics export
624 export :: PhyloExport
625 export = PhyloExport groups branches
626 --------------------------------------
627 branches :: [PhyloBranch]
628 branches = map (\g ->
629 let seaLvl = (g ^. phylo_groupMeta) ! "seaLevels"
630 breaks = (g ^. phylo_groupMeta) ! "breaks"
631 canonId = take (round $ (last' "export" breaks) + 2) (snd $ g ^. phylo_groupBranchId)
632 in PhyloBranch (g ^. phylo_groupBranchId)
636 (last' "export" (take (round $ (last' "export" breaks) + 1) seaLvl))
640 $ map (\gs -> head' "export" gs)
641 $ groupBy (\g g' -> g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
642 $ sortOn (\g -> g ^. phylo_groupBranchId) groups
643 --------------------------------------
644 groups :: [PhyloGroup]
645 groups = traceExportGroups
647 $ getGroupsFromLevel (phyloLevel $ getConfig phylo)
652 traceExportBranches :: [PhyloBranch] -> [PhyloBranch]
653 traceExportBranches branches = trace ("\n"
654 <> "-- | Export " <> show(length branches) <> " branches") branches
656 tracePhyloAncestors :: [[PhyloGroup]] -> [[PhyloGroup]]
657 tracePhyloAncestors groups = trace ("\n"
658 <> "-- | Found " <> show(length $ concat $ map _phylo_groupAncestors $ concat groups) <> " ancestors"
661 tracePhyloInfo :: Phylo -> Phylo
662 tracePhyloInfo phylo = trace ("\n" <> "##########################" <> "\n\n" <> "-- | Phylo with β = "
663 <> show(_qua_granularity $ phyloQuality $ getConfig phylo) <> " applied to "
664 <> show(length $ Vector.toList $ getRoots phylo) <> " foundations"
668 traceExportGroups :: [PhyloGroup] -> [PhyloGroup]
669 traceExportGroups groups = trace ("\n" <> "-- | Export "
670 <> show(length $ nub $ map (\g -> g ^. phylo_groupBranchId) groups) <> " branches, "
671 <> show(length groups) <> " groups and "
672 <> show(length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) groups) <> " terms"