2 Module : Gargantext.Core.Viz.Phylo.PhyloExport
3 Description : Exportation module of a Phylo
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
11 {-# LANGUAGE TypeSynonymInstances #-}
13 module Gargantext.Core.Viz.Phylo.PhyloExport where
15 import Data.Map (Map, fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault, toList, member)
16 import Data.List ((++), sort, nub, null, concat, sortOn, groupBy, union, (\\), (!!), init, partition, notElem, unwords, nubBy, inits, elemIndex)
17 import Data.Vector (Vector)
19 import Prelude (writeFile)
20 import Gargantext.Prelude
21 import Gargantext.Core.Viz.AdaptativePhylo
22 import Gargantext.Core.Viz.Phylo.PhyloTools
23 import Gargantext.Core.Viz.Phylo.TemporalMatching (filterDocs, filterDiago, reduceDiagos, toProximity, getNextPeriods)
25 import Control.Lens hiding (Level)
26 import Control.Parallel.Strategies (parList, rdeepseq, using)
27 import Data.GraphViz hiding (DotGraph, Order)
28 import Data.GraphViz.Types.Generalised (DotGraph)
29 import Data.GraphViz.Attributes.Complete hiding (EdgeType, Order)
30 import Data.GraphViz.Types.Monadic
31 import Data.Text.Lazy (fromStrict, pack, unpack)
32 import System.FilePath
33 import Debug.Trace (trace)
35 import qualified Data.Text as Text
36 import qualified Data.Vector as Vector
37 import qualified Data.Text.Lazy as Lazy
38 import qualified Data.GraphViz.Attributes.HTML as H
44 dotToFile :: FilePath -> DotGraph DotId -> IO ()
45 dotToFile filePath dotG = writeFile filePath $ dotToString dotG
47 dotToString :: DotGraph DotId -> [Char]
48 dotToString dotG = unpack (printDotGraph dotG)
50 dynamicToColor :: Double -> H.Attribute
52 | d == 0 = H.BGColor (toColor LightCoral)
53 | d == 1 = H.BGColor (toColor Khaki)
54 | d == 2 = H.BGColor (toColor SkyBlue)
55 | otherwise = H.Color (toColor Black)
57 pickLabelColor :: [Double] -> H.Attribute
59 | elem 0 lst = dynamicToColor 0
60 | elem 2 lst = dynamicToColor 2
61 | elem 1 lst = dynamicToColor 1
62 | otherwise = dynamicToColor 3
64 toDotLabel :: Text.Text -> Label
65 toDotLabel lbl = StrLabel $ fromStrict lbl
67 toAttr :: AttributeName -> Lazy.Text -> CustomAttribute
68 toAttr k v = customAttribute k v
70 metaToAttr :: Map Text.Text [Double] -> [CustomAttribute]
71 metaToAttr meta = map (\(k,v) -> toAttr (fromStrict k) $ (pack . unwords) $ map show v) $ toList meta
73 groupIdToDotId :: PhyloGroupId -> DotId
74 groupIdToDotId (((d,d'),lvl),idx) = (fromStrict . Text.pack) $ ("group" <> (show d) <> (show d') <> (show lvl) <> (show idx))
76 branchIdToDotId :: PhyloBranchId -> DotId
77 branchIdToDotId bId = (fromStrict . Text.pack) $ ("branch" <> show (snd bId))
79 periodIdToDotId :: PhyloPeriodId -> DotId
80 periodIdToDotId prd = (fromStrict . Text.pack) $ ("period" <> show (fst prd) <> show (snd prd))
82 groupToTable :: Vector Ngrams -> PhyloGroup -> H.Label
83 groupToTable fdt g = H.Table H.HTable
84 { H.tableFontAttrs = Just [H.PointSize 14, H.Align H.HLeft]
85 , H.tableAttrs = [H.Border 0, H.CellBorder 0, H.BGColor (toColor White)]
86 , H.tableRows = [header]
87 <> [H.Cells [H.LabelCell [H.Height 10] $ H.Text [H.Str $ fromStrict ""]]]
88 <> ( map ngramsToRow $ splitEvery 4
89 $ reverse $ sortOn (snd . snd)
90 $ zip (ngramsToText fdt (g ^. phylo_groupNgrams))
91 $ zip ((g ^. phylo_groupMeta) ! "dynamics") ((g ^. phylo_groupMeta) ! "inclusion"))}
93 --------------------------------------
94 ngramsToRow :: [(Ngrams,(Double,Double))] -> H.Row
95 ngramsToRow ns = H.Cells $ map (\(n,(d,_)) ->
96 H.LabelCell [H.Align H.HLeft,dynamicToColor d] $ H.Text [H.Str $ fromStrict n]) ns
97 --------------------------------------
100 H.Cells [ H.LabelCell [pickLabelColor ((g ^. phylo_groupMeta) ! "dynamics")]
101 $ H.Text [H.Str $ (((fromStrict . Text.toUpper) $ g ^. phylo_groupLabel)
102 <> (fromStrict " ( ")
103 <> (pack $ show (fst $ g ^. phylo_groupPeriod))
104 <> (fromStrict " , ")
105 <> (pack $ show (snd $ g ^. phylo_groupPeriod))
106 <> (fromStrict " ) ")
107 <> (pack $ show (getGroupId g)))]]
108 --------------------------------------
110 branchToDotNode :: PhyloBranch -> Int -> Dot DotId
111 branchToDotNode b bId =
112 node (branchIdToDotId $ b ^. branch_id)
113 ([FillColor [toWColor CornSilk], FontName "Arial", FontSize 40, Shape Egg, Style [SItem Bold []], Label (toDotLabel $ b ^. branch_label)]
114 <> (metaToAttr $ b ^. branch_meta)
115 <> [ toAttr "nodeType" "branch"
116 , toAttr "bId" (pack $ show bId)
117 , toAttr "branchId" (pack $ unwords (map show $ snd $ b ^. branch_id))
118 , toAttr "branch_x" (fromStrict $ Text.pack $ (show $ b ^. branch_x))
119 , toAttr "branch_y" (fromStrict $ Text.pack $ (show $ b ^. branch_y))
120 , toAttr "label" (pack $ show $ b ^. branch_label)
123 periodToDotNode :: (Date,Date) -> Dot DotId
124 periodToDotNode prd =
125 node (periodIdToDotId prd)
126 ([Shape BoxShape, FontSize 50, Label (toDotLabel $ Text.pack (show (fst prd) <> " " <> show (snd prd)))]
127 <> [ toAttr "nodeType" "period"
128 , toAttr "from" (fromStrict $ Text.pack $ (show $ fst prd))
129 , toAttr "to" (fromStrict $ Text.pack $ (show $ snd prd))])
132 groupToDotNode :: Vector Ngrams -> PhyloGroup -> Int -> Dot DotId
133 groupToDotNode fdt g bId =
134 node (groupIdToDotId $ getGroupId g)
135 ([FontName "Arial", Shape Square, penWidth 4, toLabel (groupToTable fdt g)]
136 <> [ toAttr "nodeType" "group"
137 , toAttr "from" (pack $ show (fst $ g ^. phylo_groupPeriod))
138 , toAttr "to" (pack $ show (snd $ g ^. phylo_groupPeriod))
139 , toAttr "branchId" (pack $ unwords (init $ map show $ snd $ g ^. phylo_groupBranchId))
140 , toAttr "bId" (pack $ show bId)
141 , toAttr "support" (pack $ show (g ^. phylo_groupSupport))
142 , toAttr "lbl" (pack $ show (ngramsToLabel fdt (g ^. phylo_groupNgrams)))
143 , toAttr "foundation" (pack $ show (idxToLabel (g ^. phylo_groupNgrams)))
144 , toAttr "role" (pack $ show (idxToLabel' ((g ^. phylo_groupMeta) ! "dynamics")))
145 , toAttr "frequence" (pack $ show (idxToLabel' ((g ^. phylo_groupMeta) ! "frequence")))
149 toDotEdge :: DotId -> DotId -> [Char] -> EdgeType -> Dot DotId
150 toDotEdge source target lbl edgeType = edge source target
152 GroupToGroup -> [ Width 3, penWidth 4, Color [toWColor Black], Constraint True] <> [toAttr "edgeType" "link", toAttr "lbl" (pack lbl)]
153 BranchToGroup -> [ Width 3, Color [toWColor Black], ArrowHead (AType [(ArrMod FilledArrow RightSide,DotArrow)])] <> [toAttr "edgeType" "branchLink" ]
154 BranchToBranch -> [ Width 2, Color [toWColor Black], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,DotArrow)])]
155 GroupToAncestor -> [ Width 3, Color [toWColor Red], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,NoArrow)]), PenWidth 4] <> [toAttr "edgeType" "ancestorLink", toAttr "lbl" (pack lbl)]
156 PeriodToPeriod -> [ Width 5, Color [toWColor Black]])
159 mergePointers :: [PhyloGroup] -> Map (PhyloGroupId,PhyloGroupId) Double
160 mergePointers groups =
161 let toChilds = fromList $ concat $ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupPeriodChilds) groups
162 toParents = fromList $ concat $ map (\g -> map (\(target,w) -> ((target,getGroupId g),w)) $ g ^. phylo_groupPeriodParents) groups
163 in unionWith (\w w' -> max w w') toChilds toParents
165 mergeAncestors :: [PhyloGroup] -> [((PhyloGroupId,PhyloGroupId), Double)]
166 mergeAncestors groups = concat
167 $ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupAncestors)
168 $ filter (\g -> (not . null) $ g ^. phylo_groupAncestors) groups
171 toBid :: PhyloGroup -> [PhyloBranch] -> Int
173 let b' = head' "toBid" (filter (\b -> b ^. branch_id == g ^. phylo_groupBranchId) bs)
174 in fromJust $ elemIndex b' bs
176 exportToDot :: Phylo -> PhyloExport -> DotGraph DotId
177 exportToDot phylo export =
178 trace ("\n-- | Convert " <> show(length $ export ^. export_branches) <> " branches and "
179 <> show(length $ export ^. export_groups) <> " groups "
180 <> show(length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) $ export ^. export_groups) <> " terms to a dot file\n\n"
181 <> "##########################") $
182 digraph ((Str . fromStrict) $ (phyloName $ getConfig phylo)) $ do
184 {- 1) init the dot graph -}
185 graphAttrs ( [ Label (toDotLabel $ (phyloName $ getConfig phylo))]
186 <> [ FontSize 30, LabelLoc VTop, NodeSep 1, RankSep [1], Rank SameRank, Splines SplineEdges, Overlap ScaleOverlaps
188 , Style [SItem Filled []],Color [toWColor White]]
189 {-- home made attributes -}
190 <> [(toAttr (fromStrict "phyloFoundations") $ pack $ show (length $ Vector.toList $ getRoots phylo))
191 ,(toAttr (fromStrict "phyloTerms") $ pack $ show (length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) $ export ^. export_groups))
192 ,(toAttr (fromStrict "phyloDocs") $ pack $ show (sum $ elems $ phylo ^. phylo_timeDocs))
193 ,(toAttr (fromStrict "phyloPeriods") $ pack $ show (length $ elems $ phylo ^. phylo_periods))
194 ,(toAttr (fromStrict "phyloBranches") $ pack $ show (length $ export ^. export_branches))
195 ,(toAttr (fromStrict "phyloGroups") $ pack $ show (length $ export ^. export_groups))
196 -- ,(toAttr (fromStrict "phyloTermsFreq") $ pack $ show (toList $ _phylo_lastTermFreq phylo))
200 -- toAttr (fromStrict k) $ (pack . unwords) $ map show v
202 -- 2) create a layer for the branches labels -}
203 subgraph (Str "Branches peaks") $ do
205 -- graphAttrs [Rank SameRank]
207 -- 3) group the branches by hierarchy
208 -- mapM (\branches ->
209 -- subgraph (Str "Branches clade") $ do
210 -- graphAttrs [Rank SameRank]
212 -- -- 4) create a node for each branch
213 -- mapM branchToDotNode branches
214 -- ) $ elems $ fromListWith (++) $ map (\b -> ((init . snd) $ b ^. branch_id,[b])) $ export ^. export_branches
216 mapM (\b -> branchToDotNode b (fromJust $ elemIndex b (export ^. export_branches))) $ export ^. export_branches
218 {-- 5) create a layer for each period -}
219 _ <- mapM (\period ->
220 subgraph ((Str . fromStrict . Text.pack) $ ("Period" <> show (fst period) <> show (snd period))) $ do
221 graphAttrs [Rank SameRank]
222 periodToDotNode period
224 {-- 6) create a node for each group -}
225 mapM (\g -> groupToDotNode (getRoots phylo) g (toBid g (export ^. export_branches))) (filter (\g -> g ^. phylo_groupPeriod == period) $ export ^. export_groups)
226 ) $ getPeriodIds phylo
228 {-- 7) create the edges between a branch and its first groups -}
229 _ <- mapM (\(bId,groups) ->
230 mapM (\g -> toDotEdge (branchIdToDotId bId) (groupIdToDotId $ getGroupId g) "" BranchToGroup) groups
233 $ map (\groups -> head' "toDot"
234 $ groupBy (\g g' -> g' ^. phylo_groupPeriod == g ^. phylo_groupPeriod)
235 $ sortOn (fst . _phylo_groupPeriod) groups)
236 $ fromListWith (++) $ map (\g -> (g ^. phylo_groupBranchId,[g])) $ export ^. export_groups
238 {- 8) create the edges between the groups -}
239 _ <- mapM (\((k,k'),v) ->
240 toDotEdge (groupIdToDotId k) (groupIdToDotId k') (show v) GroupToGroup
241 ) $ (toList . mergePointers) $ export ^. export_groups
243 _ <- mapM (\((k,k'),v) ->
244 toDotEdge (groupIdToDotId k) (groupIdToDotId k') (show v) GroupToAncestor
245 ) $ mergeAncestors $ export ^. export_groups
247 -- 10) create the edges between the periods
248 _ <- mapM (\(prd,prd') ->
249 toDotEdge (periodIdToDotId prd) (periodIdToDotId prd') "" PeriodToPeriod
250 ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ getPeriodIds phylo
252 {- 8) create the edges between the branches
253 -- _ <- mapM (\(bId,bId') ->
254 -- toDotEdge (branchIdToDotId bId) (branchIdToDotId bId')
255 -- (Text.pack $ show(branchIdsToProximity bId bId'
256 -- (getThresholdInit $ phyloProximity $ getConfig phylo)
257 -- (getThresholdStep $ phyloProximity $ getConfig phylo))) BranchToBranch
258 -- ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ map _branch_id $ export ^. export_branches
262 graphAttrs [Rank SameRank]
269 filterByBranchSize :: Double -> PhyloExport -> PhyloExport
270 filterByBranchSize thr export =
271 let splited = partition (\b -> head' "filter" ((b ^. branch_meta) ! "size") >= thr) $ export ^. export_branches
272 in export & export_branches .~ (fst splited)
273 & export_groups %~ (filter (\g -> not $ elem (g ^. phylo_groupBranchId) (map _branch_id $ snd splited)))
276 processFilters :: [Filter] -> Quality -> PhyloExport -> PhyloExport
277 processFilters filters qua export =
278 foldl (\export' f -> case f of
279 ByBranchSize thr -> if (thr < (fromIntegral $ qua ^. qua_minBranch))
280 then filterByBranchSize (fromIntegral $ qua ^. qua_minBranch) export'
281 else filterByBranchSize thr export'
288 branchToIso :: [PhyloBranch] -> [PhyloBranch]
289 branchToIso branches =
292 $ map (\(b,x) -> b ^. branch_y + 0.05 - x)
294 $ ([0] ++ (map (\(b,b') ->
295 let idx = length $ commonPrefix (b ^. branch_canonId) (b' ^. branch_canonId) []
296 lmin = min (length $ b ^. branch_seaLevel) (length $ b' ^. branch_seaLevel)
298 if ((idx - 1) > ((length $ b' ^. branch_seaLevel) - 1))
299 then (b' ^. branch_seaLevel) !! (lmin - 1)
300 else (b' ^. branch_seaLevel) !! (idx - 1)
301 ) $ listToSeq branches))
302 in map (\(x,b) -> b & branch_x .~ x)
305 branchToIso' :: Double -> Double -> [PhyloBranch] -> [PhyloBranch]
306 branchToIso' start step branches =
307 let bx = map (\l -> (sum l) + ((fromIntegral $ length l) * 0.5))
309 $ ([0] ++ (map (\(b,b') ->
310 let root = fromIntegral $ length $ commonPrefix (snd $ b ^. branch_id) (snd $ b' ^. branch_id) []
311 in 1 - start - step * root) $ listToSeq branches))
312 in map (\(x,b) -> b & branch_x .~ x)
316 sortByHierarchy :: Int -> [PhyloBranch] -> [PhyloBranch]
317 sortByHierarchy depth branches =
318 if (length branches == 1)
322 let partitions = partition (\b -> depth + 1 == ((length . snd) $ b ^. branch_id)) branches'
323 in (sortOn (\b -> (b ^. branch_meta) ! "birth") (fst partitions))
324 ++ (sortByHierarchy (depth + 1) (snd partitions)))
325 $ groupBy (\b b' -> ((take depth . snd) $ b ^. branch_id) == ((take depth . snd) $ b' ^. branch_id) )
326 $ sortOn (\b -> (take depth . snd) $ b ^. branch_id) branches
329 sortByBirthDate :: Order -> PhyloExport -> PhyloExport
330 sortByBirthDate order export =
331 let branches = sortOn (\b -> (b ^. branch_meta) ! "birth") $ export ^. export_branches
332 branches' = case order of
334 Desc -> reverse branches
335 in export & export_branches .~ branches'
337 processSort :: Sort -> SeaElevation -> PhyloExport -> PhyloExport
338 processSort sort' elev export = case sort' of
339 ByBirthDate o -> sortByBirthDate o export
340 ByHierarchy -> export & export_branches .~ (branchToIso' (_cons_start elev) (_cons_step elev)
341 $ sortByHierarchy 0 (export ^. export_branches))
348 -- | Return the conditional probability of i knowing j
349 conditional :: Ord a => Map (a,a) Double -> a -> a -> Double
350 conditional m i j = (findWithDefault 0 (i,j) m)
354 -- | Return the genericity score of a given ngram
355 genericity :: Map (Int, Int) Double -> [Int] -> Int -> Double
356 genericity m l i = ( (sum $ map (\j -> conditional m i j) l)
357 - (sum $ map (\j -> conditional m j i) l)) / (fromIntegral $ (length l) + 1)
360 -- | Return the specificity score of a given ngram
361 specificity :: Map (Int, Int) Double -> [Int] -> Int -> Double
362 specificity m l i = ( (sum $ map (\j -> conditional m j i) l)
363 - (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
366 -- | Return the inclusion score of a given ngram
367 inclusion :: Map (Int, Int) Double -> [Int] -> Int -> Double
368 inclusion m l i = ( (sum $ map (\j -> conditional m j i) l)
369 + (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
372 ngramsMetrics :: Phylo -> PhyloExport -> PhyloExport
373 ngramsMetrics phylo export =
376 (\g -> g & phylo_groupMeta %~ insert "genericity"
377 (map (\n -> genericity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
378 & phylo_groupMeta %~ insert "specificity"
379 (map (\n -> specificity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
380 & phylo_groupMeta %~ insert "inclusion"
381 (map (\n -> inclusion (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
382 & phylo_groupMeta %~ insert "frequence"
383 (map (\n -> getInMap n (phylo ^. phylo_lastTermFreq)) $ g ^. phylo_groupNgrams)
387 branchDating :: PhyloExport -> PhyloExport
388 branchDating export =
389 over ( export_branches
392 let groups = sortOn fst
393 $ foldl' (\acc g -> if (g ^. phylo_groupBranchId == b ^. branch_id)
394 then acc ++ [g ^. phylo_groupPeriod]
395 else acc ) [] $ export ^. export_groups
397 birth = fst $ head' "birth" groups
398 age = (snd $ last' "age" groups) - birth
399 in b & branch_meta %~ insert "birth" [fromIntegral birth]
400 & branch_meta %~ insert "age" [fromIntegral age]
401 & branch_meta %~ insert "size" [fromIntegral $ length periods] ) export
403 processMetrics :: Phylo -> PhyloExport -> PhyloExport
404 processMetrics phylo export = ngramsMetrics phylo
405 $ branchDating export
412 nk :: Int -> [[Int]] -> Int
414 $ map (\g -> if (elem n g)
419 tf :: Int -> [[Int]] -> Double
420 tf n groups = (fromIntegral $ nk n groups) / (fromIntegral $ length $ concat groups)
423 idf :: Int -> [[Int]] -> Double
424 idf n groups = log ((fromIntegral $ length groups) / (fromIntegral $ nk n groups))
427 findTfIdf :: [[Int]] -> [(Int,Double)]
428 findTfIdf groups = reverse $ sortOn snd $ map (\n -> (n,(tf n groups) * (idf n groups))) $ sort $ nub $ concat groups
431 findEmergences :: [PhyloGroup] -> Map Int Double -> [(Int,Double)]
432 findEmergences groups freq =
433 let ngrams = map _phylo_groupNgrams groups
434 dynamics = map (\g -> (g ^. phylo_groupMeta) ! "dynamics") groups
435 emerging = nubBy (\n1 n2 -> fst n1 == fst n2)
436 $ concat $ map (\g -> filter (\(_,d) -> d == 0) $ zip (fst g) (snd g)) $ zip ngrams dynamics
437 in reverse $ sortOn snd
438 $ map (\(n,_) -> if (member n freq)
443 mostEmergentTfIdf :: Int -> Map Int Double -> Vector Ngrams -> PhyloExport -> PhyloExport
444 mostEmergentTfIdf nth freq foundations export =
445 over ( export_branches
448 let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
449 tfidf = findTfIdf (map _phylo_groupNgrams groups)
450 emergences = findEmergences groups freq
451 selected = if (null emergences)
452 then map fst $ take nth tfidf
453 else [fst $ head' "mostEmergentTfIdf" emergences]
454 ++ (map fst $ take (nth - 1) $ filter (\(n,_) -> n /= (fst $ head' "mostEmergentTfIdf" emergences)) tfidf)
455 in b & branch_label .~ (ngramsToLabel foundations selected)) export
458 getNthMostMeta :: Int -> [Double] -> [Int] -> [Int]
459 getNthMostMeta nth meta ns = map (\(idx,_) -> (ns !! idx))
462 $ sortOn snd $ zip [0..] meta
465 mostInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
466 mostInclusive nth foundations export =
467 over ( export_branches
470 let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
471 cooc = foldl (\acc g -> unionWith (+) acc (g ^. phylo_groupCooc)) empty groups
472 ngrams = sort $ foldl (\acc g -> union acc (g ^. phylo_groupNgrams)) [] groups
473 inc = map (\n -> inclusion cooc (ngrams \\ [n]) n) ngrams
474 lbl = ngramsToLabel foundations $ getNthMostMeta nth inc ngrams
475 in b & branch_label .~ lbl ) export
478 mostEmergentInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
479 mostEmergentInclusive nth foundations export =
483 let lbl = ngramsToLabel foundations
485 $ map (\(_,(_,idx)) -> idx)
487 $ map (\groups -> sortOn (fst . snd) groups)
488 $ groupBy ((==) `on` fst) $ reverse $ sortOn fst
489 $ zip ((g ^. phylo_groupMeta) ! "inclusion")
490 $ zip ((g ^. phylo_groupMeta) ! "dynamics") (g ^. phylo_groupNgrams)
491 in g & phylo_groupLabel .~ lbl ) export
494 processLabels :: [PhyloLabel] -> Vector Ngrams -> Map Int Double -> PhyloExport -> PhyloExport
495 processLabels labels foundations freq export =
496 foldl (\export' label ->
498 GroupLabel tagger nth ->
500 MostEmergentInclusive -> mostEmergentInclusive nth foundations export'
501 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger"
502 BranchLabel tagger nth ->
504 MostInclusive -> mostInclusive nth foundations export'
505 MostEmergentTfIdf -> mostEmergentTfIdf nth freq foundations export'
506 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger" ) export labels
514 toDynamics :: Int -> [PhyloGroup] -> PhyloGroup -> Map Int (Date,Date) -> Double
515 toDynamics n parents g m =
516 let prd = g ^. phylo_groupPeriod
517 end = last' "dynamics" (sort $ map snd $ elems m)
518 in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end))
521 else if ((fst prd) == (fst $ m ! n))
529 --------------------------------------
531 isNew = not $ elem n $ concat $ map _phylo_groupNgrams parents
534 processDynamics :: [PhyloGroup] -> [PhyloGroup]
535 processDynamics groups =
537 let parents = filter (\g' -> (g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
538 && ((fst $ g ^. phylo_groupPeriod) > (fst $ g' ^. phylo_groupPeriod))) groups
539 in g & phylo_groupMeta %~ insert "dynamics" (map (\n -> toDynamics n parents g mapNgrams) $ g ^. phylo_groupNgrams) ) groups
541 --------------------------------------
542 mapNgrams :: Map Int (Date,Date)
543 mapNgrams = map (\dates ->
544 let dates' = sort dates
545 in (head' "dynamics" dates', last' "dynamics" dates'))
547 $ foldl (\acc g -> acc ++ ( map (\n -> (n,[fst $ g ^. phylo_groupPeriod, snd $ g ^. phylo_groupPeriod]))
548 $ (g ^. phylo_groupNgrams))) [] groups
555 getGroupThr :: Double -> PhyloGroup -> Double
557 let seaLvl = (g ^. phylo_groupMeta) ! "seaLevels"
558 breaks = (g ^. phylo_groupMeta) ! "breaks"
559 in (last' "export" (take (round $ (last' "export" breaks) + 1) seaLvl)) - step
561 toAncestor :: Double -> Map Int Double -> Proximity -> Double -> [PhyloGroup] -> PhyloGroup -> PhyloGroup
562 toAncestor nbDocs diago proximity step candidates ego =
563 let curr = ego ^. phylo_groupAncestors
564 in ego & phylo_groupAncestors .~ (curr ++ (map (\(g,w) -> (getGroupId g,w))
565 $ filter (\(g,w) -> (w > 0) && (w >= (min (getGroupThr step ego) (getGroupThr step g))))
566 $ map (\g -> (g, toProximity nbDocs diago proximity (ego ^. phylo_groupNgrams) (g ^. phylo_groupNgrams) (g ^. phylo_groupNgrams)))
567 $ filter (\g -> g ^. phylo_groupBranchId /= ego ^. phylo_groupBranchId ) candidates))
570 headsToAncestors :: Double -> Map Int Double -> Proximity -> Double -> [PhyloGroup] -> [PhyloGroup] -> [PhyloGroup]
571 headsToAncestors nbDocs diago proximity step heads acc =
575 let ego = head' "headsToAncestors" heads
576 heads' = tail' "headsToAncestors" heads
577 in headsToAncestors nbDocs diago proximity step heads' (acc ++ [toAncestor nbDocs diago proximity step heads' ego])
580 toHorizon :: Phylo -> Phylo
582 let phyloAncestor = updatePhyloGroups
584 (fromList $ map (\g -> (getGroupId g, g))
586 $ tracePhyloAncestors newGroups) phylo
587 reBranched = fromList $ map (\g -> (getGroupId g, g)) $ concat
588 $ groupsToBranches $ fromList $ map (\g -> (getGroupId g, g)) $ getGroupsFromLevel level phyloAncestor
589 in updatePhyloGroups level reBranched phylo
591 -- | 1) for each periods
592 periods :: [PhyloPeriodId]
593 periods = getPeriodIds phylo
596 level = getLastLevel phylo
599 frame = getTimeFrame $ timeUnit $ getConfig phylo
600 -- | 2) find ancestors between groups without parents
601 mapGroups :: [[PhyloGroup]]
602 mapGroups = map (\prd ->
603 let groups = getGroupsFromLevelPeriods level [prd] phylo
604 childs = getPreviousChildIds level frame prd periods phylo
605 -- maybe add a better filter for non isolated ancestors
606 heads = filter (\g -> (not . null) $ (g ^. phylo_groupPeriodChilds))
607 $ filter (\g -> null (g ^. phylo_groupPeriodParents) && (notElem (getGroupId g) childs)) groups
608 noHeads = groups \\ heads
609 nbDocs = sum $ elems $ filterDocs (phylo ^. phylo_timeDocs) [prd]
610 diago = reduceDiagos $ filterDiago (phylo ^. phylo_timeCooc) [prd]
611 proximity = (phyloProximity $ getConfig phylo)
612 step = case getSeaElevation phylo of
614 Adaptative _ -> undefined
615 -- in headsToAncestors nbDocs diago proximity heads groups []
616 in map (\ego -> toAncestor nbDocs diago proximity step noHeads ego)
617 $ headsToAncestors nbDocs diago proximity step heads []
619 -- | 3) process this task concurrently
620 newGroups :: [[PhyloGroup]]
621 newGroups = mapGroups `using` parList rdeepseq
622 --------------------------------------
624 getPreviousChildIds :: Level -> Int -> PhyloPeriodId -> [PhyloPeriodId] -> Phylo -> [PhyloGroupId]
625 getPreviousChildIds lvl frame curr prds phylo =
626 concat $ map ((map fst) . _phylo_groupPeriodChilds)
627 $ getGroupsFromLevelPeriods lvl (getNextPeriods ToParents frame curr prds) phylo
629 ---------------------
630 -- | phyloExport | --
631 ---------------------
633 toPhyloExport :: Phylo -> DotGraph DotId
634 toPhyloExport phylo = exportToDot phylo
635 $ processFilters (exportFilter $ getConfig phylo) (phyloQuality $ getConfig phylo)
636 $ processSort (exportSort $ getConfig phylo) (getSeaElevation phylo)
637 $ processLabels (exportLabel $ getConfig phylo) (getRoots phylo) (_phylo_lastTermFreq phylo)
638 $ processMetrics phylo export
640 export :: PhyloExport
641 export = PhyloExport groups branches
642 --------------------------------------
643 branches :: [PhyloBranch]
644 branches = map (\g ->
645 let seaLvl = (g ^. phylo_groupMeta) ! "seaLevels"
646 breaks = (g ^. phylo_groupMeta) ! "breaks"
647 canonId = take (round $ (last' "export" breaks) + 2) (snd $ g ^. phylo_groupBranchId)
648 in PhyloBranch (g ^. phylo_groupBranchId)
652 (last' "export" (take (round $ (last' "export" breaks) + 1) seaLvl))
656 $ map (\gs -> head' "export" gs)
657 $ groupBy (\g g' -> g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
658 $ sortOn (\g -> g ^. phylo_groupBranchId) groups
659 --------------------------------------
660 groups :: [PhyloGroup]
661 groups = traceExportGroups
663 $ getGroupsFromLevel (phyloLevel $ getConfig phylo)
664 $ tracePhyloInfo phylo
668 traceExportBranches :: [PhyloBranch] -> [PhyloBranch]
669 traceExportBranches branches = trace ("\n"
670 <> "-- | Export " <> show(length branches) <> " branches") branches
672 tracePhyloAncestors :: [[PhyloGroup]] -> [[PhyloGroup]]
673 tracePhyloAncestors groups = trace ( "-- | Found " <> show(length $ concat $ map _phylo_groupAncestors $ concat groups) <> " ancestors") groups
675 tracePhyloInfo :: Phylo -> Phylo
676 tracePhyloInfo phylo = trace ("\n" <> "##########################" <> "\n\n" <> "-- | Phylo with λ = "
677 <> show(_qua_granularity $ phyloQuality $ getConfig phylo) <> " applied to "
678 <> show(length $ Vector.toList $ getRoots phylo) <> " foundations"
682 traceExportGroups :: [PhyloGroup] -> [PhyloGroup]
683 traceExportGroups groups = trace ("\n" <> "-- | Export "
684 <> show(length $ nub $ map (\g -> g ^. phylo_groupBranchId) groups) <> " branches, "
685 <> show(length groups) <> " groups and "
686 <> show(length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) groups) <> " terms"