2 Module : Gargantext.Core.Viz.Phylo.PhyloExport
3 Description : Exportation module of a Phylo
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
11 {-# LANGUAGE TypeSynonymInstances #-}
13 module Gargantext.Core.Viz.Phylo.PhyloExport where
15 import Data.Map (Map, fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault, toList, member)
16 import Data.List ((++), sort, nub, null, concat, sortOn, groupBy, union, (\\), (!!), init, partition, notElem, unwords, nubBy, inits, elemIndex)
17 import Data.Vector (Vector)
19 import Prelude (writeFile)
20 import Gargantext.Prelude
21 import Gargantext.Core.Viz.AdaptativePhylo
22 import Gargantext.Core.Viz.Phylo.PhyloTools
23 import Gargantext.Core.Viz.Phylo.TemporalMatching (filterDocs, filterDiago, reduceDiagos, toProximity, getNextPeriods)
25 import Control.Lens hiding (Level)
26 import Control.Parallel.Strategies (parList, rdeepseq, using)
27 import Data.GraphViz hiding (DotGraph, Order)
28 import Data.GraphViz.Types.Generalised (DotGraph)
29 import Data.GraphViz.Attributes.Complete hiding (EdgeType, Order)
30 import Data.GraphViz.Types.Monadic
31 import Data.Text.Lazy (fromStrict, pack, unpack)
32 import System.FilePath
33 import Debug.Trace (trace)
35 import qualified Data.Text as Text
36 import qualified Data.Vector as Vector
37 import qualified Data.Text.Lazy as Lazy
38 import qualified Data.GraphViz.Attributes.HTML as H
44 dotToFile :: FilePath -> DotGraph DotId -> IO ()
45 dotToFile filePath dotG = writeFile filePath $ dotToString dotG
47 dotToString :: DotGraph DotId -> [Char]
48 dotToString dotG = unpack (printDotGraph dotG)
50 dynamicToColor :: Double -> H.Attribute
52 | d == 0 = H.BGColor (toColor LightCoral)
53 | d == 1 = H.BGColor (toColor Khaki)
54 | d == 2 = H.BGColor (toColor SkyBlue)
55 | otherwise = H.Color (toColor Black)
57 pickLabelColor :: [Double] -> H.Attribute
59 | elem 0 lst = dynamicToColor 0
60 | elem 2 lst = dynamicToColor 2
61 | elem 1 lst = dynamicToColor 1
62 | otherwise = dynamicToColor 3
64 toDotLabel :: Text.Text -> Label
65 toDotLabel lbl = StrLabel $ fromStrict lbl
67 toAttr :: AttributeName -> Lazy.Text -> CustomAttribute
68 toAttr k v = customAttribute k v
70 metaToAttr :: Map Text.Text [Double] -> [CustomAttribute]
71 metaToAttr meta = map (\(k,v) -> toAttr (fromStrict k) $ (pack . unwords) $ map show v) $ toList meta
73 groupIdToDotId :: PhyloGroupId -> DotId
74 groupIdToDotId (((d,d'),lvl),idx) = (fromStrict . Text.pack) $ ("group" <> (show d) <> (show d') <> (show lvl) <> (show idx))
76 branchIdToDotId :: PhyloBranchId -> DotId
77 branchIdToDotId bId = (fromStrict . Text.pack) $ ("branch" <> show (snd bId))
79 periodIdToDotId :: PhyloPeriodId -> DotId
80 periodIdToDotId prd = (fromStrict . Text.pack) $ ("period" <> show (fst prd) <> show (snd prd))
82 groupToTable :: Vector Ngrams -> PhyloGroup -> H.Label
83 groupToTable fdt g = H.Table H.HTable
84 { H.tableFontAttrs = Just [H.PointSize 14, H.Align H.HLeft]
85 , H.tableAttrs = [H.Border 0, H.CellBorder 0, H.BGColor (toColor White)]
86 , H.tableRows = [header]
87 <> [H.Cells [H.LabelCell [H.Height 10] $ H.Text [H.Str $ fromStrict ""]]]
88 <> ( map ngramsToRow $ splitEvery 4
89 $ reverse $ sortOn (snd . snd)
90 $ zip (ngramsToText fdt (g ^. phylo_groupNgrams))
91 $ zip ((g ^. phylo_groupMeta) ! "dynamics") ((g ^. phylo_groupMeta) ! "inclusion"))}
93 --------------------------------------
94 ngramsToRow :: [(Ngrams,(Double,Double))] -> H.Row
95 ngramsToRow ns = H.Cells $ map (\(n,(d,_)) ->
96 H.LabelCell [H.Align H.HLeft,dynamicToColor d] $ H.Text [H.Str $ fromStrict n]) ns
97 --------------------------------------
100 H.Cells [ H.LabelCell [pickLabelColor ((g ^. phylo_groupMeta) ! "dynamics")]
101 $ H.Text [H.Str $ (((fromStrict . Text.toUpper) $ g ^. phylo_groupLabel)
102 <> (fromStrict " ( ")
103 <> (pack $ show (fst $ g ^. phylo_groupPeriod))
104 <> (fromStrict " , ")
105 <> (pack $ show (snd $ g ^. phylo_groupPeriod))
106 <> (fromStrict " ) ")
107 <> (pack $ show (getGroupId g)))]]
108 --------------------------------------
110 branchToDotNode :: PhyloBranch -> Int -> Dot DotId
111 branchToDotNode b bId =
112 node (branchIdToDotId $ b ^. branch_id)
113 ([FillColor [toWColor CornSilk], FontName "Arial", FontSize 40, Shape Egg, Style [SItem Bold []], Label (toDotLabel $ b ^. branch_label)]
114 <> (metaToAttr $ b ^. branch_meta)
115 <> [ toAttr "nodeType" "branch"
116 , toAttr "bId" (pack $ show bId)
117 , toAttr "branchId" (pack $ unwords (map show $ snd $ b ^. branch_id))
118 , toAttr "branch_x" (fromStrict $ Text.pack $ (show $ b ^. branch_x))
119 , toAttr "branch_y" (fromStrict $ Text.pack $ (show $ b ^. branch_y))
120 , toAttr "label" (pack $ show $ b ^. branch_label)
123 periodToDotNode :: (Date,Date) -> (Text.Text,Text.Text) -> Dot DotId
124 periodToDotNode prd prd' =
125 node (periodIdToDotId prd)
126 ([Shape BoxShape, FontSize 50, Label (toDotLabel $ Text.pack (show (fst prd) <> " " <> show (snd prd)))]
127 <> [ toAttr "nodeType" "period"
128 , toAttr "strFrom" (fromStrict $ Text.pack $ (show $ fst prd'))
129 , toAttr "strTo" (fromStrict $ Text.pack $ (show $ snd prd'))
130 , toAttr "from" (fromStrict $ Text.pack $ (show $ fst prd))
131 , toAttr "to" (fromStrict $ Text.pack $ (show $ snd prd))])
134 groupToDotNode :: Vector Ngrams -> PhyloGroup -> Int -> Dot DotId
135 groupToDotNode fdt g bId =
136 node (groupIdToDotId $ getGroupId g)
137 ([FontName "Arial", Shape Square, penWidth 4, toLabel (groupToTable fdt g)]
138 <> [ toAttr "nodeType" "group"
139 , toAttr "from" (pack $ show (fst $ g ^. phylo_groupPeriod))
140 , toAttr "to" (pack $ show (snd $ g ^. phylo_groupPeriod))
141 , toAttr "strFrom" (pack $ show (fst $ g ^. phylo_groupPeriod'))
142 , toAttr "strTo" (pack $ show (snd $ g ^. phylo_groupPeriod'))
143 , toAttr "branchId" (pack $ unwords (init $ map show $ snd $ g ^. phylo_groupBranchId))
144 , toAttr "bId" (pack $ show bId)
145 , toAttr "support" (pack $ show (g ^. phylo_groupSupport))
146 , toAttr "weight" (pack $ show (g ^. phylo_groupWeight))
147 , toAttr "source" (pack $ show (nub $ g ^. phylo_groupSources))
148 , toAttr "lbl" (pack $ show (ngramsToLabel fdt (g ^. phylo_groupNgrams)))
149 , toAttr "foundation" (pack $ show (idxToLabel (g ^. phylo_groupNgrams)))
150 , toAttr "role" (pack $ show (idxToLabel' ((g ^. phylo_groupMeta) ! "dynamics")))
151 , toAttr "frequence" (pack $ show (idxToLabel' ((g ^. phylo_groupMeta) ! "frequence")))
155 toDotEdge :: DotId -> DotId -> [Char] -> EdgeType -> Dot DotId
156 toDotEdge source target lbl edgeType = edge source target
158 GroupToGroup -> [ Width 3, penWidth 4, Color [toWColor Black], Constraint True] <> [toAttr "edgeType" "link", toAttr "lbl" (pack lbl)]
159 BranchToGroup -> [ Width 3, Color [toWColor Black], ArrowHead (AType [(ArrMod FilledArrow RightSide,DotArrow)])] <> [toAttr "edgeType" "branchLink" ]
160 BranchToBranch -> [ Width 2, Color [toWColor Black], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,DotArrow)])]
161 GroupToAncestor -> [ Width 3, Color [toWColor Red], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,NoArrow)]), PenWidth 4] <> [toAttr "edgeType" "ancestorLink", toAttr "lbl" (pack lbl)]
162 PeriodToPeriod -> [ Width 5, Color [toWColor Black]])
165 mergePointers :: [PhyloGroup] -> Map (PhyloGroupId,PhyloGroupId) Double
166 mergePointers groups =
167 let toChilds = fromList $ concat $ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupPeriodChilds) groups
168 toParents = fromList $ concat $ map (\g -> map (\(target,w) -> ((target,getGroupId g),w)) $ g ^. phylo_groupPeriodParents) groups
169 in unionWith (\w w' -> max w w') toChilds toParents
171 mergeAncestors :: [PhyloGroup] -> [((PhyloGroupId,PhyloGroupId), Double)]
172 mergeAncestors groups = concat
173 $ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupAncestors)
174 $ filter (\g -> (not . null) $ g ^. phylo_groupAncestors) groups
177 toBid :: PhyloGroup -> [PhyloBranch] -> Int
179 let b' = head' "toBid" (filter (\b -> b ^. branch_id == g ^. phylo_groupBranchId) bs)
180 in fromJust $ elemIndex b' bs
182 exportToDot :: Phylo -> PhyloExport -> DotGraph DotId
183 exportToDot phylo export =
184 trace ("\n-- | Convert " <> show(length $ export ^. export_branches) <> " branches and "
185 <> show(length $ export ^. export_groups) <> " groups "
186 <> show(length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) $ export ^. export_groups) <> " terms to a dot file\n\n"
187 <> "##########################") $
188 digraph ((Str . fromStrict) $ (phyloName $ getConfig phylo)) $ do
190 {- 1) init the dot graph -}
191 graphAttrs ( [ Label (toDotLabel $ (phyloName $ getConfig phylo))]
192 <> [ FontSize 30, LabelLoc VTop, NodeSep 1, RankSep [1], Rank SameRank, Splines SplineEdges, Overlap ScaleOverlaps
194 , Style [SItem Filled []],Color [toWColor White]]
195 {-- home made attributes -}
196 <> [(toAttr (fromStrict "phyloFoundations") $ pack $ show (length $ Vector.toList $ getRoots phylo))
197 ,(toAttr (fromStrict "phyloTerms") $ pack $ show (length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) $ export ^. export_groups))
198 ,(toAttr (fromStrict "phyloDocs") $ pack $ show (sum $ elems $ phylo ^. phylo_timeDocs))
199 ,(toAttr (fromStrict "phyloPeriods") $ pack $ show (length $ elems $ phylo ^. phylo_periods))
200 ,(toAttr (fromStrict "phyloBranches") $ pack $ show (length $ export ^. export_branches))
201 ,(toAttr (fromStrict "phyloGroups") $ pack $ show (length $ export ^. export_groups))
202 ,(toAttr (fromStrict "phyloSources") $ pack $ show (Vector.toList $ getSources phylo))
203 ,(toAttr (fromStrict "phyloTimeScale") $ pack $ getTimeScale phylo)
204 -- ,(toAttr (fromStrict "phyloTermsFreq") $ pack $ show (toList $ _phylo_lastTermFreq phylo))
208 -- toAttr (fromStrict k) $ (pack . unwords) $ map show v
210 -- 2) create a layer for the branches labels -}
211 subgraph (Str "Branches peaks") $ do
213 -- graphAttrs [Rank SameRank]
215 -- 3) group the branches by hierarchy
216 -- mapM (\branches ->
217 -- subgraph (Str "Branches clade") $ do
218 -- graphAttrs [Rank SameRank]
220 -- -- 4) create a node for each branch
221 -- mapM branchToDotNode branches
222 -- ) $ elems $ fromListWith (++) $ map (\b -> ((init . snd) $ b ^. branch_id,[b])) $ export ^. export_branches
224 mapM (\b -> branchToDotNode b (fromJust $ elemIndex b (export ^. export_branches))) $ export ^. export_branches
226 {-- 5) create a layer for each period -}
227 _ <- mapM (\period ->
228 subgraph ((Str . fromStrict . Text.pack) $ ("Period" <> show (fst $ _phylo_periodPeriod period) <> show (snd $ _phylo_periodPeriod period))) $ do
229 graphAttrs [Rank SameRank]
230 periodToDotNode (period ^. phylo_periodPeriod) (period ^. phylo_periodPeriod')
232 {-- 6) create a node for each group -}
233 mapM (\g -> groupToDotNode (getRoots phylo) g (toBid g (export ^. export_branches))) (filter (\g -> g ^. phylo_groupPeriod == (period ^. phylo_periodPeriod)) $ export ^. export_groups)
234 ) $ phylo ^. phylo_periods
236 {-- 7) create the edges between a branch and its first groups -}
237 _ <- mapM (\(bId,groups) ->
238 mapM (\g -> toDotEdge (branchIdToDotId bId) (groupIdToDotId $ getGroupId g) "" BranchToGroup) groups
241 $ map (\groups -> head' "toDot"
242 $ groupBy (\g g' -> g' ^. phylo_groupPeriod == g ^. phylo_groupPeriod)
243 $ sortOn (fst . _phylo_groupPeriod) groups)
244 $ fromListWith (++) $ map (\g -> (g ^. phylo_groupBranchId,[g])) $ export ^. export_groups
246 {- 8) create the edges between the groups -}
247 _ <- mapM (\((k,k'),v) ->
248 toDotEdge (groupIdToDotId k) (groupIdToDotId k') (show v) GroupToGroup
249 ) $ (toList . mergePointers) $ export ^. export_groups
251 _ <- mapM (\((k,k'),v) ->
252 toDotEdge (groupIdToDotId k) (groupIdToDotId k') (show v) GroupToAncestor
253 ) $ mergeAncestors $ export ^. export_groups
255 -- 10) create the edges between the periods
256 _ <- mapM (\(prd,prd') ->
257 toDotEdge (periodIdToDotId prd) (periodIdToDotId prd') "" PeriodToPeriod
258 ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ getPeriodIds phylo
260 {- 8) create the edges between the branches
261 -- _ <- mapM (\(bId,bId') ->
262 -- toDotEdge (branchIdToDotId bId) (branchIdToDotId bId')
263 -- (Text.pack $ show(branchIdsToProximity bId bId'
264 -- (getThresholdInit $ phyloProximity $ getConfig phylo)
265 -- (getThresholdStep $ phyloProximity $ getConfig phylo))) BranchToBranch
266 -- ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ map _branch_id $ export ^. export_branches
270 graphAttrs [Rank SameRank]
277 filterByBranchSize :: Double -> PhyloExport -> PhyloExport
278 filterByBranchSize thr export =
279 let splited = partition (\b -> head' "filter" ((b ^. branch_meta) ! "size") >= thr) $ export ^. export_branches
280 in export & export_branches .~ (fst splited)
281 & export_groups %~ (filter (\g -> not $ elem (g ^. phylo_groupBranchId) (map _branch_id $ snd splited)))
284 processFilters :: [Filter] -> Quality -> PhyloExport -> PhyloExport
285 processFilters filters qua export =
286 foldl (\export' f -> case f of
287 ByBranchSize thr -> if (thr < (fromIntegral $ qua ^. qua_minBranch))
288 then filterByBranchSize (fromIntegral $ qua ^. qua_minBranch) export'
289 else filterByBranchSize thr export'
296 branchToIso :: [PhyloBranch] -> [PhyloBranch]
297 branchToIso branches =
300 $ map (\(b,x) -> b ^. branch_y + 0.05 - x)
302 $ ([0] ++ (map (\(b,b') ->
303 let idx = length $ commonPrefix (b ^. branch_canonId) (b' ^. branch_canonId) []
304 lmin = min (length $ b ^. branch_seaLevel) (length $ b' ^. branch_seaLevel)
306 if ((idx - 1) > ((length $ b' ^. branch_seaLevel) - 1))
307 then (b' ^. branch_seaLevel) !! (lmin - 1)
308 else (b' ^. branch_seaLevel) !! (idx - 1)
309 ) $ listToSeq branches))
310 in map (\(x,b) -> b & branch_x .~ x)
313 branchToIso' :: Double -> Double -> [PhyloBranch] -> [PhyloBranch]
314 branchToIso' start step branches =
315 let bx = map (\l -> (sum l) + ((fromIntegral $ length l) * 0.5))
317 $ ([0] ++ (map (\(b,b') ->
318 let root = fromIntegral $ length $ commonPrefix (snd $ b ^. branch_id) (snd $ b' ^. branch_id) []
319 in 1 - start - step * root) $ listToSeq branches))
320 in map (\(x,b) -> b & branch_x .~ x)
324 sortByHierarchy :: Int -> [PhyloBranch] -> [PhyloBranch]
325 sortByHierarchy depth branches =
326 if (length branches == 1)
330 let partitions = partition (\b -> depth + 1 == ((length . snd) $ b ^. branch_id)) branches'
331 in (sortOn (\b -> (b ^. branch_meta) ! "birth") (fst partitions))
332 ++ (sortByHierarchy (depth + 1) (snd partitions)))
333 $ groupBy (\b b' -> ((take depth . snd) $ b ^. branch_id) == ((take depth . snd) $ b' ^. branch_id) )
334 $ sortOn (\b -> (take depth . snd) $ b ^. branch_id) branches
337 sortByBirthDate :: Order -> PhyloExport -> PhyloExport
338 sortByBirthDate order export =
339 let branches = sortOn (\b -> (b ^. branch_meta) ! "birth") $ export ^. export_branches
340 branches' = case order of
342 Desc -> reverse branches
343 in export & export_branches .~ branches'
345 processSort :: Sort -> SeaElevation -> PhyloExport -> PhyloExport
346 processSort sort' elev export = case sort' of
347 ByBirthDate o -> sortByBirthDate o export
348 ByHierarchy -> export & export_branches .~ (branchToIso' (_cons_start elev) (_cons_step elev)
349 $ sortByHierarchy 0 (export ^. export_branches))
356 -- | Return the conditional probability of i knowing j
357 conditional :: Ord a => Map (a,a) Double -> a -> a -> Double
358 conditional m i j = (findWithDefault 0 (i,j) m)
362 -- | Return the genericity score of a given ngram
363 genericity :: Map (Int, Int) Double -> [Int] -> Int -> Double
364 genericity m l i = ( (sum $ map (\j -> conditional m i j) l)
365 - (sum $ map (\j -> conditional m j i) l)) / (fromIntegral $ (length l) + 1)
368 -- | Return the specificity score of a given ngram
369 specificity :: Map (Int, Int) Double -> [Int] -> Int -> Double
370 specificity m l i = ( (sum $ map (\j -> conditional m j i) l)
371 - (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
374 -- | Return the inclusion score of a given ngram
375 inclusion :: Map (Int, Int) Double -> [Int] -> Int -> Double
376 inclusion m l i = ( (sum $ map (\j -> conditional m j i) l)
377 + (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
380 ngramsMetrics :: Phylo -> PhyloExport -> PhyloExport
381 ngramsMetrics phylo export =
384 (\g -> g & phylo_groupMeta %~ insert "genericity"
385 (map (\n -> genericity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
386 & phylo_groupMeta %~ insert "specificity"
387 (map (\n -> specificity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
388 & phylo_groupMeta %~ insert "inclusion"
389 (map (\n -> inclusion (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
390 & phylo_groupMeta %~ insert "frequence"
391 (map (\n -> getInMap n (phylo ^. phylo_lastTermFreq)) $ g ^. phylo_groupNgrams)
395 branchDating :: PhyloExport -> PhyloExport
396 branchDating export =
397 over ( export_branches
400 let groups = sortOn fst
401 $ foldl' (\acc g -> if (g ^. phylo_groupBranchId == b ^. branch_id)
402 then acc ++ [g ^. phylo_groupPeriod]
403 else acc ) [] $ export ^. export_groups
405 birth = fst $ head' "birth" groups
406 age = (snd $ last' "age" groups) - birth
407 in b & branch_meta %~ insert "birth" [fromIntegral birth]
408 & branch_meta %~ insert "age" [fromIntegral age]
409 & branch_meta %~ insert "size" [fromIntegral $ length periods] ) export
411 processMetrics :: Phylo -> PhyloExport -> PhyloExport
412 processMetrics phylo export = ngramsMetrics phylo
413 $ branchDating export
420 nk :: Int -> [[Int]] -> Int
422 $ map (\g -> if (elem n g)
427 tf :: Int -> [[Int]] -> Double
428 tf n groups = (fromIntegral $ nk n groups) / (fromIntegral $ length $ concat groups)
431 idf :: Int -> [[Int]] -> Double
432 idf n groups = log ((fromIntegral $ length groups) / (fromIntegral $ nk n groups))
435 findTfIdf :: [[Int]] -> [(Int,Double)]
436 findTfIdf groups = reverse $ sortOn snd $ map (\n -> (n,(tf n groups) * (idf n groups))) $ sort $ nub $ concat groups
439 findEmergences :: [PhyloGroup] -> Map Int Double -> [(Int,Double)]
440 findEmergences groups freq =
441 let ngrams = map _phylo_groupNgrams groups
442 dynamics = map (\g -> (g ^. phylo_groupMeta) ! "dynamics") groups
443 emerging = nubBy (\n1 n2 -> fst n1 == fst n2)
444 $ concat $ map (\g -> filter (\(_,d) -> d == 0) $ zip (fst g) (snd g)) $ zip ngrams dynamics
445 in reverse $ sortOn snd
446 $ map (\(n,_) -> if (member n freq)
451 mostEmergentTfIdf :: Int -> Map Int Double -> Vector Ngrams -> PhyloExport -> PhyloExport
452 mostEmergentTfIdf nth freq foundations export =
453 over ( export_branches
456 let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
457 tfidf = findTfIdf (map _phylo_groupNgrams groups)
458 emergences = findEmergences groups freq
459 selected = if (null emergences)
460 then map fst $ take nth tfidf
461 else [fst $ head' "mostEmergentTfIdf" emergences]
462 ++ (map fst $ take (nth - 1) $ filter (\(n,_) -> n /= (fst $ head' "mostEmergentTfIdf" emergences)) tfidf)
463 in b & branch_label .~ (ngramsToLabel foundations selected)) export
466 getNthMostMeta :: Int -> [Double] -> [Int] -> [Int]
467 getNthMostMeta nth meta ns = map (\(idx,_) -> (ns !! idx))
470 $ sortOn snd $ zip [0..] meta
473 mostInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
474 mostInclusive nth foundations export =
475 over ( export_branches
478 let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
479 cooc = foldl (\acc g -> unionWith (+) acc (g ^. phylo_groupCooc)) empty groups
480 ngrams = sort $ foldl (\acc g -> union acc (g ^. phylo_groupNgrams)) [] groups
481 inc = map (\n -> inclusion cooc (ngrams \\ [n]) n) ngrams
482 lbl = ngramsToLabel foundations $ getNthMostMeta nth inc ngrams
483 in b & branch_label .~ lbl ) export
486 mostEmergentInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
487 mostEmergentInclusive nth foundations export =
491 let lbl = ngramsToLabel foundations
493 $ map (\(_,(_,idx)) -> idx)
495 $ map (\groups -> sortOn (fst . snd) groups)
496 $ groupBy ((==) `on` fst) $ reverse $ sortOn fst
497 $ zip ((g ^. phylo_groupMeta) ! "inclusion")
498 $ zip ((g ^. phylo_groupMeta) ! "dynamics") (g ^. phylo_groupNgrams)
499 in g & phylo_groupLabel .~ lbl ) export
502 processLabels :: [PhyloLabel] -> Vector Ngrams -> Map Int Double -> PhyloExport -> PhyloExport
503 processLabels labels foundations freq export =
504 foldl (\export' label ->
506 GroupLabel tagger nth ->
508 MostEmergentInclusive -> mostEmergentInclusive nth foundations export'
509 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger"
510 BranchLabel tagger nth ->
512 MostInclusive -> mostInclusive nth foundations export'
513 MostEmergentTfIdf -> mostEmergentTfIdf nth freq foundations export'
514 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger" ) export labels
522 toDynamics :: Int -> [PhyloGroup] -> PhyloGroup -> Map Int (Date,Date) -> Double
523 toDynamics n parents g m =
524 let prd = g ^. phylo_groupPeriod
525 end = last' "dynamics" (sort $ map snd $ elems m)
526 in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end))
529 else if ((fst prd) == (fst $ m ! n))
537 --------------------------------------
539 isNew = not $ elem n $ concat $ map _phylo_groupNgrams parents
542 processDynamics :: [PhyloGroup] -> [PhyloGroup]
543 processDynamics groups =
545 let parents = filter (\g' -> (g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
546 && ((fst $ g ^. phylo_groupPeriod) > (fst $ g' ^. phylo_groupPeriod))) groups
547 in g & phylo_groupMeta %~ insert "dynamics" (map (\n -> toDynamics n parents g mapNgrams) $ g ^. phylo_groupNgrams) ) groups
549 --------------------------------------
550 mapNgrams :: Map Int (Date,Date)
551 mapNgrams = map (\dates ->
552 let dates' = sort dates
553 in (head' "dynamics" dates', last' "dynamics" dates'))
555 $ foldl (\acc g -> acc ++ ( map (\n -> (n,[fst $ g ^. phylo_groupPeriod, snd $ g ^. phylo_groupPeriod]))
556 $ (g ^. phylo_groupNgrams))) [] groups
563 getGroupThr :: Double -> PhyloGroup -> Double
565 let seaLvl = (g ^. phylo_groupMeta) ! "seaLevels"
566 breaks = (g ^. phylo_groupMeta) ! "breaks"
567 in (last' "export" (take (round $ (last' "export" breaks) + 1) seaLvl)) - step
569 toAncestor :: Double -> Map Int Double -> Proximity -> Double -> [PhyloGroup] -> PhyloGroup -> PhyloGroup
570 toAncestor nbDocs diago proximity step candidates ego =
571 let curr = ego ^. phylo_groupAncestors
572 in ego & phylo_groupAncestors .~ (curr ++ (map (\(g,w) -> (getGroupId g,w))
573 $ filter (\(g,w) -> (w > 0) && (w >= (min (getGroupThr step ego) (getGroupThr step g))))
574 $ map (\g -> (g, toProximity nbDocs diago proximity (ego ^. phylo_groupNgrams) (g ^. phylo_groupNgrams) (g ^. phylo_groupNgrams)))
575 $ filter (\g -> g ^. phylo_groupBranchId /= ego ^. phylo_groupBranchId ) candidates))
578 headsToAncestors :: Double -> Map Int Double -> Proximity -> Double -> [PhyloGroup] -> [PhyloGroup] -> [PhyloGroup]
579 headsToAncestors nbDocs diago proximity step heads acc =
583 let ego = head' "headsToAncestors" heads
584 heads' = tail' "headsToAncestors" heads
585 in headsToAncestors nbDocs diago proximity step heads' (acc ++ [toAncestor nbDocs diago proximity step heads' ego])
588 toHorizon :: Phylo -> Phylo
590 let phyloAncestor = updatePhyloGroups
592 (fromList $ map (\g -> (getGroupId g, g))
594 $ tracePhyloAncestors newGroups) phylo
595 reBranched = fromList $ map (\g -> (getGroupId g, g)) $ concat
596 $ groupsToBranches $ fromList $ map (\g -> (getGroupId g, g)) $ getGroupsFromLevel level phyloAncestor
597 in updatePhyloGroups level reBranched phylo
599 -- | 1) for each periods
600 periods :: [PhyloPeriodId]
601 periods = getPeriodIds phylo
604 level = getLastLevel phylo
607 frame = getTimeFrame $ timeUnit $ getConfig phylo
608 -- | 2) find ancestors between groups without parents
609 mapGroups :: [[PhyloGroup]]
610 mapGroups = map (\prd ->
611 let groups = getGroupsFromLevelPeriods level [prd] phylo
612 childs = getPreviousChildIds level frame prd periods phylo
613 -- maybe add a better filter for non isolated ancestors
614 heads = filter (\g -> (not . null) $ (g ^. phylo_groupPeriodChilds))
615 $ filter (\g -> null (g ^. phylo_groupPeriodParents) && (notElem (getGroupId g) childs)) groups
616 noHeads = groups \\ heads
617 nbDocs = sum $ elems $ filterDocs (phylo ^. phylo_timeDocs) [prd]
618 diago = reduceDiagos $ filterDiago (phylo ^. phylo_timeCooc) [prd]
619 proximity = (phyloProximity $ getConfig phylo)
620 step = case getSeaElevation phylo of
622 Adaptative _ -> undefined
623 -- in headsToAncestors nbDocs diago proximity heads groups []
624 in map (\ego -> toAncestor nbDocs diago proximity step noHeads ego)
625 $ headsToAncestors nbDocs diago proximity step heads []
627 -- | 3) process this task concurrently
628 newGroups :: [[PhyloGroup]]
629 newGroups = mapGroups `using` parList rdeepseq
630 --------------------------------------
632 getPreviousChildIds :: Level -> Int -> PhyloPeriodId -> [PhyloPeriodId] -> Phylo -> [PhyloGroupId]
633 getPreviousChildIds lvl frame curr prds phylo =
634 concat $ map ((map fst) . _phylo_groupPeriodChilds)
635 $ getGroupsFromLevelPeriods lvl (getNextPeriods ToParents frame curr prds) phylo
637 ---------------------
638 -- | phyloExport | --
639 ---------------------
641 toPhyloExport :: Phylo -> DotGraph DotId
642 toPhyloExport phylo = exportToDot phylo
643 $ processFilters (exportFilter $ getConfig phylo) (phyloQuality $ getConfig phylo)
644 $ processSort (exportSort $ getConfig phylo) (getSeaElevation phylo)
645 $ processLabels (exportLabel $ getConfig phylo) (getRoots phylo) (_phylo_lastTermFreq phylo)
646 $ processMetrics phylo export
648 export :: PhyloExport
649 export = PhyloExport groups branches
650 --------------------------------------
651 branches :: [PhyloBranch]
652 branches = map (\g ->
653 let seaLvl = (g ^. phylo_groupMeta) ! "seaLevels"
654 breaks = (g ^. phylo_groupMeta) ! "breaks"
655 canonId = take (round $ (last' "export" breaks) + 2) (snd $ g ^. phylo_groupBranchId)
656 in PhyloBranch (g ^. phylo_groupBranchId)
660 (last' "export" (take (round $ (last' "export" breaks) + 1) seaLvl))
664 $ map (\gs -> head' "export" gs)
665 $ groupBy (\g g' -> g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
666 $ sortOn (\g -> g ^. phylo_groupBranchId) groups
667 --------------------------------------
668 groups :: [PhyloGroup]
669 groups = traceExportGroups
671 $ getGroupsFromLevel (phyloLevel $ getConfig phylo)
672 $ tracePhyloInfo phylo
676 traceExportBranches :: [PhyloBranch] -> [PhyloBranch]
677 traceExportBranches branches = trace ("\n"
678 <> "-- | Export " <> show(length branches) <> " branches") branches
680 tracePhyloAncestors :: [[PhyloGroup]] -> [[PhyloGroup]]
681 tracePhyloAncestors groups = trace ( "-- | Found " <> show(length $ concat $ map _phylo_groupAncestors $ concat groups) <> " ancestors") groups
683 tracePhyloInfo :: Phylo -> Phylo
684 tracePhyloInfo phylo = trace ("\n" <> "##########################" <> "\n\n" <> "-- | Phylo with λ = "
685 <> show(_qua_granularity $ phyloQuality $ getConfig phylo) <> " applied to "
686 <> show(length $ Vector.toList $ getRoots phylo) <> " foundations"
690 traceExportGroups :: [PhyloGroup] -> [PhyloGroup]
691 traceExportGroups groups = trace ("\n" <> "-- | Export "
692 <> show(length $ nub $ map (\g -> g ^. phylo_groupBranchId) groups) <> " branches, "
693 <> show(length groups) <> " groups and "
694 <> show(length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) groups) <> " terms"