2 Module : Gargantext.Core.Viz.Phylo.PhyloExport
3 Description : Exportation module of a Phylo
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
11 {-# LANGUAGE TypeSynonymInstances #-}
13 module Gargantext.Core.Viz.Phylo.PhyloExport where
15 import Data.Map (Map, fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault, toList)
16 import Data.List ((++), sort, nub, null, concat, sortOn, reverse, groupBy, union, (\\), (!!), init, partition, notElem, unwords, nubBy, inits, elemIndex)
17 import Data.Vector (Vector)
19 import Prelude (writeFile)
20 import Gargantext.Prelude
21 import Gargantext.Core.Viz.AdaptativePhylo
22 import Gargantext.Core.Viz.Phylo.PhyloTools
23 import Gargantext.Core.Viz.Phylo.TemporalMatching (filterDocs, filterDiago, reduceDiagos, toProximity, getNextPeriods)
25 import Control.Lens hiding (Level)
26 import Control.Parallel.Strategies (parList, rdeepseq, using)
27 import Data.GraphViz hiding (DotGraph, Order)
28 import Data.GraphViz.Types.Generalised (DotGraph)
29 import Data.GraphViz.Attributes.Complete hiding (EdgeType, Order)
30 import Data.GraphViz.Types.Monadic
31 import Data.Text.Lazy (fromStrict, pack, unpack)
32 import System.FilePath
33 import Debug.Trace (trace)
35 import qualified Data.Text as Text
36 import qualified Data.Vector as Vector
37 import qualified Data.Text.Lazy as Lazy
38 import qualified Data.GraphViz.Attributes.HTML as H
44 dotToFile :: FilePath -> DotGraph DotId -> IO ()
45 dotToFile filePath dotG = writeFile filePath $ dotToString dotG
47 dotToString :: DotGraph DotId -> [Char]
48 dotToString dotG = unpack (printDotGraph dotG)
50 dynamicToColor :: Double -> H.Attribute
52 | d == 0 = H.BGColor (toColor LightCoral)
53 | d == 1 = H.BGColor (toColor Khaki)
54 | d == 2 = H.BGColor (toColor SkyBlue)
55 | otherwise = H.Color (toColor Black)
57 pickLabelColor :: [Double] -> H.Attribute
59 | elem 0 lst = dynamicToColor 0
60 | elem 2 lst = dynamicToColor 2
61 | elem 1 lst = dynamicToColor 1
62 | otherwise = dynamicToColor 3
64 toDotLabel :: Text.Text -> Label
65 toDotLabel lbl = StrLabel $ fromStrict lbl
67 toAttr :: AttributeName -> Lazy.Text -> CustomAttribute
68 toAttr k v = customAttribute k v
70 metaToAttr :: Map Text.Text [Double] -> [CustomAttribute]
71 metaToAttr meta = map (\(k,v) -> toAttr (fromStrict k) $ (pack . unwords) $ map show v) $ toList meta
73 groupIdToDotId :: PhyloGroupId -> DotId
74 groupIdToDotId (((d,d'),lvl),idx) = (fromStrict . Text.pack) $ ("group" <> (show d) <> (show d') <> (show lvl) <> (show idx))
76 branchIdToDotId :: PhyloBranchId -> DotId
77 branchIdToDotId bId = (fromStrict . Text.pack) $ ("branch" <> show (snd bId))
79 periodIdToDotId :: PhyloPeriodId -> DotId
80 periodIdToDotId prd = (fromStrict . Text.pack) $ ("period" <> show (fst prd) <> show (snd prd))
82 groupToTable :: Vector Ngrams -> PhyloGroup -> H.Label
83 groupToTable fdt g = H.Table H.HTable
84 { H.tableFontAttrs = Just [H.PointSize 14, H.Align H.HLeft]
85 , H.tableAttrs = [H.Border 0, H.CellBorder 0, H.BGColor (toColor White)]
86 , H.tableRows = [header]
87 <> [H.Cells [H.LabelCell [H.Height 10] $ H.Text [H.Str $ fromStrict ""]]]
88 <> ( map ngramsToRow $ splitEvery 4
89 $ reverse $ sortOn (snd . snd)
90 $ zip (ngramsToText fdt (g ^. phylo_groupNgrams))
91 $ zip ((g ^. phylo_groupMeta) ! "dynamics") ((g ^. phylo_groupMeta) ! "inclusion"))}
93 --------------------------------------
94 ngramsToRow :: [(Ngrams,(Double,Double))] -> H.Row
95 ngramsToRow ns = H.Cells $ map (\(n,(d,_)) ->
96 H.LabelCell [H.Align H.HLeft,dynamicToColor d] $ H.Text [H.Str $ fromStrict n]) ns
97 --------------------------------------
100 H.Cells [ H.LabelCell [pickLabelColor ((g ^. phylo_groupMeta) ! "dynamics")]
101 $ H.Text [H.Str $ (((fromStrict . Text.toUpper) $ g ^. phylo_groupLabel)
102 <> (fromStrict " ( ")
103 <> (pack $ show (fst $ g ^. phylo_groupPeriod))
104 <> (fromStrict " , ")
105 <> (pack $ show (snd $ g ^. phylo_groupPeriod))
106 <> (fromStrict " ) ")
107 <> (pack $ show (getGroupId g)))]]
108 --------------------------------------
110 branchToDotNode :: PhyloBranch -> Int -> Dot DotId
111 branchToDotNode b bId =
112 node (branchIdToDotId $ b ^. branch_id)
113 ([FillColor [toWColor CornSilk], FontName "Arial", FontSize 40, Shape Egg, Style [SItem Bold []], Label (toDotLabel $ b ^. branch_label)]
114 <> (metaToAttr $ b ^. branch_meta)
115 <> [ toAttr "nodeType" "branch"
116 , toAttr "bId" (pack $ show bId)
117 , toAttr "branchId" (pack $ unwords (map show $ snd $ b ^. branch_id))
118 , toAttr "branch_x" (fromStrict $ Text.pack $ (show $ b ^. branch_x))
119 , toAttr "branch_y" (fromStrict $ Text.pack $ (show $ b ^. branch_y))
120 , toAttr "label" (pack $ show $ b ^. branch_label)
123 periodToDotNode :: (Date,Date) -> Dot DotId
124 periodToDotNode prd =
125 node (periodIdToDotId prd)
126 ([Shape BoxShape, FontSize 50, Label (toDotLabel $ Text.pack (show (fst prd) <> " " <> show (snd prd)))]
127 <> [ toAttr "nodeType" "period"
128 , toAttr "from" (fromStrict $ Text.pack $ (show $ fst prd))
129 , toAttr "to" (fromStrict $ Text.pack $ (show $ snd prd))])
132 groupToDotNode :: Vector Ngrams -> PhyloGroup -> Int -> Dot DotId
133 groupToDotNode fdt g bId =
134 node (groupIdToDotId $ getGroupId g)
135 ([FontName "Arial", Shape Square, penWidth 4, toLabel (groupToTable fdt g)]
136 <> [ toAttr "nodeType" "group"
137 , toAttr "from" (pack $ show (fst $ g ^. phylo_groupPeriod))
138 , toAttr "to" (pack $ show (snd $ g ^. phylo_groupPeriod))
139 , toAttr "branchId" (pack $ unwords (init $ map show $ snd $ g ^. phylo_groupBranchId))
140 , toAttr "bId" (pack $ show bId)
141 , toAttr "support" (pack $ show (g ^. phylo_groupSupport))])
144 toDotEdge :: DotId -> DotId -> Text.Text -> EdgeType -> Dot DotId
145 toDotEdge source target lbl edgeType = edge source target
147 GroupToGroup -> [ Width 3, penWidth 4, Color [toWColor Black], Constraint True
148 , Label (StrLabel $ fromStrict lbl)] <> [toAttr "edgeType" "link" ]
149 BranchToGroup -> [ Width 3, Color [toWColor Black], ArrowHead (AType [(ArrMod FilledArrow RightSide,DotArrow)])
150 , Label (StrLabel $ fromStrict lbl)] <> [toAttr "edgeType" "branchLink" ]
151 BranchToBranch -> [ Width 2, Color [toWColor Black], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,DotArrow)])
152 , Label (StrLabel $ fromStrict lbl)]
153 GroupToAncestor -> [ Width 3, Color [toWColor Red], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,NoArrow)])
154 , Label (StrLabel $ fromStrict lbl), PenWidth 4] <> [toAttr "edgeType" "ancestorLink" ]
155 PeriodToPeriod -> [ Width 5, Color [toWColor Black]])
158 mergePointers :: [PhyloGroup] -> Map (PhyloGroupId,PhyloGroupId) Double
159 mergePointers groups =
160 let toChilds = fromList $ concat $ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupPeriodChilds) groups
161 toParents = fromList $ concat $ map (\g -> map (\(target,w) -> ((target,getGroupId g),w)) $ g ^. phylo_groupPeriodParents) groups
162 in unionWith (\w w' -> max w w') toChilds toParents
164 mergeAncestors :: [PhyloGroup] -> [((PhyloGroupId,PhyloGroupId), Double)]
165 mergeAncestors groups = concat
166 $ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupAncestors)
167 $ filter (\g -> (not . null) $ g ^. phylo_groupAncestors) groups
170 toBid :: PhyloGroup -> [PhyloBranch] -> Int
172 let b' = head' "toBid" (filter (\b -> b ^. branch_id == g ^. phylo_groupBranchId) bs)
173 in fromJust $ elemIndex b' bs
175 exportToDot :: Phylo -> PhyloExport -> DotGraph DotId
176 exportToDot phylo export =
177 trace ("\n-- | Convert " <> show(length $ export ^. export_branches) <> " branches and "
178 <> show(length $ export ^. export_groups) <> " groups "
179 <> show(length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) $ export ^. export_groups) <> " terms to a dot file\n\n"
180 <> "##########################") $
181 digraph ((Str . fromStrict) $ (phyloName $ getConfig phylo)) $ do
183 {- 1) init the dot graph -}
184 graphAttrs ( [ Label (toDotLabel $ (phyloName $ getConfig phylo))]
185 <> [ FontSize 30, LabelLoc VTop, NodeSep 1, RankSep [1], Rank SameRank, Splines SplineEdges, Overlap ScaleOverlaps
187 , Style [SItem Filled []],Color [toWColor White]]
188 {-- home made attributes -}
189 <> [(toAttr (fromStrict "phyloFoundations") $ pack $ show (length $ Vector.toList $ getRoots phylo))
190 ,(toAttr (fromStrict "phyloTerms") $ pack $ show (length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) $ export ^. export_groups))
191 ,(toAttr (fromStrict "phyloDocs") $ pack $ show (sum $ elems $ phylo ^. phylo_timeDocs))
192 ,(toAttr (fromStrict "phyloPeriods") $ pack $ show (length $ elems $ phylo ^. phylo_periods))
193 ,(toAttr (fromStrict "phyloBranches") $ pack $ show (length $ export ^. export_branches))
194 ,(toAttr (fromStrict "phyloGroups") $ pack $ show (length $ export ^. export_groups))
198 -- toAttr (fromStrict k) $ (pack . unwords) $ map show v
200 -- 2) create a layer for the branches labels -}
201 subgraph (Str "Branches peaks") $ do
203 graphAttrs [Rank SameRank]
205 -- 3) group the branches by hierarchy
206 -- mapM (\branches ->
207 -- subgraph (Str "Branches clade") $ do
208 -- graphAttrs [Rank SameRank]
210 -- -- 4) create a node for each branch
211 -- mapM branchToDotNode branches
212 -- ) $ elems $ fromListWith (++) $ map (\b -> ((init . snd) $ b ^. branch_id,[b])) $ export ^. export_branches
214 mapM (\b -> branchToDotNode b (fromJust $ elemIndex b (export ^. export_branches))) $ export ^. export_branches
216 {-- 5) create a layer for each period -}
217 _ <- mapM (\period ->
218 subgraph ((Str . fromStrict . Text.pack) $ ("Period" <> show (fst period) <> show (snd period))) $ do
219 graphAttrs [Rank SameRank]
220 periodToDotNode period
222 {-- 6) create a node for each group -}
223 mapM (\g -> groupToDotNode (getRoots phylo) g (toBid g (export ^. export_branches))) (filter (\g -> g ^. phylo_groupPeriod == period) $ export ^. export_groups)
224 ) $ getPeriodIds phylo
226 {-- 7) create the edges between a branch and its first groups -}
227 _ <- mapM (\(bId,groups) ->
228 mapM (\g -> toDotEdge (branchIdToDotId bId) (groupIdToDotId $ getGroupId g) "" BranchToGroup) groups
231 $ map (\groups -> head' "toDot"
232 $ groupBy (\g g' -> g' ^. phylo_groupPeriod == g ^. phylo_groupPeriod)
233 $ sortOn (fst . _phylo_groupPeriod) groups)
234 $ fromListWith (++) $ map (\g -> (g ^. phylo_groupBranchId,[g])) $ export ^. export_groups
236 {- 8) create the edges between the groups -}
237 _ <- mapM (\((k,k'),_) ->
238 toDotEdge (groupIdToDotId k) (groupIdToDotId k') "" GroupToGroup
239 ) $ (toList . mergePointers) $ export ^. export_groups
241 _ <- mapM (\((k,k'),_) ->
242 toDotEdge (groupIdToDotId k) (groupIdToDotId k') "" GroupToAncestor
243 ) $ mergeAncestors $ export ^. export_groups
245 -- | 10) create the edges between the periods
246 _ <- mapM (\(prd,prd') ->
247 toDotEdge (periodIdToDotId prd) (periodIdToDotId prd') "" PeriodToPeriod
248 ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ getPeriodIds phylo
250 {- 8) create the edges between the branches
251 -- _ <- mapM (\(bId,bId') ->
252 -- toDotEdge (branchIdToDotId bId) (branchIdToDotId bId')
253 -- (Text.pack $ show(branchIdsToProximity bId bId'
254 -- (getThresholdInit $ phyloProximity $ getConfig phylo)
255 -- (getThresholdStep $ phyloProximity $ getConfig phylo))) BranchToBranch
256 -- ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ map _branch_id $ export ^. export_branches
260 graphAttrs [Rank SameRank]
267 filterByBranchSize :: Double -> PhyloExport -> PhyloExport
268 filterByBranchSize thr export =
269 let splited = partition (\b -> head' "filter" ((b ^. branch_meta) ! "size") >= thr) $ export ^. export_branches
270 in export & export_branches .~ (fst splited)
271 & export_groups %~ (filter (\g -> not $ elem (g ^. phylo_groupBranchId) (map _branch_id $ snd splited)))
274 processFilters :: [Filter] -> Quality -> PhyloExport -> PhyloExport
275 processFilters filters qua export =
276 foldl (\export' f -> case f of
277 ByBranchSize thr -> if (thr < (fromIntegral $ qua ^. qua_minBranch))
278 then filterByBranchSize (fromIntegral $ qua ^. qua_minBranch) export'
279 else filterByBranchSize thr export'
286 branchToIso :: [PhyloBranch] -> [PhyloBranch]
287 branchToIso branches =
290 $ map (\(b,x) -> b ^. branch_y + 0.05 - x)
292 $ ([0] ++ (map (\(b,b') ->
293 let idx = length $ commonPrefix (b ^. branch_canonId) (b' ^. branch_canonId) []
294 in (b' ^. branch_seaLevel) !! (idx - 1)
295 ) $ listToSeq branches))
296 in map (\(x,b) -> b & branch_x .~ x)
300 sortByHierarchy :: Int -> [PhyloBranch] -> [PhyloBranch]
301 sortByHierarchy depth branches =
302 if (length branches == 1)
303 then branchToIso branches
304 else branchToIso $ concat
306 let partitions = partition (\b -> depth + 1 == ((length . snd) $ b ^. branch_id)) branches'
307 in (sortOn (\b -> (b ^. branch_meta) ! "birth") (fst partitions))
308 ++ (sortByHierarchy (depth + 1) (snd partitions)))
309 $ groupBy (\b b' -> ((take depth . snd) $ b ^. branch_id) == ((take depth . snd) $ b' ^. branch_id) )
310 $ sortOn (\b -> (take depth . snd) $ b ^. branch_id) branches
313 sortByBirthDate :: Order -> PhyloExport -> PhyloExport
314 sortByBirthDate order export =
315 let branches = sortOn (\b -> (b ^. branch_meta) ! "birth") $ export ^. export_branches
316 branches' = case order of
318 Desc -> reverse branches
319 in export & export_branches .~ branches'
321 processSort :: Sort -> PhyloExport -> PhyloExport
322 processSort sort' export = case sort' of
323 ByBirthDate o -> sortByBirthDate o export
324 ByHierarchy -> export & export_branches .~ sortByHierarchy 0 (export ^. export_branches)
331 -- | Return the conditional probability of i knowing j
332 conditional :: Ord a => Map (a,a) Double -> a -> a -> Double
333 conditional m i j = (findWithDefault 0 (i,j) m)
337 -- | Return the genericity score of a given ngram
338 genericity :: Map (Int, Int) Double -> [Int] -> Int -> Double
339 genericity m l i = ( (sum $ map (\j -> conditional m i j) l)
340 - (sum $ map (\j -> conditional m j i) l)) / (fromIntegral $ (length l) + 1)
343 -- | Return the specificity score of a given ngram
344 specificity :: Map (Int, Int) Double -> [Int] -> Int -> Double
345 specificity m l i = ( (sum $ map (\j -> conditional m j i) l)
346 - (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
349 -- | Return the inclusion score of a given ngram
350 inclusion :: Map (Int, Int) Double -> [Int] -> Int -> Double
351 inclusion m l i = ( (sum $ map (\j -> conditional m j i) l)
352 + (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
355 ngramsMetrics :: PhyloExport -> PhyloExport
356 ngramsMetrics export =
359 (\g -> g & phylo_groupMeta %~ insert "genericity"
360 (map (\n -> genericity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
361 & phylo_groupMeta %~ insert "specificity"
362 (map (\n -> specificity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
363 & phylo_groupMeta %~ insert "inclusion"
364 (map (\n -> inclusion (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
368 branchDating :: PhyloExport -> PhyloExport
369 branchDating export =
370 over ( export_branches
373 let groups = sortOn fst
374 $ foldl' (\acc g -> if (g ^. phylo_groupBranchId == b ^. branch_id)
375 then acc ++ [g ^. phylo_groupPeriod]
376 else acc ) [] $ export ^. export_groups
378 birth = fst $ head' "birth" groups
379 age = (snd $ last' "age" groups) - birth
380 in b & branch_meta %~ insert "birth" [fromIntegral birth]
381 & branch_meta %~ insert "age" [fromIntegral age]
382 & branch_meta %~ insert "size" [fromIntegral $ length periods] ) export
384 processMetrics :: PhyloExport -> PhyloExport
385 processMetrics export = ngramsMetrics
386 $ branchDating export
393 getNthMostMeta :: Int -> [Double] -> [Int] -> [Int]
394 getNthMostMeta nth meta ns = map (\(idx,_) -> (ns !! idx))
397 $ sortOn snd $ zip [0..] meta
400 mostInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
401 mostInclusive nth foundations export =
402 over ( export_branches
405 let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
406 cooc = foldl (\acc g -> unionWith (+) acc (g ^. phylo_groupCooc)) empty groups
407 ngrams = sort $ foldl (\acc g -> union acc (g ^. phylo_groupNgrams)) [] groups
408 inc = map (\n -> inclusion cooc (ngrams \\ [n]) n) ngrams
409 lbl = ngramsToLabel foundations $ getNthMostMeta nth inc ngrams
410 in b & branch_label .~ lbl ) export
413 mostEmergentInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
414 mostEmergentInclusive nth foundations export =
418 let lbl = ngramsToLabel foundations
420 $ map (\(_,(_,idx)) -> idx)
422 $ map (\groups -> sortOn (fst . snd) groups)
423 $ groupBy ((==) `on` fst) $ reverse $ sortOn fst
424 $ zip ((g ^. phylo_groupMeta) ! "inclusion")
425 $ zip ((g ^. phylo_groupMeta) ! "dynamics") (g ^. phylo_groupNgrams)
426 in g & phylo_groupLabel .~ lbl ) export
429 processLabels :: [PhyloLabel] -> Vector Ngrams -> PhyloExport -> PhyloExport
430 processLabels labels foundations export =
431 foldl (\export' label ->
433 GroupLabel tagger nth ->
435 MostEmergentInclusive -> mostEmergentInclusive nth foundations export'
436 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger"
437 BranchLabel tagger nth ->
439 MostInclusive -> mostInclusive nth foundations export'
440 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger" ) export labels
448 toDynamics :: Int -> [PhyloGroup] -> PhyloGroup -> Map Int (Date,Date) -> Double
449 toDynamics n parents g m =
450 let prd = g ^. phylo_groupPeriod
451 end = last' "dynamics" (sort $ map snd $ elems m)
452 in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end))
455 else if ((fst prd) == (fst $ m ! n))
463 --------------------------------------
465 isNew = not $ elem n $ concat $ map _phylo_groupNgrams parents
468 processDynamics :: [PhyloGroup] -> [PhyloGroup]
469 processDynamics groups =
471 let parents = filter (\g' -> (g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
472 && ((fst $ g ^. phylo_groupPeriod) > (fst $ g' ^. phylo_groupPeriod))) groups
473 in g & phylo_groupMeta %~ insert "dynamics" (map (\n -> toDynamics n parents g mapNgrams) $ g ^. phylo_groupNgrams) ) groups
475 --------------------------------------
476 mapNgrams :: Map Int (Date,Date)
477 mapNgrams = map (\dates ->
478 let dates' = sort dates
479 in (head' "dynamics" dates', last' "dynamics" dates'))
481 $ foldl (\acc g -> acc ++ ( map (\n -> (n,[fst $ g ^. phylo_groupPeriod, snd $ g ^. phylo_groupPeriod]))
482 $ (g ^. phylo_groupNgrams))) [] groups
489 getGroupThr :: Double -> PhyloGroup -> Double
491 let seaLvl = (g ^. phylo_groupMeta) ! "seaLevels"
492 breaks = (g ^. phylo_groupMeta) ! "breaks"
493 in (last' "export" (take (round $ (last' "export" breaks) + 1) seaLvl)) - step
495 toAncestor :: Double -> Map Int Double -> Proximity -> Double -> [PhyloGroup] -> PhyloGroup -> PhyloGroup
496 toAncestor nbDocs diago proximity step candidates ego =
497 let curr = ego ^. phylo_groupAncestors
498 in ego & phylo_groupAncestors .~ (curr ++ (map (\(g,w) -> (getGroupId g,w))
499 $ filter (\(g,w) -> (w > 0) && (w >= (min (getGroupThr step ego) (getGroupThr step g))))
500 $ map (\g -> (g, toProximity nbDocs diago proximity (ego ^. phylo_groupNgrams) (g ^. phylo_groupNgrams) (g ^. phylo_groupNgrams)))
501 $ filter (\g -> g ^. phylo_groupBranchId /= ego ^. phylo_groupBranchId ) candidates))
504 headsToAncestors :: Double -> Map Int Double -> Proximity -> Double -> [PhyloGroup] -> [PhyloGroup] -> [PhyloGroup]
505 headsToAncestors nbDocs diago proximity step heads acc =
509 let ego = head' "headsToAncestors" heads
510 heads' = tail' "headsToAncestors" heads
511 in headsToAncestors nbDocs diago proximity step heads' (acc ++ [toAncestor nbDocs diago proximity step heads' ego])
514 toHorizon :: Phylo -> Phylo
516 let phyloAncestor = updatePhyloGroups
518 (fromList $ map (\g -> (getGroupId g, g))
520 $ tracePhyloAncestors newGroups) phylo
521 reBranched = fromList $ map (\g -> (getGroupId g, g)) $ concat
522 $ groupsToBranches $ fromList $ map (\g -> (getGroupId g, g)) $ getGroupsFromLevel level phyloAncestor
523 in updatePhyloGroups level reBranched phylo
525 -- | 1) for each periods
526 periods :: [PhyloPeriodId]
527 periods = getPeriodIds phylo
530 level = getLastLevel phylo
533 frame = getTimeFrame $ timeUnit $ getConfig phylo
534 -- | 2) find ancestors between groups without parents
535 mapGroups :: [[PhyloGroup]]
536 mapGroups = map (\prd ->
537 let groups = getGroupsFromLevelPeriods level [prd] phylo
538 childs = getPreviousChildIds level frame prd periods phylo
539 heads = filter (\g -> null (g ^. phylo_groupPeriodParents) && (notElem (getGroupId g) childs)) groups
540 noHeads = groups \\ heads
541 nbDocs = sum $ elems $ filterDocs (phylo ^. phylo_timeDocs) [prd]
542 diago = reduceDiagos $ filterDiago (phylo ^. phylo_timeCooc) [prd]
543 proximity = (phyloProximity $ getConfig phylo)
544 step = case getSeaElevation phylo of
546 Adaptative _ -> undefined
547 -- in headsToAncestors nbDocs diago proximity heads groups []
548 in map (\ego -> toAncestor nbDocs diago proximity step noHeads ego)
549 $ headsToAncestors nbDocs diago proximity step heads []
551 -- | 3) process this task concurrently
552 newGroups :: [[PhyloGroup]]
553 newGroups = mapGroups `using` parList rdeepseq
554 --------------------------------------
556 getPreviousChildIds :: Level -> Int -> PhyloPeriodId -> [PhyloPeriodId] -> Phylo -> [PhyloGroupId]
557 getPreviousChildIds lvl frame curr prds phylo =
558 concat $ map ((map fst) . _phylo_groupPeriodChilds)
559 $ getGroupsFromLevelPeriods lvl (getNextPeriods ToParents frame curr prds) phylo
561 ---------------------
562 -- | phyloExport | --
563 ---------------------
565 toPhyloExport :: Phylo -> DotGraph DotId
566 toPhyloExport phylo = exportToDot phylo
567 $ processFilters (exportFilter $ getConfig phylo) (phyloQuality $ getConfig phylo)
568 $ processSort (exportSort $ getConfig phylo)
569 $ processLabels (exportLabel $ getConfig phylo) (getRoots phylo)
570 $ processMetrics export
572 export :: PhyloExport
573 export = PhyloExport groups branches
574 --------------------------------------
575 branches :: [PhyloBranch]
576 branches = map (\g ->
577 let seaLvl = (g ^. phylo_groupMeta) ! "seaLevels"
578 breaks = (g ^. phylo_groupMeta) ! "breaks"
579 canonId = take (round $ (last' "export" breaks) + 2) (snd $ g ^. phylo_groupBranchId)
580 in PhyloBranch (g ^. phylo_groupBranchId)
584 (last' "export" (take (round $ (last' "export" breaks) + 1) seaLvl))
588 $ map (\gs -> head' "export" gs)
589 $ groupBy (\g g' -> g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
590 $ sortOn (\g -> g ^. phylo_groupBranchId) groups
591 --------------------------------------
592 groups :: [PhyloGroup]
593 groups = traceExportGroups
595 $ getGroupsFromLevel (phyloLevel $ getConfig phylo)
600 traceExportBranches :: [PhyloBranch] -> [PhyloBranch]
601 traceExportBranches branches = trace ("\n"
602 <> "-- | Export " <> show(length branches) <> " branches") branches
604 tracePhyloAncestors :: [[PhyloGroup]] -> [[PhyloGroup]]
605 tracePhyloAncestors groups = trace ("\n"
606 <> "-- | Found " <> show(length $ concat $ map _phylo_groupAncestors $ concat groups) <> " ancestors"
609 tracePhyloInfo :: Phylo -> Phylo
610 tracePhyloInfo phylo = trace ("\n" <> "##########################" <> "\n\n" <> "-- | Phylo with β = "
611 <> show(_qua_granularity $ phyloQuality $ getConfig phylo) <> " applied to "
612 <> show(length $ Vector.toList $ getRoots phylo) <> " foundations"
616 traceExportGroups :: [PhyloGroup] -> [PhyloGroup]
617 traceExportGroups groups = trace ("\n" <> "-- | Export "
618 <> show(length $ nub $ map (\g -> g ^. phylo_groupBranchId) groups) <> " branches, "
619 <> show(length groups) <> " groups and "
620 <> show(length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) groups) <> " terms"