2 Module : Gargantext.Viz.Phylo.PhyloExport
3 Description : Exportation module of a Phylo
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
11 {-# LANGUAGE TypeSynonymInstances #-}
13 module Gargantext.Viz.Phylo.PhyloExport where
15 import Data.Map (Map, fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault, toList)
16 import Data.List ((++), sort, nub, null, concat, sortOn, reverse, groupBy, union, (\\), (!!), init, partition, notElem, unwords, nubBy, inits, elemIndex)
17 import Data.Vector (Vector)
19 import Prelude (writeFile)
20 import Gargantext.Prelude
21 import Gargantext.Viz.AdaptativePhylo
22 import Gargantext.Viz.Phylo.PhyloTools
23 import Gargantext.Viz.Phylo.TemporalMatching (filterDocs, filterDiago, reduceDiagos, toProximity, getNextPeriods)
25 import Control.Lens hiding (Level)
26 import Control.Parallel.Strategies (parList, rdeepseq, using)
27 import Data.GraphViz hiding (DotGraph, Order)
28 import Data.GraphViz.Types.Generalised (DotGraph)
29 import Data.GraphViz.Attributes.Complete hiding (EdgeType, Order)
30 import Data.GraphViz.Types.Monadic
31 import Data.Text.Lazy (fromStrict, pack, unpack)
32 import System.FilePath
33 import Debug.Trace (trace)
35 import qualified Data.Text as Text
36 import qualified Data.Vector as Vector
37 import qualified Data.Text.Lazy as Lazy
38 import qualified Data.GraphViz.Attributes.HTML as H
44 dotToFile :: FilePath -> DotGraph DotId -> IO ()
45 dotToFile filePath dotG = writeFile filePath $ dotToString dotG
47 dotToString :: DotGraph DotId -> [Char]
48 dotToString dotG = unpack (printDotGraph dotG)
50 dynamicToColor :: Double -> H.Attribute
52 | d == 0 = H.BGColor (toColor LightCoral)
53 | d == 1 = H.BGColor (toColor Khaki)
54 | d == 2 = H.BGColor (toColor SkyBlue)
55 | otherwise = H.Color (toColor Black)
57 pickLabelColor :: [Double] -> H.Attribute
59 | elem 0 lst = dynamicToColor 0
60 | elem 2 lst = dynamicToColor 2
61 | elem 1 lst = dynamicToColor 1
62 | otherwise = dynamicToColor 3
64 toDotLabel :: Text.Text -> Label
65 toDotLabel lbl = StrLabel $ fromStrict lbl
67 toAttr :: AttributeName -> Lazy.Text -> CustomAttribute
68 toAttr k v = customAttribute k v
70 metaToAttr :: Map Text.Text [Double] -> [CustomAttribute]
71 metaToAttr meta = map (\(k,v) -> toAttr (fromStrict k) $ (pack . unwords) $ map show v) $ toList meta
73 groupIdToDotId :: PhyloGroupId -> DotId
74 groupIdToDotId (((d,d'),lvl),idx) = (fromStrict . Text.pack) $ ("group" <> (show d) <> (show d') <> (show lvl) <> (show idx))
76 branchIdToDotId :: PhyloBranchId -> DotId
77 branchIdToDotId bId = (fromStrict . Text.pack) $ ("branch" <> show (snd bId))
79 periodIdToDotId :: PhyloPeriodId -> DotId
80 periodIdToDotId prd = (fromStrict . Text.pack) $ ("period" <> show (fst prd) <> show (snd prd))
82 groupToTable :: Vector Ngrams -> PhyloGroup -> H.Label
83 groupToTable fdt g = H.Table H.HTable
84 { H.tableFontAttrs = Just [H.PointSize 14, H.Align H.HLeft]
85 , H.tableAttrs = [H.Border 0, H.CellBorder 0, H.BGColor (toColor White)]
86 , H.tableRows = [header]
87 <> [H.Cells [H.LabelCell [H.Height 10] $ H.Text [H.Str $ fromStrict ""]]]
88 <> ( map ngramsToRow $ splitEvery 4
89 $ reverse $ sortOn (snd . snd)
90 $ zip (ngramsToText fdt (g ^. phylo_groupNgrams))
91 $ zip ((g ^. phylo_groupMeta) ! "dynamics") ((g ^. phylo_groupMeta) ! "inclusion"))}
93 --------------------------------------
94 ngramsToRow :: [(Ngrams,(Double,Double))] -> H.Row
95 ngramsToRow ns = H.Cells $ map (\(n,(d,_)) ->
96 H.LabelCell [H.Align H.HLeft,dynamicToColor d] $ H.Text [H.Str $ fromStrict n]) ns
97 --------------------------------------
100 H.Cells [ H.LabelCell [pickLabelColor ((g ^. phylo_groupMeta) ! "dynamics")]
101 $ H.Text [H.Str $ (((fromStrict . Text.toUpper) $ g ^. phylo_groupLabel)
102 <> (fromStrict " ( ")
103 <> (pack $ show (fst $ g ^. phylo_groupPeriod))
104 <> (fromStrict " , ")
105 <> (pack $ show (snd $ g ^. phylo_groupPeriod))
106 <> (fromStrict " ) ")
107 <> (pack $ show (getGroupId g)))]]
108 --------------------------------------
110 branchToDotNode :: PhyloBranch -> Int -> Dot DotId
111 branchToDotNode b bId =
112 node (branchIdToDotId $ b ^. branch_id)
113 ([FillColor [toWColor CornSilk], FontName "Arial", FontSize 40, Shape Egg, Style [SItem Bold []], Label (toDotLabel $ b ^. branch_label)]
114 <> (metaToAttr $ b ^. branch_meta)
115 <> [ toAttr "nodeType" "branch"
116 , toAttr "bId" (pack $ show bId)
117 , toAttr "branchId" (pack $ unwords (map show $ snd $ b ^. branch_id))
118 , toAttr "branch_x" (fromStrict $ Text.pack $ (show $ b ^. branch_x))
119 , toAttr "branch_y" (fromStrict $ Text.pack $ (show $ b ^. branch_y))
120 , toAttr "label" (pack $ show $ b ^. branch_label)
123 periodToDotNode :: (Date,Date) -> Dot DotId
124 periodToDotNode prd =
125 node (periodIdToDotId prd)
126 ([Shape BoxShape, FontSize 50, Label (toDotLabel $ Text.pack (show (fst prd) <> " " <> show (snd prd)))]
127 <> [ toAttr "nodeType" "period"
128 , toAttr "from" (fromStrict $ Text.pack $ (show $ fst prd))
129 , toAttr "to" (fromStrict $ Text.pack $ (show $ snd prd))])
132 groupToDotNode :: Vector Ngrams -> PhyloGroup -> Int -> Dot DotId
133 groupToDotNode fdt g bId =
134 node (groupIdToDotId $ getGroupId g)
135 ([FontName "Arial", Shape Square, penWidth 4, toLabel (groupToTable fdt g)]
136 <> [ toAttr "nodeType" "group"
137 , toAttr "from" (pack $ show (fst $ g ^. phylo_groupPeriod))
138 , toAttr "to" (pack $ show (snd $ g ^. phylo_groupPeriod))
139 , toAttr "branchId" (pack $ unwords (init $ map show $ snd $ g ^. phylo_groupBranchId))
140 , toAttr "bId" (pack $ show bId)
141 , toAttr "support" (pack $ show (g ^. phylo_groupSupport))
142 , toAttr "label" (pack $ show (ngramsToLabel fdt (g ^. phylo_groupNgrams)))
143 , toAttr "foundation" (pack $ show (idxToLabel (g ^. phylo_groupNgrams)))
144 , toAttr "role" (pack $ show (idxToLabel' ((g ^. phylo_groupMeta) ! "dynamics")))
148 toDotEdge :: DotId -> DotId -> Text.Text -> EdgeType -> Dot DotId
149 toDotEdge source target lbl edgeType = edge source target
151 GroupToGroup -> [ Width 3, penWidth 4, Color [toWColor Black], Constraint True
152 , Label (StrLabel $ fromStrict lbl)] <> [toAttr "edgeType" "link" ]
153 BranchToGroup -> [ Width 3, Color [toWColor Black], ArrowHead (AType [(ArrMod FilledArrow RightSide,DotArrow)])
154 , Label (StrLabel $ fromStrict lbl)] <> [toAttr "edgeType" "branchLink" ]
155 BranchToBranch -> [ Width 2, Color [toWColor Black], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,DotArrow)])
156 , Label (StrLabel $ fromStrict lbl)]
157 GroupToAncestor -> [ Width 3, Color [toWColor Red], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,NoArrow)])
158 , Label (StrLabel $ fromStrict lbl), PenWidth 4] <> [toAttr "edgeType" "ancestorLink" ]
159 PeriodToPeriod -> [ Width 5, Color [toWColor Black]])
162 mergePointers :: [PhyloGroup] -> Map (PhyloGroupId,PhyloGroupId) Double
163 mergePointers groups =
164 let toChilds = fromList $ concat $ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupPeriodChilds) groups
165 toParents = fromList $ concat $ map (\g -> map (\(target,w) -> ((target,getGroupId g),w)) $ g ^. phylo_groupPeriodParents) groups
166 in unionWith (\w w' -> max w w') toChilds toParents
168 mergeAncestors :: [PhyloGroup] -> [((PhyloGroupId,PhyloGroupId), Double)]
169 mergeAncestors groups = concat
170 $ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupAncestors)
171 $ filter (\g -> (not . null) $ g ^. phylo_groupAncestors) groups
174 toBid :: PhyloGroup -> [PhyloBranch] -> Int
176 let b' = head' "toBid" (filter (\b -> b ^. branch_id == g ^. phylo_groupBranchId) bs)
177 in fromJust $ elemIndex b' bs
179 exportToDot :: Phylo -> PhyloExport -> DotGraph DotId
180 exportToDot phylo export =
181 trace ("\n-- | Convert " <> show(length $ export ^. export_branches) <> " branches and "
182 <> show(length $ export ^. export_groups) <> " groups "
183 <> show(length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) $ export ^. export_groups) <> " terms to a dot file\n\n"
184 <> "##########################") $
185 digraph ((Str . fromStrict) $ (phyloName $ getConfig phylo)) $ do
187 {- 1) init the dot graph -}
188 graphAttrs ( [ Label (toDotLabel $ (phyloName $ getConfig phylo))]
189 <> [ FontSize 30, LabelLoc VTop, NodeSep 1, RankSep [1], Rank SameRank, Splines SplineEdges, Overlap ScaleOverlaps
191 , Style [SItem Filled []],Color [toWColor White]]
192 {-- home made attributes -}
193 <> [(toAttr (fromStrict "phyloFoundations") $ pack $ show (length $ Vector.toList $ getRoots phylo))
194 ,(toAttr (fromStrict "phyloTerms") $ pack $ show (length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) $ export ^. export_groups))
195 ,(toAttr (fromStrict "phyloDocs") $ pack $ show (sum $ elems $ phylo ^. phylo_timeDocs))
196 ,(toAttr (fromStrict "phyloPeriods") $ pack $ show (length $ elems $ phylo ^. phylo_periods))
197 ,(toAttr (fromStrict "phyloBranches") $ pack $ show (length $ export ^. export_branches))
198 ,(toAttr (fromStrict "phyloGroups") $ pack $ show (length $ export ^. export_groups))
199 ,(toAttr (fromStrict "phyloTermsFreq") $ pack $ show (toList $ _phylo_lastTermFreq phylo))
203 -- toAttr (fromStrict k) $ (pack . unwords) $ map show v
205 -- 2) create a layer for the branches labels -}
206 subgraph (Str "Branches peaks") $ do
208 graphAttrs [Rank SameRank]
210 -- 3) group the branches by hierarchy
211 -- mapM (\branches ->
212 -- subgraph (Str "Branches clade") $ do
213 -- graphAttrs [Rank SameRank]
215 -- -- 4) create a node for each branch
216 -- mapM branchToDotNode branches
217 -- ) $ elems $ fromListWith (++) $ map (\b -> ((init . snd) $ b ^. branch_id,[b])) $ export ^. export_branches
219 mapM (\b -> branchToDotNode b (fromJust $ elemIndex b (export ^. export_branches))) $ export ^. export_branches
221 {-- 5) create a layer for each period -}
222 _ <- mapM (\period ->
223 subgraph ((Str . fromStrict . Text.pack) $ ("Period" <> show (fst period) <> show (snd period))) $ do
224 graphAttrs [Rank SameRank]
225 periodToDotNode period
227 {-- 6) create a node for each group -}
228 mapM (\g -> groupToDotNode (getRoots phylo) g (toBid g (export ^. export_branches))) (filter (\g -> g ^. phylo_groupPeriod == period) $ export ^. export_groups)
229 ) $ getPeriodIds phylo
231 {-- 7) create the edges between a branch and its first groups -}
232 _ <- mapM (\(bId,groups) ->
233 mapM (\g -> toDotEdge (branchIdToDotId bId) (groupIdToDotId $ getGroupId g) "" BranchToGroup) groups
236 $ map (\groups -> head' "toDot"
237 $ groupBy (\g g' -> g' ^. phylo_groupPeriod == g ^. phylo_groupPeriod)
238 $ sortOn (fst . _phylo_groupPeriod) groups)
239 $ fromListWith (++) $ map (\g -> (g ^. phylo_groupBranchId,[g])) $ export ^. export_groups
241 {- 8) create the edges between the groups -}
242 _ <- mapM (\((k,k'),_) ->
243 toDotEdge (groupIdToDotId k) (groupIdToDotId k') "" GroupToGroup
244 ) $ (toList . mergePointers) $ export ^. export_groups
246 _ <- mapM (\((k,k'),_) ->
247 toDotEdge (groupIdToDotId k) (groupIdToDotId k') "" GroupToAncestor
248 ) $ mergeAncestors $ export ^. export_groups
250 -- | 10) create the edges between the periods
251 _ <- mapM (\(prd,prd') ->
252 toDotEdge (periodIdToDotId prd) (periodIdToDotId prd') "" PeriodToPeriod
253 ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ getPeriodIds phylo
255 {- 8) create the edges between the branches
256 -- _ <- mapM (\(bId,bId') ->
257 -- toDotEdge (branchIdToDotId bId) (branchIdToDotId bId')
258 -- (Text.pack $ show(branchIdsToProximity bId bId'
259 -- (getThresholdInit $ phyloProximity $ getConfig phylo)
260 -- (getThresholdStep $ phyloProximity $ getConfig phylo))) BranchToBranch
261 -- ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ map _branch_id $ export ^. export_branches
265 graphAttrs [Rank SameRank]
272 filterByBranchSize :: Double -> PhyloExport -> PhyloExport
273 filterByBranchSize thr export =
274 let splited = partition (\b -> head' "filter" ((b ^. branch_meta) ! "size") >= thr) $ export ^. export_branches
275 in export & export_branches .~ (fst splited)
276 & export_groups %~ (filter (\g -> not $ elem (g ^. phylo_groupBranchId) (map _branch_id $ snd splited)))
279 processFilters :: [Filter] -> Quality -> PhyloExport -> PhyloExport
280 processFilters filters qua export =
281 foldl (\export' f -> case f of
282 ByBranchSize thr -> if (thr < (fromIntegral $ qua ^. qua_minBranch))
283 then filterByBranchSize (fromIntegral $ qua ^. qua_minBranch) export'
284 else filterByBranchSize thr export'
291 branchToIso :: [PhyloBranch] -> [PhyloBranch]
292 branchToIso branches =
295 $ map (\(b,x) -> b ^. branch_y + 0.05 - x)
297 $ ([0] ++ (map (\(b,b') ->
298 let idx = length $ commonPrefix (b ^. branch_canonId) (b' ^. branch_canonId) []
299 in (b' ^. branch_seaLevel) !! (idx - 1)
300 ) $ listToSeq branches))
301 in map (\(x,b) -> b & branch_x .~ x)
305 sortByHierarchy :: Int -> [PhyloBranch] -> [PhyloBranch]
306 sortByHierarchy depth branches =
307 if (length branches == 1)
308 then branchToIso branches
309 else branchToIso $ concat
311 let partitions = partition (\b -> depth + 1 == ((length . snd) $ b ^. branch_id)) branches'
312 in (sortOn (\b -> (b ^. branch_meta) ! "birth") (fst partitions))
313 ++ (sortByHierarchy (depth + 1) (snd partitions)))
314 $ groupBy (\b b' -> ((take depth . snd) $ b ^. branch_id) == ((take depth . snd) $ b' ^. branch_id) )
315 $ sortOn (\b -> (take depth . snd) $ b ^. branch_id) branches
318 sortByBirthDate :: Order -> PhyloExport -> PhyloExport
319 sortByBirthDate order export =
320 let branches = sortOn (\b -> (b ^. branch_meta) ! "birth") $ export ^. export_branches
321 branches' = case order of
323 Desc -> reverse branches
324 in export & export_branches .~ branches'
326 processSort :: Sort -> PhyloExport -> PhyloExport
327 processSort sort' export = case sort' of
328 ByBirthDate o -> sortByBirthDate o export
329 ByHierarchy -> export & export_branches .~ sortByHierarchy 0 (export ^. export_branches)
336 -- | Return the conditional probability of i knowing j
337 conditional :: Ord a => Map (a,a) Double -> a -> a -> Double
338 conditional m i j = (findWithDefault 0 (i,j) m)
342 -- | Return the genericity score of a given ngram
343 genericity :: Map (Int, Int) Double -> [Int] -> Int -> Double
344 genericity m l i = ( (sum $ map (\j -> conditional m i j) l)
345 - (sum $ map (\j -> conditional m j i) l)) / (fromIntegral $ (length l) + 1)
348 -- | Return the specificity score of a given ngram
349 specificity :: Map (Int, Int) Double -> [Int] -> Int -> Double
350 specificity m l i = ( (sum $ map (\j -> conditional m j i) l)
351 - (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
354 -- | Return the inclusion score of a given ngram
355 inclusion :: Map (Int, Int) Double -> [Int] -> Int -> Double
356 inclusion m l i = ( (sum $ map (\j -> conditional m j i) l)
357 + (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
360 ngramsMetrics :: PhyloExport -> PhyloExport
361 ngramsMetrics export =
364 (\g -> g & phylo_groupMeta %~ insert "genericity"
365 (map (\n -> genericity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
366 & phylo_groupMeta %~ insert "specificity"
367 (map (\n -> specificity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
368 & phylo_groupMeta %~ insert "inclusion"
369 (map (\n -> inclusion (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
373 branchDating :: PhyloExport -> PhyloExport
374 branchDating export =
375 over ( export_branches
378 let groups = sortOn fst
379 $ foldl' (\acc g -> if (g ^. phylo_groupBranchId == b ^. branch_id)
380 then acc ++ [g ^. phylo_groupPeriod]
381 else acc ) [] $ export ^. export_groups
383 birth = fst $ head' "birth" groups
384 age = (snd $ last' "age" groups) - birth
385 in b & branch_meta %~ insert "birth" [fromIntegral birth]
386 & branch_meta %~ insert "age" [fromIntegral age]
387 & branch_meta %~ insert "size" [fromIntegral $ length periods] ) export
389 processMetrics :: PhyloExport -> PhyloExport
390 processMetrics export = ngramsMetrics
391 $ branchDating export
398 getNthMostMeta :: Int -> [Double] -> [Int] -> [Int]
399 getNthMostMeta nth meta ns = map (\(idx,_) -> (ns !! idx))
402 $ sortOn snd $ zip [0..] meta
405 mostInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
406 mostInclusive nth foundations export =
407 over ( export_branches
410 let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
411 cooc = foldl (\acc g -> unionWith (+) acc (g ^. phylo_groupCooc)) empty groups
412 ngrams = sort $ foldl (\acc g -> union acc (g ^. phylo_groupNgrams)) [] groups
413 inc = map (\n -> inclusion cooc (ngrams \\ [n]) n) ngrams
414 lbl = ngramsToLabel foundations $ getNthMostMeta nth inc ngrams
415 in b & branch_label .~ lbl ) export
418 mostEmergentInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
419 mostEmergentInclusive nth foundations export =
423 let lbl = ngramsToLabel foundations
425 $ map (\(_,(_,idx)) -> idx)
427 $ map (\groups -> sortOn (fst . snd) groups)
428 $ groupBy ((==) `on` fst) $ reverse $ sortOn fst
429 $ zip ((g ^. phylo_groupMeta) ! "inclusion")
430 $ zip ((g ^. phylo_groupMeta) ! "dynamics") (g ^. phylo_groupNgrams)
431 in g & phylo_groupLabel .~ lbl ) export
434 processLabels :: [PhyloLabel] -> Vector Ngrams -> PhyloExport -> PhyloExport
435 processLabels labels foundations export =
436 foldl (\export' label ->
438 GroupLabel tagger nth ->
440 MostEmergentInclusive -> mostEmergentInclusive nth foundations export'
441 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger"
442 BranchLabel tagger nth ->
444 MostInclusive -> mostInclusive nth foundations export'
445 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger" ) export labels
453 toDynamics :: Int -> [PhyloGroup] -> PhyloGroup -> Map Int (Date,Date) -> Double
454 toDynamics n parents g m =
455 let prd = g ^. phylo_groupPeriod
456 end = last' "dynamics" (sort $ map snd $ elems m)
457 in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end))
460 else if ((fst prd) == (fst $ m ! n))
468 --------------------------------------
470 isNew = not $ elem n $ concat $ map _phylo_groupNgrams parents
473 processDynamics :: [PhyloGroup] -> [PhyloGroup]
474 processDynamics groups =
476 let parents = filter (\g' -> (g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
477 && ((fst $ g ^. phylo_groupPeriod) > (fst $ g' ^. phylo_groupPeriod))) groups
478 in g & phylo_groupMeta %~ insert "dynamics" (map (\n -> toDynamics n parents g mapNgrams) $ g ^. phylo_groupNgrams) ) groups
480 --------------------------------------
481 mapNgrams :: Map Int (Date,Date)
482 mapNgrams = map (\dates ->
483 let dates' = sort dates
484 in (head' "dynamics" dates', last' "dynamics" dates'))
486 $ foldl (\acc g -> acc ++ ( map (\n -> (n,[fst $ g ^. phylo_groupPeriod, snd $ g ^. phylo_groupPeriod]))
487 $ (g ^. phylo_groupNgrams))) [] groups
494 getGroupThr :: Double -> PhyloGroup -> Double
496 let seaLvl = (g ^. phylo_groupMeta) ! "seaLevels"
497 breaks = (g ^. phylo_groupMeta) ! "breaks"
498 in (last' "export" (take (round $ (last' "export" breaks) + 1) seaLvl)) - step
500 toAncestor :: Double -> Map Int Double -> Proximity -> Double -> [PhyloGroup] -> PhyloGroup -> PhyloGroup
501 toAncestor nbDocs diago proximity step candidates ego =
502 let curr = ego ^. phylo_groupAncestors
503 in ego & phylo_groupAncestors .~ (curr ++ (map (\(g,w) -> (getGroupId g,w))
504 $ filter (\(g,w) -> (w > 0) && (w >= (min (getGroupThr step ego) (getGroupThr step g))))
505 $ map (\g -> (g, toProximity nbDocs diago proximity (ego ^. phylo_groupNgrams) (g ^. phylo_groupNgrams) (g ^. phylo_groupNgrams)))
506 $ filter (\g -> g ^. phylo_groupBranchId /= ego ^. phylo_groupBranchId ) candidates))
509 headsToAncestors :: Double -> Map Int Double -> Proximity -> Double -> [PhyloGroup] -> [PhyloGroup] -> [PhyloGroup]
510 headsToAncestors nbDocs diago proximity step heads acc =
514 let ego = head' "headsToAncestors" heads
515 heads' = tail' "headsToAncestors" heads
516 in headsToAncestors nbDocs diago proximity step heads' (acc ++ [toAncestor nbDocs diago proximity step heads' ego])
519 toHorizon :: Phylo -> Phylo
521 let phyloAncestor = updatePhyloGroups
523 (fromList $ map (\g -> (getGroupId g, g))
525 $ tracePhyloAncestors newGroups) phylo
526 reBranched = fromList $ map (\g -> (getGroupId g, g)) $ concat
527 $ groupsToBranches $ fromList $ map (\g -> (getGroupId g, g)) $ getGroupsFromLevel level phyloAncestor
528 in updatePhyloGroups level reBranched phylo
530 -- | 1) for each periods
531 periods :: [PhyloPeriodId]
532 periods = getPeriodIds phylo
535 level = getLastLevel phylo
538 frame = getTimeFrame $ timeUnit $ getConfig phylo
539 -- | 2) find ancestors between groups without parents
540 mapGroups :: [[PhyloGroup]]
541 mapGroups = map (\prd ->
542 let groups = getGroupsFromLevelPeriods level [prd] phylo
543 childs = getPreviousChildIds level frame prd periods phylo
544 heads = filter (\g -> null (g ^. phylo_groupPeriodParents) && (notElem (getGroupId g) childs)) groups
545 noHeads = groups \\ heads
546 nbDocs = sum $ elems $ filterDocs (phylo ^. phylo_timeDocs) [prd]
547 diago = reduceDiagos $ filterDiago (phylo ^. phylo_timeCooc) [prd]
548 proximity = (phyloProximity $ getConfig phylo)
549 step = case getSeaElevation phylo of
551 Adaptative _ -> undefined
552 -- in headsToAncestors nbDocs diago proximity heads groups []
553 in map (\ego -> toAncestor nbDocs diago proximity step noHeads ego)
554 $ headsToAncestors nbDocs diago proximity step heads []
556 -- | 3) process this task concurrently
557 newGroups :: [[PhyloGroup]]
558 newGroups = mapGroups `using` parList rdeepseq
559 --------------------------------------
561 getPreviousChildIds :: Level -> Int -> PhyloPeriodId -> [PhyloPeriodId] -> Phylo -> [PhyloGroupId]
562 getPreviousChildIds lvl frame curr prds phylo =
563 concat $ map ((map fst) . _phylo_groupPeriodChilds)
564 $ getGroupsFromLevelPeriods lvl (getNextPeriods ToParents frame curr prds) phylo
566 ---------------------
567 -- | phyloExport | --
568 ---------------------
570 toPhyloExport :: Phylo -> DotGraph DotId
571 toPhyloExport phylo = exportToDot phylo
572 $ processFilters (exportFilter $ getConfig phylo) (phyloQuality $ getConfig phylo)
573 $ processSort (exportSort $ getConfig phylo)
574 $ processLabels (exportLabel $ getConfig phylo) (getRoots phylo)
575 $ processMetrics export
577 export :: PhyloExport
578 export = PhyloExport groups branches
579 --------------------------------------
580 branches :: [PhyloBranch]
581 branches = map (\g ->
582 let seaLvl = (g ^. phylo_groupMeta) ! "seaLevels"
583 breaks = (g ^. phylo_groupMeta) ! "breaks"
584 canonId = take (round $ (last' "export" breaks) + 2) (snd $ g ^. phylo_groupBranchId)
585 in PhyloBranch (g ^. phylo_groupBranchId)
589 (last' "export" (take (round $ (last' "export" breaks) + 1) seaLvl))
593 $ map (\gs -> head' "export" gs)
594 $ groupBy (\g g' -> g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
595 $ sortOn (\g -> g ^. phylo_groupBranchId) groups
596 --------------------------------------
597 groups :: [PhyloGroup]
598 groups = traceExportGroups
600 $ getGroupsFromLevel (phyloLevel $ getConfig phylo)
605 traceExportBranches :: [PhyloBranch] -> [PhyloBranch]
606 traceExportBranches branches = trace ("\n"
607 <> "-- | Export " <> show(length branches) <> " branches") branches
609 tracePhyloAncestors :: [[PhyloGroup]] -> [[PhyloGroup]]
610 tracePhyloAncestors groups = trace ("\n"
611 <> "-- | Found " <> show(length $ concat $ map _phylo_groupAncestors $ concat groups) <> " ancestors"
614 tracePhyloInfo :: Phylo -> Phylo
615 tracePhyloInfo phylo = trace ("\n" <> "##########################" <> "\n\n" <> "-- | Phylo with β = "
616 <> show(_qua_granularity $ phyloQuality $ getConfig phylo) <> " applied to "
617 <> show(length $ Vector.toList $ getRoots phylo) <> " foundations"
621 traceExportGroups :: [PhyloGroup] -> [PhyloGroup]
622 traceExportGroups groups = trace ("\n" <> "-- | Export "
623 <> show(length $ nub $ map (\g -> g ^. phylo_groupBranchId) groups) <> " branches, "
624 <> show(length groups) <> " groups and "
625 <> show(length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) groups) <> " terms"