]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo/PhyloExport.hs
changement du seuil
[gargantext.git] / src / Gargantext / Viz / Phylo / PhyloExport.hs
1 {-|
2 Module : Gargantext.Viz.Phylo.PhyloExport
3 Description : Exportation module of a Phylo
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9 -}
10
11 {-# LANGUAGE NoImplicitPrelude #-}
12 {-# LANGUAGE FlexibleContexts #-}
13 {-# LANGUAGE OverloadedStrings #-}
14 {-# LANGUAGE MultiParamTypeClasses #-}
15 {-# LANGUAGE TypeSynonymInstances #-}
16 {-# LANGUAGE FlexibleInstances #-}
17
18 module Gargantext.Viz.Phylo.PhyloExport where
19
20 import Data.Map (Map, fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault, toList)
21 import Data.List ((++), sort, nub, concat, sortOn, reverse, groupBy, union, (\\), (!!), init, partition, unwords, nubBy, inits, elemIndex)
22 import Data.Vector (Vector)
23
24 import Prelude (writeFile)
25 import Gargantext.Prelude
26 import Gargantext.Viz.AdaptativePhylo
27 import Gargantext.Viz.Phylo.PhyloTools
28
29 import Control.Lens
30 import Data.GraphViz hiding (DotGraph, Order)
31 import Data.GraphViz.Types.Generalised (DotGraph)
32 import Data.GraphViz.Attributes.Complete hiding (EdgeType, Order)
33 import Data.GraphViz.Types.Monadic
34 import Data.Text.Lazy (fromStrict, pack, unpack)
35 import System.FilePath
36 import Debug.Trace (trace)
37
38 import qualified Data.Map as Map
39 import qualified Data.Text as Text
40 import qualified Data.Vector as Vector
41 import qualified Data.Text.Lazy as Lazy
42 import qualified Data.GraphViz.Attributes.HTML as H
43
44 --------------------
45 -- | Dot export | --
46 --------------------
47
48 dotToFile :: FilePath -> DotGraph DotId -> IO ()
49 dotToFile filePath dotG = writeFile filePath $ dotToString dotG
50
51 dotToString :: DotGraph DotId -> [Char]
52 dotToString dotG = unpack (printDotGraph dotG)
53
54 dynamicToColor :: Double -> H.Attribute
55 dynamicToColor d
56 | d == 0 = H.BGColor (toColor LightCoral)
57 | d == 1 = H.BGColor (toColor Khaki)
58 | d == 2 = H.BGColor (toColor SkyBlue)
59 | otherwise = H.Color (toColor Black)
60
61 pickLabelColor :: [Double] -> H.Attribute
62 pickLabelColor lst
63 | elem 0 lst = dynamicToColor 0
64 | elem 2 lst = dynamicToColor 2
65 | elem 1 lst = dynamicToColor 1
66 | otherwise = dynamicToColor 3
67
68 toDotLabel :: Text.Text -> Label
69 toDotLabel lbl = StrLabel $ fromStrict lbl
70
71 toAttr :: AttributeName -> Lazy.Text -> CustomAttribute
72 toAttr k v = customAttribute k v
73
74 metaToAttr :: Map Text.Text [Double] -> [CustomAttribute]
75 metaToAttr meta = map (\(k,v) -> toAttr (fromStrict k) $ (pack . unwords) $ map show v) $ toList meta
76
77 groupIdToDotId :: PhyloGroupId -> DotId
78 groupIdToDotId (((d,d'),lvl),idx) = (fromStrict . Text.pack) $ ("group" <> (show d) <> (show d') <> (show lvl) <> (show idx))
79
80 branchIdToDotId :: PhyloBranchId -> DotId
81 branchIdToDotId bId = (fromStrict . Text.pack) $ ("branch" <> show (snd bId))
82
83 periodIdToDotId :: PhyloPeriodId -> DotId
84 periodIdToDotId prd = (fromStrict . Text.pack) $ ("period" <> show (fst prd) <> show (snd prd))
85
86 groupToTable :: Vector Ngrams -> PhyloGroup -> H.Label
87 groupToTable fdt g = H.Table H.HTable
88 { H.tableFontAttrs = Just [H.PointSize 14, H.Align H.HLeft]
89 , H.tableAttrs = [H.Border 0, H.CellBorder 0, H.BGColor (toColor White)]
90 , H.tableRows = [header]
91 <> [H.Cells [H.LabelCell [H.Height 10] $ H.Text [H.Str $ fromStrict ""]]]
92 <> ( map ngramsToRow $ splitEvery 4
93 $ reverse $ sortOn (snd . snd)
94 $ zip (ngramsToText fdt (g ^. phylo_groupNgrams))
95 $ zip ((g ^. phylo_groupMeta) ! "dynamics") ((g ^. phylo_groupMeta) ! "inclusion"))}
96 where
97 --------------------------------------
98 ngramsToRow :: [(Ngrams,(Double,Double))] -> H.Row
99 ngramsToRow ns = H.Cells $ map (\(n,(d,_)) ->
100 H.LabelCell [H.Align H.HLeft,dynamicToColor d] $ H.Text [H.Str $ fromStrict n]) ns
101 --------------------------------------
102 header :: H.Row
103 header =
104 H.Cells [ H.LabelCell [pickLabelColor ((g ^. phylo_groupMeta) ! "dynamics")]
105 $ H.Text [H.Str $ (((fromStrict . Text.toUpper) $ g ^. phylo_groupLabel)
106 <> (fromStrict " ( ")
107 <> (pack $ show (fst $ g ^. phylo_groupPeriod))
108 <> (fromStrict " , ")
109 <> (pack $ show (snd $ g ^. phylo_groupPeriod))
110 <> (fromStrict " ) ")
111 <> (pack $ show (getGroupId g)))]]
112 --------------------------------------
113
114 branchToDotNode :: PhyloBranch -> Int -> Dot DotId
115 branchToDotNode b bId =
116 node (branchIdToDotId $ b ^. branch_id)
117 ([FillColor [toWColor CornSilk], FontName "Arial", FontSize 40, Shape Egg, Style [SItem Bold []], Label (toDotLabel $ b ^. branch_label)]
118 <> (metaToAttr $ b ^. branch_meta)
119 <> [ toAttr "nodeType" "branch"
120 , toAttr "bId" (pack $ show bId)
121 , toAttr "branchId" (pack $ unwords (map show $ snd $ b ^. branch_id))
122 , toAttr "branch_x" (fromStrict $ Text.pack $ (show $ b ^. branch_x))
123 , toAttr "branch_y" (fromStrict $ Text.pack $ (show $ b ^. branch_y))
124 , toAttr "label" (pack $ show $ b ^. branch_label)
125 ])
126
127 periodToDotNode :: (Date,Date) -> Dot DotId
128 periodToDotNode prd =
129 node (periodIdToDotId prd)
130 ([Shape BoxShape, FontSize 50, Label (toDotLabel $ Text.pack (show (fst prd) <> " " <> show (snd prd)))]
131 <> [ toAttr "nodeType" "period"
132 , toAttr "from" (fromStrict $ Text.pack $ (show $ fst prd))
133 , toAttr "to" (fromStrict $ Text.pack $ (show $ snd prd))])
134
135
136 groupToDotNode :: Vector Ngrams -> PhyloGroup -> Int -> Dot DotId
137 groupToDotNode fdt g bId =
138 node (groupIdToDotId $ getGroupId g)
139 ([FontName "Arial", Shape Square, penWidth 4, toLabel (groupToTable fdt g)]
140 <> [ toAttr "nodeType" "group"
141 , toAttr "from" (pack $ show (fst $ g ^. phylo_groupPeriod))
142 , toAttr "to" (pack $ show (snd $ g ^. phylo_groupPeriod))
143 , toAttr "branchId" (pack $ unwords (init $ map show $ snd $ g ^. phylo_groupBranchId))
144 , toAttr "bId" (pack $ show bId)
145 , toAttr "support" (pack $ show (g ^. phylo_groupSupport))])
146
147
148 toDotEdge :: DotId -> DotId -> Text.Text -> EdgeType -> Dot DotId
149 toDotEdge source target lbl edgeType = edge source target
150 (case edgeType of
151 GroupToGroup -> [ Width 3, penWidth 4, Color [toWColor Black], Constraint True
152 , Label (StrLabel $ fromStrict lbl)] <> [toAttr "edgeType" "link" ]
153 BranchToGroup -> [ Width 3, Color [toWColor Black], ArrowHead (AType [(ArrMod FilledArrow RightSide,DotArrow)])
154 , Label (StrLabel $ fromStrict lbl)] <> [toAttr "edgeType" "branchLink" ]
155 BranchToBranch -> [ Width 2, Color [toWColor Black], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,DotArrow)])
156 , Label (StrLabel $ fromStrict lbl)]
157 PeriodToPeriod -> [ Width 5, Color [toWColor Black]])
158
159
160 mergePointers :: [PhyloGroup] -> Map (PhyloGroupId,PhyloGroupId) Double
161 mergePointers groups =
162 let toChilds = fromList $ concat $ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupPeriodChilds) groups
163 toParents = fromList $ concat $ map (\g -> map (\(target,w) -> ((target,getGroupId g),w)) $ g ^. phylo_groupPeriodParents) groups
164 in unionWith (\w w' -> max w w') toChilds toParents
165
166
167 toBid :: PhyloGroup -> [PhyloBranch] -> Int
168 toBid g bs =
169 let b' = head' "toBid" (filter (\b -> b ^. branch_id == g ^. phylo_groupBranchId) bs)
170 in fromJust $ elemIndex b' bs
171
172 exportToDot :: Phylo -> PhyloExport -> DotGraph DotId
173 exportToDot phylo export =
174 trace ("\n-- | Convert " <> show(length $ export ^. export_branches) <> " branches and "
175 <> show(length $ export ^. export_groups) <> " groups "
176 <> show(length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) $ export ^. export_groups) <> " terms to a dot file\n\n"
177 <> "##########################") $
178 digraph ((Str . fromStrict) $ (phyloName $ getConfig phylo)) $ do
179
180 -- | 1) init the dot graph
181 graphAttrs ( [ Label (toDotLabel $ (phyloName $ getConfig phylo))]
182 <> [ FontSize 30, LabelLoc VTop, NodeSep 1, RankSep [1], Rank SameRank, Splines SplineEdges, Overlap ScaleOverlaps
183 , Ratio FillRatio
184 , Style [SItem Filled []],Color [toWColor White]]
185 -- | home made attributes
186 <> [(toAttr (fromStrict "phyloFoundations") $ pack $ show (length $ Vector.toList $ getRoots phylo))
187 ,(toAttr (fromStrict "phyloTerms") $ pack $ show (length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) $ export ^. export_groups))
188 ,(toAttr (fromStrict "phyloDocs") $ pack $ show (sum $ elems $ phylo ^. phylo_timeDocs))
189 ,(toAttr (fromStrict "phyloPeriods") $ pack $ show (length $ elems $ phylo ^. phylo_periods))
190 ,(toAttr (fromStrict "phyloBranches") $ pack $ show (length $ export ^. export_branches))
191 ,(toAttr (fromStrict "phyloGroups") $ pack $ show (length $ export ^. export_groups))
192 ])
193
194
195 -- toAttr (fromStrict k) $ (pack . unwords) $ map show v
196
197 -- | 2) create a layer for the branches labels
198 subgraph (Str "Branches peaks") $ do
199
200 graphAttrs [Rank SameRank]
201
202 -- | 3) group the branches by hierarchy
203 -- mapM (\branches ->
204 -- subgraph (Str "Branches clade") $ do
205 -- graphAttrs [Rank SameRank]
206
207 -- -- | 4) create a node for each branch
208 -- mapM branchToDotNode branches
209 -- ) $ elems $ fromListWith (++) $ map (\b -> ((init . snd) $ b ^. branch_id,[b])) $ export ^. export_branches
210
211 mapM (\b -> branchToDotNode b (fromJust $ elemIndex b (export ^. export_branches))) $ export ^. export_branches
212
213 -- | 5) create a layer for each period
214 _ <- mapM (\period ->
215 subgraph ((Str . fromStrict . Text.pack) $ ("Period" <> show (fst period) <> show (snd period))) $ do
216 graphAttrs [Rank SameRank]
217 periodToDotNode period
218
219 -- | 6) create a node for each group
220 mapM (\g -> groupToDotNode (getRoots phylo) g (toBid g (export ^. export_branches))) (filter (\g -> g ^. phylo_groupPeriod == period) $ export ^. export_groups)
221 ) $ getPeriodIds phylo
222
223 -- | 7) create the edges between a branch and its first groups
224 _ <- mapM (\(bId,groups) ->
225 mapM (\g -> toDotEdge (branchIdToDotId bId) (groupIdToDotId $ getGroupId g) "" BranchToGroup) groups
226 )
227 $ toList
228 $ map (\groups -> head' "toDot"
229 $ groupBy (\g g' -> g' ^. phylo_groupPeriod == g ^. phylo_groupPeriod)
230 $ sortOn (fst . _phylo_groupPeriod) groups)
231 $ fromListWith (++) $ map (\g -> (g ^. phylo_groupBranchId,[g])) $ export ^. export_groups
232
233 -- | 8) create the edges between the groups
234 _ <- mapM (\((k,k'),_) ->
235 toDotEdge (groupIdToDotId k) (groupIdToDotId k') "" GroupToGroup
236 ) $ (toList . mergePointers) $ export ^. export_groups
237
238 -- | 7) create the edges between the periods
239 _ <- mapM (\(prd,prd') ->
240 toDotEdge (periodIdToDotId prd) (periodIdToDotId prd') "" PeriodToPeriod
241 ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ getPeriodIds phylo
242
243 -- | 8) create the edges between the branches
244 -- _ <- mapM (\(bId,bId') ->
245 -- toDotEdge (branchIdToDotId bId) (branchIdToDotId bId')
246 -- (Text.pack $ show(branchIdsToProximity bId bId'
247 -- (getThresholdInit $ phyloProximity $ getConfig phylo)
248 -- (getThresholdStep $ phyloProximity $ getConfig phylo))) BranchToBranch
249 -- ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ map _branch_id $ export ^. export_branches
250
251
252 graphAttrs [Rank SameRank]
253
254
255
256
257
258 ----------------
259 -- | Filter | --
260 ----------------
261
262 filterByBranchSize :: Double -> PhyloExport -> PhyloExport
263 filterByBranchSize thr export =
264 let branches' = partition (\b -> head' "filter" ((b ^. branch_meta) ! "size") >= thr) $ export ^. export_branches
265 in export & export_branches .~ (fst branches')
266 & export_groups %~ (filter (\g -> not $ elem (g ^. phylo_groupBranchId) (map _branch_id $ snd branches')))
267
268
269 processFilters :: [Filter] -> Quality -> PhyloExport -> PhyloExport
270 processFilters filters qua export =
271 foldl (\export' f -> case f of
272 ByBranchSize thr -> if (thr < (fromIntegral $ qua ^. qua_minBranch))
273 then filterByBranchSize (fromIntegral $ qua ^. qua_minBranch) export'
274 else filterByBranchSize thr export'
275 ) export filters
276
277 --------------
278 -- | Sort | --
279 --------------
280
281 branchToIso :: [PhyloBranch] -> [PhyloBranch]
282 branchToIso branches =
283 let steps = map sum
284 $ inits
285 $ map (\(b,x) -> b ^. branch_y + 0.05 - x)
286 $ zip branches
287 $ ([0] ++ (map (\(b,b') ->
288 let idx = length $ commonPrefix (b ^. branch_canonId) (b' ^. branch_canonId) []
289 in (b' ^. branch_seaLevel) !! (idx - 1)
290 ) $ listToSeq branches))
291 in map (\(x,b) -> b & branch_x .~ x)
292 $ zip steps branches
293
294
295 sortByHierarchy :: Int -> [PhyloBranch] -> [PhyloBranch]
296 sortByHierarchy depth branches =
297 if (length branches == 1)
298 then branchToIso branches
299 else branchToIso $ concat
300 $ map (\branches' ->
301 let partitions = partition (\b -> depth + 1 == ((length . snd) $ b ^. branch_id)) branches'
302 in (sortOn (\b -> (b ^. branch_meta) ! "birth") (fst partitions))
303 ++ (sortByHierarchy (depth + 1) (snd partitions)))
304 $ groupBy (\b b' -> ((take depth . snd) $ b ^. branch_id) == ((take depth . snd) $ b' ^. branch_id) )
305 $ sortOn (\b -> (take depth . snd) $ b ^. branch_id) branches
306
307
308 sortByBirthDate :: Order -> PhyloExport -> PhyloExport
309 sortByBirthDate order export =
310 let branches = sortOn (\b -> (b ^. branch_meta) ! "birth") $ export ^. export_branches
311 branches' = case order of
312 Asc -> branches
313 Desc -> reverse branches
314 in export & export_branches .~ branches'
315
316 processSort :: Sort -> PhyloExport -> PhyloExport
317 processSort sort' export = case sort' of
318 ByBirthDate o -> sortByBirthDate o export
319 ByHierarchy -> export & export_branches .~ sortByHierarchy 0 (export ^. export_branches)
320
321
322 -----------------
323 -- | Metrics | --
324 -----------------
325
326 -- | Return the conditional probability of i knowing j
327 conditional :: Ord a => Map (a,a) Double -> a -> a -> Double
328 conditional m i j = (findWithDefault 0 (i,j) m)
329 / (m ! (j,j))
330
331
332 -- | Return the genericity score of a given ngram
333 genericity :: Map (Int, Int) Double -> [Int] -> Int -> Double
334 genericity m l i = ( (sum $ map (\j -> conditional m i j) l)
335 - (sum $ map (\j -> conditional m j i) l)) / (fromIntegral $ (length l) + 1)
336
337
338 -- | Return the specificity score of a given ngram
339 specificity :: Map (Int, Int) Double -> [Int] -> Int -> Double
340 specificity m l i = ( (sum $ map (\j -> conditional m j i) l)
341 - (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
342
343
344 -- | Return the inclusion score of a given ngram
345 inclusion :: Map (Int, Int) Double -> [Int] -> Int -> Double
346 inclusion m l i = ( (sum $ map (\j -> conditional m j i) l)
347 + (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
348
349
350 ngramsMetrics :: PhyloExport -> PhyloExport
351 ngramsMetrics export =
352 over ( export_groups
353 . traverse )
354 (\g -> g & phylo_groupMeta %~ insert "genericity"
355 (map (\n -> genericity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
356 & phylo_groupMeta %~ insert "specificity"
357 (map (\n -> specificity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
358 & phylo_groupMeta %~ insert "inclusion"
359 (map (\n -> inclusion (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
360 ) export
361
362
363 branchDating :: PhyloExport -> PhyloExport
364 branchDating export =
365 over ( export_branches
366 . traverse )
367 (\b ->
368 let groups = sortOn fst
369 $ foldl' (\acc g -> if (g ^. phylo_groupBranchId == b ^. branch_id)
370 then acc ++ [g ^. phylo_groupPeriod]
371 else acc ) [] $ export ^. export_groups
372 periods = nub groups
373 birth = fst $ head' "birth" groups
374 age = (snd $ last' "age" groups) - birth
375 in b & branch_meta %~ insert "birth" [fromIntegral birth]
376 & branch_meta %~ insert "age" [fromIntegral age]
377 & branch_meta %~ insert "size" [fromIntegral $ length periods] ) export
378
379 processMetrics :: PhyloExport -> PhyloExport
380 processMetrics export = ngramsMetrics
381 $ branchDating export
382
383
384 -----------------
385 -- | Taggers | --
386 -----------------
387
388 getNthMostMeta :: Int -> [Double] -> [Int] -> [Int]
389 getNthMostMeta nth meta ns = map (\(idx,_) -> (ns !! idx))
390 $ take nth
391 $ reverse
392 $ sortOn snd $ zip [0..] meta
393
394
395 mostInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
396 mostInclusive nth foundations export =
397 over ( export_branches
398 . traverse )
399 (\b ->
400 let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
401 cooc = foldl (\acc g -> unionWith (+) acc (g ^. phylo_groupCooc)) empty groups
402 ngrams = sort $ foldl (\acc g -> union acc (g ^. phylo_groupNgrams)) [] groups
403 inc = map (\n -> inclusion cooc (ngrams \\ [n]) n) ngrams
404 lbl = ngramsToLabel foundations $ getNthMostMeta nth inc ngrams
405 in b & branch_label .~ lbl ) export
406
407
408 mostEmergentInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
409 mostEmergentInclusive nth foundations export =
410 over ( export_groups
411 . traverse )
412 (\g ->
413 let lbl = ngramsToLabel foundations
414 $ take nth
415 $ map (\(_,(_,idx)) -> idx)
416 $ concat
417 $ map (\groups -> sortOn (fst . snd) groups)
418 $ groupBy ((==) `on` fst) $ reverse $ sortOn fst
419 $ zip ((g ^. phylo_groupMeta) ! "inclusion")
420 $ zip ((g ^. phylo_groupMeta) ! "dynamics") (g ^. phylo_groupNgrams)
421 in g & phylo_groupLabel .~ lbl ) export
422
423
424 processLabels :: [PhyloLabel] -> Vector Ngrams -> PhyloExport -> PhyloExport
425 processLabels labels foundations export =
426 foldl (\export' label ->
427 case label of
428 GroupLabel tagger nth ->
429 case tagger of
430 MostEmergentInclusive -> mostEmergentInclusive nth foundations export'
431 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger"
432 BranchLabel tagger nth ->
433 case tagger of
434 MostInclusive -> mostInclusive nth foundations export'
435 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger" ) export labels
436
437
438 ------------------
439 -- | Dynamics | --
440 ------------------
441
442
443 toDynamics :: Int -> [PhyloGroup] -> PhyloGroup -> Map Int (Date,Date) -> Double
444 toDynamics n parents g m =
445 let prd = g ^. phylo_groupPeriod
446 end = last' "dynamics" (sort $ map snd $ elems m)
447 in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end))
448 -- | decrease
449 then 2
450 else if ((fst prd) == (fst $ m ! n))
451 -- | recombination
452 then 0
453 else if isNew
454 -- | emergence
455 then 1
456 else 3
457 where
458 --------------------------------------
459 isNew :: Bool
460 isNew = not $ elem n $ concat $ map _phylo_groupNgrams parents
461
462
463 processDynamics :: [PhyloGroup] -> [PhyloGroup]
464 processDynamics groups =
465 map (\g ->
466 let parents = filter (\g' -> (g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
467 && ((fst $ g ^. phylo_groupPeriod) > (fst $ g' ^. phylo_groupPeriod))) groups
468 in g & phylo_groupMeta %~ insert "dynamics" (map (\n -> toDynamics n parents g mapNgrams) $ g ^. phylo_groupNgrams) ) groups
469 where
470 --------------------------------------
471 mapNgrams :: Map Int (Date,Date)
472 mapNgrams = map (\dates ->
473 let dates' = sort dates
474 in (head' "dynamics" dates', last' "dynamics" dates'))
475 $ fromListWith (++)
476 $ foldl (\acc g -> acc ++ ( map (\n -> (n,[fst $ g ^. phylo_groupPeriod, snd $ g ^. phylo_groupPeriod]))
477 $ (g ^. phylo_groupNgrams))) [] groups
478
479
480 -----------------
481 -- | horizon | --
482 -----------------
483
484 horizonToAncestors :: Double -> Phylo -> [PhyloAncestor]
485 horizonToAncestors delta phylo =
486 let horizon = Map.toList $ Map.filter (\v -> v > delta) $ phylo ^. phylo_horizon
487 ct0 = fromList $ map (\g -> (getGroupId g, g)) $ getGroupsFromLevelPeriods 1 (take 1 (getPeriodIds phylo)) phylo
488 aDelta = toRelatedComponents
489 (elems ct0)
490 (map (\((g,g'),v) -> ((ct0 ! g,ct0 ! g'),v)) horizon)
491 in map (\(id,groups) -> toAncestor id groups) $ zip [1..] aDelta
492 where
493 -- | note : possible bug if we sync clus more than once
494 -- | horizon is calculated at level 1, ancestors have to be related to the last level
495 toAncestor :: Int -> [PhyloGroup] -> PhyloAncestor
496 toAncestor id groups = PhyloAncestor id
497 (foldl' (\acc g -> union acc (g ^. phylo_groupNgrams)) [] groups)
498 (concat $ map (\g -> map fst (g ^. phylo_groupLevelParents)) groups)
499
500
501 ---------------------
502 -- | phyloExport | --
503 ---------------------
504
505 toPhyloExport :: Phylo -> DotGraph DotId
506 toPhyloExport phylo = exportToDot phylo
507 $ processFilters (exportFilter $ getConfig phylo) (phyloQuality $ getConfig phylo)
508 $ processSort (exportSort $ getConfig phylo)
509 $ processLabels (exportLabel $ getConfig phylo) (getRoots phylo)
510 $ processMetrics export
511 where
512 export :: PhyloExport
513 export = PhyloExport groups branches (horizonToAncestors 0 phylo)
514 --------------------------------------
515 branches :: [PhyloBranch]
516 branches = map (\g ->
517 let seaLvl = (g ^. phylo_groupMeta) ! "seaLevels"
518 breaks = (g ^. phylo_groupMeta) ! "breaks"
519 canonId = take (round $ (last' "export" breaks) + 2) (snd $ g ^. phylo_groupBranchId)
520 in PhyloBranch (g ^. phylo_groupBranchId)
521 canonId
522 seaLvl
523 0
524 (last' "export" (take (round $ (last' "export" breaks) + 1) seaLvl))
525 0
526 0
527 "" empty)
528 $ map (\gs -> head' "export" gs)
529 $ groupBy (\g g' -> g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
530 $ sortOn (\g -> g ^. phylo_groupBranchId) groups
531 --------------------------------------
532 groups :: [PhyloGroup]
533 groups = traceExportGroups
534 $ processDynamics
535 $ getGroupsFromLevel (phyloLevel $ getConfig phylo)
536 $ tracePhyloInfo phylo
537
538
539 traceExportBranches :: [PhyloBranch] -> [PhyloBranch]
540 traceExportBranches branches = trace ("\n"
541 <> "-- | Export " <> show(length branches) <> " branches") branches
542
543 tracePhyloInfo :: Phylo -> Phylo
544 tracePhyloInfo phylo = trace ("\n" <> "##########################" <> "\n\n" <> "-- | Phylo with β = "
545 <> show(_qua_granularity $ phyloQuality $ getConfig phylo) <> " applied to "
546 <> show(length $ Vector.toList $ getRoots phylo) <> " foundations"
547 ) phylo
548
549
550 traceExportGroups :: [PhyloGroup] -> [PhyloGroup]
551 traceExportGroups groups = trace ("\n" <> "-- | Export "
552 <> show(length $ nub $ map (\g -> g ^. phylo_groupBranchId) groups) <> " branches, "
553 <> show(length groups) <> " groups and "
554 <> show(length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) groups) <> " terms"
555 ) groups
556