]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo/PhyloExport.hs
[Community] Query search contact with text query on documents
[gargantext.git] / src / Gargantext / Viz / Phylo / PhyloExport.hs
1 {-|
2 Module : Gargantext.Viz.Phylo.PhyloExport
3 Description : Exportation module of a Phylo
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9 -}
10
11 {-# LANGUAGE TypeSynonymInstances #-}
12
13 module Gargantext.Viz.Phylo.PhyloExport where
14
15 import Data.Map (Map, fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault, toList)
16 import Data.List ((++), sort, nub, concat, sortOn, reverse, groupBy, union, (\\), (!!), init, partition, unwords, nubBy, inits, elemIndex)
17 import Data.Vector (Vector)
18
19 import Prelude (writeFile)
20 import Gargantext.Prelude
21 import Gargantext.Viz.AdaptativePhylo
22 import Gargantext.Viz.Phylo.PhyloTools
23
24 import Control.Lens
25 import Data.GraphViz hiding (DotGraph, Order)
26 import Data.GraphViz.Types.Generalised (DotGraph)
27 import Data.GraphViz.Attributes.Complete hiding (EdgeType, Order)
28 import Data.GraphViz.Types.Monadic
29 import Data.Text.Lazy (fromStrict, pack, unpack)
30 import System.FilePath
31 import Debug.Trace (trace)
32
33 import qualified Data.Map as Map
34 import qualified Data.Text as Text
35 import qualified Data.Vector as Vector
36 import qualified Data.Text.Lazy as Lazy
37 import qualified Data.GraphViz.Attributes.HTML as H
38
39 --------------------
40 -- | Dot export | --
41 --------------------
42
43 dotToFile :: FilePath -> DotGraph DotId -> IO ()
44 dotToFile filePath dotG = writeFile filePath $ dotToString dotG
45
46 dotToString :: DotGraph DotId -> [Char]
47 dotToString dotG = unpack (printDotGraph dotG)
48
49 dynamicToColor :: Double -> H.Attribute
50 dynamicToColor d
51 | d == 0 = H.BGColor (toColor LightCoral)
52 | d == 1 = H.BGColor (toColor Khaki)
53 | d == 2 = H.BGColor (toColor SkyBlue)
54 | otherwise = H.Color (toColor Black)
55
56 pickLabelColor :: [Double] -> H.Attribute
57 pickLabelColor lst
58 | elem 0 lst = dynamicToColor 0
59 | elem 2 lst = dynamicToColor 2
60 | elem 1 lst = dynamicToColor 1
61 | otherwise = dynamicToColor 3
62
63 toDotLabel :: Text.Text -> Label
64 toDotLabel lbl = StrLabel $ fromStrict lbl
65
66 toAttr :: AttributeName -> Lazy.Text -> CustomAttribute
67 toAttr k v = customAttribute k v
68
69 metaToAttr :: Map Text.Text [Double] -> [CustomAttribute]
70 metaToAttr meta = map (\(k,v) -> toAttr (fromStrict k) $ (pack . unwords) $ map show v) $ toList meta
71
72 groupIdToDotId :: PhyloGroupId -> DotId
73 groupIdToDotId (((d,d'),lvl),idx) = (fromStrict . Text.pack) $ ("group" <> (show d) <> (show d') <> (show lvl) <> (show idx))
74
75 branchIdToDotId :: PhyloBranchId -> DotId
76 branchIdToDotId bId = (fromStrict . Text.pack) $ ("branch" <> show (snd bId))
77
78 periodIdToDotId :: PhyloPeriodId -> DotId
79 periodIdToDotId prd = (fromStrict . Text.pack) $ ("period" <> show (fst prd) <> show (snd prd))
80
81 groupToTable :: Vector Ngrams -> PhyloGroup -> H.Label
82 groupToTable fdt g = H.Table H.HTable
83 { H.tableFontAttrs = Just [H.PointSize 14, H.Align H.HLeft]
84 , H.tableAttrs = [H.Border 0, H.CellBorder 0, H.BGColor (toColor White)]
85 , H.tableRows = [header]
86 <> [H.Cells [H.LabelCell [H.Height 10] $ H.Text [H.Str $ fromStrict ""]]]
87 <> ( map ngramsToRow $ splitEvery 4
88 $ reverse $ sortOn (snd . snd)
89 $ zip (ngramsToText fdt (g ^. phylo_groupNgrams))
90 $ zip ((g ^. phylo_groupMeta) ! "dynamics") ((g ^. phylo_groupMeta) ! "inclusion"))}
91 where
92 --------------------------------------
93 ngramsToRow :: [(Ngrams,(Double,Double))] -> H.Row
94 ngramsToRow ns = H.Cells $ map (\(n,(d,_)) ->
95 H.LabelCell [H.Align H.HLeft,dynamicToColor d] $ H.Text [H.Str $ fromStrict n]) ns
96 --------------------------------------
97 header :: H.Row
98 header =
99 H.Cells [ H.LabelCell [pickLabelColor ((g ^. phylo_groupMeta) ! "dynamics")]
100 $ H.Text [H.Str $ (((fromStrict . Text.toUpper) $ g ^. phylo_groupLabel)
101 <> (fromStrict " ( ")
102 <> (pack $ show (fst $ g ^. phylo_groupPeriod))
103 <> (fromStrict " , ")
104 <> (pack $ show (snd $ g ^. phylo_groupPeriod))
105 <> (fromStrict " ) ")
106 <> (pack $ show (getGroupId g)))]]
107 --------------------------------------
108
109 branchToDotNode :: PhyloBranch -> Int -> Dot DotId
110 branchToDotNode b bId =
111 node (branchIdToDotId $ b ^. branch_id)
112 ([FillColor [toWColor CornSilk], FontName "Arial", FontSize 40, Shape Egg, Style [SItem Bold []], Label (toDotLabel $ b ^. branch_label)]
113 <> (metaToAttr $ b ^. branch_meta)
114 <> [ toAttr "nodeType" "branch"
115 , toAttr "bId" (pack $ show bId)
116 , toAttr "branchId" (pack $ unwords (map show $ snd $ b ^. branch_id))
117 , toAttr "branch_x" (fromStrict $ Text.pack $ (show $ b ^. branch_x))
118 , toAttr "branch_y" (fromStrict $ Text.pack $ (show $ b ^. branch_y))
119 , toAttr "label" (pack $ show $ b ^. branch_label)
120 ])
121
122 periodToDotNode :: (Date,Date) -> Dot DotId
123 periodToDotNode prd =
124 node (periodIdToDotId prd)
125 ([Shape BoxShape, FontSize 50, Label (toDotLabel $ Text.pack (show (fst prd) <> " " <> show (snd prd)))]
126 <> [ toAttr "nodeType" "period"
127 , toAttr "from" (fromStrict $ Text.pack $ (show $ fst prd))
128 , toAttr "to" (fromStrict $ Text.pack $ (show $ snd prd))])
129
130
131 groupToDotNode :: Vector Ngrams -> PhyloGroup -> Int -> Dot DotId
132 groupToDotNode fdt g bId =
133 node (groupIdToDotId $ getGroupId g)
134 ([FontName "Arial", Shape Square, penWidth 4, toLabel (groupToTable fdt g)]
135 <> [ toAttr "nodeType" "group"
136 , toAttr "from" (pack $ show (fst $ g ^. phylo_groupPeriod))
137 , toAttr "to" (pack $ show (snd $ g ^. phylo_groupPeriod))
138 , toAttr "branchId" (pack $ unwords (init $ map show $ snd $ g ^. phylo_groupBranchId))
139 , toAttr "bId" (pack $ show bId)
140 , toAttr "support" (pack $ show (g ^. phylo_groupSupport))])
141
142
143 toDotEdge :: DotId -> DotId -> Text.Text -> EdgeType -> Dot DotId
144 toDotEdge source target lbl edgeType = edge source target
145 (case edgeType of
146 GroupToGroup -> [ Width 3, penWidth 4, Color [toWColor Black], Constraint True
147 , Label (StrLabel $ fromStrict lbl)] <> [toAttr "edgeType" "link" ]
148 BranchToGroup -> [ Width 3, Color [toWColor Black], ArrowHead (AType [(ArrMod FilledArrow RightSide,DotArrow)])
149 , Label (StrLabel $ fromStrict lbl)] <> [toAttr "edgeType" "branchLink" ]
150 BranchToBranch -> [ Width 2, Color [toWColor Black], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,DotArrow)])
151 , Label (StrLabel $ fromStrict lbl)]
152 PeriodToPeriod -> [ Width 5, Color [toWColor Black]])
153
154
155 mergePointers :: [PhyloGroup] -> Map (PhyloGroupId,PhyloGroupId) Double
156 mergePointers groups =
157 let toChilds = fromList $ concat $ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupPeriodChilds) groups
158 toParents = fromList $ concat $ map (\g -> map (\(target,w) -> ((target,getGroupId g),w)) $ g ^. phylo_groupPeriodParents) groups
159 in unionWith (\w w' -> max w w') toChilds toParents
160
161
162 toBid :: PhyloGroup -> [PhyloBranch] -> Int
163 toBid g bs =
164 let b' = head' "toBid" (filter (\b -> b ^. branch_id == g ^. phylo_groupBranchId) bs)
165 in fromJust $ elemIndex b' bs
166
167 exportToDot :: Phylo -> PhyloExport -> DotGraph DotId
168 exportToDot phylo export =
169 trace ("\n-- | Convert " <> show(length $ export ^. export_branches) <> " branches and "
170 <> show(length $ export ^. export_groups) <> " groups "
171 <> show(length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) $ export ^. export_groups) <> " terms to a dot file\n\n"
172 <> "##########################") $
173 digraph ((Str . fromStrict) $ (phyloName $ getConfig phylo)) $ do
174
175 {- 1) init the dot graph -}
176 graphAttrs ( [ Label (toDotLabel $ (phyloName $ getConfig phylo))]
177 <> [ FontSize 30, LabelLoc VTop, NodeSep 1, RankSep [1], Rank SameRank, Splines SplineEdges, Overlap ScaleOverlaps
178 , Ratio FillRatio
179 , Style [SItem Filled []],Color [toWColor White]]
180 {-- home made attributes -}
181 <> [(toAttr (fromStrict "phyloFoundations") $ pack $ show (length $ Vector.toList $ getRoots phylo))
182 ,(toAttr (fromStrict "phyloTerms") $ pack $ show (length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) $ export ^. export_groups))
183 ,(toAttr (fromStrict "phyloDocs") $ pack $ show (sum $ elems $ phylo ^. phylo_timeDocs))
184 ,(toAttr (fromStrict "phyloPeriods") $ pack $ show (length $ elems $ phylo ^. phylo_periods))
185 ,(toAttr (fromStrict "phyloBranches") $ pack $ show (length $ export ^. export_branches))
186 ,(toAttr (fromStrict "phyloGroups") $ pack $ show (length $ export ^. export_groups))
187 ])
188
189 {-
190 -- toAttr (fromStrict k) $ (pack . unwords) $ map show v
191
192 -- 2) create a layer for the branches labels -}
193 subgraph (Str "Branches peaks") $ do
194
195 graphAttrs [Rank SameRank]
196 {-
197 -- 3) group the branches by hierarchy
198 -- mapM (\branches ->
199 -- subgraph (Str "Branches clade") $ do
200 -- graphAttrs [Rank SameRank]
201
202 -- -- 4) create a node for each branch
203 -- mapM branchToDotNode branches
204 -- ) $ elems $ fromListWith (++) $ map (\b -> ((init . snd) $ b ^. branch_id,[b])) $ export ^. export_branches
205 -}
206 mapM (\b -> branchToDotNode b (fromJust $ elemIndex b (export ^. export_branches))) $ export ^. export_branches
207
208 {-- 5) create a layer for each period -}
209 _ <- mapM (\period ->
210 subgraph ((Str . fromStrict . Text.pack) $ ("Period" <> show (fst period) <> show (snd period))) $ do
211 graphAttrs [Rank SameRank]
212 periodToDotNode period
213
214 {-- 6) create a node for each group -}
215 mapM (\g -> groupToDotNode (getRoots phylo) g (toBid g (export ^. export_branches))) (filter (\g -> g ^. phylo_groupPeriod == period) $ export ^. export_groups)
216 ) $ getPeriodIds phylo
217
218 {-- 7) create the edges between a branch and its first groups -}
219 _ <- mapM (\(bId,groups) ->
220 mapM (\g -> toDotEdge (branchIdToDotId bId) (groupIdToDotId $ getGroupId g) "" BranchToGroup) groups
221 )
222 $ toList
223 $ map (\groups -> head' "toDot"
224 $ groupBy (\g g' -> g' ^. phylo_groupPeriod == g ^. phylo_groupPeriod)
225 $ sortOn (fst . _phylo_groupPeriod) groups)
226 $ fromListWith (++) $ map (\g -> (g ^. phylo_groupBranchId,[g])) $ export ^. export_groups
227
228 {- 8) create the edges between the groups -}
229 _ <- mapM (\((k,k'),_) ->
230 toDotEdge (groupIdToDotId k) (groupIdToDotId k') "" GroupToGroup
231 ) $ (toList . mergePointers) $ export ^. export_groups
232
233 {- 7) create the edges between the periods -}
234 _ <- mapM (\(prd,prd') ->
235 toDotEdge (periodIdToDotId prd) (periodIdToDotId prd') "" PeriodToPeriod
236 ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ getPeriodIds phylo
237
238 {- 8) create the edges between the branches
239 -- _ <- mapM (\(bId,bId') ->
240 -- toDotEdge (branchIdToDotId bId) (branchIdToDotId bId')
241 -- (Text.pack $ show(branchIdsToProximity bId bId'
242 -- (getThresholdInit $ phyloProximity $ getConfig phylo)
243 -- (getThresholdStep $ phyloProximity $ getConfig phylo))) BranchToBranch
244 -- ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ map _branch_id $ export ^. export_branches
245 -}
246
247
248 graphAttrs [Rank SameRank]
249
250
251 ----------------
252 -- | Filter | --
253 ----------------
254
255 filterByBranchSize :: Double -> PhyloExport -> PhyloExport
256 filterByBranchSize thr export =
257 let branches' = partition (\b -> head' "filter" ((b ^. branch_meta) ! "size") >= thr) $ export ^. export_branches
258 in export & export_branches .~ (fst branches')
259 & export_groups %~ (filter (\g -> not $ elem (g ^. phylo_groupBranchId) (map _branch_id $ snd branches')))
260
261
262 processFilters :: [Filter] -> Quality -> PhyloExport -> PhyloExport
263 processFilters filters qua export =
264 foldl (\export' f -> case f of
265 ByBranchSize thr -> if (thr < (fromIntegral $ qua ^. qua_minBranch))
266 then filterByBranchSize (fromIntegral $ qua ^. qua_minBranch) export'
267 else filterByBranchSize thr export'
268 ) export filters
269
270 --------------
271 -- | Sort | --
272 --------------
273
274 branchToIso :: [PhyloBranch] -> [PhyloBranch]
275 branchToIso branches =
276 let steps = map sum
277 $ inits
278 $ map (\(b,x) -> b ^. branch_y + 0.05 - x)
279 $ zip branches
280 $ ([0] ++ (map (\(b,b') ->
281 let idx = length $ commonPrefix (b ^. branch_canonId) (b' ^. branch_canonId) []
282 in (b' ^. branch_seaLevel) !! (idx - 1)
283 ) $ listToSeq branches))
284 in map (\(x,b) -> b & branch_x .~ x)
285 $ zip steps branches
286
287
288 sortByHierarchy :: Int -> [PhyloBranch] -> [PhyloBranch]
289 sortByHierarchy depth branches =
290 if (length branches == 1)
291 then branchToIso branches
292 else branchToIso $ concat
293 $ map (\branches' ->
294 let partitions = partition (\b -> depth + 1 == ((length . snd) $ b ^. branch_id)) branches'
295 in (sortOn (\b -> (b ^. branch_meta) ! "birth") (fst partitions))
296 ++ (sortByHierarchy (depth + 1) (snd partitions)))
297 $ groupBy (\b b' -> ((take depth . snd) $ b ^. branch_id) == ((take depth . snd) $ b' ^. branch_id) )
298 $ sortOn (\b -> (take depth . snd) $ b ^. branch_id) branches
299
300
301 sortByBirthDate :: Order -> PhyloExport -> PhyloExport
302 sortByBirthDate order export =
303 let branches = sortOn (\b -> (b ^. branch_meta) ! "birth") $ export ^. export_branches
304 branches' = case order of
305 Asc -> branches
306 Desc -> reverse branches
307 in export & export_branches .~ branches'
308
309 processSort :: Sort -> PhyloExport -> PhyloExport
310 processSort sort' export = case sort' of
311 ByBirthDate o -> sortByBirthDate o export
312 ByHierarchy -> export & export_branches .~ sortByHierarchy 0 (export ^. export_branches)
313
314
315 -----------------
316 -- | Metrics | --
317 -----------------
318
319 -- | Return the conditional probability of i knowing j
320 conditional :: Ord a => Map (a,a) Double -> a -> a -> Double
321 conditional m i j = (findWithDefault 0 (i,j) m)
322 / (m ! (j,j))
323
324
325 -- | Return the genericity score of a given ngram
326 genericity :: Map (Int, Int) Double -> [Int] -> Int -> Double
327 genericity m l i = ( (sum $ map (\j -> conditional m i j) l)
328 - (sum $ map (\j -> conditional m j i) l)) / (fromIntegral $ (length l) + 1)
329
330
331 -- | Return the specificity score of a given ngram
332 specificity :: Map (Int, Int) Double -> [Int] -> Int -> Double
333 specificity m l i = ( (sum $ map (\j -> conditional m j i) l)
334 - (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
335
336
337 -- | Return the inclusion score of a given ngram
338 inclusion :: Map (Int, Int) Double -> [Int] -> Int -> Double
339 inclusion m l i = ( (sum $ map (\j -> conditional m j i) l)
340 + (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
341
342
343 ngramsMetrics :: PhyloExport -> PhyloExport
344 ngramsMetrics export =
345 over ( export_groups
346 . traverse )
347 (\g -> g & phylo_groupMeta %~ insert "genericity"
348 (map (\n -> genericity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
349 & phylo_groupMeta %~ insert "specificity"
350 (map (\n -> specificity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
351 & phylo_groupMeta %~ insert "inclusion"
352 (map (\n -> inclusion (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
353 ) export
354
355
356 branchDating :: PhyloExport -> PhyloExport
357 branchDating export =
358 over ( export_branches
359 . traverse )
360 (\b ->
361 let groups = sortOn fst
362 $ foldl' (\acc g -> if (g ^. phylo_groupBranchId == b ^. branch_id)
363 then acc ++ [g ^. phylo_groupPeriod]
364 else acc ) [] $ export ^. export_groups
365 periods = nub groups
366 birth = fst $ head' "birth" groups
367 age = (snd $ last' "age" groups) - birth
368 in b & branch_meta %~ insert "birth" [fromIntegral birth]
369 & branch_meta %~ insert "age" [fromIntegral age]
370 & branch_meta %~ insert "size" [fromIntegral $ length periods] ) export
371
372 processMetrics :: PhyloExport -> PhyloExport
373 processMetrics export = ngramsMetrics
374 $ branchDating export
375
376
377 -----------------
378 -- | Taggers | --
379 -----------------
380
381 getNthMostMeta :: Int -> [Double] -> [Int] -> [Int]
382 getNthMostMeta nth meta ns = map (\(idx,_) -> (ns !! idx))
383 $ take nth
384 $ reverse
385 $ sortOn snd $ zip [0..] meta
386
387
388 mostInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
389 mostInclusive nth foundations export =
390 over ( export_branches
391 . traverse )
392 (\b ->
393 let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
394 cooc = foldl (\acc g -> unionWith (+) acc (g ^. phylo_groupCooc)) empty groups
395 ngrams = sort $ foldl (\acc g -> union acc (g ^. phylo_groupNgrams)) [] groups
396 inc = map (\n -> inclusion cooc (ngrams \\ [n]) n) ngrams
397 lbl = ngramsToLabel foundations $ getNthMostMeta nth inc ngrams
398 in b & branch_label .~ lbl ) export
399
400
401 mostEmergentInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
402 mostEmergentInclusive nth foundations export =
403 over ( export_groups
404 . traverse )
405 (\g ->
406 let lbl = ngramsToLabel foundations
407 $ take nth
408 $ map (\(_,(_,idx)) -> idx)
409 $ concat
410 $ map (\groups -> sortOn (fst . snd) groups)
411 $ groupBy ((==) `on` fst) $ reverse $ sortOn fst
412 $ zip ((g ^. phylo_groupMeta) ! "inclusion")
413 $ zip ((g ^. phylo_groupMeta) ! "dynamics") (g ^. phylo_groupNgrams)
414 in g & phylo_groupLabel .~ lbl ) export
415
416
417 processLabels :: [PhyloLabel] -> Vector Ngrams -> PhyloExport -> PhyloExport
418 processLabels labels foundations export =
419 foldl (\export' label ->
420 case label of
421 GroupLabel tagger nth ->
422 case tagger of
423 MostEmergentInclusive -> mostEmergentInclusive nth foundations export'
424 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger"
425 BranchLabel tagger nth ->
426 case tagger of
427 MostInclusive -> mostInclusive nth foundations export'
428 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger" ) export labels
429
430
431 ------------------
432 -- | Dynamics | --
433 ------------------
434
435
436 toDynamics :: Int -> [PhyloGroup] -> PhyloGroup -> Map Int (Date,Date) -> Double
437 toDynamics n parents g m =
438 let prd = g ^. phylo_groupPeriod
439 end = last' "dynamics" (sort $ map snd $ elems m)
440 in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end))
441 {- decrease -}
442 then 2
443 else if ((fst prd) == (fst $ m ! n))
444 {- recombination -}
445 then 0
446 else if isNew
447 {- emergence -}
448 then 1
449 else 3
450 where
451 --------------------------------------
452 isNew :: Bool
453 isNew = not $ elem n $ concat $ map _phylo_groupNgrams parents
454
455
456 processDynamics :: [PhyloGroup] -> [PhyloGroup]
457 processDynamics groups =
458 map (\g ->
459 let parents = filter (\g' -> (g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
460 && ((fst $ g ^. phylo_groupPeriod) > (fst $ g' ^. phylo_groupPeriod))) groups
461 in g & phylo_groupMeta %~ insert "dynamics" (map (\n -> toDynamics n parents g mapNgrams) $ g ^. phylo_groupNgrams) ) groups
462 where
463 --------------------------------------
464 mapNgrams :: Map Int (Date,Date)
465 mapNgrams = map (\dates ->
466 let dates' = sort dates
467 in (head' "dynamics" dates', last' "dynamics" dates'))
468 $ fromListWith (++)
469 $ foldl (\acc g -> acc ++ ( map (\n -> (n,[fst $ g ^. phylo_groupPeriod, snd $ g ^. phylo_groupPeriod]))
470 $ (g ^. phylo_groupNgrams))) [] groups
471
472
473 -----------------
474 -- | horizon | --
475 -----------------
476
477 horizonToAncestors :: Double -> Phylo -> [PhyloAncestor]
478 horizonToAncestors delta phylo =
479 let horizon = Map.toList $ Map.filter (\v -> v > delta) $ phylo ^. phylo_horizon
480 ct0 = fromList $ map (\g -> (getGroupId g, g)) $ getGroupsFromLevelPeriods 1 (take 1 (getPeriodIds phylo)) phylo
481 aDelta = toRelatedComponents
482 (elems ct0)
483 (map (\((g,g'),v) -> ((ct0 ! g,ct0 ! g'),v)) horizon)
484 in map (\(id,groups) -> toAncestor id groups) $ zip [1..] aDelta
485 where
486 -- | note : possible bug if we sync clus more than once
487 -- | horizon is calculated at level 1, ancestors have to be related to the last level
488 toAncestor :: Int -> [PhyloGroup] -> PhyloAncestor
489 toAncestor id groups = PhyloAncestor id
490 (foldl' (\acc g -> union acc (g ^. phylo_groupNgrams)) [] groups)
491 (concat $ map (\g -> map fst (g ^. phylo_groupLevelParents)) groups)
492
493
494 ---------------------
495 -- | phyloExport | --
496 ---------------------
497
498 toPhyloExport :: Phylo -> DotGraph DotId
499 toPhyloExport phylo = exportToDot phylo
500 $ processFilters (exportFilter $ getConfig phylo) (phyloQuality $ getConfig phylo)
501 $ processSort (exportSort $ getConfig phylo)
502 $ processLabels (exportLabel $ getConfig phylo) (getRoots phylo)
503 $ processMetrics export
504 where
505 export :: PhyloExport
506 export = PhyloExport groups branches (horizonToAncestors 0 phylo)
507 --------------------------------------
508 branches :: [PhyloBranch]
509 branches = map (\g ->
510 let seaLvl = (g ^. phylo_groupMeta) ! "seaLevels"
511 breaks = (g ^. phylo_groupMeta) ! "breaks"
512 canonId = take (round $ (last' "export" breaks) + 2) (snd $ g ^. phylo_groupBranchId)
513 in PhyloBranch (g ^. phylo_groupBranchId)
514 canonId
515 seaLvl
516 0
517 (last' "export" (take (round $ (last' "export" breaks) + 1) seaLvl))
518 0
519 0
520 "" empty)
521 $ map (\gs -> head' "export" gs)
522 $ groupBy (\g g' -> g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
523 $ sortOn (\g -> g ^. phylo_groupBranchId) groups
524 --------------------------------------
525 groups :: [PhyloGroup]
526 groups = traceExportGroups
527 $ processDynamics
528 $ getGroupsFromLevel (phyloLevel $ getConfig phylo)
529 $ tracePhyloInfo phylo
530
531
532 traceExportBranches :: [PhyloBranch] -> [PhyloBranch]
533 traceExportBranches branches = trace ("\n"
534 <> "-- | Export " <> show(length branches) <> " branches") branches
535
536 tracePhyloInfo :: Phylo -> Phylo
537 tracePhyloInfo phylo = trace ("\n" <> "##########################" <> "\n\n" <> "-- | Phylo with β = "
538 <> show(_qua_granularity $ phyloQuality $ getConfig phylo) <> " applied to "
539 <> show(length $ Vector.toList $ getRoots phylo) <> " foundations"
540 ) phylo
541
542
543 traceExportGroups :: [PhyloGroup] -> [PhyloGroup]
544 traceExportGroups groups = trace ("\n" <> "-- | Export "
545 <> show(length $ nub $ map (\g -> g ^. phylo_groupBranchId) groups) <> " branches, "
546 <> show(length groups) <> " groups and "
547 <> show(length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) groups) <> " terms"
548 ) groups
549