2 Module : Gargantext.Core.Viz.Phylo.PhyloExport
3 Description : Exportation module of a Phylo
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
11 {-# LANGUAGE TypeSynonymInstances #-}
13 module Gargantext.Core.Viz.Phylo.PhyloExport where
15 import Control.Lens hiding (Level)
16 import Control.Parallel.Strategies (parList, rdeepseq, using)
17 import Data.GraphViz hiding (DotGraph, Order)
18 import Data.GraphViz.Attributes.Complete hiding (EdgeType, Order)
19 import Data.GraphViz.Types.Generalised (DotGraph)
20 import Data.GraphViz.Types.Monadic
21 import Data.List ((++), sort, nub, null, concat, sortOn, groupBy, union, (\\), (!!), init, partition, notElem, unwords, nubBy, inits, elemIndex)
22 import Data.Map (Map, fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault, toList, member)
23 import Data.Text.Lazy (fromStrict, pack, unpack)
24 import Data.Vector (Vector)
25 import Debug.Trace (trace)
26 import Gargantext.Core.Viz.Phylo
27 import Gargantext.Core.Viz.Phylo.PhyloTools
28 import Gargantext.Core.Viz.Phylo.TemporalMatching (filterDocs, filterDiago, reduceDiagos, toSimilarity, getNextPeriods)
29 import Gargantext.Prelude hiding (scale)
30 import Prelude (writeFile)
31 import System.FilePath
32 import qualified Data.GraphViz.Attributes.HTML as H
33 import qualified Data.Text as Text
34 import qualified Data.Text.Lazy as Lazy
35 import qualified Data.Vector as Vector
41 dotToFile :: FilePath -> DotGraph DotId -> IO ()
42 dotToFile filePath dotG = writeFile filePath $ dotToString dotG
44 dotToString :: DotGraph DotId -> [Char]
45 dotToString dotG = unpack (printDotGraph dotG)
47 dynamicToColor :: Int -> H.Attribute
49 | d == 0 = H.BGColor (toColor LightCoral)
50 | d == 1 = H.BGColor (toColor Khaki)
51 | d == 2 = H.BGColor (toColor SkyBlue)
52 | otherwise = H.Color (toColor Black)
54 pickLabelColor :: [Int] -> H.Attribute
56 | elem 0 lst = dynamicToColor 0
57 | elem 1 lst = dynamicToColor 1
58 | elem 2 lst = dynamicToColor 2
59 | otherwise = dynamicToColor 3
61 toDotLabel :: Text.Text -> Label
62 toDotLabel lbl = StrLabel $ fromStrict lbl
64 toAttr :: AttributeName -> Lazy.Text -> CustomAttribute
65 toAttr k v = customAttribute k v
67 metaToAttr :: Map Text.Text [Double] -> [CustomAttribute]
68 metaToAttr meta = map (\(k, v) -> toAttr (fromStrict k) $ (pack . unwords) $ map show v) $ toList meta
70 groupIdToDotId :: PhyloGroupId -> DotId
71 groupIdToDotId (((d, d'), lvl), idx) =
72 (fromStrict . Text.pack) $ "group" <> show d <> show d' <> show lvl <> show idx
74 branchIdToDotId :: PhyloBranchId -> DotId
75 branchIdToDotId bId = (fromStrict . Text.pack) $ "branch" <> show (snd bId)
77 periodIdToDotId :: Period -> DotId
78 periodIdToDotId prd = (fromStrict . Text.pack) $ "period" <> show (fst prd) <> show (snd prd)
80 branchToDotNode :: PhyloBranch -> Int -> Dot DotId
81 branchToDotNode b bId =
82 node (branchIdToDotId $ b ^. branch_id)
83 ( [ FillColor [toWColor CornSilk]
87 , Style [SItem Bold []] ]
88 <> (metaToAttr $ b ^. branch_meta)
89 <> [ toAttr "nodeType" "branch"
90 , toAttr "bId" (pack $ show bId)
91 , toAttr "branchId" (pack $ unwords (map show $ snd $ b ^. branch_id))
92 , toAttr "branch_x" (fromStrict $ Text.pack $ show $ b ^. branch_x)
93 , toAttr "branch_y" (fromStrict $ Text.pack $ show $ b ^. branch_y)
94 , toAttr "label" (pack $ show $ b ^. branch_label)
97 periodToDotNode :: (Date,Date) -> (Text.Text,Text.Text) -> Dot DotId
98 periodToDotNode prd prd' =
99 node (periodIdToDotId prd) $
102 , Label $ toDotLabel $ Text.pack $ show (fst prd) <> " " <> show (snd prd) ]
103 <> [ toAttr "nodeType" "period"
104 , toAttr "strFrom" $ fromStrict $ Text.pack $ show $ fst prd'
105 , toAttr "strTo" $ fromStrict $ Text.pack $ show $ snd prd'
106 , toAttr "from" $ fromStrict $ Text.pack $ show $ fst prd
107 , toAttr "to" $ fromStrict $ Text.pack $ show $ snd prd ]
110 groupToDotNode :: Vector Ngrams -> PhyloGroup -> Int -> Dot DotId
111 groupToDotNode fdt g bId =
112 node (groupIdToDotId $ getGroupId g)
113 ([ toAttr "nodeType" "group"
114 , toAttr "gid" (groupIdToDotId $ getGroupId g)
115 , toAttr "from" (pack $ show (fst $ g ^. phylo_groupPeriod))
116 , toAttr "to" (pack $ show (snd $ g ^. phylo_groupPeriod))
117 , toAttr "strFrom" (pack $ show (fst $ g ^. phylo_groupPeriod'))
118 , toAttr "strTo" (pack $ show (snd $ g ^. phylo_groupPeriod'))
119 , toAttr "branchId" (pack $ unwords (init $ map show $ snd $ g ^. phylo_groupBranchId))
120 , toAttr "bId" (pack $ show bId)
121 , toAttr "support" (pack $ show (g ^. phylo_groupSupport))
122 , toAttr "weight" (pack $ show (g ^. phylo_groupWeight))
123 , toAttr "source" (pack $ show (nub $ g ^. phylo_groupSources))
124 , toAttr "sourceFull" (pack $ show (g ^. phylo_groupSources))
125 , toAttr "density" (pack $ show (g ^. phylo_groupDensity))
126 , toAttr "cooc" (pack $ show (g ^. phylo_groupCooc))
127 , toAttr "lbl" (pack $ show (ngramsToLabel fdt (g ^. phylo_groupNgrams)))
128 , toAttr "foundation" (pack $ show (idxToLabel (g ^. phylo_groupNgrams)))
129 , toAttr "role" (pack $ show (idxToLabel' ((g ^. phylo_groupMeta) ! "dynamics")))
130 , toAttr "frequence" (pack $ show (idxToLabel' ((g ^. phylo_groupMeta) ! "frequence")))
131 , toAttr "seaLvl" (pack $ show ((g ^. phylo_groupMeta) ! "seaLevels"))
134 toDotEdge' :: DotId -> DotId -> [Char] -> [Char] -> EdgeType -> Dot DotId
135 toDotEdge' source target thr w edgeType = edge source target
137 GroupToGroup -> undefined
138 GroupToGroupMemory -> [ Width 3, penWidth 4, Color [toWColor Black], Constraint True] <> [toAttr "edgeType" "memoryLink", toAttr "thr" (pack thr), toAttr "weight" (pack w)]
139 BranchToGroup -> undefined
140 BranchToBranch -> undefined
141 GroupToAncestor -> undefined
142 PeriodToPeriod -> undefined)
145 toDotEdge :: DotId -> DotId -> [Char] -> EdgeType -> Dot DotId
146 toDotEdge source target lbl edgeType = edge source target
148 GroupToGroup -> [ Width 3, penWidth 4, Color [toWColor Black], Constraint True] <> [toAttr "edgeType" "link", toAttr "lbl" (pack lbl), toAttr "source" source, toAttr "target" target]
149 GroupToGroupMemory -> undefined
150 BranchToGroup -> [ Width 3, Color [toWColor Black], ArrowHead (AType [(ArrMod FilledArrow RightSide,DotArrow)])] <> [toAttr "edgeType" "branchLink" ]
151 BranchToBranch -> [ Width 2, Color [toWColor Black], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,DotArrow)])]
152 GroupToAncestor -> [ Width 3, Color [toWColor Red], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,NoArrow)]), PenWidth 4] <> [toAttr "edgeType" "ancestorLink", toAttr "lbl" (pack lbl), toAttr "source" source, toAttr "target" target]
153 PeriodToPeriod -> [ Width 5, Color [toWColor Black]])
156 mergePointers :: [PhyloGroup] -> Map (PhyloGroupId,PhyloGroupId) Double
157 mergePointers groups =
158 let toChilds = fromList $ concat $ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupPeriodChilds) groups
159 toParents = fromList $ concat $ map (\g -> map (\(target,w) -> ((target,getGroupId g),w)) $ g ^. phylo_groupPeriodParents) groups
160 in unionWith (\w w' -> max w w') toChilds toParents
162 mergePointersMemory :: [PhyloGroup] -> [((PhyloGroupId,PhyloGroupId),(Double,Double))]
163 mergePointersMemory groups =
164 let toChilds = concat $ map (\g -> map (\(target,(t,w)) -> ((getGroupId g,target),(t,w))) $ g ^. phylo_groupPeriodMemoryChilds) groups
165 toParents = concat $ map (\g -> map (\(target,(t,w)) -> ((target,getGroupId g),(t,w))) $ g ^. phylo_groupPeriodMemoryParents) groups
166 in concat [toChilds,toParents]
168 mergeAncestors :: [PhyloGroup] -> [((PhyloGroupId,PhyloGroupId), Double)]
169 mergeAncestors groups = concat
170 $ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupAncestors)
171 $ filter (\g -> (not . null) $ g ^. phylo_groupAncestors) groups
174 toBid :: PhyloGroup -> [PhyloBranch] -> Int
176 let b' = head' "toBid" (filter (\b -> b ^. branch_id == g ^. phylo_groupBranchId) bs)
177 in fromJust $ elemIndex b' bs
179 exportToDot :: Phylo -> PhyloExport -> DotGraph DotId
180 exportToDot phylo export =
181 trace ("\n-- | Convert " <> show(length $ export ^. export_branches) <> " branches and "
182 <> show(length $ export ^. export_groups) <> " groups "
183 <> show(length $ nub $ concat $ map (^. phylo_groupNgrams) $ export ^. export_groups) <> " terms to a dot file\n\n"
184 <> "##########################") $
185 digraph ((Str . fromStrict) $ phyloName $ getConfig phylo) $ do
187 {- 1) init the dot graph -}
188 graphAttrs ( [ Label (toDotLabel $ phyloName $ getConfig phylo)]
189 <> [ FontSize 30, LabelLoc VTop, NodeSep 1, RankSep [1], Rank SameRank, Splines SplineEdges, Overlap ScaleOverlaps
192 , Style [SItem Filled []],Color [toWColor White]]
193 {-- home made attributes -}
194 <> [ toAttr (fromStrict "phyloFoundations") $ pack $ show (length $ Vector.toList $ getRoots phylo)
195 , toAttr (fromStrict "phyloTerms") $ pack $ show (length $ nub $ concat $ map (^. phylo_groupNgrams) $ export ^. export_groups)
196 , toAttr (fromStrict "phyloDocs") $ pack $ show (sum $ elems $ getDocsByDate phylo)
197 , toAttr (fromStrict "phyloPeriods") $ pack $ show (length $ elems $ phylo ^. phylo_periods)
198 , toAttr (fromStrict "phyloBranches") $ pack $ show (length $ export ^. export_branches)
199 , toAttr (fromStrict "phyloGroups") $ pack $ show (length $ export ^. export_groups)
200 , toAttr (fromStrict "phyloSources") $ pack $ show (Vector.toList $ getSources phylo)
201 , toAttr (fromStrict "phyloTimeScale") $ pack $ getTimeScale phylo
202 , toAttr (fromStrict "PhyloScale") $ pack $ show (getLevel phylo)
203 , toAttr (fromStrict "phyloQuality") $ pack $ show (phylo ^. phylo_quality)
204 , toAttr (fromStrict "phyloSeaRiseStart") $ pack $ show (getPhyloSeaRiseStart phylo)
205 , toAttr (fromStrict "phyloSeaRiseSteps") $ pack $ show (getPhyloSeaRiseSteps phylo)
206 -- ,(toAttr (fromStrict "phyloTermsFreq") $ pack $ show (toList $ _phylo_lastTermFreq phylo))
210 -- toAttr (fromStrict k) $ (pack . unwords) $ map show v
212 -- 2) create a layer for the branches labels -}
213 subgraph (Str "Branches peaks") $ do
215 -- graphAttrs [Rank SameRank]
217 -- 3) group the branches by hierarchy
218 -- mapM (\branches ->
219 -- subgraph (Str "Branches clade") $ do
220 -- graphAttrs [Rank SameRank]
222 -- -- 4) create a node for each branch
223 -- mapM branchToDotNode branches
224 -- ) $ elems $ fromListWith (++) $ map (\b -> ((init . snd) $ b ^. branch_id,[b])) $ export ^. export_branches
226 mapM (\b -> branchToDotNode b (fromJust $ elemIndex b (export ^. export_branches))) $ export ^. export_branches
228 {-- 5) create a layer for each period -}
229 _ <- mapM (\period ->
230 subgraph ((Str . fromStrict . Text.pack) $ ("Period" <> show (fst $ _phylo_periodPeriod period) <> show (snd $ _phylo_periodPeriod period))) $ do
231 graphAttrs [Rank SameRank]
232 periodToDotNode (period ^. phylo_periodPeriod) (period ^. phylo_periodPeriodStr)
234 {-- 6) create a node for each group -}
235 mapM (\g -> groupToDotNode (getRoots phylo) g (toBid g (export ^. export_branches))) (filter (\g -> g ^. phylo_groupPeriod == (period ^. phylo_periodPeriod)) $ export ^. export_groups)
236 ) $ phylo ^. phylo_periods
238 {-- 7) create the edges between a branch and its first groups -}
239 _ <- mapM (\(bId,groups) ->
240 mapM (\g -> toDotEdge (branchIdToDotId bId) (groupIdToDotId $ getGroupId g) "" BranchToGroup) groups
243 $ map (\groups -> head' "toDot"
244 $ groupBy (\g g' -> g' ^. phylo_groupPeriod == g ^. phylo_groupPeriod)
245 $ sortOn (fst . _phylo_groupPeriod) groups)
246 $ fromListWith (++) $ map (\g -> (g ^. phylo_groupBranchId,[g])) $ export ^. export_groups
248 {- 8) create the edges between the groups -}
249 _ <- mapM (\((k,k'),v) ->
250 toDotEdge (groupIdToDotId k) (groupIdToDotId k') (show v) GroupToGroup
251 ) $ (toList . mergePointers) $ export ^. export_groups
253 {- 8-bis) create the edges between the groups -}
254 {- _ <- mapM (\((k,k'),v) ->
255 toDotEdge' (groupIdToDotId k) (groupIdToDotId k') (show (fst v)) (show (snd v)) GroupToGroupMemory
256 ) $ mergePointersMemory $ export ^. export_groups -}
258 _ <- mapM (\((k,k'),v) ->
259 toDotEdge (groupIdToDotId k) (groupIdToDotId k') (show v) GroupToAncestor
260 ) $ mergeAncestors $ export ^. export_groups
262 -- 10) create the edges between the periods
263 _ <- mapM (\(prd,prd') ->
264 toDotEdge (periodIdToDotId prd) (periodIdToDotId prd') "" PeriodToPeriod
265 ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ getPeriodIds phylo
267 {- 8) create the edges between the branches
268 -- _ <- mapM (\(bId,bId') ->
269 -- toDotEdge (branchIdToDotId bId) (branchIdToDotId bId')
270 -- (Text.pack $ show(branchIdsToSimilarity bId bId'
271 -- (getThresholdInit $ phyloSimilarity $ getConfig phylo)
272 -- (getThresholdStep $ phyloSimilarity $ getConfig phylo))) BranchToBranch
273 -- ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ map _branch_id $ export ^. export_branches
277 graphAttrs [Rank SameRank]
284 filterByBranchSize :: Double -> PhyloExport -> PhyloExport
285 filterByBranchSize thr export =
286 let splited = partition (\b -> head' "filter" ((b ^. branch_meta) ! "size") >= thr) $ export ^. export_branches
287 in export & export_branches .~ (fst splited)
288 & export_groups %~ (filter (\g -> not $ elem (g ^. phylo_groupBranchId) (map _branch_id $ snd splited)))
291 processFilters :: [Filter] -> Quality -> PhyloExport -> PhyloExport
292 processFilters filters qua export =
293 foldl (\export' f -> case f of
294 ByBranchSize thr -> if (thr < (fromIntegral $ qua ^. qua_minBranch))
295 then filterByBranchSize (fromIntegral $ qua ^. qua_minBranch) export'
296 else filterByBranchSize thr export'
303 branchToIso :: [PhyloBranch] -> [PhyloBranch]
304 branchToIso branches =
307 $ map (\(b,x) -> b ^. branch_y + 0.05 - x)
309 $ ([0] ++ (map (\(b,b') ->
310 let idx = length $ commonPrefix (b ^. branch_canonId) (b' ^. branch_canonId) []
311 lmin = min (length $ b ^. branch_seaLevel) (length $ b' ^. branch_seaLevel)
313 if ((idx - 1) > ((length $ b' ^. branch_seaLevel) - 1))
314 then (b' ^. branch_seaLevel) !! (lmin - 1)
315 else (b' ^. branch_seaLevel) !! (idx - 1)
316 ) $ listToSeq branches))
317 in map (\(x,b) -> b & branch_x .~ x)
320 branchToIso' :: Double -> Double -> [PhyloBranch] -> [PhyloBranch]
321 branchToIso' start step branches =
322 let bx = map (\l -> (sum l) + ((fromIntegral $ length l) * 0.5))
324 $ ([0] ++ (map (\(b,b') ->
325 let root = fromIntegral $ length $ commonPrefix (snd $ b ^. branch_id) (snd $ b' ^. branch_id) []
326 in 1 - start - step * root) $ listToSeq branches))
327 in map (\(x,b) -> b & branch_x .~ x)
331 sortByHierarchy :: Int -> [PhyloBranch] -> [PhyloBranch]
332 sortByHierarchy depth branches =
333 if (length branches == 1)
337 let partitions = partition (\b -> depth + 1 == ((length . snd) $ b ^. branch_id)) branches'
338 in (sortOn (\b -> (b ^. branch_meta) ! "birth") (fst partitions))
339 ++ (sortByHierarchy (depth + 1) (snd partitions)))
340 $ groupBy (\b b' -> ((take depth . snd) $ b ^. branch_id) == ((take depth . snd) $ b' ^. branch_id) )
341 $ sortOn (\b -> (take depth . snd) $ b ^. branch_id) branches
344 sortByBirthDate :: Order -> PhyloExport -> PhyloExport
345 sortByBirthDate order export =
346 let branches = sortOn (\b -> (b ^. branch_meta) ! "birth") $ export ^. export_branches
347 branches' = case order of
349 Desc -> reverse branches
350 in export & export_branches .~ branches'
352 processSort :: Sort -> SeaElevation -> PhyloExport -> PhyloExport
353 processSort sort' elev export = case sort' of
354 ByBirthDate o -> sortByBirthDate o export
355 ByHierarchy _ -> case elev of
356 Constante s s' -> export & export_branches .~ (branchToIso' s s' $ sortByHierarchy 0 (export ^. export_branches))
357 Adaptative _ -> export & export_branches .~ (branchToIso $ sortByHierarchy 0 (export ^. export_branches))
358 Evolving _ -> export & export_branches .~ (branchToIso $ sortByHierarchy 0 (export ^. export_branches))
364 -- | Return the conditional probability of i knowing j
365 conditional :: Ord a => Map (a,a) Double -> a -> a -> Double
366 conditional m i j = (findWithDefault 0 (i,j) m)
370 -- | Return the genericity score of a given ngram
371 genericity :: Map (Int, Int) Double -> [Int] -> Int -> Double
372 genericity m l i = ( (sum $ map (\j -> conditional m i j) l)
373 - (sum $ map (\j -> conditional m j i) l)) / (fromIntegral $ (length l) + 1)
376 -- | Return the specificity score of a given ngram
377 specificity :: Map (Int, Int) Double -> [Int] -> Int -> Double
378 specificity m l i = ( (sum $ map (\j -> conditional m j i) l)
379 - (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
382 -- | Return the inclusion score of a given ngram
383 inclusion :: Map (Int, Int) Double -> [Int] -> Int -> Double
384 inclusion m l i = ( (sum $ map (\j -> conditional m j i) l)
385 + (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
388 ngramsMetrics :: Phylo -> PhyloExport -> PhyloExport
389 ngramsMetrics phylo export =
392 (\g -> g & phylo_groupMeta %~ insert "genericity"
393 (map (\n -> genericity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
394 & phylo_groupMeta %~ insert "specificity"
395 (map (\n -> specificity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
396 & phylo_groupMeta %~ insert "inclusion"
397 (map (\n -> inclusion (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
398 & phylo_groupMeta %~ insert "frequence"
399 (map (\n -> getInMap n (getLastRootsFreq phylo)) $ g ^. phylo_groupNgrams)
403 branchDating :: PhyloExport -> PhyloExport
404 branchDating export =
405 over ( export_branches
408 let groups = sortOn fst
409 $ foldl' (\acc g -> if (g ^. phylo_groupBranchId == b ^. branch_id)
410 then acc ++ [g ^. phylo_groupPeriod]
411 else acc ) [] $ export ^. export_groups
413 birth = fst $ head' "birth" groups
414 death = snd $ last' "death" groups
416 in b & branch_meta %~ insert "birth" [fromIntegral birth]
417 & branch_meta %~ insert "death" [fromIntegral death]
418 & branch_meta %~ insert "age" [fromIntegral age]
419 & branch_meta %~ insert "size" [fromIntegral $ length periods] ) export
421 processMetrics :: Phylo -> PhyloExport -> PhyloExport
422 processMetrics phylo export = ngramsMetrics phylo
423 $ branchDating export
430 nk :: Int -> [[Int]] -> Int
432 $ map (\g -> if (elem n g)
437 tf :: Int -> [[Int]] -> Double
438 tf n groups = (fromIntegral $ nk n groups) / (fromIntegral $ length $ concat groups)
441 idf :: Int -> [[Int]] -> Double
442 idf n groups = log ((fromIntegral $ length groups) / (fromIntegral $ nk n groups))
445 findTfIdf :: [[Int]] -> [(Int,Double)]
446 findTfIdf groups = reverse $ sortOn snd $ map (\n -> (n,(tf n groups) * (idf n groups))) $ nub $ concat groups
449 findEmergences :: [PhyloGroup] -> Map Int Double -> [(Int,Double)]
450 findEmergences groups freq =
451 let ngrams = map _phylo_groupNgrams groups
452 dynamics = map (\g -> (g ^. phylo_groupMeta) ! "dynamics") groups
453 emerging = nubBy (\n1 n2 -> fst n1 == fst n2)
454 $ concat $ map (\g -> filter (\(_,d) -> d == 0) $ zip (fst g) (snd g)) $ zip ngrams dynamics
455 in reverse $ sortOn snd
456 $ map (\(n,_) -> if (member n freq)
461 mostEmergentTfIdf :: Int -> Map Int Double -> Vector Ngrams -> PhyloExport -> PhyloExport
462 mostEmergentTfIdf nth freq foundations export =
463 over ( export_branches
466 let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
467 tfidf = findTfIdf (map _phylo_groupNgrams groups)
468 emergences = findEmergences groups freq
469 selected = if (null emergences)
470 then map fst $ take nth tfidf
471 else [fst $ head' "mostEmergentTfIdf" emergences]
472 ++ (map fst $ take (nth - 1) $ filter (\(n,_) -> n /= (fst $ head' "mostEmergentTfIdf" emergences)) tfidf)
473 in b & branch_label .~ (ngramsToLabel foundations selected)) export
476 getNthMostMeta :: Int -> [Double] -> [Int] -> [Int]
477 getNthMostMeta nth meta ns = map (\(idx,_) -> (ns !! idx))
480 $ sortOn snd $ zip [0..] meta
483 mostInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
484 mostInclusive nth foundations export =
485 over ( export_branches
488 let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
489 cooc = foldl (\acc g -> unionWith (+) acc (g ^. phylo_groupCooc)) empty groups
490 ngrams = sort $ foldl (\acc g -> union acc (g ^. phylo_groupNgrams)) [] groups
491 inc = map (\n -> inclusion cooc (ngrams \\ [n]) n) ngrams
492 lbl = ngramsToLabel foundations $ getNthMostMeta nth inc ngrams
493 in b & branch_label .~ lbl ) export
496 mostEmergentInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
497 mostEmergentInclusive nth foundations export =
501 let lbl = ngramsToLabel foundations
503 $ map (\(_,(_,idx)) -> idx)
505 $ map (\groups -> sortOn (fst . snd) groups)
506 $ groupBy ((==) `on` fst) $ reverse $ sortOn fst
507 $ zip ((g ^. phylo_groupMeta) ! "inclusion")
508 $ zip ((g ^. phylo_groupMeta) ! "dynamics") (g ^. phylo_groupNgrams)
509 in g & phylo_groupLabel .~ lbl ) export
512 processLabels :: [PhyloLabel] -> Vector Ngrams -> Map Int Double -> PhyloExport -> PhyloExport
513 processLabels labels foundations freq export =
514 foldl (\export' label ->
516 GroupLabel tagger nth ->
518 MostEmergentInclusive -> mostEmergentInclusive nth foundations export'
519 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger"
520 BranchLabel tagger nth ->
522 MostInclusive -> mostInclusive nth foundations export'
523 MostEmergentTfIdf -> mostEmergentTfIdf nth freq foundations export'
524 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger" ) export labels
531 -- utiliser & creer une Map FdtId [PhyloGroup]
532 -- n = index of the current term
533 toDynamics :: FdtId -> [PhyloGroup] -> PhyloGroup -> Map FdtId (Date,Date) -> Double
534 toDynamics n elders g m =
535 let prd = g ^. phylo_groupPeriod
536 end = last' "dynamics" (sort $ map snd $ elems m)
537 in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end))
540 else if ((fst prd) == (fst $ m ! n))
548 --------------------------------------
550 isNew = not $ elem n $ concat $ map _phylo_groupNgrams elders
553 processDynamics :: [PhyloGroup] -> [PhyloGroup]
554 processDynamics groups =
556 let elders = filter (\g' -> (g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
557 && ((fst $ g ^. phylo_groupPeriod) > (fst $ g' ^. phylo_groupPeriod))) groups
558 in g & phylo_groupMeta %~ insert "dynamics" (map (\n -> toDynamics n elders g mapNgrams) $ g ^. phylo_groupNgrams) ) groups
560 --------------------------------------
561 mapNgrams :: Map FdtId (Date,Date)
562 mapNgrams = map (\dates ->
563 let dates' = sort dates
564 in (head' "dynamics" dates', last' "dynamics" dates'))
566 $ foldl (\acc g -> acc ++ ( map (\n -> (n,[fst $ g ^. phylo_groupPeriod, snd $ g ^. phylo_groupPeriod]))
567 $ (g ^. phylo_groupNgrams))) [] groups
574 getGroupThr :: Double -> PhyloGroup -> Double
576 let seaLvl = (g ^. phylo_groupMeta) ! "seaLevels"
577 breaks = (g ^. phylo_groupMeta) ! "breaks"
578 in (last' "export" (take (round $ (last' "export" breaks) + 1) seaLvl)) - step
587 toAncestor nbDocs diago similarity step candidates ego =
588 let curr = ego ^. phylo_groupAncestors
589 in ego & phylo_groupAncestors .~ (curr ++ (map (\(g,w) -> (getGroupId g,w))
590 $ filter (\(g,w) -> (w > 0) && (w >= (min (getGroupThr step ego) (getGroupThr step g))))
591 $ map (\g -> (g, toSimilarity nbDocs diago similarity (ego ^. phylo_groupNgrams) (g ^. phylo_groupNgrams) (g ^. phylo_groupNgrams)))
592 $ filter (\g -> g ^. phylo_groupBranchId /= ego ^. phylo_groupBranchId ) candidates))
595 headsToAncestors :: Double
602 headsToAncestors nbDocs diago similarity step heads acc =
606 let ego = head' "headsToAncestors" heads
607 heads' = tail' "headsToAncestors" heads
608 in headsToAncestors nbDocs diago similarity step heads' (acc ++ [toAncestor nbDocs diago similarity step heads' ego])
611 toHorizon :: Phylo -> Phylo
613 let phyloAncestor = updatePhyloGroups
615 (fromList $ map (\g -> (getGroupId g, g))
617 $ tracePhyloAncestors newGroups) phylo
618 reBranched = fromList $ map (\g -> (getGroupId g, g)) $ concat
619 $ groupsToBranches' $ fromList $ map (\g -> (getGroupId g, g)) $ getGroupsFromScale scale phyloAncestor
620 in updatePhyloGroups scale reBranched phylo
622 -- | 1) for each periods
624 periods = getPeriodIds phylo
627 scale = getLastLevel phylo
630 frame = getTimeFrame $ timeUnit $ getConfig phylo
631 -- | 2) find ancestors between groups without parents
632 mapGroups :: [[PhyloGroup]]
633 mapGroups = map (\prd ->
634 let groups = getGroupsFromScalePeriods scale [prd] phylo
635 childs = getPreviousChildIds scale frame prd periods phylo
636 -- maybe add a better filter for non isolated ancestors
637 heads = filter (\g -> (not . null) $ (g ^. phylo_groupPeriodChilds))
638 $ filter (\g -> null (g ^. phylo_groupPeriodParents) && (notElem (getGroupId g) childs)) groups
639 noHeads = groups \\ heads
640 nbDocs = sum $ elems $ filterDocs (getDocsByDate phylo) [prd]
641 diago = reduceDiagos $ filterDiago (getCoocByDate phylo) [prd]
642 sim = (similarity $ getConfig phylo)
643 step = case getSeaElevation phylo of
647 -- in headsToAncestors nbDocs diago Similarity heads groups []
648 in map (toAncestor nbDocs diago sim step noHeads)
649 $ headsToAncestors nbDocs diago sim step heads []
651 -- | 3) process this task concurrently
652 newGroups :: [[PhyloGroup]]
653 newGroups = mapGroups `using` parList rdeepseq
654 --------------------------------------
656 getPreviousChildIds :: Scale -> Int -> Period -> [Period] -> Phylo -> [PhyloGroupId]
657 getPreviousChildIds lvl frame curr prds phylo =
658 concat $ map ((map fst) . _phylo_groupPeriodChilds)
659 $ getGroupsFromScalePeriods lvl (getNextPeriods ToParents frame curr prds) phylo
661 ---------------------
662 -- | phyloExport | --
663 ---------------------
665 toPhyloExport :: Phylo -> DotGraph DotId
666 toPhyloExport phylo = exportToDot phylo
667 $ processFilters (exportFilter $ getConfig phylo) (phyloQuality $ getConfig phylo)
668 $ processSort (exportSort $ getConfig phylo) (getSeaElevation phylo)
669 $ processLabels (exportLabel $ getConfig phylo) (getRoots phylo) (getLastRootsFreq phylo)
670 $ processMetrics phylo export
672 export :: PhyloExport
673 export = PhyloExport groups branches
674 --------------------------------------
675 branches :: [PhyloBranch]
676 branches = map (\g ->
677 let seaLvl = (g ^. phylo_groupMeta) ! "seaLevels"
678 breaks = (g ^. phylo_groupMeta) ! "breaks"
679 canonId = take (round $ (last' "export" breaks) + 2) (snd $ g ^. phylo_groupBranchId)
680 in PhyloBranch { _branch_id = g ^. phylo_groupBranchId
681 , _branch_canonId = canonId
682 , _branch_seaLevel = seaLvl
684 , _branch_y = last' "export" $ take (round $ (last' "export" breaks) + 1) seaLvl
688 , _branch_meta = empty })
689 $ map (head' "export")
690 $ groupBy (\g g' -> g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
691 $ sortOn (^. phylo_groupBranchId) groups
692 --------------------------------------
693 groups :: [PhyloGroup]
694 groups = traceExportGroups
697 $ getGroupsFromScale (phyloScale $ getConfig phylo)
698 $ tracePhyloInfo phylo
701 traceExportBranches :: [PhyloBranch] -> [PhyloBranch]
702 traceExportBranches branches = trace ("\n"
703 <> "-- | Export " <> show(length branches) <> " branches") branches
705 tracePhyloAncestors :: [[PhyloGroup]] -> [[PhyloGroup]]
706 tracePhyloAncestors groups = trace ( "-- | Found " <> show(length $ concat $ map _phylo_groupAncestors $ concat groups) <> " ancestors") groups
708 tracePhyloInfo :: Phylo -> Phylo
709 tracePhyloInfo phylo = trace ("\n" <> "##########################" <> "\n\n" <> "-- | Phylo with level = "
710 <> show(getLevel phylo) <> " applied to "
711 <> show(length $ Vector.toList $ getRoots phylo) <> " foundations"
715 traceExportGroups :: [PhyloGroup] -> [PhyloGroup]
716 traceExportGroups groups = trace ("\n" <> "-- | Export "
717 <> show(length $ nub $ map (\g -> g ^. phylo_groupBranchId) groups) <> " branches, "
718 <> show(length groups) <> " groups and "
719 <> show(length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) groups) <> " terms"