2 Module : Gargantext.Core.Viz.Phylo.PhyloExport
3 Description : Exportation module of a Phylo
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
11 {-# LANGUAGE TypeSynonymInstances #-}
13 module Gargantext.Core.Viz.Phylo.PhyloExport where
15 import Control.Lens hiding (Level)
16 import Control.Parallel.Strategies (parList, rdeepseq, using)
17 import Data.GraphViz hiding (DotGraph, Order)
18 import Data.GraphViz.Attributes.Complete hiding (EdgeType, Order)
19 import Data.GraphViz.Types.Generalised (DotGraph)
20 import Data.GraphViz.Types.Monadic
21 import Data.List ((++), sort, nub, null, concat, sortOn, groupBy, union, (\\), (!!), init, partition, notElem, unwords, nubBy, inits, elemIndex)
22 import Data.Map (Map, fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault, toList, member)
23 import Data.Text.Lazy (fromStrict, pack, unpack)
24 import Data.Vector (Vector)
25 import Debug.Trace (trace)
26 import Gargantext.Core.Viz.Phylo
27 import Gargantext.Core.Viz.Phylo.PhyloTools
28 import Gargantext.Core.Viz.Phylo.TemporalMatching (filterDocs, filterDiago, reduceDiagos, toSimilarity, getNextPeriods)
29 import Gargantext.Prelude hiding (scale)
30 import Prelude (writeFile)
31 import System.FilePath
32 import qualified Data.GraphViz.Attributes.HTML as H
33 import qualified Data.Text as Text
34 import qualified Data.Text.Lazy as Lazy
35 import qualified Data.Vector as Vector
41 dotToFile :: FilePath -> DotGraph DotId -> IO ()
42 dotToFile filePath dotG = writeFile filePath $ dotToString dotG
44 dotToString :: DotGraph DotId -> [Char]
45 dotToString dotG = unpack (printDotGraph dotG)
47 dynamicToColor :: Double -> H.Attribute
49 | d == 0 = H.BGColor (toColor LightCoral)
50 | d == 1 = H.BGColor (toColor Khaki)
51 | d == 2 = H.BGColor (toColor SkyBlue)
52 | otherwise = H.Color (toColor Black)
54 pickLabelColor :: [Double] -> H.Attribute
56 | elem 0 lst = dynamicToColor 0
57 | elem 2 lst = dynamicToColor 2
58 | elem 1 lst = dynamicToColor 1
59 | otherwise = dynamicToColor 3
61 toDotLabel :: Text.Text -> Label
62 toDotLabel lbl = StrLabel $ fromStrict lbl
64 toAttr :: AttributeName -> Lazy.Text -> CustomAttribute
65 toAttr k v = customAttribute k v
67 metaToAttr :: Map Text.Text [Double] -> [CustomAttribute]
68 metaToAttr meta = map (\(k,v) -> toAttr (fromStrict k) $ (pack . unwords) $ map show v) $ toList meta
70 groupIdToDotId :: PhyloGroupId -> DotId
71 groupIdToDotId (((d,d'),lvl),idx) = (fromStrict . Text.pack) $ ("group" <> (show d) <> (show d') <> (show lvl) <> (show idx))
73 branchIdToDotId :: PhyloBranchId -> DotId
74 branchIdToDotId bId = (fromStrict . Text.pack) $ ("branch" <> show (snd bId))
76 periodIdToDotId :: Period -> DotId
77 periodIdToDotId prd = (fromStrict . Text.pack) $ ("period" <> show (fst prd) <> show (snd prd))
79 groupToTable :: Vector Ngrams -> PhyloGroup -> H.Label
80 groupToTable fdt g = H.Table H.HTable
81 { H.tableFontAttrs = Just [H.PointSize 14, H.Align H.HLeft]
82 , H.tableAttrs = [H.Border 0, H.CellBorder 0, H.BGColor (toColor White)]
83 , H.tableRows = [header]
84 <> [H.Cells [H.LabelCell [H.Height 10] $ H.Text [H.Str $ fromStrict ""]]]
85 <> ( map ngramsToRow $ splitEvery 4
86 $ reverse $ sortOn (snd . snd)
87 $ zip (ngramsToText fdt (g ^. phylo_groupNgrams))
88 $ zip ((g ^. phylo_groupMeta) ! "dynamics") ((g ^. phylo_groupMeta) ! "inclusion"))}
90 --------------------------------------
91 ngramsToRow :: [(Ngrams,(Double,Double))] -> H.Row
92 ngramsToRow ns = H.Cells $ map (\(n,(d,_)) ->
93 H.LabelCell [H.Align H.HLeft,dynamicToColor d] $ H.Text [H.Str $ fromStrict n]) ns
94 --------------------------------------
97 H.Cells [ H.LabelCell [pickLabelColor ((g ^. phylo_groupMeta) ! "dynamics")]
98 $ H.Text [H.Str $ (((fromStrict . Text.toUpper) $ g ^. phylo_groupLabel)
100 <> (pack $ show (fst $ g ^. phylo_groupPeriod))
101 <> (fromStrict " , ")
102 <> (pack $ show (snd $ g ^. phylo_groupPeriod))
103 <> (fromStrict " ) ")
104 <> (pack $ show (getGroupId g)))]]
105 --------------------------------------
107 branchToDotNode :: PhyloBranch -> Int -> Dot DotId
108 branchToDotNode b bId =
109 node (branchIdToDotId $ b ^. branch_id)
110 ([FillColor [toWColor CornSilk], FontName "Arial", FontSize 40, Shape Egg, Style [SItem Bold []], Label (toDotLabel $ b ^. branch_label)]
111 <> (metaToAttr $ b ^. branch_meta)
112 <> [ toAttr "nodeType" "branch"
113 , toAttr "bId" (pack $ show bId)
114 , toAttr "branchId" (pack $ unwords (map show $ snd $ b ^. branch_id))
115 , toAttr "branch_x" (fromStrict $ Text.pack $ (show $ b ^. branch_x))
116 , toAttr "branch_y" (fromStrict $ Text.pack $ (show $ b ^. branch_y))
117 , toAttr "label" (pack $ show $ b ^. branch_label)
120 periodToDotNode :: (Date,Date) -> (Text.Text,Text.Text) -> Dot DotId
121 periodToDotNode prd prd' =
122 node (periodIdToDotId prd)
123 ([Shape BoxShape, FontSize 50, Label (toDotLabel $ Text.pack (show (fst prd) <> " " <> show (snd prd)))]
124 <> [ toAttr "nodeType" "period"
125 , toAttr "strFrom" (fromStrict $ Text.pack $ (show $ fst prd'))
126 , toAttr "strTo" (fromStrict $ Text.pack $ (show $ snd prd'))
127 , toAttr "from" (fromStrict $ Text.pack $ (show $ fst prd))
128 , toAttr "to" (fromStrict $ Text.pack $ (show $ snd prd))])
131 groupToDotNode :: Vector Ngrams -> PhyloGroup -> Int -> Dot DotId
132 groupToDotNode fdt g bId =
133 node (groupIdToDotId $ getGroupId g)
134 ([FontName "Arial", Shape Square, penWidth 4, toLabel (groupToTable fdt g)]
135 <> [ toAttr "nodeType" "group"
136 , toAttr "gid" (groupIdToDotId $ getGroupId g)
137 , toAttr "from" (pack $ show (fst $ g ^. phylo_groupPeriod))
138 , toAttr "to" (pack $ show (snd $ g ^. phylo_groupPeriod))
139 , toAttr "strFrom" (pack $ show (fst $ g ^. phylo_groupPeriod'))
140 , toAttr "strTo" (pack $ show (snd $ g ^. phylo_groupPeriod'))
141 , toAttr "branchId" (pack $ unwords (init $ map show $ snd $ g ^. phylo_groupBranchId))
142 , toAttr "bId" (pack $ show bId)
143 , toAttr "support" (pack $ show (g ^. phylo_groupSupport))
144 , toAttr "weight" (pack $ show (g ^. phylo_groupWeight))
145 , toAttr "source" (pack $ show (nub $ g ^. phylo_groupSources))
146 , toAttr "sourceFull" (pack $ show (g ^. phylo_groupSources))
147 , toAttr "density" (pack $ show (g ^. phylo_groupDensity))
148 , toAttr "cooc" (pack $ show (g ^. phylo_groupCooc))
149 , toAttr "lbl" (pack $ show (ngramsToLabel fdt (g ^. phylo_groupNgrams)))
150 , toAttr "foundation" (pack $ show (idxToLabel (g ^. phylo_groupNgrams)))
151 , toAttr "role" (pack $ show (idxToLabel' ((g ^. phylo_groupMeta) ! "dynamics")))
152 , toAttr "frequence" (pack $ show (idxToLabel' ((g ^. phylo_groupMeta) ! "frequence")))
153 , toAttr "seaLvl" (pack $ show ((g ^. phylo_groupMeta) ! "seaLevels"))
157 toDotEdge' :: DotId -> DotId -> [Char] -> [Char] -> EdgeType -> Dot DotId
158 toDotEdge' source target thr w edgeType = edge source target
160 GroupToGroup -> undefined
161 GroupToGroupMemory -> [ Width 3, penWidth 4, Color [toWColor Black], Constraint True] <> [toAttr "edgeType" "memoryLink", toAttr "thr" (pack thr), toAttr "weight" (pack w)]
162 BranchToGroup -> undefined
163 BranchToBranch -> undefined
164 GroupToAncestor -> undefined
165 PeriodToPeriod -> undefined)
168 toDotEdge :: DotId -> DotId -> [Char] -> EdgeType -> Dot DotId
169 toDotEdge source target lbl edgeType = edge source target
171 GroupToGroup -> [ Width 3, penWidth 4, Color [toWColor Black], Constraint True] <> [toAttr "edgeType" "link", toAttr "lbl" (pack lbl), toAttr "source" source, toAttr "target" target]
172 GroupToGroupMemory -> undefined
173 BranchToGroup -> [ Width 3, Color [toWColor Black], ArrowHead (AType [(ArrMod FilledArrow RightSide,DotArrow)])] <> [toAttr "edgeType" "branchLink" ]
174 BranchToBranch -> [ Width 2, Color [toWColor Black], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,DotArrow)])]
175 GroupToAncestor -> [ Width 3, Color [toWColor Red], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,NoArrow)]), PenWidth 4] <> [toAttr "edgeType" "ancestorLink", toAttr "lbl" (pack lbl), toAttr "source" source, toAttr "target" target]
176 PeriodToPeriod -> [ Width 5, Color [toWColor Black]])
179 mergePointers :: [PhyloGroup] -> Map (PhyloGroupId,PhyloGroupId) Double
180 mergePointers groups =
181 let toChilds = fromList $ concat $ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupPeriodChilds) groups
182 toParents = fromList $ concat $ map (\g -> map (\(target,w) -> ((target,getGroupId g),w)) $ g ^. phylo_groupPeriodParents) groups
183 in unionWith (\w w' -> max w w') toChilds toParents
185 mergePointersMemory :: [PhyloGroup] -> [((PhyloGroupId,PhyloGroupId),(Double,Double))]
186 mergePointersMemory groups =
187 let toChilds = concat $ map (\g -> map (\(target,(t,w)) -> ((getGroupId g,target),(t,w))) $ g ^. phylo_groupPeriodMemoryChilds) groups
188 toParents = concat $ map (\g -> map (\(target,(t,w)) -> ((target,getGroupId g),(t,w))) $ g ^. phylo_groupPeriodMemoryParents) groups
189 in concat [toChilds,toParents]
191 mergeAncestors :: [PhyloGroup] -> [((PhyloGroupId,PhyloGroupId), Double)]
192 mergeAncestors groups = concat
193 $ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupAncestors)
194 $ filter (\g -> (not . null) $ g ^. phylo_groupAncestors) groups
197 toBid :: PhyloGroup -> [PhyloBranch] -> Int
199 let b' = head' "toBid" (filter (\b -> b ^. branch_id == g ^. phylo_groupBranchId) bs)
200 in fromJust $ elemIndex b' bs
202 exportToDot :: Phylo -> PhyloExport -> DotGraph DotId
203 exportToDot phylo export =
204 trace ("\n-- | Convert " <> show(length $ export ^. export_branches) <> " branches and "
205 <> show(length $ export ^. export_groups) <> " groups "
206 <> show(length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) $ export ^. export_groups) <> " terms to a dot file\n\n"
207 <> "##########################") $
208 digraph ((Str . fromStrict) $ (phyloName $ getConfig phylo)) $ do
210 {- 1) init the dot graph -}
211 graphAttrs ( [ Label (toDotLabel $ (phyloName $ getConfig phylo))]
212 <> [ FontSize 30, LabelLoc VTop, NodeSep 1, RankSep [1], Rank SameRank, Splines SplineEdges, Overlap ScaleOverlaps
215 , Style [SItem Filled []],Color [toWColor White]]
216 {-- home made attributes -}
217 <> [(toAttr (fromStrict "phyloFoundations") $ pack $ show (length $ Vector.toList $ getRoots phylo))
218 ,(toAttr (fromStrict "phyloTerms") $ pack $ show (length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) $ export ^. export_groups))
219 ,(toAttr (fromStrict "phyloDocs") $ pack $ show (sum $ elems $ getDocsByDate phylo))
220 ,(toAttr (fromStrict "phyloPeriods") $ pack $ show (length $ elems $ phylo ^. phylo_periods))
221 ,(toAttr (fromStrict "phyloBranches") $ pack $ show (length $ export ^. export_branches))
222 ,(toAttr (fromStrict "phyloGroups") $ pack $ show (length $ export ^. export_groups))
223 ,(toAttr (fromStrict "phyloSources") $ pack $ show (Vector.toList $ getSources phylo))
224 ,(toAttr (fromStrict "phyloTimeScale") $ pack $ getTimeScale phylo)
225 ,(toAttr (fromStrict "PhyloScale") $ pack $ show (getLevel phylo))
226 ,(toAttr (fromStrict "phyloQuality") $ pack $ show (phylo ^. phylo_quality))
227 ,(toAttr (fromStrict "phyloSeaRiseStart") $ pack $ show (getPhyloSeaRiseStart phylo))
228 ,(toAttr (fromStrict "phyloSeaRiseSteps") $ pack $ show (getPhyloSeaRiseSteps phylo))
229 -- ,(toAttr (fromStrict "phyloTermsFreq") $ pack $ show (toList $ _phylo_lastTermFreq phylo))
233 -- toAttr (fromStrict k) $ (pack . unwords) $ map show v
235 -- 2) create a layer for the branches labels -}
236 subgraph (Str "Branches peaks") $ do
238 -- graphAttrs [Rank SameRank]
240 -- 3) group the branches by hierarchy
241 -- mapM (\branches ->
242 -- subgraph (Str "Branches clade") $ do
243 -- graphAttrs [Rank SameRank]
245 -- -- 4) create a node for each branch
246 -- mapM branchToDotNode branches
247 -- ) $ elems $ fromListWith (++) $ map (\b -> ((init . snd) $ b ^. branch_id,[b])) $ export ^. export_branches
249 mapM (\b -> branchToDotNode b (fromJust $ elemIndex b (export ^. export_branches))) $ export ^. export_branches
251 {-- 5) create a layer for each period -}
252 _ <- mapM (\period ->
253 subgraph ((Str . fromStrict . Text.pack) $ ("Period" <> show (fst $ _phylo_periodPeriod period) <> show (snd $ _phylo_periodPeriod period))) $ do
254 graphAttrs [Rank SameRank]
255 periodToDotNode (period ^. phylo_periodPeriod) (period ^. phylo_periodPeriodStr)
257 {-- 6) create a node for each group -}
258 mapM (\g -> groupToDotNode (getRoots phylo) g (toBid g (export ^. export_branches))) (filter (\g -> g ^. phylo_groupPeriod == (period ^. phylo_periodPeriod)) $ export ^. export_groups)
259 ) $ phylo ^. phylo_periods
261 {-- 7) create the edges between a branch and its first groups -}
262 _ <- mapM (\(bId,groups) ->
263 mapM (\g -> toDotEdge (branchIdToDotId bId) (groupIdToDotId $ getGroupId g) "" BranchToGroup) groups
266 $ map (\groups -> head' "toDot"
267 $ groupBy (\g g' -> g' ^. phylo_groupPeriod == g ^. phylo_groupPeriod)
268 $ sortOn (fst . _phylo_groupPeriod) groups)
269 $ fromListWith (++) $ map (\g -> (g ^. phylo_groupBranchId,[g])) $ export ^. export_groups
271 {- 8) create the edges between the groups -}
272 _ <- mapM (\((k,k'),v) ->
273 toDotEdge (groupIdToDotId k) (groupIdToDotId k') (show v) GroupToGroup
274 ) $ (toList . mergePointers) $ export ^. export_groups
276 {- 8-bis) create the edges between the groups -}
277 {- _ <- mapM (\((k,k'),v) ->
278 toDotEdge' (groupIdToDotId k) (groupIdToDotId k') (show (fst v)) (show (snd v)) GroupToGroupMemory
279 ) $ mergePointersMemory $ export ^. export_groups -}
281 _ <- mapM (\((k,k'),v) ->
282 toDotEdge (groupIdToDotId k) (groupIdToDotId k') (show v) GroupToAncestor
283 ) $ mergeAncestors $ export ^. export_groups
285 -- 10) create the edges between the periods
286 _ <- mapM (\(prd,prd') ->
287 toDotEdge (periodIdToDotId prd) (periodIdToDotId prd') "" PeriodToPeriod
288 ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ getPeriodIds phylo
290 {- 8) create the edges between the branches
291 -- _ <- mapM (\(bId,bId') ->
292 -- toDotEdge (branchIdToDotId bId) (branchIdToDotId bId')
293 -- (Text.pack $ show(branchIdsToSimilarity bId bId'
294 -- (getThresholdInit $ phyloSimilarity $ getConfig phylo)
295 -- (getThresholdStep $ phyloSimilarity $ getConfig phylo))) BranchToBranch
296 -- ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ map _branch_id $ export ^. export_branches
300 graphAttrs [Rank SameRank]
307 filterByBranchSize :: Double -> PhyloExport -> PhyloExport
308 filterByBranchSize thr export =
309 let splited = partition (\b -> head' "filter" ((b ^. branch_meta) ! "size") >= thr) $ export ^. export_branches
310 in export & export_branches .~ (fst splited)
311 & export_groups %~ (filter (\g -> not $ elem (g ^. phylo_groupBranchId) (map _branch_id $ snd splited)))
314 processFilters :: [Filter] -> Quality -> PhyloExport -> PhyloExport
315 processFilters filters qua export =
316 foldl (\export' f -> case f of
317 ByBranchSize thr -> if (thr < (fromIntegral $ qua ^. qua_minBranch))
318 then filterByBranchSize (fromIntegral $ qua ^. qua_minBranch) export'
319 else filterByBranchSize thr export'
326 branchToIso :: [PhyloBranch] -> [PhyloBranch]
327 branchToIso branches =
330 $ map (\(b,x) -> b ^. branch_y + 0.05 - x)
332 $ ([0] ++ (map (\(b,b') ->
333 let idx = length $ commonPrefix (b ^. branch_canonId) (b' ^. branch_canonId) []
334 lmin = min (length $ b ^. branch_seaLevel) (length $ b' ^. branch_seaLevel)
336 if ((idx - 1) > ((length $ b' ^. branch_seaLevel) - 1))
337 then (b' ^. branch_seaLevel) !! (lmin - 1)
338 else (b' ^. branch_seaLevel) !! (idx - 1)
339 ) $ listToSeq branches))
340 in map (\(x,b) -> b & branch_x .~ x)
343 branchToIso' :: Double -> Double -> [PhyloBranch] -> [PhyloBranch]
344 branchToIso' start step branches =
345 let bx = map (\l -> (sum l) + ((fromIntegral $ length l) * 0.5))
347 $ ([0] ++ (map (\(b,b') ->
348 let root = fromIntegral $ length $ commonPrefix (snd $ b ^. branch_id) (snd $ b' ^. branch_id) []
349 in 1 - start - step * root) $ listToSeq branches))
350 in map (\(x,b) -> b & branch_x .~ x)
354 sortByHierarchy :: Int -> [PhyloBranch] -> [PhyloBranch]
355 sortByHierarchy depth branches =
356 if (length branches == 1)
360 let partitions = partition (\b -> depth + 1 == ((length . snd) $ b ^. branch_id)) branches'
361 in (sortOn (\b -> (b ^. branch_meta) ! "birth") (fst partitions))
362 ++ (sortByHierarchy (depth + 1) (snd partitions)))
363 $ groupBy (\b b' -> ((take depth . snd) $ b ^. branch_id) == ((take depth . snd) $ b' ^. branch_id) )
364 $ sortOn (\b -> (take depth . snd) $ b ^. branch_id) branches
367 sortByBirthDate :: Order -> PhyloExport -> PhyloExport
368 sortByBirthDate order export =
369 let branches = sortOn (\b -> (b ^. branch_meta) ! "birth") $ export ^. export_branches
370 branches' = case order of
372 Desc -> reverse branches
373 in export & export_branches .~ branches'
375 processSort :: Sort -> SeaElevation -> PhyloExport -> PhyloExport
376 processSort sort' elev export = case sort' of
377 ByBirthDate o -> sortByBirthDate o export
378 ByHierarchy _ -> case elev of
379 Constante s s' -> export & export_branches .~ (branchToIso' s s' $ sortByHierarchy 0 (export ^. export_branches))
380 Adaptative _ -> export & export_branches .~ (branchToIso $ sortByHierarchy 0 (export ^. export_branches))
381 Evolving _ -> export & export_branches .~ (branchToIso $ sortByHierarchy 0 (export ^. export_branches))
387 -- | Return the conditional probability of i knowing j
388 conditional :: Ord a => Map (a,a) Double -> a -> a -> Double
389 conditional m i j = (findWithDefault 0 (i,j) m)
393 -- | Return the genericity score of a given ngram
394 genericity :: Map (Int, Int) Double -> [Int] -> Int -> Double
395 genericity m l i = ( (sum $ map (\j -> conditional m i j) l)
396 - (sum $ map (\j -> conditional m j i) l)) / (fromIntegral $ (length l) + 1)
399 -- | Return the specificity score of a given ngram
400 specificity :: Map (Int, Int) Double -> [Int] -> Int -> Double
401 specificity m l i = ( (sum $ map (\j -> conditional m j i) l)
402 - (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
405 -- | Return the inclusion score of a given ngram
406 inclusion :: Map (Int, Int) Double -> [Int] -> Int -> Double
407 inclusion m l i = ( (sum $ map (\j -> conditional m j i) l)
408 + (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
411 ngramsMetrics :: Phylo -> PhyloExport -> PhyloExport
412 ngramsMetrics phylo export =
415 (\g -> g & phylo_groupMeta %~ insert "genericity"
416 (map (\n -> genericity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
417 & phylo_groupMeta %~ insert "specificity"
418 (map (\n -> specificity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
419 & phylo_groupMeta %~ insert "inclusion"
420 (map (\n -> inclusion (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
421 & phylo_groupMeta %~ insert "frequence"
422 (map (\n -> getInMap n (getLastRootsFreq phylo)) $ g ^. phylo_groupNgrams)
426 branchDating :: PhyloExport -> PhyloExport
427 branchDating export =
428 over ( export_branches
431 let groups = sortOn fst
432 $ foldl' (\acc g -> if (g ^. phylo_groupBranchId == b ^. branch_id)
433 then acc ++ [g ^. phylo_groupPeriod]
434 else acc ) [] $ export ^. export_groups
436 birth = fst $ head' "birth" groups
437 death = snd $ last' "death" groups
439 in b & branch_meta %~ insert "birth" [fromIntegral birth]
440 & branch_meta %~ insert "death" [fromIntegral death]
441 & branch_meta %~ insert "age" [fromIntegral age]
442 & branch_meta %~ insert "size" [fromIntegral $ length periods] ) export
444 processMetrics :: Phylo -> PhyloExport -> PhyloExport
445 processMetrics phylo export = ngramsMetrics phylo
446 $ branchDating export
453 nk :: Int -> [[Int]] -> Int
455 $ map (\g -> if (elem n g)
460 tf :: Int -> [[Int]] -> Double
461 tf n groups = (fromIntegral $ nk n groups) / (fromIntegral $ length $ concat groups)
464 idf :: Int -> [[Int]] -> Double
465 idf n groups = log ((fromIntegral $ length groups) / (fromIntegral $ nk n groups))
468 findTfIdf :: [[Int]] -> [(Int,Double)]
469 findTfIdf groups = reverse $ sortOn snd $ map (\n -> (n,(tf n groups) * (idf n groups))) $ sort $ nub $ concat groups
472 findEmergences :: [PhyloGroup] -> Map Int Double -> [(Int,Double)]
473 findEmergences groups freq =
474 let ngrams = map _phylo_groupNgrams groups
475 dynamics = map (\g -> (g ^. phylo_groupMeta) ! "dynamics") groups
476 emerging = nubBy (\n1 n2 -> fst n1 == fst n2)
477 $ concat $ map (\g -> filter (\(_,d) -> d == 0) $ zip (fst g) (snd g)) $ zip ngrams dynamics
478 in reverse $ sortOn snd
479 $ map (\(n,_) -> if (member n freq)
484 mostEmergentTfIdf :: Int -> Map Int Double -> Vector Ngrams -> PhyloExport -> PhyloExport
485 mostEmergentTfIdf nth freq foundations export =
486 over ( export_branches
489 let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
490 tfidf = findTfIdf (map _phylo_groupNgrams groups)
491 emergences = findEmergences groups freq
492 selected = if (null emergences)
493 then map fst $ take nth tfidf
494 else [fst $ head' "mostEmergentTfIdf" emergences]
495 ++ (map fst $ take (nth - 1) $ filter (\(n,_) -> n /= (fst $ head' "mostEmergentTfIdf" emergences)) tfidf)
496 in b & branch_label .~ (ngramsToLabel foundations selected)) export
499 getNthMostMeta :: Int -> [Double] -> [Int] -> [Int]
500 getNthMostMeta nth meta ns = map (\(idx,_) -> (ns !! idx))
503 $ sortOn snd $ zip [0..] meta
506 mostInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
507 mostInclusive nth foundations export =
508 over ( export_branches
511 let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
512 cooc = foldl (\acc g -> unionWith (+) acc (g ^. phylo_groupCooc)) empty groups
513 ngrams = sort $ foldl (\acc g -> union acc (g ^. phylo_groupNgrams)) [] groups
514 inc = map (\n -> inclusion cooc (ngrams \\ [n]) n) ngrams
515 lbl = ngramsToLabel foundations $ getNthMostMeta nth inc ngrams
516 in b & branch_label .~ lbl ) export
519 mostEmergentInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
520 mostEmergentInclusive nth foundations export =
524 let lbl = ngramsToLabel foundations
526 $ map (\(_,(_,idx)) -> idx)
528 $ map (\groups -> sortOn (fst . snd) groups)
529 $ groupBy ((==) `on` fst) $ reverse $ sortOn fst
530 $ zip ((g ^. phylo_groupMeta) ! "inclusion")
531 $ zip ((g ^. phylo_groupMeta) ! "dynamics") (g ^. phylo_groupNgrams)
532 in g & phylo_groupLabel .~ lbl ) export
535 processLabels :: [PhyloLabel] -> Vector Ngrams -> Map Int Double -> PhyloExport -> PhyloExport
536 processLabels labels foundations freq export =
537 foldl (\export' label ->
539 GroupLabel tagger nth ->
541 MostEmergentInclusive -> mostEmergentInclusive nth foundations export'
542 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger"
543 BranchLabel tagger nth ->
545 MostInclusive -> mostInclusive nth foundations export'
546 MostEmergentTfIdf -> mostEmergentTfIdf nth freq foundations export'
547 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger" ) export labels
554 -- utiliser & creer une Map FdtId [PhyloGroup]
555 -- n = index of the current term
556 toDynamics :: FdtId -> [PhyloGroup] -> PhyloGroup -> Map FdtId (Date,Date) -> Double
557 toDynamics n elders g m =
558 let prd = g ^. phylo_groupPeriod
559 end = last' "dynamics" (sort $ map snd $ elems m)
560 in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end))
563 else if ((fst prd) == (fst $ m ! n))
571 --------------------------------------
573 isNew = not $ elem n $ concat $ map _phylo_groupNgrams elders
576 processDynamics :: [PhyloGroup] -> [PhyloGroup]
577 processDynamics groups =
579 let elders = filter (\g' -> (g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
580 && ((fst $ g ^. phylo_groupPeriod) > (fst $ g' ^. phylo_groupPeriod))) groups
581 in g & phylo_groupMeta %~ insert "dynamics" (map (\n -> toDynamics n elders g mapNgrams) $ g ^. phylo_groupNgrams) ) groups
583 --------------------------------------
584 mapNgrams :: Map FdtId (Date,Date)
585 mapNgrams = map (\dates ->
586 let dates' = sort dates
587 in (head' "dynamics" dates', last' "dynamics" dates'))
589 $ foldl (\acc g -> acc ++ ( map (\n -> (n,[fst $ g ^. phylo_groupPeriod, snd $ g ^. phylo_groupPeriod]))
590 $ (g ^. phylo_groupNgrams))) [] groups
597 getGroupThr :: Double -> PhyloGroup -> Double
599 let seaLvl = (g ^. phylo_groupMeta) ! "seaLevels"
600 breaks = (g ^. phylo_groupMeta) ! "breaks"
601 in (last' "export" (take (round $ (last' "export" breaks) + 1) seaLvl)) - step
603 toAncestor :: Double -> Map Int Double -> PhyloSimilarity -> Double -> [PhyloGroup] -> PhyloGroup -> PhyloGroup
604 toAncestor nbDocs diago similarity step candidates ego =
605 let curr = ego ^. phylo_groupAncestors
606 in ego & phylo_groupAncestors .~ (curr ++ (map (\(g,w) -> (getGroupId g,w))
607 $ filter (\(g,w) -> (w > 0) && (w >= (min (getGroupThr step ego) (getGroupThr step g))))
608 $ map (\g -> (g, toSimilarity nbDocs diago similarity (ego ^. phylo_groupNgrams) (g ^. phylo_groupNgrams) (g ^. phylo_groupNgrams)))
609 $ filter (\g -> g ^. phylo_groupBranchId /= ego ^. phylo_groupBranchId ) candidates))
612 headsToAncestors :: Double -> Map Int Double -> PhyloSimilarity -> Double -> [PhyloGroup] -> [PhyloGroup] -> [PhyloGroup]
613 headsToAncestors nbDocs diago similarity step heads acc =
617 let ego = head' "headsToAncestors" heads
618 heads' = tail' "headsToAncestors" heads
619 in headsToAncestors nbDocs diago similarity step heads' (acc ++ [toAncestor nbDocs diago similarity step heads' ego])
622 toHorizon :: Phylo -> Phylo
624 let phyloAncestor = updatePhyloGroups
626 (fromList $ map (\g -> (getGroupId g, g))
628 $ tracePhyloAncestors newGroups) phylo
629 reBranched = fromList $ map (\g -> (getGroupId g, g)) $ concat
630 $ groupsToBranches' $ fromList $ map (\g -> (getGroupId g, g)) $ getGroupsFromScale scale phyloAncestor
631 in updatePhyloGroups scale reBranched phylo
633 -- | 1) for each periods
635 periods = getPeriodIds phylo
638 scale = getLastLevel phylo
641 frame = getTimeFrame $ timeUnit $ getConfig phylo
642 -- | 2) find ancestors between groups without parents
643 mapGroups :: [[PhyloGroup]]
644 mapGroups = map (\prd ->
645 let groups = getGroupsFromScalePeriods scale [prd] phylo
646 childs = getPreviousChildIds scale frame prd periods phylo
647 -- maybe add a better filter for non isolated ancestors
648 heads = filter (\g -> (not . null) $ (g ^. phylo_groupPeriodChilds))
649 $ filter (\g -> null (g ^. phylo_groupPeriodParents) && (notElem (getGroupId g) childs)) groups
650 noHeads = groups \\ heads
651 nbDocs = sum $ elems $ filterDocs (getDocsByDate phylo) [prd]
652 diago = reduceDiagos $ filterDiago (getCoocByDate phylo) [prd]
653 sim = (similarity $ getConfig phylo)
654 step = case getSeaElevation phylo of
658 -- in headsToAncestors nbDocs diago Similarity heads groups []
659 in map (\ego -> toAncestor nbDocs diago sim step noHeads ego)
660 $ headsToAncestors nbDocs diago sim step heads []
662 -- | 3) process this task concurrently
663 newGroups :: [[PhyloGroup]]
664 newGroups = mapGroups `using` parList rdeepseq
665 --------------------------------------
667 getPreviousChildIds :: Scale -> Int -> Period -> [Period] -> Phylo -> [PhyloGroupId]
668 getPreviousChildIds lvl frame curr prds phylo =
669 concat $ map ((map fst) . _phylo_groupPeriodChilds)
670 $ getGroupsFromScalePeriods lvl (getNextPeriods ToParents frame curr prds) phylo
672 ---------------------
673 -- | phyloExport | --
674 ---------------------
676 toPhyloExport :: Phylo -> DotGraph DotId
677 toPhyloExport phylo = exportToDot phylo
678 $ processFilters (exportFilter $ getConfig phylo) (phyloQuality $ getConfig phylo)
679 $ processSort (exportSort $ getConfig phylo) (getSeaElevation phylo)
680 $ processLabels (exportLabel $ getConfig phylo) (getRoots phylo) (getLastRootsFreq phylo)
681 $ processMetrics phylo export
683 export :: PhyloExport
684 export = PhyloExport groups branches
685 --------------------------------------
686 branches :: [PhyloBranch]
687 branches = map (\g ->
688 let seaLvl = (g ^. phylo_groupMeta) ! "seaLevels"
689 breaks = (g ^. phylo_groupMeta) ! "breaks"
690 canonId = take (round $ (last' "export" breaks) + 2) (snd $ g ^. phylo_groupBranchId)
691 in PhyloBranch (g ^. phylo_groupBranchId)
695 (last' "export" (take (round $ (last' "export" breaks) + 1) seaLvl))
699 $ map (\gs -> head' "export" gs)
700 $ groupBy (\g g' -> g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
701 $ sortOn (\g -> g ^. phylo_groupBranchId) groups
702 --------------------------------------
703 groups :: [PhyloGroup]
704 groups = traceExportGroups
707 $ getGroupsFromScale (phyloScale $ getConfig phylo)
708 $ tracePhyloInfo phylo
711 traceExportBranches :: [PhyloBranch] -> [PhyloBranch]
712 traceExportBranches branches = trace ("\n"
713 <> "-- | Export " <> show(length branches) <> " branches") branches
715 tracePhyloAncestors :: [[PhyloGroup]] -> [[PhyloGroup]]
716 tracePhyloAncestors groups = trace ( "-- | Found " <> show(length $ concat $ map _phylo_groupAncestors $ concat groups) <> " ancestors") groups
718 tracePhyloInfo :: Phylo -> Phylo
719 tracePhyloInfo phylo = trace ("\n" <> "##########################" <> "\n\n" <> "-- | Phylo with λ = "
720 <> show(getLevel phylo) <> " applied to "
721 <> show(length $ Vector.toList $ getRoots phylo) <> " foundations"
725 traceExportGroups :: [PhyloGroup] -> [PhyloGroup]
726 traceExportGroups groups = trace ("\n" <> "-- | Export "
727 <> show(length $ nub $ map (\g -> g ^. phylo_groupBranchId) groups) <> " branches, "
728 <> show(length groups) <> " groups and "
729 <> show(length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) groups) <> " terms"