2 Module : Gargantext.Core.Viz.Phylo.PhyloExport
3 Description : Exportation module of a Phylo
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
11 {-# LANGUAGE TypeSynonymInstances #-}
13 module Gargantext.Core.Viz.Phylo.PhyloExport where
15 import Control.Lens hiding (Level)
16 import Control.Parallel.Strategies (parList, rdeepseq, using)
17 import Data.GraphViz hiding (DotGraph, Order)
18 import Data.GraphViz.Attributes.Complete hiding (EdgeType, Order)
19 import Data.GraphViz.Types.Generalised (DotGraph)
20 import Data.GraphViz.Types.Monadic
21 import Data.List ((++), sort, nub, null, concat, sortOn, groupBy, union, (\\), (!!), init, partition, notElem, unwords, nubBy, inits, elemIndex)
22 import Data.Map (Map, fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault, toList, member)
23 import Data.Text.Lazy (fromStrict, pack, unpack)
24 import Data.Vector (Vector)
25 import Debug.Trace (trace)
26 import Gargantext.Core.Viz.Phylo
27 import Gargantext.Core.Viz.Phylo.PhyloTools
28 import Gargantext.Core.Viz.Phylo.TemporalMatching (filterDocs, filterDiago, reduceDiagos, toSimilarity, getNextPeriods)
29 import Gargantext.Prelude hiding (scale)
30 import Prelude (writeFile)
31 import Protolude (floor)
32 import System.FilePath
33 import qualified Data.GraphViz.Attributes.HTML as H
34 import qualified Data.Text as Text
35 import qualified Data.Text.Lazy as Lazy
36 import qualified Data.Vector as Vector
42 dotToFile :: FilePath -> DotGraph DotId -> IO ()
43 dotToFile filePath dotG = writeFile filePath $ dotToString dotG
45 dotToString :: DotGraph DotId -> [Char]
46 dotToString dotG = unpack (printDotGraph dotG)
48 dynamicToColor :: Int -> H.Attribute
50 | d == 0 = H.BGColor (toColor LightCoral)
51 | d == 1 = H.BGColor (toColor Khaki)
52 | d == 2 = H.BGColor (toColor SkyBlue)
53 | otherwise = H.Color (toColor Black)
55 pickLabelColor :: [Int] -> H.Attribute
57 | elem 0 lst = dynamicToColor 0
58 | elem 1 lst = dynamicToColor 1
59 | elem 2 lst = dynamicToColor 2
60 | otherwise = dynamicToColor 3
62 toDotLabel :: Text.Text -> Label
63 toDotLabel lbl = StrLabel $ fromStrict lbl
65 toAttr :: AttributeName -> Lazy.Text -> CustomAttribute
66 toAttr k v = customAttribute k v
68 metaToAttr :: Map Text.Text [Double] -> [CustomAttribute]
69 metaToAttr meta = map (\(k, v) -> toAttr (fromStrict k) $ (pack . unwords) $ map show v) $ toList meta
71 groupIdToDotId :: PhyloGroupId -> DotId
72 groupIdToDotId (((d, d'), lvl), idx) =
73 (fromStrict . Text.pack) $ "group" <> show d <> show d' <> show lvl <> show idx
75 branchIdToDotId :: PhyloBranchId -> DotId
76 branchIdToDotId bId = (fromStrict . Text.pack) $ "branch" <> show (snd bId)
78 periodIdToDotId :: Period -> DotId
79 periodIdToDotId prd = (fromStrict . Text.pack) $ "period" <> show (fst prd) <> show (snd prd)
81 groupToTable :: Vector Ngrams -> PhyloGroup -> H.Label
84 { H.tableFontAttrs = Just [H.PointSize 14, H.Align H.HLeft]
85 , H.tableAttrs = [H.Border 0, H.CellBorder 0, H.BGColor (toColor White)]
86 , H.tableRows = [header]
87 <> [H.Cells [H.LabelCell [H.Height 10] $ H.Text [H.Str $ fromStrict ""]]]
88 <> ( map ngramsToRow $ splitEvery 4
89 $ reverse $ sortOn (snd . snd)
90 $ zip (ngramsToText fdt (g ^. phylo_groupNgrams))
91 $ zip ((g ^. phylo_groupMeta) ! "dynamics") ((g ^. phylo_groupMeta) ! "inclusion"))}
93 --------------------------------------
94 ngramsToRow :: [(Ngrams, (Double, Double))] -> H.Row
96 H.Cells $ map (\(n, (d, _)) ->
97 H.LabelCell [ H.Align H.HLeft
98 , dynamicToColor $ floor d] $ H.Text [H.Str $ fromStrict n]) ns
99 --------------------------------------
102 H.Cells [ H.LabelCell [pickLabelColor $ floor <$> ((g ^. phylo_groupMeta) ! "dynamics")]
103 $ H.Text [H.Str $ ((fromStrict . Text.toUpper) $ g ^. phylo_groupLabel)
105 <> (pack $ show (fst $ g ^. phylo_groupPeriod))
107 <> (pack $ show (snd $ g ^. phylo_groupPeriod))
109 <> (pack $ show (getGroupId g))]]
110 --------------------------------------
112 branchToDotNode :: PhyloBranch -> Int -> Dot DotId
113 branchToDotNode b bId =
114 node (branchIdToDotId $ b ^. branch_id)
115 ( [ FillColor [toWColor CornSilk]
119 , Style [SItem Bold []]
120 , Label (toDotLabel $ b ^. branch_label) ]
121 <> (metaToAttr $ b ^. branch_meta)
122 <> [ toAttr "nodeType" "branch"
123 , toAttr "bId" (pack $ show bId)
124 , toAttr "branchId" (pack $ unwords (map show $ snd $ b ^. branch_id))
125 , toAttr "branch_x" (fromStrict $ Text.pack $ show $ b ^. branch_x)
126 , toAttr "branch_y" (fromStrict $ Text.pack $ show $ b ^. branch_y)
127 , toAttr "label" (pack $ show $ b ^. branch_label)
130 periodToDotNode :: (Date,Date) -> (Text.Text,Text.Text) -> Dot DotId
131 periodToDotNode prd prd' =
132 node (periodIdToDotId prd) $
135 , Label $ toDotLabel $ Text.pack $ show (fst prd) <> " " <> show (snd prd) ]
136 <> [ toAttr "nodeType" "period"
137 , toAttr "strFrom" $ fromStrict $ Text.pack $ show $ fst prd'
138 , toAttr "strTo" $ fromStrict $ Text.pack $ show $ snd prd'
139 , toAttr "from" $ fromStrict $ Text.pack $ show $ fst prd
140 , toAttr "to" $ fromStrict $ Text.pack $ show $ snd prd ]
143 groupToDotNode :: Vector Ngrams -> PhyloGroup -> Int -> Dot DotId
144 groupToDotNode fdt g bId =
145 node (groupIdToDotId $ getGroupId g)
146 ([FontName "Arial", Shape Square, penWidth 4, toLabel (groupToTable fdt g)]
147 <> [ toAttr "nodeType" "group"
148 , toAttr "gid" (groupIdToDotId $ getGroupId g)
149 , toAttr "from" (pack $ show (fst $ g ^. phylo_groupPeriod))
150 , toAttr "to" (pack $ show (snd $ g ^. phylo_groupPeriod))
151 , toAttr "strFrom" (pack $ show (fst $ g ^. phylo_groupPeriod'))
152 , toAttr "strTo" (pack $ show (snd $ g ^. phylo_groupPeriod'))
153 , toAttr "branchId" (pack $ unwords (init $ map show $ snd $ g ^. phylo_groupBranchId))
154 , toAttr "bId" (pack $ show bId)
155 , toAttr "support" (pack $ show (g ^. phylo_groupSupport))
156 , toAttr "weight" (pack $ show (g ^. phylo_groupWeight))
157 , toAttr "source" (pack $ show (nub $ g ^. phylo_groupSources))
158 , toAttr "sourceFull" (pack $ show (g ^. phylo_groupSources))
159 , toAttr "density" (pack $ show (g ^. phylo_groupDensity))
160 , toAttr "cooc" (pack $ show (g ^. phylo_groupCooc))
161 , toAttr "lbl" (pack $ show (ngramsToLabel fdt (g ^. phylo_groupNgrams)))
162 , toAttr "foundation" (pack $ show (idxToLabel (g ^. phylo_groupNgrams)))
163 , toAttr "role" (pack $ show (idxToLabel' ((g ^. phylo_groupMeta) ! "dynamics")))
164 , toAttr "frequence" (pack $ show (idxToLabel' ((g ^. phylo_groupMeta) ! "frequence")))
165 , toAttr "seaLvl" (pack $ show ((g ^. phylo_groupMeta) ! "seaLevels"))
168 toDotEdge' :: DotId -> DotId -> [Char] -> [Char] -> EdgeType -> Dot DotId
169 toDotEdge' source target thr w edgeType = edge source target
171 GroupToGroup -> undefined
172 GroupToGroupMemory -> [ Width 3, penWidth 4, Color [toWColor Black], Constraint True] <> [toAttr "edgeType" "memoryLink", toAttr "thr" (pack thr), toAttr "weight" (pack w)]
173 BranchToGroup -> undefined
174 BranchToBranch -> undefined
175 GroupToAncestor -> undefined
176 PeriodToPeriod -> undefined)
179 toDotEdge :: DotId -> DotId -> [Char] -> EdgeType -> Dot DotId
180 toDotEdge source target lbl edgeType = edge source target
182 GroupToGroup -> [ Width 3, penWidth 4, Color [toWColor Black], Constraint True] <> [toAttr "edgeType" "link", toAttr "lbl" (pack lbl), toAttr "source" source, toAttr "target" target]
183 GroupToGroupMemory -> undefined
184 BranchToGroup -> [ Width 3, Color [toWColor Black], ArrowHead (AType [(ArrMod FilledArrow RightSide,DotArrow)])] <> [toAttr "edgeType" "branchLink" ]
185 BranchToBranch -> [ Width 2, Color [toWColor Black], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,DotArrow)])]
186 GroupToAncestor -> [ Width 3, Color [toWColor Red], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,NoArrow)]), PenWidth 4] <> [toAttr "edgeType" "ancestorLink", toAttr "lbl" (pack lbl), toAttr "source" source, toAttr "target" target]
187 PeriodToPeriod -> [ Width 5, Color [toWColor Black]])
190 mergePointers :: [PhyloGroup] -> Map (PhyloGroupId,PhyloGroupId) Double
191 mergePointers groups =
192 let toChilds = fromList $ concat $ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupPeriodChilds) groups
193 toParents = fromList $ concat $ map (\g -> map (\(target,w) -> ((target,getGroupId g),w)) $ g ^. phylo_groupPeriodParents) groups
194 in unionWith (\w w' -> max w w') toChilds toParents
196 mergePointersMemory :: [PhyloGroup] -> [((PhyloGroupId,PhyloGroupId),(Double,Double))]
197 mergePointersMemory groups =
198 let toChilds = concat $ map (\g -> map (\(target,(t,w)) -> ((getGroupId g,target),(t,w))) $ g ^. phylo_groupPeriodMemoryChilds) groups
199 toParents = concat $ map (\g -> map (\(target,(t,w)) -> ((target,getGroupId g),(t,w))) $ g ^. phylo_groupPeriodMemoryParents) groups
200 in concat [toChilds,toParents]
202 mergeAncestors :: [PhyloGroup] -> [((PhyloGroupId,PhyloGroupId), Double)]
203 mergeAncestors groups = concat
204 $ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupAncestors)
205 $ filter (\g -> (not . null) $ g ^. phylo_groupAncestors) groups
208 toBid :: PhyloGroup -> [PhyloBranch] -> Int
210 let b' = head' "toBid" (filter (\b -> b ^. branch_id == g ^. phylo_groupBranchId) bs)
211 in fromJust $ elemIndex b' bs
213 exportToDot :: Phylo -> PhyloExport -> DotGraph DotId
214 exportToDot phylo export =
215 trace ("\n-- | Convert " <> show(length $ export ^. export_branches) <> " branches and "
216 <> show(length $ export ^. export_groups) <> " groups "
217 <> show(length $ nub $ concat $ map (^. phylo_groupNgrams) $ export ^. export_groups) <> " terms to a dot file\n\n"
218 <> "##########################") $
219 digraph ((Str . fromStrict) $ phyloName $ getConfig phylo) $ do
221 {- 1) init the dot graph -}
222 graphAttrs ( [ Label (toDotLabel $ phyloName $ getConfig phylo)]
223 <> [ FontSize 30, LabelLoc VTop, NodeSep 1, RankSep [1], Rank SameRank, Splines SplineEdges, Overlap ScaleOverlaps
226 , Style [SItem Filled []],Color [toWColor White]]
227 {-- home made attributes -}
228 <> [ toAttr (fromStrict "phyloFoundations") $ pack $ show (length $ Vector.toList $ getRoots phylo)
229 , toAttr (fromStrict "phyloTerms") $ pack $ show (length $ nub $ concat $ map (^. phylo_groupNgrams) $ export ^. export_groups)
230 , toAttr (fromStrict "phyloDocs") $ pack $ show (sum $ elems $ getDocsByDate phylo)
231 , toAttr (fromStrict "phyloPeriods") $ pack $ show (length $ elems $ phylo ^. phylo_periods)
232 , toAttr (fromStrict "phyloBranches") $ pack $ show (length $ export ^. export_branches)
233 , toAttr (fromStrict "phyloGroups") $ pack $ show (length $ export ^. export_groups)
234 , toAttr (fromStrict "phyloSources") $ pack $ show (Vector.toList $ getSources phylo)
235 , toAttr (fromStrict "phyloTimeScale") $ pack $ getTimeScale phylo
236 , toAttr (fromStrict "PhyloScale") $ pack $ show (getLevel phylo)
237 , toAttr (fromStrict "phyloQuality") $ pack $ show (phylo ^. phylo_quality)
238 , toAttr (fromStrict "phyloSeaRiseStart") $ pack $ show (getPhyloSeaRiseStart phylo)
239 , toAttr (fromStrict "phyloSeaRiseSteps") $ pack $ show (getPhyloSeaRiseSteps phylo)
240 -- ,(toAttr (fromStrict "phyloTermsFreq") $ pack $ show (toList $ _phylo_lastTermFreq phylo))
244 -- toAttr (fromStrict k) $ (pack . unwords) $ map show v
246 -- 2) create a layer for the branches labels -}
247 subgraph (Str "Branches peaks") $ do
249 -- graphAttrs [Rank SameRank]
251 -- 3) group the branches by hierarchy
252 -- mapM (\branches ->
253 -- subgraph (Str "Branches clade") $ do
254 -- graphAttrs [Rank SameRank]
256 -- -- 4) create a node for each branch
257 -- mapM branchToDotNode branches
258 -- ) $ elems $ fromListWith (++) $ map (\b -> ((init . snd) $ b ^. branch_id,[b])) $ export ^. export_branches
260 mapM (\b -> branchToDotNode b (fromJust $ elemIndex b (export ^. export_branches))) $ export ^. export_branches
262 {-- 5) create a layer for each period -}
263 _ <- mapM (\period ->
264 subgraph ((Str . fromStrict . Text.pack) $ ("Period" <> show (fst $ _phylo_periodPeriod period) <> show (snd $ _phylo_periodPeriod period))) $ do
265 graphAttrs [Rank SameRank]
266 periodToDotNode (period ^. phylo_periodPeriod) (period ^. phylo_periodPeriodStr)
268 {-- 6) create a node for each group -}
269 mapM (\g -> groupToDotNode (getRoots phylo) g (toBid g (export ^. export_branches))) (filter (\g -> g ^. phylo_groupPeriod == (period ^. phylo_periodPeriod)) $ export ^. export_groups)
270 ) $ phylo ^. phylo_periods
272 {-- 7) create the edges between a branch and its first groups -}
273 _ <- mapM (\(bId,groups) ->
274 mapM (\g -> toDotEdge (branchIdToDotId bId) (groupIdToDotId $ getGroupId g) "" BranchToGroup) groups
277 $ map (\groups -> head' "toDot"
278 $ groupBy (\g g' -> g' ^. phylo_groupPeriod == g ^. phylo_groupPeriod)
279 $ sortOn (fst . _phylo_groupPeriod) groups)
280 $ fromListWith (++) $ map (\g -> (g ^. phylo_groupBranchId,[g])) $ export ^. export_groups
282 {- 8) create the edges between the groups -}
283 _ <- mapM (\((k,k'),v) ->
284 toDotEdge (groupIdToDotId k) (groupIdToDotId k') (show v) GroupToGroup
285 ) $ (toList . mergePointers) $ export ^. export_groups
287 {- 8-bis) create the edges between the groups -}
288 {- _ <- mapM (\((k,k'),v) ->
289 toDotEdge' (groupIdToDotId k) (groupIdToDotId k') (show (fst v)) (show (snd v)) GroupToGroupMemory
290 ) $ mergePointersMemory $ export ^. export_groups -}
292 _ <- mapM (\((k,k'),v) ->
293 toDotEdge (groupIdToDotId k) (groupIdToDotId k') (show v) GroupToAncestor
294 ) $ mergeAncestors $ export ^. export_groups
296 -- 10) create the edges between the periods
297 _ <- mapM (\(prd,prd') ->
298 toDotEdge (periodIdToDotId prd) (periodIdToDotId prd') "" PeriodToPeriod
299 ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ getPeriodIds phylo
301 {- 8) create the edges between the branches
302 -- _ <- mapM (\(bId,bId') ->
303 -- toDotEdge (branchIdToDotId bId) (branchIdToDotId bId')
304 -- (Text.pack $ show(branchIdsToSimilarity bId bId'
305 -- (getThresholdInit $ phyloSimilarity $ getConfig phylo)
306 -- (getThresholdStep $ phyloSimilarity $ getConfig phylo))) BranchToBranch
307 -- ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ map _branch_id $ export ^. export_branches
311 graphAttrs [Rank SameRank]
318 filterByBranchSize :: Double -> PhyloExport -> PhyloExport
319 filterByBranchSize thr export =
320 let splited = partition (\b -> head' "filter" ((b ^. branch_meta) ! "size") >= thr) $ export ^. export_branches
321 in export & export_branches .~ (fst splited)
322 & export_groups %~ (filter (\g -> not $ elem (g ^. phylo_groupBranchId) (map _branch_id $ snd splited)))
325 processFilters :: [Filter] -> Quality -> PhyloExport -> PhyloExport
326 processFilters filters qua export =
327 foldl (\export' f -> case f of
328 ByBranchSize thr -> if (thr < (fromIntegral $ qua ^. qua_minBranch))
329 then filterByBranchSize (fromIntegral $ qua ^. qua_minBranch) export'
330 else filterByBranchSize thr export'
337 branchToIso :: [PhyloBranch] -> [PhyloBranch]
338 branchToIso branches =
341 $ map (\(b,x) -> b ^. branch_y + 0.05 - x)
343 $ ([0] ++ (map (\(b,b') ->
344 let idx = length $ commonPrefix (b ^. branch_canonId) (b' ^. branch_canonId) []
345 lmin = min (length $ b ^. branch_seaLevel) (length $ b' ^. branch_seaLevel)
347 if ((idx - 1) > ((length $ b' ^. branch_seaLevel) - 1))
348 then (b' ^. branch_seaLevel) !! (lmin - 1)
349 else (b' ^. branch_seaLevel) !! (idx - 1)
350 ) $ listToSeq branches))
351 in map (\(x,b) -> b & branch_x .~ x)
354 branchToIso' :: Double -> Double -> [PhyloBranch] -> [PhyloBranch]
355 branchToIso' start step branches =
356 let bx = map (\l -> (sum l) + ((fromIntegral $ length l) * 0.5))
358 $ ([0] ++ (map (\(b,b') ->
359 let root = fromIntegral $ length $ commonPrefix (snd $ b ^. branch_id) (snd $ b' ^. branch_id) []
360 in 1 - start - step * root) $ listToSeq branches))
361 in map (\(x,b) -> b & branch_x .~ x)
365 sortByHierarchy :: Int -> [PhyloBranch] -> [PhyloBranch]
366 sortByHierarchy depth branches =
367 if (length branches == 1)
371 let partitions = partition (\b -> depth + 1 == ((length . snd) $ b ^. branch_id)) branches'
372 in (sortOn (\b -> (b ^. branch_meta) ! "birth") (fst partitions))
373 ++ (sortByHierarchy (depth + 1) (snd partitions)))
374 $ groupBy (\b b' -> ((take depth . snd) $ b ^. branch_id) == ((take depth . snd) $ b' ^. branch_id) )
375 $ sortOn (\b -> (take depth . snd) $ b ^. branch_id) branches
378 sortByBirthDate :: Order -> PhyloExport -> PhyloExport
379 sortByBirthDate order export =
380 let branches = sortOn (\b -> (b ^. branch_meta) ! "birth") $ export ^. export_branches
381 branches' = case order of
383 Desc -> reverse branches
384 in export & export_branches .~ branches'
386 processSort :: Sort -> SeaElevation -> PhyloExport -> PhyloExport
387 processSort sort' elev export = case sort' of
388 ByBirthDate o -> sortByBirthDate o export
389 ByHierarchy _ -> case elev of
390 Constante s s' -> export & export_branches .~ (branchToIso' s s' $ sortByHierarchy 0 (export ^. export_branches))
391 Adaptative _ -> export & export_branches .~ (branchToIso $ sortByHierarchy 0 (export ^. export_branches))
392 Evolving _ -> export & export_branches .~ (branchToIso $ sortByHierarchy 0 (export ^. export_branches))
398 -- | Return the conditional probability of i knowing j
399 conditional :: Ord a => Map (a,a) Double -> a -> a -> Double
400 conditional m i j = (findWithDefault 0 (i,j) m)
404 -- | Return the genericity score of a given ngram
405 genericity :: Map (Int, Int) Double -> [Int] -> Int -> Double
406 genericity m l i = ( (sum $ map (\j -> conditional m i j) l)
407 - (sum $ map (\j -> conditional m j i) l)) / (fromIntegral $ (length l) + 1)
410 -- | Return the specificity score of a given ngram
411 specificity :: Map (Int, Int) Double -> [Int] -> Int -> Double
412 specificity m l i = ( (sum $ map (\j -> conditional m j i) l)
413 - (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
416 -- | Return the inclusion score of a given ngram
417 inclusion :: Map (Int, Int) Double -> [Int] -> Int -> Double
418 inclusion m l i = ( (sum $ map (\j -> conditional m j i) l)
419 + (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
422 ngramsMetrics :: Phylo -> PhyloExport -> PhyloExport
423 ngramsMetrics phylo export =
426 (\g -> g & phylo_groupMeta %~ insert "genericity"
427 (map (\n -> genericity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
428 & phylo_groupMeta %~ insert "specificity"
429 (map (\n -> specificity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
430 & phylo_groupMeta %~ insert "inclusion"
431 (map (\n -> inclusion (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
432 & phylo_groupMeta %~ insert "frequence"
433 (map (\n -> getInMap n (getLastRootsFreq phylo)) $ g ^. phylo_groupNgrams)
437 branchDating :: PhyloExport -> PhyloExport
438 branchDating export =
439 over ( export_branches
442 let groups = sortOn fst
443 $ foldl' (\acc g -> if (g ^. phylo_groupBranchId == b ^. branch_id)
444 then acc ++ [g ^. phylo_groupPeriod]
445 else acc ) [] $ export ^. export_groups
447 birth = fst $ head' "birth" groups
448 death = snd $ last' "death" groups
450 in b & branch_meta %~ insert "birth" [fromIntegral birth]
451 & branch_meta %~ insert "death" [fromIntegral death]
452 & branch_meta %~ insert "age" [fromIntegral age]
453 & branch_meta %~ insert "size" [fromIntegral $ length periods] ) export
455 processMetrics :: Phylo -> PhyloExport -> PhyloExport
456 processMetrics phylo export = ngramsMetrics phylo
457 $ branchDating export
464 nk :: Int -> [[Int]] -> Int
466 $ map (\g -> if (elem n g)
471 tf :: Int -> [[Int]] -> Double
472 tf n groups = (fromIntegral $ nk n groups) / (fromIntegral $ length $ concat groups)
475 idf :: Int -> [[Int]] -> Double
476 idf n groups = log ((fromIntegral $ length groups) / (fromIntegral $ nk n groups))
479 findTfIdf :: [[Int]] -> [(Int,Double)]
480 findTfIdf groups = reverse $ sortOn snd $ map (\n -> (n,(tf n groups) * (idf n groups))) $ nub $ concat groups
483 findEmergences :: [PhyloGroup] -> Map Int Double -> [(Int,Double)]
484 findEmergences groups freq =
485 let ngrams = map _phylo_groupNgrams groups
486 dynamics = map (\g -> (g ^. phylo_groupMeta) ! "dynamics") groups
487 emerging = nubBy (\n1 n2 -> fst n1 == fst n2)
488 $ concat $ map (\g -> filter (\(_,d) -> d == 0) $ zip (fst g) (snd g)) $ zip ngrams dynamics
489 in reverse $ sortOn snd
490 $ map (\(n,_) -> if (member n freq)
495 mostEmergentTfIdf :: Int -> Map Int Double -> Vector Ngrams -> PhyloExport -> PhyloExport
496 mostEmergentTfIdf nth freq foundations export =
497 over ( export_branches
500 let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
501 tfidf = findTfIdf (map _phylo_groupNgrams groups)
502 emergences = findEmergences groups freq
503 selected = if (null emergences)
504 then map fst $ take nth tfidf
505 else [fst $ head' "mostEmergentTfIdf" emergences]
506 ++ (map fst $ take (nth - 1) $ filter (\(n,_) -> n /= (fst $ head' "mostEmergentTfIdf" emergences)) tfidf)
507 in b & branch_label .~ (ngramsToLabel foundations selected)) export
510 getNthMostMeta :: Int -> [Double] -> [Int] -> [Int]
511 getNthMostMeta nth meta ns = map (\(idx,_) -> (ns !! idx))
514 $ sortOn snd $ zip [0..] meta
517 mostInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
518 mostInclusive nth foundations export =
519 over ( export_branches
522 let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
523 cooc = foldl (\acc g -> unionWith (+) acc (g ^. phylo_groupCooc)) empty groups
524 ngrams = sort $ foldl (\acc g -> union acc (g ^. phylo_groupNgrams)) [] groups
525 inc = map (\n -> inclusion cooc (ngrams \\ [n]) n) ngrams
526 lbl = ngramsToLabel foundations $ getNthMostMeta nth inc ngrams
527 in b & branch_label .~ lbl ) export
530 mostEmergentInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
531 mostEmergentInclusive nth foundations export =
535 let lbl = ngramsToLabel foundations
537 $ map (\(_,(_,idx)) -> idx)
539 $ map (\groups -> sortOn (fst . snd) groups)
540 $ groupBy ((==) `on` fst) $ reverse $ sortOn fst
541 $ zip ((g ^. phylo_groupMeta) ! "inclusion")
542 $ zip ((g ^. phylo_groupMeta) ! "dynamics") (g ^. phylo_groupNgrams)
543 in g & phylo_groupLabel .~ lbl ) export
546 processLabels :: [PhyloLabel] -> Vector Ngrams -> Map Int Double -> PhyloExport -> PhyloExport
547 processLabels labels foundations freq export =
548 foldl (\export' label ->
550 GroupLabel tagger nth ->
552 MostEmergentInclusive -> mostEmergentInclusive nth foundations export'
553 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger"
554 BranchLabel tagger nth ->
556 MostInclusive -> mostInclusive nth foundations export'
557 MostEmergentTfIdf -> mostEmergentTfIdf nth freq foundations export'
558 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger" ) export labels
565 -- utiliser & creer une Map FdtId [PhyloGroup]
566 -- n = index of the current term
567 toDynamics :: FdtId -> [PhyloGroup] -> PhyloGroup -> Map FdtId (Date,Date) -> Double
568 toDynamics n elders g m =
569 let prd = g ^. phylo_groupPeriod
570 end = last' "dynamics" (sort $ map snd $ elems m)
571 in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end))
574 else if ((fst prd) == (fst $ m ! n))
582 --------------------------------------
584 isNew = not $ elem n $ concat $ map _phylo_groupNgrams elders
587 processDynamics :: [PhyloGroup] -> [PhyloGroup]
588 processDynamics groups =
590 let elders = filter (\g' -> (g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
591 && ((fst $ g ^. phylo_groupPeriod) > (fst $ g' ^. phylo_groupPeriod))) groups
592 in g & phylo_groupMeta %~ insert "dynamics" (map (\n -> toDynamics n elders g mapNgrams) $ g ^. phylo_groupNgrams) ) groups
594 --------------------------------------
595 mapNgrams :: Map FdtId (Date,Date)
596 mapNgrams = map (\dates ->
597 let dates' = sort dates
598 in (head' "dynamics" dates', last' "dynamics" dates'))
600 $ foldl (\acc g -> acc ++ ( map (\n -> (n,[fst $ g ^. phylo_groupPeriod, snd $ g ^. phylo_groupPeriod]))
601 $ (g ^. phylo_groupNgrams))) [] groups
608 getGroupThr :: Double -> PhyloGroup -> Double
610 let seaLvl = (g ^. phylo_groupMeta) ! "seaLevels"
611 breaks = (g ^. phylo_groupMeta) ! "breaks"
612 in (last' "export" (take (round $ (last' "export" breaks) + 1) seaLvl)) - step
621 toAncestor nbDocs diago similarity step candidates ego =
622 let curr = ego ^. phylo_groupAncestors
623 in ego & phylo_groupAncestors .~ (curr ++ (map (\(g,w) -> (getGroupId g,w))
624 $ filter (\(g,w) -> (w > 0) && (w >= (min (getGroupThr step ego) (getGroupThr step g))))
625 $ map (\g -> (g, toSimilarity nbDocs diago similarity (ego ^. phylo_groupNgrams) (g ^. phylo_groupNgrams) (g ^. phylo_groupNgrams)))
626 $ filter (\g -> g ^. phylo_groupBranchId /= ego ^. phylo_groupBranchId ) candidates))
629 headsToAncestors :: Double
636 headsToAncestors nbDocs diago similarity step heads acc =
640 let ego = head' "headsToAncestors" heads
641 heads' = tail' "headsToAncestors" heads
642 in headsToAncestors nbDocs diago similarity step heads' (acc ++ [toAncestor nbDocs diago similarity step heads' ego])
645 toHorizon :: Phylo -> Phylo
647 let phyloAncestor = updatePhyloGroups
649 (fromList $ map (\g -> (getGroupId g, g))
651 $ tracePhyloAncestors newGroups) phylo
652 reBranched = fromList $ map (\g -> (getGroupId g, g)) $ concat
653 $ groupsToBranches' $ fromList $ map (\g -> (getGroupId g, g)) $ getGroupsFromScale scale phyloAncestor
654 in updatePhyloGroups scale reBranched phylo
656 -- | 1) for each periods
658 periods = getPeriodIds phylo
661 scale = getLastLevel phylo
664 frame = getTimeFrame $ timeUnit $ getConfig phylo
665 -- | 2) find ancestors between groups without parents
666 mapGroups :: [[PhyloGroup]]
667 mapGroups = map (\prd ->
668 let groups = getGroupsFromScalePeriods scale [prd] phylo
669 childs = getPreviousChildIds scale frame prd periods phylo
670 -- maybe add a better filter for non isolated ancestors
671 heads = filter (\g -> (not . null) $ (g ^. phylo_groupPeriodChilds))
672 $ filter (\g -> null (g ^. phylo_groupPeriodParents) && (notElem (getGroupId g) childs)) groups
673 noHeads = groups \\ heads
674 nbDocs = sum $ elems $ filterDocs (getDocsByDate phylo) [prd]
675 diago = reduceDiagos $ filterDiago (getCoocByDate phylo) [prd]
676 sim = (similarity $ getConfig phylo)
677 step = case getSeaElevation phylo of
681 -- in headsToAncestors nbDocs diago Similarity heads groups []
682 in map (toAncestor nbDocs diago sim step noHeads)
683 $ headsToAncestors nbDocs diago sim step heads []
685 -- | 3) process this task concurrently
686 newGroups :: [[PhyloGroup]]
687 newGroups = mapGroups `using` parList rdeepseq
688 --------------------------------------
690 getPreviousChildIds :: Scale -> Int -> Period -> [Period] -> Phylo -> [PhyloGroupId]
691 getPreviousChildIds lvl frame curr prds phylo =
692 concat $ map ((map fst) . _phylo_groupPeriodChilds)
693 $ getGroupsFromScalePeriods lvl (getNextPeriods ToParents frame curr prds) phylo
695 ---------------------
696 -- | phyloExport | --
697 ---------------------
699 toPhyloExport :: Phylo -> DotGraph DotId
700 toPhyloExport phylo = exportToDot phylo
701 $ processFilters (exportFilter $ getConfig phylo) (phyloQuality $ getConfig phylo)
702 $ processSort (exportSort $ getConfig phylo) (getSeaElevation phylo)
703 $ processLabels (exportLabel $ getConfig phylo) (getRoots phylo) (getLastRootsFreq phylo)
704 $ processMetrics phylo export
706 export :: PhyloExport
707 export = PhyloExport groups branches
708 --------------------------------------
709 branches :: [PhyloBranch]
710 branches = map (\g ->
711 let seaLvl = (g ^. phylo_groupMeta) ! "seaLevels"
712 breaks = (g ^. phylo_groupMeta) ! "breaks"
713 canonId = take (round $ (last' "export" breaks) + 2) (snd $ g ^. phylo_groupBranchId)
714 in PhyloBranch { _branch_id = g ^. phylo_groupBranchId
715 , _branch_canonId = canonId
716 , _branch_seaLevel = seaLvl
718 , _branch_y = last' "export" $ take (round $ (last' "export" breaks) + 1) seaLvl
722 , _branch_meta = empty })
723 $ map (head' "export")
724 $ groupBy (\g g' -> g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
725 $ sortOn (^. phylo_groupBranchId) groups
726 --------------------------------------
727 groups :: [PhyloGroup]
728 groups = traceExportGroups
731 $ getGroupsFromScale (phyloScale $ getConfig phylo)
732 $ tracePhyloInfo phylo
735 traceExportBranches :: [PhyloBranch] -> [PhyloBranch]
736 traceExportBranches branches = trace ("\n"
737 <> "-- | Export " <> show(length branches) <> " branches") branches
739 tracePhyloAncestors :: [[PhyloGroup]] -> [[PhyloGroup]]
740 tracePhyloAncestors groups = trace ( "-- | Found " <> show(length $ concat $ map _phylo_groupAncestors $ concat groups) <> " ancestors") groups
742 tracePhyloInfo :: Phylo -> Phylo
743 tracePhyloInfo phylo = trace ("\n" <> "##########################" <> "\n\n" <> "-- | Phylo with level = "
744 <> show(getLevel phylo) <> " applied to "
745 <> show(length $ Vector.toList $ getRoots phylo) <> " foundations"
749 traceExportGroups :: [PhyloGroup] -> [PhyloGroup]
750 traceExportGroups groups = trace ("\n" <> "-- | Export "
751 <> show(length $ nub $ map (\g -> g ^. phylo_groupBranchId) groups) <> " branches, "
752 <> show(length groups) <> " groups and "
753 <> show(length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) groups) <> " terms"