2 Module : Gargantext.Core.Viz.Phylo.PhyloExport
3 Description : Exportation module of a Phylo
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
11 {-# LANGUAGE TypeSynonymInstances #-}
13 module Gargantext.Core.Viz.Phylo.PhyloExport where
15 import Control.Lens hiding (Level)
16 import Control.Parallel.Strategies (parList, rdeepseq, using)
17 import Data.GraphViz hiding (DotGraph, Order)
18 import Data.GraphViz.Attributes.Complete hiding (EdgeType, Order)
19 import Data.GraphViz.Types.Generalised (DotGraph)
20 import Data.GraphViz.Types.Monadic
21 import Data.List ((++), sort, nub, null, concat, sortOn, groupBy, union, (\\), (!!), init, partition, notElem, unwords, nubBy, inits, elemIndex)
22 import Data.Map (Map, fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault, toList, member)
23 import Data.Text.Lazy (fromStrict, pack, unpack)
24 import Data.Vector (Vector)
25 import Debug.Trace (trace)
26 import Gargantext.Core.Viz.Phylo
27 import Gargantext.Core.Viz.Phylo.PhyloTools
28 import Gargantext.Core.Viz.Phylo.TemporalMatching (filterDocs, filterDiago, reduceDiagos, toSimilarity, getNextPeriods)
29 import Gargantext.Prelude hiding (scale)
30 import Prelude (writeFile)
31 import Protolude (floor)
32 import System.FilePath
33 import qualified Data.GraphViz.Attributes.HTML as H
34 import qualified Data.Text as Text
35 import qualified Data.Text.Lazy as Lazy
36 import qualified Data.Vector as Vector
42 dotToFile :: FilePath -> DotGraph DotId -> IO ()
43 dotToFile filePath dotG = writeFile filePath $ dotToString dotG
45 dotToString :: DotGraph DotId -> [Char]
46 dotToString dotG = unpack (printDotGraph dotG)
48 dynamicToColor :: Int -> H.Attribute
50 | d == 0 = H.BGColor (toColor LightCoral)
51 | d == 1 = H.BGColor (toColor Khaki)
52 | d == 2 = H.BGColor (toColor SkyBlue)
53 | otherwise = H.Color (toColor Black)
55 pickLabelColor :: [Int] -> H.Attribute
57 | elem 0 lst = dynamicToColor 0
58 | elem 1 lst = dynamicToColor 1
59 | elem 2 lst = dynamicToColor 2
60 | otherwise = dynamicToColor 3
62 toDotLabel :: Text.Text -> Label
63 toDotLabel lbl = StrLabel $ fromStrict lbl
65 toAttr :: AttributeName -> Lazy.Text -> CustomAttribute
66 toAttr k v = customAttribute k v
68 metaToAttr :: Map Text.Text [Double] -> [CustomAttribute]
69 metaToAttr meta = map (\(k, v) -> toAttr (fromStrict k) $ (pack . unwords) $ map show v) $ toList meta
71 groupIdToDotId :: PhyloGroupId -> DotId
72 groupIdToDotId (((d, d'), lvl), idx) =
73 (fromStrict . Text.pack) $ "group" <> show d <> show d' <> show lvl <> show idx
75 branchIdToDotId :: PhyloBranchId -> DotId
76 branchIdToDotId bId = (fromStrict . Text.pack) $ "branch" <> show (snd bId)
78 periodIdToDotId :: Period -> DotId
79 periodIdToDotId prd = (fromStrict . Text.pack) $ "period" <> show (fst prd) <> show (snd prd)
81 groupToTable :: Vector Ngrams -> PhyloGroup -> H.Label
84 { H.tableFontAttrs = Just [H.PointSize 14, H.Align H.HLeft]
85 , H.tableAttrs = [H.Border 0, H.CellBorder 0, H.BGColor (toColor White)]
86 , H.tableRows = [header]
87 <> [H.Cells [H.LabelCell [H.Height 10] $ H.Text [H.Str $ fromStrict ""]]]
88 <> ( map ngramsToRow $ splitEvery 4
89 $ reverse $ sortOn (snd . snd)
90 $ zip (ngramsToText fdt (g ^. phylo_groupNgrams))
91 $ zip ((g ^. phylo_groupMeta) ! "dynamics") ((g ^. phylo_groupMeta) ! "inclusion"))}
93 --------------------------------------
94 ngramsToRow :: [(Ngrams, (Double, Double))] -> H.Row
96 H.Cells $ map (\(n, (d, _)) ->
97 H.LabelCell [ H.Align H.HLeft
98 , dynamicToColor $ floor d] $ H.Text [H.Str $ fromStrict n]) ns
99 --------------------------------------
102 H.Cells [ H.LabelCell [pickLabelColor $ floor <$> ((g ^. phylo_groupMeta) ! "dynamics")]
103 $ H.Text [H.Str $ ((fromStrict . Text.toUpper) $ g ^. phylo_groupLabel)
105 <> (pack $ show (fst $ g ^. phylo_groupPeriod))
107 <> (pack $ show (snd $ g ^. phylo_groupPeriod))
109 <> (pack $ show (getGroupId g))]]
110 --------------------------------------
112 branchToDotNode :: PhyloBranch -> Int -> Dot DotId
113 branchToDotNode b bId =
114 node (branchIdToDotId $ b ^. branch_id)
115 ( [ FillColor [toWColor CornSilk]
119 , Style [SItem Bold []]
120 , Label (toDotLabel $ b ^. branch_label) ]
121 <> (metaToAttr $ b ^. branch_meta)
122 <> [ toAttr "nodeType" "branch"
123 , toAttr "bId" (pack $ show bId)
124 , toAttr "branchId" (pack $ unwords (map show $ snd $ b ^. branch_id))
125 , toAttr "branch_x" (fromStrict $ Text.pack $ show $ b ^. branch_x)
126 , toAttr "branch_y" (fromStrict $ Text.pack $ show $ b ^. branch_y)
127 , toAttr "label" (pack $ show $ b ^. branch_label)
130 periodToDotNode :: (Date,Date) -> (Text.Text,Text.Text) -> Dot DotId
131 periodToDotNode prd prd' =
132 node (periodIdToDotId prd) $
135 , Label $ toDotLabel $ Text.pack $ show (fst prd) <> " " <> show (snd prd) ]
136 <> [ toAttr "nodeType" "period"
137 , toAttr "strFrom" $ fromStrict $ Text.pack $ show $ fst prd'
138 , toAttr "strTo" $ fromStrict $ Text.pack $ show $ snd prd'
139 , toAttr "from" $ fromStrict $ Text.pack $ show $ fst prd
140 , toAttr "to" $ fromStrict $ Text.pack $ show $ snd prd ]
143 groupToDotNode :: Vector Ngrams -> PhyloGroup -> Int -> Dot DotId
144 groupToDotNode fdt g bId =
145 node (groupIdToDotId $ getGroupId g)
149 , toLabel (groupToTable fdt g) ]
150 <> [ toAttr "nodeType" "group"
151 , toAttr "gid" (groupIdToDotId $ getGroupId g)
152 , toAttr "from" (pack $ show (fst $ g ^. phylo_groupPeriod))
153 , toAttr "to" (pack $ show (snd $ g ^. phylo_groupPeriod))
154 , toAttr "strFrom" (pack $ show (fst $ g ^. phylo_groupPeriod'))
155 , toAttr "strTo" (pack $ show (snd $ g ^. phylo_groupPeriod'))
156 , toAttr "branchId" (pack $ unwords (init $ map show $ snd $ g ^. phylo_groupBranchId))
157 , toAttr "bId" (pack $ show bId)
158 , toAttr "support" (pack $ show (g ^. phylo_groupSupport))
159 , toAttr "weight" (pack $ show (g ^. phylo_groupWeight))
160 , toAttr "source" (pack $ show (nub $ g ^. phylo_groupSources))
161 , toAttr "sourceFull" (pack $ show (g ^. phylo_groupSources))
162 , toAttr "lbl" (pack $ show (ngramsToLabel fdt (g ^. phylo_groupNgrams)))
163 , toAttr "foundation" (pack $ show (idxToLabel (g ^. phylo_groupNgrams)))
164 , toAttr "role" (pack $ show (idxToLabel' ((g ^. phylo_groupMeta) ! "dynamics")))
165 , toAttr "frequence" (pack $ show (idxToLabel' ((g ^. phylo_groupMeta) ! "frequence")))
166 , toAttr "seaLvl" (pack $ show ((g ^. phylo_groupMeta) ! "seaLevels"))
170 toDotEdge' :: DotId -> DotId -> [Char] -> [Char] -> EdgeType -> Dot DotId
171 toDotEdge' source target thr w edgeType = edge source target
173 GroupToGroup -> undefined
174 GroupToGroupMemory -> [ Width 3, penWidth 4, Color [toWColor Black], Constraint True] <> [toAttr "edgeType" "memoryLink", toAttr "thr" (pack thr), toAttr "weight" (pack w)]
175 BranchToGroup -> undefined
176 BranchToBranch -> undefined
177 GroupToAncestor -> undefined
178 PeriodToPeriod -> undefined)
181 toDotEdge :: DotId -> DotId -> [Char] -> EdgeType -> Dot DotId
182 toDotEdge source target lbl edgeType = edge source target
184 GroupToGroup -> [ Width 3, penWidth 4, Color [toWColor Black], Constraint True] <> [toAttr "edgeType" "link", toAttr "lbl" (pack lbl), toAttr "source" source, toAttr "target" target]
185 GroupToGroupMemory -> undefined
186 BranchToGroup -> [ Width 3, Color [toWColor Black], ArrowHead (AType [(ArrMod FilledArrow RightSide,DotArrow)])] <> [toAttr "edgeType" "branchLink" ]
187 BranchToBranch -> [ Width 2, Color [toWColor Black], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,DotArrow)])]
188 GroupToAncestor -> [ Width 3, Color [toWColor Red], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,NoArrow)]), PenWidth 4] <> [toAttr "edgeType" "ancestorLink", toAttr "lbl" (pack lbl), toAttr "source" source, toAttr "target" target]
189 PeriodToPeriod -> [ Width 5, Color [toWColor Black]])
192 mergePointers :: [PhyloGroup] -> Map (PhyloGroupId,PhyloGroupId) Double
193 mergePointers groups =
194 let toChilds = fromList $ concat $ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupPeriodChilds) groups
195 toParents = fromList $ concat $ map (\g -> map (\(target,w) -> ((target,getGroupId g),w)) $ g ^. phylo_groupPeriodParents) groups
196 in unionWith (\w w' -> max w w') toChilds toParents
198 mergePointersMemory :: [PhyloGroup] -> [((PhyloGroupId,PhyloGroupId),(Double,Double))]
199 mergePointersMemory groups =
200 let toChilds = concat $ map (\g -> map (\(target,(t,w)) -> ((getGroupId g,target),(t,w))) $ g ^. phylo_groupPeriodMemoryChilds) groups
201 toParents = concat $ map (\g -> map (\(target,(t,w)) -> ((target,getGroupId g),(t,w))) $ g ^. phylo_groupPeriodMemoryParents) groups
202 in concat [toChilds,toParents]
204 mergeAncestors :: [PhyloGroup] -> [((PhyloGroupId,PhyloGroupId), Double)]
205 mergeAncestors groups = concat
206 $ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupAncestors)
207 $ filter (\g -> (not . null) $ g ^. phylo_groupAncestors) groups
210 toBid :: PhyloGroup -> [PhyloBranch] -> Int
212 let b' = head' "toBid" (filter (\b -> b ^. branch_id == g ^. phylo_groupBranchId) bs)
213 in fromJust $ elemIndex b' bs
215 exportToDot :: Phylo -> PhyloExport -> DotGraph DotId
216 exportToDot phylo export =
217 trace ("\n-- | Convert " <> show(length $ export ^. export_branches) <> " branches and "
218 <> show(length $ export ^. export_groups) <> " groups "
219 <> show(length $ nub $ concat $ map (^. phylo_groupNgrams) $ export ^. export_groups) <> " terms to a dot file\n\n"
220 <> "##########################") $
221 digraph ((Str . fromStrict) $ phyloName $ getConfig phylo) $ do
223 {- 1) init the dot graph -}
224 graphAttrs ( [ Label (toDotLabel $ phyloName $ getConfig phylo)]
225 <> [ FontSize 30, LabelLoc VTop, NodeSep 1, RankSep [1], Rank SameRank, Splines SplineEdges, Overlap ScaleOverlaps
228 , Style [SItem Filled []],Color [toWColor White]]
229 {-- home made attributes -}
230 <> [ toAttr (fromStrict "phyloFoundations") $ pack $ show (length $ Vector.toList $ getRoots phylo)
231 , toAttr (fromStrict "phyloTerms") $ pack $ show (length $ nub $ concat $ map (^. phylo_groupNgrams) $ export ^. export_groups)
232 , toAttr (fromStrict "phyloDocs") $ pack $ show (sum $ elems $ getDocsByDate phylo)
233 , toAttr (fromStrict "phyloPeriods") $ pack $ show (length $ elems $ phylo ^. phylo_periods)
234 , toAttr (fromStrict "phyloBranches") $ pack $ show (length $ export ^. export_branches)
235 , toAttr (fromStrict "phyloGroups") $ pack $ show (length $ export ^. export_groups)
236 , toAttr (fromStrict "phyloSources") $ pack $ show (Vector.toList $ getSources phylo)
237 , toAttr (fromStrict "phyloTimeScale") $ pack $ getTimeScale phylo
238 , toAttr (fromStrict "PhyloScale") $ pack $ show (getLevel phylo)
239 , toAttr (fromStrict "phyloQuality") $ pack $ show (phylo ^. phylo_quality)
240 , toAttr (fromStrict "phyloSeaRiseStart") $ pack $ show (getPhyloSeaRiseStart phylo)
241 , toAttr (fromStrict "phyloSeaRiseSteps") $ pack $ show (getPhyloSeaRiseSteps phylo)
242 -- ,(toAttr (fromStrict "phyloTermsFreq") $ pack $ show (toList $ _phylo_lastTermFreq phylo))
246 -- toAttr (fromStrict k) $ (pack . unwords) $ map show v
248 -- 2) create a layer for the branches labels -}
249 subgraph (Str "Branches peaks") $ do
251 -- graphAttrs [Rank SameRank]
253 -- 3) group the branches by hierarchy
254 -- mapM (\branches ->
255 -- subgraph (Str "Branches clade") $ do
256 -- graphAttrs [Rank SameRank]
258 -- -- 4) create a node for each branch
259 -- mapM branchToDotNode branches
260 -- ) $ elems $ fromListWith (++) $ map (\b -> ((init . snd) $ b ^. branch_id,[b])) $ export ^. export_branches
262 mapM (\b -> branchToDotNode b (fromJust $ elemIndex b (export ^. export_branches))) $ export ^. export_branches
264 {-- 5) create a layer for each period -}
265 _ <- mapM (\period ->
266 subgraph ((Str . fromStrict . Text.pack) $ ("Period" <> show (fst $ _phylo_periodPeriod period) <> show (snd $ _phylo_periodPeriod period))) $ do
267 graphAttrs [Rank SameRank]
268 periodToDotNode (period ^. phylo_periodPeriod) (period ^. phylo_periodPeriodStr)
270 {-- 6) create a node for each group -}
271 mapM (\g -> groupToDotNode (getRoots phylo) g (toBid g (export ^. export_branches))) (filter (\g -> g ^. phylo_groupPeriod == (period ^. phylo_periodPeriod)) $ export ^. export_groups)
272 ) $ phylo ^. phylo_periods
274 {-- 7) create the edges between a branch and its first groups -}
275 _ <- mapM (\(bId,groups) ->
276 mapM (\g -> toDotEdge (branchIdToDotId bId) (groupIdToDotId $ getGroupId g) "" BranchToGroup) groups
279 $ map (\groups -> head' "toDot"
280 $ groupBy (\g g' -> g' ^. phylo_groupPeriod == g ^. phylo_groupPeriod)
281 $ sortOn (fst . _phylo_groupPeriod) groups)
282 $ fromListWith (++) $ map (\g -> (g ^. phylo_groupBranchId,[g])) $ export ^. export_groups
284 {- 8) create the edges between the groups -}
285 _ <- mapM (\((k,k'),v) ->
286 toDotEdge (groupIdToDotId k) (groupIdToDotId k') (show v) GroupToGroup
287 ) $ (toList . mergePointers) $ export ^. export_groups
289 {- 8-bis) create the edges between the groups -}
290 {- _ <- mapM (\((k,k'),v) ->
291 toDotEdge' (groupIdToDotId k) (groupIdToDotId k') (show (fst v)) (show (snd v)) GroupToGroupMemory
292 ) $ mergePointersMemory $ export ^. export_groups -}
294 _ <- mapM (\((k,k'),v) ->
295 toDotEdge (groupIdToDotId k) (groupIdToDotId k') (show v) GroupToAncestor
296 ) $ mergeAncestors $ export ^. export_groups
298 -- 10) create the edges between the periods
299 _ <- mapM (\(prd,prd') ->
300 toDotEdge (periodIdToDotId prd) (periodIdToDotId prd') "" PeriodToPeriod
301 ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ getPeriodIds phylo
303 {- 8) create the edges between the branches
304 -- _ <- mapM (\(bId,bId') ->
305 -- toDotEdge (branchIdToDotId bId) (branchIdToDotId bId')
306 -- (Text.pack $ show(branchIdsToSimilarity bId bId'
307 -- (getThresholdInit $ phyloSimilarity $ getConfig phylo)
308 -- (getThresholdStep $ phyloSimilarity $ getConfig phylo))) BranchToBranch
309 -- ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ map _branch_id $ export ^. export_branches
313 graphAttrs [Rank SameRank]
320 filterByBranchSize :: Double -> PhyloExport -> PhyloExport
321 filterByBranchSize thr export =
322 let splited = partition (\b -> head' "filter" ((b ^. branch_meta) ! "size") >= thr) $ export ^. export_branches
323 in export & export_branches .~ (fst splited)
324 & export_groups %~ (filter (\g -> not $ elem (g ^. phylo_groupBranchId) (map _branch_id $ snd splited)))
327 processFilters :: [Filter] -> Quality -> PhyloExport -> PhyloExport
328 processFilters filters qua export =
329 foldl (\export' f -> case f of
330 ByBranchSize thr -> if (thr < (fromIntegral $ qua ^. qua_minBranch))
331 then filterByBranchSize (fromIntegral $ qua ^. qua_minBranch) export'
332 else filterByBranchSize thr export'
339 branchToIso :: [PhyloBranch] -> [PhyloBranch]
340 branchToIso branches =
343 $ map (\(b,x) -> b ^. branch_y + 0.05 - x)
345 $ ([0] ++ (map (\(b,b') ->
346 let idx = length $ commonPrefix (b ^. branch_canonId) (b' ^. branch_canonId) []
347 lmin = min (length $ b ^. branch_seaLevel) (length $ b' ^. branch_seaLevel)
349 if ((idx - 1) > ((length $ b' ^. branch_seaLevel) - 1))
350 then (b' ^. branch_seaLevel) !! (lmin - 1)
351 else (b' ^. branch_seaLevel) !! (idx - 1)
352 ) $ listToSeq branches))
353 in map (\(x,b) -> b & branch_x .~ x)
356 branchToIso' :: Double -> Double -> [PhyloBranch] -> [PhyloBranch]
357 branchToIso' start step branches =
358 let bx = map (\l -> (sum l) + ((fromIntegral $ length l) * 0.5))
360 $ ([0] ++ (map (\(b,b') ->
361 let root = fromIntegral $ length $ commonPrefix (snd $ b ^. branch_id) (snd $ b' ^. branch_id) []
362 in 1 - start - step * root) $ listToSeq branches))
363 in map (\(x,b) -> b & branch_x .~ x)
367 sortByHierarchy :: Int -> [PhyloBranch] -> [PhyloBranch]
368 sortByHierarchy depth branches =
369 if (length branches == 1)
373 let partitions = partition (\b -> depth + 1 == ((length . snd) $ b ^. branch_id)) branches'
374 in (sortOn (\b -> (b ^. branch_meta) ! "birth") (fst partitions))
375 ++ (sortByHierarchy (depth + 1) (snd partitions)))
376 $ groupBy (\b b' -> ((take depth . snd) $ b ^. branch_id) == ((take depth . snd) $ b' ^. branch_id) )
377 $ sortOn (\b -> (take depth . snd) $ b ^. branch_id) branches
380 sortByBirthDate :: Order -> PhyloExport -> PhyloExport
381 sortByBirthDate order export =
382 let branches = sortOn (\b -> (b ^. branch_meta) ! "birth") $ export ^. export_branches
383 branches' = case order of
385 Desc -> reverse branches
386 in export & export_branches .~ branches'
388 processSort :: Sort -> SeaElevation -> PhyloExport -> PhyloExport
389 processSort sort' elev export = case sort' of
390 ByBirthDate o -> sortByBirthDate o export
391 ByHierarchy _ -> case elev of
392 Constante s s' -> export & export_branches .~ (branchToIso' s s' $ sortByHierarchy 0 (export ^. export_branches))
393 Adaptative _ -> export & export_branches .~ (branchToIso $ sortByHierarchy 0 (export ^. export_branches))
394 Evolving _ -> export & export_branches .~ (branchToIso $ sortByHierarchy 0 (export ^. export_branches))
400 -- | Return the conditional probability of i knowing j
401 conditional :: Ord a => Map (a,a) Double -> a -> a -> Double
402 conditional m i j = (findWithDefault 0 (i,j) m)
406 -- | Return the genericity score of a given ngram
407 genericity :: Map (Int, Int) Double -> [Int] -> Int -> Double
408 genericity m l i = ( (sum $ map (\j -> conditional m i j) l)
409 - (sum $ map (\j -> conditional m j i) l)) / (fromIntegral $ (length l) + 1)
412 -- | Return the specificity score of a given ngram
413 specificity :: Map (Int, Int) Double -> [Int] -> Int -> Double
414 specificity m l i = ( (sum $ map (\j -> conditional m j i) l)
415 - (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
418 -- | Return the inclusion score of a given ngram
419 inclusion :: Map (Int, Int) Double -> [Int] -> Int -> Double
420 inclusion m l i = ( (sum $ map (\j -> conditional m j i) l)
421 + (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
424 ngramsMetrics :: Phylo -> PhyloExport -> PhyloExport
425 ngramsMetrics phylo export =
428 (\g -> g & phylo_groupMeta %~ insert "genericity"
429 (map (\n -> genericity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
430 & phylo_groupMeta %~ insert "specificity"
431 (map (\n -> specificity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
432 & phylo_groupMeta %~ insert "inclusion"
433 (map (\n -> inclusion (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
434 & phylo_groupMeta %~ insert "frequence"
435 (map (\n -> getInMap n (getLastRootsFreq phylo)) $ g ^. phylo_groupNgrams)
439 branchDating :: PhyloExport -> PhyloExport
440 branchDating export =
441 over ( export_branches
444 let groups = sortOn fst
445 $ foldl' (\acc g -> if (g ^. phylo_groupBranchId == b ^. branch_id)
446 then acc ++ [g ^. phylo_groupPeriod]
447 else acc ) [] $ export ^. export_groups
449 birth = fst $ head' "birth" groups
450 age = (snd $ last' "age" groups) - birth
451 in b & branch_meta %~ insert "birth" [fromIntegral birth]
452 & branch_meta %~ insert "age" [fromIntegral age]
453 & branch_meta %~ insert "size" [fromIntegral $ length periods] ) export
455 processMetrics :: Phylo -> PhyloExport -> PhyloExport
456 processMetrics phylo export = ngramsMetrics phylo
457 $ branchDating export
464 nk :: Int -> [[Int]] -> Int
466 $ map (\g -> if (elem n g)
471 tf :: Int -> [[Int]] -> Double
472 tf n groups = (fromIntegral $ nk n groups) / (fromIntegral $ length $ concat groups)
475 idf :: Int -> [[Int]] -> Double
476 idf n groups = log ((fromIntegral $ length groups) / (fromIntegral $ nk n groups))
479 findTfIdf :: [[Int]] -> [(Int,Double)]
480 findTfIdf groups = reverse $ sortOn snd $ map (\n -> (n,(tf n groups) * (idf n groups))) $ nub $ concat groups
483 findEmergences :: [PhyloGroup] -> Map Int Double -> [(Int,Double)]
484 findEmergences groups freq =
485 let ngrams = map _phylo_groupNgrams groups
486 dynamics = map (\g -> (g ^. phylo_groupMeta) ! "dynamics") groups
487 emerging = nubBy (\n1 n2 -> fst n1 == fst n2)
488 $ concat $ map (\g -> filter (\(_,d) -> d == 0) $ zip (fst g) (snd g)) $ zip ngrams dynamics
489 in reverse $ sortOn snd
490 $ map (\(n,_) -> if (member n freq)
495 mostEmergentTfIdf :: Int -> Map Int Double -> Vector Ngrams -> PhyloExport -> PhyloExport
496 mostEmergentTfIdf nth freq foundations export =
497 over ( export_branches
500 let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
501 tfidf = findTfIdf (map _phylo_groupNgrams groups)
502 emergences = findEmergences groups freq
503 selected = if (null emergences)
504 then map fst $ take nth tfidf
505 else [fst $ head' "mostEmergentTfIdf" emergences]
506 ++ (map fst $ take (nth - 1) $ filter (\(n,_) -> n /= (fst $ head' "mostEmergentTfIdf" emergences)) tfidf)
507 in b & branch_label .~ (ngramsToLabel foundations selected)) export
510 getNthMostMeta :: Int -> [Double] -> [Int] -> [Int]
511 getNthMostMeta nth meta ns = map (\(idx,_) -> (ns !! idx))
514 $ sortOn snd $ zip [0..] meta
517 mostInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
518 mostInclusive nth foundations export =
519 over ( export_branches
522 let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
523 cooc = foldl (\acc g -> unionWith (+) acc (g ^. phylo_groupCooc)) empty groups
524 ngrams = sort $ foldl (\acc g -> union acc (g ^. phylo_groupNgrams)) [] groups
525 inc = map (\n -> inclusion cooc (ngrams \\ [n]) n) ngrams
526 lbl = ngramsToLabel foundations $ getNthMostMeta nth inc ngrams
527 in b & branch_label .~ lbl ) export
530 mostEmergentInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
531 mostEmergentInclusive nth foundations export =
535 let lbl = ngramsToLabel foundations
537 $ map (\(_,(_,idx)) -> idx)
539 $ map (\groups -> sortOn (fst . snd) groups)
540 $ groupBy ((==) `on` fst) $ reverse $ sortOn fst
541 $ zip ((g ^. phylo_groupMeta) ! "inclusion")
542 $ zip ((g ^. phylo_groupMeta) ! "dynamics") (g ^. phylo_groupNgrams)
543 in g & phylo_groupLabel .~ lbl ) export
546 processLabels :: [PhyloLabel] -> Vector Ngrams -> Map Int Double -> PhyloExport -> PhyloExport
547 processLabels labels foundations freq export =
548 foldl (\export' label ->
550 GroupLabel tagger nth ->
552 MostEmergentInclusive -> mostEmergentInclusive nth foundations export'
553 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger"
554 BranchLabel tagger nth ->
556 MostInclusive -> mostInclusive nth foundations export'
557 MostEmergentTfIdf -> mostEmergentTfIdf nth freq foundations export'
558 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger" ) export labels
565 -- utiliser & creer une Map FdtId [PhyloGroup]
566 -- n = index of the current term
567 toDynamics :: FdtId -> [PhyloGroup] -> PhyloGroup -> Map FdtId (Date,Date) -> Double
568 toDynamics n elders g m =
569 let prd = g ^. phylo_groupPeriod
570 end = last' "dynamics" (sort $ map snd $ elems m)
571 in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end))
574 else if ((fst prd) == (fst $ m ! n))
582 --------------------------------------
584 isNew = not $ elem n $ concat $ map _phylo_groupNgrams elders
587 processDynamics :: [PhyloGroup] -> [PhyloGroup]
588 processDynamics groups =
590 let elders = filter (\g' -> (g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
591 && ((fst $ g ^. phylo_groupPeriod) > (fst $ g' ^. phylo_groupPeriod))) groups
592 in g & phylo_groupMeta %~ insert "dynamics" (map (\n -> toDynamics n elders g mapNgrams) $ g ^. phylo_groupNgrams) ) groups
594 --------------------------------------
595 mapNgrams :: Map FdtId (Date,Date)
596 mapNgrams = map (\dates ->
597 let dates' = sort dates
598 in (head' "dynamics" dates', last' "dynamics" dates'))
600 $ foldl (\acc g -> acc ++ ( map (\n -> (n,[fst $ g ^. phylo_groupPeriod, snd $ g ^. phylo_groupPeriod]))
601 $ (g ^. phylo_groupNgrams))) [] groups
608 getGroupThr :: Double -> PhyloGroup -> Double
610 let seaLvl = (g ^. phylo_groupMeta) ! "seaLevels"
611 breaks = (g ^. phylo_groupMeta) ! "breaks"
612 in (last' "export" (take (round $ (last' "export" breaks) + 1) seaLvl)) - step
621 toAncestor nbDocs diago similarity step candidates ego =
622 let curr = ego ^. phylo_groupAncestors
623 in ego & phylo_groupAncestors .~ (curr ++ (map (\(g,w) -> (getGroupId g,w))
624 $ filter (\(g,w) -> (w > 0) && (w >= (min (getGroupThr step ego) (getGroupThr step g))))
625 $ map (\g -> (g, toSimilarity nbDocs diago similarity (ego ^. phylo_groupNgrams) (g ^. phylo_groupNgrams) (g ^. phylo_groupNgrams)))
626 $ filter (\g -> g ^. phylo_groupBranchId /= ego ^. phylo_groupBranchId ) candidates))
629 headsToAncestors :: Double
636 headsToAncestors nbDocs diago similarity step heads acc =
640 let ego = head' "headsToAncestors" heads
641 heads' = tail' "headsToAncestors" heads
642 in headsToAncestors nbDocs diago similarity step heads' (acc ++ [toAncestor nbDocs diago similarity step heads' ego])
645 toHorizon :: Phylo -> Phylo
647 let phyloAncestor = updatePhyloGroups
649 (fromList $ map (\g -> (getGroupId g, g))
651 $ tracePhyloAncestors newGroups) phylo
652 reBranched = fromList $ map (\g -> (getGroupId g, g)) $ concat
653 $ groupsToBranches' $ fromList $ map (\g -> (getGroupId g, g)) $ getGroupsFromScale scale phyloAncestor
654 in updatePhyloGroups scale reBranched phylo
656 -- | 1) for each periods
658 periods = getPeriodIds phylo
661 scale = getLastLevel phylo
664 frame = getTimeFrame $ timeUnit $ getConfig phylo
665 -- | 2) find ancestors between groups without parents
666 mapGroups :: [[PhyloGroup]]
667 mapGroups = map (\prd ->
668 let groups = getGroupsFromScalePeriods scale [prd] phylo
669 childs = getPreviousChildIds scale frame prd periods phylo
670 -- maybe add a better filter for non isolated ancestors
671 heads = filter (\g -> (not . null) $ (g ^. phylo_groupPeriodChilds))
672 $ filter (\g -> null (g ^. phylo_groupPeriodParents) && (notElem (getGroupId g) childs)) groups
673 noHeads = groups \\ heads
674 nbDocs = sum $ elems $ filterDocs (getDocsByDate phylo) [prd]
675 diago = reduceDiagos $ filterDiago (getCoocByDate phylo) [prd]
676 sim = (similarity $ getConfig phylo)
677 step = case getSeaElevation phylo of
681 -- in headsToAncestors nbDocs diago Similarity heads groups []
682 in map (toAncestor nbDocs diago sim step noHeads)
683 $ headsToAncestors nbDocs diago sim step heads []
685 -- | 3) process this task concurrently
686 newGroups :: [[PhyloGroup]]
687 newGroups = mapGroups `using` parList rdeepseq
688 --------------------------------------
690 getPreviousChildIds :: Scale -> Int -> Period -> [Period] -> Phylo -> [PhyloGroupId]
691 getPreviousChildIds lvl frame curr prds phylo =
692 concat $ map ((map fst) . _phylo_groupPeriodChilds)
693 $ getGroupsFromScalePeriods lvl (getNextPeriods ToParents frame curr prds) phylo
695 ---------------------
696 -- | phyloExport | --
697 ---------------------
699 toPhyloExport :: Phylo -> DotGraph DotId
700 toPhyloExport phylo = exportToDot phylo
701 $ processFilters (exportFilter $ getConfig phylo) (phyloQuality $ getConfig phylo)
702 $ processSort (exportSort $ getConfig phylo) (getSeaElevation phylo)
703 $ processLabels (exportLabel $ getConfig phylo) (getRoots phylo) (getLastRootsFreq phylo)
704 $ processMetrics phylo export
706 export :: PhyloExport
707 export = PhyloExport groups branches
708 --------------------------------------
709 branches :: [PhyloBranch]
710 branches = map (\g ->
711 let seaLvl = (g ^. phylo_groupMeta) ! "seaLevels"
712 breaks = (g ^. phylo_groupMeta) ! "breaks"
713 canonId = take (round $ (last' "export" breaks) + 2) (snd $ g ^. phylo_groupBranchId)
714 in PhyloBranch { _branch_id = g ^. phylo_groupBranchId
715 , _branch_canonId = canonId
716 , _branch_seaLevel = seaLvl
718 , _branch_y = last' "export" $ take (round $ (last' "export" breaks) + 1) seaLvl
722 , _branch_meta = empty })
723 $ map (head' "export")
724 $ groupBy (\g g' -> g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
725 $ sortOn (^. phylo_groupBranchId) groups
726 --------------------------------------
727 groups :: [PhyloGroup]
728 groups = traceExportGroups
731 $ getGroupsFromScale (phyloScale $ getConfig phylo)
732 $ tracePhyloInfo phylo
735 traceExportBranches :: [PhyloBranch] -> [PhyloBranch]
736 traceExportBranches branches = trace ("\n"
737 <> "-- | Export " <> show(length branches) <> " branches") branches
739 tracePhyloAncestors :: [[PhyloGroup]] -> [[PhyloGroup]]
740 tracePhyloAncestors groups = trace ( "-- | Found " <> show(length $ concat $ map _phylo_groupAncestors $ concat groups) <> " ancestors") groups
742 tracePhyloInfo :: Phylo -> Phylo
743 tracePhyloInfo phylo = trace ("\n" <> "##########################" <> "\n\n" <> "-- | Phylo with λ = "
744 <> show(getLevel phylo) <> " applied to "
745 <> show(length $ Vector.toList $ getRoots phylo) <> " foundations"
749 traceExportGroups :: [PhyloGroup] -> [PhyloGroup]
750 traceExportGroups groups = trace ("\n" <> "-- | Export "
751 <> show(length $ nub $ map (\g -> g ^. phylo_groupBranchId) groups) <> " branches, "
752 <> show(length groups) <> " groups and "
753 <> show(length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) groups) <> " terms"