2 Module : Gargantext.Core.Viz.Phylo.PhyloExport
3 Description : Exportation module of a Phylo
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
11 {-# LANGUAGE TypeSynonymInstances #-}
13 module Gargantext.Core.Viz.Phylo.PhyloExport where
15 import Control.Lens hiding (Level)
16 import Control.Parallel.Strategies (parList, rdeepseq, using)
17 import Data.GraphViz hiding (DotGraph, Order)
18 import Data.GraphViz.Attributes.Complete hiding (EdgeType, Order)
19 import Data.GraphViz.Types.Generalised (DotGraph)
20 import Data.GraphViz.Types.Monadic
21 import Data.List ((++), sort, nub, null, concat, sortOn, groupBy, union, (\\), (!!), init, partition, notElem, unwords, nubBy, inits, elemIndex)
22 import Data.Map (Map, fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault, toList, member)
23 import Data.Text.Lazy (fromStrict, pack, unpack)
24 import Data.Vector (Vector)
25 import Debug.Trace (trace)
26 import Gargantext.Core.Viz.Phylo
27 import Gargantext.Core.Viz.Phylo.PhyloTools
28 import Gargantext.Core.Viz.Phylo.TemporalMatching (filterDocs, filterDiago, reduceDiagos, toProximity, getNextPeriods)
29 import Gargantext.Prelude
30 import Prelude (writeFile)
31 import System.FilePath
32 import qualified Data.GraphViz.Attributes.HTML as H
33 import qualified Data.Text as Text
34 import qualified Data.Text.Lazy as Lazy
35 import qualified Data.Vector as Vector
41 dotToFile :: FilePath -> DotGraph DotId -> IO ()
42 dotToFile filePath dotG = writeFile filePath $ dotToString dotG
44 dotToString :: DotGraph DotId -> [Char]
45 dotToString dotG = unpack (printDotGraph dotG)
47 dynamicToColor :: Double -> H.Attribute
49 | d == 0 = H.BGColor (toColor LightCoral)
50 | d == 1 = H.BGColor (toColor Khaki)
51 | d == 2 = H.BGColor (toColor SkyBlue)
52 | otherwise = H.Color (toColor Black)
54 pickLabelColor :: [Double] -> H.Attribute
56 | elem 0 lst = dynamicToColor 0
57 | elem 2 lst = dynamicToColor 2
58 | elem 1 lst = dynamicToColor 1
59 | otherwise = dynamicToColor 3
61 toDotLabel :: Text.Text -> Label
62 toDotLabel lbl = StrLabel $ fromStrict lbl
64 toAttr :: AttributeName -> Lazy.Text -> CustomAttribute
65 toAttr k v = customAttribute k v
67 metaToAttr :: Map Text.Text [Double] -> [CustomAttribute]
68 metaToAttr meta = map (\(k,v) -> toAttr (fromStrict k) $ (pack . unwords) $ map show v) $ toList meta
70 groupIdToDotId :: PhyloGroupId -> DotId
71 groupIdToDotId (((d,d'),lvl),idx) = (fromStrict . Text.pack) $ ("group" <> (show d) <> (show d') <> (show lvl) <> (show idx))
73 branchIdToDotId :: PhyloBranchId -> DotId
74 branchIdToDotId bId = (fromStrict . Text.pack) $ ("branch" <> show (snd bId))
76 periodIdToDotId :: PhyloPeriodId -> DotId
77 periodIdToDotId prd = (fromStrict . Text.pack) $ ("period" <> show (fst prd) <> show (snd prd))
79 groupToTable :: Vector Ngrams -> PhyloGroup -> H.Label
80 groupToTable fdt g = H.Table H.HTable
81 { H.tableFontAttrs = Just [H.PointSize 14, H.Align H.HLeft]
82 , H.tableAttrs = [H.Border 0, H.CellBorder 0, H.BGColor (toColor White)]
83 , H.tableRows = [header]
84 <> [H.Cells [H.LabelCell [H.Height 10] $ H.Text [H.Str $ fromStrict ""]]]
85 <> ( map ngramsToRow $ splitEvery 4
86 $ reverse $ sortOn (snd . snd)
87 $ zip (ngramsToText fdt (g ^. phylo_groupNgrams))
88 $ zip ((g ^. phylo_groupMeta) ! "dynamics") ((g ^. phylo_groupMeta) ! "inclusion"))}
90 --------------------------------------
91 ngramsToRow :: [(Ngrams,(Double,Double))] -> H.Row
92 ngramsToRow ns = H.Cells $ map (\(n,(d,_)) ->
93 H.LabelCell [H.Align H.HLeft,dynamicToColor d] $ H.Text [H.Str $ fromStrict n]) ns
94 --------------------------------------
97 H.Cells [ H.LabelCell [pickLabelColor ((g ^. phylo_groupMeta) ! "dynamics")]
98 $ H.Text [H.Str $ (((fromStrict . Text.toUpper) $ g ^. phylo_groupLabel)
100 <> (pack $ show (fst $ g ^. phylo_groupPeriod))
101 <> (fromStrict " , ")
102 <> (pack $ show (snd $ g ^. phylo_groupPeriod))
103 <> (fromStrict " ) ")
104 <> (pack $ show (getGroupId g)))]]
105 --------------------------------------
107 branchToDotNode :: PhyloBranch -> Int -> Dot DotId
108 branchToDotNode b bId =
109 node (branchIdToDotId $ b ^. branch_id)
110 ([FillColor [toWColor CornSilk], FontName "Arial", FontSize 40, Shape Egg, Style [SItem Bold []], Label (toDotLabel $ b ^. branch_label)]
111 <> (metaToAttr $ b ^. branch_meta)
112 <> [ toAttr "nodeType" "branch"
113 , toAttr "bId" (pack $ show bId)
114 , toAttr "branchId" (pack $ unwords (map show $ snd $ b ^. branch_id))
115 , toAttr "branch_x" (fromStrict $ Text.pack $ (show $ b ^. branch_x))
116 , toAttr "branch_y" (fromStrict $ Text.pack $ (show $ b ^. branch_y))
117 , toAttr "label" (pack $ show $ b ^. branch_label)
120 periodToDotNode :: (Date,Date) -> (Text.Text,Text.Text) -> Dot DotId
121 periodToDotNode prd prd' =
122 node (periodIdToDotId prd)
123 ([Shape BoxShape, FontSize 50, Label (toDotLabel $ Text.pack (show (fst prd) <> " " <> show (snd prd)))]
124 <> [ toAttr "nodeType" "period"
125 , toAttr "strFrom" (fromStrict $ Text.pack $ (show $ fst prd'))
126 , toAttr "strTo" (fromStrict $ Text.pack $ (show $ snd prd'))
127 , toAttr "from" (fromStrict $ Text.pack $ (show $ fst prd))
128 , toAttr "to" (fromStrict $ Text.pack $ (show $ snd prd))])
131 groupToDotNode :: Vector Ngrams -> PhyloGroup -> Int -> Dot DotId
132 groupToDotNode fdt g bId =
133 node (groupIdToDotId $ getGroupId g)
134 ([FontName "Arial", Shape Square, penWidth 4, toLabel (groupToTable fdt g)]
135 <> [ toAttr "nodeType" "group"
136 , toAttr "gid" (groupIdToDotId $ getGroupId g)
137 , toAttr "from" (pack $ show (fst $ g ^. phylo_groupPeriod))
138 , toAttr "to" (pack $ show (snd $ g ^. phylo_groupPeriod))
139 , toAttr "strFrom" (pack $ show (fst $ g ^. phylo_groupPeriod'))
140 , toAttr "strTo" (pack $ show (snd $ g ^. phylo_groupPeriod'))
141 , toAttr "branchId" (pack $ unwords (init $ map show $ snd $ g ^. phylo_groupBranchId))
142 , toAttr "bId" (pack $ show bId)
143 , toAttr "support" (pack $ show (g ^. phylo_groupSupport))
144 , toAttr "weight" (pack $ show (g ^. phylo_groupWeight))
145 , toAttr "source" (pack $ show (nub $ g ^. phylo_groupSources))
146 , toAttr "sourceFull" (pack $ show (g ^. phylo_groupSources))
147 , toAttr "lbl" (pack $ show (ngramsToLabel fdt (g ^. phylo_groupNgrams)))
148 , toAttr "foundation" (pack $ show (idxToLabel (g ^. phylo_groupNgrams)))
149 , toAttr "role" (pack $ show (idxToLabel' ((g ^. phylo_groupMeta) ! "dynamics")))
150 , toAttr "frequence" (pack $ show (idxToLabel' ((g ^. phylo_groupMeta) ! "frequence")))
151 , toAttr "seaLvl" (pack $ show ((g ^. phylo_groupMeta) ! "seaLevels"))
155 toDotEdge' :: DotId -> DotId -> [Char] -> [Char] -> EdgeType -> Dot DotId
156 toDotEdge' source target thr w edgeType = edge source target
158 GroupToGroup -> undefined
159 GroupToGroupMemory -> [ Width 3, penWidth 4, Color [toWColor Black], Constraint True] <> [toAttr "edgeType" "memoryLink", toAttr "thr" (pack thr), toAttr "weight" (pack w)]
160 BranchToGroup -> undefined
161 BranchToBranch -> undefined
162 GroupToAncestor -> undefined
163 PeriodToPeriod -> undefined)
166 toDotEdge :: DotId -> DotId -> [Char] -> EdgeType -> Dot DotId
167 toDotEdge source target lbl edgeType = edge source target
169 GroupToGroup -> [ Width 3, penWidth 4, Color [toWColor Black], Constraint True] <> [toAttr "edgeType" "link", toAttr "lbl" (pack lbl), toAttr "source" source, toAttr "target" target]
170 GroupToGroupMemory -> undefined
171 BranchToGroup -> [ Width 3, Color [toWColor Black], ArrowHead (AType [(ArrMod FilledArrow RightSide,DotArrow)])] <> [toAttr "edgeType" "branchLink" ]
172 BranchToBranch -> [ Width 2, Color [toWColor Black], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,DotArrow)])]
173 GroupToAncestor -> [ Width 3, Color [toWColor Red], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,NoArrow)]), PenWidth 4] <> [toAttr "edgeType" "ancestorLink", toAttr "lbl" (pack lbl), toAttr "source" source, toAttr "target" target]
174 PeriodToPeriod -> [ Width 5, Color [toWColor Black]])
177 mergePointers :: [PhyloGroup] -> Map (PhyloGroupId,PhyloGroupId) Double
178 mergePointers groups =
179 let toChilds = fromList $ concat $ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupPeriodChilds) groups
180 toParents = fromList $ concat $ map (\g -> map (\(target,w) -> ((target,getGroupId g),w)) $ g ^. phylo_groupPeriodParents) groups
181 in unionWith (\w w' -> max w w') toChilds toParents
183 mergePointersMemory :: [PhyloGroup] -> [((PhyloGroupId,PhyloGroupId),(Double,Double))]
184 mergePointersMemory groups =
185 let toChilds = concat $ map (\g -> map (\(target,(t,w)) -> ((getGroupId g,target),(t,w))) $ g ^. phylo_groupPeriodMemoryChilds) groups
186 toParents = concat $ map (\g -> map (\(target,(t,w)) -> ((target,getGroupId g),(t,w))) $ g ^. phylo_groupPeriodMemoryParents) groups
187 in concat [toChilds,toParents]
189 mergeAncestors :: [PhyloGroup] -> [((PhyloGroupId,PhyloGroupId), Double)]
190 mergeAncestors groups = concat
191 $ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupAncestors)
192 $ filter (\g -> (not . null) $ g ^. phylo_groupAncestors) groups
195 toBid :: PhyloGroup -> [PhyloBranch] -> Int
197 let b' = head' "toBid" (filter (\b -> b ^. branch_id == g ^. phylo_groupBranchId) bs)
198 in fromJust $ elemIndex b' bs
200 exportToDot :: Phylo -> PhyloExport -> DotGraph DotId
201 exportToDot phylo export =
202 trace ("\n-- | Convert " <> show(length $ export ^. export_branches) <> " branches and "
203 <> show(length $ export ^. export_groups) <> " groups "
204 <> show(length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) $ export ^. export_groups) <> " terms to a dot file\n\n"
205 <> "##########################") $
206 digraph ((Str . fromStrict) $ (phyloName $ getConfig phylo)) $ do
208 {- 1) init the dot graph -}
209 graphAttrs ( [ Label (toDotLabel $ (phyloName $ getConfig phylo))]
210 <> [ FontSize 30, LabelLoc VTop, NodeSep 1, RankSep [1], Rank SameRank, Splines SplineEdges, Overlap ScaleOverlaps
213 , Style [SItem Filled []],Color [toWColor White]]
214 {-- home made attributes -}
215 <> [(toAttr (fromStrict "phyloFoundations") $ pack $ show (length $ Vector.toList $ getRoots phylo))
216 ,(toAttr (fromStrict "phyloTerms") $ pack $ show (length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) $ export ^. export_groups))
217 ,(toAttr (fromStrict "phyloDocs") $ pack $ show (sum $ elems $ phylo ^. phylo_timeDocs))
218 ,(toAttr (fromStrict "phyloPeriods") $ pack $ show (length $ elems $ phylo ^. phylo_periods))
219 ,(toAttr (fromStrict "phyloBranches") $ pack $ show (length $ export ^. export_branches))
220 ,(toAttr (fromStrict "phyloGroups") $ pack $ show (length $ export ^. export_groups))
221 ,(toAttr (fromStrict "phyloSources") $ pack $ show (Vector.toList $ getSources phylo))
222 ,(toAttr (fromStrict "phyloTimeScale") $ pack $ getTimeScale phylo)
223 ,(toAttr (fromStrict "phyloLevel") $ pack $ show (_qua_granularity $ phyloQuality $ getConfig phylo))
224 ,(toAttr (fromStrict "phyloSeaRiseStart") $ pack $ show (_cons_start $ getSeaElevation phylo))
225 ,(toAttr (fromStrict "phyloSeaRiseSteps") $ pack $ show (_cons_step $ getSeaElevation phylo))
226 -- ,(toAttr (fromStrict "phyloTermsFreq") $ pack $ show (toList $ _phylo_lastTermFreq phylo))
230 -- toAttr (fromStrict k) $ (pack . unwords) $ map show v
232 -- 2) create a layer for the branches labels -}
233 subgraph (Str "Branches peaks") $ do
235 -- graphAttrs [Rank SameRank]
237 -- 3) group the branches by hierarchy
238 -- mapM (\branches ->
239 -- subgraph (Str "Branches clade") $ do
240 -- graphAttrs [Rank SameRank]
242 -- -- 4) create a node for each branch
243 -- mapM branchToDotNode branches
244 -- ) $ elems $ fromListWith (++) $ map (\b -> ((init . snd) $ b ^. branch_id,[b])) $ export ^. export_branches
246 mapM (\b -> branchToDotNode b (fromJust $ elemIndex b (export ^. export_branches))) $ export ^. export_branches
248 {-- 5) create a layer for each period -}
249 _ <- mapM (\period ->
250 subgraph ((Str . fromStrict . Text.pack) $ ("Period" <> show (fst $ _phylo_periodPeriod period) <> show (snd $ _phylo_periodPeriod period))) $ do
251 graphAttrs [Rank SameRank]
252 periodToDotNode (period ^. phylo_periodPeriod) (period ^. phylo_periodPeriod')
254 {-- 6) create a node for each group -}
255 mapM (\g -> groupToDotNode (getRoots phylo) g (toBid g (export ^. export_branches))) (filter (\g -> g ^. phylo_groupPeriod == (period ^. phylo_periodPeriod)) $ export ^. export_groups)
256 ) $ phylo ^. phylo_periods
258 {-- 7) create the edges between a branch and its first groups -}
259 _ <- mapM (\(bId,groups) ->
260 mapM (\g -> toDotEdge (branchIdToDotId bId) (groupIdToDotId $ getGroupId g) "" BranchToGroup) groups
263 $ map (\groups -> head' "toDot"
264 $ groupBy (\g g' -> g' ^. phylo_groupPeriod == g ^. phylo_groupPeriod)
265 $ sortOn (fst . _phylo_groupPeriod) groups)
266 $ fromListWith (++) $ map (\g -> (g ^. phylo_groupBranchId,[g])) $ export ^. export_groups
268 {- 8) create the edges between the groups -}
269 _ <- mapM (\((k,k'),v) ->
270 toDotEdge (groupIdToDotId k) (groupIdToDotId k') (show v) GroupToGroup
271 ) $ (toList . mergePointers) $ export ^. export_groups
273 {- 8-bis) create the edges between the groups -}
274 {- _ <- mapM (\((k,k'),v) ->
275 toDotEdge' (groupIdToDotId k) (groupIdToDotId k') (show (fst v)) (show (snd v)) GroupToGroupMemory
276 ) $ mergePointersMemory $ export ^. export_groups -}
278 _ <- mapM (\((k,k'),v) ->
279 toDotEdge (groupIdToDotId k) (groupIdToDotId k') (show v) GroupToAncestor
280 ) $ mergeAncestors $ export ^. export_groups
282 -- 10) create the edges between the periods
283 _ <- mapM (\(prd,prd') ->
284 toDotEdge (periodIdToDotId prd) (periodIdToDotId prd') "" PeriodToPeriod
285 ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ getPeriodIds phylo
287 {- 8) create the edges between the branches
288 -- _ <- mapM (\(bId,bId') ->
289 -- toDotEdge (branchIdToDotId bId) (branchIdToDotId bId')
290 -- (Text.pack $ show(branchIdsToProximity bId bId'
291 -- (getThresholdInit $ phyloProximity $ getConfig phylo)
292 -- (getThresholdStep $ phyloProximity $ getConfig phylo))) BranchToBranch
293 -- ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ map _branch_id $ export ^. export_branches
297 graphAttrs [Rank SameRank]
304 filterByBranchSize :: Double -> PhyloExport -> PhyloExport
305 filterByBranchSize thr export =
306 let splited = partition (\b -> head' "filter" ((b ^. branch_meta) ! "size") >= thr) $ export ^. export_branches
307 in export & export_branches .~ (fst splited)
308 & export_groups %~ (filter (\g -> not $ elem (g ^. phylo_groupBranchId) (map _branch_id $ snd splited)))
311 processFilters :: [Filter] -> Quality -> PhyloExport -> PhyloExport
312 processFilters filters qua export =
313 foldl (\export' f -> case f of
314 ByBranchSize thr -> if (thr < (fromIntegral $ qua ^. qua_minBranch))
315 then filterByBranchSize (fromIntegral $ qua ^. qua_minBranch) export'
316 else filterByBranchSize thr export'
323 branchToIso :: [PhyloBranch] -> [PhyloBranch]
324 branchToIso branches =
327 $ map (\(b,x) -> b ^. branch_y + 0.05 - x)
329 $ ([0] ++ (map (\(b,b') ->
330 let idx = length $ commonPrefix (b ^. branch_canonId) (b' ^. branch_canonId) []
331 lmin = min (length $ b ^. branch_seaLevel) (length $ b' ^. branch_seaLevel)
333 if ((idx - 1) > ((length $ b' ^. branch_seaLevel) - 1))
334 then (b' ^. branch_seaLevel) !! (lmin - 1)
335 else (b' ^. branch_seaLevel) !! (idx - 1)
336 ) $ listToSeq branches))
337 in map (\(x,b) -> b & branch_x .~ x)
340 branchToIso' :: Double -> Double -> [PhyloBranch] -> [PhyloBranch]
341 branchToIso' start step branches =
342 let bx = map (\l -> (sum l) + ((fromIntegral $ length l) * 0.5))
344 $ ([0] ++ (map (\(b,b') ->
345 let root = fromIntegral $ length $ commonPrefix (snd $ b ^. branch_id) (snd $ b' ^. branch_id) []
346 in 1 - start - step * root) $ listToSeq branches))
347 in map (\(x,b) -> b & branch_x .~ x)
351 sortByHierarchy :: Int -> [PhyloBranch] -> [PhyloBranch]
352 sortByHierarchy depth branches =
353 if (length branches == 1)
357 let partitions = partition (\b -> depth + 1 == ((length . snd) $ b ^. branch_id)) branches'
358 in (sortOn (\b -> (b ^. branch_meta) ! "birth") (fst partitions))
359 ++ (sortByHierarchy (depth + 1) (snd partitions)))
360 $ groupBy (\b b' -> ((take depth . snd) $ b ^. branch_id) == ((take depth . snd) $ b' ^. branch_id) )
361 $ sortOn (\b -> (take depth . snd) $ b ^. branch_id) branches
364 sortByBirthDate :: Order -> PhyloExport -> PhyloExport
365 sortByBirthDate order export =
366 let branches = sortOn (\b -> (b ^. branch_meta) ! "birth") $ export ^. export_branches
367 branches' = case order of
369 Desc -> reverse branches
370 in export & export_branches .~ branches'
372 processSort :: Sort -> SeaElevation -> PhyloExport -> PhyloExport
373 processSort sort' elev export = case sort' of
374 ByBirthDate o -> sortByBirthDate o export
375 ByHierarchy _ -> export & export_branches .~ (branchToIso' (_cons_start elev) (_cons_step elev)
376 $ sortByHierarchy 0 (export ^. export_branches))
383 -- | Return the conditional probability of i knowing j
384 conditional :: Ord a => Map (a,a) Double -> a -> a -> Double
385 conditional m i j = (findWithDefault 0 (i,j) m)
389 -- | Return the genericity score of a given ngram
390 genericity :: Map (Int, Int) Double -> [Int] -> Int -> Double
391 genericity m l i = ( (sum $ map (\j -> conditional m i j) l)
392 - (sum $ map (\j -> conditional m j i) l)) / (fromIntegral $ (length l) + 1)
395 -- | Return the specificity score of a given ngram
396 specificity :: Map (Int, Int) Double -> [Int] -> Int -> Double
397 specificity m l i = ( (sum $ map (\j -> conditional m j i) l)
398 - (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
401 -- | Return the inclusion score of a given ngram
402 inclusion :: Map (Int, Int) Double -> [Int] -> Int -> Double
403 inclusion m l i = ( (sum $ map (\j -> conditional m j i) l)
404 + (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
407 ngramsMetrics :: Phylo -> PhyloExport -> PhyloExport
408 ngramsMetrics phylo export =
411 (\g -> g & phylo_groupMeta %~ insert "genericity"
412 (map (\n -> genericity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
413 & phylo_groupMeta %~ insert "specificity"
414 (map (\n -> specificity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
415 & phylo_groupMeta %~ insert "inclusion"
416 (map (\n -> inclusion (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
417 & phylo_groupMeta %~ insert "frequence"
418 (map (\n -> getInMap n (phylo ^. phylo_lastTermFreq)) $ g ^. phylo_groupNgrams)
422 branchDating :: PhyloExport -> PhyloExport
423 branchDating export =
424 over ( export_branches
427 let groups = sortOn fst
428 $ foldl' (\acc g -> if (g ^. phylo_groupBranchId == b ^. branch_id)
429 then acc ++ [g ^. phylo_groupPeriod]
430 else acc ) [] $ export ^. export_groups
432 birth = fst $ head' "birth" groups
433 age = (snd $ last' "age" groups) - birth
434 in b & branch_meta %~ insert "birth" [fromIntegral birth]
435 & branch_meta %~ insert "age" [fromIntegral age]
436 & branch_meta %~ insert "size" [fromIntegral $ length periods] ) export
438 processMetrics :: Phylo -> PhyloExport -> PhyloExport
439 processMetrics phylo export = ngramsMetrics phylo
440 $ branchDating export
447 nk :: Int -> [[Int]] -> Int
449 $ map (\g -> if (elem n g)
454 tf :: Int -> [[Int]] -> Double
455 tf n groups = (fromIntegral $ nk n groups) / (fromIntegral $ length $ concat groups)
458 idf :: Int -> [[Int]] -> Double
459 idf n groups = log ((fromIntegral $ length groups) / (fromIntegral $ nk n groups))
462 findTfIdf :: [[Int]] -> [(Int,Double)]
463 findTfIdf groups = reverse $ sortOn snd $ map (\n -> (n,(tf n groups) * (idf n groups))) $ sort $ nub $ concat groups
466 findEmergences :: [PhyloGroup] -> Map Int Double -> [(Int,Double)]
467 findEmergences groups freq =
468 let ngrams = map _phylo_groupNgrams groups
469 dynamics = map (\g -> (g ^. phylo_groupMeta) ! "dynamics") groups
470 emerging = nubBy (\n1 n2 -> fst n1 == fst n2)
471 $ concat $ map (\g -> filter (\(_,d) -> d == 0) $ zip (fst g) (snd g)) $ zip ngrams dynamics
472 in reverse $ sortOn snd
473 $ map (\(n,_) -> if (member n freq)
478 mostEmergentTfIdf :: Int -> Map Int Double -> Vector Ngrams -> PhyloExport -> PhyloExport
479 mostEmergentTfIdf nth freq foundations export =
480 over ( export_branches
483 let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
484 tfidf = findTfIdf (map _phylo_groupNgrams groups)
485 emergences = findEmergences groups freq
486 selected = if (null emergences)
487 then map fst $ take nth tfidf
488 else [fst $ head' "mostEmergentTfIdf" emergences]
489 ++ (map fst $ take (nth - 1) $ filter (\(n,_) -> n /= (fst $ head' "mostEmergentTfIdf" emergences)) tfidf)
490 in b & branch_label .~ (ngramsToLabel foundations selected)) export
493 getNthMostMeta :: Int -> [Double] -> [Int] -> [Int]
494 getNthMostMeta nth meta ns = map (\(idx,_) -> (ns !! idx))
497 $ sortOn snd $ zip [0..] meta
500 mostInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
501 mostInclusive nth foundations export =
502 over ( export_branches
505 let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
506 cooc = foldl (\acc g -> unionWith (+) acc (g ^. phylo_groupCooc)) empty groups
507 ngrams = sort $ foldl (\acc g -> union acc (g ^. phylo_groupNgrams)) [] groups
508 inc = map (\n -> inclusion cooc (ngrams \\ [n]) n) ngrams
509 lbl = ngramsToLabel foundations $ getNthMostMeta nth inc ngrams
510 in b & branch_label .~ lbl ) export
513 mostEmergentInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
514 mostEmergentInclusive nth foundations export =
518 let lbl = ngramsToLabel foundations
520 $ map (\(_,(_,idx)) -> idx)
522 $ map (\groups -> sortOn (fst . snd) groups)
523 $ groupBy ((==) `on` fst) $ reverse $ sortOn fst
524 $ zip ((g ^. phylo_groupMeta) ! "inclusion")
525 $ zip ((g ^. phylo_groupMeta) ! "dynamics") (g ^. phylo_groupNgrams)
526 in g & phylo_groupLabel .~ lbl ) export
529 processLabels :: [PhyloLabel] -> Vector Ngrams -> Map Int Double -> PhyloExport -> PhyloExport
530 processLabels labels foundations freq export =
531 foldl (\export' label ->
533 GroupLabel tagger nth ->
535 MostEmergentInclusive -> mostEmergentInclusive nth foundations export'
536 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger"
537 BranchLabel tagger nth ->
539 MostInclusive -> mostInclusive nth foundations export'
540 MostEmergentTfIdf -> mostEmergentTfIdf nth freq foundations export'
541 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger" ) export labels
549 toDynamics :: Int -> [PhyloGroup] -> PhyloGroup -> Map Int (Date,Date) -> Double
550 toDynamics n parents g m =
551 let prd = g ^. phylo_groupPeriod
552 end = last' "dynamics" (sort $ map snd $ elems m)
553 in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end))
556 else if ((fst prd) == (fst $ m ! n))
564 --------------------------------------
566 isNew = not $ elem n $ concat $ map _phylo_groupNgrams parents
569 processDynamics :: [PhyloGroup] -> [PhyloGroup]
570 processDynamics groups =
572 let parents = filter (\g' -> (g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
573 && ((fst $ g ^. phylo_groupPeriod) > (fst $ g' ^. phylo_groupPeriod))) groups
574 in g & phylo_groupMeta %~ insert "dynamics" (map (\n -> toDynamics n parents g mapNgrams) $ g ^. phylo_groupNgrams) ) groups
576 --------------------------------------
577 mapNgrams :: Map Int (Date,Date)
578 mapNgrams = map (\dates ->
579 let dates' = sort dates
580 in (head' "dynamics" dates', last' "dynamics" dates'))
582 $ foldl (\acc g -> acc ++ ( map (\n -> (n,[fst $ g ^. phylo_groupPeriod, snd $ g ^. phylo_groupPeriod]))
583 $ (g ^. phylo_groupNgrams))) [] groups
590 getGroupThr :: Double -> PhyloGroup -> Double
592 let seaLvl = (g ^. phylo_groupMeta) ! "seaLevels"
593 breaks = (g ^. phylo_groupMeta) ! "breaks"
594 in (last' "export" (take (round $ (last' "export" breaks) + 1) seaLvl)) - step
596 toAncestor :: Double -> Map Int Double -> Proximity -> Double -> [PhyloGroup] -> PhyloGroup -> PhyloGroup
597 toAncestor nbDocs diago proximity step candidates ego =
598 let curr = ego ^. phylo_groupAncestors
599 in ego & phylo_groupAncestors .~ (curr ++ (map (\(g,w) -> (getGroupId g,w))
600 $ filter (\(g,w) -> (w > 0) && (w >= (min (getGroupThr step ego) (getGroupThr step g))))
601 $ map (\g -> (g, toProximity nbDocs diago proximity (ego ^. phylo_groupNgrams) (g ^. phylo_groupNgrams) (g ^. phylo_groupNgrams)))
602 $ filter (\g -> g ^. phylo_groupBranchId /= ego ^. phylo_groupBranchId ) candidates))
605 headsToAncestors :: Double -> Map Int Double -> Proximity -> Double -> [PhyloGroup] -> [PhyloGroup] -> [PhyloGroup]
606 headsToAncestors nbDocs diago proximity step heads acc =
610 let ego = head' "headsToAncestors" heads
611 heads' = tail' "headsToAncestors" heads
612 in headsToAncestors nbDocs diago proximity step heads' (acc ++ [toAncestor nbDocs diago proximity step heads' ego])
615 toHorizon :: Phylo -> Phylo
617 let phyloAncestor = updatePhyloGroups
619 (fromList $ map (\g -> (getGroupId g, g))
621 $ tracePhyloAncestors newGroups) phylo
622 reBranched = fromList $ map (\g -> (getGroupId g, g)) $ concat
623 $ groupsToBranches $ fromList $ map (\g -> (getGroupId g, g)) $ getGroupsFromLevel level phyloAncestor
624 in updatePhyloGroups level reBranched phylo
626 -- | 1) for each periods
627 periods :: [PhyloPeriodId]
628 periods = getPeriodIds phylo
631 level = getLastLevel phylo
634 frame = getTimeFrame $ timeUnit $ getConfig phylo
635 -- | 2) find ancestors between groups without parents
636 mapGroups :: [[PhyloGroup]]
637 mapGroups = map (\prd ->
638 let groups = getGroupsFromLevelPeriods level [prd] phylo
639 childs = getPreviousChildIds level frame prd periods phylo
640 -- maybe add a better filter for non isolated ancestors
641 heads = filter (\g -> (not . null) $ (g ^. phylo_groupPeriodChilds))
642 $ filter (\g -> null (g ^. phylo_groupPeriodParents) && (notElem (getGroupId g) childs)) groups
643 noHeads = groups \\ heads
644 nbDocs = sum $ elems $ filterDocs (phylo ^. phylo_timeDocs) [prd]
645 diago = reduceDiagos $ filterDiago (phylo ^. phylo_timeCooc) [prd]
646 proximity = (phyloProximity $ getConfig phylo)
647 step = case getSeaElevation phylo of
649 Adaptative _ -> undefined
650 -- in headsToAncestors nbDocs diago proximity heads groups []
651 in map (\ego -> toAncestor nbDocs diago proximity step noHeads ego)
652 $ headsToAncestors nbDocs diago proximity step heads []
654 -- | 3) process this task concurrently
655 newGroups :: [[PhyloGroup]]
656 newGroups = mapGroups `using` parList rdeepseq
657 --------------------------------------
659 getPreviousChildIds :: Level -> Int -> PhyloPeriodId -> [PhyloPeriodId] -> Phylo -> [PhyloGroupId]
660 getPreviousChildIds lvl frame curr prds phylo =
661 concat $ map ((map fst) . _phylo_groupPeriodChilds)
662 $ getGroupsFromLevelPeriods lvl (getNextPeriods ToParents frame curr prds) phylo
664 ---------------------
665 -- | phyloExport | --
666 ---------------------
668 toPhyloExport :: Phylo -> DotGraph DotId
669 toPhyloExport phylo = exportToDot phylo
670 $ processFilters (exportFilter $ getConfig phylo) (phyloQuality $ getConfig phylo)
671 $ processSort (exportSort $ getConfig phylo) (getSeaElevation phylo)
672 $ processLabels (exportLabel $ getConfig phylo) (getRoots phylo) (_phylo_lastTermFreq phylo)
673 $ processMetrics phylo export
675 export :: PhyloExport
676 export = PhyloExport groups branches
677 --------------------------------------
678 branches :: [PhyloBranch]
679 branches = map (\g ->
680 let seaLvl = (g ^. phylo_groupMeta) ! "seaLevels"
681 breaks = (g ^. phylo_groupMeta) ! "breaks"
682 canonId = take (round $ (last' "export" breaks) + 2) (snd $ g ^. phylo_groupBranchId)
683 in PhyloBranch (g ^. phylo_groupBranchId)
687 (last' "export" (take (round $ (last' "export" breaks) + 1) seaLvl))
691 $ map (\gs -> head' "export" gs)
692 $ groupBy (\g g' -> g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
693 $ sortOn (\g -> g ^. phylo_groupBranchId) groups
694 --------------------------------------
695 groups :: [PhyloGroup]
696 groups = traceExportGroups
698 $ getGroupsFromLevel (phyloLevel $ getConfig phylo)
699 $ tracePhyloInfo phylo
700 -- \$ toHorizon phylo
703 traceExportBranches :: [PhyloBranch] -> [PhyloBranch]
704 traceExportBranches branches = trace ("\n"
705 <> "-- | Export " <> show(length branches) <> " branches") branches
707 tracePhyloAncestors :: [[PhyloGroup]] -> [[PhyloGroup]]
708 tracePhyloAncestors groups = trace ( "-- | Found " <> show(length $ concat $ map _phylo_groupAncestors $ concat groups) <> " ancestors") groups
710 tracePhyloInfo :: Phylo -> Phylo
711 tracePhyloInfo phylo = trace ("\n" <> "##########################" <> "\n\n" <> "-- | Phylo with λ = "
712 <> show(_qua_granularity $ phyloQuality $ getConfig phylo) <> " applied to "
713 <> show(length $ Vector.toList $ getRoots phylo) <> " foundations"
717 traceExportGroups :: [PhyloGroup] -> [PhyloGroup]
718 traceExportGroups groups = trace ("\n" <> "-- | Export "
719 <> show(length $ nub $ map (\g -> g ^. phylo_groupBranchId) groups) <> " branches, "
720 <> show(length groups) <> " groups and "
721 <> show(length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) groups) <> " terms"