]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Viz/Phylo/PhyloExport.hs
refactoring after code review #1
[gargantext.git] / src / Gargantext / Core / Viz / Phylo / PhyloExport.hs
1 {-|
2 Module : Gargantext.Core.Viz.Phylo.PhyloExport
3 Description : Exportation module of a Phylo
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9 -}
10
11 {-# LANGUAGE TypeSynonymInstances #-}
12
13 module Gargantext.Core.Viz.Phylo.PhyloExport where
14
15 import Control.Lens hiding (Level)
16 import Control.Parallel.Strategies (parList, rdeepseq, using)
17 import Data.GraphViz hiding (DotGraph, Order)
18 import Data.GraphViz.Attributes.Complete hiding (EdgeType, Order)
19 import Data.GraphViz.Types.Generalised (DotGraph)
20 import Data.GraphViz.Types.Monadic
21 import Data.List ((++), sort, nub, null, concat, sortOn, groupBy, union, (\\), (!!), init, partition, notElem, unwords, nubBy, inits, elemIndex)
22 import Data.Map (Map, fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault, toList, member)
23 import Data.Text.Lazy (fromStrict, pack, unpack)
24 import Data.Vector (Vector)
25 import Debug.Trace (trace)
26 import Gargantext.Core.Viz.Phylo
27 import Gargantext.Core.Viz.Phylo.PhyloTools
28 import Gargantext.Core.Viz.Phylo.TemporalMatching (filterDocs, filterDiago, reduceDiagos, toProximity, getNextPeriods)
29 import Gargantext.Prelude hiding (scale)
30 import Prelude (writeFile)
31 import System.FilePath
32 import qualified Data.GraphViz.Attributes.HTML as H
33 import qualified Data.Text as Text
34 import qualified Data.Text.Lazy as Lazy
35 import qualified Data.Vector as Vector
36
37 --------------------
38 -- | Dot export | --
39 --------------------
40
41 dotToFile :: FilePath -> DotGraph DotId -> IO ()
42 dotToFile filePath dotG = writeFile filePath $ dotToString dotG
43
44 dotToString :: DotGraph DotId -> [Char]
45 dotToString dotG = unpack (printDotGraph dotG)
46
47 dynamicToColor :: Double -> H.Attribute
48 dynamicToColor d
49 | d == 0 = H.BGColor (toColor LightCoral)
50 | d == 1 = H.BGColor (toColor Khaki)
51 | d == 2 = H.BGColor (toColor SkyBlue)
52 | otherwise = H.Color (toColor Black)
53
54 pickLabelColor :: [Double] -> H.Attribute
55 pickLabelColor lst
56 | elem 0 lst = dynamicToColor 0
57 | elem 2 lst = dynamicToColor 2
58 | elem 1 lst = dynamicToColor 1
59 | otherwise = dynamicToColor 3
60
61 toDotLabel :: Text.Text -> Label
62 toDotLabel lbl = StrLabel $ fromStrict lbl
63
64 toAttr :: AttributeName -> Lazy.Text -> CustomAttribute
65 toAttr k v = customAttribute k v
66
67 metaToAttr :: Map Text.Text [Double] -> [CustomAttribute]
68 metaToAttr meta = map (\(k,v) -> toAttr (fromStrict k) $ (pack . unwords) $ map show v) $ toList meta
69
70 groupIdToDotId :: PhyloGroupId -> DotId
71 groupIdToDotId (((d,d'),lvl),idx) = (fromStrict . Text.pack) $ ("group" <> (show d) <> (show d') <> (show lvl) <> (show idx))
72
73 branchIdToDotId :: PhyloBranchId -> DotId
74 branchIdToDotId bId = (fromStrict . Text.pack) $ ("branch" <> show (snd bId))
75
76 periodIdToDotId :: Period -> DotId
77 periodIdToDotId prd = (fromStrict . Text.pack) $ ("period" <> show (fst prd) <> show (snd prd))
78
79 groupToTable :: Vector Ngrams -> PhyloGroup -> H.Label
80 groupToTable fdt g = H.Table H.HTable
81 { H.tableFontAttrs = Just [H.PointSize 14, H.Align H.HLeft]
82 , H.tableAttrs = [H.Border 0, H.CellBorder 0, H.BGColor (toColor White)]
83 , H.tableRows = [header]
84 <> [H.Cells [H.LabelCell [H.Height 10] $ H.Text [H.Str $ fromStrict ""]]]
85 <> ( map ngramsToRow $ splitEvery 4
86 $ reverse $ sortOn (snd . snd)
87 $ zip (ngramsToText fdt (g ^. phylo_groupNgrams))
88 $ zip ((g ^. phylo_groupMeta) ! "dynamics") ((g ^. phylo_groupMeta) ! "inclusion"))}
89 where
90 --------------------------------------
91 ngramsToRow :: [(Ngrams,(Double,Double))] -> H.Row
92 ngramsToRow ns = H.Cells $ map (\(n,(d,_)) ->
93 H.LabelCell [H.Align H.HLeft,dynamicToColor d] $ H.Text [H.Str $ fromStrict n]) ns
94 --------------------------------------
95 header :: H.Row
96 header =
97 H.Cells [ H.LabelCell [pickLabelColor ((g ^. phylo_groupMeta) ! "dynamics")]
98 $ H.Text [H.Str $ (((fromStrict . Text.toUpper) $ g ^. phylo_groupLabel)
99 <> (fromStrict " ( ")
100 <> (pack $ show (fst $ g ^. phylo_groupPeriod))
101 <> (fromStrict " , ")
102 <> (pack $ show (snd $ g ^. phylo_groupPeriod))
103 <> (fromStrict " ) ")
104 <> (pack $ show (getGroupId g)))]]
105 --------------------------------------
106
107 branchToDotNode :: PhyloBranch -> Int -> Dot DotId
108 branchToDotNode b bId =
109 node (branchIdToDotId $ b ^. branch_id)
110 ([FillColor [toWColor CornSilk], FontName "Arial", FontSize 40, Shape Egg, Style [SItem Bold []], Label (toDotLabel $ b ^. branch_label)]
111 <> (metaToAttr $ b ^. branch_meta)
112 <> [ toAttr "nodeType" "branch"
113 , toAttr "bId" (pack $ show bId)
114 , toAttr "branchId" (pack $ unwords (map show $ snd $ b ^. branch_id))
115 , toAttr "branch_x" (fromStrict $ Text.pack $ (show $ b ^. branch_x))
116 , toAttr "branch_y" (fromStrict $ Text.pack $ (show $ b ^. branch_y))
117 , toAttr "label" (pack $ show $ b ^. branch_label)
118 ])
119
120 periodToDotNode :: (Date,Date) -> (Text.Text,Text.Text) -> Dot DotId
121 periodToDotNode prd prd' =
122 node (periodIdToDotId prd)
123 ([Shape BoxShape, FontSize 50, Label (toDotLabel $ Text.pack (show (fst prd) <> " " <> show (snd prd)))]
124 <> [ toAttr "nodeType" "period"
125 , toAttr "strFrom" (fromStrict $ Text.pack $ (show $ fst prd'))
126 , toAttr "strTo" (fromStrict $ Text.pack $ (show $ snd prd'))
127 , toAttr "from" (fromStrict $ Text.pack $ (show $ fst prd))
128 , toAttr "to" (fromStrict $ Text.pack $ (show $ snd prd))])
129
130
131 groupToDotNode :: Vector Ngrams -> PhyloGroup -> Int -> Dot DotId
132 groupToDotNode fdt g bId =
133 node (groupIdToDotId $ getGroupId g)
134 ([FontName "Arial", Shape Square, penWidth 4, toLabel (groupToTable fdt g)]
135 <> [ toAttr "nodeType" "group"
136 , toAttr "gid" (groupIdToDotId $ getGroupId g)
137 , toAttr "from" (pack $ show (fst $ g ^. phylo_groupPeriod))
138 , toAttr "to" (pack $ show (snd $ g ^. phylo_groupPeriod))
139 , toAttr "strFrom" (pack $ show (fst $ g ^. phylo_groupPeriod'))
140 , toAttr "strTo" (pack $ show (snd $ g ^. phylo_groupPeriod'))
141 , toAttr "branchId" (pack $ unwords (init $ map show $ snd $ g ^. phylo_groupBranchId))
142 , toAttr "bId" (pack $ show bId)
143 , toAttr "support" (pack $ show (g ^. phylo_groupSupport))
144 , toAttr "weight" (pack $ show (g ^. phylo_groupWeight))
145 , toAttr "source" (pack $ show (nub $ g ^. phylo_groupSources))
146 , toAttr "sourceFull" (pack $ show (g ^. phylo_groupSources))
147 , toAttr "lbl" (pack $ show (ngramsToLabel fdt (g ^. phylo_groupNgrams)))
148 , toAttr "foundation" (pack $ show (idxToLabel (g ^. phylo_groupNgrams)))
149 , toAttr "role" (pack $ show (idxToLabel' ((g ^. phylo_groupMeta) ! "dynamics")))
150 , toAttr "frequence" (pack $ show (idxToLabel' ((g ^. phylo_groupMeta) ! "frequence")))
151 , toAttr "seaLvl" (pack $ show ((g ^. phylo_groupMeta) ! "seaLevels"))
152 ])
153
154
155 toDotEdge' :: DotId -> DotId -> [Char] -> [Char] -> EdgeType -> Dot DotId
156 toDotEdge' source target thr w edgeType = edge source target
157 (case edgeType of
158 GroupToGroup -> undefined
159 GroupToGroupMemory -> [ Width 3, penWidth 4, Color [toWColor Black], Constraint True] <> [toAttr "edgeType" "memoryLink", toAttr "thr" (pack thr), toAttr "weight" (pack w)]
160 BranchToGroup -> undefined
161 BranchToBranch -> undefined
162 GroupToAncestor -> undefined
163 PeriodToPeriod -> undefined)
164
165
166 toDotEdge :: DotId -> DotId -> [Char] -> EdgeType -> Dot DotId
167 toDotEdge source target lbl edgeType = edge source target
168 (case edgeType of
169 GroupToGroup -> [ Width 3, penWidth 4, Color [toWColor Black], Constraint True] <> [toAttr "edgeType" "link", toAttr "lbl" (pack lbl), toAttr "source" source, toAttr "target" target]
170 GroupToGroupMemory -> undefined
171 BranchToGroup -> [ Width 3, Color [toWColor Black], ArrowHead (AType [(ArrMod FilledArrow RightSide,DotArrow)])] <> [toAttr "edgeType" "branchLink" ]
172 BranchToBranch -> [ Width 2, Color [toWColor Black], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,DotArrow)])]
173 GroupToAncestor -> [ Width 3, Color [toWColor Red], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,NoArrow)]), PenWidth 4] <> [toAttr "edgeType" "ancestorLink", toAttr "lbl" (pack lbl), toAttr "source" source, toAttr "target" target]
174 PeriodToPeriod -> [ Width 5, Color [toWColor Black]])
175
176
177 mergePointers :: [PhyloGroup] -> Map (PhyloGroupId,PhyloGroupId) Double
178 mergePointers groups =
179 let toChilds = fromList $ concat $ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupPeriodChilds) groups
180 toParents = fromList $ concat $ map (\g -> map (\(target,w) -> ((target,getGroupId g),w)) $ g ^. phylo_groupPeriodParents) groups
181 in unionWith (\w w' -> max w w') toChilds toParents
182
183 mergePointersMemory :: [PhyloGroup] -> [((PhyloGroupId,PhyloGroupId),(Double,Double))]
184 mergePointersMemory groups =
185 let toChilds = concat $ map (\g -> map (\(target,(t,w)) -> ((getGroupId g,target),(t,w))) $ g ^. phylo_groupPeriodMemoryChilds) groups
186 toParents = concat $ map (\g -> map (\(target,(t,w)) -> ((target,getGroupId g),(t,w))) $ g ^. phylo_groupPeriodMemoryParents) groups
187 in concat [toChilds,toParents]
188
189 mergeAncestors :: [PhyloGroup] -> [((PhyloGroupId,PhyloGroupId), Double)]
190 mergeAncestors groups = concat
191 $ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupAncestors)
192 $ filter (\g -> (not . null) $ g ^. phylo_groupAncestors) groups
193
194
195 toBid :: PhyloGroup -> [PhyloBranch] -> Int
196 toBid g bs =
197 let b' = head' "toBid" (filter (\b -> b ^. branch_id == g ^. phylo_groupBranchId) bs)
198 in fromJust $ elemIndex b' bs
199
200 exportToDot :: Phylo -> PhyloExport -> DotGraph DotId
201 exportToDot phylo export =
202 trace ("\n-- | Convert " <> show(length $ export ^. export_branches) <> " branches and "
203 <> show(length $ export ^. export_groups) <> " groups "
204 <> show(length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) $ export ^. export_groups) <> " terms to a dot file\n\n"
205 <> "##########################") $
206 digraph ((Str . fromStrict) $ (phyloName $ getConfig phylo)) $ do
207
208 {- 1) init the dot graph -}
209 graphAttrs ( [ Label (toDotLabel $ (phyloName $ getConfig phylo))]
210 <> [ FontSize 30, LabelLoc VTop, NodeSep 1, RankSep [1], Rank SameRank, Splines SplineEdges, Overlap ScaleOverlaps
211 , Ratio FillRatio
212 -- , Ratio AutoRatio
213 , Style [SItem Filled []],Color [toWColor White]]
214 {-- home made attributes -}
215 <> [(toAttr (fromStrict "phyloFoundations") $ pack $ show (length $ Vector.toList $ getRoots phylo))
216 ,(toAttr (fromStrict "phyloTerms") $ pack $ show (length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) $ export ^. export_groups))
217 ,(toAttr (fromStrict "phyloDocs") $ pack $ show (sum $ elems $ phylo ^. phylo_timeDocs))
218 ,(toAttr (fromStrict "phyloPeriods") $ pack $ show (length $ elems $ phylo ^. phylo_periods))
219 ,(toAttr (fromStrict "phyloBranches") $ pack $ show (length $ export ^. export_branches))
220 ,(toAttr (fromStrict "phyloGroups") $ pack $ show (length $ export ^. export_groups))
221 ,(toAttr (fromStrict "phyloSources") $ pack $ show (Vector.toList $ getSources phylo))
222 ,(toAttr (fromStrict "phyloTimeScale") $ pack $ getTimeScale phylo)
223 ,(toAttr (fromStrict "PhyloScale") $ pack $ show (_qua_granularity $ phyloQuality $ getConfig phylo))
224 ,(toAttr (fromStrict "phyloQuality") $ pack $ show (phylo ^. phylo_quality))
225 ,(toAttr (fromStrict "phyloSeaRiseStart") $ pack $ show (_cons_start $ getSeaElevation phylo))
226 ,(toAttr (fromStrict "phyloSeaRiseSteps") $ pack $ show (_cons_step $ getSeaElevation phylo))
227 -- ,(toAttr (fromStrict "phyloTermsFreq") $ pack $ show (toList $ _phylo_lastTermFreq phylo))
228 ])
229
230 {-
231 -- toAttr (fromStrict k) $ (pack . unwords) $ map show v
232
233 -- 2) create a layer for the branches labels -}
234 subgraph (Str "Branches peaks") $ do
235
236 -- graphAttrs [Rank SameRank]
237 {-
238 -- 3) group the branches by hierarchy
239 -- mapM (\branches ->
240 -- subgraph (Str "Branches clade") $ do
241 -- graphAttrs [Rank SameRank]
242
243 -- -- 4) create a node for each branch
244 -- mapM branchToDotNode branches
245 -- ) $ elems $ fromListWith (++) $ map (\b -> ((init . snd) $ b ^. branch_id,[b])) $ export ^. export_branches
246 -}
247 mapM (\b -> branchToDotNode b (fromJust $ elemIndex b (export ^. export_branches))) $ export ^. export_branches
248
249 {-- 5) create a layer for each period -}
250 _ <- mapM (\period ->
251 subgraph ((Str . fromStrict . Text.pack) $ ("Period" <> show (fst $ _phylo_periodPeriod period) <> show (snd $ _phylo_periodPeriod period))) $ do
252 graphAttrs [Rank SameRank]
253 periodToDotNode (period ^. phylo_periodPeriod) (period ^. phylo_periodPeriodStr)
254
255 {-- 6) create a node for each group -}
256 mapM (\g -> groupToDotNode (getRoots phylo) g (toBid g (export ^. export_branches))) (filter (\g -> g ^. phylo_groupPeriod == (period ^. phylo_periodPeriod)) $ export ^. export_groups)
257 ) $ phylo ^. phylo_periods
258
259 {-- 7) create the edges between a branch and its first groups -}
260 _ <- mapM (\(bId,groups) ->
261 mapM (\g -> toDotEdge (branchIdToDotId bId) (groupIdToDotId $ getGroupId g) "" BranchToGroup) groups
262 )
263 $ toList
264 $ map (\groups -> head' "toDot"
265 $ groupBy (\g g' -> g' ^. phylo_groupPeriod == g ^. phylo_groupPeriod)
266 $ sortOn (fst . _phylo_groupPeriod) groups)
267 $ fromListWith (++) $ map (\g -> (g ^. phylo_groupBranchId,[g])) $ export ^. export_groups
268
269 {- 8) create the edges between the groups -}
270 _ <- mapM (\((k,k'),v) ->
271 toDotEdge (groupIdToDotId k) (groupIdToDotId k') (show v) GroupToGroup
272 ) $ (toList . mergePointers) $ export ^. export_groups
273
274 {- 8-bis) create the edges between the groups -}
275 {- _ <- mapM (\((k,k'),v) ->
276 toDotEdge' (groupIdToDotId k) (groupIdToDotId k') (show (fst v)) (show (snd v)) GroupToGroupMemory
277 ) $ mergePointersMemory $ export ^. export_groups -}
278
279 _ <- mapM (\((k,k'),v) ->
280 toDotEdge (groupIdToDotId k) (groupIdToDotId k') (show v) GroupToAncestor
281 ) $ mergeAncestors $ export ^. export_groups
282
283 -- 10) create the edges between the periods
284 _ <- mapM (\(prd,prd') ->
285 toDotEdge (periodIdToDotId prd) (periodIdToDotId prd') "" PeriodToPeriod
286 ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ getPeriodIds phylo
287
288 {- 8) create the edges between the branches
289 -- _ <- mapM (\(bId,bId') ->
290 -- toDotEdge (branchIdToDotId bId) (branchIdToDotId bId')
291 -- (Text.pack $ show(branchIdsToProximity bId bId'
292 -- (getThresholdInit $ phyloProximity $ getConfig phylo)
293 -- (getThresholdStep $ phyloProximity $ getConfig phylo))) BranchToBranch
294 -- ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ map _branch_id $ export ^. export_branches
295 -}
296
297
298 graphAttrs [Rank SameRank]
299
300
301 ----------------
302 -- | Filter | --
303 ----------------
304
305 filterByBranchSize :: Double -> PhyloExport -> PhyloExport
306 filterByBranchSize thr export =
307 let splited = partition (\b -> head' "filter" ((b ^. branch_meta) ! "size") >= thr) $ export ^. export_branches
308 in export & export_branches .~ (fst splited)
309 & export_groups %~ (filter (\g -> not $ elem (g ^. phylo_groupBranchId) (map _branch_id $ snd splited)))
310
311
312 processFilters :: [Filter] -> Quality -> PhyloExport -> PhyloExport
313 processFilters filters qua export =
314 foldl (\export' f -> case f of
315 ByBranchSize thr -> if (thr < (fromIntegral $ qua ^. qua_minBranch))
316 then filterByBranchSize (fromIntegral $ qua ^. qua_minBranch) export'
317 else filterByBranchSize thr export'
318 ) export filters
319
320 --------------
321 -- | Sort | --
322 --------------
323
324 branchToIso :: [PhyloBranch] -> [PhyloBranch]
325 branchToIso branches =
326 let steps = map sum
327 $ inits
328 $ map (\(b,x) -> b ^. branch_y + 0.05 - x)
329 $ zip branches
330 $ ([0] ++ (map (\(b,b') ->
331 let idx = length $ commonPrefix (b ^. branch_canonId) (b' ^. branch_canonId) []
332 lmin = min (length $ b ^. branch_seaLevel) (length $ b' ^. branch_seaLevel)
333 in
334 if ((idx - 1) > ((length $ b' ^. branch_seaLevel) - 1))
335 then (b' ^. branch_seaLevel) !! (lmin - 1)
336 else (b' ^. branch_seaLevel) !! (idx - 1)
337 ) $ listToSeq branches))
338 in map (\(x,b) -> b & branch_x .~ x)
339 $ zip steps branches
340
341 branchToIso' :: Double -> Double -> [PhyloBranch] -> [PhyloBranch]
342 branchToIso' start step branches =
343 let bx = map (\l -> (sum l) + ((fromIntegral $ length l) * 0.5))
344 $ inits
345 $ ([0] ++ (map (\(b,b') ->
346 let root = fromIntegral $ length $ commonPrefix (snd $ b ^. branch_id) (snd $ b' ^. branch_id) []
347 in 1 - start - step * root) $ listToSeq branches))
348 in map (\(x,b) -> b & branch_x .~ x)
349 $ zip bx branches
350
351
352 sortByHierarchy :: Int -> [PhyloBranch] -> [PhyloBranch]
353 sortByHierarchy depth branches =
354 if (length branches == 1)
355 then branches
356 else concat
357 $ map (\branches' ->
358 let partitions = partition (\b -> depth + 1 == ((length . snd) $ b ^. branch_id)) branches'
359 in (sortOn (\b -> (b ^. branch_meta) ! "birth") (fst partitions))
360 ++ (sortByHierarchy (depth + 1) (snd partitions)))
361 $ groupBy (\b b' -> ((take depth . snd) $ b ^. branch_id) == ((take depth . snd) $ b' ^. branch_id) )
362 $ sortOn (\b -> (take depth . snd) $ b ^. branch_id) branches
363
364
365 sortByBirthDate :: Order -> PhyloExport -> PhyloExport
366 sortByBirthDate order export =
367 let branches = sortOn (\b -> (b ^. branch_meta) ! "birth") $ export ^. export_branches
368 branches' = case order of
369 Asc -> branches
370 Desc -> reverse branches
371 in export & export_branches .~ branches'
372
373 processSort :: Sort -> SeaElevation -> PhyloExport -> PhyloExport
374 processSort sort' elev export = case sort' of
375 ByBirthDate o -> sortByBirthDate o export
376 ByHierarchy _ -> export & export_branches .~ (branchToIso' (_cons_start elev) (_cons_step elev)
377 $ sortByHierarchy 0 (export ^. export_branches))
378
379
380 -----------------
381 -- | Metrics | --
382 -----------------
383
384 -- | Return the conditional probability of i knowing j
385 conditional :: Ord a => Map (a,a) Double -> a -> a -> Double
386 conditional m i j = (findWithDefault 0 (i,j) m)
387 / (m ! (j,j))
388
389
390 -- | Return the genericity score of a given ngram
391 genericity :: Map (Int, Int) Double -> [Int] -> Int -> Double
392 genericity m l i = ( (sum $ map (\j -> conditional m i j) l)
393 - (sum $ map (\j -> conditional m j i) l)) / (fromIntegral $ (length l) + 1)
394
395
396 -- | Return the specificity score of a given ngram
397 specificity :: Map (Int, Int) Double -> [Int] -> Int -> Double
398 specificity m l i = ( (sum $ map (\j -> conditional m j i) l)
399 - (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
400
401
402 -- | Return the inclusion score of a given ngram
403 inclusion :: Map (Int, Int) Double -> [Int] -> Int -> Double
404 inclusion m l i = ( (sum $ map (\j -> conditional m j i) l)
405 + (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
406
407
408 ngramsMetrics :: Phylo -> PhyloExport -> PhyloExport
409 ngramsMetrics phylo export =
410 over ( export_groups
411 . traverse )
412 (\g -> g & phylo_groupMeta %~ insert "genericity"
413 (map (\n -> genericity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
414 & phylo_groupMeta %~ insert "specificity"
415 (map (\n -> specificity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
416 & phylo_groupMeta %~ insert "inclusion"
417 (map (\n -> inclusion (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
418 & phylo_groupMeta %~ insert "frequence"
419 (map (\n -> getInMap n (phylo ^. phylo_lastTermFreq)) $ g ^. phylo_groupNgrams)
420 ) export
421
422
423 branchDating :: PhyloExport -> PhyloExport
424 branchDating export =
425 over ( export_branches
426 . traverse )
427 (\b ->
428 let groups = sortOn fst
429 $ foldl' (\acc g -> if (g ^. phylo_groupBranchId == b ^. branch_id)
430 then acc ++ [g ^. phylo_groupPeriod]
431 else acc ) [] $ export ^. export_groups
432 periods = nub groups
433 birth = fst $ head' "birth" groups
434 age = (snd $ last' "age" groups) - birth
435 in b & branch_meta %~ insert "birth" [fromIntegral birth]
436 & branch_meta %~ insert "age" [fromIntegral age]
437 & branch_meta %~ insert "size" [fromIntegral $ length periods] ) export
438
439 processMetrics :: Phylo -> PhyloExport -> PhyloExport
440 processMetrics phylo export = ngramsMetrics phylo
441 $ branchDating export
442
443
444 -----------------
445 -- | Taggers | --
446 -----------------
447
448 nk :: Int -> [[Int]] -> Int
449 nk n groups = sum
450 $ map (\g -> if (elem n g)
451 then 1
452 else 0) groups
453
454
455 tf :: Int -> [[Int]] -> Double
456 tf n groups = (fromIntegral $ nk n groups) / (fromIntegral $ length $ concat groups)
457
458
459 idf :: Int -> [[Int]] -> Double
460 idf n groups = log ((fromIntegral $ length groups) / (fromIntegral $ nk n groups))
461
462
463 findTfIdf :: [[Int]] -> [(Int,Double)]
464 findTfIdf groups = reverse $ sortOn snd $ map (\n -> (n,(tf n groups) * (idf n groups))) $ sort $ nub $ concat groups
465
466
467 findEmergences :: [PhyloGroup] -> Map Int Double -> [(Int,Double)]
468 findEmergences groups freq =
469 let ngrams = map _phylo_groupNgrams groups
470 dynamics = map (\g -> (g ^. phylo_groupMeta) ! "dynamics") groups
471 emerging = nubBy (\n1 n2 -> fst n1 == fst n2)
472 $ concat $ map (\g -> filter (\(_,d) -> d == 0) $ zip (fst g) (snd g)) $ zip ngrams dynamics
473 in reverse $ sortOn snd
474 $ map (\(n,_) -> if (member n freq)
475 then (n,freq ! n)
476 else (n,0)) emerging
477
478
479 mostEmergentTfIdf :: Int -> Map Int Double -> Vector Ngrams -> PhyloExport -> PhyloExport
480 mostEmergentTfIdf nth freq foundations export =
481 over ( export_branches
482 . traverse )
483 (\b ->
484 let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
485 tfidf = findTfIdf (map _phylo_groupNgrams groups)
486 emergences = findEmergences groups freq
487 selected = if (null emergences)
488 then map fst $ take nth tfidf
489 else [fst $ head' "mostEmergentTfIdf" emergences]
490 ++ (map fst $ take (nth - 1) $ filter (\(n,_) -> n /= (fst $ head' "mostEmergentTfIdf" emergences)) tfidf)
491 in b & branch_label .~ (ngramsToLabel foundations selected)) export
492
493
494 getNthMostMeta :: Int -> [Double] -> [Int] -> [Int]
495 getNthMostMeta nth meta ns = map (\(idx,_) -> (ns !! idx))
496 $ take nth
497 $ reverse
498 $ sortOn snd $ zip [0..] meta
499
500
501 mostInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
502 mostInclusive nth foundations export =
503 over ( export_branches
504 . traverse )
505 (\b ->
506 let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
507 cooc = foldl (\acc g -> unionWith (+) acc (g ^. phylo_groupCooc)) empty groups
508 ngrams = sort $ foldl (\acc g -> union acc (g ^. phylo_groupNgrams)) [] groups
509 inc = map (\n -> inclusion cooc (ngrams \\ [n]) n) ngrams
510 lbl = ngramsToLabel foundations $ getNthMostMeta nth inc ngrams
511 in b & branch_label .~ lbl ) export
512
513
514 mostEmergentInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
515 mostEmergentInclusive nth foundations export =
516 over ( export_groups
517 . traverse )
518 (\g ->
519 let lbl = ngramsToLabel foundations
520 $ take nth
521 $ map (\(_,(_,idx)) -> idx)
522 $ concat
523 $ map (\groups -> sortOn (fst . snd) groups)
524 $ groupBy ((==) `on` fst) $ reverse $ sortOn fst
525 $ zip ((g ^. phylo_groupMeta) ! "inclusion")
526 $ zip ((g ^. phylo_groupMeta) ! "dynamics") (g ^. phylo_groupNgrams)
527 in g & phylo_groupLabel .~ lbl ) export
528
529
530 processLabels :: [PhyloLabel] -> Vector Ngrams -> Map Int Double -> PhyloExport -> PhyloExport
531 processLabels labels foundations freq export =
532 foldl (\export' label ->
533 case label of
534 GroupLabel tagger nth ->
535 case tagger of
536 MostEmergentInclusive -> mostEmergentInclusive nth foundations export'
537 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger"
538 BranchLabel tagger nth ->
539 case tagger of
540 MostInclusive -> mostInclusive nth foundations export'
541 MostEmergentTfIdf -> mostEmergentTfIdf nth freq foundations export'
542 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger" ) export labels
543
544
545 ------------------
546 -- | Dynamics | --
547 ------------------
548
549
550 toDynamics :: Int -> [PhyloGroup] -> PhyloGroup -> Map Int (Date,Date) -> Double
551 toDynamics n parents g m =
552 let prd = g ^. phylo_groupPeriod
553 end = last' "dynamics" (sort $ map snd $ elems m)
554 in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end))
555 {- decrease -}
556 then 2
557 else if ((fst prd) == (fst $ m ! n))
558 {- emerging -}
559 then 0
560 else if isNew
561 {- emergence -}
562 then 1
563 else 3
564 where
565 --------------------------------------
566 isNew :: Bool
567 isNew = not $ elem n $ concat $ map _phylo_groupNgrams parents
568
569
570 processDynamics :: [PhyloGroup] -> [PhyloGroup]
571 processDynamics groups =
572 map (\g ->
573 let parents = filter (\g' -> (g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
574 && ((fst $ g ^. phylo_groupPeriod) > (fst $ g' ^. phylo_groupPeriod))) groups
575 in g & phylo_groupMeta %~ insert "dynamics" (map (\n -> toDynamics n parents g mapNgrams) $ g ^. phylo_groupNgrams) ) groups
576 where
577 --------------------------------------
578 mapNgrams :: Map Int (Date,Date)
579 mapNgrams = map (\dates ->
580 let dates' = sort dates
581 in (head' "dynamics" dates', last' "dynamics" dates'))
582 $ fromListWith (++)
583 $ foldl (\acc g -> acc ++ ( map (\n -> (n,[fst $ g ^. phylo_groupPeriod, snd $ g ^. phylo_groupPeriod]))
584 $ (g ^. phylo_groupNgrams))) [] groups
585
586
587 -----------------
588 -- | horizon | --
589 -----------------
590
591 getGroupThr :: Double -> PhyloGroup -> Double
592 getGroupThr step g =
593 let seaLvl = (g ^. phylo_groupMeta) ! "seaLevels"
594 breaks = (g ^. phylo_groupMeta) ! "breaks"
595 in (last' "export" (take (round $ (last' "export" breaks) + 1) seaLvl)) - step
596
597 toAncestor :: Double -> Map Int Double -> Proximity -> Double -> [PhyloGroup] -> PhyloGroup -> PhyloGroup
598 toAncestor nbDocs diago proximity step candidates ego =
599 let curr = ego ^. phylo_groupAncestors
600 in ego & phylo_groupAncestors .~ (curr ++ (map (\(g,w) -> (getGroupId g,w))
601 $ filter (\(g,w) -> (w > 0) && (w >= (min (getGroupThr step ego) (getGroupThr step g))))
602 $ map (\g -> (g, toProximity nbDocs diago proximity (ego ^. phylo_groupNgrams) (g ^. phylo_groupNgrams) (g ^. phylo_groupNgrams)))
603 $ filter (\g -> g ^. phylo_groupBranchId /= ego ^. phylo_groupBranchId ) candidates))
604
605
606 headsToAncestors :: Double -> Map Int Double -> Proximity -> Double -> [PhyloGroup] -> [PhyloGroup] -> [PhyloGroup]
607 headsToAncestors nbDocs diago proximity step heads acc =
608 if (null heads)
609 then acc
610 else
611 let ego = head' "headsToAncestors" heads
612 heads' = tail' "headsToAncestors" heads
613 in headsToAncestors nbDocs diago proximity step heads' (acc ++ [toAncestor nbDocs diago proximity step heads' ego])
614
615
616 toHorizon :: Phylo -> Phylo
617 toHorizon phylo =
618 let phyloAncestor = updatePhyloGroups
619 scale
620 (fromList $ map (\g -> (getGroupId g, g))
621 $ concat
622 $ tracePhyloAncestors newGroups) phylo
623 reBranched = fromList $ map (\g -> (getGroupId g, g)) $ concat
624 $ groupsToBranches' $ fromList $ map (\g -> (getGroupId g, g)) $ getGroupsFromLevel scale phyloAncestor
625 in updatePhyloGroups scale reBranched phylo
626 where
627 -- | 1) for each periods
628 periods :: [Period]
629 periods = getPeriodIds phylo
630 -- --
631 scale :: Scale
632 scale = getLastLevel phylo
633 -- --
634 frame :: Int
635 frame = getTimeFrame $ timeUnit $ getConfig phylo
636 -- | 2) find ancestors between groups without parents
637 mapGroups :: [[PhyloGroup]]
638 mapGroups = map (\prd ->
639 let groups = getGroupsFromLevelPeriods scale [prd] phylo
640 childs = getPreviousChildIds scale frame prd periods phylo
641 -- maybe add a better filter for non isolated ancestors
642 heads = filter (\g -> (not . null) $ (g ^. phylo_groupPeriodChilds))
643 $ filter (\g -> null (g ^. phylo_groupPeriodParents) && (notElem (getGroupId g) childs)) groups
644 noHeads = groups \\ heads
645 nbDocs = sum $ elems $ filterDocs (phylo ^. phylo_timeDocs) [prd]
646 diago = reduceDiagos $ filterDiago (phylo ^. phylo_timeCooc) [prd]
647 proximity = (phyloProximity $ getConfig phylo)
648 step = case getSeaElevation phylo of
649 Constante _ s -> s
650 Adaptative _ -> undefined
651 -- in headsToAncestors nbDocs diago proximity heads groups []
652 in map (\ego -> toAncestor nbDocs diago proximity step noHeads ego)
653 $ headsToAncestors nbDocs diago proximity step heads []
654 ) periods
655 -- | 3) process this task concurrently
656 newGroups :: [[PhyloGroup]]
657 newGroups = mapGroups `using` parList rdeepseq
658 --------------------------------------
659
660 getPreviousChildIds :: Scale -> Int -> Period -> [Period] -> Phylo -> [PhyloGroupId]
661 getPreviousChildIds lvl frame curr prds phylo =
662 concat $ map ((map fst) . _phylo_groupPeriodChilds)
663 $ getGroupsFromLevelPeriods lvl (getNextPeriods ToParents frame curr prds) phylo
664
665 ---------------------
666 -- | phyloExport | --
667 ---------------------
668
669 toPhyloExport :: Phylo -> DotGraph DotId
670 toPhyloExport phylo = exportToDot phylo
671 $ processFilters (exportFilter $ getConfig phylo) (phyloQuality $ getConfig phylo)
672 $ processSort (exportSort $ getConfig phylo) (getSeaElevation phylo)
673 $ processLabels (exportLabel $ getConfig phylo) (getRoots phylo) (_phylo_lastTermFreq phylo)
674 $ processMetrics phylo export
675 where
676 export :: PhyloExport
677 export = PhyloExport groups branches
678 --------------------------------------
679 branches :: [PhyloBranch]
680 branches = map (\g ->
681 let seaLvl = (g ^. phylo_groupMeta) ! "seaLevels"
682 breaks = (g ^. phylo_groupMeta) ! "breaks"
683 canonId = take (round $ (last' "export" breaks) + 2) (snd $ g ^. phylo_groupBranchId)
684 in PhyloBranch (g ^. phylo_groupBranchId)
685 canonId
686 seaLvl
687 0
688 (last' "export" (take (round $ (last' "export" breaks) + 1) seaLvl))
689 0
690 0
691 "" empty)
692 $ map (\gs -> head' "export" gs)
693 $ groupBy (\g g' -> g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
694 $ sortOn (\g -> g ^. phylo_groupBranchId) groups
695 --------------------------------------
696 groups :: [PhyloGroup]
697 groups = traceExportGroups
698 $ processDynamics
699 $ getGroupsFromLevel (phyloScale $ getConfig phylo)
700 $ tracePhyloInfo phylo
701 -- \$ toHorizon phylo
702
703
704 traceExportBranches :: [PhyloBranch] -> [PhyloBranch]
705 traceExportBranches branches = trace ("\n"
706 <> "-- | Export " <> show(length branches) <> " branches") branches
707
708 tracePhyloAncestors :: [[PhyloGroup]] -> [[PhyloGroup]]
709 tracePhyloAncestors groups = trace ( "-- | Found " <> show(length $ concat $ map _phylo_groupAncestors $ concat groups) <> " ancestors") groups
710
711 tracePhyloInfo :: Phylo -> Phylo
712 tracePhyloInfo phylo = trace ("\n" <> "##########################" <> "\n\n" <> "-- | Phylo with λ = "
713 <> show(_qua_granularity $ phyloQuality $ getConfig phylo) <> " applied to "
714 <> show(length $ Vector.toList $ getRoots phylo) <> " foundations"
715 ) phylo
716
717
718 traceExportGroups :: [PhyloGroup] -> [PhyloGroup]
719 traceExportGroups groups = trace ("\n" <> "-- | Export "
720 <> show(length $ nub $ map (\g -> g ^. phylo_groupBranchId) groups) <> " branches, "
721 <> show(length groups) <> " groups and "
722 <> show(length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) groups) <> " terms"
723 ) groups
724