]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Viz/Phylo/PhyloExport.hs
[phylo] some small phyloexport refactoring
[gargantext.git] / src / Gargantext / Core / Viz / Phylo / PhyloExport.hs
1 {-|
2 Module : Gargantext.Core.Viz.Phylo.PhyloExport
3 Description : Exportation module of a Phylo
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9 -}
10
11 {-# LANGUAGE TypeSynonymInstances #-}
12
13 module Gargantext.Core.Viz.Phylo.PhyloExport where
14
15 import Control.Lens hiding (Level)
16 import Control.Parallel.Strategies (parList, rdeepseq, using)
17 import Data.GraphViz hiding (DotGraph, Order)
18 import Data.GraphViz.Attributes.Complete hiding (EdgeType, Order)
19 import Data.GraphViz.Types.Generalised (DotGraph)
20 import Data.GraphViz.Types.Monadic
21 import Data.List ((++), sort, nub, null, concat, sortOn, groupBy, union, (\\), (!!), init, partition, notElem, unwords, nubBy, inits, elemIndex)
22 import Data.Map (Map, fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault, toList, member)
23 import Data.Text.Lazy (fromStrict, pack, unpack)
24 import Data.Vector (Vector)
25 import Debug.Trace (trace)
26 import Gargantext.Core.Viz.Phylo
27 import Gargantext.Core.Viz.Phylo.PhyloTools
28 import Gargantext.Core.Viz.Phylo.TemporalMatching (filterDocs, filterDiago, reduceDiagos, toSimilarity, getNextPeriods)
29 import Gargantext.Prelude hiding (scale)
30 import Prelude (writeFile)
31 import Protolude (floor)
32 import System.FilePath
33 import qualified Data.GraphViz.Attributes.HTML as H
34 import qualified Data.Text as Text
35 import qualified Data.Text.Lazy as Lazy
36 import qualified Data.Vector as Vector
37
38 --------------------
39 -- | Dot export | --
40 --------------------
41
42 dotToFile :: FilePath -> DotGraph DotId -> IO ()
43 dotToFile filePath dotG = writeFile filePath $ dotToString dotG
44
45 dotToString :: DotGraph DotId -> [Char]
46 dotToString dotG = unpack (printDotGraph dotG)
47
48 dynamicToColor :: Int -> H.Attribute
49 dynamicToColor d
50 | d == 0 = H.BGColor (toColor LightCoral)
51 | d == 1 = H.BGColor (toColor Khaki)
52 | d == 2 = H.BGColor (toColor SkyBlue)
53 | otherwise = H.Color (toColor Black)
54
55 pickLabelColor :: [Int] -> H.Attribute
56 pickLabelColor lst
57 | elem 0 lst = dynamicToColor 0
58 | elem 1 lst = dynamicToColor 1
59 | elem 2 lst = dynamicToColor 2
60 | otherwise = dynamicToColor 3
61
62 toDotLabel :: Text.Text -> Label
63 toDotLabel lbl = StrLabel $ fromStrict lbl
64
65 toAttr :: AttributeName -> Lazy.Text -> CustomAttribute
66 toAttr k v = customAttribute k v
67
68 metaToAttr :: Map Text.Text [Double] -> [CustomAttribute]
69 metaToAttr meta = map (\(k,v) -> toAttr (fromStrict k) $ (pack . unwords) $ map show v) $ toList meta
70
71 groupIdToDotId :: PhyloGroupId -> DotId
72 groupIdToDotId (((d,d'),lvl),idx) = (fromStrict . Text.pack) $ ("group" <> (show d) <> (show d') <> (show lvl) <> (show idx))
73
74 branchIdToDotId :: PhyloBranchId -> DotId
75 branchIdToDotId bId = (fromStrict . Text.pack) $ ("branch" <> show (snd bId))
76
77 periodIdToDotId :: Period -> DotId
78 periodIdToDotId prd = (fromStrict . Text.pack) $ ("period" <> show (fst prd) <> show (snd prd))
79
80 groupToTable :: Vector Ngrams -> PhyloGroup -> H.Label
81 groupToTable fdt g = H.Table H.HTable
82 { H.tableFontAttrs = Just [H.PointSize 14, H.Align H.HLeft]
83 , H.tableAttrs = [H.Border 0, H.CellBorder 0, H.BGColor (toColor White)]
84 , H.tableRows = [header]
85 <> [H.Cells [H.LabelCell [H.Height 10] $ H.Text [H.Str $ fromStrict ""]]]
86 <> ( map ngramsToRow $ splitEvery 4
87 $ reverse $ sortOn (snd . snd)
88 $ zip (ngramsToText fdt (g ^. phylo_groupNgrams))
89 $ zip ((g ^. phylo_groupMeta) ! "dynamics") ((g ^. phylo_groupMeta) ! "inclusion"))}
90 where
91 --------------------------------------
92 ngramsToRow :: [(Ngrams, (Double, Double))] -> H.Row
93 ngramsToRow ns = H.Cells $ map (\(n,(d,_)) ->
94 H.LabelCell [ H.Align H.HLeft
95 , dynamicToColor $ floor d] $ H.Text [H.Str $ fromStrict n]) ns
96 --------------------------------------
97 header :: H.Row
98 header =
99 H.Cells [ H.LabelCell [pickLabelColor $ floor <$> ((g ^. phylo_groupMeta) ! "dynamics")]
100 $ H.Text [H.Str $ ((fromStrict . Text.toUpper) $ g ^. phylo_groupLabel)
101 <> fromStrict " ( "
102 <> (pack $ show (fst $ g ^. phylo_groupPeriod))
103 <> fromStrict " , "
104 <> (pack $ show (snd $ g ^. phylo_groupPeriod))
105 <> fromStrict " ) "
106 <> (pack $ show (getGroupId g))]]
107 --------------------------------------
108
109 branchToDotNode :: PhyloBranch -> Int -> Dot DotId
110 branchToDotNode b bId =
111 node (branchIdToDotId $ b ^. branch_id)
112 ([FillColor [toWColor CornSilk], FontName "Arial", FontSize 40, Shape Egg, Style [SItem Bold []], Label (toDotLabel $ b ^. branch_label)]
113 <> (metaToAttr $ b ^. branch_meta)
114 <> [ toAttr "nodeType" "branch"
115 , toAttr "bId" (pack $ show bId)
116 , toAttr "branchId" (pack $ unwords (map show $ snd $ b ^. branch_id))
117 , toAttr "branch_x" (fromStrict $ Text.pack $ show $ b ^. branch_x)
118 , toAttr "branch_y" (fromStrict $ Text.pack $ show $ b ^. branch_y)
119 , toAttr "label" (pack $ show $ b ^. branch_label)
120 ])
121
122 periodToDotNode :: (Date,Date) -> (Text.Text,Text.Text) -> Dot DotId
123 periodToDotNode prd prd' =
124 node (periodIdToDotId prd)
125 ([Shape BoxShape, FontSize 50, Label (toDotLabel $ Text.pack (show (fst prd) <> " " <> show (snd prd)))]
126 <> [ toAttr "nodeType" "period"
127 , toAttr "strFrom" (fromStrict $ Text.pack $ (show $ fst prd'))
128 , toAttr "strTo" (fromStrict $ Text.pack $ (show $ snd prd'))
129 , toAttr "from" (fromStrict $ Text.pack $ (show $ fst prd))
130 , toAttr "to" (fromStrict $ Text.pack $ (show $ snd prd))])
131
132
133 groupToDotNode :: Vector Ngrams -> PhyloGroup -> Int -> Dot DotId
134 groupToDotNode fdt g bId =
135 node (groupIdToDotId $ getGroupId g)
136 ([FontName "Arial", Shape Square, penWidth 4, toLabel (groupToTable fdt g)]
137 <> [ toAttr "nodeType" "group"
138 , toAttr "gid" (groupIdToDotId $ getGroupId g)
139 , toAttr "from" (pack $ show (fst $ g ^. phylo_groupPeriod))
140 , toAttr "to" (pack $ show (snd $ g ^. phylo_groupPeriod))
141 , toAttr "strFrom" (pack $ show (fst $ g ^. phylo_groupPeriod'))
142 , toAttr "strTo" (pack $ show (snd $ g ^. phylo_groupPeriod'))
143 , toAttr "branchId" (pack $ unwords (init $ map show $ snd $ g ^. phylo_groupBranchId))
144 , toAttr "bId" (pack $ show bId)
145 , toAttr "support" (pack $ show (g ^. phylo_groupSupport))
146 , toAttr "weight" (pack $ show (g ^. phylo_groupWeight))
147 , toAttr "source" (pack $ show (nub $ g ^. phylo_groupSources))
148 , toAttr "sourceFull" (pack $ show (g ^. phylo_groupSources))
149 , toAttr "lbl" (pack $ show (ngramsToLabel fdt (g ^. phylo_groupNgrams)))
150 , toAttr "foundation" (pack $ show (idxToLabel (g ^. phylo_groupNgrams)))
151 , toAttr "role" (pack $ show (idxToLabel' ((g ^. phylo_groupMeta) ! "dynamics")))
152 , toAttr "frequence" (pack $ show (idxToLabel' ((g ^. phylo_groupMeta) ! "frequence")))
153 , toAttr "seaLvl" (pack $ show ((g ^. phylo_groupMeta) ! "seaLevels"))
154 ])
155
156
157 toDotEdge' :: DotId -> DotId -> [Char] -> [Char] -> EdgeType -> Dot DotId
158 toDotEdge' source target thr w edgeType = edge source target
159 (case edgeType of
160 GroupToGroup -> undefined
161 GroupToGroupMemory -> [ Width 3, penWidth 4, Color [toWColor Black], Constraint True] <> [toAttr "edgeType" "memoryLink", toAttr "thr" (pack thr), toAttr "weight" (pack w)]
162 BranchToGroup -> undefined
163 BranchToBranch -> undefined
164 GroupToAncestor -> undefined
165 PeriodToPeriod -> undefined)
166
167
168 toDotEdge :: DotId -> DotId -> [Char] -> EdgeType -> Dot DotId
169 toDotEdge source target lbl edgeType = edge source target
170 (case edgeType of
171 GroupToGroup -> [ Width 3, penWidth 4, Color [toWColor Black], Constraint True] <> [toAttr "edgeType" "link", toAttr "lbl" (pack lbl), toAttr "source" source, toAttr "target" target]
172 GroupToGroupMemory -> undefined
173 BranchToGroup -> [ Width 3, Color [toWColor Black], ArrowHead (AType [(ArrMod FilledArrow RightSide,DotArrow)])] <> [toAttr "edgeType" "branchLink" ]
174 BranchToBranch -> [ Width 2, Color [toWColor Black], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,DotArrow)])]
175 GroupToAncestor -> [ Width 3, Color [toWColor Red], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,NoArrow)]), PenWidth 4] <> [toAttr "edgeType" "ancestorLink", toAttr "lbl" (pack lbl), toAttr "source" source, toAttr "target" target]
176 PeriodToPeriod -> [ Width 5, Color [toWColor Black]])
177
178
179 mergePointers :: [PhyloGroup] -> Map (PhyloGroupId,PhyloGroupId) Double
180 mergePointers groups =
181 let toChilds = fromList $ concat $ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupPeriodChilds) groups
182 toParents = fromList $ concat $ map (\g -> map (\(target,w) -> ((target,getGroupId g),w)) $ g ^. phylo_groupPeriodParents) groups
183 in unionWith (\w w' -> max w w') toChilds toParents
184
185 mergePointersMemory :: [PhyloGroup] -> [((PhyloGroupId,PhyloGroupId),(Double,Double))]
186 mergePointersMemory groups =
187 let toChilds = concat $ map (\g -> map (\(target,(t,w)) -> ((getGroupId g,target),(t,w))) $ g ^. phylo_groupPeriodMemoryChilds) groups
188 toParents = concat $ map (\g -> map (\(target,(t,w)) -> ((target,getGroupId g),(t,w))) $ g ^. phylo_groupPeriodMemoryParents) groups
189 in concat [toChilds,toParents]
190
191 mergeAncestors :: [PhyloGroup] -> [((PhyloGroupId,PhyloGroupId), Double)]
192 mergeAncestors groups = concat
193 $ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupAncestors)
194 $ filter (\g -> (not . null) $ g ^. phylo_groupAncestors) groups
195
196
197 toBid :: PhyloGroup -> [PhyloBranch] -> Int
198 toBid g bs =
199 let b' = head' "toBid" (filter (\b -> b ^. branch_id == g ^. phylo_groupBranchId) bs)
200 in fromJust $ elemIndex b' bs
201
202 exportToDot :: Phylo -> PhyloExport -> DotGraph DotId
203 exportToDot phylo export =
204 trace ("\n-- | Convert " <> show(length $ export ^. export_branches) <> " branches and "
205 <> show(length $ export ^. export_groups) <> " groups "
206 <> show(length $ nub $ concat $ map (^. phylo_groupNgrams) $ export ^. export_groups) <> " terms to a dot file\n\n"
207 <> "##########################") $
208 digraph ((Str . fromStrict) $ phyloName $ getConfig phylo) $ do
209
210 {- 1) init the dot graph -}
211 graphAttrs ( [ Label (toDotLabel $ phyloName $ getConfig phylo)]
212 <> [ FontSize 30, LabelLoc VTop, NodeSep 1, RankSep [1], Rank SameRank, Splines SplineEdges, Overlap ScaleOverlaps
213 , Ratio FillRatio
214 -- , Ratio AutoRatio
215 , Style [SItem Filled []],Color [toWColor White]]
216 {-- home made attributes -}
217 <> [ toAttr (fromStrict "phyloFoundations") $ pack $ show (length $ Vector.toList $ getRoots phylo)
218 , toAttr (fromStrict "phyloTerms") $ pack $ show (length $ nub $ concat $ map (^. phylo_groupNgrams) $ export ^. export_groups)
219 , toAttr (fromStrict "phyloDocs") $ pack $ show (sum $ elems $ getDocsByDate phylo)
220 , toAttr (fromStrict "phyloPeriods") $ pack $ show (length $ elems $ phylo ^. phylo_periods)
221 , toAttr (fromStrict "phyloBranches") $ pack $ show (length $ export ^. export_branches)
222 , toAttr (fromStrict "phyloGroups") $ pack $ show (length $ export ^. export_groups)
223 , toAttr (fromStrict "phyloSources") $ pack $ show (Vector.toList $ getSources phylo)
224 , toAttr (fromStrict "phyloTimeScale") $ pack $ getTimeScale phylo
225 , toAttr (fromStrict "PhyloScale") $ pack $ show (getLevel phylo)
226 , toAttr (fromStrict "phyloQuality") $ pack $ show (phylo ^. phylo_quality)
227 , toAttr (fromStrict "phyloSeaRiseStart") $ pack $ show (getPhyloSeaRiseStart phylo)
228 , toAttr (fromStrict "phyloSeaRiseSteps") $ pack $ show (getPhyloSeaRiseSteps phylo)
229 -- ,(toAttr (fromStrict "phyloTermsFreq") $ pack $ show (toList $ _phylo_lastTermFreq phylo))
230 ])
231
232 {-
233 -- toAttr (fromStrict k) $ (pack . unwords) $ map show v
234
235 -- 2) create a layer for the branches labels -}
236 subgraph (Str "Branches peaks") $ do
237
238 -- graphAttrs [Rank SameRank]
239 {-
240 -- 3) group the branches by hierarchy
241 -- mapM (\branches ->
242 -- subgraph (Str "Branches clade") $ do
243 -- graphAttrs [Rank SameRank]
244
245 -- -- 4) create a node for each branch
246 -- mapM branchToDotNode branches
247 -- ) $ elems $ fromListWith (++) $ map (\b -> ((init . snd) $ b ^. branch_id,[b])) $ export ^. export_branches
248 -}
249 mapM (\b -> branchToDotNode b (fromJust $ elemIndex b (export ^. export_branches))) $ export ^. export_branches
250
251 {-- 5) create a layer for each period -}
252 _ <- mapM (\period ->
253 subgraph ((Str . fromStrict . Text.pack) $ ("Period" <> show (fst $ _phylo_periodPeriod period) <> show (snd $ _phylo_periodPeriod period))) $ do
254 graphAttrs [Rank SameRank]
255 periodToDotNode (period ^. phylo_periodPeriod) (period ^. phylo_periodPeriodStr)
256
257 {-- 6) create a node for each group -}
258 mapM (\g -> groupToDotNode (getRoots phylo) g (toBid g (export ^. export_branches))) (filter (\g -> g ^. phylo_groupPeriod == (period ^. phylo_periodPeriod)) $ export ^. export_groups)
259 ) $ phylo ^. phylo_periods
260
261 {-- 7) create the edges between a branch and its first groups -}
262 _ <- mapM (\(bId,groups) ->
263 mapM (\g -> toDotEdge (branchIdToDotId bId) (groupIdToDotId $ getGroupId g) "" BranchToGroup) groups
264 )
265 $ toList
266 $ map (\groups -> head' "toDot"
267 $ groupBy (\g g' -> g' ^. phylo_groupPeriod == g ^. phylo_groupPeriod)
268 $ sortOn (fst . _phylo_groupPeriod) groups)
269 $ fromListWith (++) $ map (\g -> (g ^. phylo_groupBranchId,[g])) $ export ^. export_groups
270
271 {- 8) create the edges between the groups -}
272 _ <- mapM (\((k,k'),v) ->
273 toDotEdge (groupIdToDotId k) (groupIdToDotId k') (show v) GroupToGroup
274 ) $ (toList . mergePointers) $ export ^. export_groups
275
276 {- 8-bis) create the edges between the groups -}
277 {- _ <- mapM (\((k,k'),v) ->
278 toDotEdge' (groupIdToDotId k) (groupIdToDotId k') (show (fst v)) (show (snd v)) GroupToGroupMemory
279 ) $ mergePointersMemory $ export ^. export_groups -}
280
281 _ <- mapM (\((k,k'),v) ->
282 toDotEdge (groupIdToDotId k) (groupIdToDotId k') (show v) GroupToAncestor
283 ) $ mergeAncestors $ export ^. export_groups
284
285 -- 10) create the edges between the periods
286 _ <- mapM (\(prd,prd') ->
287 toDotEdge (periodIdToDotId prd) (periodIdToDotId prd') "" PeriodToPeriod
288 ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ getPeriodIds phylo
289
290 {- 8) create the edges between the branches
291 -- _ <- mapM (\(bId,bId') ->
292 -- toDotEdge (branchIdToDotId bId) (branchIdToDotId bId')
293 -- (Text.pack $ show(branchIdsToSimilarity bId bId'
294 -- (getThresholdInit $ phyloSimilarity $ getConfig phylo)
295 -- (getThresholdStep $ phyloSimilarity $ getConfig phylo))) BranchToBranch
296 -- ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ map _branch_id $ export ^. export_branches
297 -}
298
299
300 graphAttrs [Rank SameRank]
301
302
303 ----------------
304 -- | Filter | --
305 ----------------
306
307 filterByBranchSize :: Double -> PhyloExport -> PhyloExport
308 filterByBranchSize thr export =
309 let splited = partition (\b -> head' "filter" ((b ^. branch_meta) ! "size") >= thr) $ export ^. export_branches
310 in export & export_branches .~ (fst splited)
311 & export_groups %~ (filter (\g -> not $ elem (g ^. phylo_groupBranchId) (map _branch_id $ snd splited)))
312
313
314 processFilters :: [Filter] -> Quality -> PhyloExport -> PhyloExport
315 processFilters filters qua export =
316 foldl (\export' f -> case f of
317 ByBranchSize thr -> if (thr < (fromIntegral $ qua ^. qua_minBranch))
318 then filterByBranchSize (fromIntegral $ qua ^. qua_minBranch) export'
319 else filterByBranchSize thr export'
320 ) export filters
321
322 --------------
323 -- | Sort | --
324 --------------
325
326 branchToIso :: [PhyloBranch] -> [PhyloBranch]
327 branchToIso branches =
328 let steps = map sum
329 $ inits
330 $ map (\(b,x) -> b ^. branch_y + 0.05 - x)
331 $ zip branches
332 $ ([0] ++ (map (\(b,b') ->
333 let idx = length $ commonPrefix (b ^. branch_canonId) (b' ^. branch_canonId) []
334 lmin = min (length $ b ^. branch_seaLevel) (length $ b' ^. branch_seaLevel)
335 in
336 if ((idx - 1) > ((length $ b' ^. branch_seaLevel) - 1))
337 then (b' ^. branch_seaLevel) !! (lmin - 1)
338 else (b' ^. branch_seaLevel) !! (idx - 1)
339 ) $ listToSeq branches))
340 in map (\(x,b) -> b & branch_x .~ x)
341 $ zip steps branches
342
343 branchToIso' :: Double -> Double -> [PhyloBranch] -> [PhyloBranch]
344 branchToIso' start step branches =
345 let bx = map (\l -> (sum l) + ((fromIntegral $ length l) * 0.5))
346 $ inits
347 $ ([0] ++ (map (\(b,b') ->
348 let root = fromIntegral $ length $ commonPrefix (snd $ b ^. branch_id) (snd $ b' ^. branch_id) []
349 in 1 - start - step * root) $ listToSeq branches))
350 in map (\(x,b) -> b & branch_x .~ x)
351 $ zip bx branches
352
353
354 sortByHierarchy :: Int -> [PhyloBranch] -> [PhyloBranch]
355 sortByHierarchy depth branches =
356 if (length branches == 1)
357 then branches
358 else concat
359 $ map (\branches' ->
360 let partitions = partition (\b -> depth + 1 == ((length . snd) $ b ^. branch_id)) branches'
361 in (sortOn (\b -> (b ^. branch_meta) ! "birth") (fst partitions))
362 ++ (sortByHierarchy (depth + 1) (snd partitions)))
363 $ groupBy (\b b' -> ((take depth . snd) $ b ^. branch_id) == ((take depth . snd) $ b' ^. branch_id) )
364 $ sortOn (\b -> (take depth . snd) $ b ^. branch_id) branches
365
366
367 sortByBirthDate :: Order -> PhyloExport -> PhyloExport
368 sortByBirthDate order export =
369 let branches = sortOn (\b -> (b ^. branch_meta) ! "birth") $ export ^. export_branches
370 branches' = case order of
371 Asc -> branches
372 Desc -> reverse branches
373 in export & export_branches .~ branches'
374
375 processSort :: Sort -> SeaElevation -> PhyloExport -> PhyloExport
376 processSort sort' elev export = case sort' of
377 ByBirthDate o -> sortByBirthDate o export
378 ByHierarchy _ -> case elev of
379 Constante s s' -> export & export_branches .~ (branchToIso' s s' $ sortByHierarchy 0 (export ^. export_branches))
380 Adaptative _ -> export & export_branches .~ (branchToIso $ sortByHierarchy 0 (export ^. export_branches))
381 Evolving _ -> export & export_branches .~ (branchToIso $ sortByHierarchy 0 (export ^. export_branches))
382
383 -----------------
384 -- | Metrics | --
385 -----------------
386
387 -- | Return the conditional probability of i knowing j
388 conditional :: Ord a => Map (a,a) Double -> a -> a -> Double
389 conditional m i j = (findWithDefault 0 (i,j) m)
390 / (m ! (j,j))
391
392
393 -- | Return the genericity score of a given ngram
394 genericity :: Map (Int, Int) Double -> [Int] -> Int -> Double
395 genericity m l i = ( (sum $ map (\j -> conditional m i j) l)
396 - (sum $ map (\j -> conditional m j i) l)) / (fromIntegral $ (length l) + 1)
397
398
399 -- | Return the specificity score of a given ngram
400 specificity :: Map (Int, Int) Double -> [Int] -> Int -> Double
401 specificity m l i = ( (sum $ map (\j -> conditional m j i) l)
402 - (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
403
404
405 -- | Return the inclusion score of a given ngram
406 inclusion :: Map (Int, Int) Double -> [Int] -> Int -> Double
407 inclusion m l i = ( (sum $ map (\j -> conditional m j i) l)
408 + (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
409
410
411 ngramsMetrics :: Phylo -> PhyloExport -> PhyloExport
412 ngramsMetrics phylo export =
413 over ( export_groups
414 . traverse )
415 (\g -> g & phylo_groupMeta %~ insert "genericity"
416 (map (\n -> genericity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
417 & phylo_groupMeta %~ insert "specificity"
418 (map (\n -> specificity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
419 & phylo_groupMeta %~ insert "inclusion"
420 (map (\n -> inclusion (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
421 & phylo_groupMeta %~ insert "frequence"
422 (map (\n -> getInMap n (getLastRootsFreq phylo)) $ g ^. phylo_groupNgrams)
423 ) export
424
425
426 branchDating :: PhyloExport -> PhyloExport
427 branchDating export =
428 over ( export_branches
429 . traverse )
430 (\b ->
431 let groups = sortOn fst
432 $ foldl' (\acc g -> if (g ^. phylo_groupBranchId == b ^. branch_id)
433 then acc ++ [g ^. phylo_groupPeriod]
434 else acc ) [] $ export ^. export_groups
435 periods = nub groups
436 birth = fst $ head' "birth" groups
437 age = (snd $ last' "age" groups) - birth
438 in b & branch_meta %~ insert "birth" [fromIntegral birth]
439 & branch_meta %~ insert "age" [fromIntegral age]
440 & branch_meta %~ insert "size" [fromIntegral $ length periods] ) export
441
442 processMetrics :: Phylo -> PhyloExport -> PhyloExport
443 processMetrics phylo export = ngramsMetrics phylo
444 $ branchDating export
445
446
447 -----------------
448 -- | Taggers | --
449 -----------------
450
451 nk :: Int -> [[Int]] -> Int
452 nk n groups = sum
453 $ map (\g -> if (elem n g)
454 then 1
455 else 0) groups
456
457
458 tf :: Int -> [[Int]] -> Double
459 tf n groups = (fromIntegral $ nk n groups) / (fromIntegral $ length $ concat groups)
460
461
462 idf :: Int -> [[Int]] -> Double
463 idf n groups = log ((fromIntegral $ length groups) / (fromIntegral $ nk n groups))
464
465
466 findTfIdf :: [[Int]] -> [(Int,Double)]
467 findTfIdf groups = reverse $ sortOn snd $ map (\n -> (n,(tf n groups) * (idf n groups))) $ nub $ concat groups
468
469
470 findEmergences :: [PhyloGroup] -> Map Int Double -> [(Int,Double)]
471 findEmergences groups freq =
472 let ngrams = map _phylo_groupNgrams groups
473 dynamics = map (\g -> (g ^. phylo_groupMeta) ! "dynamics") groups
474 emerging = nubBy (\n1 n2 -> fst n1 == fst n2)
475 $ concat $ map (\g -> filter (\(_,d) -> d == 0) $ zip (fst g) (snd g)) $ zip ngrams dynamics
476 in reverse $ sortOn snd
477 $ map (\(n,_) -> if (member n freq)
478 then (n,freq ! n)
479 else (n,0)) emerging
480
481
482 mostEmergentTfIdf :: Int -> Map Int Double -> Vector Ngrams -> PhyloExport -> PhyloExport
483 mostEmergentTfIdf nth freq foundations export =
484 over ( export_branches
485 . traverse )
486 (\b ->
487 let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
488 tfidf = findTfIdf (map _phylo_groupNgrams groups)
489 emergences = findEmergences groups freq
490 selected = if (null emergences)
491 then map fst $ take nth tfidf
492 else [fst $ head' "mostEmergentTfIdf" emergences]
493 ++ (map fst $ take (nth - 1) $ filter (\(n,_) -> n /= (fst $ head' "mostEmergentTfIdf" emergences)) tfidf)
494 in b & branch_label .~ (ngramsToLabel foundations selected)) export
495
496
497 getNthMostMeta :: Int -> [Double] -> [Int] -> [Int]
498 getNthMostMeta nth meta ns = map (\(idx,_) -> (ns !! idx))
499 $ take nth
500 $ reverse
501 $ sortOn snd $ zip [0..] meta
502
503
504 mostInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
505 mostInclusive nth foundations export =
506 over ( export_branches
507 . traverse )
508 (\b ->
509 let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
510 cooc = foldl (\acc g -> unionWith (+) acc (g ^. phylo_groupCooc)) empty groups
511 ngrams = sort $ foldl (\acc g -> union acc (g ^. phylo_groupNgrams)) [] groups
512 inc = map (\n -> inclusion cooc (ngrams \\ [n]) n) ngrams
513 lbl = ngramsToLabel foundations $ getNthMostMeta nth inc ngrams
514 in b & branch_label .~ lbl ) export
515
516
517 mostEmergentInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
518 mostEmergentInclusive nth foundations export =
519 over ( export_groups
520 . traverse )
521 (\g ->
522 let lbl = ngramsToLabel foundations
523 $ take nth
524 $ map (\(_,(_,idx)) -> idx)
525 $ concat
526 $ map (\groups -> sortOn (fst . snd) groups)
527 $ groupBy ((==) `on` fst) $ reverse $ sortOn fst
528 $ zip ((g ^. phylo_groupMeta) ! "inclusion")
529 $ zip ((g ^. phylo_groupMeta) ! "dynamics") (g ^. phylo_groupNgrams)
530 in g & phylo_groupLabel .~ lbl ) export
531
532
533 processLabels :: [PhyloLabel] -> Vector Ngrams -> Map Int Double -> PhyloExport -> PhyloExport
534 processLabels labels foundations freq export =
535 foldl (\export' label ->
536 case label of
537 GroupLabel tagger nth ->
538 case tagger of
539 MostEmergentInclusive -> mostEmergentInclusive nth foundations export'
540 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger"
541 BranchLabel tagger nth ->
542 case tagger of
543 MostInclusive -> mostInclusive nth foundations export'
544 MostEmergentTfIdf -> mostEmergentTfIdf nth freq foundations export'
545 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger" ) export labels
546
547
548 ------------------
549 -- | Dynamics | --
550 ------------------
551
552 -- utiliser & creer une Map FdtId [PhyloGroup]
553 -- n = index of the current term
554 toDynamics :: FdtId -> [PhyloGroup] -> PhyloGroup -> Map FdtId (Date,Date) -> Double
555 toDynamics n elders g m =
556 let prd = g ^. phylo_groupPeriod
557 end = last' "dynamics" (sort $ map snd $ elems m)
558 in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end))
559 {- decrease -}
560 then 2
561 else if ((fst prd) == (fst $ m ! n))
562 {- emerging -}
563 then 0
564 else if isNew
565 {- emergence -}
566 then 1
567 else 3
568 where
569 --------------------------------------
570 isNew :: Bool
571 isNew = not $ elem n $ concat $ map _phylo_groupNgrams elders
572
573 type FdtId = Int
574 processDynamics :: [PhyloGroup] -> [PhyloGroup]
575 processDynamics groups =
576 map (\g ->
577 let elders = filter (\g' -> (g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
578 && ((fst $ g ^. phylo_groupPeriod) > (fst $ g' ^. phylo_groupPeriod))) groups
579 in g & phylo_groupMeta %~ insert "dynamics" (map (\n -> toDynamics n elders g mapNgrams) $ g ^. phylo_groupNgrams) ) groups
580 where
581 --------------------------------------
582 mapNgrams :: Map FdtId (Date,Date)
583 mapNgrams = map (\dates ->
584 let dates' = sort dates
585 in (head' "dynamics" dates', last' "dynamics" dates'))
586 $ fromListWith (++)
587 $ foldl (\acc g -> acc ++ ( map (\n -> (n,[fst $ g ^. phylo_groupPeriod, snd $ g ^. phylo_groupPeriod]))
588 $ (g ^. phylo_groupNgrams))) [] groups
589
590
591 -----------------
592 -- | horizon | --
593 -----------------
594
595 getGroupThr :: Double -> PhyloGroup -> Double
596 getGroupThr step g =
597 let seaLvl = (g ^. phylo_groupMeta) ! "seaLevels"
598 breaks = (g ^. phylo_groupMeta) ! "breaks"
599 in (last' "export" (take (round $ (last' "export" breaks) + 1) seaLvl)) - step
600
601 toAncestor :: Double -> Map Int Double -> PhyloSimilarity -> Double -> [PhyloGroup] -> PhyloGroup -> PhyloGroup
602 toAncestor nbDocs diago similarity step candidates ego =
603 let curr = ego ^. phylo_groupAncestors
604 in ego & phylo_groupAncestors .~ (curr ++ (map (\(g,w) -> (getGroupId g,w))
605 $ filter (\(g,w) -> (w > 0) && (w >= (min (getGroupThr step ego) (getGroupThr step g))))
606 $ map (\g -> (g, toSimilarity nbDocs diago similarity (ego ^. phylo_groupNgrams) (g ^. phylo_groupNgrams) (g ^. phylo_groupNgrams)))
607 $ filter (\g -> g ^. phylo_groupBranchId /= ego ^. phylo_groupBranchId ) candidates))
608
609
610 headsToAncestors :: Double -> Map Int Double -> PhyloSimilarity -> Double -> [PhyloGroup] -> [PhyloGroup] -> [PhyloGroup]
611 headsToAncestors nbDocs diago similarity step heads acc =
612 if (null heads)
613 then acc
614 else
615 let ego = head' "headsToAncestors" heads
616 heads' = tail' "headsToAncestors" heads
617 in headsToAncestors nbDocs diago similarity step heads' (acc ++ [toAncestor nbDocs diago similarity step heads' ego])
618
619
620 toHorizon :: Phylo -> Phylo
621 toHorizon phylo =
622 let phyloAncestor = updatePhyloGroups
623 scale
624 (fromList $ map (\g -> (getGroupId g, g))
625 $ concat
626 $ tracePhyloAncestors newGroups) phylo
627 reBranched = fromList $ map (\g -> (getGroupId g, g)) $ concat
628 $ groupsToBranches' $ fromList $ map (\g -> (getGroupId g, g)) $ getGroupsFromScale scale phyloAncestor
629 in updatePhyloGroups scale reBranched phylo
630 where
631 -- | 1) for each periods
632 periods :: [Period]
633 periods = getPeriodIds phylo
634 -- --
635 scale :: Scale
636 scale = getLastLevel phylo
637 -- --
638 frame :: Int
639 frame = getTimeFrame $ timeUnit $ getConfig phylo
640 -- | 2) find ancestors between groups without parents
641 mapGroups :: [[PhyloGroup]]
642 mapGroups = map (\prd ->
643 let groups = getGroupsFromScalePeriods scale [prd] phylo
644 childs = getPreviousChildIds scale frame prd periods phylo
645 -- maybe add a better filter for non isolated ancestors
646 heads = filter (\g -> (not . null) $ (g ^. phylo_groupPeriodChilds))
647 $ filter (\g -> null (g ^. phylo_groupPeriodParents) && (notElem (getGroupId g) childs)) groups
648 noHeads = groups \\ heads
649 nbDocs = sum $ elems $ filterDocs (getDocsByDate phylo) [prd]
650 diago = reduceDiagos $ filterDiago (getCoocByDate phylo) [prd]
651 sim = (similarity $ getConfig phylo)
652 step = case getSeaElevation phylo of
653 Constante _ s -> s
654 Adaptative _ -> 0
655 Evolving _ -> 0
656 -- in headsToAncestors nbDocs diago Similarity heads groups []
657 in map (toAncestor nbDocs diago sim step noHeads)
658 $ headsToAncestors nbDocs diago sim step heads []
659 ) periods
660 -- | 3) process this task concurrently
661 newGroups :: [[PhyloGroup]]
662 newGroups = mapGroups `using` parList rdeepseq
663 --------------------------------------
664
665 getPreviousChildIds :: Scale -> Int -> Period -> [Period] -> Phylo -> [PhyloGroupId]
666 getPreviousChildIds lvl frame curr prds phylo =
667 concat $ map ((map fst) . _phylo_groupPeriodChilds)
668 $ getGroupsFromScalePeriods lvl (getNextPeriods ToParents frame curr prds) phylo
669
670 ---------------------
671 -- | phyloExport | --
672 ---------------------
673
674 toPhyloExport :: Phylo -> DotGraph DotId
675 toPhyloExport phylo = exportToDot phylo
676 $ processFilters (exportFilter $ getConfig phylo) (phyloQuality $ getConfig phylo)
677 $ processSort (exportSort $ getConfig phylo) (getSeaElevation phylo)
678 $ processLabels (exportLabel $ getConfig phylo) (getRoots phylo) (getLastRootsFreq phylo)
679 $ processMetrics phylo export
680 where
681 export :: PhyloExport
682 export = PhyloExport groups branches
683 --------------------------------------
684 branches :: [PhyloBranch]
685 branches = map (\g ->
686 let seaLvl = (g ^. phylo_groupMeta) ! "seaLevels"
687 breaks = (g ^. phylo_groupMeta) ! "breaks"
688 canonId = take (round $ (last' "export" breaks) + 2) (snd $ g ^. phylo_groupBranchId)
689 in PhyloBranch { _branch_id = g ^. phylo_groupBranchId
690 , _branch_canonId = canonId
691 , _branch_seaLevel = seaLvl
692 , _branch_x = 0
693 , _branch_y = last' "export" $ take (round $ (last' "export" breaks) + 1) seaLvl
694 , _branch_w = 0
695 , _branch_t = 0
696 , _branch_label = ""
697 , _branch_meta = empty })
698 $ map (head' "export")
699 $ groupBy (\g g' -> g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
700 $ sortOn (^. phylo_groupBranchId) groups
701 --------------------------------------
702 groups :: [PhyloGroup]
703 groups = traceExportGroups
704 -- necessaire ?
705 $ processDynamics
706 $ getGroupsFromScale (phyloScale $ getConfig phylo)
707 $ tracePhyloInfo phylo
708
709
710 traceExportBranches :: [PhyloBranch] -> [PhyloBranch]
711 traceExportBranches branches = trace ("\n"
712 <> "-- | Export " <> show(length branches) <> " branches") branches
713
714 tracePhyloAncestors :: [[PhyloGroup]] -> [[PhyloGroup]]
715 tracePhyloAncestors groups = trace ( "-- | Found " <> show(length $ concat $ map _phylo_groupAncestors $ concat groups) <> " ancestors") groups
716
717 tracePhyloInfo :: Phylo -> Phylo
718 tracePhyloInfo phylo = trace ("\n" <> "##########################" <> "\n\n" <> "-- | Phylo with λ = "
719 <> show(getLevel phylo) <> " applied to "
720 <> show(length $ Vector.toList $ getRoots phylo) <> " foundations"
721 ) phylo
722
723
724 traceExportGroups :: [PhyloGroup] -> [PhyloGroup]
725 traceExportGroups groups = trace ("\n" <> "-- | Export "
726 <> show(length $ nub $ map (\g -> g ^. phylo_groupBranchId) groups) <> " branches, "
727 <> show(length groups) <> " groups and "
728 <> show(length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) groups) <> " terms"
729 ) groups