]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Viz/Phylo/PhyloExport.hs
[FIX] Phylo
[gargantext.git] / src / Gargantext / Core / Viz / Phylo / PhyloExport.hs
1 {-|
2 Module : Gargantext.Core.Viz.Phylo.PhyloExport
3 Description : Exportation module of a Phylo
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9 -}
10
11 {-# LANGUAGE TypeSynonymInstances #-}
12
13 module Gargantext.Core.Viz.Phylo.PhyloExport where
14
15 import Control.Lens hiding (Level)
16 import Control.Parallel.Strategies (parList, rdeepseq, using)
17 import Data.GraphViz hiding (DotGraph, Order)
18 import Data.GraphViz.Attributes.Complete hiding (EdgeType, Order)
19 import Data.GraphViz.Types.Generalised (DotGraph)
20 import Data.GraphViz.Types.Monadic
21 import Data.List ((++), sort, nub, null, concat, sortOn, groupBy, union, (\\), (!!), init, partition, notElem, unwords, nubBy, inits, elemIndex)
22 import Data.Map (Map, fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault, toList, member)
23 import Data.Text.Lazy (fromStrict, pack, unpack)
24 import Data.Vector (Vector)
25 import Debug.Trace (trace)
26 import Gargantext.Core.Viz.Phylo
27 import Gargantext.Core.Viz.Phylo.PhyloTools
28 import Gargantext.Core.Viz.Phylo.TemporalMatching (filterDocs, filterDiago, reduceDiagos, toSimilarity, getNextPeriods)
29 import Gargantext.Prelude hiding (scale)
30 import Prelude (writeFile)
31 import Protolude (floor)
32 import System.FilePath
33 import qualified Data.GraphViz.Attributes.HTML as H
34 import qualified Data.Text as Text
35 import qualified Data.Text.Lazy as Lazy
36 import qualified Data.Vector as Vector
37
38 --------------------
39 -- | Dot export | --
40 --------------------
41
42 dotToFile :: FilePath -> DotGraph DotId -> IO ()
43 dotToFile filePath dotG = writeFile filePath $ dotToString dotG
44
45 dotToString :: DotGraph DotId -> [Char]
46 dotToString dotG = unpack (printDotGraph dotG)
47
48 dynamicToColor :: Int -> H.Attribute
49 dynamicToColor d
50 | d == 0 = H.BGColor (toColor LightCoral)
51 | d == 1 = H.BGColor (toColor Khaki)
52 | d == 2 = H.BGColor (toColor SkyBlue)
53 | otherwise = H.Color (toColor Black)
54
55 pickLabelColor :: [Int] -> H.Attribute
56 pickLabelColor lst
57 | elem 0 lst = dynamicToColor 0
58 | elem 1 lst = dynamicToColor 1
59 | elem 2 lst = dynamicToColor 2
60 | otherwise = dynamicToColor 3
61
62 toDotLabel :: Text.Text -> Label
63 toDotLabel lbl = StrLabel $ fromStrict lbl
64
65 toAttr :: AttributeName -> Lazy.Text -> CustomAttribute
66 toAttr k v = customAttribute k v
67
68 metaToAttr :: Map Text.Text [Double] -> [CustomAttribute]
69 metaToAttr meta = map (\(k, v) -> toAttr (fromStrict k) $ (pack . unwords) $ map show v) $ toList meta
70
71 groupIdToDotId :: PhyloGroupId -> DotId
72 groupIdToDotId (((d, d'), lvl), idx) =
73 (fromStrict . Text.pack) $ "group" <> show d <> show d' <> show lvl <> show idx
74
75 branchIdToDotId :: PhyloBranchId -> DotId
76 branchIdToDotId bId = (fromStrict . Text.pack) $ "branch" <> show (snd bId)
77
78 periodIdToDotId :: Period -> DotId
79 periodIdToDotId prd = (fromStrict . Text.pack) $ "period" <> show (fst prd) <> show (snd prd)
80
81 groupToTable :: Vector Ngrams -> PhyloGroup -> H.Label
82 groupToTable fdt g =
83 H.Table H.HTable
84 { H.tableFontAttrs = Just [H.PointSize 14, H.Align H.HLeft]
85 , H.tableAttrs = [H.Border 0, H.CellBorder 0, H.BGColor (toColor White)]
86 , H.tableRows = [header]
87 <> [H.Cells [H.LabelCell [H.Height 10] $ H.Text [H.Str $ fromStrict ""]]]
88 <> ( map ngramsToRow $ splitEvery 4
89 $ reverse $ sortOn (snd . snd)
90 $ zip (ngramsToText fdt (g ^. phylo_groupNgrams))
91 $ zip ((g ^. phylo_groupMeta) ! "dynamics") ((g ^. phylo_groupMeta) ! "inclusion"))}
92 where
93 --------------------------------------
94 ngramsToRow :: [(Ngrams, (Double, Double))] -> H.Row
95 ngramsToRow ns =
96 H.Cells $ map (\(n, (d, _)) ->
97 H.LabelCell [ H.Align H.HLeft
98 , dynamicToColor $ floor d] $ H.Text [H.Str $ fromStrict n]) ns
99 --------------------------------------
100 header :: H.Row
101 header =
102 H.Cells [ H.LabelCell [pickLabelColor $ floor <$> ((g ^. phylo_groupMeta) ! "dynamics")]
103 $ H.Text [H.Str $ ((fromStrict . Text.toUpper) $ g ^. phylo_groupLabel)
104 <> fromStrict " ( "
105 <> (pack $ show (fst $ g ^. phylo_groupPeriod))
106 <> fromStrict " , "
107 <> (pack $ show (snd $ g ^. phylo_groupPeriod))
108 <> fromStrict " ) "
109 <> (pack $ show (getGroupId g))]]
110 --------------------------------------
111
112 branchToDotNode :: PhyloBranch -> Int -> Dot DotId
113 branchToDotNode b bId =
114 node (branchIdToDotId $ b ^. branch_id)
115 ( [ FillColor [toWColor CornSilk]
116 , FontName "Arial"
117 , FontSize 40
118 , Shape Egg
119 , Style [SItem Bold []]
120 , Label (toDotLabel $ b ^. branch_label) ]
121 <> (metaToAttr $ b ^. branch_meta)
122 <> [ toAttr "nodeType" "branch"
123 , toAttr "bId" (pack $ show bId)
124 , toAttr "branchId" (pack $ unwords (map show $ snd $ b ^. branch_id))
125 , toAttr "branch_x" (fromStrict $ Text.pack $ show $ b ^. branch_x)
126 , toAttr "branch_y" (fromStrict $ Text.pack $ show $ b ^. branch_y)
127 , toAttr "label" (pack $ show $ b ^. branch_label)
128 ])
129
130 periodToDotNode :: (Date,Date) -> (Text.Text,Text.Text) -> Dot DotId
131 periodToDotNode prd prd' =
132 node (periodIdToDotId prd) $
133 [ Shape BoxShape
134 , FontSize 50
135 , Label $ toDotLabel $ Text.pack $ show (fst prd) <> " " <> show (snd prd) ]
136 <> [ toAttr "nodeType" "period"
137 , toAttr "strFrom" $ fromStrict $ Text.pack $ show $ fst prd'
138 , toAttr "strTo" $ fromStrict $ Text.pack $ show $ snd prd'
139 , toAttr "from" $ fromStrict $ Text.pack $ show $ fst prd
140 , toAttr "to" $ fromStrict $ Text.pack $ show $ snd prd ]
141
142
143 groupToDotNode :: Vector Ngrams -> PhyloGroup -> Int -> Dot DotId
144 groupToDotNode fdt g bId =
145 node (groupIdToDotId $ getGroupId g)
146 ([FontName "Arial", Shape Square, penWidth 4, toLabel (groupToTable fdt g)]
147 <> [ toAttr "nodeType" "group"
148 , toAttr "gid" (groupIdToDotId $ getGroupId g)
149 , toAttr "from" (pack $ show (fst $ g ^. phylo_groupPeriod))
150 , toAttr "to" (pack $ show (snd $ g ^. phylo_groupPeriod))
151 , toAttr "strFrom" (pack $ show (fst $ g ^. phylo_groupPeriod'))
152 , toAttr "strTo" (pack $ show (snd $ g ^. phylo_groupPeriod'))
153 , toAttr "branchId" (pack $ unwords (init $ map show $ snd $ g ^. phylo_groupBranchId))
154 , toAttr "bId" (pack $ show bId)
155 , toAttr "support" (pack $ show (g ^. phylo_groupSupport))
156 , toAttr "weight" (pack $ show (g ^. phylo_groupWeight))
157 , toAttr "source" (pack $ show (nub $ g ^. phylo_groupSources))
158 , toAttr "sourceFull" (pack $ show (g ^. phylo_groupSources))
159 , toAttr "density" (pack $ show (g ^. phylo_groupDensity))
160 , toAttr "cooc" (pack $ show (g ^. phylo_groupCooc))
161 , toAttr "lbl" (pack $ show (ngramsToLabel fdt (g ^. phylo_groupNgrams)))
162 , toAttr "foundation" (pack $ show (idxToLabel (g ^. phylo_groupNgrams)))
163 , toAttr "role" (pack $ show (idxToLabel' ((g ^. phylo_groupMeta) ! "dynamics")))
164 , toAttr "frequence" (pack $ show (idxToLabel' ((g ^. phylo_groupMeta) ! "frequence")))
165 , toAttr "seaLvl" (pack $ show ((g ^. phylo_groupMeta) ! "seaLevels"))
166 ])
167
168 toDotEdge' :: DotId -> DotId -> [Char] -> [Char] -> EdgeType -> Dot DotId
169 toDotEdge' source target thr w edgeType = edge source target
170 (case edgeType of
171 GroupToGroup -> undefined
172 GroupToGroupMemory -> [ Width 3, penWidth 4, Color [toWColor Black], Constraint True] <> [toAttr "edgeType" "memoryLink", toAttr "thr" (pack thr), toAttr "weight" (pack w)]
173 BranchToGroup -> undefined
174 BranchToBranch -> undefined
175 GroupToAncestor -> undefined
176 PeriodToPeriod -> undefined)
177
178
179 toDotEdge :: DotId -> DotId -> [Char] -> EdgeType -> Dot DotId
180 toDotEdge source target lbl edgeType = edge source target
181 (case edgeType of
182 GroupToGroup -> [ Width 3, penWidth 4, Color [toWColor Black], Constraint True] <> [toAttr "edgeType" "link", toAttr "lbl" (pack lbl), toAttr "source" source, toAttr "target" target]
183 GroupToGroupMemory -> undefined
184 BranchToGroup -> [ Width 3, Color [toWColor Black], ArrowHead (AType [(ArrMod FilledArrow RightSide,DotArrow)])] <> [toAttr "edgeType" "branchLink" ]
185 BranchToBranch -> [ Width 2, Color [toWColor Black], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,DotArrow)])]
186 GroupToAncestor -> [ Width 3, Color [toWColor Red], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,NoArrow)]), PenWidth 4] <> [toAttr "edgeType" "ancestorLink", toAttr "lbl" (pack lbl), toAttr "source" source, toAttr "target" target]
187 PeriodToPeriod -> [ Width 5, Color [toWColor Black]])
188
189
190 mergePointers :: [PhyloGroup] -> Map (PhyloGroupId,PhyloGroupId) Double
191 mergePointers groups =
192 let toChilds = fromList $ concat $ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupPeriodChilds) groups
193 toParents = fromList $ concat $ map (\g -> map (\(target,w) -> ((target,getGroupId g),w)) $ g ^. phylo_groupPeriodParents) groups
194 in unionWith (\w w' -> max w w') toChilds toParents
195
196 mergePointersMemory :: [PhyloGroup] -> [((PhyloGroupId,PhyloGroupId),(Double,Double))]
197 mergePointersMemory groups =
198 let toChilds = concat $ map (\g -> map (\(target,(t,w)) -> ((getGroupId g,target),(t,w))) $ g ^. phylo_groupPeriodMemoryChilds) groups
199 toParents = concat $ map (\g -> map (\(target,(t,w)) -> ((target,getGroupId g),(t,w))) $ g ^. phylo_groupPeriodMemoryParents) groups
200 in concat [toChilds,toParents]
201
202 mergeAncestors :: [PhyloGroup] -> [((PhyloGroupId,PhyloGroupId), Double)]
203 mergeAncestors groups = concat
204 $ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupAncestors)
205 $ filter (\g -> (not . null) $ g ^. phylo_groupAncestors) groups
206
207
208 toBid :: PhyloGroup -> [PhyloBranch] -> Int
209 toBid g bs =
210 let b' = head' "toBid" (filter (\b -> b ^. branch_id == g ^. phylo_groupBranchId) bs)
211 in fromJust $ elemIndex b' bs
212
213 exportToDot :: Phylo -> PhyloExport -> DotGraph DotId
214 exportToDot phylo export =
215 trace ("\n-- | Convert " <> show(length $ export ^. export_branches) <> " branches and "
216 <> show(length $ export ^. export_groups) <> " groups "
217 <> show(length $ nub $ concat $ map (^. phylo_groupNgrams) $ export ^. export_groups) <> " terms to a dot file\n\n"
218 <> "##########################") $
219 digraph ((Str . fromStrict) $ phyloName $ getConfig phylo) $ do
220
221 {- 1) init the dot graph -}
222 graphAttrs ( [ Label (toDotLabel $ phyloName $ getConfig phylo)]
223 <> [ FontSize 30, LabelLoc VTop, NodeSep 1, RankSep [1], Rank SameRank, Splines SplineEdges, Overlap ScaleOverlaps
224 , Ratio FillRatio
225 -- , Ratio AutoRatio
226 , Style [SItem Filled []],Color [toWColor White]]
227 {-- home made attributes -}
228 <> [ toAttr (fromStrict "phyloFoundations") $ pack $ show (length $ Vector.toList $ getRoots phylo)
229 , toAttr (fromStrict "phyloTerms") $ pack $ show (length $ nub $ concat $ map (^. phylo_groupNgrams) $ export ^. export_groups)
230 , toAttr (fromStrict "phyloDocs") $ pack $ show (sum $ elems $ getDocsByDate phylo)
231 , toAttr (fromStrict "phyloPeriods") $ pack $ show (length $ elems $ phylo ^. phylo_periods)
232 , toAttr (fromStrict "phyloBranches") $ pack $ show (length $ export ^. export_branches)
233 , toAttr (fromStrict "phyloGroups") $ pack $ show (length $ export ^. export_groups)
234 , toAttr (fromStrict "phyloSources") $ pack $ show (Vector.toList $ getSources phylo)
235 , toAttr (fromStrict "phyloTimeScale") $ pack $ getTimeScale phylo
236 , toAttr (fromStrict "PhyloScale") $ pack $ show (getLevel phylo)
237 , toAttr (fromStrict "phyloQuality") $ pack $ show (phylo ^. phylo_quality)
238 , toAttr (fromStrict "phyloSeaRiseStart") $ pack $ show (getPhyloSeaRiseStart phylo)
239 , toAttr (fromStrict "phyloSeaRiseSteps") $ pack $ show (getPhyloSeaRiseSteps phylo)
240 -- ,(toAttr (fromStrict "phyloTermsFreq") $ pack $ show (toList $ _phylo_lastTermFreq phylo))
241 ])
242
243 {-
244 -- toAttr (fromStrict k) $ (pack . unwords) $ map show v
245
246 -- 2) create a layer for the branches labels -}
247 subgraph (Str "Branches peaks") $ do
248
249 -- graphAttrs [Rank SameRank]
250 {-
251 -- 3) group the branches by hierarchy
252 -- mapM (\branches ->
253 -- subgraph (Str "Branches clade") $ do
254 -- graphAttrs [Rank SameRank]
255
256 -- -- 4) create a node for each branch
257 -- mapM branchToDotNode branches
258 -- ) $ elems $ fromListWith (++) $ map (\b -> ((init . snd) $ b ^. branch_id,[b])) $ export ^. export_branches
259 -}
260 mapM (\b -> branchToDotNode b (fromJust $ elemIndex b (export ^. export_branches))) $ export ^. export_branches
261
262 {-- 5) create a layer for each period -}
263 _ <- mapM (\period ->
264 subgraph ((Str . fromStrict . Text.pack) $ ("Period" <> show (fst $ _phylo_periodPeriod period) <> show (snd $ _phylo_periodPeriod period))) $ do
265 graphAttrs [Rank SameRank]
266 periodToDotNode (period ^. phylo_periodPeriod) (period ^. phylo_periodPeriodStr)
267
268 {-- 6) create a node for each group -}
269 mapM (\g -> groupToDotNode (getRoots phylo) g (toBid g (export ^. export_branches))) (filter (\g -> g ^. phylo_groupPeriod == (period ^. phylo_periodPeriod)) $ export ^. export_groups)
270 ) $ phylo ^. phylo_periods
271
272 {-- 7) create the edges between a branch and its first groups -}
273 _ <- mapM (\(bId,groups) ->
274 mapM (\g -> toDotEdge (branchIdToDotId bId) (groupIdToDotId $ getGroupId g) "" BranchToGroup) groups
275 )
276 $ toList
277 $ map (\groups -> head' "toDot"
278 $ groupBy (\g g' -> g' ^. phylo_groupPeriod == g ^. phylo_groupPeriod)
279 $ sortOn (fst . _phylo_groupPeriod) groups)
280 $ fromListWith (++) $ map (\g -> (g ^. phylo_groupBranchId,[g])) $ export ^. export_groups
281
282 {- 8) create the edges between the groups -}
283 _ <- mapM (\((k,k'),v) ->
284 toDotEdge (groupIdToDotId k) (groupIdToDotId k') (show v) GroupToGroup
285 ) $ (toList . mergePointers) $ export ^. export_groups
286
287 {- 8-bis) create the edges between the groups -}
288 {- _ <- mapM (\((k,k'),v) ->
289 toDotEdge' (groupIdToDotId k) (groupIdToDotId k') (show (fst v)) (show (snd v)) GroupToGroupMemory
290 ) $ mergePointersMemory $ export ^. export_groups -}
291
292 _ <- mapM (\((k,k'),v) ->
293 toDotEdge (groupIdToDotId k) (groupIdToDotId k') (show v) GroupToAncestor
294 ) $ mergeAncestors $ export ^. export_groups
295
296 -- 10) create the edges between the periods
297 _ <- mapM (\(prd,prd') ->
298 toDotEdge (periodIdToDotId prd) (periodIdToDotId prd') "" PeriodToPeriod
299 ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ getPeriodIds phylo
300
301 {- 8) create the edges between the branches
302 -- _ <- mapM (\(bId,bId') ->
303 -- toDotEdge (branchIdToDotId bId) (branchIdToDotId bId')
304 -- (Text.pack $ show(branchIdsToSimilarity bId bId'
305 -- (getThresholdInit $ phyloSimilarity $ getConfig phylo)
306 -- (getThresholdStep $ phyloSimilarity $ getConfig phylo))) BranchToBranch
307 -- ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ map _branch_id $ export ^. export_branches
308 -}
309
310
311 graphAttrs [Rank SameRank]
312
313
314 ----------------
315 -- | Filter | --
316 ----------------
317
318 filterByBranchSize :: Double -> PhyloExport -> PhyloExport
319 filterByBranchSize thr export =
320 let splited = partition (\b -> head' "filter" ((b ^. branch_meta) ! "size") >= thr) $ export ^. export_branches
321 in export & export_branches .~ (fst splited)
322 & export_groups %~ (filter (\g -> not $ elem (g ^. phylo_groupBranchId) (map _branch_id $ snd splited)))
323
324
325 processFilters :: [Filter] -> Quality -> PhyloExport -> PhyloExport
326 processFilters filters qua export =
327 foldl (\export' f -> case f of
328 ByBranchSize thr -> if (thr < (fromIntegral $ qua ^. qua_minBranch))
329 then filterByBranchSize (fromIntegral $ qua ^. qua_minBranch) export'
330 else filterByBranchSize thr export'
331 ) export filters
332
333 --------------
334 -- | Sort | --
335 --------------
336
337 branchToIso :: [PhyloBranch] -> [PhyloBranch]
338 branchToIso branches =
339 let steps = map sum
340 $ inits
341 $ map (\(b,x) -> b ^. branch_y + 0.05 - x)
342 $ zip branches
343 $ ([0] ++ (map (\(b,b') ->
344 let idx = length $ commonPrefix (b ^. branch_canonId) (b' ^. branch_canonId) []
345 lmin = min (length $ b ^. branch_seaLevel) (length $ b' ^. branch_seaLevel)
346 in
347 if ((idx - 1) > ((length $ b' ^. branch_seaLevel) - 1))
348 then (b' ^. branch_seaLevel) !! (lmin - 1)
349 else (b' ^. branch_seaLevel) !! (idx - 1)
350 ) $ listToSeq branches))
351 in map (\(x,b) -> b & branch_x .~ x)
352 $ zip steps branches
353
354 branchToIso' :: Double -> Double -> [PhyloBranch] -> [PhyloBranch]
355 branchToIso' start step branches =
356 let bx = map (\l -> (sum l) + ((fromIntegral $ length l) * 0.5))
357 $ inits
358 $ ([0] ++ (map (\(b,b') ->
359 let root = fromIntegral $ length $ commonPrefix (snd $ b ^. branch_id) (snd $ b' ^. branch_id) []
360 in 1 - start - step * root) $ listToSeq branches))
361 in map (\(x,b) -> b & branch_x .~ x)
362 $ zip bx branches
363
364
365 sortByHierarchy :: Int -> [PhyloBranch] -> [PhyloBranch]
366 sortByHierarchy depth branches =
367 if (length branches == 1)
368 then branches
369 else concat
370 $ map (\branches' ->
371 let partitions = partition (\b -> depth + 1 == ((length . snd) $ b ^. branch_id)) branches'
372 in (sortOn (\b -> (b ^. branch_meta) ! "birth") (fst partitions))
373 ++ (sortByHierarchy (depth + 1) (snd partitions)))
374 $ groupBy (\b b' -> ((take depth . snd) $ b ^. branch_id) == ((take depth . snd) $ b' ^. branch_id) )
375 $ sortOn (\b -> (take depth . snd) $ b ^. branch_id) branches
376
377
378 sortByBirthDate :: Order -> PhyloExport -> PhyloExport
379 sortByBirthDate order export =
380 let branches = sortOn (\b -> (b ^. branch_meta) ! "birth") $ export ^. export_branches
381 branches' = case order of
382 Asc -> branches
383 Desc -> reverse branches
384 in export & export_branches .~ branches'
385
386 processSort :: Sort -> SeaElevation -> PhyloExport -> PhyloExport
387 processSort sort' elev export = case sort' of
388 ByBirthDate o -> sortByBirthDate o export
389 ByHierarchy _ -> case elev of
390 Constante s s' -> export & export_branches .~ (branchToIso' s s' $ sortByHierarchy 0 (export ^. export_branches))
391 Adaptative _ -> export & export_branches .~ (branchToIso $ sortByHierarchy 0 (export ^. export_branches))
392 Evolving _ -> export & export_branches .~ (branchToIso $ sortByHierarchy 0 (export ^. export_branches))
393
394 -----------------
395 -- | Metrics | --
396 -----------------
397
398 -- | Return the conditional probability of i knowing j
399 conditional :: Ord a => Map (a,a) Double -> a -> a -> Double
400 conditional m i j = (findWithDefault 0 (i,j) m)
401 / (m ! (j,j))
402
403
404 -- | Return the genericity score of a given ngram
405 genericity :: Map (Int, Int) Double -> [Int] -> Int -> Double
406 genericity m l i = ( (sum $ map (\j -> conditional m i j) l)
407 - (sum $ map (\j -> conditional m j i) l)) / (fromIntegral $ (length l) + 1)
408
409
410 -- | Return the specificity score of a given ngram
411 specificity :: Map (Int, Int) Double -> [Int] -> Int -> Double
412 specificity m l i = ( (sum $ map (\j -> conditional m j i) l)
413 - (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
414
415
416 -- | Return the inclusion score of a given ngram
417 inclusion :: Map (Int, Int) Double -> [Int] -> Int -> Double
418 inclusion m l i = ( (sum $ map (\j -> conditional m j i) l)
419 + (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
420
421
422 ngramsMetrics :: Phylo -> PhyloExport -> PhyloExport
423 ngramsMetrics phylo export =
424 over ( export_groups
425 . traverse )
426 (\g -> g & phylo_groupMeta %~ insert "genericity"
427 (map (\n -> genericity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
428 & phylo_groupMeta %~ insert "specificity"
429 (map (\n -> specificity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
430 & phylo_groupMeta %~ insert "inclusion"
431 (map (\n -> inclusion (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
432 & phylo_groupMeta %~ insert "frequence"
433 (map (\n -> getInMap n (getLastRootsFreq phylo)) $ g ^. phylo_groupNgrams)
434 ) export
435
436
437 branchDating :: PhyloExport -> PhyloExport
438 branchDating export =
439 over ( export_branches
440 . traverse )
441 (\b ->
442 let groups = sortOn fst
443 $ foldl' (\acc g -> if (g ^. phylo_groupBranchId == b ^. branch_id)
444 then acc ++ [g ^. phylo_groupPeriod]
445 else acc ) [] $ export ^. export_groups
446 periods = nub groups
447 birth = fst $ head' "birth" groups
448 death = snd $ last' "death" groups
449 age = death - birth
450 in b & branch_meta %~ insert "birth" [fromIntegral birth]
451 & branch_meta %~ insert "death" [fromIntegral death]
452 & branch_meta %~ insert "age" [fromIntegral age]
453 & branch_meta %~ insert "size" [fromIntegral $ length periods] ) export
454
455 processMetrics :: Phylo -> PhyloExport -> PhyloExport
456 processMetrics phylo export = ngramsMetrics phylo
457 $ branchDating export
458
459
460 -----------------
461 -- | Taggers | --
462 -----------------
463
464 nk :: Int -> [[Int]] -> Int
465 nk n groups = sum
466 $ map (\g -> if (elem n g)
467 then 1
468 else 0) groups
469
470
471 tf :: Int -> [[Int]] -> Double
472 tf n groups = (fromIntegral $ nk n groups) / (fromIntegral $ length $ concat groups)
473
474
475 idf :: Int -> [[Int]] -> Double
476 idf n groups = log ((fromIntegral $ length groups) / (fromIntegral $ nk n groups))
477
478
479 findTfIdf :: [[Int]] -> [(Int,Double)]
480 findTfIdf groups = reverse $ sortOn snd $ map (\n -> (n,(tf n groups) * (idf n groups))) $ nub $ concat groups
481
482
483 findEmergences :: [PhyloGroup] -> Map Int Double -> [(Int,Double)]
484 findEmergences groups freq =
485 let ngrams = map _phylo_groupNgrams groups
486 dynamics = map (\g -> (g ^. phylo_groupMeta) ! "dynamics") groups
487 emerging = nubBy (\n1 n2 -> fst n1 == fst n2)
488 $ concat $ map (\g -> filter (\(_,d) -> d == 0) $ zip (fst g) (snd g)) $ zip ngrams dynamics
489 in reverse $ sortOn snd
490 $ map (\(n,_) -> if (member n freq)
491 then (n,freq ! n)
492 else (n,0)) emerging
493
494
495 mostEmergentTfIdf :: Int -> Map Int Double -> Vector Ngrams -> PhyloExport -> PhyloExport
496 mostEmergentTfIdf nth freq foundations export =
497 over ( export_branches
498 . traverse )
499 (\b ->
500 let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
501 tfidf = findTfIdf (map _phylo_groupNgrams groups)
502 emergences = findEmergences groups freq
503 selected = if (null emergences)
504 then map fst $ take nth tfidf
505 else [fst $ head' "mostEmergentTfIdf" emergences]
506 ++ (map fst $ take (nth - 1) $ filter (\(n,_) -> n /= (fst $ head' "mostEmergentTfIdf" emergences)) tfidf)
507 in b & branch_label .~ (ngramsToLabel foundations selected)) export
508
509
510 getNthMostMeta :: Int -> [Double] -> [Int] -> [Int]
511 getNthMostMeta nth meta ns = map (\(idx,_) -> (ns !! idx))
512 $ take nth
513 $ reverse
514 $ sortOn snd $ zip [0..] meta
515
516
517 mostInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
518 mostInclusive nth foundations export =
519 over ( export_branches
520 . traverse )
521 (\b ->
522 let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
523 cooc = foldl (\acc g -> unionWith (+) acc (g ^. phylo_groupCooc)) empty groups
524 ngrams = sort $ foldl (\acc g -> union acc (g ^. phylo_groupNgrams)) [] groups
525 inc = map (\n -> inclusion cooc (ngrams \\ [n]) n) ngrams
526 lbl = ngramsToLabel foundations $ getNthMostMeta nth inc ngrams
527 in b & branch_label .~ lbl ) export
528
529
530 mostEmergentInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
531 mostEmergentInclusive nth foundations export =
532 over ( export_groups
533 . traverse )
534 (\g ->
535 let lbl = ngramsToLabel foundations
536 $ take nth
537 $ map (\(_,(_,idx)) -> idx)
538 $ concat
539 $ map (\groups -> sortOn (fst . snd) groups)
540 $ groupBy ((==) `on` fst) $ reverse $ sortOn fst
541 $ zip ((g ^. phylo_groupMeta) ! "inclusion")
542 $ zip ((g ^. phylo_groupMeta) ! "dynamics") (g ^. phylo_groupNgrams)
543 in g & phylo_groupLabel .~ lbl ) export
544
545
546 processLabels :: [PhyloLabel] -> Vector Ngrams -> Map Int Double -> PhyloExport -> PhyloExport
547 processLabels labels foundations freq export =
548 foldl (\export' label ->
549 case label of
550 GroupLabel tagger nth ->
551 case tagger of
552 MostEmergentInclusive -> mostEmergentInclusive nth foundations export'
553 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger"
554 BranchLabel tagger nth ->
555 case tagger of
556 MostInclusive -> mostInclusive nth foundations export'
557 MostEmergentTfIdf -> mostEmergentTfIdf nth freq foundations export'
558 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger" ) export labels
559
560
561 ------------------
562 -- | Dynamics | --
563 ------------------
564
565 -- utiliser & creer une Map FdtId [PhyloGroup]
566 -- n = index of the current term
567 toDynamics :: FdtId -> [PhyloGroup] -> PhyloGroup -> Map FdtId (Date,Date) -> Double
568 toDynamics n elders g m =
569 let prd = g ^. phylo_groupPeriod
570 end = last' "dynamics" (sort $ map snd $ elems m)
571 in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end))
572 {- decrease -}
573 then 2
574 else if ((fst prd) == (fst $ m ! n))
575 {- emerging -}
576 then 0
577 else if isNew
578 {- emergence -}
579 then 1
580 else 3
581 where
582 --------------------------------------
583 isNew :: Bool
584 isNew = not $ elem n $ concat $ map _phylo_groupNgrams elders
585
586 type FdtId = Int
587 processDynamics :: [PhyloGroup] -> [PhyloGroup]
588 processDynamics groups =
589 map (\g ->
590 let elders = filter (\g' -> (g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
591 && ((fst $ g ^. phylo_groupPeriod) > (fst $ g' ^. phylo_groupPeriod))) groups
592 in g & phylo_groupMeta %~ insert "dynamics" (map (\n -> toDynamics n elders g mapNgrams) $ g ^. phylo_groupNgrams) ) groups
593 where
594 --------------------------------------
595 mapNgrams :: Map FdtId (Date,Date)
596 mapNgrams = map (\dates ->
597 let dates' = sort dates
598 in (head' "dynamics" dates', last' "dynamics" dates'))
599 $ fromListWith (++)
600 $ foldl (\acc g -> acc ++ ( map (\n -> (n,[fst $ g ^. phylo_groupPeriod, snd $ g ^. phylo_groupPeriod]))
601 $ (g ^. phylo_groupNgrams))) [] groups
602
603
604 -----------------
605 -- | horizon | --
606 -----------------
607
608 getGroupThr :: Double -> PhyloGroup -> Double
609 getGroupThr step g =
610 let seaLvl = (g ^. phylo_groupMeta) ! "seaLevels"
611 breaks = (g ^. phylo_groupMeta) ! "breaks"
612 in (last' "export" (take (round $ (last' "export" breaks) + 1) seaLvl)) - step
613
614 toAncestor :: Double
615 -> Map Int Double
616 -> PhyloSimilarity
617 -> Double
618 -> [PhyloGroup]
619 -> PhyloGroup
620 -> PhyloGroup
621 toAncestor nbDocs diago similarity step candidates ego =
622 let curr = ego ^. phylo_groupAncestors
623 in ego & phylo_groupAncestors .~ (curr ++ (map (\(g,w) -> (getGroupId g,w))
624 $ filter (\(g,w) -> (w > 0) && (w >= (min (getGroupThr step ego) (getGroupThr step g))))
625 $ map (\g -> (g, toSimilarity nbDocs diago similarity (ego ^. phylo_groupNgrams) (g ^. phylo_groupNgrams) (g ^. phylo_groupNgrams)))
626 $ filter (\g -> g ^. phylo_groupBranchId /= ego ^. phylo_groupBranchId ) candidates))
627
628
629 headsToAncestors :: Double
630 -> Map Int Double
631 -> PhyloSimilarity
632 -> Double
633 -> [PhyloGroup]
634 -> [PhyloGroup]
635 -> [PhyloGroup]
636 headsToAncestors nbDocs diago similarity step heads acc =
637 if (null heads)
638 then acc
639 else
640 let ego = head' "headsToAncestors" heads
641 heads' = tail' "headsToAncestors" heads
642 in headsToAncestors nbDocs diago similarity step heads' (acc ++ [toAncestor nbDocs diago similarity step heads' ego])
643
644
645 toHorizon :: Phylo -> Phylo
646 toHorizon phylo =
647 let phyloAncestor = updatePhyloGroups
648 scale
649 (fromList $ map (\g -> (getGroupId g, g))
650 $ concat
651 $ tracePhyloAncestors newGroups) phylo
652 reBranched = fromList $ map (\g -> (getGroupId g, g)) $ concat
653 $ groupsToBranches' $ fromList $ map (\g -> (getGroupId g, g)) $ getGroupsFromScale scale phyloAncestor
654 in updatePhyloGroups scale reBranched phylo
655 where
656 -- | 1) for each periods
657 periods :: [Period]
658 periods = getPeriodIds phylo
659 -- --
660 scale :: Scale
661 scale = getLastLevel phylo
662 -- --
663 frame :: Int
664 frame = getTimeFrame $ timeUnit $ getConfig phylo
665 -- | 2) find ancestors between groups without parents
666 mapGroups :: [[PhyloGroup]]
667 mapGroups = map (\prd ->
668 let groups = getGroupsFromScalePeriods scale [prd] phylo
669 childs = getPreviousChildIds scale frame prd periods phylo
670 -- maybe add a better filter for non isolated ancestors
671 heads = filter (\g -> (not . null) $ (g ^. phylo_groupPeriodChilds))
672 $ filter (\g -> null (g ^. phylo_groupPeriodParents) && (notElem (getGroupId g) childs)) groups
673 noHeads = groups \\ heads
674 nbDocs = sum $ elems $ filterDocs (getDocsByDate phylo) [prd]
675 diago = reduceDiagos $ filterDiago (getCoocByDate phylo) [prd]
676 sim = (similarity $ getConfig phylo)
677 step = case getSeaElevation phylo of
678 Constante _ s -> s
679 Adaptative _ -> 0
680 Evolving _ -> 0
681 -- in headsToAncestors nbDocs diago Similarity heads groups []
682 in map (toAncestor nbDocs diago sim step noHeads)
683 $ headsToAncestors nbDocs diago sim step heads []
684 ) periods
685 -- | 3) process this task concurrently
686 newGroups :: [[PhyloGroup]]
687 newGroups = mapGroups `using` parList rdeepseq
688 --------------------------------------
689
690 getPreviousChildIds :: Scale -> Int -> Period -> [Period] -> Phylo -> [PhyloGroupId]
691 getPreviousChildIds lvl frame curr prds phylo =
692 concat $ map ((map fst) . _phylo_groupPeriodChilds)
693 $ getGroupsFromScalePeriods lvl (getNextPeriods ToParents frame curr prds) phylo
694
695 ---------------------
696 -- | phyloExport | --
697 ---------------------
698
699 toPhyloExport :: Phylo -> DotGraph DotId
700 toPhyloExport phylo = exportToDot phylo
701 $ processFilters (exportFilter $ getConfig phylo) (phyloQuality $ getConfig phylo)
702 $ processSort (exportSort $ getConfig phylo) (getSeaElevation phylo)
703 $ processLabels (exportLabel $ getConfig phylo) (getRoots phylo) (getLastRootsFreq phylo)
704 $ processMetrics phylo export
705 where
706 export :: PhyloExport
707 export = PhyloExport groups branches
708 --------------------------------------
709 branches :: [PhyloBranch]
710 branches = map (\g ->
711 let seaLvl = (g ^. phylo_groupMeta) ! "seaLevels"
712 breaks = (g ^. phylo_groupMeta) ! "breaks"
713 canonId = take (round $ (last' "export" breaks) + 2) (snd $ g ^. phylo_groupBranchId)
714 in PhyloBranch { _branch_id = g ^. phylo_groupBranchId
715 , _branch_canonId = canonId
716 , _branch_seaLevel = seaLvl
717 , _branch_x = 0
718 , _branch_y = last' "export" $ take (round $ (last' "export" breaks) + 1) seaLvl
719 , _branch_w = 0
720 , _branch_t = 0
721 , _branch_label = ""
722 , _branch_meta = empty })
723 $ map (head' "export")
724 $ groupBy (\g g' -> g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
725 $ sortOn (^. phylo_groupBranchId) groups
726 --------------------------------------
727 groups :: [PhyloGroup]
728 groups = traceExportGroups
729 -- necessaire ?
730 $ processDynamics
731 $ getGroupsFromScale (phyloScale $ getConfig phylo)
732 $ tracePhyloInfo phylo
733
734
735 traceExportBranches :: [PhyloBranch] -> [PhyloBranch]
736 traceExportBranches branches = trace ("\n"
737 <> "-- | Export " <> show(length branches) <> " branches") branches
738
739 tracePhyloAncestors :: [[PhyloGroup]] -> [[PhyloGroup]]
740 tracePhyloAncestors groups = trace ( "-- | Found " <> show(length $ concat $ map _phylo_groupAncestors $ concat groups) <> " ancestors") groups
741
742 tracePhyloInfo :: Phylo -> Phylo
743 tracePhyloInfo phylo = trace ("\n" <> "##########################" <> "\n\n" <> "-- | Phylo with level = "
744 <> show(getLevel phylo) <> " applied to "
745 <> show(length $ Vector.toList $ getRoots phylo) <> " foundations"
746 ) phylo
747
748
749 traceExportGroups :: [PhyloGroup] -> [PhyloGroup]
750 traceExportGroups groups = trace ("\n" <> "-- | Export "
751 <> show(length $ nub $ map (\g -> g ^. phylo_groupBranchId) groups) <> " branches, "
752 <> show(length groups) <> " groups and "
753 <> show(length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) groups) <> " terms"
754 ) groups