2 Module : Gargantext.Viz.Phylo.Example
3 Description : Phylomemy example based on history of Cleopatre.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 -- | Cesar et Cleôpatre
11 -- Exemple de phylomemie
12 -- French without accents
16 - split the functions : RAW -> Document -> Ngrams
18 -- reverse history: antechronologique
24 {-# LANGUAGE NoImplicitPrelude #-}
25 {-# LANGUAGE FlexibleContexts #-}
26 {-# LANGUAGE OverloadedStrings #-}
28 module Gargantext.Viz.Phylo.Example where
30 import Control.Lens hiding (makeLenses, both, Level)
32 import Data.Bool (Bool, not)
33 import Data.List (concat, union, intersect, tails, tail, head, last, null, zip, sort, length, any, (++), (!!), nub, sortOn, reverse, splitAt, take, delete, init)
34 import Data.Map (Map, elems, member, adjust, singleton, empty, (!), keys, restrictKeys, mapWithKey, filterWithKey, mapKeys, intersectionWith, unionWith)
35 import Data.Semigroup (Semigroup)
37 import Data.Text (Text, unwords, toLower, words)
38 import Data.Tuple (fst, snd)
39 import Data.Tuple.Extra
40 import Data.Vector (Vector, fromList, elemIndex)
42 import Gargantext.Prelude hiding (head)
43 import Gargantext.Text.Metrics.FrequentItemSet (fisWithSizePolyMap, Size(..))
44 import Gargantext.Text.Terms.Mono (monoTexts)
45 import Gargantext.Viz.Phylo
46 import Gargantext.Viz.Phylo.Tools
48 import qualified Data.Bool as Bool
49 import qualified Data.List as List
50 import qualified Data.Map as Map
51 import qualified Data.Maybe as Maybe
52 import qualified Data.Set as Set
53 import qualified Data.Tuple as Tuple
54 import qualified Data.Vector as Vector
57 ------------------------------------------------------------------------
58 -- | STEP 14 | -- Incrementaly cluster the PhyloGroups n times, link them through the Periods and build level n of the Phylo
61 ------------------------------------------------------------------------
62 -- | STEP 13 | -- Cluster the Fis
65 -- | To do : ajouter de nouveaux clusters / proxi
66 -- gérer les cooc à level 2 et +, idem pour les quality
67 -- réfléchir aux formats de sortie
70 -- | To apply a Clustering method to a PhyloGraph
71 graphToClusters :: (Clustering,[Double]) -> PhyloGraph -> [[PhyloGroup]]
72 graphToClusters (clust,param) (nodes,edges) = case clust of
74 RelatedComponents -> relatedComp 0 (head nodes) (tail nodes,edges) [] []
77 -- | To transform a Phylo into Clusters of PhyloGroups at a given level
78 phyloToClusters :: Level -> (Proximity,[Double]) -> (Clustering,[Double]) -> Phylo -> Map (Date,Date) [[PhyloGroup]]
79 phyloToClusters lvl (prox,param) (clus,param') p = Map.fromList
80 $ zip (getPhyloPeriods p)
81 (map (\prd -> let graph = groupsToGraph (prox,param) (getGroupsWithFilters (getLevelValue lvl) prd p) p
82 in if null (fst graph)
84 else graphToClusters (clus,param') graph)
88 -- | To transform a Cluster into a Phylogroup
89 clusterToGroup :: PhyloPeriodId -> Int -> Int -> Text -> [PhyloGroup] -> PhyloGroup
90 clusterToGroup prd lvl idx lbl groups = PhyloGroup ((prd, lvl), idx)
92 ((sort . nub . concat) $ map getGroupNgrams groups)
96 (map (\g -> (getGroupId g, 1)) groups)
98 -- | To transform a list of Clusters into a new Phylolevel
99 clustersToPhyloLevel :: Level -> Map (Date,Date) [[PhyloGroup]] -> Phylo -> Phylo
100 clustersToPhyloLevel lvl m p = over (phylo_periods . traverse)
102 let periodId = _phylo_periodId period
103 clusters = zip [1..] (m ! periodId)
104 in over (phylo_periodLevels)
106 let groups = map (\cluster -> clusterToGroup periodId (getLevelValue lvl) (fst cluster) "" (snd cluster)) clusters
107 in levels ++ [PhyloLevel (periodId, (getLevelValue lvl)) groups]
111 phyloWithGroups2 = clustersToPhyloLevel (initLevel 2 Level_N)
112 (phyloToClusters (initLevel 1 Level_1) (WeightedLogJaccard,[0]) (RelatedComponents, []) phyloWithBranches_1)
115 ------------------------------------------------------------------------
116 -- | STEP 12 | -- Find the Branches
119 -- | To apply the related components method to a PhyloGraph
120 -- curr = the current PhyloGroup
121 -- (nodes,edges) = the initial PhyloGraph minus the current PhyloGroup
122 -- next = the next PhyloGroups to be added in the cluster
123 -- memo = the memory of the allready created clusters
124 relatedComp :: Int -> PhyloGroup -> PhyloGraph -> [PhyloGroup] -> [[PhyloGroup]] -> [[PhyloGroup]]
125 relatedComp idx curr (nodes,edges) next memo
126 | null nodes' && null next' = memo'
127 | (not . null) next' = relatedComp idx (head next') (nodes',edges) (tail next') memo'
128 | otherwise = relatedComp (idx + 1) (head nodes') (tail nodes',edges) [] memo'
130 --------------------------------------
131 memo' :: [[PhyloGroup]]
133 | null memo = [[curr]]
134 | idx == ((length memo) - 1) = (init memo) ++ [(last memo) ++ [curr]]
135 | otherwise = memo ++ [[curr]]
136 --------------------------------------
137 next' :: [PhyloGroup]
138 next' = filter (\x -> not $ elem x $ concat memo) $ nub $ next ++ (getNeighbours False curr edges)
139 --------------------------------------
140 nodes' :: [PhyloGroup]
141 nodes' = filter (\x -> not $ elem x next') nodes
142 --------------------------------------
145 -- | To transform a PhyloGraph into a list of PhyloBranches by using the relatedComp clustering
146 graphToBranches :: Int -> PhyloGraph -> Phylo -> [PhyloBranch]
147 graphToBranches lvl (nodes,edges) p = map (\(idx,c) -> PhyloBranch (lvl,idx) "" (map getGroupId c)) $ zip [0..] clusters
149 --------------------------------------
150 clusters :: [[PhyloGroup]]
151 clusters = relatedComp 0 (head nodes) (tail nodes,edges) [] []
152 --------------------------------------
155 -- | To transform a list of PhyloGroups into a PhyloGraph by using a given Proximity mesure
156 groupsToGraph :: (Proximity,[Double]) -> [PhyloGroup] -> Phylo -> PhyloGraph
157 groupsToGraph (prox,param) groups p = (groups,edges)
161 FromPairs -> (nub . concat) $ map (\g -> (map (\g' -> ((g',g),1)) $ getGroupParents g p)
163 (map (\g' -> ((g,g'),1)) $ getGroupChilds g p)) groups
164 WeightedLogJaccard -> map (\(x,y) -> ((x,y), weightedLogJaccard
165 (param !! 0) (getGroupCooc x)
166 (unifySharedKeys (getGroupCooc x) (getGroupCooc y)))) $ listToDirectedCombi groups
170 -- | To set all the PhyloBranches for a given Level in a Phylo
171 setPhyloBranches :: Level -> Phylo -> Phylo
172 setPhyloBranches lvl p = alterPhyloBranches
173 (\branches -> branches
175 (graphToBranches (getLevelValue lvl) (groupsToGraph (FromPairs,[]) groups p) p)
178 --------------------------------------
179 groups :: [PhyloGroup]
180 groups = getGroupsWithLevel (getLevelValue lvl) p
181 --------------------------------------
184 phyloWithBranches_1 = setPhyloBranches (initLevel 1 Level_1) phyloWithPair_1_Childs
187 ------------------------------------------------------------------------
188 -- | STEP 11 | -- Link the PhyloGroups of level 1 through the Periods
191 -- | To process the weightedLogJaccard between two PhyloGroups fields
192 weightedLogJaccard :: Double -> Map (Int, Int) Double -> Map (Int, Int) Double -> Double
193 weightedLogJaccard s f1 f2
195 | wUnion == wInter = 1
196 | s == 0 = (fromIntegral $ length wInter)/(fromIntegral $ length wUnion)
197 | s > 0 = (sumInvLog wInter)/(sumInvLog wUnion)
198 | otherwise = (sumLog wInter)/(sumLog wUnion)
200 --------------------------------------
202 wInter = elems $ intersectionWith (+) f1 f2
203 --------------------------------------
205 wUnion = elems $ unionWith (+) f1 f2
206 --------------------------------------
207 sumInvLog :: [Double] -> Double
208 sumInvLog l = foldl (\mem x -> mem + (1 / log (s + x))) 0 l
209 --------------------------------------
210 sumLog :: [Double] -> Double
211 sumLog l = foldl (\mem x -> mem + log (s + x)) 0 l
212 --------------------------------------
215 -- | To apply the corresponding proximity function based on a given Proximity
216 getProximity :: (Proximity,[Double]) -> PhyloGroup -> PhyloGroup -> (PhyloGroupId, Double)
217 getProximity (prox,param) g1 g2 = case prox of
218 WeightedLogJaccard -> ((getGroupId g2),weightedLogJaccard (param !! 0) (getGroupCooc g1) (unifySharedKeys (getGroupCooc g2) (getGroupCooc g1)))
219 _ -> panic ("[ERR][Viz.Phylo.Example.getProximity] Proximity function not defined")
222 -- | To get the next or previous PhyloPeriod based on a given PhyloPeriodId
223 getNextPeriods :: PairTo -> PhyloPeriodId -> [PhyloPeriodId] -> [PhyloPeriodId]
224 getNextPeriods to id l = case to of
225 Childs -> unNested id ((tail . snd) next)
226 Parents -> unNested id ((reverse . fst) next)
227 _ -> panic ("[ERR][Viz.Phylo.Example.getNextPeriods] PairTo type not defined")
229 --------------------------------------
230 next :: ([PhyloPeriodId], [PhyloPeriodId])
232 --------------------------------------
234 idx = case (List.elemIndex id l) of
235 Nothing -> panic ("[ERR][Viz.Phylo.Example.getNextPeriods] PhyloPeriodId not defined")
237 --------------------------------------
238 -- | To have an non-overlapping next period
239 unNested :: PhyloPeriodId -> [PhyloPeriodId] -> [PhyloPeriodId]
242 | nested (fst $ head l) x = unNested x (tail l)
243 | nested (snd $ head l) x = unNested x (tail l)
245 --------------------------------------
246 nested :: Date -> PhyloPeriodId -> Bool
247 nested d prd = d >= fst prd && d <= snd prd
248 --------------------------------------
251 -- | To find the best set (max = 2) of Childs/Parents candidates based on a given Proximity mesure until a maximum depth (max = Period + 5 units )
252 findBestCandidates :: PairTo -> Int -> Int -> Double -> (Proximity,[Double]) -> PhyloGroup -> Phylo -> [(PhyloGroupId, Double)]
253 findBestCandidates to depth max thr (prox,param) group p
254 | depth > max || null next = []
255 | (not . null) best = take 2 best
256 | otherwise = findBestCandidates to (depth + 1) max thr (prox,param) group p
258 --------------------------------------
259 next :: [PhyloPeriodId]
260 next = getNextPeriods to (getGroupPeriod group) (getPhyloPeriods p)
261 --------------------------------------
262 candidates :: [PhyloGroup]
263 candidates = getGroupsWithFilters (getGroupLevel group) (head next) p
264 --------------------------------------
265 scores :: [(PhyloGroupId, Double)]
266 scores = map (\group' -> getProximity (prox,param) group group') candidates
267 --------------------------------------
268 best :: [(PhyloGroupId, Double)]
271 $ filter (\(id,score) -> score >= thr) scores
272 --------------------------------------
275 -- | To add a new list of Pointers into an existing Childs/Parents list of Pointers
276 makePair :: PairTo -> PhyloGroup -> [(PhyloGroupId, Double)] -> PhyloGroup
277 makePair to group ids = case to of
278 Childs -> over (phylo_groupPeriodChilds) addPointers group
279 Parents -> over (phylo_groupPeriodParents) addPointers group
280 _ -> panic ("[ERR][Viz.Phylo.Example.makePair] PairTo type not defined")
282 --------------------------------------
283 addPointers :: [Pointer] -> [Pointer]
284 addPointers l = nub $ (l ++ ids)
285 --------------------------------------
288 -- | To pair all the Phylogroups of given PhyloLevel to their best Parents or Childs
289 pairGroupsToGroups :: PairTo -> Level -> Double -> (Proximity,[Double]) -> Phylo -> Phylo
290 pairGroupsToGroups to lvl thr (prox,param) p = alterPhyloGroups
293 if (getGroupLevel group) == (getLevelValue lvl)
296 --------------------------------------
297 candidates :: [(PhyloGroupId, Double)]
298 candidates = findBestCandidates to 1 5 thr (prox,param) group p
299 --------------------------------------
301 makePair to group candidates
306 phyloWithPair_1_Childs :: Phylo
307 phyloWithPair_1_Childs = pairGroupsToGroups Childs (initLevel 1 Level_1) 0.01 (WeightedLogJaccard,[0]) phyloWithPair_1_Parents
310 phyloWithPair_1_Parents :: Phylo
311 phyloWithPair_1_Parents = pairGroupsToGroups Parents (initLevel 1 Level_1) 0.01 (WeightedLogJaccard,[0]) phyloLinked_0_1
314 ------------------------------------------------------------------------
315 -- | STEP 10 | -- Build the coocurency Matrix of the Phylo
318 -- | Are two PhyloGroups sharing at leats one Ngrams
319 shareNgrams :: PhyloGroup -> PhyloGroup -> Bool
320 shareNgrams g g' = (not . null) $ intersect (getGroupNgrams g) (getGroupNgrams g')
323 -- | To get the good pair of keys (x,y) or (y,x) in a given Map (a,b) c
324 getKeyPair :: (Int,Int) -> Map (Int,Int) a -> (Int,Int)
325 getKeyPair (x,y) m = case findPair (x,y) m of
326 Nothing -> panic "[ERR][Viz.Phylo.Example.getKeyPair] Nothing"
329 --------------------------------------
330 findPair :: (Int,Int) -> Map (Int,Int) a -> Maybe (Int,Int)
332 | member (x,y) m = Just (x,y)
333 | member (y,x) m = Just (y,x)
334 | otherwise = Nothing
335 --------------------------------------
338 -- | To transform the Fis into a coocurency Matrix in a Phylo
339 fisToCooc :: Map (Date, Date) Fis -> Phylo -> Map (Int, Int) Double
340 fisToCooc m p = map (/docs)
341 $ foldl (\mem x -> adjust (+1) (getKeyPair x mem) mem) cooc
343 $ map (\x -> listToUnDirectedCombiWith (\x -> ngramsToIdx x p) $ (Set.toList . fst) x) fis
345 --------------------------------------
346 fis :: [(Clique,Support)]
347 fis = concat $ map (\x -> Map.toList x) (elems m)
348 --------------------------------------
349 fisNgrams :: [Ngrams]
350 fisNgrams = foldl (\mem x -> union mem $ (Set.toList . fst) x) [] fis
351 --------------------------------------
353 docs = fromIntegral $ foldl (\mem x -> mem + (snd x)) 0 fis
354 --------------------------------------
355 cooc :: Map (Int, Int) (Double)
356 cooc = Map.fromList $ map (\x -> (x,0)) (listToUnDirectedCombiWith (\x -> ngramsToIdx x p) fisNgrams)
357 --------------------------------------
360 phyloCooc :: Map (Int, Int) Double
361 phyloCooc = fisToCooc phyloFisFiltered phyloLinked_0_1
364 ------------------------------------------------------------------------
365 -- | STEP 9 | -- Build level 1 of the Phylo
368 -- | To Cliques into Groups
369 cliqueToGroup :: PhyloPeriodId -> Int -> Int -> Ngrams -> (Clique,Support) -> Map (Date, Date) Fis -> Phylo -> PhyloGroup
370 cliqueToGroup period lvl idx label fis m p = PhyloGroup ((period, lvl), idx)
373 (singleton "support" (fromIntegral $ snd fis))
377 --------------------------------------
379 ngrams = sort $ map (\x -> ngramsToIdx x p)
382 --------------------------------------
383 cooc :: Map (Int, Int) Double
384 cooc = filterWithKey (\k _ -> elem (fst k) ngrams && elem (snd k) ngrams)
385 $ fisToCooc (restrictKeys m $ Set.fromList [period]) p
386 --------------------------------------
389 -- | To transform Fis into PhyloLevels
390 fisToPhyloLevel :: Map (Date, Date) Fis -> Phylo -> Phylo
391 fisToPhyloLevel m p = over (phylo_periods . traverse)
393 let periodId = _phylo_periodId period
394 fisList = zip [1..] (Map.toList (m ! periodId))
395 in over (phylo_periodLevels)
397 let groups = map (\fis -> cliqueToGroup periodId 1 (fst fis) "" (snd fis) m p) fisList
398 in levels ++ [PhyloLevel (periodId, 1) groups]
402 phyloLinked_0_1 :: Phylo
403 phyloLinked_0_1 = alterLevelLinks lvl_0_1 phyloLinked_1_0
407 lvl_0_1 = initLevelLink (initLevel 0 Level_0) (initLevel 1 Level_1)
410 phyloLinked_1_0 :: Phylo
411 phyloLinked_1_0 = alterLevelLinks lvl_1_0 phyloWithGroups1
415 lvl_1_0 = initLevelLink (initLevel 1 Level_1) (initLevel 0 Level_0)
418 phyloWithGroups1 :: Phylo
419 phyloWithGroups1 = updatePhyloByLevel (initLevel 1 Level_1) phyloLinked_m1_0
422 ------------------------------------------------------------------------
423 -- | STEP 8 | -- Create Frequent Items Sets by Period and filter them
426 -- | To Filter Fis by support
427 filterFisBySupport :: Bool -> Int -> Map (Date, Date) Fis -> Map (Date, Date) Fis
428 filterFisBySupport empty min m = case empty of
429 True -> Map.map (\fis -> filterMinorFis min fis) m
430 False -> Map.map (\fis -> filterMinorFisNonEmpty min fis) m
433 -- | To filter Fis with small Support, to preserve nonempty periods please use : filterFisBySupport False
434 filterMinorFis :: Int -> Fis -> Fis
435 filterMinorFis min fis = Map.filter (\s -> s > min) fis
438 -- | To filter Fis with small Support but by keeping non empty Periods
439 filterMinorFisNonEmpty :: Int -> Fis -> Fis
440 filterMinorFisNonEmpty min fis = if (Map.null fis') && (not $ Map.null fis)
441 then filterMinorFisNonEmpty (min - 1) fis
444 --------------------------------------
446 fis' = filterMinorFis min fis
447 --------------------------------------
450 -- | To filter nested Fis
451 filterFisByNested :: Map (Date, Date) Fis -> Map (Date, Date) Fis
452 filterFisByNested = map (\fis -> restrictKeys fis
454 $ filterNestedSets (head (keys fis)) (keys fis) []
458 -- | To transform a list of Documents into a Frequent Items Set
459 docsToFis :: Map (Date, Date) [Document] -> Map (Date, Date) Fis
460 docsToFis docs = map (\d -> fisWithSizePolyMap
463 (map (words . text) d)) docs
466 phyloFisFiltered :: Map (Date, Date) Fis
467 phyloFisFiltered = filterFisBySupport True 1 (filterFisByNested phyloFis)
470 phyloFis :: Map (Date, Date) Fis
471 phyloFis = docsToFis phyloPeriods
474 ------------------------------------------------------------------------
475 -- | STEP 7 | -- Link level -1 to level 0
478 phyloLinked_m1_0 :: Phylo
479 phyloLinked_m1_0 = alterLevelLinks lvl_m1_0 phyloLinked_0_m1
482 lvl_m1_0 :: LevelLink
483 lvl_m1_0 = initLevelLink (initLevel (-1) Level_m1) (initLevel 0 Level_0)
486 ------------------------------------------------------------------------
487 -- | STEP 6 | -- Link level 0 to level -1
490 -- | To set the LevelLinks between a given PhyloGroup and a list of childs/parents PhyloGroups
491 linkGroupToGroups :: LevelLink -> PhyloGroup -> [PhyloGroup] -> PhyloGroup
492 linkGroupToGroups lvl current targets
493 | getLevelLinkValue From lvl < getLevelLinkValue To lvl = setLevelParents current
494 | getLevelLinkValue From lvl > getLevelLinkValue To lvl = setLevelChilds current
495 | otherwise = current
497 --------------------------------------
498 setLevelChilds :: PhyloGroup -> PhyloGroup
499 setLevelChilds = over (phylo_groupLevelChilds) addPointers
500 --------------------------------------
501 setLevelParents :: PhyloGroup -> PhyloGroup
502 setLevelParents = over (phylo_groupLevelParents) addPointers
503 --------------------------------------
504 addPointers :: [Pointer] -> [Pointer]
505 addPointers lp = lp ++ Maybe.mapMaybe (\target ->
506 if shouldLink lvl (_phylo_groupNgrams current)
507 (_phylo_groupNgrams target )
508 then Just ((getGroupId target),1)
509 else Nothing) targets
510 --------------------------------------
513 -- | To set the LevelLinks between two lists of PhyloGroups
514 linkGroupsByLevel :: LevelLink -> Phylo -> [PhyloGroup] -> [PhyloGroup]
515 linkGroupsByLevel lvl p groups = map (\group ->
516 if getGroupLevel group == getLevelLinkValue From lvl
517 then linkGroupToGroups lvl group (getGroupsWithFilters (getLevelLinkValue To lvl) (getGroupPeriod group) p)
521 -- | To set the LevelLink of all the PhyloGroups of a Phylo
522 alterLevelLinks :: LevelLink -> Phylo -> Phylo
523 alterLevelLinks lvl p = alterPhyloGroups (linkGroupsByLevel lvl p) p
526 phyloLinked_0_m1 :: Phylo
527 phyloLinked_0_m1 = alterLevelLinks lvl_0_m1 phyloWithGroups0
530 lvl_0_m1 :: LevelLink
531 lvl_0_m1 = initLevelLink (initLevel 0 Level_0) (initLevel (-1) Level_m1)
534 ------------------------------------------------------------------------
535 -- | STEP 5 | -- Build level 0 as a copy of level -1
538 -- | To clone the last PhyloLevel of each PhyloPeriod and update it with a new LevelValue
539 clonePhyloLevel :: Int -> Phylo -> Phylo
540 clonePhyloLevel lvl p = alterPhyloLevels (\l -> addPhyloLevel
541 (setPhyloLevelId lvl $ head l)
545 phyloWithGroups0 :: Phylo
546 phyloWithGroups0 = updatePhyloByLevel (initLevel 0 Level_0) phyloWithGroupsm1
549 ------------------------------------------------------------------------
550 -- | STEP 4 | -- Build level -1
553 -- | To transform a list of Documents into a PhyloLevel
554 docsToPhyloLevel :: Int ->(Date, Date) -> [Document] -> Phylo -> PhyloLevel
555 docsToPhyloLevel lvl (d, d') docs p = initPhyloLevel
557 (map (\(f,s) -> initGroup [s] s f lvl d d' p)
560 $ map (words . text) docs)
563 -- | To transform a Map of Periods and Documents into a list of PhyloPeriods
564 docsToPhyloPeriods :: Int -> Map (Date,Date) [Document] -> Phylo -> [PhyloPeriod]
565 docsToPhyloPeriods lvl docs p = map (\(id,l) -> initPhyloPeriod id l)
568 --------------------------------------
569 levels :: Map (Date,Date) [PhyloLevel]
570 levels = mapWithKey (\k v -> [docsToPhyloLevel lvl k v p]) docs
571 --------------------------------------
574 -- | To update a Phylo for a given Levels
575 updatePhyloByLevel :: Level -> Phylo -> Phylo
576 updatePhyloByLevel lvl p
577 = case getLevelLabel lvl of
579 Level_m1 -> appendPhyloPeriods (docsToPhyloPeriods (getLevelValue lvl) lvlData p) p
581 --------------------------------------
582 lvlData :: Map (Date,Date) [Document]
583 lvlData = phyloPeriods
584 --------------------------------------
586 Level_0 -> clonePhyloLevel (getLevelValue lvl) p
588 Level_1 -> fisToPhyloLevel lvlData p
590 --------------------------------------
591 lvlData :: Map (Date, Date) Fis
592 lvlData = phyloFisFiltered
593 --------------------------------------
595 _ -> panic ("[ERR][Viz.Phylo.Example.updatePhyloByLevel] Level not defined")
598 phyloWithGroupsm1 :: Phylo
599 phyloWithGroupsm1 = updatePhyloByLevel (initLevel (-1) Level_m1) phylo
602 ------------------------------------------------------------------------
603 -- | STEP 3 | -- Parse the Documents and group them by Periods
606 -- | To init a set of periods out of a given Grain and Step
607 docsToPeriods :: (Ord date, Enum date) => (doc -> date)
608 -> Grain -> Step -> [doc] -> Map (date, date) [doc]
609 docsToPeriods _ _ _ [] = panic "[ERR][Viz.Phylo.Example.docsToPeriods] Empty [Documents] can not have any periods"
610 docsToPeriods f g s es = Map.fromList $ zip hs $ map (inPeriode f es) hs
612 --------------------------------------
613 hs = steps g s $ both f (head es, last es)
614 --------------------------------------
615 inPeriode :: Ord b => (t -> b) -> [t] -> (b, b) -> [t]
616 inPeriode f' h (start,end) =
617 fst $ List.partition (\d -> f' d >= start && f' d <= end) h
618 --------------------------------------
619 steps :: (Eq date, Enum date) => Grain -> Step -> (date, date) -> [(date, date)]
620 steps s' o' (start,end) = map (\l -> (head l, last l))
621 $ chunkAlong s' o' [start .. end]
622 --------------------------------------
625 -- | To parse a list of Documents by filtering on a Vector of Ngrams
626 parseDocs :: PhyloNgrams -> [Document] -> [Document]
627 parseDocs l docs = map (\(Document d t) -> Document d (unwords
628 $ filter (\x -> Vector.elem x l)
632 -- | To group a list of Documents by fixed periods
633 groupDocsByPeriod :: Grain -> Step -> [Document] -> Phylo -> Map (Date, Date) [Document]
634 groupDocsByPeriod g s docs p = docsToPeriods date g s
635 $ parseDocs (getPhyloNgrams p) docs
638 phyloPeriods :: Map (Date, Date) [Document]
639 phyloPeriods = groupDocsByPeriod 5 3 phyloDocs phylo
642 ------------------------------------------------------------------------
643 -- | STEP 2 | -- Init an initial list of Ngrams and a Phylo
646 -- | To init a Phylomemy
647 initPhylo :: [Document] -> PhyloNgrams -> Phylo
648 initPhylo docs ngrams = Phylo (both date $ (last &&& head) docs) ngrams [] []
651 -- | To init a PhyloNgrams as a Vector of Ngrams
652 initNgrams :: [Ngrams] -> PhyloNgrams
653 initNgrams l = Vector.fromList $ map toLower l
657 phylo = initPhylo phyloDocs (initNgrams actants)
660 ------------------------------------------------------------------------
661 -- | STEP 1 | -- Get a list of Document
664 -- | To transform a corpus of texts into a structured list of Documents
665 corpusToDocs :: [(Date, Text)] -> [Document]
666 corpusToDocs l = map (\(d,t) -> Document d t) l
669 phyloDocs :: [Document]
670 phyloDocs = corpusToDocs corpus
673 ------------------------------------------------------------------------
674 -- | STEP 0 | -- Let's start with an example
678 actants = [ "Cleopatre" , "Ptolemee", "Ptolemee-XIII", "Ptolemee-XIV"
679 , "Marc-Antoine", "Cesar" , "Antoine" , "Octave" , "Rome"
680 , "Alexandrie" , "Auguste" , "Pompee" , "Cassius" , "Brutus"]
683 corpus :: [(Date, Text)]
684 corpus = List.sortOn fst [ (-51,"Cleopatre règne sur l’egypte entre 51 et 30 av. J.-C. avec ses frères-epoux Ptolemee-XIII et Ptolemee-XIV, puis aux côtes du general romain Marc-Antoine. Elle est celèbre pour avoir ete la compagne de Jules Cesar puis d'Antoine, avec lesquels elle a eu plusieurs enfants. Partie prenante dans la guerre civile opposant Antoine à Octave, elle est vaincue à la bataille d'Actium en 31 av. J.-C. Sa defaite va permettre aux Romains de mener à bien la conquête de l’egypte, evenement qui marquera la fin de l'epoque hellenistique."), (-40,"Il existe relativement peu d'informations sur son sejour à Rome, au lendemain de l'assassinat de Cesar, ou sur la periode passee à Alexandrie durant l'absence d'Antoine, entre -40 et -37."), (-48,"L'historiographie antique lui est globalement defavorable car inspiree par son vainqueur, l'empereur Auguste, et par son entourage, dont l'interêt est de la noircir, afin d'en faire l'adversaire malfaisant de Rome et le mauvais genie d'Antoine. On observe par ailleurs que Cesar ne fait aucune mention de sa liaison avec elle dans les Commentaires sur la Guerre civile"), (-69,"Cleopatre est nee au cours de l'hiver -69/-686 probablement à Alexandrie."), (-48,"Pompee a en effet ete le protecteur de Ptolemee XII, le père de Cleopatre et de Ptolemee-XIII dont il se considère comme le tuteur."), (-48,"Ptolemee-XIII et Cleopatre auraient d'ailleurs aide Pompee par l'envoi d'une flotte de soixante navires."), (-48,"Mais le jeune roi Ptolemee-XIII et ses conseillers jugent sa cause perdue et pensent s'attirer les bonnes graces du vainqueur en le faisant assassiner à peine a-t-il pose le pied sur le sol egyptien, près de Peluse, le 30 juillet 48 av. J.-C., sous les yeux de son entourage."), (-48,"Cesar fait enterrer la tête de Pompee dans le bosquet de Nemesis en bordure du mur est de l'enceinte d'Alexandrie. Pour autant la mort de Pompee est une aubaine pour Cesar qui tente par ailleurs de profiter des querelles dynastiques pour annexer l’egypte."), (-48,"Il est difficile de se prononcer clairement sur les raisons qui ont pousse Cesar à s'attarder à Alexandrie. Il y a des raisons politiques, mais aussi des raisons plus sentimentales (Cleopatre ?). Il tente d'abord d'obtenir le remboursement de dettes que Ptolemee XII"), (-46,"Les deux souverains sont convoques par Cesar au palais royal d'Alexandrie. Ptolemee-XIII s'y rend après diverses tergiversations ainsi que Cleopatre."), (-47,"A Rome, Cleopatre epouse alors un autre de ses frères cadets, à Alexandrie, Ptolemee-XIV, sur l'injonction de Jules Cesar"), (-46,"Cesar a-t-il comme objectif de montrer ce qu'il en coûte de se revolter contre Rome en faisant figurer dans son triomphe la sœur de Cleopatre et de Ptolemee-XIV, Arsinoe, qui s'est fait reconnaître reine par les troupes de Ptolemee-XIII ?"), (-44,"Au debut de l'annee -44, Cesar est assassine par Brutus. Profitant de la situation confuse qui s'ensuit, Cleopatre quitte alors Rome à la mi-avril, faisant escale en Grèce. Elle parvient à Alexandrie en juillet -44."), (-44,"La guerre que se livrent les assassins de Cesar, Cassius et Brutus et ses heritiers, Octave et Marc-Antoine, oblige Cleopatre à des contorsions diplomatiques."), (-41,"Nous ignorons depuis quand Cleopatre, agee de 29 ans en -41, et Marc-Antoine, qui a une quarantaine d'annees, se connaissent. Marc-Antoine est l'un des officiers qui ont participe au retablissement de Ptolemee XII. Il est plus vraisemblable qu'ils se soient frequentes lors du sejour à Rome de Cleopatre."), (-42,"Brutus tient la Grèce tandis que Cassius s'installe en Syrie. Le gouverneur de Cleopatre à Chypre, Serapion, vient en aide à Cassius."), (-42,"Cassius aurait envisage de s'emparer d'Alexandrie quand le 'debarquement' en Grèce d'Antoine et d'Octave l'oblige à renoncer à ses projets")]