2 Module : Gargantext.Viz.Phylo.Example
3 Description : Phylomemy example based on history of Cleopatre.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 -- | Cesar et Cleôpatre
11 -- Exemple de phylomemie
12 -- French without accents
16 - split the functions : RAW -> Document -> Ngrams
18 -- reverse history: antechronologique
24 {-# LANGUAGE NoImplicitPrelude #-}
25 {-# LANGUAGE FlexibleContexts #-}
26 {-# LANGUAGE OverloadedStrings #-}
27 {-# LANGUAGE MultiParamTypeClasses #-}
29 module Gargantext.Viz.Phylo.Example where
31 import Control.Lens hiding (makeLenses, both, Level)
33 import Data.Bool (Bool, not)
34 import Data.List (concat, union, intersect, tails, tail, head, last, null, zip, sort, length, any, (++), (!!), nub, sortOn, reverse, splitAt, take, delete, init)
35 import Data.Map (Map, elems, member, adjust, singleton, empty, (!), keys, restrictKeys, mapWithKey, filterWithKey, mapKeys, intersectionWith, unionWith)
36 import Data.Semigroup (Semigroup)
38 import Data.Text (Text, unwords, toLower, words)
39 import Data.Tuple (fst, snd)
40 import Data.Tuple.Extra
41 import Data.Vector (Vector, fromList, elemIndex)
43 import Gargantext.Prelude hiding (head)
44 import Gargantext.Text.Metrics.FrequentItemSet (fisWithSizePolyMap, Size(..))
45 import Gargantext.Text.Terms.Mono (monoTexts)
46 import Gargantext.Viz.Phylo
47 import Gargantext.Viz.Phylo.Tools
49 import qualified Data.Bool as Bool
50 import qualified Data.List as List
51 import qualified Data.Map as Map
52 import qualified Data.Maybe as Maybe
53 import qualified Data.Set as Set
54 import qualified Data.Tuple as Tuple
55 import qualified Data.Vector as Vector
58 ------------------------------------------------------------------------
59 -- | STEP 14 | -- Incrementaly cluster the PhyloGroups n times, link them through the Periods and build level n of the Phylo
62 ------------------------------------------------------------------------
63 -- | STEP 13 | -- Cluster the Fis
66 -- | To do : ajouter de nouveaux clusters / proxi
67 -- gérer les cooc à level 2 et +, idem pour les quality
68 -- réfléchir aux formats de sortie
71 -- | To apply a Clustering method to a PhyloGraph
72 graphToClusters :: (Clustering,[Double]) -> PhyloGraph -> [[PhyloGroup]]
73 graphToClusters (clust,param) (nodes,edges) = case clust of
75 RelatedComponents -> relatedComp 0 (head nodes) (tail nodes,edges) [] []
78 -- | To transform a Phylo into Clusters of PhyloGroups at a given level
79 phyloToClusters :: Level -> (Proximity,[Double]) -> (Clustering,[Double]) -> Phylo -> Map (Date,Date) [[PhyloGroup]]
80 phyloToClusters lvl (prox,param) (clus,param') p = Map.fromList
81 $ zip (getPhyloPeriods p)
82 (map (\prd -> let graph = groupsToGraph (prox,param) (getGroupsWithFilters lvl prd p) p
83 in if null (fst graph)
85 else graphToClusters (clus,param') graph)
89 -- | To transform a Cluster into a Phylogroup
90 clusterToGroup :: PhyloPeriodId -> Level -> Int -> Text -> [PhyloGroup] -> PhyloGroup
91 clusterToGroup prd lvl idx lbl groups =
92 PhyloGroup ((prd, lvl), idx) lbl ((sort . nub . concat) $ map getGroupNgrams groups) empty empty [] [] [] (map (\g -> (getGroupId g, 1)) groups)
95 -- | To transform a list of Clusters into a new Phylolevel
96 clustersToPhyloLevel :: Level -> Map (Date,Date) [[PhyloGroup]] -> Phylo -> Phylo
97 clustersToPhyloLevel lvl m p = over (phylo_periods . traverse)
99 let periodId = _phylo_periodId period
100 clusters = zip [1..] (m ! periodId)
101 in over (phylo_periodLevels)
103 let groups = map (\cluster -> clusterToGroup periodId lvl (fst cluster) "" (snd cluster)) clusters
104 in levels ++ [PhyloLevel (periodId, lvl) groups]
108 phyloWithGroups2 = clustersToPhyloLevel
110 (phyloToClusters 1 (WeightedLogJaccard,[0]) (RelatedComponents, []) phyloWithBranches_1) phyloWithBranches_1
112 ------------------------------------------------------------------------
113 -- | STEP 12 | -- Find the Branches
116 -- | To apply the related components method to a PhyloGraph
117 -- curr = the current PhyloGroup
118 -- (nodes,edges) = the initial PhyloGraph minus the current PhyloGroup
119 -- next = the next PhyloGroups to be added in the cluster
120 -- memo = the memory of the allready created clusters
121 relatedComp :: Int -> PhyloGroup -> PhyloGraph -> [PhyloGroup] -> [[PhyloGroup]] -> [[PhyloGroup]]
122 relatedComp idx curr (nodes,edges) next memo
123 | null nodes' && null next' = memo'
124 | (not . null) next' = relatedComp idx (head next') (nodes',edges) (tail next') memo'
125 | otherwise = relatedComp (idx + 1) (head nodes') (tail nodes',edges) [] memo'
127 --------------------------------------
128 memo' :: [[PhyloGroup]]
130 | null memo = [[curr]]
131 | idx == ((length memo) - 1) = (init memo) ++ [(last memo) ++ [curr]]
132 | otherwise = memo ++ [[curr]]
133 --------------------------------------
134 next' :: [PhyloGroup]
135 next' = filter (\x -> not $ elem x $ concat memo) $ nub $ next ++ (getNeighbours False curr edges)
136 --------------------------------------
137 nodes' :: [PhyloGroup]
138 nodes' = filter (\x -> not $ elem x next') nodes
139 --------------------------------------
142 -- | To transform a PhyloGraph into a list of PhyloBranches by using the relatedComp clustering
143 graphToBranches :: Level -> PhyloGraph -> Phylo -> [PhyloBranch]
144 graphToBranches lvl (nodes,edges) p = map (\(idx,c) -> PhyloBranch (lvl,idx) "" (map getGroupId c))
146 $ relatedComp 0 (head nodes) (tail nodes,edges) [] []
149 -- | To transform a list of PhyloGroups into a PhyloGraph by using a given Proximity mesure
150 groupsToGraph :: (Proximity,[Double]) -> [PhyloGroup] -> Phylo -> PhyloGraph
151 groupsToGraph (prox,param) groups p = (groups,edges)
155 FromPairs -> (nub . concat) $ map (\g -> (map (\g' -> ((g',g),1)) $ getGroupParents g p)
157 (map (\g' -> ((g,g'),1)) $ getGroupChilds g p)) groups
158 WeightedLogJaccard -> map (\(x,y) -> ((x,y), weightedLogJaccard
159 (param !! 0) (getGroupCooc x)
160 (unifySharedKeys (getGroupCooc x) (getGroupCooc y)))) $ listToDirectedCombi groups
164 -- | To set all the PhyloBranches for a given Level in a Phylo
165 setPhyloBranches :: Level -> Phylo -> Phylo
166 setPhyloBranches lvl p = alterPhyloBranches (\l -> l ++ (graphToBranches lvl (groupsToGraph (FromPairs,[]) (getGroupsWithLevel lvl p) p) p) ) p
169 phyloWithBranches_1 = setPhyloBranches 1 phyloWithPair_1_Childs
172 ------------------------------------------------------------------------
173 -- | STEP 11 | -- Link the PhyloGroups of level 1 through the Periods
176 -- | To process the weightedLogJaccard between two PhyloGroups fields
177 weightedLogJaccard :: Double -> Map (Int, Int) Double -> Map (Int, Int) Double -> Double
178 weightedLogJaccard s f1 f2
180 | wUnion == wInter = 1
181 | s == 0 = (fromIntegral $ length wInter)/(fromIntegral $ length wUnion)
182 | s > 0 = (sumInvLog wInter)/(sumInvLog wUnion)
183 | otherwise = (sumLog wInter)/(sumLog wUnion)
185 --------------------------------------
187 wInter = elems $ intersectionWith (+) f1 f2
188 --------------------------------------
190 wUnion = elems $ unionWith (+) f1 f2
191 --------------------------------------
192 sumInvLog :: [Double] -> Double
193 sumInvLog l = foldl (\mem x -> mem + (1 / log (s + x))) 0 l
194 --------------------------------------
195 sumLog :: [Double] -> Double
196 sumLog l = foldl (\mem x -> mem + log (s + x)) 0 l
197 --------------------------------------
200 -- | To apply the corresponding proximity function based on a given Proximity
201 getProximity :: (Proximity,[Double]) -> PhyloGroup -> PhyloGroup -> (PhyloGroupId, Double)
202 getProximity (prox,param) g1 g2 = case prox of
203 WeightedLogJaccard -> ((getGroupId g2),weightedLogJaccard (param !! 0) (getGroupCooc g1) (unifySharedKeys (getGroupCooc g2) (getGroupCooc g1)))
204 _ -> panic ("[ERR][Viz.Phylo.Example.getProximity] Proximity function not defined")
207 -- | To get the next or previous PhyloPeriod based on a given PhyloPeriodId
208 getNextPeriods :: PairTo -> PhyloPeriodId -> [PhyloPeriodId] -> [PhyloPeriodId]
209 getNextPeriods to id l = case to of
210 Childs -> unNested id ((tail . snd) next)
211 Parents -> unNested id ((reverse . fst) next)
212 _ -> panic ("[ERR][Viz.Phylo.Example.getNextPeriods] PairTo type not defined")
214 --------------------------------------
215 next :: ([PhyloPeriodId], [PhyloPeriodId])
217 --------------------------------------
219 idx = case (List.elemIndex id l) of
220 Nothing -> panic ("[ERR][Viz.Phylo.Example.getNextPeriods] PhyloPeriodId not defined")
222 --------------------------------------
223 -- | To have an non-overlapping next period
224 unNested :: PhyloPeriodId -> [PhyloPeriodId] -> [PhyloPeriodId]
227 | nested (fst $ head l) x = unNested x (tail l)
228 | nested (snd $ head l) x = unNested x (tail l)
230 --------------------------------------
231 nested :: Date -> PhyloPeriodId -> Bool
232 nested d prd = d >= fst prd && d <= snd prd
233 --------------------------------------
236 -- | To find the best set (max = 2) of Childs/Parents candidates based on a given Proximity mesure until a maximum depth (max = Period + 5 units )
237 findBestCandidates :: PairTo -> Int -> Int -> Double -> (Proximity,[Double]) -> PhyloGroup -> Phylo -> [(PhyloGroupId, Double)]
238 findBestCandidates to depth max thr (prox,param) group p
239 | depth > max || null next = []
240 | (not . null) best = take 2 best
241 | otherwise = findBestCandidates to (depth + 1) max thr (prox,param) group p
243 --------------------------------------
244 next :: [PhyloPeriodId]
245 next = getNextPeriods to (getGroupPeriod group) (getPhyloPeriods p)
246 --------------------------------------
247 candidates :: [PhyloGroup]
248 candidates = getGroupsWithFilters (getGroupLevel group) (head next) p
249 --------------------------------------
250 scores :: [(PhyloGroupId, Double)]
251 scores = map (\group' -> getProximity (prox,param) group group') candidates
252 --------------------------------------
253 best :: [(PhyloGroupId, Double)]
256 $ filter (\(id,score) -> score >= thr) scores
257 --------------------------------------
260 -- | To add a new list of Pointers into an existing Childs/Parents list of Pointers
261 makePair :: PairTo -> PhyloGroup -> [(PhyloGroupId, Double)] -> PhyloGroup
262 makePair to group ids = case to of
263 Childs -> over (phylo_groupPeriodChilds) addPointers group
264 Parents -> over (phylo_groupPeriodParents) addPointers group
265 _ -> panic ("[ERR][Viz.Phylo.Example.makePair] PairTo type not defined")
267 --------------------------------------
268 addPointers :: [Pointer] -> [Pointer]
269 addPointers l = nub $ (l ++ ids)
270 --------------------------------------
273 -- | To pair all the Phylogroups of given PhyloLevel to their best Parents or Childs
274 pairGroupsToGroups :: PairTo -> Level -> Double -> (Proximity,[Double]) -> Phylo -> Phylo
275 pairGroupsToGroups to lvl thr (prox,param) p = alterPhyloGroups
278 if (getGroupLevel group) == lvl
281 --------------------------------------
282 candidates :: [(PhyloGroupId, Double)]
283 candidates = findBestCandidates to 1 5 thr (prox,param) group p
284 --------------------------------------
286 makePair to group candidates
291 phyloWithPair_1_Childs :: Phylo
292 phyloWithPair_1_Childs = pairGroupsToGroups Childs 1 0.01 (WeightedLogJaccard,[0]) phyloWithPair_1_Parents
295 phyloWithPair_1_Parents :: Phylo
296 phyloWithPair_1_Parents = pairGroupsToGroups Parents 1 0.01 (WeightedLogJaccard,[0]) phyloLinked_0_1
299 ------------------------------------------------------------------------
300 -- | STEP 10 | -- Build the coocurency Matrix of the Phylo
303 -- | To transform the Fis into a coocurency Matrix in a Phylo
304 fisToCooc :: Map (Date, Date) [Fis] -> Phylo -> Map (Int, Int) Double
305 fisToCooc m p = map (/docs)
306 $ foldl (\mem x -> adjust (+1) (getKeyPair x mem) mem) cooc
308 $ map (\x -> listToUnDirectedCombiWith (\x -> ngramsToIdx x p) $ (Set.toList . fst) x)
311 --------------------------------------
312 fisNgrams :: [Ngrams]
313 fisNgrams = foldl (\mem x -> union mem $ (Set.toList . fst) x) [] $ (concat . elems) m
314 --------------------------------------
316 docs = fromIntegral $ foldl (\mem x -> mem + (snd x)) 0 $ (concat . elems) m
317 --------------------------------------
318 cooc :: Map (Int, Int) (Double)
319 cooc = Map.fromList $ map (\x -> (x,0)) (listToUnDirectedCombiWith (\x -> ngramsToIdx x p) fisNgrams)
320 --------------------------------------
323 phyloCooc :: Map (Int, Int) Double
324 phyloCooc = fisToCooc phyloFisFiltered phyloLinked_0_1
327 ------------------------------------------------------------------------
328 -- | STEP 9 | -- Build level 1 of the Phylo
331 -- | To Cliques into Groups
332 cliqueToGroup :: PhyloPeriodId -> Level -> Int -> Ngrams -> (Clique,Support) -> Map (Date, Date) [Fis] -> Phylo -> PhyloGroup
333 cliqueToGroup period lvl idx label fis m p =
334 PhyloGroup ((period, lvl), idx) label ngrams (singleton "support" (fromIntegral $ snd fis)) cooc [] [] [] []
336 --------------------------------------
338 ngrams = sort $ map (\x -> ngramsToIdx x p)
341 --------------------------------------
342 cooc :: Map (Int, Int) Double
343 cooc = filterWithKey (\k _ -> elem (fst k) ngrams && elem (snd k) ngrams)
344 $ fisToCooc (restrictKeys m $ Set.fromList [period]) p
345 --------------------------------------
348 -- | To transform Fis into PhyloLevels
349 fisToPhyloLevel :: Map (Date, Date) [Fis] -> Phylo -> Phylo
350 fisToPhyloLevel m p = over (phylo_periods . traverse)
352 let periodId = _phylo_periodId period
353 fisList = zip [1..] (m ! periodId)
354 in over (phylo_periodLevels)
356 let groups = map (\fis -> cliqueToGroup periodId 1 (fst fis) "" (snd fis) m p) fisList
357 in phyloLevels ++ [PhyloLevel (periodId, 1) groups]
361 -- | to do : ajouter ce truc à addPhylolevel puis le rendre polymorphique (Fis/Document -> Group)
363 -- aggregateToPhyloLevel' :: (a -> PhyloGroup) -> Map (Date, Date) [a] -> Phylo -> Phylo
364 -- aggregateToPhyloLevel' f m p = alterPhyloPeriods (\period ->
365 -- let periodId = _phylo_periodId period
366 -- aggList = zip [1..] (m ! periodId)
367 -- in over (phylo_periodLevels)
369 -- let groups = map f aggList
370 -- in phyloLevels ++ [PhyloLevel (periodId, 1) groups]
375 phyloLinked_0_1 :: Phylo
376 phyloLinked_0_1 = alterLevelLinks (0,1) phyloLinked_1_0
379 phyloLinked_1_0 :: Phylo
380 phyloLinked_1_0 = alterLevelLinks (1,0) phyloWithGroups1
383 phyloWithGroups1 :: Phylo
384 phyloWithGroups1 = updatePhyloByLevel 1 phyloLinked_m1_0
387 ------------------------------------------------------------------------
388 -- | STEP 8 | -- Create Frequent Items Sets by Period and filter them
391 -- | To Filter Fis by support
392 filterFisBySupport :: Bool -> Int -> Map (Date, Date) [Fis] -> Map (Date, Date) [Fis]
393 filterFisBySupport empty min m = case empty of
394 True -> Map.map (\l -> filterMinorFis min l) m
395 False -> Map.map (\l -> keepFilled (filterMinorFis) min l) m
398 -- | To filter Fis with small Support, to preserve nonempty periods please use : filterFisBySupport False
399 filterMinorFis :: Int -> [Fis] -> [Fis]
400 filterMinorFis min l = filter (\fis -> snd fis > min) l
403 -- | To filter nested Fis
404 filterFisByNested :: Map (Date, Date) [Fis] -> Map (Date, Date) [Fis]
405 filterFisByNested = map (\l -> let cliqueMax = filterNestedSets (head $ map fst l) (map fst l) []
406 in filter (\fis -> elem (fst fis) cliqueMax) l)
409 -- | To transform a list of Documents into a Frequent Items Set
410 docsToFis :: Map (Date, Date) [Document] -> Map (Date, Date) [Fis]
411 docsToFis docs = map (\d -> Map.toList
412 $ fisWithSizePolyMap (Segment 1 20) 1 (map (words . text) d)) docs
415 phyloFisFiltered :: Map (Date, Date) [Fis]
416 phyloFisFiltered = filterFisBySupport True 1 (filterFisByNested phyloFis)
419 phyloFis :: Map (Date, Date) [Fis]
420 phyloFis = docsToFis phyloPeriods
423 ------------------------------------------------------------------------
424 -- | STEP 7 | -- Link level -1 to level 0
427 phyloLinked_m1_0 :: Phylo
428 phyloLinked_m1_0 = alterLevelLinks ((-1),0) phyloLinked_0_m1
431 ------------------------------------------------------------------------
432 -- | STEP 6 | -- Link level 0 to level -1
435 -- | To set the LevelLinks between a given PhyloGroup and a list of childs/parents PhyloGroups
436 linkGroupToGroups :: (Level,Level) -> PhyloGroup -> [PhyloGroup] -> PhyloGroup
437 linkGroupToGroups (lvl,lvl') current targets
438 | lvl < lvl' = setLevelParents current
439 | lvl > lvl' = setLevelChilds current
440 | otherwise = current
442 --------------------------------------
443 setLevelChilds :: PhyloGroup -> PhyloGroup
444 setLevelChilds = over (phylo_groupLevelChilds) addPointers
445 --------------------------------------
446 setLevelParents :: PhyloGroup -> PhyloGroup
447 setLevelParents = over (phylo_groupLevelParents) addPointers
448 --------------------------------------
449 addPointers :: [Pointer] -> [Pointer]
450 addPointers lp = lp ++ Maybe.mapMaybe (\target ->
451 if shouldLink (lvl,lvl')
452 (_phylo_groupNgrams current)
453 (_phylo_groupNgrams target )
454 then Just ((getGroupId target),1)
455 else Nothing) targets
456 --------------------------------------
459 -- | To set the LevelLinks between two lists of PhyloGroups
460 linkGroupsByLevel :: (Level,Level) -> Phylo -> [PhyloGroup] -> [PhyloGroup]
461 linkGroupsByLevel (lvl,lvl') p groups = map (\group ->
462 if getGroupLevel group == lvl
463 then linkGroupToGroups (lvl,lvl') group (getGroupsWithFilters lvl' (getGroupPeriod group) p)
467 -- | To set the LevelLink of all the PhyloGroups of a Phylo
468 alterLevelLinks :: (Level,Level) -> Phylo -> Phylo
469 alterLevelLinks (lvl,lvl') p = alterPhyloGroups (linkGroupsByLevel (lvl,lvl') p) p
472 phyloLinked_0_m1 :: Phylo
473 phyloLinked_0_m1 = alterLevelLinks (0,(-1)) phyloWithGroups0
476 ------------------------------------------------------------------------
477 -- | STEP 5 | -- Build level 0 as a copy of level -1
480 -- | To clone the last PhyloLevel of each PhyloPeriod and update it with a new LevelValue
481 clonePhyloLevel :: Level -> Phylo -> Phylo
482 clonePhyloLevel lvl p = alterPhyloLevels (\l -> l ++ [setPhyloLevelId lvl $ head l]) p
485 phyloWithGroups0 :: Phylo
486 phyloWithGroups0 = updatePhyloByLevel 0 phyloWithGroupsm1
489 ------------------------------------------------------------------------
490 -- | STEP 4 | -- Build level -1
493 -- | To transform a list of Documents into a PhyloLevel
494 docsToPhyloLevel :: Level -> (Date, Date) -> [Document] -> Phylo -> PhyloLevel
495 docsToPhyloLevel lvl (d, d') docs p = initPhyloLevel
497 (map (\(f,s) -> initGroup [s] s f lvl d d' p)
500 $ map (words . text) docs)
503 -- | To transform a Map of Periods and Documents into a list of PhyloPeriods
504 docsToPhyloPeriods :: Level -> Map (Date,Date) [Document] -> Phylo -> [PhyloPeriod]
505 docsToPhyloPeriods lvl docs p = map (\(id,l) -> initPhyloPeriod id l)
507 $ mapWithKey (\k v -> [docsToPhyloLevel lvl k v p]) docs
510 -- | To update a Phylo for a given Levels
511 updatePhyloByLevel :: Level -> Phylo -> Phylo
512 updatePhyloByLevel lvl p
513 | lvl < 0 = appendToPhyloPeriods (docsToPhyloPeriods lvl phyloPeriods p) p
514 | lvl == 0 = clonePhyloLevel lvl p
515 | lvl == 1 = fisToPhyloLevel phyloFisFiltered p
516 | lvl > 1 = undefined
517 | otherwise = panic ("[ERR][Viz.Phylo.Example.updatePhyloByLevel] Level not defined")
520 instance AppendToPhylo Fis
522 --------------------------------------
523 -- | Level -> Map (Date,Date) [Fis] -> Phylo -> Phylo
524 addPhyloLevel lvl m p
525 | lvl == 1 = fisToPhyloLevel m p
526 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Fis at level <> 1")
527 --------------------------------------
530 instance AppendToPhylo Cluster
532 --------------------------------------
533 -- | appendByLevel :: Level -> Map (Date,Date) [Cluster] -> Phylo -> Phylo
534 addPhyloLevel lvl m p = undefined
535 --------------------------------------
538 instance AppendToPhylo Document
540 --------------------------------------
541 -- | Level -> Map (Date,Date) [Document] -> Phylo -> Phylo
542 addPhyloLevel lvl m p
543 | lvl < 0 = over (phylo_periods) (++ docsToPhyloPeriods lvl m p) p
544 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Documents at level <> -1")
545 --------------------------------------
548 phyloWithGroupsm1 :: Phylo
549 phyloWithGroupsm1 = updatePhyloByLevel (-1) phylo
552 ------------------------------------------------------------------------
553 -- | STEP 3 | -- Parse the Documents and group them by Periods
556 -- | To init a set of periods out of a given Grain and Step
557 docsToPeriods :: (Ord date, Enum date) => (doc -> date)
558 -> Grain -> Step -> [doc] -> Map (date, date) [doc]
559 docsToPeriods _ _ _ [] = panic "[ERR][Viz.Phylo.Example.docsToPeriods] Empty [Documents] can not have any periods"
560 docsToPeriods f g s es = Map.fromList $ zip hs $ map (inPeriode f es) hs
562 --------------------------------------
563 hs = steps g s $ both f (head es, last es)
564 --------------------------------------
565 inPeriode :: Ord b => (t -> b) -> [t] -> (b, b) -> [t]
566 inPeriode f' h (start,end) =
567 fst $ List.partition (\d -> f' d >= start && f' d <= end) h
568 --------------------------------------
569 steps :: (Eq date, Enum date) => Grain -> Step -> (date, date) -> [(date, date)]
570 steps s' o' (start,end) = map (\l -> (head l, last l))
571 $ chunkAlong s' o' [start .. end]
572 --------------------------------------
575 -- | To parse a list of Documents by filtering on a Vector of Ngrams
576 parseDocs :: PhyloNgrams -> [Document] -> [Document]
577 parseDocs l docs = map (\(Document d t)
578 -> Document d ( unwords
579 $ filter (\x -> Vector.elem x l)
583 -- | To group a list of Documents by fixed periods
584 groupDocsByPeriod :: Grain -> Step -> [Document] -> Phylo -> Map (Date, Date) [Document]
585 groupDocsByPeriod g s docs p = docsToPeriods date g s $ parseDocs (getPhyloNgrams p) docs
588 phyloPeriods :: Map (Date, Date) [Document]
589 phyloPeriods = groupDocsByPeriod 5 3 phyloDocs phylo
592 ------------------------------------------------------------------------
593 -- | STEP 2 | -- Init an initial list of Ngrams and a Phylo
597 phylo = initPhylo phyloDocs (initNgrams actants)
600 ------------------------------------------------------------------------
601 -- | STEP 1 | -- Get a list of Document
604 -- | To transform a corpus of texts into a structured list of Documents
605 corpusToDocs :: [(Date, Text)] -> [Document]
606 corpusToDocs l = map (\(d,t) -> Document d t) l
609 phyloDocs :: [Document]
610 phyloDocs = corpusToDocs corpus
613 ------------------------------------------------------------------------
614 -- | STEP 0 | -- Let's start with an example
618 actants = [ "Cleopatre" , "Ptolemee", "Ptolemee-XIII", "Ptolemee-XIV"
619 , "Marc-Antoine", "Cesar" , "Antoine" , "Octave" , "Rome"
620 , "Alexandrie" , "Auguste" , "Pompee" , "Cassius" , "Brutus"]
623 corpus :: [(Date, Text)]
624 corpus = List.sortOn fst [ (-51,"Cleopatre règne sur l’egypte entre 51 et 30 av. J.-C. avec ses frères-epoux Ptolemee-XIII et Ptolemee-XIV, puis aux côtes du general romain Marc-Antoine. Elle est celèbre pour avoir ete la compagne de Jules Cesar puis d'Antoine, avec lesquels elle a eu plusieurs enfants. Partie prenante dans la guerre civile opposant Antoine à Octave, elle est vaincue à la bataille d'Actium en 31 av. J.-C. Sa defaite va permettre aux Romains de mener à bien la conquête de l’egypte, evenement qui marquera la fin de l'epoque hellenistique."), (-40,"Il existe relativement peu d'informations sur son sejour à Rome, au lendemain de l'assassinat de Cesar, ou sur la periode passee à Alexandrie durant l'absence d'Antoine, entre -40 et -37."), (-48,"L'historiographie antique lui est globalement defavorable car inspiree par son vainqueur, l'empereur Auguste, et par son entourage, dont l'interêt est de la noircir, afin d'en faire l'adversaire malfaisant de Rome et le mauvais genie d'Antoine. On observe par ailleurs que Cesar ne fait aucune mention de sa liaison avec elle dans les Commentaires sur la Guerre civile"), (-69,"Cleopatre est nee au cours de l'hiver -69/-686 probablement à Alexandrie."), (-48,"Pompee a en effet ete le protecteur de Ptolemee XII, le père de Cleopatre et de Ptolemee-XIII dont il se considère comme le tuteur."), (-48,"Ptolemee-XIII et Cleopatre auraient d'ailleurs aide Pompee par l'envoi d'une flotte de soixante navires."), (-48,"Mais le jeune roi Ptolemee-XIII et ses conseillers jugent sa cause perdue et pensent s'attirer les bonnes graces du vainqueur en le faisant assassiner à peine a-t-il pose le pied sur le sol egyptien, près de Peluse, le 30 juillet 48 av. J.-C., sous les yeux de son entourage."), (-48,"Cesar fait enterrer la tête de Pompee dans le bosquet de Nemesis en bordure du mur est de l'enceinte d'Alexandrie. Pour autant la mort de Pompee est une aubaine pour Cesar qui tente par ailleurs de profiter des querelles dynastiques pour annexer l’egypte."), (-48,"Il est difficile de se prononcer clairement sur les raisons qui ont pousse Cesar à s'attarder à Alexandrie. Il y a des raisons politiques, mais aussi des raisons plus sentimentales (Cleopatre ?). Il tente d'abord d'obtenir le remboursement de dettes que Ptolemee XII"), (-46,"Les deux souverains sont convoques par Cesar au palais royal d'Alexandrie. Ptolemee-XIII s'y rend après diverses tergiversations ainsi que Cleopatre."), (-47,"A Rome, Cleopatre epouse alors un autre de ses frères cadets, à Alexandrie, Ptolemee-XIV, sur l'injonction de Jules Cesar"), (-46,"Cesar a-t-il comme objectif de montrer ce qu'il en coûte de se revolter contre Rome en faisant figurer dans son triomphe la sœur de Cleopatre et de Ptolemee-XIV, Arsinoe, qui s'est fait reconnaître reine par les troupes de Ptolemee-XIII ?"), (-44,"Au debut de l'annee -44, Cesar est assassine par Brutus. Profitant de la situation confuse qui s'ensuit, Cleopatre quitte alors Rome à la mi-avril, faisant escale en Grèce. Elle parvient à Alexandrie en juillet -44."), (-44,"La guerre que se livrent les assassins de Cesar, Cassius et Brutus et ses heritiers, Octave et Marc-Antoine, oblige Cleopatre à des contorsions diplomatiques."), (-41,"Nous ignorons depuis quand Cleopatre, agee de 29 ans en -41, et Marc-Antoine, qui a une quarantaine d'annees, se connaissent. Marc-Antoine est l'un des officiers qui ont participe au retablissement de Ptolemee XII. Il est plus vraisemblable qu'ils se soient frequentes lors du sejour à Rome de Cleopatre."), (-42,"Brutus tient la Grèce tandis que Cassius s'installe en Syrie. Le gouverneur de Cleopatre à Chypre, Serapion, vient en aide à Cassius."), (-42,"Cassius aurait envisage de s'emparer d'Alexandrie quand le 'debarquement' en Grèce d'Antoine et d'Octave l'oblige à renoncer à ses projets")]