2 Module : Gargantext.Viz.Phylo.Example
3 Description : Phylomemy example based on history of Cleopatre.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 -- | Cesar et Cleôpatre
11 -- Exemple de phylomemie
12 -- French without accents
16 - split the functions : RAW -> Document -> Ngrams
18 -- reverse history: antechronologique
24 {-# LANGUAGE NoImplicitPrelude #-}
25 {-# LANGUAGE FlexibleContexts #-}
26 {-# LANGUAGE OverloadedStrings #-}
28 module Gargantext.Viz.Phylo.Example where
30 import Control.Lens hiding (makeLenses, both, Level)
32 import Data.Bool (Bool, not)
33 import Data.List (concat, union, intersect, tails, tail, head, last, null, zip, sort, length, any, (++), (!!), nub, sortOn, reverse, splitAt, take, delete, init)
34 import Data.Map (Map, elems, member, adjust, singleton, empty, (!), keys, restrictKeys, mapWithKey, filterWithKey, mapKeys, intersectionWith, unionWith)
35 import Data.Semigroup (Semigroup)
37 import Data.Text (Text, unwords, toLower, words)
38 import Data.Tuple (fst, snd)
39 import Data.Tuple.Extra
40 import Data.Vector (Vector, fromList, elemIndex)
42 import Gargantext.Prelude hiding (head)
43 import Gargantext.Text.Metrics.FrequentItemSet (fisWithSizePolyMap, Size(..))
44 import Gargantext.Text.Terms.Mono (monoTexts)
45 import Gargantext.Viz.Phylo
46 import Gargantext.Viz.Phylo.Tools
48 import qualified Data.Bool as Bool
49 import qualified Data.List as List
50 import qualified Data.Map as Map
51 import qualified Data.Maybe as Maybe
52 import qualified Data.Set as Set
53 import qualified Data.Tuple as Tuple
54 import qualified Data.Vector as Vector
57 ------------------------------------------------------------------------
58 -- | STEP 14 | -- Incrementaly cluster the PhyloGroups n times, link them through the Periods and build level n of the Phylo
61 ------------------------------------------------------------------------
62 -- | STEP 13 | -- Cluster the Fis
65 -- Il faudrait plutôt passer (Proximity,[Double]) où [Double] serait la liste des paramètres
66 groupsToGraph :: Proximity -> [PhyloGroup] -> [((PhyloGroup,PhyloGroup),Double)]
67 groupsToGraph prox groups = case prox of
68 WeightedLogJaccard -> map (\(x,y) -> ((x,y), weightedLogJaccard 0 (getGroupCooc x) (unifySharedKeys (getGroupCooc x) (getGroupCooc y)))) edges
71 edges :: [(PhyloGroup,PhyloGroup)]
72 edges = listToDirectedCombi groups
75 phyloToGraphs :: Level -> Proximity -> Phylo -> Map (Date,Date) [((PhyloGroup,PhyloGroup),Double)]
76 phyloToGraphs lvl prox p = Map.fromList
78 (map (\prd -> groupsToGraph prox
79 $ getGroupsWithFilters (getLevelValue lvl) prd p) periods)
81 --------------------------------------
82 periods :: [PhyloPeriodId]
83 periods = getPhyloPeriods p
84 --------------------------------------
87 ------------------------------------------------------------------------
88 -- | STEP 12 | -- Find the Branches
91 -- | To add a PhyloGroupId to list of Branches with conditions
92 addToBranches :: (Int,Int) -> PhyloGroupId -> [PhyloBranch] -> [PhyloBranch]
93 addToBranches (lvl,idx) id branches
94 | null branches = [newBranch]
95 | idx == lastIdx = (init branches) ++ [addGroupIdToBranch id (last branches)]
96 | otherwise = branches ++ [newBranch]
98 --------------------------------------
99 newBranch :: PhyloBranch
100 newBranch = PhyloBranch (lvl,idx) "" [id]
101 --------------------------------------
103 lastIdx = (snd . _phylo_branchId . last) branches
104 --------------------------------------
107 -- | To transform a list of PhyloGroups into a list of PhyloBranches where :
108 -- curr = the current PhyloGroup
109 -- rest = the rest of the initial list of PhyloGroups
110 -- next = the next PhyloGroups to be added in the current Branch
111 -- memo = the memory of the allready created Branches, the last one is the current one
112 groupsToBranches :: (Int,Int) -> PhyloGroup -> [PhyloGroup] -> [PhyloGroup] -> [PhyloBranch] -> Phylo -> [PhyloBranch]
113 groupsToBranches (lvl,idx) curr rest next memo p
114 | null rest' && null next' = memo'
115 | (not . null) next' = groupsToBranches (lvl,idx) (head next') rest' (tail next') memo' p
116 | otherwise = groupsToBranches (lvl,idx + 1) (head rest') (tail rest') [] memo' p
118 --------------------------------------
120 done = getGroupsFromIds (concat $ map (_phylo_branchGroups) memo) p
121 --------------------------------------
122 memo' :: [PhyloBranch]
123 memo' = addToBranches (lvl,idx) (getGroupId curr) memo
124 --------------------------------------
125 next' :: [PhyloGroup]
126 next' = filter (\x -> not $ elem x done) $ nub $ next ++ (getGroupPairs curr p)
127 --------------------------------------
128 rest' :: [PhyloGroup]
129 rest' = filter (\x -> not $ elem x next') rest
130 --------------------------------------
133 -- | To set all the PhyloBranches for a given Level in a Phylo
134 setPhyloBranches :: Level -> Phylo -> Phylo
135 setPhyloBranches lvl p = alterPhyloBranches
136 (\branches -> branches ++ (groupsToBranches
137 (getLevelValue lvl, 0)
143 --------------------------------------
144 groups :: [PhyloGroup]
145 groups = getGroupsWithLevel (getLevelValue lvl) p
146 --------------------------------------
149 phyloWithBranches_1 = setPhyloBranches (initLevel 1 Level_1) phyloWithPair_1_Childs
152 ------------------------------------------------------------------------
153 -- | STEP 11 | -- Link the PhyloGroups of level 1 through the Periods
156 -- | To process the weightedLogJaccard between two PhyloGroups fields
157 weightedLogJaccard :: Double -> Map (Int, Int) Double -> Map (Int, Int) Double -> Double
158 weightedLogJaccard s f1 f2
160 | wUnion == wInter = 1
161 | s == 0 = (fromIntegral $ length wInter)/(fromIntegral $ length wUnion)
162 | s > 0 = (sumInvLog wInter)/(sumInvLog wUnion)
163 | otherwise = (sumLog wInter)/(sumLog wUnion)
165 --------------------------------------
167 wInter = elems $ intersectionWith (+) f1 f2
168 --------------------------------------
170 wUnion = elems $ unionWith (+) f1 f2
171 --------------------------------------
172 sumInvLog :: [Double] -> Double
173 sumInvLog l = foldl (\mem x -> mem + (1 / log (s + x))) 0 l
174 --------------------------------------
175 sumLog :: [Double] -> Double
176 sumLog l = foldl (\mem x -> mem + log (s + x)) 0 l
177 --------------------------------------
180 -- | To apply the corresponding proximity function based on a given Proximity
181 getProximity :: Proximity -> Double -> PhyloGroup -> PhyloGroup -> (PhyloGroupId, Double)
182 getProximity prox s g1 g2 = case prox of
183 WeightedLogJaccard -> ((getGroupId g2),weightedLogJaccard s (getGroupCooc g1) (unifySharedKeys (getGroupCooc g2) (getGroupCooc g1)))
185 _ -> panic ("[ERR][Viz.Phylo.Example.getProximity] Proximity function not defined")
188 -- | To get the next or previous PhyloPeriod based on a given PhyloPeriodId
189 getNextPeriods :: PairTo -> PhyloPeriodId -> [PhyloPeriodId] -> [PhyloPeriodId]
190 getNextPeriods to id l = case to of
191 Childs -> unNested id ((tail . snd) next)
192 Parents -> unNested id ((reverse . fst) next)
193 _ -> panic ("[ERR][Viz.Phylo.Example.getNextPeriods] PairTo type not defined")
195 --------------------------------------
196 next :: ([PhyloPeriodId], [PhyloPeriodId])
198 --------------------------------------
200 idx = case (List.elemIndex id l) of
201 Nothing -> panic ("[ERR][Viz.Phylo.Example.getNextPeriods] PhyloPeriodId not defined")
203 --------------------------------------
204 -- | To have an non-overlapping next period
205 unNested :: PhyloPeriodId -> [PhyloPeriodId] -> [PhyloPeriodId]
208 | nested (fst $ head l) x = unNested x (tail l)
209 | nested (snd $ head l) x = unNested x (tail l)
211 --------------------------------------
212 nested :: Date -> PhyloPeriodId -> Bool
213 nested d prd = d >= fst prd && d <= snd prd
214 --------------------------------------
217 -- | To find the best set (max = 2) of Childs/Parents candidates based on a given Proximity mesure until a maximum depth (max = Period + 5 units )
218 findBestCandidates :: PairTo -> Int -> Int -> Double -> Double -> PhyloGroup -> Phylo -> [(PhyloGroupId, Double)]
219 findBestCandidates to depth max thr s group p
220 | depth > max || null next = []
221 | (not . null) best = take 2 best
222 | otherwise = findBestCandidates to (depth + 1) max thr s group p
224 --------------------------------------
225 next :: [PhyloPeriodId]
226 next = getNextPeriods to (getGroupPeriod group) (getPhyloPeriods p)
227 --------------------------------------
228 candidates :: [PhyloGroup]
229 candidates = getGroupsWithFilters (getGroupLevel group) (head next) p
230 --------------------------------------
231 scores :: [(PhyloGroupId, Double)]
232 scores = map (\group' -> getProximity WeightedLogJaccard s group group') candidates
233 --------------------------------------
234 best :: [(PhyloGroupId, Double)]
237 $ filter (\(id,score) -> score >= thr) scores
238 --------------------------------------
241 -- | To add a new list of Pointers into an existing Childs/Parents list of Pointers
242 makePair :: PairTo -> PhyloGroup -> [(PhyloGroupId, Double)] -> PhyloGroup
243 makePair to group ids = case to of
244 Childs -> over (phylo_groupPeriodChilds) addPointers group
245 Parents -> over (phylo_groupPeriodParents) addPointers group
246 _ -> panic ("[ERR][Viz.Phylo.Example.makePair] PairTo type not defined")
248 --------------------------------------
249 addPointers :: [Pointer] -> [Pointer]
250 addPointers l = nub $ (l ++ ids)
251 --------------------------------------
254 -- | To pair all the Phylogroups of given PhyloLevel to their best Parents or Childs
255 pairGroupsToGroups :: PairTo -> Level -> Double -> Double -> Phylo -> Phylo
256 pairGroupsToGroups to lvl thr s p = alterPhyloGroups
259 if (getGroupLevel group) == (getLevelValue lvl)
262 --------------------------------------
263 candidates :: [(PhyloGroupId, Double)]
264 candidates = findBestCandidates to 1 5 thr s group p
265 --------------------------------------
267 makePair to group candidates
272 phyloWithPair_1_Childs :: Phylo
273 phyloWithPair_1_Childs = pairGroupsToGroups Childs (initLevel 1 Level_1) 0.01 0 phyloWithPair_1_Parents
276 phyloWithPair_1_Parents :: Phylo
277 phyloWithPair_1_Parents = pairGroupsToGroups Parents (initLevel 1 Level_1) 0.01 0 phyloLinked_0_1
280 ------------------------------------------------------------------------
281 -- | STEP 10 | -- Build the coocurency Matrix of the Phylo
284 -- | Are two PhyloGroups sharing at leats one Ngrams
285 shareNgrams :: PhyloGroup -> PhyloGroup -> Bool
286 shareNgrams g g' = (not . null) $ intersect (getGroupNgrams g) (getGroupNgrams g')
289 -- | To get the good pair of keys (x,y) or (y,x) in a given Map (a,b) c
290 getKeyPair :: (Int,Int) -> Map (Int,Int) a -> (Int,Int)
291 getKeyPair (x,y) m = case findPair (x,y) m of
292 Nothing -> panic "[ERR][Viz.Phylo.Example.getKeyPair] Nothing"
295 --------------------------------------
296 findPair :: (Int,Int) -> Map (Int,Int) a -> Maybe (Int,Int)
298 | member (x,y) m = Just (x,y)
299 | member (y,x) m = Just (y,x)
300 | otherwise = Nothing
301 --------------------------------------
304 -- | To transform the Fis into a coocurency Matrix in a Phylo
305 fisToCooc :: Map (Date, Date) Fis -> Phylo -> Map (Int, Int) Double
306 fisToCooc m p = map (/docs)
307 $ foldl (\mem x -> adjust (+1) (getKeyPair x mem) mem) cooc
309 $ map (\x -> listToUnDirectedCombiWith (\x -> ngramsToIdx x p) $ (Set.toList . fst) x) fis
311 --------------------------------------
312 fis :: [(Clique,Support)]
313 fis = concat $ map (\x -> Map.toList x) (elems m)
314 --------------------------------------
315 fisNgrams :: [Ngrams]
316 fisNgrams = foldl (\mem x -> union mem $ (Set.toList . fst) x) [] fis
317 --------------------------------------
319 docs = fromIntegral $ foldl (\mem x -> mem + (snd x)) 0 fis
320 --------------------------------------
321 cooc :: Map (Int, Int) (Double)
322 cooc = Map.fromList $ map (\x -> (x,0)) (listToUnDirectedCombiWith (\x -> ngramsToIdx x p) fisNgrams)
323 --------------------------------------
326 phyloCooc :: Map (Int, Int) Double
327 phyloCooc = fisToCooc phyloFisFiltered phyloLinked_0_1
330 ------------------------------------------------------------------------
331 -- | STEP 9 | -- Build level 1 of the Phylo
334 -- | To Cliques into Groups
335 cliqueToGroup :: PhyloPeriodId -> Int -> Int -> Ngrams -> (Clique,Support) -> Map (Date, Date) Fis -> Phylo -> PhyloGroup
336 cliqueToGroup period lvl idx label fis m p = PhyloGroup ((period, lvl), idx)
339 (singleton "support" (fromIntegral $ snd fis))
343 --------------------------------------
345 ngrams = sort $ map (\x -> ngramsToIdx x p)
348 --------------------------------------
349 cooc :: Map (Int, Int) Double
350 cooc = filterWithKey (\k _ -> elem (fst k) ngrams && elem (snd k) ngrams)
351 $ fisToCooc (restrictKeys m $ Set.fromList [period]) p
352 --------------------------------------
355 -- | To transform Fis into PhyloLevels
356 fisToPhyloLevel :: Map (Date, Date) Fis -> Phylo -> Phylo
357 fisToPhyloLevel m p = over (phylo_periods . traverse)
359 let periodId = _phylo_periodId period
360 fisList = zip [1..] (Map.toList (m ! periodId))
361 in over (phylo_periodLevels)
363 let groups = map (\fis -> cliqueToGroup periodId 1 (fst fis) "" (snd fis) m p) fisList
364 in levels ++ [PhyloLevel (periodId, 1) groups]
368 phyloLinked_0_1 :: Phylo
369 phyloLinked_0_1 = alterLevelLinks lvl_0_1 phyloLinked_1_0
373 lvl_0_1 = initLevelLink (initLevel 0 Level_0) (initLevel 1 Level_1)
376 phyloLinked_1_0 :: Phylo
377 phyloLinked_1_0 = alterLevelLinks lvl_1_0 phyloWithGroups1
381 lvl_1_0 = initLevelLink (initLevel 1 Level_1) (initLevel 0 Level_0)
384 phyloWithGroups1 :: Phylo
385 phyloWithGroups1 = updatePhyloByLevel (initLevel 1 Level_1) phyloLinked_m1_0
388 ------------------------------------------------------------------------
389 -- | STEP 8 | -- Create Frequent Items Sets by Period and filter them
392 -- | To Filter Fis by support
393 filterFisBySupport :: Bool -> Int -> Map (Date, Date) Fis -> Map (Date, Date) Fis
394 filterFisBySupport empty min m = case empty of
395 True -> Map.map (\fis -> filterMinorFis min fis) m
396 False -> Map.map (\fis -> filterMinorFisNonEmpty min fis) m
399 -- | To filter Fis with small Support, to preserve nonempty periods please use : filterFisBySupport False
400 filterMinorFis :: Int -> Fis -> Fis
401 filterMinorFis min fis = Map.filter (\s -> s > min) fis
404 -- | To filter Fis with small Support but by keeping non empty Periods
405 filterMinorFisNonEmpty :: Int -> Fis -> Fis
406 filterMinorFisNonEmpty min fis = if (Map.null fis') && (not $ Map.null fis)
407 then filterMinorFisNonEmpty (min - 1) fis
410 --------------------------------------
412 fis' = filterMinorFis min fis
413 --------------------------------------
416 -- | To filter nested Fis
417 filterFisByNested :: Map (Date, Date) Fis -> Map (Date, Date) Fis
418 filterFisByNested = map (\fis -> restrictKeys fis
420 $ filterNestedSets (head (keys fis)) (keys fis) []
424 -- | To transform a list of Documents into a Frequent Items Set
425 docsToFis :: Map (Date, Date) [Document] -> Map (Date, Date) Fis
426 docsToFis docs = map (\d -> fisWithSizePolyMap
429 (map (words . text) d)) docs
432 phyloFisFiltered :: Map (Date, Date) Fis
433 phyloFisFiltered = filterFisBySupport True 1 (filterFisByNested phyloFis)
436 phyloFis :: Map (Date, Date) Fis
437 phyloFis = docsToFis phyloPeriods
440 ------------------------------------------------------------------------
441 -- | STEP 7 | -- Link level -1 to level 0
444 phyloLinked_m1_0 :: Phylo
445 phyloLinked_m1_0 = alterLevelLinks lvl_m1_0 phyloLinked_0_m1
448 lvl_m1_0 :: LevelLink
449 lvl_m1_0 = initLevelLink (initLevel (-1) Level_m1) (initLevel 0 Level_0)
452 ------------------------------------------------------------------------
453 -- | STEP 6 | -- Link level 0 to level -1
456 -- | To set the LevelLinks between a given PhyloGroup and a list of childs/parents PhyloGroups
457 linkGroupToGroups :: LevelLink -> PhyloGroup -> [PhyloGroup] -> PhyloGroup
458 linkGroupToGroups lvl current targets
459 | getLevelLinkValue From lvl < getLevelLinkValue To lvl = setLevelParents current
460 | getLevelLinkValue From lvl > getLevelLinkValue To lvl = setLevelChilds current
461 | otherwise = current
463 --------------------------------------
464 setLevelChilds :: PhyloGroup -> PhyloGroup
465 setLevelChilds = over (phylo_groupLevelChilds) addPointers
466 --------------------------------------
467 setLevelParents :: PhyloGroup -> PhyloGroup
468 setLevelParents = over (phylo_groupLevelParents) addPointers
469 --------------------------------------
470 addPointers :: [Pointer] -> [Pointer]
471 addPointers lp = lp ++ Maybe.mapMaybe (\target ->
472 if shouldLink lvl (_phylo_groupNgrams current)
473 (_phylo_groupNgrams target )
474 then Just ((getGroupId target),1)
475 else Nothing) targets
476 --------------------------------------
479 -- | To set the LevelLinks between two lists of PhyloGroups
480 linkGroupsByLevel :: LevelLink -> Phylo -> [PhyloGroup] -> [PhyloGroup]
481 linkGroupsByLevel lvl p groups = map (\group ->
482 if getGroupLevel group == getLevelLinkValue From lvl
483 then linkGroupToGroups lvl group (getGroupsWithFilters (getLevelLinkValue To lvl) (getGroupPeriod group) p)
487 -- | To set the LevelLink of all the PhyloGroups of a Phylo
488 alterLevelLinks :: LevelLink -> Phylo -> Phylo
489 alterLevelLinks lvl p = alterPhyloGroups (linkGroupsByLevel lvl p) p
492 phyloLinked_0_m1 :: Phylo
493 phyloLinked_0_m1 = alterLevelLinks lvl_0_m1 phyloWithGroups0
496 lvl_0_m1 :: LevelLink
497 lvl_0_m1 = initLevelLink (initLevel 0 Level_0) (initLevel (-1) Level_m1)
500 ------------------------------------------------------------------------
501 -- | STEP 5 | -- Build level 0 as a copy of level -1
504 -- | To clone the last PhyloLevel of each PhyloPeriod and update it with a new LevelValue
505 clonePhyloLevel :: Int -> Phylo -> Phylo
506 clonePhyloLevel lvl p = alterPhyloLevels (\l -> addPhyloLevel
507 (setPhyloLevelId lvl $ head l)
511 phyloWithGroups0 :: Phylo
512 phyloWithGroups0 = updatePhyloByLevel (initLevel 0 Level_0) phyloWithGroupsm1
515 ------------------------------------------------------------------------
516 -- | STEP 4 | -- Build level -1
519 -- | To transform a list of Documents into a PhyloLevel
520 docsToPhyloLevel :: Int ->(Date, Date) -> [Document] -> Phylo -> PhyloLevel
521 docsToPhyloLevel lvl (d, d') docs p = initPhyloLevel
523 (map (\(f,s) -> initGroup [s] s f lvl d d' p)
526 $ map (words . text) docs)
529 -- | To transform a Map of Periods and Documents into a list of PhyloPeriods
530 docsToPhyloPeriods :: Int -> Map (Date,Date) [Document] -> Phylo -> [PhyloPeriod]
531 docsToPhyloPeriods lvl docs p = map (\(id,l) -> initPhyloPeriod id l)
534 --------------------------------------
535 levels :: Map (Date,Date) [PhyloLevel]
536 levels = mapWithKey (\k v -> [docsToPhyloLevel lvl k v p]) docs
537 --------------------------------------
540 -- | To update a Phylo for a given Levels
541 updatePhyloByLevel :: Level -> Phylo -> Phylo
542 updatePhyloByLevel lvl p
543 = case getLevelLabel lvl of
545 Level_m1 -> appendPhyloPeriods (docsToPhyloPeriods (getLevelValue lvl) lvlData p) p
547 --------------------------------------
548 lvlData :: Map (Date,Date) [Document]
549 lvlData = phyloPeriods
550 --------------------------------------
552 Level_0 -> clonePhyloLevel (getLevelValue lvl) p
554 Level_1 -> fisToPhyloLevel lvlData p
556 --------------------------------------
557 lvlData :: Map (Date, Date) Fis
558 lvlData = phyloFisFiltered
559 --------------------------------------
561 _ -> panic ("[ERR][Viz.Phylo.Example.updatePhyloByLevel] Level not defined")
564 phyloWithGroupsm1 :: Phylo
565 phyloWithGroupsm1 = updatePhyloByLevel (initLevel (-1) Level_m1) phylo
568 ------------------------------------------------------------------------
569 -- | STEP 3 | -- Parse the Documents and group them by Periods
572 -- | To init a set of periods out of a given Grain and Step
573 docsToPeriods :: (Ord date, Enum date) => (doc -> date)
574 -> Grain -> Step -> [doc] -> Map (date, date) [doc]
575 docsToPeriods _ _ _ [] = panic "[ERR][Viz.Phylo.Example.docsToPeriods] Empty [Documents] can not have any periods"
576 docsToPeriods f g s es = Map.fromList $ zip hs $ map (inPeriode f es) hs
578 --------------------------------------
579 hs = steps g s $ both f (head es, last es)
580 --------------------------------------
581 inPeriode :: Ord b => (t -> b) -> [t] -> (b, b) -> [t]
582 inPeriode f' h (start,end) =
583 fst $ List.partition (\d -> f' d >= start && f' d <= end) h
584 --------------------------------------
585 steps :: (Eq date, Enum date) => Grain -> Step -> (date, date) -> [(date, date)]
586 steps s' o' (start,end) = map (\l -> (head l, last l))
587 $ chunkAlong s' o' [start .. end]
588 --------------------------------------
591 -- | To parse a list of Documents by filtering on a Vector of Ngrams
592 parseDocs :: PhyloNgrams -> [Document] -> [Document]
593 parseDocs l docs = map (\(Document d t) -> Document d (unwords
594 $ filter (\x -> Vector.elem x l)
598 -- | To group a list of Documents by fixed periods
599 groupDocsByPeriod :: Grain -> Step -> [Document] -> Phylo -> Map (Date, Date) [Document]
600 groupDocsByPeriod g s docs p = docsToPeriods date g s
601 $ parseDocs (getPhyloNgrams p) docs
604 phyloPeriods :: Map (Date, Date) [Document]
605 phyloPeriods = groupDocsByPeriod 5 3 phyloDocs phylo
608 ------------------------------------------------------------------------
609 -- | STEP 2 | -- Init an initial list of Ngrams and a Phylo
612 -- | To init a Phylomemy
613 initPhylo :: [Document] -> PhyloNgrams -> Phylo
614 initPhylo docs ngrams = Phylo (both date $ (last &&& head) docs) ngrams [] []
617 -- | To init a PhyloNgrams as a Vector of Ngrams
618 initNgrams :: [Ngrams] -> PhyloNgrams
619 initNgrams l = Vector.fromList $ map toLower l
623 phylo = initPhylo phyloDocs (initNgrams actants)
626 ------------------------------------------------------------------------
627 -- | STEP 1 | -- Get a list of Document
630 -- | To transform a corpus of texts into a structured list of Documents
631 corpusToDocs :: [(Date, Text)] -> [Document]
632 corpusToDocs l = map (\(d,t) -> Document d t) l
635 phyloDocs :: [Document]
636 phyloDocs = corpusToDocs corpus
639 ------------------------------------------------------------------------
640 -- | STEP 0 | -- Let's start with an example
644 actants = [ "Cleopatre" , "Ptolemee", "Ptolemee-XIII", "Ptolemee-XIV"
645 , "Marc-Antoine", "Cesar" , "Antoine" , "Octave" , "Rome"
646 , "Alexandrie" , "Auguste" , "Pompee" , "Cassius" , "Brutus"]
649 corpus :: [(Date, Text)]
650 corpus = List.sortOn fst [ (-51,"Cleopatre règne sur l’egypte entre 51 et 30 av. J.-C. avec ses frères-epoux Ptolemee-XIII et Ptolemee-XIV, puis aux côtes du general romain Marc-Antoine. Elle est celèbre pour avoir ete la compagne de Jules Cesar puis d'Antoine, avec lesquels elle a eu plusieurs enfants. Partie prenante dans la guerre civile opposant Antoine à Octave, elle est vaincue à la bataille d'Actium en 31 av. J.-C. Sa defaite va permettre aux Romains de mener à bien la conquête de l’egypte, evenement qui marquera la fin de l'epoque hellenistique."), (-40,"Il existe relativement peu d'informations sur son sejour à Rome, au lendemain de l'assassinat de Cesar, ou sur la periode passee à Alexandrie durant l'absence d'Antoine, entre -40 et -37."), (-48,"L'historiographie antique lui est globalement defavorable car inspiree par son vainqueur, l'empereur Auguste, et par son entourage, dont l'interêt est de la noircir, afin d'en faire l'adversaire malfaisant de Rome et le mauvais genie d'Antoine. On observe par ailleurs que Cesar ne fait aucune mention de sa liaison avec elle dans les Commentaires sur la Guerre civile"), (-69,"Cleopatre est nee au cours de l'hiver -69/-686 probablement à Alexandrie."), (-48,"Pompee a en effet ete le protecteur de Ptolemee XII, le père de Cleopatre et de Ptolemee-XIII dont il se considère comme le tuteur."), (-48,"Ptolemee-XIII et Cleopatre auraient d'ailleurs aide Pompee par l'envoi d'une flotte de soixante navires."), (-48,"Mais le jeune roi Ptolemee-XIII et ses conseillers jugent sa cause perdue et pensent s'attirer les bonnes graces du vainqueur en le faisant assassiner à peine a-t-il pose le pied sur le sol egyptien, près de Peluse, le 30 juillet 48 av. J.-C., sous les yeux de son entourage."), (-48,"Cesar fait enterrer la tête de Pompee dans le bosquet de Nemesis en bordure du mur est de l'enceinte d'Alexandrie. Pour autant la mort de Pompee est une aubaine pour Cesar qui tente par ailleurs de profiter des querelles dynastiques pour annexer l’egypte."), (-48,"Il est difficile de se prononcer clairement sur les raisons qui ont pousse Cesar à s'attarder à Alexandrie. Il y a des raisons politiques, mais aussi des raisons plus sentimentales (Cleopatre ?). Il tente d'abord d'obtenir le remboursement de dettes que Ptolemee XII"), (-46,"Les deux souverains sont convoques par Cesar au palais royal d'Alexandrie. Ptolemee-XIII s'y rend après diverses tergiversations ainsi que Cleopatre."), (-47,"A Rome, Cleopatre epouse alors un autre de ses frères cadets, à Alexandrie, Ptolemee-XIV, sur l'injonction de Jules Cesar"), (-46,"Cesar a-t-il comme objectif de montrer ce qu'il en coûte de se revolter contre Rome en faisant figurer dans son triomphe la sœur de Cleopatre et de Ptolemee-XIV, Arsinoe, qui s'est fait reconnaître reine par les troupes de Ptolemee-XIII ?"), (-44,"Au debut de l'annee -44, Cesar est assassine par Brutus. Profitant de la situation confuse qui s'ensuit, Cleopatre quitte alors Rome à la mi-avril, faisant escale en Grèce. Elle parvient à Alexandrie en juillet -44."), (-44,"La guerre que se livrent les assassins de Cesar, Cassius et Brutus et ses heritiers, Octave et Marc-Antoine, oblige Cleopatre à des contorsions diplomatiques."), (-41,"Nous ignorons depuis quand Cleopatre, agee de 29 ans en -41, et Marc-Antoine, qui a une quarantaine d'annees, se connaissent. Marc-Antoine est l'un des officiers qui ont participe au retablissement de Ptolemee XII. Il est plus vraisemblable qu'ils se soient frequentes lors du sejour à Rome de Cleopatre."), (-42,"Brutus tient la Grèce tandis que Cassius s'installe en Syrie. Le gouverneur de Cleopatre à Chypre, Serapion, vient en aide à Cassius."), (-42,"Cassius aurait envisage de s'emparer d'Alexandrie quand le 'debarquement' en Grèce d'Antoine et d'Octave l'oblige à renoncer à ses projets")]