2 Module : Gargantext.Viz.Phylo.Tools
3 Description : Phylomemy Tools to build/manage it
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE FlexibleContexts #-}
15 {-# LANGUAGE OverloadedStrings #-}
17 module Gargantext.Viz.Phylo.Aggregates.Document
20 import Control.Lens hiding (both, Level)
22 import Data.List (last,head,nub,(++))
23 import Data.Map (Map,member)
24 import Data.Text (Text, unwords, toLower, words)
25 import Data.Tuple (fst, snd)
26 import Data.Tuple.Extra
27 import Data.Vector (Vector)
29 import Gargantext.Prelude hiding (head)
30 import Gargantext.Text.Terms.Mono (monoTexts)
31 import Gargantext.Viz.Phylo
32 import Gargantext.Viz.Phylo.Tools
34 import qualified Data.List as List
35 import qualified Data.Map as Map
36 import qualified Data.Vector as Vector
39 -- | To init a list of Periods framed by a starting Date and an ending Date
40 initPeriods :: (Eq date, Enum date) => Grain -> Step -> (date, date) -> [(date, date)]
41 initPeriods g s (start,end) = map (\l -> (head l, last l))
42 $ chunkAlong g s [start .. end]
45 -- | To group a list of Documents by fixed periods
46 groupDocsByPeriod :: (Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc]
47 groupDocsByPeriod _ _ [] = panic "[ERR][Viz.Phylo.Example.docsToPeriods] Empty [Documents] can not have any periods"
48 groupDocsByPeriod f pds es = Map.fromList $ zip pds $ map (inPeriode f es) pds
50 --------------------------------------
51 inPeriode :: Ord b => (t -> b) -> [t] -> (b, b) -> [t]
52 inPeriode f' h (start,end) =
53 fst $ List.partition (\d -> f' d >= start && f' d <= end) h
54 --------------------------------------
56 reduceByPeaks :: Map Ngrams Ngrams -> [Ngrams] -> [Ngrams]
57 reduceByPeaks m ns = (\(f,s) -> f ++ (nub s))
58 $ foldl (\mem n -> if member n m
59 then (fst mem,(snd mem) ++ [m Map.! n])
60 else ((fst mem) ++ [n],snd mem)
63 -- | To parse a list of Documents by filtering on a Vector of Ngrams
64 parseDocs :: Vector Ngrams -> PhyloPeaks -> [(Date,Text)] -> [Document]
65 parseDocs fds peaks c = map (\(d,t)
66 -> Document d ( reduceByPeaks mPeaks
67 $ filter (\x -> Vector.elem x fds)
70 --------------------------------------
71 mPeaks :: Map Ngrams Ngrams
72 mPeaks = forestToMap (peaks ^. phylo_peaksForest)
73 --------------------------------------
76 -- | To transform a Corpus of texts into a Map of aggregated Documents grouped by Periods
77 corpusToDocs :: [(Date,Text)] -> Phylo -> Map (Date,Date) [Document]
78 corpusToDocs c p = groupDocsByPeriod date (getPhyloPeriods p)
79 $ parseDocs (getFoundations p) (getPeaks p) c