2 Module : Gargantext.Viz.Phylo.Tools
3 Description : Phylomemy Tools to build/manage it
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE FlexibleContexts #-}
15 {-# LANGUAGE OverloadedStrings #-}
17 module Gargantext.Viz.Phylo.Aggregates.Document
20 import Data.List (last,head)
22 import Data.Text (Text, unwords, toLower, words)
23 import Data.Tuple (fst, snd)
24 import Data.Tuple.Extra
25 import Data.Vector (Vector)
27 import Gargantext.Prelude hiding (head)
28 import Gargantext.Text.Terms.Mono (monoTexts)
29 import Gargantext.Viz.Phylo
30 import Gargantext.Viz.Phylo.Tools
32 import qualified Data.List as List
33 import qualified Data.Map as Map
34 import qualified Data.Vector as Vector
37 -- | To init a list of Periods framed by a starting Date and an ending Date
38 initPeriods :: (Eq date, Enum date) => Grain -> Step -> (date, date) -> [(date, date)]
39 initPeriods g s (start,end) = map (\l -> (head l, last l))
40 $ chunkAlong g s [start .. end]
43 -- | To be defined, for the moment it's just the id function
44 groupNgramsWithTrees :: Ngrams -> Ngrams
45 groupNgramsWithTrees n = n
48 -- | To group a list of Documents by fixed periods
49 groupDocsByPeriod :: (Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc]
50 groupDocsByPeriod _ _ [] = panic "[ERR][Viz.Phylo.Example.docsToPeriods] Empty [Documents] can not have any periods"
51 groupDocsByPeriod f pds es = Map.fromList $ zip pds $ map (inPeriode f es) pds
53 --------------------------------------
54 inPeriode :: Ord b => (t -> b) -> [t] -> (b, b) -> [t]
55 inPeriode f' h (start,end) =
56 fst $ List.partition (\d -> f' d >= start && f' d <= end) h
57 --------------------------------------
60 -- | To parse a list of Documents by filtering on a Vector of Ngrams
61 parseDocs :: (Ngrams -> Ngrams) -> Vector Ngrams -> [Document] -> [Document]
62 parseDocs f l docs = map (\(Document d t)
63 -> Document d ( unwords
64 -- | To do : change 'f' for the Ngrams Tree Agregation
66 $ filter (\x -> Vector.elem x l)
70 -- | To transform a Corpus of texts into a Map of aggregated Documents grouped by Periods
71 corpusToDocs :: (Ngrams -> Ngrams) -> [(Date,Text)] -> Phylo -> Map (Date,Date) [Document]
72 corpusToDocs f c p = groupDocsByPeriod date (getPhyloPeriods p)
73 $ parseDocs f (getFoundations p) docs
75 --------------------------------------
77 docs = map (\(d,t) -> Document d t) c
78 --------------------------------------