2 Module : Gargantext.Viz.Phylo.Tools
3 Description : Phylomemy Tools to build/manage it
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE FlexibleContexts #-}
15 {-# LANGUAGE OverloadedStrings #-}
17 module Gargantext.Viz.Phylo.Aggregates.Document
20 import Data.List (last,head)
22 import Data.Text (Text, unwords, toLower, words)
23 import Data.Tuple (fst, snd)
24 import Data.Tuple.Extra
25 import Data.Vector (Vector)
27 import Gargantext.Prelude hiding (head)
28 import Gargantext.Text.Terms.Mono (monoTexts)
29 import Gargantext.Viz.Phylo
30 import Gargantext.Viz.Phylo.Tools
32 import qualified Data.List as List
33 import qualified Data.Map as Map
34 import qualified Data.Vector as Vector
37 -- | To init a set of periods out of a given Grain and Step
38 docsToPeriods :: (Ord date, Enum date) => (doc -> date)
39 -> Grain -> Step -> [doc] -> Map (date, date) [doc]
40 docsToPeriods _ _ _ [] = panic "[ERR][Viz.Phylo.Example.docsToPeriods] Empty [Documents] can not have any periods"
41 docsToPeriods f g s es = Map.fromList $ zip hs $ map (inPeriode f es) hs
43 --------------------------------------
44 hs = steps g s $ both f (head es, last es)
45 --------------------------------------
46 inPeriode :: Ord b => (t -> b) -> [t] -> (b, b) -> [t]
47 inPeriode f' h (start,end) =
48 fst $ List.partition (\d -> f' d >= start && f' d <= end) h
49 --------------------------------------
50 steps :: (Eq date, Enum date) => Grain -> Step -> (date, date) -> [(date, date)]
51 steps s' o' (start,end) = map (\l -> (head l, last l))
52 $ chunkAlong s' o' [start .. end]
53 --------------------------------------
56 -- | To parse a list of Documents by filtering on a Vector of Ngrams
57 parseDocs :: PhyloNgrams -> [Document] -> [Document]
58 parseDocs l docs = map (\(Document d t)
59 -> Document d ( unwords
60 $ filter (\x -> Vector.elem x l)
64 -- | To group a list of Documents by fixed periods
65 groupDocsByPeriod :: Grain -> Step -> [Document] -> PhyloNgrams -> Map (Date, Date) [Document]
66 groupDocsByPeriod g s docs ngrams = docsToPeriods date g s $ parseDocs ngrams docs
69 -- | To transform a corpus of texts into a structured list of Documents
70 corpusToDocs :: [(Date, Text)] -> [Document]
71 corpusToDocs l = map (\(d,t) -> Document d t) l