]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo/Aggregates/Document.hs
adding some view functions
[gargantext.git] / src / Gargantext / Viz / Phylo / Aggregates / Document.hs
1 {-|
2 Module : Gargantext.Viz.Phylo.Tools
3 Description : Phylomemy Tools to build/manage it
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10
11 -}
12
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE FlexibleContexts #-}
15 {-# LANGUAGE OverloadedStrings #-}
16
17 module Gargantext.Viz.Phylo.Aggregates.Document
18 where
19
20 import Data.List (last,head)
21 import Data.Map (Map)
22 import Data.Text (Text, unwords, toLower, words)
23 import Data.Tuple (fst, snd)
24 import Data.Tuple.Extra
25 import Data.Vector (Vector)
26
27 import Gargantext.Prelude hiding (head)
28 import Gargantext.Text.Terms.Mono (monoTexts)
29 import Gargantext.Viz.Phylo
30 import Gargantext.Viz.Phylo.Tools
31
32 import qualified Data.List as List
33 import qualified Data.Map as Map
34 import qualified Data.Vector as Vector
35
36
37 -- | To init a set of periods out of a given Grain and Step
38 docsToPeriods :: (Ord date, Enum date) => (doc -> date)
39 -> Grain -> Step -> [doc] -> Map (date, date) [doc]
40 docsToPeriods _ _ _ [] = panic "[ERR][Viz.Phylo.Example.docsToPeriods] Empty [Documents] can not have any periods"
41 docsToPeriods f g s es = Map.fromList $ zip hs $ map (inPeriode f es) hs
42 where
43 --------------------------------------
44 hs = steps g s $ both f (head es, last es)
45 --------------------------------------
46 inPeriode :: Ord b => (t -> b) -> [t] -> (b, b) -> [t]
47 inPeriode f' h (start,end) =
48 fst $ List.partition (\d -> f' d >= start && f' d <= end) h
49 --------------------------------------
50 steps :: (Eq date, Enum date) => Grain -> Step -> (date, date) -> [(date, date)]
51 steps s' o' (start,end) = map (\l -> (head l, last l))
52 $ chunkAlong s' o' [start .. end]
53 --------------------------------------
54
55
56 -- | To parse a list of Documents by filtering on a Vector of Ngrams
57 parseDocs :: PhyloNgrams -> [Document] -> [Document]
58 parseDocs l docs = map (\(Document d t)
59 -> Document d ( unwords
60 $ filter (\x -> Vector.elem x l)
61 $ monoTexts t)) docs
62
63
64 -- | To group a list of Documents by fixed periods
65 groupDocsByPeriod :: Grain -> Step -> [Document] -> PhyloNgrams -> Map (Date, Date) [Document]
66 groupDocsByPeriod g s docs ngrams = docsToPeriods date g s $ parseDocs ngrams docs
67
68
69 -- | To transform a corpus of texts into a structured list of Documents
70 corpusToDocs :: [(Date, Text)] -> [Document]
71 corpusToDocs l = map (\(d,t) -> Document d t) l