]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo/Aggregates/Document.hs
Merge branch 'dev-phylo' of https://gitlab.iscpif.fr/gargantext/haskell-gargantext...
[gargantext.git] / src / Gargantext / Viz / Phylo / Aggregates / Document.hs
1 {-|
2 Module : Gargantext.Viz.Phylo.Tools
3 Description : Phylomemy Tools to build/manage it
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10
11 -}
12
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE FlexibleContexts #-}
15 {-# LANGUAGE OverloadedStrings #-}
16
17 module Gargantext.Viz.Phylo.Aggregates.Document
18 where
19
20 import Data.List (last,head)
21 import Data.Map (Map)
22 import Data.Text (Text, unwords, toLower, words)
23 import Data.Tuple (fst, snd)
24 import Data.Tuple.Extra
25 import Data.Vector (Vector)
26
27 import Gargantext.Prelude hiding (head)
28 import Gargantext.Text.Terms.Mono (monoTexts)
29 import Gargantext.Viz.Phylo
30 import Gargantext.Viz.Phylo.Tools
31
32 import qualified Data.List as List
33 import qualified Data.Map as Map
34 import qualified Data.Vector as Vector
35
36
37 -- | To init a list of Periods framed by a starting Date and an ending Date
38 initPeriods :: (Eq date, Enum date) => Grain -> Step -> (date, date) -> [(date, date)]
39 initPeriods g s (start,end) = map (\l -> (head l, last l))
40 $ chunkAlong g s [start .. end]
41
42
43 -- | To be defined, for the moment it's just the id function
44 groupNgramsWithTrees :: Ngrams -> Ngrams
45 groupNgramsWithTrees n = n
46
47
48 -- | To group a list of Documents by fixed periods
49 groupDocsByPeriod :: (Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc]
50 groupDocsByPeriod _ _ [] = panic "[ERR][Viz.Phylo.Example.docsToPeriods] Empty [Documents] can not have any periods"
51 groupDocsByPeriod f pds es = Map.fromList $ zip pds $ map (inPeriode f es) pds
52 where
53 --------------------------------------
54 inPeriode :: Ord b => (t -> b) -> [t] -> (b, b) -> [t]
55 inPeriode f' h (start,end) =
56 fst $ List.partition (\d -> f' d >= start && f' d <= end) h
57 --------------------------------------
58
59
60 -- | To parse a list of Documents by filtering on a Vector of Ngrams
61 parseDocs :: (Ngrams -> Ngrams) -> Vector Ngrams -> [Document] -> [Document]
62 parseDocs f l docs = map (\(Document d t)
63 -> Document d ( unwords
64 -- | To do : change 'f' for the Ngrams Tree Agregation
65 $ map f
66 $ filter (\x -> Vector.elem x l)
67 $ monoTexts t)) docs
68
69
70 -- | To transform a Corpus of texts into a Map of aggregated Documents grouped by Periods
71 corpusToDocs :: (Ngrams -> Ngrams) -> [(Date,Text)] -> Phylo -> Map (Date,Date) [Document]
72 corpusToDocs f c p = groupDocsByPeriod date (getPhyloPeriods p)
73 $ parseDocs f (getFoundations p) docs
74 where
75 --------------------------------------
76 docs :: [Document]
77 docs = map (\(d,t) -> Document d t) c
78 --------------------------------------