]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo/Aggregates/Document.hs
Merge branch 'dev' into dev-phylo
[gargantext.git] / src / Gargantext / Viz / Phylo / Aggregates / Document.hs
1 {-|
2 Module : Gargantext.Viz.Phylo.Tools
3 Description : Phylomemy Tools to build/manage it
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10
11 -}
12
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE FlexibleContexts #-}
15 {-# LANGUAGE OverloadedStrings #-}
16
17 module Gargantext.Viz.Phylo.Aggregates.Document
18 where
19
20 import Data.Map (Map,fromListWith)
21 import Data.Text (Text)
22 import Data.Tuple (fst)
23 import Data.Vector (Vector)
24 import Gargantext.Prelude
25 import Gargantext.Text.Terms.Mono (monoTexts)
26 import Gargantext.Viz.Phylo
27 import qualified Data.List as List
28 import qualified Data.Map as Map
29 import qualified Data.Vector as Vector
30
31 import Debug.Trace (trace)
32
33
34 -- | To init a list of Periods framed by a starting Date and an ending Date
35 initPeriods :: (Eq date, Enum date) => Grain -> Step -> (date, date) -> [(date, date)]
36 initPeriods g s (start,end) = map (\l -> (head' "Doc" l, last' "Doc" l))
37 $ chunkAlong g s [start .. end]
38
39
40 -- | To group a list of Documents by fixed periods
41 groupDocsByPeriod :: (Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc]
42 groupDocsByPeriod _ _ [] = panic "[ERR][Viz.Phylo.Example.docsToPeriods] Empty [Documents] can not have any periods"
43 groupDocsByPeriod f pds es = trace ("----\nGroup docs by periods\n") $ Map.fromList $ zip pds $ map (inPeriode f es) pds
44 where
45 --------------------------------------
46 inPeriode :: Ord b => (t -> b) -> [t] -> (b, b) -> [t]
47 inPeriode f' h (start,end) =
48 fst $ List.partition (\d -> f' d >= start && f' d <= end) h
49 --------------------------------------
50
51
52 -- | To parse a list of Documents by filtering on a Vector of Ngrams
53 parseDocs :: Vector Ngrams -> [(Date,Text)] -> [Document]
54 parseDocs roots c = map (\(d,t)
55 -> Document d ( filter (\x -> Vector.elem x roots)
56 $ monoTexts t)) c
57
58 -- | To count the number of documents by year
59 countDocs :: [(Date,a)] -> Map Date Double
60 countDocs corpus = fromListWith (+) $ map (\(d,_) -> (d,1)) corpus
61
62
63
64