]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo/Aggregates/Document.hs
[REFACT] before scoring new ngrams lists.
[gargantext.git] / src / Gargantext / Viz / Phylo / Aggregates / Document.hs
1 {-|
2 Module : Gargantext.Viz.Phylo.Tools
3 Description : Phylomemy Tools to build/manage it
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10
11 -}
12
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE FlexibleContexts #-}
15 {-# LANGUAGE OverloadedStrings #-}
16
17 module Gargantext.Viz.Phylo.Aggregates.Document
18 where
19
20 import Data.List (last)
21 import Data.Map (Map)
22 import Data.Text (Text)
23 import Data.Tuple (fst)
24 import Data.Vector (Vector)
25 import Gargantext.Prelude
26 import Gargantext.Text.Terms.Mono (monoTexts)
27 import Gargantext.Viz.Phylo
28 import qualified Data.List as List
29 import qualified Data.Map as Map
30 import qualified Data.Vector as Vector
31
32
33 -- | To init a list of Periods framed by a starting Date and an ending Date
34 initPeriods :: (Eq date, Enum date) => Grain -> Step -> (date, date) -> [(date, date)]
35 initPeriods g s (start,end) = map (\l -> (head' "Doc" l, last l))
36 $ chunkAlong g s [start .. end]
37
38
39 -- | To group a list of Documents by fixed periods
40 groupDocsByPeriod :: (Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc]
41 groupDocsByPeriod _ _ [] = panic "[ERR][Viz.Phylo.Example.docsToPeriods] Empty [Documents] can not have any periods"
42 groupDocsByPeriod f pds es = Map.fromList $ zip pds $ map (inPeriode f es) pds
43 where
44 --------------------------------------
45 inPeriode :: Ord b => (t -> b) -> [t] -> (b, b) -> [t]
46 inPeriode f' h (start,end) =
47 fst $ List.partition (\d -> f' d >= start && f' d <= end) h
48 --------------------------------------
49
50
51 -- | To parse a list of Documents by filtering on a Vector of Ngrams
52 parseDocs :: Vector Ngrams -> [(Date,Text)] -> [Document]
53 parseDocs roots c = map (\(d,t)
54 -> Document d ( filter (\x -> Vector.elem x roots)
55 $ monoTexts t)) c
56
57