]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo/Aggregates/Document.hs
add the phylopeaks and correct some bugs on the phylodocs
[gargantext.git] / src / Gargantext / Viz / Phylo / Aggregates / Document.hs
1 {-|
2 Module : Gargantext.Viz.Phylo.Tools
3 Description : Phylomemy Tools to build/manage it
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10
11 -}
12
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE FlexibleContexts #-}
15 {-# LANGUAGE OverloadedStrings #-}
16
17 module Gargantext.Viz.Phylo.Aggregates.Document
18 where
19
20 import Control.Lens hiding (both, Level)
21
22 import Data.List (last,head,nub,(++))
23 import Data.Map (Map,member)
24 import Data.Text (Text, unwords, toLower, words)
25 import Data.Tuple (fst, snd)
26 import Data.Tuple.Extra
27 import Data.Vector (Vector)
28
29 import Gargantext.Prelude hiding (head)
30 import Gargantext.Text.Terms.Mono (monoTexts)
31 import Gargantext.Viz.Phylo
32 import Gargantext.Viz.Phylo.Tools
33
34 import qualified Data.List as List
35 import qualified Data.Map as Map
36 import qualified Data.Vector as Vector
37
38
39 -- | To init a list of Periods framed by a starting Date and an ending Date
40 initPeriods :: (Eq date, Enum date) => Grain -> Step -> (date, date) -> [(date, date)]
41 initPeriods g s (start,end) = map (\l -> (head l, last l))
42 $ chunkAlong g s [start .. end]
43
44
45 -- | To group a list of Documents by fixed periods
46 groupDocsByPeriod :: (Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc]
47 groupDocsByPeriod _ _ [] = panic "[ERR][Viz.Phylo.Example.docsToPeriods] Empty [Documents] can not have any periods"
48 groupDocsByPeriod f pds es = Map.fromList $ zip pds $ map (inPeriode f es) pds
49 where
50 --------------------------------------
51 inPeriode :: Ord b => (t -> b) -> [t] -> (b, b) -> [t]
52 inPeriode f' h (start,end) =
53 fst $ List.partition (\d -> f' d >= start && f' d <= end) h
54 --------------------------------------
55
56 reduceByPeaks :: Map Ngrams Ngrams -> [Ngrams] -> [Ngrams]
57 reduceByPeaks m ns = (\(f,s) -> f ++ (nub s))
58 $ foldl (\mem n -> if member n m
59 then (fst mem,(snd mem) ++ [m Map.! n])
60 else ((fst mem) ++ [n],snd mem)
61 ) ([],[]) ns
62
63 -- | To parse a list of Documents by filtering on a Vector of Ngrams
64 parseDocs :: Vector Ngrams -> PhyloPeaks -> [(Date,Text)] -> [Document]
65 parseDocs fds peaks c = map (\(d,t)
66 -> Document d ( reduceByPeaks mPeaks
67 $ filter (\x -> Vector.elem x fds)
68 $ monoTexts t)) c
69 where
70 --------------------------------------
71 mPeaks :: Map Ngrams Ngrams
72 mPeaks = forestToMap (peaks ^. phylo_peaksForest)
73 --------------------------------------
74
75
76 -- | To transform a Corpus of texts into a Map of aggregated Documents grouped by Periods
77 corpusToDocs :: [(Date,Text)] -> Phylo -> Map (Date,Date) [Document]
78 corpusToDocs c p = groupDocsByPeriod date (getPhyloPeriods p)
79 $ parseDocs (getFoundations p) (getPeaks p) c