]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo/Aggregates/Document.hs
add a filter for fis with too few ngrams
[gargantext.git] / src / Gargantext / Viz / Phylo / Aggregates / Document.hs
1 {-|
2 Module : Gargantext.Viz.Phylo.Tools
3 Description : Phylomemy Tools to build/manage it
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10
11 -}
12
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE FlexibleContexts #-}
15 {-# LANGUAGE OverloadedStrings #-}
16
17 module Gargantext.Viz.Phylo.Aggregates.Document
18 where
19
20 import Control.Lens hiding (both, Level)
21
22 import Data.List (last,nub,(++))
23 import Data.Map (Map,member)
24 import Data.Text (Text)
25 import Data.Tuple (fst, snd)
26 import Data.Vector (Vector)
27 import Gargantext.Prelude
28 import Gargantext.Text.Terms.Mono (monoTexts)
29 import Gargantext.Viz.Phylo
30 import Gargantext.Viz.Phylo.Tools
31 import qualified Data.List as List
32 import qualified Data.Map as Map
33 import qualified Data.Vector as Vector
34
35
36 -- | To init a list of Periods framed by a starting Date and an ending Date
37 initPeriods :: (Eq date, Enum date) => Grain -> Step -> (date, date) -> [(date, date)]
38 initPeriods g s (start,end) = map (\l -> (head' "Doc" l, last l))
39 $ chunkAlong g s [start .. end]
40
41
42 -- | To group a list of Documents by fixed periods
43 groupDocsByPeriod :: (Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc]
44 groupDocsByPeriod _ _ [] = panic "[ERR][Viz.Phylo.Example.docsToPeriods] Empty [Documents] can not have any periods"
45 groupDocsByPeriod f pds es = Map.fromList $ zip pds $ map (inPeriode f es) pds
46 where
47 --------------------------------------
48 inPeriode :: Ord b => (t -> b) -> [t] -> (b, b) -> [t]
49 inPeriode f' h (start,end) =
50 fst $ List.partition (\d -> f' d >= start && f' d <= end) h
51 --------------------------------------
52
53 -- | Reduce a list of foundations as a list of corresponding roots
54 reduceByRoots :: Map Ngrams Ngrams -> [Ngrams] -> [Ngrams]
55 reduceByRoots m ns = (\(f,s) -> f ++ (nub s))
56 $ foldl (\mem n -> if member n m
57 then (fst mem,(snd mem) ++ [m Map.! n])
58 else ((fst mem) ++ [n],snd mem)
59 ) ([],[]) ns
60
61 -- | To parse a list of Documents by filtering on a Vector of Ngrams
62 parseDocs :: Vector Ngrams -> PhyloRoots -> [(Date,Text)] -> [Document]
63 parseDocs fds roots c = map (\(d,t)
64 -> Document d ( reduceByRoots mRoots
65 $ filter (\x -> Vector.elem x fds)
66 $ monoTexts t)) c
67 where
68 --------------------------------------
69 mRoots :: Map Ngrams Ngrams
70 mRoots = forestToMap (roots ^. phylo_rootsForest)
71 --------------------------------------
72
73
74 -- | To transform a Corpus of texts into a Map of aggregated Documents grouped by Periods
75
76