]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo/LevelMaker.hs
add a filter for fis with too few ngrams
[gargantext.git] / src / Gargantext / Viz / Phylo / LevelMaker.hs
1 {-|
2 Module : Gargantext.Viz.Phylo.Tools
3 Description : Phylomemy Tools to build/manage it
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10
11 -}
12
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE FlexibleContexts #-}
15 {-# LANGUAGE OverloadedStrings #-}
16 {-# LANGUAGE TypeSynonymInstances #-}
17 {-# LANGUAGE FlexibleInstances #-}
18
19 module Gargantext.Viz.Phylo.LevelMaker
20 where
21
22 import Control.Lens hiding (both, Level)
23 import Data.List ((++), sort, concat, nub, zip, last)
24 import Data.Map (Map, (!), empty, restrictKeys, filterWithKey, singleton, union)
25 import Data.Text (Text)
26 import Data.Tuple.Extra
27 import Data.Vector (Vector)
28 import Gargantext.Prelude
29 import Gargantext.Viz.Phylo
30 import Gargantext.Viz.Phylo.Aggregates.Cluster
31 import Gargantext.Viz.Phylo.Aggregates.Cooc
32 import Gargantext.Viz.Phylo.Aggregates.Document
33 import Gargantext.Viz.Phylo.Aggregates.Fis
34 import Gargantext.Viz.Phylo.BranchMaker
35 import Gargantext.Viz.Phylo.LinkMaker
36 import Gargantext.Viz.Phylo.Tools
37 import qualified Data.Set as Set
38
39
40 -- | A typeClass for polymorphic PhyloLevel functions
41 class PhyloLevelMaker aggregate
42 where
43 -- | To add a new Level of PhyloGroups to a Phylo based on a list of Aggregates
44 addPhyloLevel :: Level -> Map (Date,Date) [aggregate] -> Phylo -> Phylo
45 -- | To create a list of PhyloGroups based on a list of aggregates a
46 toPhyloGroups :: Level -> (Date,Date) -> [aggregate] -> Map (Date,Date) [aggregate] -> Phylo -> [PhyloGroup]
47
48
49 instance PhyloLevelMaker PhyloCluster
50 where
51 --------------------------------------
52 -- | Level -> Map (Date,Date) [Cluster] -> Phylo -> Phylo
53 addPhyloLevel lvl m p
54 | lvl > 1 = toPhyloLevel lvl m p
55 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Clusters at level < 2")
56 --------------------------------------
57 -- | Level -> (Date,Date) -> [Cluster] -> Map (Date,Date) [Cluster] -> Phylo -> [PhyloGroup]
58 toPhyloGroups lvl (d,d') l m p = map (\(idx,cluster) -> clusterToGroup (d,d') lvl idx "" cluster m p) $ zip [1..] l
59 --------------------------------------
60
61
62 instance PhyloLevelMaker PhyloFis
63 where
64 --------------------------------------
65 -- | Level -> Map (Date,Date) [Fis] -> Phylo -> Phylo
66 addPhyloLevel lvl m p
67 | lvl == 1 = toPhyloLevel lvl m p
68 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Fis at level <> 1")
69 --------------------------------------
70 -- | Level -> (Date,Date) -> [Fis] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
71 toPhyloGroups lvl (d,d') l m p = map (\(idx,fis) -> cliqueToGroup (d,d') lvl idx "" fis m p) $ zip [1..] l
72 --------------------------------------
73
74
75 instance PhyloLevelMaker Document
76 where
77 --------------------------------------
78 -- | Level -> Map (Date,Date) [Document] -> Phylo -> Phylo
79 addPhyloLevel lvl m p
80 | lvl == 0 = toPhyloLevel lvl m p
81 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Documents at level <> 0")
82 --------------------------------------
83 -- | Level -> (Date,Date) -> [Document] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
84 toPhyloGroups lvl (d,d') l _m p = map (\(idx,ngram) -> ngramsToGroup (d,d') lvl idx ngram [ngram] p)
85 $ zip [1..]
86 $ (nub . concat)
87 $ map text l
88 --------------------------------------
89
90
91 -- | To transform a Cluster into a Phylogroup
92 clusterToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloCluster -> Map (Date,Date) [PhyloCluster] -> Phylo -> PhyloGroup
93 clusterToGroup prd lvl idx lbl groups _m p =
94 PhyloGroup ((prd, lvl), idx) lbl ngrams empty cooc Nothing [] [] [] (map (\g -> (getGroupId g, 1)) groups)
95 where
96 --------------------------------------
97 ngrams :: [Int]
98 ngrams = (sort . nub . concat) $ map getGroupNgrams groups
99 --------------------------------------
100 cooc :: Map (Int, Int) Double
101 cooc = filterWithKey (\k _ -> elem (fst k) ngrams && elem (snd k) ngrams)
102 $ foldl union empty
103 $ map getGroupCooc
104 $ getGroupsWithFilters 1 prd p
105 --------------------------------------
106
107
108 -- | To transform a Clique into a PhyloGroup
109 cliqueToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloFis -> Map (Date, Date) [PhyloFis] -> Phylo -> PhyloGroup
110 cliqueToGroup prd lvl idx lbl fis m p =
111 PhyloGroup ((prd, lvl), idx) lbl ngrams (singleton "support" (fromIntegral $ getSupport fis)) cooc Nothing [] [] [] []
112 where
113 --------------------------------------
114 ngrams :: [Int]
115 ngrams = sort $ map (\x -> getIdxInRoots x p)
116 $ Set.toList
117 $ getClique fis
118 --------------------------------------
119 cooc :: Map (Int, Int) Double
120 cooc = filterWithKey (\k _ -> elem (fst k) ngrams && elem (snd k) ngrams)
121 $ fisToCooc (restrictKeys m $ Set.fromList [prd]) p
122 --------------------------------------
123
124
125 -- | To transform a list of Ngrams into a PhyloGroup
126 ngramsToGroup :: PhyloPeriodId -> Level -> Int -> Text -> [Ngrams] -> Phylo -> PhyloGroup
127 ngramsToGroup prd lvl idx lbl ngrams p =
128 PhyloGroup ((prd, lvl), idx) lbl (sort $ map (\x -> getIdxInRoots x p) ngrams) empty empty Nothing [] [] [] []
129
130
131 -- | To traverse a Phylo and add a new PhyloLevel linked to a new list of PhyloGroups
132 toPhyloLevel :: PhyloLevelMaker a => Level -> Map (Date, Date) [a] -> Phylo -> Phylo
133 toPhyloLevel lvl m p = alterPhyloPeriods
134 (\period -> let pId = _phylo_periodId period
135 in over (phylo_periodLevels)
136 (\phyloLevels ->
137 let groups = toPhyloGroups lvl pId (m ! pId) m p
138 in phyloLevels ++ [PhyloLevel (pId, lvl) groups]
139 ) period) p
140
141
142 -- | To incrementally add new Levels to a Phylo by making all the linking and aggregation tasks
143 toNthLevel :: Level -> Proximity -> Cluster -> Phylo -> Phylo
144 toNthLevel lvlMax prox clus p
145 | lvl >= lvlMax = p
146 | otherwise = toNthLevel lvlMax prox clus
147 $ setPhyloBranches (lvl + 1)
148 $ interTempoMatching Descendant (lvl + 1) prox
149 $ interTempoMatching Ascendant (lvl + 1) prox
150 $ setLevelLinks (lvl, lvl + 1)
151 $ addPhyloLevel (lvl + 1)
152 (phyloToClusters lvl clus p) p
153 where
154 --------------------------------------
155 lvl :: Level
156 lvl = getLastLevel p
157 --------------------------------------
158
159
160 -- | To reconstruct the Level 1 of a Phylo based on a Clustering Method
161 toPhylo1 :: Cluster -> Proximity -> [Metric] -> [Filter] -> Map (Date, Date) [Document] -> Phylo -> Phylo
162 toPhylo1 clus prox metrics filters d p = case clus of
163 Fis (FisParams k s t) -> setPhyloBranches 1
164 $ interTempoMatching Descendant 1 prox
165 $ interTempoMatching Ascendant 1 prox
166 $ setLevelLinks (0,1)
167 $ setLevelLinks (1,0)
168 $ addPhyloLevel 1 phyloFis p
169 where
170 --------------------------------------
171 phyloFis :: Map (Date, Date) [PhyloFis]
172 phyloFis = toPhyloFis d k s t metrics filters
173 --------------------------------------
174
175 _ -> panic "[ERR][Viz.Phylo.LevelMaker.toPhylo1] fst clustering not recognized"
176
177
178 -- | To reconstruct the Level 0 of a Phylo
179 toPhylo0 :: Map (Date, Date) [Document] -> Phylo -> Phylo
180 toPhylo0 d p = addPhyloLevel 0 d p
181
182
183 -- | To reconstruct the Base of a Phylo
184
185 -- | To reconstruct a Phylomemy from a PhyloQueryBuild, a Corpus and a list of actants
186
187
188
189
190
191 class PhyloMaker corpus
192 where
193 toPhylo :: PhyloQueryBuild -> corpus -> [Ngrams] -> [Tree Ngrams] -> Phylo
194 toPhyloBase :: PhyloQueryBuild -> PhyloParam -> corpus -> [Ngrams] -> [Tree Ngrams] -> Phylo
195 corpusToDocs :: corpus -> Phylo -> Map (Date,Date) [Document]
196
197 instance PhyloMaker [(Date, Text)]
198 where
199 --------------------------------------
200 toPhylo q c a ts = toNthLevel (getNthLevel q) (getInterTemporalMatching q) (getNthCluster q) phylo1
201 where
202 --------------------------------------
203 phylo1 :: Phylo
204 phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) (getContextualUnitMetrics q) (getContextualUnitFilters q) phyloDocs phylo0
205 --------------------------------------
206 phylo0 :: Phylo
207 phylo0 = toPhylo0 phyloDocs phyloBase
208 --------------------------------------
209 phyloDocs :: Map (Date, Date) [Document]
210 phyloDocs = corpusToDocs c phyloBase
211 --------------------------------------
212 phyloBase :: Phylo
213 phyloBase = toPhyloBase q (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just q)) c a ts
214 --------------------------------------
215 --------------------------------------
216 toPhyloBase q p c a ts = initPhyloBase periods foundations roots p
217 where
218 --------------------------------------
219 roots :: PhyloRoots
220 roots = initRoots (map (\t -> alterLabels phyloAnalyzer t) ts) foundations
221 --------------------------------------
222 periods :: [(Date,Date)]
223 periods = initPeriods (getPeriodGrain q) (getPeriodSteps q)
224 $ both fst (head' "LevelMaker" c,last c)
225 --------------------------------------
226 foundations :: Vector Ngrams
227 foundations = initFoundations a
228 --------------------------------------
229 --------------------------------------
230 corpusToDocs c p = groupDocsByPeriod date (getPhyloPeriods p) $ parseDocs (getFoundations p) (getRoots p) c
231
232
233 instance PhyloMaker [Document]
234 where
235 --------------------------------------
236 toPhylo q c a ts = toNthLevel (getNthLevel q) (getInterTemporalMatching q) (getNthCluster q) phylo1
237 where
238 --------------------------------------
239 phylo1 :: Phylo
240 phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) (getContextualUnitMetrics q) (getContextualUnitFilters q) phyloDocs phylo0
241 --------------------------------------
242 phylo0 :: Phylo
243 phylo0 = toPhylo0 phyloDocs phyloBase
244 --------------------------------------
245 phyloDocs :: Map (Date, Date) [Document]
246 phyloDocs = corpusToDocs c phyloBase
247 --------------------------------------
248 phyloBase :: Phylo
249 phyloBase = toPhyloBase q (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just q)) c a ts
250 --------------------------------------
251 --------------------------------------
252 toPhyloBase q p c a ts = initPhyloBase periods foundations roots p
253 where
254 --------------------------------------
255 roots :: PhyloRoots
256 roots = initRoots (map (\t -> alterLabels phyloAnalyzer t) ts) foundations
257 --------------------------------------
258 periods :: [(Date,Date)]
259 periods = initPeriods (getPeriodGrain q) (getPeriodSteps q)
260 $ both date (head' "LevelMaker" c,last c)
261 --------------------------------------
262 foundations :: Vector Ngrams
263 foundations = initFoundations a
264 --------------------------------------
265 --------------------------------------
266 corpusToDocs c p = groupDocsByPeriod date (getPhyloPeriods p) c