2 Module : Gargantext.Viz.Phylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Specifications of Phylomemy export format.
12 Phylomemy can be described as a Temporal Graph with different scale of
13 granularity of group of ngrams (terms and multi-terms).
15 The main type is Phylo which is synonym of Phylomemy (only difference is
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
25 {-# LANGUAGE DeriveAnyClass #-}
26 {-# LANGUAGE TemplateHaskell #-}
28 module Gargantext.Viz.Phylo where
30 import Control.DeepSeq
31 import Control.Lens (makeLenses)
32 import Data.Aeson.TH (deriveJSON,defaultOptions)
34 import Data.Maybe (Maybe)
37 import Data.Text (Text)
38 import Data.Vector (Vector)
39 import GHC.Generics (Generic)
40 import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
41 import Gargantext.Prelude
42 import Gargantext.Text.Context (TermList)
43 import Prelude (Bounded)
50 -- | Global parameters of a Phylo
52 PhyloParam { _phyloParam_version :: Text -- Double ?
53 , _phyloParam_software :: Software
54 , _phyloParam_query :: PhyloQueryBuild
55 } deriving (Generic, Show, Eq)
58 -- | Software parameters
60 Software { _software_name :: Text
61 , _software_version :: Text
62 } deriving (Generic, Show, Eq)
70 -- | Phylo datatype of a phylomemy
71 -- Duration : time Segment of the whole Phylo
72 -- Foundations : vector of all the Ngrams contained in a Phylo (build from a list of actants)
73 -- Periods : list of all the periods of a Phylo
75 Phylo { _phylo_duration :: (Start, End)
76 , _phylo_foundations :: PhyloFoundations
77 , _phylo_periods :: [PhyloPeriod]
78 , _phylo_docsByYears :: Map Date Double
79 , _phylo_cooc :: !(Map Date (Map (Int,Int) Double))
80 , _phylo_fis :: !(Map (Date,Date) [PhyloFis])
81 , _phylo_param :: PhyloParam
83 deriving (Generic, Show, Eq)
86 -- | The foundations of a phylomemy created from a given TermList
87 data PhyloFoundations =
88 PhyloFoundations { _phylo_foundationsRoots :: Vector Ngrams
89 , _phylo_foundationsTermsList :: TermList
90 } deriving (Generic, Show, Eq)
93 -- | Date : a simple Integer
96 -- | UTCTime in seconds since UNIX epoch
97 -- type Start = POSIXTime
98 -- type End = POSIXTime
103 ---------------------
104 -- | PhyloPeriod | --
105 ---------------------
108 -- | PhyloStep : steps of phylomemy on temporal axis
109 -- Period: tuple (start date, end date) of the step of the phylomemy
110 -- Levels: levels of granularity
112 PhyloPeriod { _phylo_periodId :: PhyloPeriodId
113 , _phylo_periodLevels :: [PhyloLevel]
115 deriving (Generic, Show, Eq)
123 -- | PhyloLevel : levels of phylomemy on level axis
124 -- Levels description:
125 -- Level -1: Ngram equals itself (by identity) == _phylo_Ngrams
126 -- Level 0: Group of synonyms (by stems + by qualitative expert meaning)
127 -- Level 1: First level of clustering
128 -- Level N: Nth level of clustering
130 PhyloLevel { _phylo_levelId :: PhyloLevelId
131 , _phylo_levelGroups :: [PhyloGroup]
133 deriving (Generic, Show, Eq)
141 -- | PhyloGroup : group of ngrams at each level and step
142 -- Label : maybe has a label as text
143 -- Ngrams: set of terms that build the group
144 -- Quality : map of measures (support, etc.) that depict some qualitative aspects of a phylo
145 -- Period Parents|Childs: weighted link to Parents|Childs (Temporal Period axis)
146 -- Level Parents|Childs: weighted link to Parents|Childs (Level Granularity axis)
147 -- Pointers are directed link from Self to any PhyloGroup (/= Self ?)
149 PhyloGroup { _phylo_groupId :: PhyloGroupId
150 , _phylo_groupLabel :: Text
151 , _phylo_groupNgrams :: [Int]
152 , _phylo_groupNgramsMeta :: Map Text [Double]
153 , _phylo_groupMeta :: Map Text Double
154 , _phylo_groupBranchId :: Maybe PhyloBranchId
155 , _phylo_groupCooc :: !(Map (Int,Int) Double)
157 , _phylo_groupPeriodParents :: [Pointer]
158 , _phylo_groupPeriodChilds :: [Pointer]
160 , _phylo_groupLevelParents :: [Pointer]
161 , _phylo_groupLevelChilds :: [Pointer]
163 deriving (Generic, NFData, Show, Eq, Ord)
165 -- instance NFData PhyloGroup
168 -- | Level : A level of aggregation (-1 = Txt, 0 = Ngrams, 1 = Fis, [2..] = Cluster)
170 -- | Index : A generic index of an element (PhyloGroup, PhyloBranch, etc) in a given List
174 type PhyloPeriodId = (Start, End)
175 type PhyloLevelId = (PhyloPeriodId, Level)
176 type PhyloGroupId = (PhyloLevelId, Index)
177 type PhyloBranchId = (Level, Index)
180 -- | Weight : A generic mesure that can be associated with an Id
182 -- | Pointer : A weighted linked with a given PhyloGroup
183 type Pointer = (PhyloGroupId, Weight)
184 -- | Ngrams : a contiguous sequence of n terms
193 -- | Document : a piece of Text linked to a Date
194 data Document = Document
197 } deriving (Show,Generic,NFData)
199 -- | Clique : Set of ngrams cooccurring in the same Document
200 type Clique = Set Ngrams
201 -- | Support : Number of Documents where a Clique occurs
203 -- | Fis : Frequent Items Set (ie: the association between a Clique and a Support)
204 data PhyloFis = PhyloFis
205 { _phyloFis_clique :: Clique
206 , _phyloFis_support :: Support
207 , _phyloFis_period :: (Date,Date)
208 } deriving (Generic,NFData,Show,Eq)
210 -- | A list of clustered PhyloGroup
211 type PhyloCluster = [PhyloGroup]
214 -- | A PhyloGroup in a Graph
215 type GroupNode = PhyloGroup
216 -- | A weighted links between two PhyloGroups in a Graph
217 type GroupEdge = ((PhyloGroup,PhyloGroup),Weight)
218 -- | The association as a Graph between a list of Nodes and a list of Edges
219 type GroupGraph = ([GroupNode],[GroupEdge])
227 data PhyloError = LevelDoesNotExist
237 -- | Cluster constructors
238 data Cluster = Fis FisParams
239 | RelatedComponents RCParams
240 | Louvain LouvainParams
241 deriving (Generic, Show, Eq, Read)
243 -- | Parameters for Fis clustering
244 data FisParams = FisParams
245 { _fis_keepMinorFis :: Bool
246 , _fis_minSupport :: Support
247 , _fis_minSize :: Int
248 } deriving (Generic, Show, Eq, Read)
250 -- | Parameters for RelatedComponents clustering
251 data RCParams = RCParams
252 { _rc_proximity :: Proximity } deriving (Generic, Show, Eq, Read)
254 -- | Parameters for Louvain clustering
255 data LouvainParams = LouvainParams
256 { _louvain_proximity :: Proximity } deriving (Generic, Show, Eq, Read)
264 -- | Proximity constructors
265 data Proximity = WeightedLogJaccard WLJParams
266 | Hamming HammingParams
268 deriving (Generic, Show, Eq, Read)
270 -- | Parameters for WeightedLogJaccard proximity
271 data WLJParams = WLJParams
272 { _wlj_threshold :: Double
273 , _wlj_sensibility :: Double
274 } deriving (Generic, Show, Eq, Read)
276 -- | Parameters for Hamming proximity
277 data HammingParams = HammingParams
278 { _hamming_threshold :: Double } deriving (Generic, Show, Eq, Read)
286 -- | Filter constructors
287 data Filter = LonelyBranch LBParams
288 | SizeBranch SBParams
289 deriving (Generic, Show, Eq)
291 -- | Parameters for LonelyBranch filter
292 data LBParams = LBParams
293 { _lb_periodsInf :: Int
294 , _lb_periodsSup :: Int
295 , _lb_minNodes :: Int } deriving (Generic, Show, Eq)
297 -- | Parameters for SizeBranch filter
298 data SBParams = SBParams
299 { _sb_minSize :: Int } deriving (Generic, Show, Eq)
307 -- | Metric constructors
308 data Metric = BranchAge | BranchBirth | BranchGroups deriving (Generic, Show, Eq, Read)
316 -- | Tagger constructors
317 data Tagger = BranchPeakFreq | BranchPeakCooc | BranchPeakInc
318 | GroupLabelCooc | GroupLabelInc | GroupLabelIncDyn deriving (Show,Generic,Read)
326 -- | Sort constructors
327 data Sort = ByBranchAge | ByBranchBirth deriving (Generic, Show, Read, Enum, Bounded)
328 data Order = Asc | Desc deriving (Generic, Show, Read)
336 -- | A Phyloquery describes a phylomemic reconstruction
337 data PhyloQueryBuild = PhyloQueryBuild
338 { _q_phyloTitle :: Text
339 , _q_phyloDesc :: Text
341 -- Grain and Steps for the PhyloPeriods
342 , _q_periodGrain :: Int
343 , _q_periodSteps :: Int
345 -- Clustering method for building the contextual unit of Phylo (ie: level 1)
346 , _q_contextualUnit :: Cluster
347 , _q_contextualUnitMetrics :: [Metric]
348 , _q_contextualUnitFilters :: [Filter]
350 -- Inter-temporal matching method of the Phylo
351 , _q_interTemporalMatching :: Proximity
352 , _q_interTemporalMatchingFrame :: Int
353 , _q_interTemporalMatchingFrameTh :: Double
355 , _q_reBranchThr :: Double
356 , _q_reBranchNth :: Int
358 -- Last level of reconstruction
359 , _q_nthLevel :: Level
360 -- Clustering method used from level 1 to nthLevel
361 , _q_nthCluster :: Cluster
362 } deriving (Generic, Show, Eq)
364 -- | To choose the Phylo edge you want to export : --> <-- <--> <=>
365 data Filiation = Ascendant | Descendant | Merge | Complete deriving (Generic, Show, Read)
366 data EdgeType = PeriodEdge | LevelEdge deriving (Generic, Show, Eq)
373 -- | A PhyloView is the output type of a Phylo
374 data PhyloView = PhyloView
375 { _pv_param :: PhyloParam
377 , _pv_description :: Text
378 , _pv_filiation :: Filiation
380 , _pv_periods :: [PhyloPeriodId]
381 , _pv_metrics :: Map Text [Double]
382 , _pv_branches :: [PhyloBranch]
383 , _pv_nodes :: [PhyloNode]
384 , _pv_edges :: [PhyloEdge]
385 } deriving (Generic, Show)
387 -- | A phyloview is made of PhyloBranches, edges and nodes
388 data PhyloBranch = PhyloBranch
389 { _pb_id :: PhyloBranchId
391 , _pb_metrics :: Map Text [Double]
392 } deriving (Generic, Show)
394 data PhyloEdge = PhyloEdge
395 { _pe_source :: PhyloGroupId
396 , _pe_target :: PhyloGroupId
397 , _pe_type :: EdgeType
398 , _pe_weight :: Weight
399 } deriving (Generic, Show)
401 data PhyloNode = PhyloNode
402 { _pn_id :: PhyloGroupId
403 , _pn_bid :: Maybe PhyloBranchId
406 , _pn_ngrams :: Maybe [Ngrams]
407 , _pn_metrics :: Map Text [Double]
408 , _pn_cooc :: Map (Int,Int) Double
409 , _pn_parents :: Maybe [PhyloGroupId]
410 , _pn_childs :: [PhyloNode]
411 } deriving (Generic, Show)
413 ------------------------
414 -- | PhyloQueryView | --
415 ------------------------
418 data ExportMode = Json | Dot | Svg
419 deriving (Generic, Show, Read)
420 data DisplayMode = Flat | Nested
421 deriving (Generic, Show, Read)
423 -- | A PhyloQueryView describes a Phylo as an output view
424 data PhyloQueryView = PhyloQueryView
427 -- Does the PhyloGraph contain ascendant, descendant or a complete Filiation ? Complet redondant et merge (avec le max)
428 , _qv_filiation :: Filiation
430 -- Does the PhyloGraph contain some levelChilds ? How deep must it go ?
431 , _qv_levelChilds :: Bool
432 , _qv_levelChildsDepth :: Level
434 -- Ordered lists of filters, taggers and metrics to be applied to the PhyloGraph
435 -- Firstly the metrics, then the filters and the taggers
436 , _qv_metrics :: [Metric]
437 , _qv_filters :: [Filter]
438 , _qv_taggers :: [Tagger]
440 -- An asc or desc sort to apply to the PhyloGraph
441 , _qv_sort :: Maybe (Sort,Order)
443 -- A display mode to apply to the PhyloGraph, ie: [Node[Node,Edge],Edge] or [[Node,Node],[Edge,Edge]]
444 , _qv_export :: ExportMode
445 , _qv_display :: DisplayMode
446 , _qv_verbose :: Bool
455 makeLenses ''PhyloParam
456 makeLenses ''Software
459 makeLenses ''PhyloFoundations
460 makeLenses ''PhyloGroup
461 makeLenses ''PhyloLevel
462 makeLenses ''PhyloPeriod
463 makeLenses ''PhyloFis
465 makeLenses ''Proximity
469 makeLenses ''PhyloQueryBuild
470 makeLenses ''PhyloQueryView
472 makeLenses ''PhyloView
473 makeLenses ''PhyloBranch
474 makeLenses ''PhyloNode
475 makeLenses ''PhyloEdge
478 ------------------------
479 -- | JSON instances | --
480 ------------------------
483 $(deriveJSON (unPrefix "_phylo_" ) ''Phylo )
484 $(deriveJSON (unPrefix "_phylo_foundations" ) ''PhyloFoundations )
485 $(deriveJSON (unPrefix "_phylo_period" ) ''PhyloPeriod )
486 $(deriveJSON (unPrefix "_phylo_level" ) ''PhyloLevel )
487 $(deriveJSON (unPrefix "_phylo_group" ) ''PhyloGroup )
488 $(deriveJSON (unPrefix "_phyloFis_" ) ''PhyloFis )
490 $(deriveJSON (unPrefix "_software_" ) ''Software )
491 $(deriveJSON (unPrefix "_phyloParam_" ) ''PhyloParam )
493 $(deriveJSON defaultOptions ''Filter )
494 $(deriveJSON defaultOptions ''Metric )
495 $(deriveJSON defaultOptions ''Cluster )
496 $(deriveJSON defaultOptions ''Proximity )
498 $(deriveJSON (unPrefix "_fis_" ) ''FisParams )
499 $(deriveJSON (unPrefix "_hamming_" ) ''HammingParams )
500 $(deriveJSON (unPrefix "_louvain_" ) ''LouvainParams )
501 $(deriveJSON (unPrefix "_rc_" ) ''RCParams )
502 $(deriveJSON (unPrefix "_wlj_" ) ''WLJParams )
504 $(deriveJSON (unPrefix "_lb_" ) ''LBParams )
505 $(deriveJSON (unPrefix "_sb_" ) ''SBParams )
507 $(deriveJSON (unPrefix "_q_" ) ''PhyloQueryBuild )
508 $(deriveJSON (unPrefix "_pv_" ) ''PhyloView )
509 $(deriveJSON (unPrefix "_pb_" ) ''PhyloBranch )
510 $(deriveJSON (unPrefix "_pe_" ) ''PhyloEdge )
511 $(deriveJSON (unPrefix "_pn_" ) ''PhyloNode )
513 $(deriveJSON defaultOptions ''Filiation )
514 $(deriveJSON defaultOptions ''EdgeType )
516 ---------------------------
517 -- | Swagger instances | --
518 ---------------------------
520 instance ToSchema Phylo where
521 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_")
522 instance ToSchema PhyloFoundations where
523 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_foundations")
524 instance ToSchema PhyloPeriod where
525 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_period")
526 instance ToSchema PhyloLevel where
527 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_level")
528 instance ToSchema PhyloGroup where
529 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_group")
530 instance ToSchema PhyloFis where
531 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phyloFis_")
532 instance ToSchema Software where
533 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_software_")
534 instance ToSchema PhyloParam where
535 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phyloParam_")
536 instance ToSchema Filter
537 instance ToSchema Metric
538 instance ToSchema Cluster
539 instance ToSchema Proximity where
540 declareNamedSchema = genericDeclareNamedSchemaUnrestricted defaultSchemaOptions
541 instance ToSchema FisParams where
542 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_fis_")
543 instance ToSchema HammingParams where
544 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_hamming_")
545 instance ToSchema LouvainParams where
546 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_louvain_")
547 instance ToSchema RCParams where
548 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_rc_")
549 instance ToSchema WLJParams where
550 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_wlj_")
551 instance ToSchema LBParams where
552 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_lb_")
553 instance ToSchema SBParams where
554 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_sb_")
555 instance ToSchema PhyloQueryBuild where
556 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_q_")
557 instance ToSchema PhyloView where
558 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_pv_")
559 instance ToSchema PhyloBranch where
560 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_pb_")
561 instance ToSchema PhyloEdge where
562 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_pe_")
563 instance ToSchema PhyloNode where
564 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_pn_")
565 instance ToSchema Filiation
566 instance ToSchema EdgeType
568 ----------------------------
569 -- | TODO XML instances | --
570 ----------------------------