2 Module : Gargantext.Viz.Phylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Specifications of Phylomemy export format.
12 Phylomemy can be described as a Temporal Graph with different scale of
13 granularity of group of ngrams (terms and multi-terms).
15 The main type is Phylo which is synonym of Phylomemy (only difference is
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
25 {-# LANGUAGE DeriveGeneric #-}
26 {-# LANGUAGE NoImplicitPrelude #-}
27 {-# LANGUAGE TemplateHaskell #-}
28 {-# LANGUAGE MultiParamTypeClasses #-}
30 module Gargantext.Viz.Phylo where
32 import Control.Lens (makeLenses)
33 import Data.Aeson.TH (deriveJSON)
34 import Data.Maybe (Maybe)
35 import Data.Text (Text)
38 import Data.Vector (Vector)
39 import Data.Time.Clock.POSIX (POSIXTime)
40 import GHC.Generics (Generic)
41 import Gargantext.Database.Schema.Ngrams (NgramsId)
42 import Gargantext.Core.Utils.Prefix (unPrefix)
43 import Gargantext.Prelude
45 ------------------------------------------------------------------------
47 PhyloExport { _phyloExport_param :: PhyloParam
48 , _phyloExport_data :: Phylo
51 -- | .phylo parameters
53 PhyloParam { _phyloParam_version :: Text -- Double ?
54 , _phyloParam_software :: Software
55 , _phyloParam_params :: Hash
61 -- TODO move somewhere since it is generic
63 Software { _software_name :: Text
64 , _software_version :: Text
67 ------------------------------------------------------------------------
69 -- | Phylo datatype descriptor of a phylomemy
70 -- Duration : time Segment of the whole phylomemy (start,end)
71 -- Ngrams : list of all (possible) terms contained in the phylomemy (with their id)
72 -- Steps : list of all steps to build the phylomemy
74 Phylo { _phylo_duration :: (Start, End)
75 , _phylo_ngrams :: PhyloNgrams
76 , _phylo_periods :: [PhyloPeriod]
77 , _phylo_branches :: [PhyloBranch]
79 deriving (Generic, Show)
82 -- | Date : a simple Integer
85 -- | UTCTime in seconds since UNIX epoch
86 -- type Start = POSIXTime
87 -- type End = POSIXTime
91 -- | PhyloStep : steps of phylomemy on temporal axis
92 -- Period: tuple (start date, end date) of the step of the phylomemy
93 -- Levels: levels of granularity
95 PhyloPeriod { _phylo_periodId :: PhyloPeriodId
96 , _phylo_periodLevels :: [PhyloLevel]
98 deriving (Generic, Show)
101 -- | PhyloLevel : levels of phylomemy on level axis
102 -- Levels description:
103 -- Level -1: Ngram equals itself (by identity) == _phylo_Ngrams
104 -- Level 0: Group of synonyms (by stems + by qualitative expert meaning)
105 -- Level 1: First level of clustering
106 -- Level N: Nth level of clustering
108 PhyloLevel { _phylo_levelId :: PhyloLevelId
109 , _phylo_levelGroups :: [PhyloGroup]
111 deriving (Generic, Show)
114 -- | PhyloGroup : group of ngrams at each level and step
115 -- Label : maybe has a label as text
116 -- Ngrams: set of terms that build the group
117 -- Quality : map of measures (support, etc.) that depict some qualitative aspects of a phylo
118 -- Period Parents|Childs: weighted link to Parents|Childs (Temporal Period axis)
119 -- Level Parents|Childs: weighted link to Parents|Childs (Level Granularity axis)
120 -- Pointers are directed link from Self to any PhyloGroup (/= Self ?)
122 PhyloGroup { _phylo_groupId :: PhyloGroupId
123 , _phylo_groupLabel :: Text
124 , _phylo_groupNgrams :: [Int]
125 , _phylo_groupQuality :: Map Text Double
126 , _phylo_groupCooc :: Map (Int, Int) Double
128 , _phylo_groupPeriodParents :: [Pointer]
129 , _phylo_groupPeriodChilds :: [Pointer]
131 , _phylo_groupLevelParents :: [Pointer]
132 , _phylo_groupLevelChilds :: [Pointer]
134 deriving (Generic, Show, Eq, Ord)
137 PhyloBranch { _phylo_branchId :: (Level,Int)
138 , _phylo_branchLabel :: Text
139 , _phylo_branchGroups :: [PhyloGroupId]
141 deriving (Generic, Show)
144 -- | Level : A level of aggregation (-1 = Txt, 0 = Ngrams, 1 = Fis, [2..] = Cluster)
146 -- | Index : A generic index of an element (PhyloGroup, PhyloBranch, etc) in a given List
150 type PhyloPeriodId = (Start, End)
151 type PhyloLevelId = (PhyloPeriodId, Level)
152 type PhyloGroupId = (PhyloLevelId, Index)
153 type PhyloBranchId = (Level, Index)
156 -- | Weight : A generic mesure that can be associated with an Id
158 -- | Pointer : A weighted linked with a given PhyloGroup
159 type Pointer = (PhyloGroupId, Weight)
162 -- | Ngrams : a contiguous sequence of n terms
164 -- | PhyloNgrams : Vector of all the Ngrams (PhyloGroup of level -1) used within a Phylo
165 type PhyloNgrams = Vector Ngrams
168 -- | Clique : Set of ngrams cooccurring in the same Document
169 type Clique = Set Ngrams
170 -- | Support : Number of Documents where a Clique occurs
172 -- | Fis : Frequent Items Set (ie: the association between a Clique and a Support)
173 type Fis = (Clique,Support)
176 -- | Document : a piece of Text linked to a Date
177 data Document = Document
183 type Cluster = [PhyloGroup]
186 -- | A List of PhyloGroup in a PhyloGraph
187 type PhyloNodes = [PhyloGroup]
188 -- | A List of weighted links between some PhyloGroups in a PhyloGraph
189 type PhyloEdges = [((PhyloGroup,PhyloGroup),Weight)]
190 -- | The association as a Graph between a list of Nodes and a list of Edges
191 type PhyloGraph = (PhyloNodes,PhyloEdges)
194 data PhyloError = LevelDoesNotExist
199 -- | A List of Proximity mesures or strategies
200 data Proximity = WeightedLogJaccard | Hamming | FromPairs
201 -- | A List of Clustering methods
202 data Clustering = Louvain | RelatedComponents
205 data PairTo = Childs | Parents
209 makeLenses ''PhyloParam
210 makeLenses ''PhyloExport
211 makeLenses ''Software
212 makeLenses ''PhyloGroup
213 makeLenses ''PhyloLevel
214 makeLenses ''PhyloPeriod
215 makeLenses ''PhyloBranch
218 $(deriveJSON (unPrefix "_phylo_" ) ''Phylo )
219 $(deriveJSON (unPrefix "_phylo_period" ) 'PhyloPeriod )
220 $(deriveJSON (unPrefix "_phylo_level" ) ''PhyloLevel )
221 $(deriveJSON (unPrefix "_phylo_group" ) ''PhyloGroup )
222 $(deriveJSON (unPrefix "_phylo_branch" ) ''PhyloBranch )
224 $(deriveJSON (unPrefix "_software_" ) ''Software )
225 $(deriveJSON (unPrefix "_phyloParam_" ) ''PhyloParam )
226 $(deriveJSON (unPrefix "_phyloExport_" ) ''PhyloExport )
228 -- | TODO XML instances