2 Module : Gargantext.Viz.Phylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Specifications of Phylomemy export format.
12 Phylomemy can be described as a Temporal Graph with different scale of
13 granularity of group of ngrams (terms and multi-terms).
15 The main type is Phylo which is synonym of Phylomemy (only difference is
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
25 {-# LANGUAGE DeriveGeneric #-}
26 {-# LANGUAGE NoImplicitPrelude #-}
27 {-# LANGUAGE TemplateHaskell #-}
28 {-# LANGUAGE MultiParamTypeClasses #-}
30 module Gargantext.Viz.Phylo where
32 import Control.Lens (makeLenses)
33 import Data.Aeson.TH (deriveJSON)
34 import Data.Maybe (Maybe)
35 import Data.Text (Text)
38 import Data.Vector (Vector)
39 import Data.Time.Clock.POSIX (POSIXTime)
40 import GHC.Generics (Generic)
41 import Gargantext.Database.Schema.Ngrams (NgramsId)
42 import Gargantext.Core.Utils.Prefix (unPrefix)
43 import Gargantext.Prelude
45 ------------------------------------------------------------------------
47 PhyloExport { _phyloExport_param :: PhyloParam
48 , _phyloExport_data :: Phylo
51 -- | .phylo parameters
53 PhyloParam { _phyloParam_version :: Text -- Double ?
54 , _phyloParam_software :: Software
55 , _phyloParam_params :: Hash
61 -- TODO move somewhere since it is generic
63 Software { _software_name :: Text
64 , _software_version :: Text
67 ------------------------------------------------------------------------
69 -- | Phylo datatype descriptor of a phylomemy
70 -- Duration : time Segment of the whole phylomemy (start,end)
71 -- Ngrams : list of all (possible) terms contained in the phylomemy (with their id)
72 -- Steps : list of all steps to build the phylomemy
74 Phylo { _phylo_duration :: (Start, End)
75 , _phylo_ngrams :: PhyloNgrams
76 , _phylo_periods :: [PhyloPeriod]
77 , _phylo_branches :: [PhyloBranch]
79 deriving (Generic, Show)
82 -- | Date : a simple Integer
85 -- | UTCTime in seconds since UNIX epoch
86 -- type Start = POSIXTime
87 -- type End = POSIXTime
91 -- | PhyloStep : steps of phylomemy on temporal axis
92 -- Period: tuple (start date, end date) of the step of the phylomemy
93 -- Levels: levels of granularity
95 PhyloPeriod { _phylo_periodId :: PhyloPeriodId
96 , _phylo_periodLevels :: [PhyloLevel]
98 deriving (Generic, Show)
101 -- | PhyloLevel : levels of phylomemy on level axis
102 -- Levels description:
103 -- Level -1: Ngram equals itself (by identity) == _phylo_Ngrams
104 -- Level 0: Group of synonyms (by stems + by qualitative expert meaning)
105 -- Level 1: First level of clustering
106 -- Level N: Nth level of clustering
108 PhyloLevel { _phylo_levelId :: PhyloLevelId
109 , _phylo_levelGroups :: [PhyloGroup]
111 deriving (Generic, Show)
114 -- | PhyloGroup : group of ngrams at each level and step
115 -- Label : maybe has a label as text
116 -- Ngrams: set of terms that build the group
117 -- Quality : map of measures (support, etc.) that depict some qualitative aspects of a phylo
118 -- Period Parents|Childs: weighted link to Parents|Childs (Temporal Period axis)
119 -- Level Parents|Childs: weighted link to Parents|Childs (Level Granularity axis)
120 -- Pointers are directed link from Self to any PhyloGroup (/= Self ?)
122 PhyloGroup { _phylo_groupId :: PhyloGroupId
123 , _phylo_groupLabel :: Text
124 , _phylo_groupNgrams :: [Int]
125 , _phylo_groupQuality :: Map Text Double
126 , _phylo_groupCooc :: Map (Int, Int) Double
128 , _phylo_groupPeriodParents :: [Pointer]
129 , _phylo_groupPeriodChilds :: [Pointer]
131 , _phylo_groupLevelParents :: [Pointer]
132 , _phylo_groupLevelChilds :: [Pointer]
134 deriving (Generic, Show, Eq)
137 PhyloBranch { _phylo_branchId :: (Int,Int)
138 , _phylo_branchLabel :: Text
139 , _phylo_branchGroups :: [PhyloGroupId]
141 deriving (Generic, Show)
144 -- | Level : A level of aggregation (-1 = Txt, 0 = Ngrams, 1 = Fis, [2..] = Cluster)
146 -- | Index : A generic index of an element (PhyloGroup, PhyloBranch, etc) in a given List
150 type PhyloPeriodId = (Start, End)
151 type PhyloLevelId = (PhyloPeriodId, Level)
152 type PhyloGroupId = (PhyloLevelId, Index)
153 type PhyloBranchId = (Level, Index)
156 -- | Weight : A generic mesure that can be associated with an Id
158 -- | Pointer : A weighted linked with a given PhyloGroup
159 type Pointer = (PhyloGroupId, Weight)
162 -- | Ngrams : a contiguous sequence of n terms
164 -- | PhyloNgrams : Vector of all the Ngrams (PhyloGroup of level -1) used within a Phylo
165 type PhyloNgrams = Vector Ngrams
168 -- | Clique : Set of ngrams cooccurring in the same Document
169 type Clique = Set Ngrams
170 -- | Support : Number of Documents where a Clique occurs
172 -- | Fis : Frequent Items Set (ie: the association between a Clique and a Support)
173 type Fis = (Clique,Support)
176 -- | Document : a piece of Text linked to a Date
177 data Document = Document
183 type Cluster = [PhyloGroup]
186 class AppendToPhylo a where
187 addPhyloLevel :: Level -> Map (Date,Date) [a] -> Phylo -> Phylo
188 initPhyloGroup :: a -> PhyloGroup
190 -- | A List of PhyloGroup in a PhyloGraph
191 type PhyloNodes = [PhyloGroup]
192 -- | A List of weighted links between some PhyloGroups in a PhyloGraph
193 type PhyloEdges = [(((PhyloGroup,PhyloGroup)),Weight)]
194 -- | The association as a Graph between a list of Nodes and a list of Edges
195 type PhyloGraph = (PhyloNodes,PhyloEdges)
198 data PhyloError = LevelDoesNotExist
203 -- | A List of Proximity mesures or strategies
204 data Proximity = WeightedLogJaccard | Hamming | FromPairs
205 -- | A List of Clustering methods
206 data Clustering = Louvain | RelatedComponents
209 data PairTo = Childs | Parents
213 makeLenses ''PhyloParam
214 makeLenses ''PhyloExport
215 makeLenses ''Software
216 makeLenses ''PhyloGroup
217 makeLenses ''PhyloLevel
218 makeLenses ''PhyloPeriod
219 makeLenses ''PhyloBranch
222 $(deriveJSON (unPrefix "_phylo_" ) ''Phylo )
223 $(deriveJSON (unPrefix "_phylo_period" ) 'PhyloPeriod )
224 $(deriveJSON (unPrefix "_phylo_level" ) ''PhyloLevel )
225 $(deriveJSON (unPrefix "_phylo_group" ) ''PhyloGroup )
226 $(deriveJSON (unPrefix "_phylo_branch" ) ''PhyloBranch )
228 $(deriveJSON (unPrefix "_software_" ) ''Software )
229 $(deriveJSON (unPrefix "_phyloParam_" ) ''PhyloParam )
230 $(deriveJSON (unPrefix "_phyloExport_" ) ''PhyloExport )
232 -- | TODO XML instances