2 Module : Gargantext.Viz.Phylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Specifications of Phylomemy export format.
12 Phylomemy can be described as a Temporal Graph with different scale of
13 granularity of group of ngrams (terms and multi-terms).
15 The main type is Phylo which is synonym of Phylomemy (only difference is
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
25 {-# LANGUAGE DeriveGeneric #-}
26 {-# LANGUAGE NoImplicitPrelude #-}
27 {-# LANGUAGE TemplateHaskell #-}
29 module Gargantext.Viz.Phylo where
31 import Control.Lens (makeLenses)
32 import Data.Aeson.TH (deriveJSON)
33 import Data.Maybe (Maybe)
34 import Data.Text (Text)
37 import Data.Vector (Vector)
38 import Data.Time.Clock.POSIX (POSIXTime)
39 import GHC.Generics (Generic)
40 import Gargantext.Database.Schema.Ngrams (NgramsId)
41 import Gargantext.Core.Utils.Prefix (unPrefix)
42 import Gargantext.Prelude
44 ------------------------------------------------------------------------
46 PhyloExport { _phyloExport_param :: PhyloParam
47 , _phyloExport_data :: Phylo
50 -- | .phylo parameters
52 PhyloParam { _phyloParam_version :: Text -- Double ?
53 , _phyloParam_software :: Software
54 , _phyloParam_params :: Hash
60 -- TODO move somewhere since it is generic
62 Software { _software_name :: Text
63 , _software_version :: Text
66 ------------------------------------------------------------------------
67 -- | Phylo datatype descriptor of a phylomemy
68 -- Duration : time Segment of the whole phylomemy (start,end)
69 -- Ngrams : list of all (possible) terms contained in the phylomemy (with their id)
70 -- Steps : list of all steps to build the phylomemy
72 Phylo { _phylo_duration :: (Start, End)
73 , _phylo_ngrams :: PhyloNgrams
74 , _phylo_periods :: [PhyloPeriod]
75 , _phylo_branches :: [PhyloBranch]
77 deriving (Generic, Show)
80 -- | Date : a simple Integer
83 -- | UTCTime in seconds since UNIX epoch
84 -- type Start = POSIXTime
85 -- type End = POSIXTime
89 -- | PhyloStep : steps of phylomemy on temporal axis
90 -- Period: tuple (start date, end date) of the step of the phylomemy
91 -- Levels: levels of granularity
93 PhyloPeriod { _phylo_periodId :: PhyloPeriodId
94 , _phylo_periodLevels :: [PhyloLevel]
96 deriving (Generic, Show)
99 -- | PhyloLevel : levels of phylomemy on level axis
100 -- Levels description:
101 -- Level -1: Ngram equals itself (by identity) == _phylo_Ngrams
102 -- Level 0: Group of synonyms (by stems + by qualitative expert meaning)
103 -- Level 1: First level of clustering
104 -- Level N: Nth level of clustering
106 PhyloLevel { _phylo_levelId :: PhyloLevelId
107 , _phylo_levelGroups :: [PhyloGroup]
109 deriving (Generic, Show)
112 -- | PhyloGroup : group of ngrams at each level and step
113 -- Label : maybe has a label as text
114 -- Ngrams: set of terms that build the group
115 -- Quality : map of measures (support, etc.) that depict some qualitative aspects of a phylo
116 -- Period Parents|Childs: weighted link to Parents|Childs (Temporal Period axis)
117 -- Level Parents|Childs: weighted link to Parents|Childs (Level Granularity axis)
118 -- Pointers are directed link from Self to any PhyloGroup (/= Self ?)
120 PhyloGroup { _phylo_groupId :: PhyloGroupId
121 , _phylo_groupLabel :: Text
122 , _phylo_groupNgrams :: [Int]
123 , _phylo_groupQuality :: Map Text Double
124 , _phylo_groupCooc :: Map (Int, Int) Double
126 , _phylo_groupPeriodParents :: [Pointer]
127 , _phylo_groupPeriodChilds :: [Pointer]
129 , _phylo_groupLevelParents :: [Pointer]
130 , _phylo_groupLevelChilds :: [Pointer]
132 deriving (Generic, Show, Eq)
135 PhyloBranch { _phylo_branchId :: (Int,Int)
136 , _phylo_branchLabel :: Text
137 , _phylo_branchGroups :: [PhyloGroupId]
139 deriving (Generic, Show)
142 -- | PhyloPeriodId : A period of time framed by a starting Date and an ending Date
143 type PhyloPeriodId = (Start, End)
144 -- | Level : A level of aggregation (-1 = Txt, 0 = Ngrams, 1 = Fis, [2..] = Cluster)
146 -- | Index : A generic index of an element (PhyloGroup, PhyloBranch, etc) in a given List
150 type PhyloLevelId = (PhyloPeriodId, Level)
151 type PhyloGroupId = (PhyloLevelId, Index)
152 type PhyloBranchId = (Level, Index)
154 type Pointer = (PhyloGroupId, Weight)
165 -- | Ngrams : a contiguous sequence of n terms
167 -- | PhyloNgrams : Vector of all the Ngrams (PhyloGroup of level -1) used within a Phylo
168 type PhyloNgrams = Vector Ngrams
171 -- | Clique : Set of ngrams cooccurring in the same Document
172 type Clique = Set Ngrams
173 -- | Support : Number of Documents where a Clique occurs
175 -- | Fis : Frequent Items Set (ie: the association between a Clique and a Support)
176 type Fis = (Clique,Support)
180 -- | Document : a piece of Text linked to a Date
181 data Document = Document
189 data PhyloError = LevelDoesNotExist
194 type PhyloGraph = (PhyloNodes,PhyloEdges)
195 type PhyloNodes = [PhyloGroup]
196 type PhyloEdges = [(((PhyloGroup,PhyloGroup)),Double)]
199 data Proximity = WeightedLogJaccard | Hamming | FromPairs
201 data Clustering = Louvain | RelatedComponents
204 data PairTo = Childs | Parents
208 makeLenses ''PhyloParam
209 makeLenses ''PhyloExport
210 makeLenses ''Software
211 makeLenses ''PhyloGroup
212 makeLenses ''PhyloLevel
213 makeLenses ''PhyloPeriod
214 makeLenses ''PhyloBranch
217 $(deriveJSON (unPrefix "_phylo_" ) ''Phylo )
218 $(deriveJSON (unPrefix "_phylo_period" ) 'PhyloPeriod )
219 $(deriveJSON (unPrefix "_phylo_level" ) ''PhyloLevel )
220 $(deriveJSON (unPrefix "_phylo_group" ) ''PhyloGroup )
221 $(deriveJSON (unPrefix "_phylo_branch" ) ''PhyloBranch )
223 $(deriveJSON (unPrefix "_software_" ) ''Software )
224 $(deriveJSON (unPrefix "_phyloParam_" ) ''PhyloParam )
225 $(deriveJSON (unPrefix "_phyloExport_" ) ''PhyloExport )
227 -- | TODO XML instances