2 Module : Gargantext.Viz.Phylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Specifications of Phylomemy export format.
12 Phylomemy can be described as a Temporal Graph with different scale of
13 granularity of group of ngrams (terms and multi-terms).
15 The main type is Phylo which is synonym of Phylomemy (only difference is
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
25 {-# LANGUAGE DeriveGeneric #-}
26 {-# LANGUAGE NoImplicitPrelude #-}
27 {-# LANGUAGE TemplateHaskell #-}
28 {-# LANGUAGE MultiParamTypeClasses #-}
30 module Gargantext.Viz.Phylo where
32 import Control.Lens (makeLenses)
33 import Data.Aeson.TH (deriveJSON)
34 import Data.Maybe (Maybe)
35 import Data.Text (Text)
38 import Data.Vector (Vector)
39 import Data.Time.Clock.POSIX (POSIXTime)
40 import GHC.Generics (Generic)
41 import Gargantext.Database.Schema.Ngrams (NgramsId)
42 import Gargantext.Core.Utils.Prefix (unPrefix)
43 import Gargantext.Prelude
45 ------------------------------------------------------------------------
47 PhyloExport { _phyloExport_param :: PhyloParam
48 , _phyloExport_data :: Phylo
51 -- | .phylo parameters
53 PhyloParam { _phyloParam_version :: Text -- Double ?
54 , _phyloParam_software :: Software
55 , _phyloParam_params :: Hash
61 -- TODO move somewhere since it is generic
63 Software { _software_name :: Text
64 , _software_version :: Text
67 ------------------------------------------------------------------------
69 -- | Phylo datatype of a phylomemy
70 -- Duration : time Segment of the whole Phylo
71 -- Foundations : vector of all the Ngrams contained in a Phylo (build from a list of actants)
72 -- Periods : list of all the periods of a Phylo
74 Phylo { _phylo_duration :: (Start, End)
75 , _phylo_foundations :: Vector Ngrams
76 , _phylo_periods :: [PhyloPeriod]
78 deriving (Generic, Show)
81 -- | Date : a simple Integer
84 -- | UTCTime in seconds since UNIX epoch
85 -- type Start = POSIXTime
86 -- type End = POSIXTime
90 -- | PhyloStep : steps of phylomemy on temporal axis
91 -- Period: tuple (start date, end date) of the step of the phylomemy
92 -- Levels: levels of granularity
94 PhyloPeriod { _phylo_periodId :: PhyloPeriodId
95 , _phylo_periodLevels :: [PhyloLevel]
97 deriving (Generic, Show)
100 -- | PhyloLevel : levels of phylomemy on level axis
101 -- Levels description:
102 -- Level -1: Ngram equals itself (by identity) == _phylo_Ngrams
103 -- Level 0: Group of synonyms (by stems + by qualitative expert meaning)
104 -- Level 1: First level of clustering
105 -- Level N: Nth level of clustering
107 PhyloLevel { _phylo_levelId :: PhyloLevelId
108 , _phylo_levelGroups :: [PhyloGroup]
110 deriving (Generic, Show)
113 -- | PhyloGroup : group of ngrams at each level and step
114 -- Label : maybe has a label as text
115 -- Ngrams: set of terms that build the group
116 -- Quality : map of measures (support, etc.) that depict some qualitative aspects of a phylo
117 -- Period Parents|Childs: weighted link to Parents|Childs (Temporal Period axis)
118 -- Level Parents|Childs: weighted link to Parents|Childs (Level Granularity axis)
119 -- Pointers are directed link from Self to any PhyloGroup (/= Self ?)
121 PhyloGroup { _phylo_groupId :: PhyloGroupId
122 , _phylo_groupLabel :: Text
123 , _phylo_groupNgrams :: [Int]
124 , _phylo_groupMeta :: Map Text Double
125 , _phylo_groupCooc :: Map (Int, Int) Double
126 , _phylo_groupBranchId :: Maybe PhyloBranchId
128 , _phylo_groupPeriodParents :: [Pointer]
129 , _phylo_groupPeriodChilds :: [Pointer]
131 , _phylo_groupLevelParents :: [Pointer]
132 , _phylo_groupLevelChilds :: [Pointer]
134 deriving (Generic, Show, Eq, Ord)
137 -- | Level : A level of aggregation (-1 = Txt, 0 = Ngrams, 1 = Fis, [2..] = Cluster)
139 -- | Index : A generic index of an element (PhyloGroup, PhyloBranch, etc) in a given List
143 type PhyloPeriodId = (Start, End)
144 type PhyloLevelId = (PhyloPeriodId, Level)
145 type PhyloGroupId = (PhyloLevelId, Index)
146 type PhyloBranchId = (Level, Index)
149 -- | Weight : A generic mesure that can be associated with an Id
151 -- | Pointer : A weighted linked with a given PhyloGroup
152 type Pointer = (PhyloGroupId, Weight)
153 -- | Ngrams : a contiguous sequence of n terms
157 -- | Clique : Set of ngrams cooccurring in the same Document
158 type Clique = Set Ngrams
159 -- | Support : Number of Documents where a Clique occurs
161 -- | Fis : Frequent Items Set (ie: the association between a Clique and a Support)
162 type Fis = (Clique,Support)
165 -- | Document : a piece of Text linked to a Date
166 data Document = Document
172 type Cluster = [PhyloGroup]
175 -- | A List of PhyloGroup in a PhyloGraph
176 type PhyloNodes = [PhyloGroup]
177 -- | A List of weighted links between some PhyloGroups in a PhyloGraph
178 type PhyloEdges = [((PhyloGroup,PhyloGroup),Weight)]
179 -- | The association as a Graph between a list of Nodes and a list of Edges
180 type PhyloGraph = (PhyloNodes,PhyloEdges)
183 data PhyloError = LevelDoesNotExist
188 -- | A List of Proximity mesures or strategies
189 data Proximity = WeightedLogJaccard | Hamming | FromPairs
190 -- | A List of Clustering methods
191 data Clustering = Louvain | RelatedComponents
193 data PairTo = Childs | Parents
197 data EdgeType = Directed | UnDirected
199 data ViewGraph = ViewGraph
200 { _view_graphParam :: PhyloParam
201 , _view_graphLabel :: Text
202 , _view_graphEdgeType :: EdgeType
203 , _view_graphBranches :: [(PhyloBranchId,Text)]
204 , _view_graphNodes :: [ViewNode]
205 , _view_graphEdges :: [ViewEdge]
208 data ViewEdge = ViewEdge
209 { _view_edgeSource :: PhyloGroupId
210 , _view_edgeTarget :: PhyloGroupId
211 , _view_edgeWeight :: Weight
214 data ViewNode = ViewNode
215 { _view_nodeId :: PhyloGroupId
216 , _view_nodeLabel :: Text
217 , _view_nodeNgrams :: [Ngrams]
218 , _view_nodeMeta :: Map Text Double
219 , _view_nodeParent :: PhyloGroupId
224 makeLenses ''PhyloParam
225 makeLenses ''PhyloExport
226 makeLenses ''Software
227 makeLenses ''PhyloGroup
228 makeLenses ''PhyloLevel
229 makeLenses ''PhyloPeriod
232 $(deriveJSON (unPrefix "_phylo_" ) ''Phylo )
233 $(deriveJSON (unPrefix "_phylo_period" ) 'PhyloPeriod )
234 $(deriveJSON (unPrefix "_phylo_level" ) ''PhyloLevel )
235 $(deriveJSON (unPrefix "_phylo_group" ) ''PhyloGroup )
237 $(deriveJSON (unPrefix "_software_" ) ''Software )
238 $(deriveJSON (unPrefix "_phyloParam_" ) ''PhyloParam )
239 $(deriveJSON (unPrefix "_phyloExport_" ) ''PhyloExport )
241 -- | TODO XML instances